go-twig/parser.go
semihalev b44bad903b Implement spaceless filter and apply tag
1. Added spaceless filter that removes whitespace between HTML tags
2. Implemented {% apply filter %} ... {% endapply %} tag
3. Updated spaceless tag to use the spaceless filter internally
4. Fixed endverbatim tag handling
5. Added tests for all new functionality

The apply tag allows applying filters to blocks of content, which is the
modern recommended approach in Twig, replacing the deprecated spaceless tag.

🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-03-11 17:28:57 +03:00

1217 lines
34 KiB
Go

package twig
import (
"fmt"
"strconv"
"strings"
)
// Token types
//
// Integer token kinds produced by tokenization. Declared with iota, so the
// whitespace-control variants appended after TOKEN_EOF simply continue the
// sequence.
const (
	TOKEN_TEXT          = iota // raw template text outside any Twig tag
	TOKEN_VAR_START            // {{
	TOKEN_VAR_END              // }}
	TOKEN_BLOCK_START          // {%
	TOKEN_BLOCK_END            // %}
	TOKEN_COMMENT_START        // {#
	TOKEN_COMMENT_END          // #}
	TOKEN_NAME                 // identifier (variable, tag, filter or test name)
	TOKEN_NUMBER               // numeric literal (int or float)
	TOKEN_STRING               // string literal (quotes already stripped by the tokenizer — TODO confirm)
	TOKEN_OPERATOR             // one of + - * / = < > ! & ~ ^ % (see isOperator)
	TOKEN_PUNCTUATION          // one of ( ) [ ] { } , . : | ? (see isPunctuation)
	TOKEN_EOF                  // end of token stream
	// Whitespace control token types
	TOKEN_VAR_START_TRIM   // {{-
	TOKEN_VAR_END_TRIM     // -}}
	TOKEN_BLOCK_START_TRIM // {%-
	TOKEN_BLOCK_END_TRIM   // -%}
)
// Parser handles parsing Twig templates into node trees
type Parser struct {
	source        string                      // raw template source currently being parsed
	tokens        []Token                     // token stream produced by the tokenizer
	tokenIndex    int                         // cursor into tokens during parsing
	filename      string                      // template name (unused in this file — presumably for diagnostics; confirm)
	cursor        int                         // byte offset into source, maintained by the tokenizer
	line          int                         // current line number, maintained by the tokenizer
	blockHandlers map[string]blockHandlerFunc // tag name -> handler for {% ... %} constructs
}

// blockHandlerFunc parses one {% tag %} construct (the dispatcher has already
// consumed the "{%" and the tag-name tokens) and returns the resulting node.
type blockHandlerFunc func(*Parser) (Node, error)
// Token represents a lexical token
type Token struct {
	Type  int    // one of the TOKEN_* constants
	Value string // literal text of the token
	Line  int    // 1-based source line on which the token starts
}
// Parse parses a template source into a node tree
//
// It resets all per-run parser state, tokenizes the source with the
// HTML-preserving tokenizer (everything outside Twig tags becomes TEXT
// tokens), applies whitespace-control trimming, and finally builds the node
// tree. The pooled token slice is released on every exit path.
func (p *Parser) Parse(source string) (Node, error) {
	// Fresh state for this run.
	p.source = source
	p.cursor = 0
	p.line = 1
	p.tokenIndex = 0

	// Register tag handlers before any {% ... %} tag is dispatched.
	p.initBlockHandlers()

	// Tokenize; HTML content outside Twig tags is preserved exactly.
	tokens, tokErr := p.htmlPreservingTokenize()
	if tokErr != nil {
		return nil, fmt.Errorf("tokenization error: %w", tokErr)
	}

	// Apply {{- / -}} style whitespace trimming across the token stream.
	p.tokens = processWhitespaceControl(tokens)

	nodes, parseErr := p.parseOuterTemplate()
	if parseErr != nil {
		// Return the pooled token slice before propagating the error.
		ReleaseTokenSlice(p.tokens)
		return nil, fmt.Errorf("parsing error: %w", parseErr)
	}

	// Success: release the tokens, then wrap the nodes in a root node.
	ReleaseTokenSlice(p.tokens)
	return NewRootNode(nodes, 1), nil
}
// Initialize block handlers for different tag types
//
// Opening tags get their dedicated parser; closing/branch tags all map to
// parseEndTag, since a well-formed template has them consumed by the matching
// opening tag's parser and they should never be dispatched directly.
func (p *Parser) initBlockHandlers() {
	handlers := map[string]blockHandlerFunc{
		"if":        p.parseIf,
		"for":       p.parseFor,
		"block":     p.parseBlock,
		"extends":   p.parseExtends,
		"include":   p.parseInclude,
		"set":       p.parseSet,
		"do":        p.parseDo,
		"macro":     p.parseMacro,
		"import":    p.parseImport,
		"from":      p.parseFrom,
		"spaceless": p.parseSpaceless,
		"verbatim":  p.parseVerbatim,
		"apply":     p.parseApply,
	}

	// Special closing tags - they will be handled in their corresponding
	// open tag parsers; reaching parseEndTag directly is an error.
	closers := []string{
		"endif", "endfor", "endmacro", "endblock",
		"endspaceless", "endapply", "endverbatim",
		"else", "elseif",
	}
	for _, tag := range closers {
		handlers[tag] = p.parseEndTag
	}

	p.blockHandlers = handlers
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// isOperator reports whether c is one of the single-byte operator characters
// recognized by the tokenizer.
func isOperator(c byte) bool {
	const operatorChars = "+-*/=<>!&~^%"
	return strings.IndexByte(operatorChars, c) >= 0
}
// isPunctuation reports whether c is one of the single-byte punctuation
// characters recognized by the tokenizer.
func isPunctuation(c byte) bool {
	const punctuationChars = "()[]{},.:|?"
	return strings.IndexByte(punctuationChars, c) >= 0
}
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// processEscapeSequences handles escape sequences in string literals.
//
// "\n", "\r" and "\t" decode to their control characters; every other
// backslash-prefixed character (including "\\", quotes, and the Twig
// delimiters "{" / "}") decodes to the character itself with the backslash
// dropped. A trailing lone backslash is emitted verbatim.
func processEscapeSequences(s string) string {
	var out strings.Builder
	out.Grow(len(s)) // result can never be longer than the input

	i := 0
	for i < len(s) {
		c := s[i]
		// Plain character, or a backslash with nothing after it.
		if c != '\\' || i+1 >= len(s) {
			out.WriteByte(c)
			i++
			continue
		}
		// Decode the two-byte escape sequence.
		esc := s[i+1]
		switch esc {
		case 'n':
			out.WriteByte('\n')
		case 'r':
			out.WriteByte('\r')
		case 't':
			out.WriteByte('\t')
		default:
			// Backslash, quotes, '{', '}' and unknown escapes all yield
			// the escaped character itself.
			out.WriteByte(esc)
		}
		i += 2
	}
	return out.String()
}
// Parse the outer level of a template (text, print tags, blocks)
//
// parseOuterTemplate consumes tokens until EOF or until it encounters a
// branch/closing tag (endif, else, endfor, ...). In the latter case it
// rewinds the cursor back to the tag's "{%" token and returns, so the
// caller (the opening tag's handler) can consume the closer itself.
func (p *Parser) parseOuterTemplate() ([]Node, error) {
	var nodes []Node
	for p.tokenIndex < len(p.tokens) && p.tokens[p.tokenIndex].Type != TOKEN_EOF {
		token := p.tokens[p.tokenIndex]
		switch token.Type {
		case TOKEN_TEXT:
			// Raw template text passes through unchanged.
			nodes = append(nodes, NewTextNode(token.Value, token.Line))
			p.tokenIndex++
		case TOKEN_VAR_START, TOKEN_VAR_START_TRIM:
			// Handle both normal and whitespace trimming var start tokens
			p.tokenIndex++
			expr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			nodes = append(nodes, NewPrintNode(expr, token.Line))
			// Check for either normal or whitespace trimming var end tokens
			if p.tokenIndex >= len(p.tokens) || !isVarEndToken(p.tokens[p.tokenIndex].Type) {
				return nil, fmt.Errorf("expected }} or -}} at line %d", token.Line)
			}
			p.tokenIndex++
		case TOKEN_BLOCK_START, TOKEN_BLOCK_START_TRIM:
			// Handle both normal and whitespace trimming block start tokens
			p.tokenIndex++
			if p.tokenIndex >= len(p.tokens) || p.tokens[p.tokenIndex].Type != TOKEN_NAME {
				return nil, fmt.Errorf("expected block name at line %d", token.Line)
			}
			blockName := p.tokens[p.tokenIndex].Value
			p.tokenIndex++
			// Check if this is a control ending tag (endif, endfor, endblock, etc.)
			if blockName == "endif" || blockName == "endfor" || blockName == "endblock" ||
				blockName == "endmacro" || blockName == "else" || blockName == "elseif" ||
				blockName == "endspaceless" || blockName == "endapply" || blockName == "endverbatim" {
				// We should return to the parent parser that's handling the parent block
				// First move back two steps to the start of the block tag
				// (we consumed the "{%" token and the name token above).
				p.tokenIndex -= 2
				return nodes, nil
			}
			// Check if we have a handler for this block type
			handler, ok := p.blockHandlers[blockName]
			if !ok {
				return nil, fmt.Errorf("unknown block type '%s' at line %d", blockName, token.Line)
			}
			// The handler is entered with the cursor just past the tag name.
			node, err := handler(p)
			if err != nil {
				return nil, err
			}
			nodes = append(nodes, node)
		case TOKEN_COMMENT_START:
			// Skip comments
			p.tokenIndex++
			startLine := token.Line
			// Find the end of the comment
			for p.tokenIndex < len(p.tokens) && p.tokens[p.tokenIndex].Type != TOKEN_COMMENT_END {
				p.tokenIndex++
			}
			if p.tokenIndex >= len(p.tokens) {
				return nil, fmt.Errorf("unclosed comment starting at line %d", startLine)
			}
			p.tokenIndex++
		// Add special handling for trim token types
		case TOKEN_VAR_END_TRIM, TOKEN_BLOCK_END_TRIM:
			// These should have been handled with their corresponding start tokens
			return nil, fmt.Errorf("unexpected token %v at line %d", token.Type, token.Line)
		// Add special handling for TOKEN_NAME outside of a tag
		case TOKEN_NAME, TOKEN_PUNCTUATION, TOKEN_OPERATOR, TOKEN_STRING, TOKEN_NUMBER:
			// For raw names, punctuation, operators, and literals not inside tags, convert to text
			// In many languages, the text "true" is a literal boolean, but in our parser it's just a name token
			// outside of an expression context
			// Special handling for text content words - add spaces between consecutive text tokens
			// This fixes issues with the spaceless tag's handling of text content
			if token.Type == TOKEN_NAME && p.tokenIndex+1 < len(p.tokens) &&
				p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
				p.tokens[p.tokenIndex+1].Line == token.Line {
				// Look ahead for consecutive name tokens and join them with spaces
				var textContent strings.Builder
				textContent.WriteString(token.Value)
				currentLine := token.Line
				p.tokenIndex++ // Skip the first token as we've already added it
				// Collect consecutive name tokens on the same line
				for p.tokenIndex < len(p.tokens) &&
					p.tokens[p.tokenIndex].Type == TOKEN_NAME &&
					p.tokens[p.tokenIndex].Line == currentLine {
					textContent.WriteString(" ") // Add space between words
					textContent.WriteString(p.tokens[p.tokenIndex].Value)
					p.tokenIndex++
				}
				nodes = append(nodes, NewTextNode(textContent.String(), token.Line))
			} else {
				// Regular handling for single text tokens
				nodes = append(nodes, NewTextNode(token.Value, token.Line))
				p.tokenIndex++
			}
		default:
			return nil, fmt.Errorf("unexpected token %v at line %d", token.Type, token.Line)
		}
	}
	return nodes, nil
}
// Parse an expression
//
// Parses a primary expression, then greedily extends it with postfix
// subscripts ([...]), filter chains (|...), binary operators, and finally a
// trailing ternary (?:). Each stage loops so consecutive occurrences are
// folded left-to-right.
func (p *Parser) parseExpression() (Node, error) {
	// Parse the primary expression first
	expr, err := p.parseSimpleExpression()
	if err != nil {
		return nil, err
	}
	// Check for array access with square brackets
	for p.tokenIndex < len(p.tokens) &&
		p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
		p.tokens[p.tokenIndex].Value == "[" {
		// Get the line number for error reporting
		line := p.tokens[p.tokenIndex].Line
		// Skip the opening bracket
		p.tokenIndex++
		// Parse the index expression
		indexExpr, err := p.parseExpression()
		if err != nil {
			return nil, err
		}
		// Expect closing bracket
		if p.tokenIndex >= len(p.tokens) ||
			p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
			p.tokens[p.tokenIndex].Value != "]" {
			return nil, fmt.Errorf("expected closing bracket after array index at line %d", line)
		}
		p.tokenIndex++ // Skip closing bracket
		// Create a GetItemNode
		expr = NewGetItemNode(expr, indexExpr, line)
	}
	// Now check for filter operator (|)
	// Process all filters in a loop to handle consecutive filters properly
	// (parseFilters itself also loops, so this outer loop is a safety net).
	for p.tokenIndex < len(p.tokens) &&
		p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
		p.tokens[p.tokenIndex].Value == "|" {
		expr, err = p.parseFilters(expr)
		if err != nil {
			return nil, err
		}
	}
	// Check for binary operators (and, or, ==, !=, <, >, etc.)
	// Loop to handle multiple binary operators in sequence, such as 'hello' ~ ' ' ~ 'world'
	// Word operators ("and", "or", ...) arrive as TOKEN_NAME, so both token
	// types must be tested here.
	for p.tokenIndex < len(p.tokens) &&
		(p.tokens[p.tokenIndex].Type == TOKEN_OPERATOR ||
			(p.tokens[p.tokenIndex].Type == TOKEN_NAME &&
				(p.tokens[p.tokenIndex].Value == "and" ||
					p.tokens[p.tokenIndex].Value == "or" ||
					p.tokens[p.tokenIndex].Value == "in" ||
					p.tokens[p.tokenIndex].Value == "not" ||
					p.tokens[p.tokenIndex].Value == "is" ||
					p.tokens[p.tokenIndex].Value == "matches" ||
					p.tokens[p.tokenIndex].Value == "starts" ||
					p.tokens[p.tokenIndex].Value == "ends"))) {
		expr, err = p.parseBinaryExpression(expr)
		if err != nil {
			return nil, err
		}
	}
	// Check for ternary operator (? :)
	if p.tokenIndex < len(p.tokens) &&
		p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
		p.tokens[p.tokenIndex].Value == "?" {
		return p.parseConditionalExpression(expr)
	}
	return expr, nil
}
// Parse ternary conditional expression (condition ? true_expr : false_expr).
// Called with the already-parsed condition while the cursor sits on the "?".
func (p *Parser) parseConditionalExpression(condition Node) (Node, error) {
	line := p.tokens[p.tokenIndex].Line
	p.tokenIndex++ // consume "?"

	// Branch evaluated when the condition is truthy.
	whenTrue, err := p.parseExpression()
	if err != nil {
		return nil, err
	}

	// The two branches must be separated by a ":".
	if p.tokenIndex >= len(p.tokens) ||
		p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
		p.tokens[p.tokenIndex].Value != ":" {
		return nil, fmt.Errorf("expected ':' after true expression in conditional at line %d", line)
	}
	p.tokenIndex++ // consume ":"

	// Branch evaluated when the condition is falsy.
	whenFalse, err := p.parseExpression()
	if err != nil {
		return nil, err
	}

	return &ConditionalNode{
		ExpressionNode: ExpressionNode{
			exprType: ExprConditional,
			line:     line,
		},
		condition: condition,
		trueExpr:  whenTrue,
		falseExpr: whenFalse,
	}, nil
}
// Parse a simple expression (literal, variable, function call, array)
//
// Handles, in order: unary prefixes (not, -, +), string/number literals,
// names (boolean/null literals, function calls, variables with attribute
// access or module.method() calls), and punctuation-introduced forms
// (array literal, hash literal, parenthesized sub-expression).
func (p *Parser) parseSimpleExpression() (Node, error) {
	if p.tokenIndex >= len(p.tokens) {
		return nil, fmt.Errorf("unexpected end of template")
	}
	token := p.tokens[p.tokenIndex]
	// Handle unary operators like 'not' and unary minus/plus
	if (token.Type == TOKEN_NAME && token.Value == "not") ||
		(token.Type == TOKEN_OPERATOR && (token.Value == "-" || token.Value == "+")) {
		// Skip the operator token
		operator := token.Value
		p.tokenIndex++
		// Get the line number for the unary node
		line := token.Line
		// Parse the operand (recursive, so "--x" style nesting works)
		operand, err := p.parseSimpleExpression()
		if err != nil {
			return nil, err
		}
		// Create a unary node
		return NewUnaryNode(operator, operand, line), nil
	}
	switch token.Type {
	case TOKEN_STRING:
		p.tokenIndex++
		// For string literals, process escape sequences
		processedValue := processEscapeSequences(token.Value)
		return NewLiteralNode(processedValue, token.Line), nil
	case TOKEN_NUMBER:
		p.tokenIndex++
		// Attempt to convert to int or float
		// Parse errors are ignored — the tokenizer presumably only emits
		// well-formed numbers. NOTE(review): confirm; a malformed token
		// would silently become 0 here.
		if strings.Contains(token.Value, ".") {
			// It's a float
			val, _ := strconv.ParseFloat(token.Value, 64)
			return NewLiteralNode(val, token.Line), nil
		} else {
			// It's an int
			val, _ := strconv.Atoi(token.Value)
			return NewLiteralNode(val, token.Line), nil
		}
	case TOKEN_NAME:
		p.tokenIndex++
		// Store the variable name for function calls
		varName := token.Value
		varLine := token.Line
		// Special handling for boolean literals and null
		if varName == "true" {
			return NewLiteralNode(true, varLine), nil
		} else if varName == "false" {
			return NewLiteralNode(false, varLine), nil
		} else if varName == "null" || varName == "nil" {
			return NewLiteralNode(nil, varLine), nil
		}
		// Check if this is a function call (name followed by opening parenthesis)
		if p.tokenIndex < len(p.tokens) &&
			p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
			p.tokens[p.tokenIndex].Value == "(" {
			// This is a function call
			p.tokenIndex++ // Skip the opening parenthesis
			// Parse arguments list
			var args []Node
			// If there are arguments (not empty parentheses)
			if p.tokenIndex < len(p.tokens) &&
				!(p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
					p.tokens[p.tokenIndex].Value == ")") {
				for {
					// Parse each argument expression
					argExpr, err := p.parseExpression()
					if err != nil {
						return nil, err
					}
					args = append(args, argExpr)
					// Check for comma separator between arguments
					if p.tokenIndex < len(p.tokens) &&
						p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
						p.tokens[p.tokenIndex].Value == "," {
						p.tokenIndex++ // Skip comma
						continue
					}
					// No comma, so must be end of argument list
					break
				}
			}
			// Expect closing parenthesis
			if p.tokenIndex >= len(p.tokens) ||
				p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
				p.tokens[p.tokenIndex].Value != ")" {
				return nil, fmt.Errorf("expected closing parenthesis after function arguments at line %d", varLine)
			}
			p.tokenIndex++ // Skip closing parenthesis
			// Create and return function node
			return NewFunctionNode(varName, args, varLine), nil
		}
		// If not a function call, it's a regular variable
		var result Node = NewVariableNode(varName, varLine)
		// Check for attribute access (obj.attr) or method calls (obj.method())
		// Loops so chains like a.b.c and a.b().c work.
		for p.tokenIndex < len(p.tokens) &&
			p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
			p.tokens[p.tokenIndex].Value == "." {
			p.tokenIndex++
			if p.tokenIndex >= len(p.tokens) || p.tokens[p.tokenIndex].Type != TOKEN_NAME {
				return nil, fmt.Errorf("expected attribute name at line %d", varLine)
			}
			attrName := p.tokens[p.tokenIndex].Value
			attrNode := NewLiteralNode(attrName, p.tokens[p.tokenIndex].Line)
			p.tokenIndex++
			// Check if this is a method call like (module.method())
			if p.tokenIndex < len(p.tokens) &&
				p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
				p.tokens[p.tokenIndex].Value == "(" {
				if IsDebugEnabled() && debugger.level >= DebugVerbose {
					LogVerbose("Detected module.method call: %s.%s(...)", varName, attrName)
				}
				// This is a method call with the method stored in attrName
				// We'll use the moduleExpr field in FunctionNode to store the module expression
				// Parse the arguments
				p.tokenIndex++ // Skip opening parenthesis
				// Parse arguments
				var args []Node
				// If there are arguments (not empty parentheses)
				// (the inner p.tokenIndex < len(p.tokens) check is redundant
				// with the outer one but kept as-is)
				if p.tokenIndex < len(p.tokens) &&
					!(p.tokenIndex < len(p.tokens) &&
						p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
						p.tokens[p.tokenIndex].Value == ")") {
					for {
						// Parse each argument expression
						argExpr, err := p.parseExpression()
						if err != nil {
							return nil, err
						}
						args = append(args, argExpr)
						// Check for comma separator between arguments
						if p.tokenIndex < len(p.tokens) &&
							p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
							p.tokens[p.tokenIndex].Value == "," {
							p.tokenIndex++ // Skip comma
							continue
						}
						// No comma, so must be end of argument list
						break
					}
				}
				// Expect closing parenthesis
				if p.tokenIndex >= len(p.tokens) ||
					p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
					p.tokens[p.tokenIndex].Value != ")" {
					return nil, fmt.Errorf("expected closing parenthesis after method arguments at line %d", varLine)
				}
				p.tokenIndex++ // Skip closing parenthesis
				// Create a function call with the module expression and method name
				result = &FunctionNode{
					ExpressionNode: ExpressionNode{
						exprType: ExprFunction,
						line:     varLine,
					},
					name: attrName,
					args: args,
					// Special handling - We'll store the module in the FunctionNode
					moduleExpr: result,
				}
			} else {
				// Regular attribute access (not a method call)
				result = NewGetAttrNode(result, attrNode, varLine)
			}
		}
		return result, nil
	case TOKEN_PUNCTUATION:
		// Handle array literals [1, 2, 3]
		if token.Value == "[" {
			return p.parseArrayExpression()
		}
		// Handle hash/map literals {'key': value}
		if token.Value == "{" {
			return p.parseMapExpression()
		}
		// Handle parenthesized expressions
		if token.Value == "(" {
			p.tokenIndex++ // Skip "("
			// Check for unary operator immediately after opening parenthesis
			if p.tokenIndex < len(p.tokens) &&
				p.tokens[p.tokenIndex].Type == TOKEN_OPERATOR &&
				(p.tokens[p.tokenIndex].Value == "-" || p.tokens[p.tokenIndex].Value == "+") {
				// Handle unary operation inside parentheses
				unaryToken := p.tokens[p.tokenIndex]
				operator := unaryToken.Value
				line := unaryToken.Line
				p.tokenIndex++ // Skip the operator
				// Parse the operand
				operand, err := p.parseExpression()
				if err != nil {
					return nil, err
				}
				// Create a unary node
				expr := NewUnaryNode(operator, operand, line)
				// Expect closing parenthesis
				if p.tokenIndex >= len(p.tokens) ||
					p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
					p.tokens[p.tokenIndex].Value != ")" {
					return nil, fmt.Errorf("expected closing parenthesis at line %d", token.Line)
				}
				p.tokenIndex++ // Skip ")"
				return expr, nil
			}
			// Regular parenthesized expression
			expr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			// Expect closing parenthesis
			if p.tokenIndex >= len(p.tokens) ||
				p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
				p.tokens[p.tokenIndex].Value != ")" {
				return nil, fmt.Errorf("expected closing parenthesis at line %d", token.Line)
			}
			p.tokenIndex++ // Skip ")"
			return expr, nil
		}
		// Any other punctuation falls through to the error below.
	default:
		return nil, fmt.Errorf("unexpected token in expression at line %d", token.Line)
	}
	return nil, fmt.Errorf("unexpected token in expression at line %d", token.Line)
}
// Parse array expression [item1, item2, ...]
//
// Called with the cursor on the "[" token. Returns an ArrayNode; an empty
// literal "[]" yields a node with a nil items slice.
func (p *Parser) parseArrayExpression() (Node, error) {
	// Save the line number for error reporting
	line := p.tokens[p.tokenIndex].Line
	// Skip the opening bracket
	p.tokenIndex++
	// Parse the array items
	var items []Node
	// Check if there are any items
	if p.tokenIndex < len(p.tokens) &&
		!(p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
			p.tokens[p.tokenIndex].Value == "]") {
		for {
			// Parse each item expression
			itemExpr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			items = append(items, itemExpr)
			// Check for comma separator between items
			if p.tokenIndex < len(p.tokens) &&
				p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
				p.tokens[p.tokenIndex].Value == "," {
				p.tokenIndex++ // Skip comma
				continue
			}
			// No comma, so must be end of array
			break
		}
	}
	// Expect closing bracket
	if p.tokenIndex >= len(p.tokens) ||
		p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
		p.tokens[p.tokenIndex].Value != "]" {
		return nil, fmt.Errorf("expected closing bracket after array items at line %d", line)
	}
	p.tokenIndex++ // Skip closing bracket
	// Create array node
	return &ArrayNode{
		ExpressionNode: ExpressionNode{
			exprType: ExprArray,
			line:     line,
		},
		items: items,
	}, nil
}
// parseMapExpression parses a hash/map literal expression, like {'key': value}
//
// Called with the cursor on the "{" token. Returns a HashNode.
//
// NOTE(review): items is a map keyed by Node values, so entry iteration
// order downstream is non-deterministic — confirm the evaluator does not
// depend on source order of the literal's entries.
func (p *Parser) parseMapExpression() (Node, error) {
	// Save the line number for error reporting
	line := p.tokens[p.tokenIndex].Line
	// Skip the opening brace
	p.tokenIndex++
	// Parse the map key-value pairs
	items := make(map[Node]Node)
	// Check if there are any items
	if p.tokenIndex < len(p.tokens) &&
		!(p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
			p.tokens[p.tokenIndex].Value == "}") {
		for {
			// Parse key expression
			keyExpr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			// Expect colon separator
			if p.tokenIndex >= len(p.tokens) ||
				p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
				p.tokens[p.tokenIndex].Value != ":" {
				return nil, fmt.Errorf("expected ':' after map key at line %d", line)
			}
			p.tokenIndex++ // Skip colon
			// Parse value expression
			valueExpr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			// Add key-value pair to map
			items[keyExpr] = valueExpr
			// Check for comma separator between items
			if p.tokenIndex < len(p.tokens) &&
				p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
				p.tokens[p.tokenIndex].Value == "," {
				p.tokenIndex++ // Skip comma
				continue
			}
			// No comma, so must be end of map
			break
		}
	}
	// Expect closing brace
	if p.tokenIndex >= len(p.tokens) ||
		p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
		p.tokens[p.tokenIndex].Value != "}" {
		return nil, fmt.Errorf("expected closing brace after map items at line %d", line)
	}
	p.tokenIndex++ // Skip closing brace
	// Create hash node
	return &HashNode{
		ExpressionNode: ExpressionNode{
			exprType: ExprHash,
			line:     line,
		},
		items: items,
	}, nil
}
// Parse filter expressions: variable|filter(args)
//
// Called with the cursor on a "|" token. Wraps node in one FilterNode per
// filter in the chain (leftmost filter innermost). Parenthesized arguments
// are optional.
//
// NOTE(review): `line` is captured once from the first "|" token, so error
// messages for later filters in a chain report that first line.
func (p *Parser) parseFilters(node Node) (Node, error) {
	line := p.tokens[p.tokenIndex].Line
	// Loop to handle multiple filters (e.g. var|filter1|filter2)
	for p.tokenIndex < len(p.tokens) &&
		p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
		p.tokens[p.tokenIndex].Value == "|" {
		p.tokenIndex++ // Skip the | token
		// Expect filter name
		if p.tokenIndex >= len(p.tokens) || p.tokens[p.tokenIndex].Type != TOKEN_NAME {
			return nil, fmt.Errorf("expected filter name at line %d", line)
		}
		filterName := p.tokens[p.tokenIndex].Value
		p.tokenIndex++
		// Check for filter arguments
		var args []Node
		// If there are arguments in parentheses
		if p.tokenIndex < len(p.tokens) &&
			p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
			p.tokens[p.tokenIndex].Value == "(" {
			p.tokenIndex++ // Skip opening parenthesis
			// Parse arguments (skip entirely for empty parentheses)
			if p.tokenIndex < len(p.tokens) &&
				!(p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
					p.tokens[p.tokenIndex].Value == ")") {
				for {
					// Parse each argument expression
					argExpr, err := p.parseExpression()
					if err != nil {
						return nil, err
					}
					args = append(args, argExpr)
					// Check for comma separator
					if p.tokenIndex < len(p.tokens) &&
						p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
						p.tokens[p.tokenIndex].Value == "," {
						p.tokenIndex++ // Skip comma
						continue
					}
					// No comma, so end of argument list
					break
				}
			}
			// Expect closing parenthesis
			if p.tokenIndex >= len(p.tokens) ||
				p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
				p.tokens[p.tokenIndex].Value != ")" {
				return nil, fmt.Errorf("expected closing parenthesis after filter arguments at line %d", line)
			}
			p.tokenIndex++ // Skip closing parenthesis
		}
		// Create a new FilterNode wrapping whatever we have so far
		node = &FilterNode{
			ExpressionNode: ExpressionNode{
				exprType: ExprFilter,
				line:     line,
			},
			node:   node,
			filter: filterName,
			args:   args,
		}
	}
	return node, nil
}
// Operator precedence levels (higher number = higher precedence)
const (
	PREC_LOWEST  = 0
	PREC_OR      = 1 // or, ||
	PREC_AND     = 2 // and, &&
	PREC_COMPARE = 3 // ==, !=, <, >, <=, >=, in, not in, matches, starts with, ends with
	PREC_SUM     = 4 // +, -, ~
	PREC_PRODUCT = 5 // *, /, %
	PREC_POWER   = 6 // ^
	PREC_PREFIX  = 7 // not, !, +, - (unary)
)

// getOperatorPrecedence maps a (possibly multi-word) binary operator to its
// binding strength. Unrecognized operators bind loosest of all, which makes
// them lose every precedence comparison in parseBinaryExpression.
func getOperatorPrecedence(operator string) int {
	// Cases ordered from tightest to loosest binding.
	switch operator {
	case "^":
		return PREC_POWER
	case "*", "/", "%":
		return PREC_PRODUCT
	case "+", "-", "~":
		return PREC_SUM
	case "==", "!=", "<", ">", "<=", ">=", "in", "not in", "matches", "starts with", "ends with", "is", "is not":
		return PREC_COMPARE
	case "and", "&&":
		return PREC_AND
	case "or", "||":
		return PREC_OR
	default:
		return PREC_LOWEST
	}
}
// Parse binary expressions (a + b, a and b, a in b, etc.)
//
// Called with the already-parsed left operand while the cursor sits on the
// operator token. Handles the "not defined" shorthand, multi-word operators
// ("not in", "is not", "starts with", "ends with"), "is <test>" forms, and
// one step of precedence climbing for the operator that follows the right
// operand.
//
// NOTE(review): precedence handling looks ahead only a single operator; full
// left-to-right folding relies on the caller's loop re-invoking this method.
// Confirm against expression tests before restructuring.
func (p *Parser) parseBinaryExpression(left Node) (Node, error) {
	token := p.tokens[p.tokenIndex]
	operator := token.Value
	line := token.Line
	// Special handling for "not defined" pattern
	// This is the common pattern used in Twig: {% if variable not defined %}
	if operator == "not" && p.tokenIndex+1 < len(p.tokens) &&
		p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
		p.tokens[p.tokenIndex+1].Value == "defined" {
		// Next token should be "defined"
		p.tokenIndex += 2 // Skip both "not" and "defined"
		// Create a TestNode with "defined" test
		testNode := &TestNode{
			ExpressionNode: ExpressionNode{
				exprType: ExprTest,
				line:     line,
			},
			node: left,
			test: "defined",
			args: []Node{},
		}
		// Then wrap it in a unary "not" node
		return &UnaryNode{
			ExpressionNode: ExpressionNode{
				exprType: ExprUnary,
				line:     line,
			},
			operator: "not",
			node:     testNode,
		}, nil
	}
	// Process multi-word operators
	if token.Type == TOKEN_NAME {
		// Handle 'not in' operator
		if token.Value == "not" && p.tokenIndex+1 < len(p.tokens) &&
			p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
			p.tokens[p.tokenIndex+1].Value == "in" {
			operator = "not in"
			p.tokenIndex += 2 // Skip both 'not' and 'in'
		} else if token.Value == "is" && p.tokenIndex+1 < len(p.tokens) &&
			p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
			p.tokens[p.tokenIndex+1].Value == "not" {
			// Handle 'is not' operator
			operator = "is not"
			p.tokenIndex += 2 // Skip both 'is' and 'not'
		} else if token.Value == "starts" && p.tokenIndex+1 < len(p.tokens) &&
			p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
			p.tokens[p.tokenIndex+1].Value == "with" {
			// Handle 'starts with' operator
			operator = "starts with"
			p.tokenIndex += 2 // Skip both 'starts' and 'with'
		} else if token.Value == "ends" && p.tokenIndex+1 < len(p.tokens) &&
			p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
			p.tokens[p.tokenIndex+1].Value == "with" {
			// Handle 'ends with' operator
			operator = "ends with"
			p.tokenIndex += 2 // Skip both 'ends' and 'with'
		} else {
			// Single word operators like 'is', 'and', 'or', 'in', 'matches'
			p.tokenIndex++ // Skip the operator token
		}
	} else {
		// Regular operators like +, -, *, /, etc.
		p.tokenIndex++ // Skip the operator token
	}
	// Handle 'is' followed by a test (e.g. "x is defined", "x is not empty")
	if operator == "is" || operator == "is not" {
		// Check if this is a test
		if p.tokenIndex < len(p.tokens) && p.tokens[p.tokenIndex].Type == TOKEN_NAME {
			testName := p.tokens[p.tokenIndex].Value
			p.tokenIndex++ // Skip the test name
			// Parse test arguments if any
			var args []Node
			// If there's an opening parenthesis, parse arguments
			if p.tokenIndex < len(p.tokens) &&
				p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
				p.tokens[p.tokenIndex].Value == "(" {
				p.tokenIndex++ // Skip opening parenthesis
				// Parse arguments (skip for empty parentheses)
				if p.tokenIndex < len(p.tokens) &&
					!(p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
						p.tokens[p.tokenIndex].Value == ")") {
					for {
						// Parse each argument expression
						argExpr, err := p.parseExpression()
						if err != nil {
							return nil, err
						}
						args = append(args, argExpr)
						// Check for comma separator
						if p.tokenIndex < len(p.tokens) &&
							p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
							p.tokens[p.tokenIndex].Value == "," {
							p.tokenIndex++ // Skip comma
							continue
						}
						// No comma, so end of argument list
						break
					}
				}
				// Expect closing parenthesis
				if p.tokenIndex >= len(p.tokens) ||
					p.tokens[p.tokenIndex].Type != TOKEN_PUNCTUATION ||
					p.tokens[p.tokenIndex].Value != ")" {
					return nil, fmt.Errorf("expected closing parenthesis after test arguments at line %d", line)
				}
				p.tokenIndex++ // Skip closing parenthesis
			}
			// Create the test node
			test := &TestNode{
				ExpressionNode: ExpressionNode{
					exprType: ExprTest,
					line:     line,
				},
				node: left,
				test: testName,
				args: args,
			}
			// If it's a negated test (is not), create a unary 'not' node
			if operator == "is not" {
				return &UnaryNode{
					ExpressionNode: ExpressionNode{
						exprType: ExprUnary,
						line:     line,
					},
					operator: "not",
					node:     test,
				}, nil
			}
			return test, nil
		}
	}
	// If we get here, we have a regular binary operator
	// Get precedence of current operator
	precedence := getOperatorPrecedence(operator)
	// Parse the right side expression
	right, err := p.parseSimpleExpression()
	if err != nil {
		return nil, err
	}
	// Create the current binary node
	binaryNode := NewBinaryNode(operator, left, right, line)
	// Check for another binary operator (word operators arrive as TOKEN_NAME)
	if p.tokenIndex < len(p.tokens) &&
		(p.tokens[p.tokenIndex].Type == TOKEN_OPERATOR ||
			(p.tokens[p.tokenIndex].Type == TOKEN_NAME &&
				(p.tokens[p.tokenIndex].Value == "and" ||
					p.tokens[p.tokenIndex].Value == "or" ||
					p.tokens[p.tokenIndex].Value == "in" ||
					p.tokens[p.tokenIndex].Value == "not" ||
					p.tokens[p.tokenIndex].Value == "is" ||
					p.tokens[p.tokenIndex].Value == "matches" ||
					p.tokens[p.tokenIndex].Value == "starts" ||
					p.tokens[p.tokenIndex].Value == "ends"))) {
		// Get the next operator and its precedence
		nextOperator := p.tokens[p.tokenIndex].Value
		if p.tokens[p.tokenIndex].Type == TOKEN_NAME {
			// Handle multi-word operators (lookahead only — nothing is consumed here)
			if nextOperator == "not" && p.tokenIndex+1 < len(p.tokens) &&
				p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
				p.tokens[p.tokenIndex+1].Value == "in" {
				nextOperator = "not in"
			} else if nextOperator == "is" && p.tokenIndex+1 < len(p.tokens) &&
				p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
				p.tokens[p.tokenIndex+1].Value == "not" {
				nextOperator = "is not"
			} else if nextOperator == "starts" && p.tokenIndex+1 < len(p.tokens) &&
				p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
				p.tokens[p.tokenIndex+1].Value == "with" {
				nextOperator = "starts with"
			} else if nextOperator == "ends" && p.tokenIndex+1 < len(p.tokens) &&
				p.tokens[p.tokenIndex+1].Type == TOKEN_NAME &&
				p.tokens[p.tokenIndex+1].Value == "with" {
				nextOperator = "ends with"
			}
		}
		nextPrecedence := getOperatorPrecedence(nextOperator)
		// If the next operator has higher precedence, we need to parse it first
		if nextPrecedence > precedence {
			// Replace the right side with a binary expression
			newRight, err := p.parseBinaryExpression(right)
			if err != nil {
				return nil, err
			}
			// Update the binary node with the new right side
			binaryNode = NewBinaryNode(operator, left, newRight, line)
		}
	}
	// Check for ternary operator after parsing the binary expression
	if p.tokenIndex < len(p.tokens) &&
		p.tokens[p.tokenIndex].Type == TOKEN_PUNCTUATION &&
		p.tokens[p.tokenIndex].Value == "?" {
		// This is a conditional expression, use the binary node as the condition
		return p.parseConditionalExpression(binaryNode)
	}
	return binaryNode, nil
}
// parseEndTag handles closing tags like endif, endfor, endblock, etc.
// A well-formed template never dispatches these directly: each opening-tag
// parser consumes its own closer. Reaching this handler therefore means the
// template closed a block that was never opened.
func (p *Parser) parseEndTag(parser *Parser) (Node, error) {
	// The dispatcher has already advanced past "{%" and the tag name, so the
	// name token sits one behind the cursor and the "{%" token two behind.
	name := parser.tokens[parser.tokenIndex-1].Value
	line := parser.tokens[parser.tokenIndex-2].Line
	return nil, fmt.Errorf("unexpected '%s' tag at line %d", name, line)
}
// parseSpaceless parses a {% spaceless %} ... {% endspaceless %} block.
//
// Entered with the cursor just past the "spaceless" name token; on success the
// cursor sits past the closing "%}" and a SpacelessNode wrapping the body is
// returned.
//
// Fixes over the previous version:
//   - the "{%" before endspaceless now also accepts TOKEN_BLOCK_START_TRIM,
//     so "{%- endspaceless %}" parses (every other check here already
//     accepted the trim variants);
//   - error messages no longer index tokens[tokenIndex] after establishing
//     tokenIndex >= len(tokens), which panicked at EOF.
func (p *Parser) parseSpaceless(parser *Parser) (Node, error) {
	// Line of the "spaceless" name token (the "{%" token sits two back).
	spacelessLine := parser.tokens[parser.tokenIndex-2].Line

	// safeLine reports the current token's line, falling back to the opening
	// tag's line when the cursor is past the end of the token stream.
	safeLine := func() int {
		if parser.tokenIndex < len(parser.tokens) {
			return parser.tokens[parser.tokenIndex].Line
		}
		return spacelessLine
	}

	// Expect "%}" or "-%}" closing the opening tag.
	if parser.tokenIndex >= len(parser.tokens) ||
		(parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_END &&
			parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_END_TRIM) {
		return nil, fmt.Errorf("expected block end token after spaceless at line %d", spacelessLine)
	}
	parser.tokenIndex++

	// Parse the spaceless body; parseOuterTemplate returns with the cursor
	// rewound to the "{%" token of the terminating tag.
	spacelessBody, err := parser.parseOuterTemplate()
	if err != nil {
		return nil, err
	}

	// Expect "{%" or "{%-" introducing the endspaceless tag.
	if parser.tokenIndex >= len(parser.tokens) ||
		(parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_START &&
			parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_START_TRIM) {
		return nil, fmt.Errorf("expected endspaceless tag at line %d", spacelessLine)
	}
	parser.tokenIndex++

	// Expect the "endspaceless" name token.
	if parser.tokenIndex >= len(parser.tokens) ||
		parser.tokens[parser.tokenIndex].Type != TOKEN_NAME ||
		parser.tokens[parser.tokenIndex].Value != "endspaceless" {
		return nil, fmt.Errorf("expected endspaceless token at line %d", safeLine())
	}
	parser.tokenIndex++

	// Expect "%}" or "-%}" closing the endspaceless tag.
	if parser.tokenIndex >= len(parser.tokens) ||
		(parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_END &&
			parser.tokens[parser.tokenIndex].Type != TOKEN_BLOCK_END_TRIM) {
		return nil, fmt.Errorf("expected block end token after endspaceless at line %d", safeLine())
	}
	parser.tokenIndex++

	// Create and return the spaceless node.
	return NewSpacelessNode(spacelessBody, spacelessLine), nil
}
// HtmlPreservingTokenize is an exported version of htmlPreservingTokenize for testing.
// It tokenizes p.source, keeping everything outside Twig tags as TEXT tokens.
func (p *Parser) HtmlPreservingTokenize() ([]Token, error) {
	return p.htmlPreservingTokenize()
}
// SetSource sets the source for parsing - used for testing.
// It only assigns the source; cursor/line state is reset by Parse.
func (p *Parser) SetSource(source string) {
	p.source = source
}