// go-twig/zero_alloc_tokenizer.go

package twig

import (
	"fmt"
	"strings"
	"sync"
	"unsafe"
)

const (
	// maxCacheableLength is the largest string length, in bytes, that Intern
	// stores in the global cache; longer strings are returned uncached.
	maxCacheableLength = 64

	// HTML element and attribute names seeded into the intern cache.
	stringDiv   = "div"
	stringSpan  = "span"
	stringP     = "p"
	stringA     = "a"
	stringImg   = "img"
	stringHref  = "href"
	stringClass = "class"
	stringId    = "id"
	stringStyle = "style"

	// Twig tag keywords seeded into the intern cache.
	stringIf      = "if"
	stringFor     = "for"
	stringEnd     = "end"
	stringEndif   = "endif"
	stringEndfor  = "endfor"
	stringElse    = "else"
	stringBlock   = "block"
	stringSet     = "set"
	stringInclude = "include"
	stringExtends = "extends"
	stringMacro   = "macro"

	// Twig operators seeded into the intern cache.
	stringEquals    = "=="
	stringNotEquals = "!="
	stringAnd       = "and"
	stringOr        = "or"
	stringNot       = "not"
	stringIn        = "in"
	stringIs        = "is"
)

// GlobalStringCache is the table behind Intern: it maps a string to the
// canonical instance that should be shared by all users of that value.
// The embedded RWMutex guards the map; Intern takes the read lock for lookups
// and the write lock for inserts.
type GlobalStringCache struct {
	sync.RWMutex
	// strings maps each cached value to its canonical copy (key == value).
	strings map[string]string
}

var (
	// globalCache is the single process-wide intern table used by Intern.
	// It is created at package initialisation, already seeded with the
	// common strings listed in newGlobalStringCache.
	globalCache = newGlobalStringCache()
)

// TagType identifies which kind of opening delimiter FindNextTag located.
type TagType int

const (
	// TAG_NONE means no opening delimiter was found.
	TAG_NONE TagType = iota
	// TAG_VAR is a variable tag opened with "{{".
	TAG_VAR
	// TAG_VAR_TRIM is a variable tag opened with "{{-".
	TAG_VAR_TRIM
	// TAG_BLOCK is a block tag opened with "{%".
	TAG_BLOCK
	// TAG_BLOCK_TRIM is a block tag opened with "{%-".
	TAG_BLOCK_TRIM
	// TAG_COMMENT is a comment opened with "{#".
	TAG_COMMENT
)

// TagLocation describes an opening delimiter found by FindNextTag.
// When nothing is found, Type is TAG_NONE, Position is -1 and Length is 0.
// Code in this file builds the struct with positional literals, so the field
// order must not change.
type TagLocation struct {
	Type     TagType // Type of tag
	Position int     // Byte offset of the opening delimiter in the source
	Length   int     // Length in bytes of the opening delimiter (2, or 3 for the "-" trim forms)
}

// ZeroAllocTokenizer tokenizes a template while reusing its own buffers.
// Tokens are appended to tokenBuffer, whose backing array is kept between
// uses, and token values are mostly substrings of source rather than copies.
// Instances are normally obtained with GetTokenizer and handed back with
// ReleaseTokenizer.
type ZeroAllocTokenizer struct {
	tokenBuffer []Token      // Pre-allocated buffer of tokens
	source      string       // Source string being tokenized
	position    int          // Current position in source
	line        int          // Current line
	result      []Token      // Slice of actually used tokens
	tempStrings []string     // String constants that we can reuse
}

// commonStrings lists keywords, operators, punctuation and literals that
// appear in most templates. Each pooled tokenizer starts with its own copy of
// this slice in tempStrings (see tokenizerPool), and GetStringConstant searches
// that copy so repeated values share one string.
var commonStrings = []string{
	// Common twig words and operators
	"if", "else", "elseif", "endif", "for", "endfor", "in",
	"block", "endblock", "extends", "include", "with", "set",
	"macro", "endmacro", "import", "from", "as", "do",

	// Common operators
	"+", "-", "*", "/", "=", "==", "!=", ">", "<", ">=", "<=",
	"and", "or", "not", "~", "%", "?", ":", "??",

	// Common punctuation
	"(", ")", "[", "]", "{", "}", ".", ",", "|", ";",

	// Common literals
	"true", "false", "null",

	// Empty string
	"",
}

// TokenizerPooled is the unit stored in tokenizerPool: a tokenizer plus a flag
// recording whether it is currently checked out.
//
// ReleaseTokenizer recovers the *TokenizerPooled from a *ZeroAllocTokenizer by
// subtracting the offset of the tokenizer field, so the tokenizer must stay
// embedded by value in this struct.
type TokenizerPooled struct {
	tokenizer ZeroAllocTokenizer
	// used is set by GetTokenizer and cleared by ReleaseTokenizer.
	used      bool
}

// tokenizerPool recycles TokenizerPooled values between tokenization runs.
// A freshly created entry has an empty token buffer with room for 256 tokens
// and a private copy of commonStrings; every other field is left at its zero
// value.
var tokenizerPool = sync.Pool{
	New: func() interface{} {
		fresh := new(TokenizerPooled)
		fresh.tokenizer.tokenBuffer = make([]Token, 0, 256)
		// Copy rather than share: GetStringConstant appends to tempStrings.
		fresh.tokenizer.tempStrings = append([]string{}, commonStrings...)
		return fresh
	},
}

// GetTokenizer checks a tokenizer out of the pool and points it at source.
//
// capacityHint is the expected number of tokens. When it is zero or negative
// the capacity is estimated as len(source)/10 with a floor of 32. The existing
// token buffer is reused (emptied) if it is already large enough; otherwise a
// new one of the wanted capacity is allocated.
//
// The returned tokenizer should be handed back with ReleaseTokenizer.
func GetTokenizer(source string, capacityHint int) *ZeroAllocTokenizer {
	entry := tokenizerPool.Get().(*TokenizerPooled)
	entry.used = true

	tok := &entry.tokenizer
	tok.source, tok.position, tok.line = source, 0, 1
	tok.result = nil

	wanted := capacityHint
	if wanted <= 0 {
		// No usable hint: guess roughly one token per ten source bytes.
		wanted = len(source) / 10
		if wanted < 32 {
			wanted = 32
		}
	}

	if cap(tok.tokenBuffer) >= wanted {
		tok.tokenBuffer = tok.tokenBuffer[:0]
	} else {
		tok.tokenBuffer = make([]Token, 0, wanted)
	}

	return tok
}

// ReleaseTokenizer hands a tokenizer obtained from GetTokenizer back to the
// pool. Calling it with nil, or a second time for the same tokenizer, is a
// no-op.
//
// The tokenizer MUST have come from GetTokenizer: the enclosing
// TokenizerPooled is recovered by pointer arithmetic on the tokenizer's
// address, which is only valid when the tokenizer is the field embedded in a
// TokenizerPooled.
//
// Token slices returned by the tokenizer share its internal buffer, which the
// next GetTokenizer call reuses, so callers must be finished with them (or
// have copied them) before releasing.
func ReleaseTokenizer(tokenizer *ZeroAllocTokenizer) {
	if tokenizer == nil {
		return
	}

	// Step back from the embedded field to the start of its parent struct.
	pooled := (*TokenizerPooled)(unsafe.Pointer(
		uintptr(unsafe.Pointer(tokenizer)) - unsafe.Offsetof(TokenizerPooled{}.tokenizer)))

	// Only entries that are currently checked out go back into the pool.
	if !pooled.used {
		return
	}
	pooled.used = false

	// Drop references that would otherwise keep the template alive.
	tokenizer.source = ""
	tokenizer.result = nil

	// GetStringConstant appends substrings of the source to tempStrings.
	// Without trimming, a pooled tokenizer would pin every template it has
	// ever seen and its linear lookup would slow down with each reuse. Pool
	// entries are seeded with commonStrings, so everything past that prefix
	// is per-template data and can be discarded.
	if seeded := len(commonStrings); len(tokenizer.tempStrings) > seeded {
		extra := tokenizer.tempStrings[seeded:]
		for i := range extra {
			extra[i] = "" // release the string data held by the backing array
		}
		tokenizer.tempStrings = tokenizer.tempStrings[:seeded]
	}

	tokenizerPool.Put(pooled)
}

// AddToken appends a token with the given type, value and line number to the
// tokenizer's buffer.
func (t *ZeroAllocTokenizer) AddToken(tokenType int, value string, line int) {
	t.tokenBuffer = append(t.tokenBuffer, Token{
		Type:  tokenType,
		Value: value,
		Line:  line,
	})
}

// GetStringConstant returns the tokenizer's stored copy of s when one exists,
// so equal values share a single string. Unknown strings are returned as-is;
// those of at most 20 bytes are also remembered for later lookups.
//
// The lookup is a linear scan over tempStrings.
func (t *ZeroAllocTokenizer) GetStringConstant(s string) string {
	for i := range t.tempStrings {
		if known := t.tempStrings[i]; known == s {
			return known
		}
	}

	// Longer strings are unlikely to repeat; do not remember them.
	if len(s) > 20 {
		return s
	}

	t.tempStrings = append(t.tempStrings, s)
	return s
}

// TokenizeExpression tokenizes expr and appends the resulting tokens to the
// tokenizer's buffer. It returns the slice of tokens added by this call.
//
// The tokenizer's own source, position and line are saved on entry and
// restored on exit, so the method can be called while a template is being
// scanned. Token values are substrings of expr (or shared constants from
// GetStringConstant), so no per-token string is allocated here.
//
// Recognised forms:
//   - string literals in single or double quotes (the value excludes the
//     quotes; escape sequences are left untouched; a quote preceded by a
//     backslash does not open or close a literal);
//   - operators, including the two-byte forms == != >= <= && || ??;
//   - punctuation, identifiers ([A-Za-z_][A-Za-z0-9_]*) and numbers with an
//     optional fraction.
//
// An unterminated string literal produces no token. Bytes that match none of
// the above are skipped.
func (t *ZeroAllocTokenizer) TokenizeExpression(expr string) []Token {
	// Save current position and set new source context
	savedSource := t.source
	savedPosition := t.position
	savedLine := t.line

	t.source = expr
	t.position = 0
	startTokenCount := len(t.tokenBuffer)

	var inString bool
	var stringDelimiter byte
	var stringStart int

	for t.position < len(t.source) {
		c := t.source[t.position]

		// Handle string literals. A quote only counts when it is not escaped
		// by a directly preceding backslash.
		if (c == '"' || c == '\'') && (t.position == 0 || t.source[t.position-1] != '\\') {
			if inString && c == stringDelimiter {
				// End of string, add the string token
				t.AddToken(TOKEN_STRING, t.source[stringStart:t.position], t.line)
				inString = false
			} else if !inString {
				// Start of string
				inString = true
				stringDelimiter = c
				stringStart = t.position + 1
			}
			// A quote of the other kind inside a literal is ordinary content.
			t.position++
			continue
		}

		// Skip chars inside strings
		if inString {
			t.position++
			continue
		}

		// Handle operators (includes multi-char operators like ==, !=, etc.)
		if isOperator(c) {
			start := t.position
			t.position++

			// Extend to a two-byte operator when the pair is a known one.
			if t.position < len(t.source) {
				nextChar := t.source[t.position]
				if (c == '=' && nextChar == '=') ||
					(c == '!' && nextChar == '=') ||
					(c == '>' && nextChar == '=') ||
					(c == '<' && nextChar == '=') ||
					(c == '&' && nextChar == '&') ||
					(c == '|' && nextChar == '|') ||
					(c == '?' && nextChar == '?') {
					t.position++
				}
			}

			// Slice the operator out of the source instead of building a new
			// string, then prefer the shared constant if there is one.
			op := t.GetStringConstant(t.source[start:t.position])
			t.AddToken(TOKEN_OPERATOR, op, t.line)
			continue
		}

		// Handle punctuation
		if isPunctuation(c) {
			punct := t.GetStringConstant(t.source[t.position : t.position+1])
			t.AddToken(TOKEN_PUNCTUATION, punct, t.line)
			t.position++
			continue
		}

		// Skip whitespace
		if isWhitespace(c) {
			t.position++
			if c == '\n' {
				t.line++
			}
			continue
		}

		// Handle identifiers, literals, etc.
		if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' {
			start := t.position

			// Find the end
			t.position++
			for t.position < len(t.source) {
				ch := t.source[t.position]
				if !((ch >= 'a' && ch <= 'z') ||
					(ch >= 'A' && ch <= 'Z') ||
					(ch >= '0' && ch <= '9') ||
					ch == '_') {
					break
				}
				t.position++
			}

			// Keywords and the literals true/false/null are emitted as plain
			// names, exactly like any other identifier.
			identifier := t.GetStringConstant(t.source[start:t.position])
			t.AddToken(TOKEN_NAME, identifier, t.line)
			continue
		}

		// Handle numbers. The '-' case is only reached when isOperator does
		// not already claim '-'.
		if (c >= '0' && c <= '9') || (c == '-' && t.position+1 < len(t.source) && t.source[t.position+1] >= '0' && t.source[t.position+1] <= '9') {
			start := t.position

			// Skip the negative sign if present
			if c == '-' {
				t.position++
			}

			// Consume digits
			for t.position < len(t.source) && t.source[t.position] >= '0' && t.source[t.position] <= '9' {
				t.position++
			}

			// Handle decimal point
			if t.position < len(t.source) && t.source[t.position] == '.' {
				t.position++

				// Consume fractional digits
				for t.position < len(t.source) && t.source[t.position] >= '0' && t.source[t.position] <= '9' {
					t.position++
				}
			}

			// Add the number token
			t.AddToken(TOKEN_NUMBER, t.source[start:t.position], t.line)
			continue
		}

		// Unrecognized character
		t.position++
	}

	// Create slice of tokens
	tokens := t.tokenBuffer[startTokenCount:]

	// Restore original context
	t.source = savedSource
	t.position = savedPosition
	t.line = savedLine

	return tokens
}

// TokenizeHtmlPreserving tokenizes the whole of t.source. Text outside Twig
// tags is emitted verbatim as TOKEN_TEXT; each tag becomes a start token, the
// tokens for its content, and an end token. A final TOKEN_EOF is appended.
//
// Behaviour visible in the code below:
//   - position, line and the token buffer are reset first, so previous tokens
//     are discarded;
//   - an opening delimiter directly preceded by a backslash is emitted as
//     literal text (without the backslash) and scanning continues after it;
//   - comment content is kept as a TOKEN_TEXT between the comment start and
//     end tokens;
//   - a tag without a matching closing delimiter yields a nil slice and an
//     error naming the current line.
//
// The returned slice is the tokenizer's internal buffer (also stored in
// t.result), not a copy.
//
// NOTE(review): closing delimiters are located with strings.Index over the raw
// source, so a delimiter that appears inside a string literal within a tag
// (e.g. {{ "}}" }}) ends the tag early.
// NOTE(review): t.line is advanced past the tag content before the content
// tokens are added, so those tokens carry the line on which the tag ends.
func (t *ZeroAllocTokenizer) TokenizeHtmlPreserving() ([]Token, error) {
	// Reset position and line
	t.position = 0
	t.line = 1

	// Clear token buffer
	t.tokenBuffer = t.tokenBuffer[:0]

	// Parallel tables: pattern i has start-token type tagTypes[i] and byte
	// length tagLengths[i]. The trim forms come before their plain forms so
	// that "{{-" wins over "{{" when both match at the same offset.
	tagPatterns := [5]string{"{{-", "{{", "{%-", "{%", "{#"}
	tagTypes := [5]int{TOKEN_VAR_START_TRIM, TOKEN_VAR_START, TOKEN_BLOCK_START_TRIM, TOKEN_BLOCK_START, TOKEN_COMMENT_START}
	tagLengths := [5]int{3, 2, 3, 2, 2}

	for t.position < len(t.source) {
		// Find the next tag
		nextTagPos := -1
		tagType := -1
		tagLength := 0

		// Check for all possible tag patterns
		// This loop avoids allocations by manually checking prefixes
		remainingSource := t.source[t.position:]
		for i := 0; i < 5; i++ {
			pattern := tagPatterns[i]
			if len(remainingSource) >= len(pattern) &&
			   remainingSource[:len(pattern)] == pattern {
				// Tag found at current position
				nextTagPos = t.position
				tagType = tagTypes[i]
				tagLength = tagLengths[i]
				break
			}

			// If not found at current position, find it in the remainder
			// and keep whichever pattern starts earliest. On a tie the
			// earlier table entry (the trim form) is kept.
			patternPos := strings.Index(remainingSource, pattern)
			if patternPos != -1 {
				pos := t.position + patternPos
				if nextTagPos == -1 || pos < nextTagPos {
					nextTagPos = pos
					tagType = tagTypes[i]
					tagLength = tagLengths[i]
				}
			}
		}

		// Check if the tag is escaped
		if nextTagPos != -1 && nextTagPos > 0 && t.source[nextTagPos-1] == '\\' {
			// Add text up to the backslash
			if nextTagPos-1 > t.position {
				preText := t.source[t.position:nextTagPos-1]
				t.AddToken(TOKEN_TEXT, preText, t.line)
				t.line += countNewlines(preText)
			}

			// Add the tag as literal text (without the backslash)
			// Find which pattern was matched
			for i := 0; i < 5; i++ {
				if tagType == tagTypes[i] {
					t.AddToken(TOKEN_TEXT, tagPatterns[i], t.line)
					break
				}
			}

			// Move past this tag
			t.position = nextTagPos + tagLength
			continue
		}

		// No more tags found - add the rest as TEXT
		if nextTagPos == -1 {
			if t.position < len(t.source) {
				remainingText := t.source[t.position:]
				t.AddToken(TOKEN_TEXT, remainingText, t.line)
				t.line += countNewlines(remainingText)
			}
			break
		}

		// Add text before the tag
		if nextTagPos > t.position {
			textContent := t.source[t.position:nextTagPos]
			t.AddToken(TOKEN_TEXT, textContent, t.line)
			t.line += countNewlines(textContent)
		}

		// Add the tag start token
		t.AddToken(tagType, "", t.line)

		// Move past opening tag
		t.position = nextTagPos + tagLength

		// Find matching end tag
		var endTag string
		var endTagType int
		var endTagLength int

		if tagType == TOKEN_VAR_START || tagType == TOKEN_VAR_START_TRIM {
			// Look for "}}" or "-}}"
			// "-}}" contains "}}" one byte later, so the plain form is
			// chosen only when it occurs strictly before any "-}}".
			endPos1 := strings.Index(t.source[t.position:], "}}")
			endPos2 := strings.Index(t.source[t.position:], "-}}")

			if endPos1 != -1 && (endPos2 == -1 || endPos1 < endPos2) {
				endTag = "}}"
				endTagType = TOKEN_VAR_END
				endTagLength = 2
			} else if endPos2 != -1 {
				endTag = "-}}"
				endTagType = TOKEN_VAR_END_TRIM
				endTagLength = 3
			} else {
				return nil, fmt.Errorf("unclosed variable tag at line %d", t.line)
			}
		} else if tagType == TOKEN_BLOCK_START || tagType == TOKEN_BLOCK_START_TRIM {
			// Look for "%}" or "-%}"
			// Same selection rule as for variable tags above.
			endPos1 := strings.Index(t.source[t.position:], "%}")
			endPos2 := strings.Index(t.source[t.position:], "-%}")

			if endPos1 != -1 && (endPos2 == -1 || endPos1 < endPos2) {
				endTag = "%}"
				endTagType = TOKEN_BLOCK_END
				endTagLength = 2
			} else if endPos2 != -1 {
				endTag = "-%}"
				endTagType = TOKEN_BLOCK_END_TRIM
				endTagLength = 3
			} else {
				return nil, fmt.Errorf("unclosed block tag at line %d", t.line)
			}
		} else if tagType == TOKEN_COMMENT_START {
			// Look for "#}"
			endPos := strings.Index(t.source[t.position:], "#}")
			if endPos == -1 {
				return nil, fmt.Errorf("unclosed comment at line %d", t.line)
			}
			endTag = "#}"
			endTagType = TOKEN_COMMENT_END
			endTagLength = 2
		}

		// Find position of the end tag
		// (offset is relative to t.position; the branches above already
		// established that the chosen delimiter is present).
		endPos := strings.Index(t.source[t.position:], endTag)
		if endPos == -1 {
			return nil, fmt.Errorf("unclosed tag at line %d", t.line)
		}

		// Get content between tags
		tagContent := t.source[t.position:t.position+endPos]
		t.line += countNewlines(tagContent)

		// Process tag content based on type
		if tagType == TOKEN_COMMENT_START {
			// Store comments as TEXT tokens
			if len(tagContent) > 0 {
				t.AddToken(TOKEN_TEXT, tagContent, t.line)
			}
		} else {
			// For variable and block tags, tokenize the content
			tagContent = strings.TrimSpace(tagContent)

			if tagType == TOKEN_BLOCK_START || tagType == TOKEN_BLOCK_START_TRIM {
				// Process block tags with specialized tokenization
				// (called even when the tag content is empty).
				t.processBlockTag(tagContent)
			} else {
				// Process variable tags with optimized tokenization
				if len(tagContent) > 0 {
					// Content without any operator/punctuation byte is
					// emitted as a single name token; anything else goes
					// through the expression tokenizer.
					if !strings.ContainsAny(tagContent, ".|[](){}\"',+-*/=!<>%&^~") {
						// Simple variable name
						identifier := t.GetStringConstant(tagContent)
						t.AddToken(TOKEN_NAME, identifier, t.line)
					} else {
						// Complex expression
						t.TokenizeExpression(tagContent)
					}
				}
			}
		}

		// Add the end tag token
		t.AddToken(endTagType, "", t.line)

		// Move past the end tag
		t.position = t.position + endPos + endTagLength
	}

	// Add EOF token
	t.AddToken(TOKEN_EOF, "", t.line)

	// Save the token buffer to result
	t.result = t.tokenBuffer
	return t.result, nil
}

// processBlockTag tokenizes the inside of a block tag ({% ... %}) whose
// surrounding whitespace has already been trimmed.
//
// The text up to the first space is the tag name and is always emitted as a
// TOKEN_NAME. What follows depends on the name:
//   - if / elseif: the rest is tokenized as an expression;
//   - for: "<iter> in <expr>" or "<key>, <value> in <expr>";
//   - set / do: "<name> = <expr>", otherwise a bare name or expression;
//   - include: "<path>" optionally followed by "with <context>";
//   - extends: "<path>";
//   - from: "<path> import a, b as c";
//   - import: "<path> as <alias>";
//   - anything else: the rest is tokenized as an expression.
//
// The keywords in, with, import and as are matched case-insensitively and
// must be surrounded by single spaces. They are located with a plain text
// search, so a keyword inside a string literal is also matched.
func (t *ZeroAllocTokenizer) processBlockTag(content string) {
	// Extract the tag name
	spacePos := strings.IndexByte(content, ' ')
	var blockName string
	var blockContent string

	if spacePos == -1 {
		// No space found, the whole content is the tag name
		blockName = content
		blockContent = ""
	} else {
		blockName = content[:spacePos]
		blockContent = strings.TrimSpace(content[spacePos+1:])
	}

	// Use canonical string for block name
	blockName = t.GetStringConstant(blockName)
	t.AddToken(TOKEN_NAME, blockName, t.line)

	// If there's no content, we're done
	if blockContent == "" {
		return
	}

	// Process based on block type
	switch blockName {
	case "if", "elseif":
		// For conditional blocks, tokenize expression
		t.TokenizeExpression(blockContent)

	case "for":
		// Process for loop with iterator(s) and collection
		inPos := indexASCIIFold(blockContent, " in ")
		if inPos != -1 {
			iterators := strings.TrimSpace(blockContent[:inPos])
			collection := strings.TrimSpace(blockContent[inPos+len(" in "):])

			// Handle key, value iterator syntax
			if key, value, twoVars := strings.Cut(iterators, ","); twoVars {
				keyVar := t.GetStringConstant(strings.TrimSpace(key))
				valueVar := t.GetStringConstant(strings.TrimSpace(value))

				t.AddToken(TOKEN_NAME, keyVar, t.line)
				t.AddToken(TOKEN_PUNCTUATION, ",", t.line)
				t.AddToken(TOKEN_NAME, valueVar, t.line)
			} else {
				// Single iterator
				iterator := t.GetStringConstant(iterators)
				t.AddToken(TOKEN_NAME, iterator, t.line)
			}

			// Add 'in' keyword
			t.AddToken(TOKEN_NAME, "in", t.line)

			// Process collection expression
			t.TokenizeExpression(collection)
		} else {
			// Fallback for malformed for loops
			t.AddToken(TOKEN_NAME, blockContent, t.line)
		}

	case "set":
		// Handle variable assignment
		assignPos := strings.Index(blockContent, "=")
		if assignPos != -1 {
			varName := strings.TrimSpace(blockContent[:assignPos])
			value := strings.TrimSpace(blockContent[assignPos+1:])

			// Add the variable name token
			varName = t.GetStringConstant(varName)
			t.AddToken(TOKEN_NAME, varName, t.line)

			// Add the assignment operator
			t.AddToken(TOKEN_OPERATOR, "=", t.line)

			// Tokenize the value expression
			t.TokenizeExpression(value)
		} else {
			// Simple set without assignment
			blockContent = t.GetStringConstant(blockContent)
			t.AddToken(TOKEN_NAME, blockContent, t.line)
		}

	case "do":
		// Handle variable assignment similar to set tag
		assignPos := strings.Index(blockContent, "=")
		if assignPos != -1 {
			varName := strings.TrimSpace(blockContent[:assignPos])
			value := strings.TrimSpace(blockContent[assignPos+1:])

			// Check if varName is valid (should be a variable name)
			// In Twig, variable names must start with a letter or underscore
			if len(varName) > 0 && (isCharAlpha(varName[0]) || varName[0] == '_') {
				// Add the variable name token
				varName = t.GetStringConstant(varName)
				t.AddToken(TOKEN_NAME, varName, t.line)

				// Add the assignment operator
				t.AddToken(TOKEN_OPERATOR, "=", t.line)

				// Tokenize the value expression
				if len(value) > 0 {
					t.TokenizeExpression(value)
				} else {
					// Empty value after =, which is invalid
					// Add an error token to trigger proper parser error
					t.AddToken(TOKEN_EOF, "ERROR_MISSING_VALUE", t.line)
				}
			} else {
				// Invalid variable name (like a number or operator)
				// Just tokenize as expressions to produce an error in the parser
				t.TokenizeExpression(varName)
				t.AddToken(TOKEN_OPERATOR, "=", t.line)
				t.TokenizeExpression(value)
			}
		} else {
			// No assignment, just an expression to evaluate
			t.TokenizeExpression(blockContent)
		}

	case "include":
		// Handle include with template path and optional context
		withPos := indexASCIIFold(blockContent, " with ")
		if withPos != -1 {
			templatePath := strings.TrimSpace(blockContent[:withPos])
			contextExpr := strings.TrimSpace(blockContent[withPos+len(" with "):])

			// Process template path
			t.tokenizeTemplatePath(templatePath)

			// Add 'with' keyword
			t.AddToken(TOKEN_NAME, "with", t.line)

			// Process context expression as object
			if strings.HasPrefix(contextExpr, "{") && strings.HasSuffix(contextExpr, "}") {
				// Context is an object literal
				t.AddToken(TOKEN_PUNCTUATION, "{", t.line)
				objectContent := contextExpr[1 : len(contextExpr)-1]
				t.tokenizeObjectContents(objectContent)
				t.AddToken(TOKEN_PUNCTUATION, "}", t.line)
			} else {
				// Context is a variable or expression
				t.TokenizeExpression(contextExpr)
			}
		} else {
			// Just a template path
			t.tokenizeTemplatePath(blockContent)
		}

	case "extends":
		// Handle extends tag (similar to include template path)
		t.tokenizeTemplatePath(blockContent)

	case "from":
		// Handle from tag which has a special format:
		// {% from "template.twig" import macro1, macro2 as alias %}
		importPos := indexASCIIFold(blockContent, " import ")
		if importPos != -1 {
			// Extract template path and macros list
			templatePath := strings.TrimSpace(blockContent[:importPos])
			macrosStr := strings.TrimSpace(blockContent[importPos+len(" import "):])

			// Process template path
			t.tokenizeTemplatePath(templatePath)

			// Add 'import' keyword
			t.AddToken(TOKEN_NAME, "import", t.line)

			// Process macro imports
			macros := strings.Split(macrosStr, ",")
			for i, macro := range macros {
				macro = strings.TrimSpace(macro)

				// Check for "as" alias
				asPos := indexASCIIFold(macro, " as ")
				if asPos != -1 {
					// Extract macro name and alias
					macroName := strings.TrimSpace(macro[:asPos])
					alias := strings.TrimSpace(macro[asPos+len(" as "):])

					// Add macro name
					macroName = t.GetStringConstant(macroName)
					t.AddToken(TOKEN_NAME, macroName, t.line)

					// Add 'as' keyword
					t.AddToken(TOKEN_NAME, "as", t.line)

					// Add alias
					alias = t.GetStringConstant(alias)
					t.AddToken(TOKEN_NAME, alias, t.line)
				} else {
					// Just the macro name
					macro = t.GetStringConstant(macro)
					t.AddToken(TOKEN_NAME, macro, t.line)
				}

				// Add comma if not the last macro
				if i < len(macros)-1 {
					t.AddToken(TOKEN_PUNCTUATION, ",", t.line)
				}
			}
		} else {
			// Malformed from tag, just tokenize as expression
			t.TokenizeExpression(blockContent)
		}

	case "import":
		// Handle import tag which allows importing entire templates
		// {% import "template.twig" as alias %}
		asPos := indexASCIIFold(blockContent, " as ")
		if asPos != -1 {
			// Extract template path and alias
			templatePath := strings.TrimSpace(blockContent[:asPos])
			alias := strings.TrimSpace(blockContent[asPos+len(" as "):])

			// Process template path
			t.tokenizeTemplatePath(templatePath)

			// Add 'as' keyword
			t.AddToken(TOKEN_NAME, "as", t.line)

			// Add alias
			alias = t.GetStringConstant(alias)
			t.AddToken(TOKEN_NAME, alias, t.line)
		} else {
			// Simple import without alias
			t.TokenizeExpression(blockContent)
		}

	default:
		// Other block types - tokenize as expression
		t.TokenizeExpression(blockContent)
	}
}

// indexASCIIFold returns the byte offset of the first occurrence of keyword in
// s, treating ASCII letters in s case-insensitively, or -1 when there is none.
// keyword must be lower-case ASCII.
//
// Unlike strings.Index(strings.ToLower(s), keyword), the offset always refers
// to s itself (lower-casing can change the byte length of non-ASCII text) and
// no temporary string is allocated.
func indexASCIIFold(s, keyword string) int {
	n := len(keyword)
	for i := 0; i+n <= len(s); i++ {
		j := 0
		for ; j < n; j++ {
			c := s[i+j]
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			}
			if c != keyword[j] {
				break
			}
		}
		if j == n {
			return i
		}
	}
	return -1
}

// Helper methods for specialized tag tokenization

// tokenizeTemplatePath tokenizes the template reference of an extends,
// include, from or import tag.
//
// After trimming whitespace, a path wrapped in a matching pair of single or
// double quotes is emitted as one TOKEN_STRING holding the text between the
// quotes. Anything else - including a lone quote character - is handed to
// TokenizeExpression.
func (t *ZeroAllocTokenizer) tokenizeTemplatePath(path string) {
	path = strings.TrimSpace(path)

	// At least two bytes are needed for an opening and a closing quote; a
	// single quote character would otherwise count as both and the slice
	// below would panic.
	if n := len(path); n >= 2 {
		opening, closing := path[0], path[n-1]
		if (opening == '"' || opening == '\'') && opening == closing {
			t.AddToken(TOKEN_STRING, path[1:n-1], t.line)
			return
		}
	}

	// Otherwise tokenize as expression
	t.TokenizeExpression(path)
}

// isCharAlpha reports whether c is an ASCII letter (A-Z or a-z).
func isCharAlpha(c byte) bool {
	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and maps no other byte
	// into that range, so one range check covers both cases.
	folded := c | 0x20
	return 'a' <= folded && folded <= 'z'
}

// tokenizeObjectContents tokenizes the inside of an object literal (the text
// between "{" and "}") as a sequence of key: value pairs.
//
// The content is scanned once. A pair ends at a comma - or at the end of the
// content - that lies outside any string literal, nested object or nested
// array. For each pair the key is emitted as TOKEN_STRING (quoted key, quotes
// removed) or TOKEN_NAME (bare key), followed by a ":" punctuation token and
// the tokens of the value expression. A "," punctuation token is added after
// a pair unless the comma is the last byte of the content.
//
// NOTE(review): a top-level segment without a colon emits nothing and does
// not advance start, so its text becomes part of the next pair's key.
func (t *ZeroAllocTokenizer) tokenizeObjectContents(content string) {
	// Track state for nested structures
	inString := false
	stringDelim := byte(0)
	inObject := 0
	inArray := 0

	// start is the offset where the current pair begins; colonPos is the
	// offset of its first top-level colon, or -1 while none has been seen.
	start := 0
	colonPos := -1

	// The loop runs one step past the last byte so the final pair is flushed
	// by the same code path that handles commas.
	for i := 0; i <= len(content); i++ {
		// At end of string or at a comma at the top level
		atEnd := i == len(content)
		isComma := !atEnd && content[i] == ','

		// Process key-value pair when we find a comma or reach the end
		if (isComma || atEnd) && inObject == 0 && inArray == 0 && !inString {
			if colonPos != -1 {
				// We have a key-value pair
				keyStr := strings.TrimSpace(content[start:colonPos])
				valueStr := strings.TrimSpace(content[colonPos+1:i])

				// Process key
				if (len(keyStr) >= 2 && keyStr[0] == '"' && keyStr[len(keyStr)-1] == '"') ||
					(len(keyStr) >= 2 && keyStr[0] == '\'' && keyStr[len(keyStr)-1] == '\'') {
					// Quoted key
					t.AddToken(TOKEN_STRING, keyStr[1:len(keyStr)-1], t.line)
				} else {
					// Unquoted key
					keyStr = t.GetStringConstant(keyStr)
					t.AddToken(TOKEN_NAME, keyStr, t.line)
				}

				// Add colon
				t.AddToken(TOKEN_PUNCTUATION, ":", t.line)

				// Process value
				t.TokenizeExpression(valueStr)

				// Add comma if needed
				// (a trailing comma at the very end of content is dropped)
				if isComma && i < len(content)-1 {
					t.AddToken(TOKEN_PUNCTUATION, ",", t.line)
				}

				// Reset for next pair
				start = i + 1
				colonPos = -1
			}
			continue
		}

		// Skip end of string case
		// (reached only when the content ends inside a string or nesting)
		if atEnd {
			continue
		}

		// Current character
		c := content[i]

		// Handle string literals
		// A quote preceded by a backslash neither opens nor closes a string.
		if (c == '"' || c == '\'') && (i == 0 || content[i-1] != '\\') {
			if inString && c == stringDelim {
				inString = false
			} else if !inString {
				inString = true
				stringDelim = c
			}
			continue
		}

		// Skip processing inside strings
		if inString {
			continue
		}

		// Handle object and array nesting
		if c == '{' {
			inObject++
		} else if c == '}' {
			inObject--
		} else if c == '[' {
			inArray++
		} else if c == ']' {
			inArray--
		}

		// Track colon position for key-value separator
		// Only the first top-level colon of a pair counts.
		if c == ':' && inObject == 0 && inArray == 0 && colonPos == -1 {
			colonPos = i
		}
	}
}

// ApplyWhitespaceControl implements the "-" trim markers on the tokens stored
// in t.result, editing them in place:
//   - before a trimming start token ({{- or {%-), an immediately preceding
//     text token loses its trailing whitespace;
//   - after a trimming end token (-}} or -%}), an immediately following text
//     token loses its leading whitespace.
func (t *ZeroAllocTokenizer) ApplyWhitespaceControl() {
	toks := t.result
	count := len(toks)

	for idx := 0; idx < count; idx++ {
		kind := toks[idx].Type
		trimsBefore := kind == TOKEN_VAR_START_TRIM || kind == TOKEN_BLOCK_START_TRIM
		trimsAfter := kind == TOKEN_VAR_END_TRIM || kind == TOKEN_BLOCK_END_TRIM

		if trimsBefore && idx > 0 {
			if prev := &toks[idx-1]; prev.Type == TOKEN_TEXT {
				prev.Value = trimTrailingWhitespace(prev.Value)
			}
		}

		if trimsAfter && idx+1 < count {
			if next := &toks[idx+1]; next.Type == TOKEN_TEXT {
				next.Value = trimLeadingWhitespace(next.Value)
			}
		}
	}
}

// The following functions implement string interning and tag detection from the optimized implementations

// newGlobalStringCache builds an intern table that already contains the
// common HTML names, Twig keywords and operators declared as constants in
// this file, plus the empty string. Each entry maps the string to itself.
func newGlobalStringCache() *GlobalStringCache {
	seeds := [...]string{
		// HTML names
		stringDiv, stringSpan, stringP, stringA, stringImg,
		stringHref, stringClass, stringId, stringStyle,
		// Twig keywords
		stringIf, stringFor, stringEnd, stringEndif, stringEndfor,
		stringElse, stringBlock, stringSet, stringInclude, stringExtends,
		stringMacro,
		// Operators
		stringEquals, stringNotEquals, stringAnd,
		stringOr, stringNot, stringIn, stringIs,
		// Empty string
		"",
	}

	// Sized for 64 entries so the first inserts after start-up do not
	// immediately trigger a map resize.
	table := make(map[string]string, 64)
	for i := range seeds {
		table[seeds[i]] = seeds[i]
	}

	return &GlobalStringCache{strings: table}
}

// Intern returns a canonical instance of s so that equal strings can share
// one allocation. The result always compares equal to s.
//
//   - A handful of very common strings are returned immediately without
//     touching the lock.
//   - Strings longer than maxCacheableLength are returned unchanged and are
//     never cached.
//   - Otherwise the cached instance is returned, inserting one first if
//     needed.
//
// The cache stores its own copy of the string rather than s itself. Callers
// typically pass substrings of a template, and caching such a substring
// directly would keep the entire template alive for as long as the entry
// exists. Entries are never evicted.
//
// Intern is safe for concurrent use.
func Intern(s string) string {
	// Fast path for very common strings to avoid lock contention
	switch s {
	case stringDiv, stringSpan, stringP, stringA, stringImg,
		stringIf, stringFor, stringEnd, stringEndif, stringEndfor,
		stringElse, "":
		return s
	}

	// Don't intern strings that are too long
	if len(s) > maxCacheableLength {
		return s
	}

	// Look up under the read lock first; this is the common case.
	globalCache.RLock()
	cached, exists := globalCache.strings[s]
	globalCache.RUnlock()

	if exists {
		return cached
	}

	// Not found: take the write lock to insert.
	globalCache.Lock()
	defer globalCache.Unlock()

	// Another goroutine may have inserted s between the two locks.
	if cached, exists := globalCache.strings[s]; exists {
		return cached
	}

	// Store a private copy so the cache never pins the caller's backing data.
	owned := strings.Clone(s)
	globalCache.strings[owned] = owned
	return owned
}

// FindNextTag returns the first Twig opening delimiter in source at or after
// byte offset startPos.
//
// The recognised delimiters are "{{", "{%" and "{#"; "{{" and "{%" followed
// directly by '-' are reported as their trim variants with Length 3. The
// returned Position is an offset into source, not into the searched suffix.
//
// When startPos is out of range or no delimiter is found, the result is
// TagLocation{TAG_NONE, -1, 0}.
func FindNextTag(source string, startPos int) TagLocation {
	notFound := TagLocation{Type: TAG_NONE, Position: -1, Length: 0}

	if startPos < 0 || startPos >= len(source) {
		return notFound
	}

	for from := startPos; ; {
		// Jump straight to the next '{'; indexing a string never allocates,
		// so no unsafe access is needed here.
		rel := strings.IndexByte(source[from:], '{')
		if rel < 0 {
			return notFound
		}
		open := from + rel

		// A '{' in the last position cannot start a two-byte delimiter.
		if open+1 >= len(source) {
			return notFound
		}

		hasTrimMark := open+2 < len(source) && source[open+2] == '-'

		switch source[open+1] {
		case '{':
			if hasTrimMark {
				return TagLocation{Type: TAG_VAR_TRIM, Position: open, Length: 3}
			}
			return TagLocation{Type: TAG_VAR, Position: open, Length: 2}

		case '%':
			if hasTrimMark {
				return TagLocation{Type: TAG_BLOCK_TRIM, Position: open, Length: 3}
			}
			return TagLocation{Type: TAG_BLOCK, Position: open, Length: 2}

		case '#':
			return TagLocation{Type: TAG_COMMENT, Position: open, Length: 2}
		}

		// Just a literal brace; keep looking after it.
		from = open + 1
	}
}

// FindTagEnd returns the byte offset in source of the closing delimiter that
// matches tagType, searching from startPos: "}}" for variable tags, "%}" for
// block tags and "#}" for comments.
//
// The offset is that of the delimiter's first byte; a '-' trim marker in
// front of it is not included. The result is -1 when startPos is out of
// range, tagType has no closing delimiter (e.g. TAG_NONE), or the delimiter
// does not occur.
//
// The search is purely textual, so a delimiter inside a string literal is
// matched too.
func FindTagEnd(source string, startPos int, tagType TagType) int {
	// A negative start would otherwise index out of range; treat it like any
	// other out-of-range position, as FindNextTag does.
	if startPos < 0 || startPos >= len(source) {
		return -1
	}

	var closer string
	switch tagType {
	case TAG_VAR, TAG_VAR_TRIM:
		closer = "}}"
	case TAG_BLOCK, TAG_BLOCK_TRIM:
		closer = "%}"
	case TAG_COMMENT:
		closer = "#}"
	default:
		return -1
	}

	rel := strings.Index(source[startPos:], closer)
	if rel < 0 {
		return -1
	}
	return startPos + rel
}

// TokenizeOptimized tokenizes the whole of t.source using FindNextTag and
// FindTagEnd to locate tags. Text outside tags is emitted verbatim as
// TOKEN_TEXT; each tag becomes a start token, the tokens for its content and
// an end token. A final TOKEN_EOF is appended.
//
// Details:
//   - position, line and the token buffer are reset first;
//   - an opening delimiter directly preceded by a backslash is emitted as
//     literal text (without the backslash);
//   - a '-' directly before "}}" or "%}" marks a trimming end tag, whatever
//     the form of the opening delimiter, and is not part of the tag content;
//   - comment content is kept as a TOKEN_TEXT between the comment tokens;
//   - simple variable names are interned with Intern, everything else goes
//     through TokenizeExpression or processBlockTag;
//   - an unclosed tag yields a nil slice and an error naming the line.
//
// The returned slice is the tokenizer's internal buffer (also stored in
// t.result), not a copy.
func (t *ZeroAllocTokenizer) TokenizeOptimized() ([]Token, error) {
	// Every closing delimiter ("}}", "%}", "#}") is two bytes long and starts
	// at the offset reported by FindTagEnd. A trim marker sits in front of
	// that offset, so it never changes how far the cursor must advance.
	const closerLen = 2

	// Reset position and line
	t.position = 0
	t.line = 1

	// Clear token buffer
	t.tokenBuffer = t.tokenBuffer[:0]

	pos := 0
	for pos < len(t.source) {
		// Find the next tag using optimized detection
		tagLoc := FindNextTag(t.source, pos)

		// No more tags: the remainder is plain text.
		if tagLoc.Position == -1 {
			remainingText := t.source[pos:]
			t.AddToken(TOKEN_TEXT, remainingText, t.line)
			t.line += countNewlines(remainingText)
			break
		}

		// Check if the tag is escaped with a backslash
		if tagLoc.Position > 0 && t.source[tagLoc.Position-1] == '\\' {
			// Add text up to the backslash
			if tagLoc.Position-1 > pos {
				preText := t.source[pos : tagLoc.Position-1]
				t.AddToken(TOKEN_TEXT, preText, t.line)
				t.line += countNewlines(preText)
			}

			// Add the tag as literal text (without the backslash)
			var tagText string
			switch tagLoc.Type {
			case TAG_VAR:
				tagText = "{{"
			case TAG_VAR_TRIM:
				tagText = "{{-"
			case TAG_BLOCK:
				tagText = "{%"
			case TAG_BLOCK_TRIM:
				tagText = "{%-"
			case TAG_COMMENT:
				tagText = "{#"
			}
			t.AddToken(TOKEN_TEXT, tagText, t.line)

			// Move past this tag
			pos = tagLoc.Position + tagLoc.Length
			continue
		}

		// Add text before the tag
		if tagLoc.Position > pos {
			textContent := t.source[pos:tagLoc.Position]
			t.AddToken(TOKEN_TEXT, textContent, t.line)
			t.line += countNewlines(textContent)
		}

		// Derive everything that depends on the tag kind in one place.
		var (
			startTokenType   int
			endTokenType     int
			trimEndTokenType int
			unclosedType     string
		)
		isComment := false
		switch tagLoc.Type {
		case TAG_VAR:
			startTokenType = TOKEN_VAR_START
			endTokenType, trimEndTokenType = TOKEN_VAR_END, TOKEN_VAR_END_TRIM
			unclosedType = "variable"
		case TAG_VAR_TRIM:
			startTokenType = TOKEN_VAR_START_TRIM
			endTokenType, trimEndTokenType = TOKEN_VAR_END, TOKEN_VAR_END_TRIM
			unclosedType = "variable"
		case TAG_BLOCK:
			startTokenType = TOKEN_BLOCK_START
			endTokenType, trimEndTokenType = TOKEN_BLOCK_END, TOKEN_BLOCK_END_TRIM
			unclosedType = "block"
		case TAG_BLOCK_TRIM:
			startTokenType = TOKEN_BLOCK_START_TRIM
			endTokenType, trimEndTokenType = TOKEN_BLOCK_END, TOKEN_BLOCK_END_TRIM
			unclosedType = "block"
		case TAG_COMMENT:
			startTokenType = TOKEN_COMMENT_START
			endTokenType = TOKEN_COMMENT_END
			unclosedType = "comment"
			isComment = true
		}

		// Add the tag start token
		t.AddToken(startTokenType, "", t.line)

		// Move past the tag's opening characters
		tagContentStart := tagLoc.Position + tagLoc.Length

		// Find the end of the tag
		tagEndPos := FindTagEnd(t.source, tagContentStart, tagLoc.Type)
		if tagEndPos == -1 {
			return nil, fmt.Errorf("unclosed %s tag at line %d", unclosedType, t.line)
		}

		// Get tag content
		tagContent := t.source[tagContentStart:tagEndPos]
		t.line += countNewlines(tagContent)

		// A '-' as the last content byte is the trim marker of "-}}" / "-%}".
		// Only the content is inspected, so the '-' of an opening "{{-" is
		// never mistaken for it (as in "{{-}}").
		if !isComment && len(tagContent) > 0 && tagContent[len(tagContent)-1] == '-' {
			endTokenType = trimEndTokenType
			tagContent = tagContent[:len(tagContent)-1]
		}

		// Process tag content based on tag type
		if isComment {
			// Store comments as TEXT tokens
			if len(tagContent) > 0 {
				t.AddToken(TOKEN_TEXT, tagContent, t.line)
			}
		} else {
			// For variable and block tags, tokenize the content
			tagContent = strings.TrimSpace(tagContent)

			if tagLoc.Type == TAG_BLOCK || tagLoc.Type == TAG_BLOCK_TRIM {
				// Process block tags using specialized tokenization
				if len(tagContent) > 0 {
					t.processBlockTag(tagContent)
				}
			} else if len(tagContent) > 0 {
				// Content without any operator/punctuation byte is a plain
				// variable name; anything else is a full expression.
				if !strings.ContainsAny(tagContent, ".|[](){}\"',+-*/=!<>%&^~") {
					identifier := Intern(tagContent)
					t.AddToken(TOKEN_NAME, identifier, t.line)
				} else {
					t.TokenizeExpression(tagContent)
				}
			}
		}

		// Add end token
		t.AddToken(endTokenType, "", t.line)

		// Continue directly after the closing delimiter.
		pos = tagEndPos + closerLen
	}

	// Add EOF token
	t.AddToken(TOKEN_EOF, "", t.line)

	// Save and return result
	t.result = t.tokenBuffer
	return t.result, nil
}