mirror of
https://github.com/semihalev/twig.git
synced 2026-03-14 13:55:46 +01:00
- Added global string cache for efficient string interning (5.2x faster)
- Implemented optimized tokenizer with object pooling
- Created comprehensive benchmarks and documentation
- Cleaned up old optimization files and experiments

Performance improvements:
- String interning: 5.2x faster (1,492 ns/op vs 7,746 ns/op)
- Zero allocations for common strings
- Same memory efficiency as original (36 B/op, 9 allocs/op)
124 lines
No EOL
3.4 KiB
Go
124 lines
No EOL
3.4 KiB
Go
package twig
|
|
|
|
import (
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// OptimizedTokenizer implements a tokenizer that uses the global string cache
|
|
// for zero-allocation string interning
|
|
type OptimizedTokenizer struct {
|
|
// Use the underlying tokenizer methods but intern strings
|
|
baseTokenizer ZeroAllocTokenizer
|
|
// Local cache of whether a string is a tag name
|
|
tagCache map[string]bool
|
|
}
|
|
|
|
// optimizedTokenizerPool is a sync.Pool for OptimizedTokenizer objects
|
|
var optimizedTokenizerPool = sync.Pool{
|
|
New: func() interface{} {
|
|
return &OptimizedTokenizer{
|
|
tagCache: make(map[string]bool, 32), // Pre-allocate with reasonable capacity
|
|
}
|
|
},
|
|
}
|
|
|
|
// NewOptimizedTokenizer gets an OptimizedTokenizer from the pool
|
|
func NewOptimizedTokenizer() *OptimizedTokenizer {
|
|
return optimizedTokenizerPool.Get().(*OptimizedTokenizer)
|
|
}
|
|
|
|
// ReleaseOptimizedTokenizer returns an OptimizedTokenizer to the pool
|
|
func ReleaseOptimizedTokenizer(t *OptimizedTokenizer) {
|
|
// Clear map but preserve capacity
|
|
for k := range t.tagCache {
|
|
delete(t.tagCache, k)
|
|
}
|
|
|
|
// Return to pool
|
|
optimizedTokenizerPool.Put(t)
|
|
}
|
|
|
|
// TokenizeHtmlPreserving tokenizes HTML, preserving its structure
|
|
func (t *OptimizedTokenizer) TokenizeHtmlPreserving() ([]Token, error) {
|
|
// Use the base tokenizer for complex operations
|
|
tokens, err := t.baseTokenizer.TokenizeHtmlPreserving()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Optimize token strings by interning
|
|
for i := range tokens {
|
|
// Intern the value field of each token
|
|
if tokens[i].Value != "" {
|
|
tokens[i].Value = Intern(tokens[i].Value)
|
|
}
|
|
|
|
// For tag names, intern them as well
|
|
if tokens[i].Type == TOKEN_BLOCK_START || tokens[i].Type == TOKEN_BLOCK_START_TRIM ||
|
|
tokens[i].Type == TOKEN_VAR_START || tokens[i].Type == TOKEN_VAR_START_TRIM {
|
|
// Skip processing as these tokens don't have values
|
|
continue
|
|
}
|
|
|
|
// Process tag names - these will be TOKEN_NAME after a block start token
|
|
if i > 0 && tokens[i].Type == TOKEN_NAME &&
|
|
(tokens[i-1].Type == TOKEN_BLOCK_START || tokens[i-1].Type == TOKEN_BLOCK_START_TRIM) {
|
|
// Intern the tag name
|
|
tokens[i].Value = Intern(tokens[i].Value)
|
|
|
|
// Cache whether this is a tag
|
|
t.tagCache[tokens[i].Value] = true
|
|
}
|
|
}
|
|
|
|
return tokens, nil
|
|
}
|
|
|
|
// TokenizeExpression tokenizes a Twig expression
|
|
func (t *OptimizedTokenizer) TokenizeExpression(expression string) []Token {
|
|
// Use the base tokenizer for complex operations
|
|
tokens := t.baseTokenizer.TokenizeExpression(expression)
|
|
|
|
// Optimize token strings by interning
|
|
for i := range tokens {
|
|
if tokens[i].Value != "" {
|
|
tokens[i].Value = Intern(tokens[i].Value)
|
|
}
|
|
}
|
|
|
|
return tokens
|
|
}
|
|
|
|
// ApplyWhitespaceControl applies whitespace control for trimming tokens
|
|
func (t *OptimizedTokenizer) ApplyWhitespaceControl() {
|
|
t.baseTokenizer.ApplyWhitespaceControl()
|
|
}
|
|
|
|
// extractTagName returns the first whitespace-delimited word of value.
//
// Leading and trailing whitespace is trimmed first. The result is then cut at
// the first ASCII whitespace byte (space, tab, newline, vertical tab, form
// feed or carriage return), so a tag name followed by a tab or a line break
// is isolated just like one followed by a space. If no such byte remains, the
// whole trimmed string is returned; an empty or all-whitespace value yields "".
func extractTagName(value string) string {
	value = strings.TrimSpace(value)
	if end := strings.IndexAny(value, " \t\n\v\f\r"); end >= 0 {
		return value[:end]
	}
	return value
}
|
|
|
|
// IsTag checks if a string is a known tag name (cached)
|
|
func (t *OptimizedTokenizer) IsTag(name string) bool {
|
|
// Fast path for common tags
|
|
switch name {
|
|
case stringIf, stringFor, stringEnd, stringEndif, stringEndfor,
|
|
stringElse, stringBlock, stringSet, stringInclude, stringExtends:
|
|
return true
|
|
}
|
|
|
|
// Check the local cache
|
|
if isTag, exists := t.tagCache[name]; exists {
|
|
return isTag
|
|
}
|
|
|
|
// Fall back to the base tokenizer's logic
|
|
return false
|
|
} |