woodpecker-email/vendor/github.com/antonmedv/expr/parser/lexer/lexer.go
2023-01-04 13:11:21 +01:00

213 lines
3.8 KiB
Go

package lexer
import (
"fmt"
"strings"
"unicode/utf8"
"github.com/antonmedv/expr/file"
)
// Lex runs the lexer state machine over the content of source, starting at
// the root state, and returns the tokens that were emitted.
//
// If any state recorded an error, no tokens are returned; instead the first
// recorded error is returned after Bind(source) has been called on it.
func Lex(source *file.Source) ([]Token, error) {
	// Lines are counted from 1, columns from 0.
	origin := file.Location{Line: 1, Column: 0}

	l := &lexer{
		input:    source.Content(),
		tokens:   make([]Token, 0),
		startLoc: origin,
		prev:     origin,
		loc:      origin,
	}

	// Each state returns the next state to run; a nil state stops the machine.
	state := root
	for state != nil {
		state = state(l)
	}

	if l.err == nil {
		return l.tokens, nil
	}
	return nil, l.err.Bind(source)
}
// lexer carries the scanning state that the state functions and the helper
// methods below read and update.
type lexer struct {
	input  string  // text being scanned
	tokens []Token // tokens emitted so far

	start int // byte offset in input where the pending token begins
	end   int // byte offset in input of the next unread rune
	width int // byte width of the rune returned by the last next(); 0 at EOF

	startLoc file.Location // location corresponding to start
	prev     file.Location // location before the most recently read rune
	loc      file.Location // location corresponding to end

	err *file.Error // first error recorded via error(), nil if none
}
// eof is the sentinel rune returned by next once the input is exhausted.
// It is negative, so it can never collide with a decoded rune.
const eof = rune(-1)
// next consumes and returns the next rune of the input, or eof when the
// input is exhausted.
//
// On a successful read it records the rune's byte width (so a single backup
// can undo it), saves the current location in prev and advances loc: a
// newline starts a new line at column 0, any other rune moves one column to
// the right. At EOF only width is touched (set to 0).
func (l *lexer) next() rune {
	if len(l.input) <= l.end {
		l.width = 0
		return eof
	}

	r, size := utf8.DecodeRuneInString(l.input[l.end:])
	l.width, l.end = size, l.end+size

	l.prev = l.loc
	switch r {
	case '\n':
		l.loc.Line++
		l.loc.Column = 0
	default:
		l.loc.Column++
	}
	return r
}
// peek returns the next rune without consuming it: it reads the rune with
// next and immediately undoes that read with backup.
func (l *lexer) peek() rune {
	ahead := l.next()
	l.backup()
	return ahead
}
// backup undoes the most recent call to next by moving end back by the
// recorded rune width and resetting loc to prev. Only one rune of history is
// kept, so it must not be called twice in a row.
//
// NOTE(review): when the preceding next returned eof, width is 0 and prev was
// not refreshed, so end stays put while loc still falls back to prev.
func (l *lexer) backup() {
	l.end, l.loc = l.end-l.width, l.prev
}
// emit appends a token of kind t whose value is the currently pending text
// (see word) and starts a new pending token.
func (l *lexer) emit(t Kind) {
	text := l.word()
	l.emitValue(t, text)
}
// emitValue appends a token of kind t carrying the given value, located at
// the start of the pending text, and then moves the pending-token start up to
// the current position.
func (l *lexer) emitValue(t Kind, value string) {
	tok := Token{Location: l.startLoc, Kind: t, Value: value}
	l.tokens = append(l.tokens, tok)

	// Everything up to end now belongs to the emitted token.
	l.start, l.startLoc = l.end, l.loc
}
// emitEOF appends the EOF token (with an empty value) and moves the
// pending-token start up to the current position.
func (l *lexer) emitEOF() {
	eofTok := Token{
		// Point to previous position for better error messages.
		Location: l.prev,
		Kind:     EOF,
	}
	l.tokens = append(l.tokens, eofTok)

	l.start, l.startLoc = l.end, l.loc
}
// word returns the pending text: the slice of input consumed since the last
// emit or ignore.
func (l *lexer) word() string {
	from, to := l.start, l.end
	return l.input[from:to]
}
// ignore discards the pending text without emitting a token, so the next
// token starts at the current position.
func (l *lexer) ignore() {
	l.start, l.startLoc = l.end, l.loc
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did. A non-matching rune (including eof) is pushed back.
func (l *lexer) accept(valid string) bool {
	if r := l.next(); !strings.ContainsRune(valid, r) {
		l.backup()
		return false
	}
	return true
}
// acceptRun consumes runes for as long as they belong to valid. The first
// rune that does not belong (or eof) is pushed back, so it is left unread.
func (l *lexer) acceptRun(valid string) {
	r := l.next()
	for strings.ContainsRune(valid, r) {
		r = l.next()
	}
	l.backup()
}
// acceptWord reports whether the upcoming input, after any number of spaces
// (U+0020), is exactly word followed by either a space or the end of input.
//
// On success the skipped spaces and the word are consumed; the terminating
// space is only peeked and stays unread. On failure the lexer position is
// rolled back to where it was on entry.
func (l *lexer) acceptWord(word string) bool {
	// Snapshot every field that next/backup modify. width is included so that
	// a failed attempt leaves the lexer exactly as it found it; otherwise a
	// later backup() would rewind end by the width of a rune read here.
	pos, loc, prev, width := l.end, l.loc, l.prev, l.width

	// Skip spaces (U+0020) if any.
	for l.peek() == ' ' {
		l.next()
	}

	for _, ch := range word {
		if l.next() != ch {
			l.end, l.loc, l.prev, l.width = pos, loc, prev, width
			return false
		}
	}

	// The word must end here: reject a match that is only a prefix of a
	// longer run of non-space characters.
	if r := l.peek(); r != ' ' && r != eof {
		l.end, l.loc, l.prev, l.width = pos, loc, prev, width
		return false
	}
	return true
}
// error records a formatted error at the current location and returns a nil
// state, which lets a state function stop the machine with
// `return l.error(...)`. Only the first error is kept; later calls leave the
// recorded error untouched.
func (l *lexer) error(format string, args ...interface{}) stateFn {
	if l.err != nil {
		// An earlier error already won; keep reporting that one.
		return nil
	}
	l.err = &file.Error{
		Location: l.loc,
		Message:  fmt.Sprintf(format, args...),
	}
	return nil
}
// digitVal returns the numeric value of ch interpreted as a digit: 0-9 for
// '0'-'9' and 10-15 for 'a'-'f' in either case. Any other rune yields 16,
// which is larger than any legal digit value for bases up to 16.
func digitVal(ch rune) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if lc := lower(ch); lc >= 'a' && lc <= 'f' {
		return int(lc-'a') + 10
	}
	return 16 // larger than any legal digit val
}
// lower returns the lower-case form of ch iff ch is an ASCII letter. It works
// by setting the single bit that separates 'A' from 'a'; for any other rune
// the result is simply ch with that bit set and is not meaningful as a case
// conversion.
func lower(ch rune) rune {
	const caseBit = 'a' - 'A'
	return ch | caseBit
}
// scanDigits consumes up to n digits of the given base, where ch is the first
// candidate digit (already read by the caller). It returns the rune that
// follows the last digit consumed. If fewer than n digits are found, an
// "invalid char escape" error is recorded and the offending rune is returned.
func (l *lexer) scanDigits(ch rune, base, n int) rune {
	remaining := n
	for ; remaining > 0; remaining-- {
		if digitVal(ch) >= base {
			break
		}
		ch = l.next()
	}
	if remaining > 0 {
		l.error("invalid char escape")
	}
	return ch
}
// scanEscape scans one escape sequence; the backslash itself has already been
// read by the caller. quote is the delimiter of the enclosing literal and is
// accepted as an escapable character.
//
// Recognized forms are the single-character escapes (a b f n r t v, backslash
// and quote), up to three octal digits, \x with two hex digits, \u with four
// and \U with eight. The rune following the escape is returned. An unknown
// escape records "invalid char escape" and returns the unknown rune itself.
func (l *lexer) scanEscape(quote rune) rune {
	ch := l.next() // read character after '\\'

	var hexDigits int
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		// Single-character escape: nothing more to validate.
		return l.next()
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// ch is already the first octal digit.
		return l.scanDigits(ch, 8, 3)
	case 'x':
		hexDigits = 2
	case 'u':
		hexDigits = 4
	case 'U':
		hexDigits = 8
	default:
		l.error("invalid char escape")
		return ch
	}
	return l.scanDigits(l.next(), 16, hexDigits)
}
// scanString consumes input up to and including the closing quote and returns
// n, the number of characters scanned, where an escape sequence counts as
// one. It starts with the rune after the current position, so presumably the
// caller has already consumed the opening quote.
//
// A newline or the end of input before the closing quote records
// "literal not terminated" and stops the scan, returning the count so far.
func (l *lexer) scanString(quote rune) (n int) {
	ch := l.next() // read character after quote
	for ch != quote {
		switch ch {
		case '\n', eof:
			l.error("literal not terminated")
			return n
		case '\\':
			ch = l.scanEscape(quote)
		default:
			ch = l.next()
		}
		n++
	}
	return n
}