feat/schema: add lexer

Author: mae
Date: 2026-01-26 19:26:24 -06:00
parent 0991f8d674
commit 2526d34f21
9 changed files with 2026 additions and 0 deletions

schema/lexer/lexer.go (new file, 175 lines)

@@ -0,0 +1,175 @@
// Code generated by gocc; DO NOT EDIT.

package lexer

import (
	"os"
	"unicode/utf8"

	"azalea/schema/token"
)

const (
	NoState    = -1
	NumStates  = 41
	NumSymbols = 43
)

type Lexer struct {
	src     []byte
	pos     int
	line    int
	column  int
	Context token.Context
}

func NewLexer(src []byte) *Lexer {
	lexer := &Lexer{
		src:     src,
		pos:     0,
		line:    1,
		column:  1,
		Context: nil,
	}
	return lexer
}

// SourceContext is a simple instance of a token.Context which
// contains the name of the source file.
type SourceContext struct {
	Filepath string
}

func (s *SourceContext) Source() string {
	return s.Filepath
}

func NewLexerFile(fpath string) (*Lexer, error) {
	src, err := os.ReadFile(fpath)
	if err != nil {
		return nil, err
	}
	lexer := NewLexer(src)
	lexer.Context = &SourceContext{Filepath: fpath}
	return lexer, nil
}

func (l *Lexer) Scan() (tok *token.Token) {
	tok = &token.Token{}
	if l.pos >= len(l.src) {
		tok.Type = token.EOF
		tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
		tok.Pos.Context = l.Context
		return
	}
	start, startLine, startColumn, end := l.pos, l.line, l.column, 0
	tok.Type = token.INVALID
	state, rune1, size := 0, rune(-1), 0
	for state != -1 {
		if l.pos >= len(l.src) {
			rune1 = -1
		} else {
			rune1, size = utf8.DecodeRune(l.src[l.pos:])
			l.pos += size
		}

		nextState := -1
		if rune1 != -1 {
			nextState = TransTab[state](rune1)
		}
		state = nextState

		if state != -1 {
			switch rune1 {
			case '\n':
				l.line++
				l.column = 1
			case '\r':
				l.column = 1
			case '\t':
				l.column += 4
			default:
				l.column++
			}

			switch {
			case ActTab[state].Accept != -1:
				tok.Type = ActTab[state].Accept
				end = l.pos
			case ActTab[state].Ignore != "":
				start, startLine, startColumn = l.pos, l.line, l.column
				state = 0
				if start >= len(l.src) {
					tok.Type = token.EOF
				}
			}
		} else {
			if tok.Type == token.INVALID {
				end = l.pos
			}
		}
	}
	if end > start {
		l.pos = end
		tok.Lit = l.src[start:end]
	} else {
		tok.Lit = []byte{}
	}
	tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
	tok.Pos.Context = l.Context

	return
}

func (l *Lexer) Reset() {
	l.pos = 0
}
/*
Lexer symbols:
0: '`'
1: '`'
2: '"'
3: '"'
4: '0'
5: 'b'
6: '0'
7: 'o'
8: '0'
9: 'x'
10: '-'
11: '_'
12: '~'
13: '!'
14: '@'
15: '#'
16: '$'
17: '%'
18: '^'
19: '&'
20: '*'
21: '-'
22: '_'
23: '+'
24: '='
25: '?'
26: '/'
27: '.'
28: '''
29: ' '
30: '\t'
31: '\n'
32: '\r'
33: ';'
34: '\n'
35: '0'-'1'
36: '2'-'7'
37: '8'-'9'
38: 'A'-'F'
39: 'a'-'f'
40: 'A'-'Z'
41: 'a'-'z'
42: .
*/
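
For reference, a minimal driver sketch (not part of this commit) showing how the generated lexer might be exercised. NewLexerFile, Scan, token.EOF, and the tok.Pos fields come from the generated code above; the input path example.schema and the printed format are assumptions.

// usage sketch — hypothetical, not included in this commit.
package main

import (
	"fmt"
	"log"

	"azalea/schema/lexer"
	"azalea/schema/token"
)

func main() {
	// NewLexerFile reads the file and attaches a SourceContext, as above.
	lex, err := lexer.NewLexerFile("example.schema") // assumed input path
	if err != nil {
		log.Fatal(err)
	}
	// Scan returns one token per call until it reports token.EOF.
	for tok := lex.Scan(); tok.Type != token.EOF; tok = lex.Scan() {
		fmt.Printf("%s:%d:%d %q\n",
			tok.Pos.Context.Source(), tok.Pos.Line, tok.Pos.Column, tok.Lit)
	}
}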