feat/schema: add lexer

Author: mae
Date: 2026-01-26 19:26:24 -06:00
Parent: 0991f8d674
Commit: 2526d34f21
9 changed files with 2026 additions and 0 deletions

schema/lexer/acttab.go (new file, 187 lines)

@@ -0,0 +1,187 @@
// Code generated by gocc; DO NOT EDIT.
package lexer
import (
"fmt"
"azalea/schema/token"
)
type ActionTable [NumStates]ActionRow
type ActionRow struct {
Accept token.Type
Ignore string
}
func (a ActionRow) String() string {
return fmt.Sprintf("Accept=%d, Ignore=%s", a.Accept, a.Ignore)
}
var ActTab = ActionTable{
ActionRow{ // S0
Accept: 0,
Ignore: "",
},
ActionRow{ // S1
Accept: -1,
Ignore: "!whitespace",
},
ActionRow{ // S2
Accept: 2,
Ignore: "",
},
ActionRow{ // S3
Accept: 0,
Ignore: "",
},
ActionRow{ // S4
Accept: 2,
Ignore: "",
},
ActionRow{ // S5
Accept: 3,
Ignore: "",
},
ActionRow{ // S6
Accept: 3,
Ignore: "",
},
ActionRow{ // S7
Accept: 3,
Ignore: "",
},
ActionRow{ // S8
Accept: 3,
Ignore: "",
},
ActionRow{ // S9
Accept: 0,
Ignore: "",
},
ActionRow{ // S10
Accept: 0,
Ignore: "",
},
ActionRow{ // S11
Accept: 2,
Ignore: "",
},
ActionRow{ // S12
Accept: 2,
Ignore: "",
},
ActionRow{ // S13
Accept: 2,
Ignore: "",
},
ActionRow{ // S14
Accept: 2,
Ignore: "",
},
ActionRow{ // S15
Accept: 4,
Ignore: "",
},
ActionRow{ // S16
Accept: 3,
Ignore: "",
},
ActionRow{ // S17
Accept: 3,
Ignore: "",
},
ActionRow{ // S18
Accept: 3,
Ignore: "",
},
ActionRow{ // S19
Accept: 3,
Ignore: "",
},
ActionRow{ // S20
Accept: 0,
Ignore: "",
},
ActionRow{ // S21
Accept: 0,
Ignore: "",
},
ActionRow{ // S22
Accept: 0,
Ignore: "",
},
ActionRow{ // S23
Accept: -1,
Ignore: "!comment",
},
ActionRow{ // S24
Accept: 2,
Ignore: "",
},
ActionRow{ // S25
Accept: 2,
Ignore: "",
},
ActionRow{ // S26
Accept: 2,
Ignore: "",
},
ActionRow{ // S27
Accept: 3,
Ignore: "",
},
ActionRow{ // S28
Accept: 3,
Ignore: "",
},
ActionRow{ // S29
Accept: 3,
Ignore: "",
},
ActionRow{ // S30
Accept: 3,
Ignore: "",
},
ActionRow{ // S31
Accept: 3,
Ignore: "",
},
ActionRow{ // S32
Accept: 3,
Ignore: "",
},
ActionRow{ // S33
Accept: 3,
Ignore: "",
},
ActionRow{ // S34
Accept: 3,
Ignore: "",
},
ActionRow{ // S35
Accept: 3,
Ignore: "",
},
ActionRow{ // S36
Accept: 3,
Ignore: "",
},
ActionRow{ // S37
Accept: 3,
Ignore: "",
},
ActionRow{ // S38
Accept: 3,
Ignore: "",
},
ActionRow{ // S39
Accept: 3,
Ignore: "",
},
ActionRow{ // S40
Accept: 3,
Ignore: "",
},
}
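
The table above maps each DFA state to an accept token type and an ignore class; Scan in lexer.go below consults it after every transition. As a rough illustration of how a row reads, here is a small Go sketch; the main package and the chosen state indices are illustrative only, and the import path is assumed from the azalea/schema layout used by the generated files:

package main

import (
	"fmt"

	"azalea/schema/lexer"
)

func main() {
	// S1 is the whitespace state in the table above: Accept == -1 means it
	// does not produce a token, and its "!whitespace" ignore class tells
	// Scan to restart matching at the next rune instead of emitting anything.
	row := lexer.ActTab[1]
	fmt.Println(row) // Accept=-1, Ignore=!whitespace

	// S15 accepts token type 4; Scan records the accept and keeps reading
	// in case a longer match exists (maximal munch).
	fmt.Println(lexer.ActTab[15]) // Accept=4, Ignore=
}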

schema/lexer/lexer.go (new file, 175 lines)

@@ -0,0 +1,175 @@
// Code generated by gocc; DO NOT EDIT.
package lexer
import (
"os"
"unicode/utf8"
"azalea/schema/token"
)
const (
NoState = -1
NumStates = 41
NumSymbols = 43
)
type Lexer struct {
src []byte
pos int
line int
column int
Context token.Context
}
func NewLexer(src []byte) *Lexer {
lexer := &Lexer{
src: src,
pos: 0,
line: 1,
column: 1,
Context: nil,
}
return lexer
}
// SourceContext is a simple instance of a token.Context which
// contains the name of the source file.
type SourceContext struct {
Filepath string
}
func (s *SourceContext) Source() string {
return s.Filepath
}
func NewLexerFile(fpath string) (*Lexer, error) {
src, err := os.ReadFile(fpath)
if err != nil {
return nil, err
}
lexer := NewLexer(src)
lexer.Context = &SourceContext{Filepath: fpath}
return lexer, nil
}
func (l *Lexer) Scan() (tok *token.Token) {
tok = &token.Token{}
if l.pos >= len(l.src) {
tok.Type = token.EOF
tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
tok.Pos.Context = l.Context
return
}
start, startLine, startColumn, end := l.pos, l.line, l.column, 0
tok.Type = token.INVALID
state, rune1, size := 0, rune(-1), 0
for state != -1 {
if l.pos >= len(l.src) {
rune1 = -1
} else {
rune1, size = utf8.DecodeRune(l.src[l.pos:])
l.pos += size
}
nextState := -1
if rune1 != -1 {
nextState = TransTab[state](rune1)
}
state = nextState
if state != -1 {
switch rune1 {
case '\n':
l.line++
l.column = 1
case '\r':
l.column = 1
case '\t':
l.column += 4
default:
l.column++
}
switch {
case ActTab[state].Accept != -1:
tok.Type = ActTab[state].Accept
end = l.pos
case ActTab[state].Ignore != "":
start, startLine, startColumn = l.pos, l.line, l.column
state = 0
if start >= len(l.src) {
tok.Type = token.EOF
}
}
} else {
if tok.Type == token.INVALID {
end = l.pos
}
}
}
if end > start {
l.pos = end
tok.Lit = l.src[start:end]
} else {
tok.Lit = []byte{}
}
tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
tok.Pos.Context = l.Context
return
}
func (l *Lexer) Reset() {
l.pos = 0
}
/*
Lexer symbols:
0: '`'
1: '`'
2: '"'
3: '"'
4: '0'
5: 'b'
6: '0'
7: 'o'
8: '0'
9: 'x'
10: '-'
11: '_'
12: '~'
13: '!'
14: '@'
15: '#'
16: '$'
17: '%'
18: '^'
19: '&'
20: '*'
21: '-'
22: '_'
23: '+'
24: '='
25: '?'
26: '/'
27: '.'
28: '''
29: ' '
30: '\t'
31: '\n'
32: '\r'
33: ';'
34: '\n'
35: '0'-'1'
36: '2'-'7'
37: '8'-'9'
38: 'A'-'F'
39: 'a'-'f'
40: 'A'-'Z'
41: 'a'-'z'
42: .
*/
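
With both generated files in place, the lexer is driven in the usual gocc style: build it over a byte slice (or a file via NewLexerFile) and call Scan until token.EOF comes back. A minimal usage sketch follows; the sample input string is only a guess at the schema syntax (the symbol list suggests backtick-quoted names and ';' line comments) and is not taken from the commit:

package main

import (
	"fmt"

	"azalea/schema/lexer"
	"azalea/schema/token"
)

func main() {
	// Scan returns one token per call; runs matched by the !whitespace and
	// !comment ignore classes are skipped rather than returned.
	l := lexer.NewLexer([]byte("`example` ; a trailing comment\n"))
	for tok := l.Scan(); tok.Type != token.EOF; tok = l.Scan() {
		fmt.Printf("type=%d lit=%q at %d:%d\n",
			tok.Type, tok.Lit, tok.Pos.Line, tok.Pos.Column)
	}
}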

File diff suppressed because it is too large