feat/schema: add lexer

Author: mae
Date: 2026-01-26 19:26:24 -06:00
parent 0991f8d674
commit 2526d34f21
9 changed files with 2026 additions and 0 deletions

schema/lexer/lexer.go (new file, 175 lines)

@@ -0,0 +1,175 @@
// Code generated by gocc; DO NOT EDIT.

package lexer

import (
	"os"
	"unicode/utf8"

	"azalea/schema/token"
)

const (
	NoState    = -1
	NumStates  = 41
	NumSymbols = 43
)

type Lexer struct {
	src     []byte
	pos     int
	line    int
	column  int
	Context token.Context
}

func NewLexer(src []byte) *Lexer {
	lexer := &Lexer{
		src:     src,
		pos:     0,
		line:    1,
		column:  1,
		Context: nil,
	}
	return lexer
}

// SourceContext is a simple instance of a token.Context which
// contains the name of the source file.
type SourceContext struct {
	Filepath string
}

func (s *SourceContext) Source() string {
	return s.Filepath
}

func NewLexerFile(fpath string) (*Lexer, error) {
	src, err := os.ReadFile(fpath)
	if err != nil {
		return nil, err
	}
	lexer := NewLexer(src)
	lexer.Context = &SourceContext{Filepath: fpath}
	return lexer, nil
}

func (l *Lexer) Scan() (tok *token.Token) {
	tok = &token.Token{}
	if l.pos >= len(l.src) {
		tok.Type = token.EOF
		tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
		tok.Pos.Context = l.Context
		return
	}
	start, startLine, startColumn, end := l.pos, l.line, l.column, 0
	tok.Type = token.INVALID
	state, rune1, size := 0, rune(-1), 0
	for state != -1 {
		if l.pos >= len(l.src) {
			rune1 = -1
		} else {
			rune1, size = utf8.DecodeRune(l.src[l.pos:])
			l.pos += size
		}

		nextState := -1
		if rune1 != -1 {
			nextState = TransTab[state](rune1)
		}
		state = nextState

		if state != -1 {
			switch rune1 {
			case '\n':
				l.line++
				l.column = 1
			case '\r':
				l.column = 1
			case '\t':
				l.column += 4
			default:
				l.column++
			}

			switch {
			case ActTab[state].Accept != -1:
				tok.Type = ActTab[state].Accept
				end = l.pos
			case ActTab[state].Ignore != "":
				start, startLine, startColumn = l.pos, l.line, l.column
				state = 0
				if start >= len(l.src) {
					tok.Type = token.EOF
				}
			}
		} else {
			if tok.Type == token.INVALID {
				end = l.pos
			}
		}
	}
	if end > start {
		l.pos = end
		tok.Lit = l.src[start:end]
	} else {
		tok.Lit = []byte{}
	}
	tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
	tok.Pos.Context = l.Context

	return
}

func (l *Lexer) Reset() {
	l.pos = 0
}
/*
Lexer symbols:
0: '`'
1: '`'
2: '"'
3: '"'
4: '0'
5: 'b'
6: '0'
7: 'o'
8: '0'
9: 'x'
10: '-'
11: '_'
12: '~'
13: '!'
14: '@'
15: '#'
16: '$'
17: '%'
18: '^'
19: '&'
20: '*'
21: '-'
22: '_'
23: '+'
24: '='
25: '?'
26: '/'
27: '.'
28: '''
29: ' '
30: '\t'
31: '\n'
32: '\r'
33: ';'
34: '\n'
35: '0'-'1'
36: '2'-'7'
37: '8'-'9'
38: 'A'-'F'
39: 'a'-'f'
40: 'A'-'Z'
41: 'a'-'z'
42: .
*/
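
For reference, a minimal driver sketch (not part of this commit) showing how the generated lexer might be exercised. NewLexerFile, Scan, token.EOF, and the tok.Pos fields come from the generated code above; the input path example.schema and the printed format are assumptions.

// usage sketch — hypothetical, not included in this commit.
package main

import (
	"fmt"
	"log"

	"azalea/schema/lexer"
	"azalea/schema/token"
)

func main() {
	// NewLexerFile reads the file and attaches a SourceContext, as above.
	lex, err := lexer.NewLexerFile("example.schema") // assumed input path
	if err != nil {
		log.Fatal(err)
	}
	// Scan returns one token per call until it reports token.EOF.
	for tok := lex.Scan(); tok.Type != token.EOF; tok = lex.Scan() {
		fmt.Printf("%s:%d:%d %q\n",
			tok.Pos.Context.Source(), tok.Pos.Line, tok.Pos.Column, tok.Lit)
	}
}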