feat/schema: add lexer

This commit is contained in:
mae
2026-01-26 19:26:24 -06:00
parent 0991f8d674
commit 2526d34f21
9 changed files with 2026 additions and 0 deletions

18
schema/azschema.bnf Normal file
View File

@@ -0,0 +1,18 @@
// Lexical grammar for the azschema format, compiled by gocc.
// Conventions: productions prefixed with '_' are lexical fragments (not
// emitted as tokens); productions prefixed with '!' are scanned but
// discarded by the lexer (the generated action table marks them as
// "!whitespace" / "!comment" ignore entries).

// Strings are delimited by backticks or double quotes; {.} repeats "any character".
string: '`' {.} '`' | '"' {.} '"';

// Digit fragments build on each other: bin ⊂ oct ⊂ dec ⊂ hex.
_bin_digit: '0' - '1';
_oct_digit: _bin_digit | '2' - '7';
_dec_digit: _oct_digit | '8' - '9';
_hex_digit: _dec_digit | 'A' - 'F' | 'a' - 'f';
_negative: '-';

// Numbers: optional leading '-', with 0b / 0o / 0x radix prefixes or plain decimal.
number: [_negative] '0' 'b' _bin_digit {_bin_digit}
| [_negative] '0' 'o' _oct_digit {_oct_digit}
| [_negative] _dec_digit {_dec_digit}
| [_negative] '0' 'x' _hex_digit {_hex_digit};

// NOTE(review): '-' and '_' each appear twice in _name_initial; the
// duplicates are harmless but could be removed on a future regeneration.
_name_initial: 'A' - 'Z' | 'a' - 'z' | '_' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '-' | '_' | '+' | '=' | '?' | '/' | '.' | '\'';
_name_char: _name_initial | _dec_digit;
name: _name_initial {_name_char};

// Whitespace and ';'-to-newline comments are consumed and dropped.
!whitespace: ' ' | '\t' | '\n' | '\r';
!comment: ';' {.} '\n';

187
schema/lexer/acttab.go Normal file
View File

@@ -0,0 +1,187 @@
// Code generated by gocc; DO NOT EDIT.
package lexer
import (
"fmt"
"azalea/schema/token"
)
// ActionTable maps each of the lexer's DFA states to its ActionRow,
// indexed by state number.
type ActionTable [NumStates]ActionRow

// ActionRow records what the lexer does when a scan halts in a state:
// Accept is the token.Type emitted on accept (0 means the state is not
// accepting; -1 means the match is an ignored pattern, named by Ignore,
// e.g. "!whitespace").
type ActionRow struct {
	Accept token.Type
	Ignore string
}
// String renders the row in a compact form for debugging/tracing output.
func (a ActionRow) String() string {
	accept, ignore := a.Accept, a.Ignore
	return fmt.Sprintf("Accept=%d, Ignore=%s", accept, ignore)
}
// ActTab is the generated accept/ignore table for the 41 DFA states.
// Accept values correspond to token.TokMap: 2 = name, 3 = number,
// 4 = string; 0 marks non-accepting states and -1 marks states that
// match an ignored pattern (whitespace or comments).
var ActTab = ActionTable{
	ActionRow{ // S0
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S1  (matched "!whitespace" — token is discarded)
		Accept: -1,
		Ignore: "!whitespace",
	},
	ActionRow{ // S2
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S3
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S4
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S5
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S6
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S7
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S8
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S9
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S10
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S11
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S12
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S13
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S14
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S15  (accepts "string")
		Accept: 4,
		Ignore: "",
	},
	ActionRow{ // S16
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S17
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S18
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S19
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S20
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S21
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S22
		Accept: 0,
		Ignore: "",
	},
	ActionRow{ // S23  (matched "!comment" — token is discarded)
		Accept: -1,
		Ignore: "!comment",
	},
	ActionRow{ // S24
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S25
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S26
		Accept: 2,
		Ignore: "",
	},
	ActionRow{ // S27
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S28
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S29
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S30
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S31
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S32
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S33
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S34
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S35
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S36
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S37
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S38
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S39
		Accept: 3,
		Ignore: "",
	},
	ActionRow{ // S40
		Accept: 3,
		Ignore: "",
	},
}

175
schema/lexer/lexer.go Normal file
View File

@@ -0,0 +1,175 @@
// Code generated by gocc; DO NOT EDIT.
package lexer
import (
"os"
"unicode/utf8"
"azalea/schema/token"
)
// Dimensions of the generated scanner.
const (
	// NoState marks a dead DFA state (no transition possible).
	NoState = -1
	// NumStates is the number of DFA states in ActTab / TransTab.
	NumStates = 41
	// NumSymbols is the number of input symbol classes (enumerated in
	// the "Lexer symbols" listing at the end of this file).
	NumSymbols = 43
)
// Lexer is the gocc-generated scanner for the azschema grammar. It walks
// src rune by rune, tracking line and column so every token carries an
// accurate source position.
type Lexer struct {
	src     []byte // full input being scanned
	pos     int    // byte offset of the next rune to read
	line    int    // 1-based line number of pos
	column  int    // 1-based column of pos (a tab advances it by 4 — see Scan)
	Context token.Context // attached to every emitted token's Pos; may be nil
}
// NewLexer returns a Lexer over src, positioned at the start of the
// input (line 1, column 1) with no Context attached.
func NewLexer(src []byte) *Lexer {
	return &Lexer{
		src:     src,
		pos:     0,
		line:    1,
		column:  1,
		Context: nil,
	}
}
// SourceContext is a minimal token.Context implementation that records
// the path of the file being lexed.
type SourceContext struct {
	Filepath string
}

// Source returns the recorded file path, satisfying token.Sourcer so
// token positions can print as "file:line:column".
func (s *SourceContext) Source() string {
	path := s.Filepath
	return path
}
// NewLexerFile reads the file at fpath into memory and returns a Lexer
// over its contents. The returned lexer's Context names the file, so
// token positions report "fpath:line:column". The read error, if any,
// is returned unchanged.
func NewLexerFile(fpath string) (*Lexer, error) {
	contents, err := os.ReadFile(fpath)
	if err != nil {
		return nil, err
	}
	l := NewLexer(contents)
	l.Context = &SourceContext{Filepath: fpath}
	return l, nil
}
// Scan returns the next token using maximal munch: it runs the DFA until
// no transition applies, then backtracks to the last accepting position.
// Matches of ignored patterns (whitespace, comments) restart the scan in
// place. At end of input it returns a token.EOF token; unrecognized
// input yields token.INVALID with the offending bytes as the literal.
func (l *Lexer) Scan() (tok *token.Token) {
	tok = &token.Token{}
	// Fast path: already at end of input.
	if l.pos >= len(l.src) {
		tok.Type = token.EOF
		tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
		tok.Pos.Context = l.Context
		return
	}
	// start/startLine/startColumn mark where the current lexeme begins;
	// end records the position just past the last accepted match (0 = none).
	start, startLine, startColumn, end := l.pos, l.line, l.column, 0
	tok.Type = token.INVALID
	state, rune1, size := 0, rune(-1), 0
	for state != -1 {
		// Read the next rune; rune1 == -1 signals end of input.
		if l.pos >= len(l.src) {
			rune1 = -1
		} else {
			rune1, size = utf8.DecodeRune(l.src[l.pos:])
			l.pos += size
		}
		nextState := -1
		if rune1 != -1 {
			nextState = TransTab[state](rune1)
		}
		state = nextState
		if state != -1 {
			// Update line/column bookkeeping for the consumed rune.
			// A tab is counted as 4 columns; '\r' rewinds to column 1.
			switch rune1 {
			case '\n':
				l.line++
				l.column = 1
			case '\r':
				l.column = 1
			case '\t':
				l.column += 4
			default:
				l.column++
			}
			switch {
			case ActTab[state].Accept != -1:
				// Accepting (or Accept==0 INVALID) state: remember how far
				// the match extends so we can backtrack to it later.
				tok.Type = ActTab[state].Accept
				end = l.pos
			case ActTab[state].Ignore != "":
				// Matched an ignored pattern (!whitespace / !comment):
				// drop it and restart the DFA at the current position.
				start, startLine, startColumn = l.pos, l.line, l.column
				state = 0
				if start >= len(l.src) {
					tok.Type = token.EOF
				}
			}
		} else {
			// Dead state: if nothing was ever accepted, keep the bytes
			// read so far as the INVALID token's literal.
			if tok.Type == token.INVALID {
				end = l.pos
			}
		}
	}
	if end > start {
		// Backtrack to just past the last accepted match.
		l.pos = end
		tok.Lit = l.src[start:end]
	} else {
		tok.Lit = []byte{}
	}
	tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
	tok.Pos.Context = l.Context
	return
}
// Reset rewinds the lexer to the beginning of its source so the input
// can be scanned again from scratch.
func (l *Lexer) Reset() {
	l.pos = 0
	// Bug fix: the generated Reset only rewound pos, leaving line/column
	// at their end-of-input values, so every token after a Reset carried
	// wrong positions. Restore them to the NewLexer starting state.
	l.line = 1
	l.column = 1
}
/*
Lexer symbols:
0: '`'
1: '`'
2: '"'
3: '"'
4: '0'
5: 'b'
6: '0'
7: 'o'
8: '0'
9: 'x'
10: '-'
11: '_'
12: '~'
13: '!'
14: '@'
15: '#'
16: '$'
17: '%'
18: '^'
19: '&'
20: '*'
21: '-'
22: '_'
23: '+'
24: '='
25: '?'
26: '/'
27: '.'
28: '''
29: ' '
30: '\t'
31: '\n'
32: '\r'
33: ';'
34: '\n'
35: '0'-'1'
36: '2'-'7'
37: '8'-'9'
38: 'A'-'F'
39: 'a'-'f'
40: 'A'-'Z'
41: 'a'-'z'
42: .
*/

File diff suppressed because it is too large Load Diff

View File

@@ -1 +1,2 @@
//go:generate gocc azschema.bnf

// Package schema contains the azschema grammar definition; the lexer and
// token support code in its subdirectories is generated from
// azschema.bnf by gocc (run `go generate`).
package schema

14
schema/token/context.go Normal file
View File

@@ -0,0 +1,14 @@
// Code generated by gocc; DO NOT EDIT.
package token
// Context carries arbitrary user-defined data from the lexer/scanner to
// every token it produces (via Pos.Context).
type Context interface{}

// Sourcer is a Context that can name its origin, e.g. the path of the
// file currently being lexed; Pos.String uses it for "file:line:column"
// formatting.
type Sourcer interface {
	Source() string
}

153
schema/token/token.go Normal file
View File

@@ -0,0 +1,153 @@
// Code generated by gocc; DO NOT EDIT.
package token
import (
"bytes"
"fmt"
"strconv"
"unicode/utf8"
)
// Token is a single lexeme produced by the lexer: its Type, the raw
// literal bytes, and its source Pos.
type Token struct {
	Type
	Lit []byte
	Pos
}

// Type identifies a token class. Values beyond EOF are grammar-specific;
// see TokMap for the registered names.
type Type int

const (
	INVALID Type = iota // 0: unrecognized input
	EOF                 // 1: end of input ("␚" in TokMap)
)

// Pos is a source location. Context optionally carries user data such as
// the name of the file being lexed (see Sourcer).
type Pos struct {
	Offset  int // byte offset from the start of the input
	Line    int // 1-based
	Column  int // 1-based
	Context Context
}
// String formats the position as "file:line:column" when the Context can
// name its source, and as an explicit Pos(...) dump otherwise.
func (p Pos) String() string {
	if src, ok := p.Context.(Sourcer); ok {
		return fmt.Sprintf("%s:%d:%d", src.Source(), p.Line, p.Column)
	}
	return fmt.Sprintf("Pos(offset=%d, line=%d, column=%d)", p.Offset, p.Line, p.Column)
}
// TokenMap translates between numeric token Types and their string names.
type TokenMap struct {
	typeMap []string        // index = Type, value = name
	idMap   map[string]Type // name -> Type
}

// Id returns the name registered for tok, or "unknown" when tok is out
// of range.
func (m TokenMap) Id(tok Type) string {
	if int(tok) >= len(m.typeMap) {
		return "unknown"
	}
	return m.typeMap[tok]
}

// Type returns the Type registered for the given name, or INVALID when
// the name is unknown.
func (m TokenMap) Type(tok string) Type {
	typ, ok := m.idMap[tok]
	if !ok {
		return INVALID
	}
	return typ
}

// TokenString renders tok as "name(id,literal)" for diagnostics.
func (m TokenMap) TokenString(tok *Token) string {
	name := m.Id(tok.Type)
	return fmt.Sprintf("%s(%d,%s)", name, tok.Type, tok.Lit)
}

// StringType renders typ as "name(id)".
func (m TokenMap) StringType(typ Type) string {
	name := m.Id(typ)
	return fmt.Sprintf("%s(%d)", name, typ)
}
// Equals reports whether rhs is a *Token with the same Type and Lit (or
// the identical pointer). Any other argument type compares unequal.
func (t *Token) Equals(rhs interface{}) bool {
	other, ok := rhs.(*Token)
	if !ok {
		return false
	}
	if t == other {
		return true
	}
	return t.Type == other.Type && bytes.Equal(t.Lit, other.Lit)
}
// CharLiteralValue returns the contents of the char literal, i.e. the
// token text with its first and last bytes (the quotes) stripped.
func (t *Token) CharLiteralValue() string {
	inner := t.Lit[1 : len(t.Lit)-1]
	return string(inner)
}
// Float32Value returns the float32 value of the token or an error if the
// token literal does not denote a valid float32.
func (t *Token) Float32Value() (float32, error) {
	// Restructured to keep the happy path unindented instead of
	// returning from an else branch.
	v, err := strconv.ParseFloat(string(t.Lit), 32)
	if err != nil {
		return 0, err
	}
	return float32(v), nil
}
// Float64Value returns the float64 value of the token or an error if the
// token literal does not denote a valid float64.
func (t *Token) Float64Value() (float64, error) {
	return strconv.ParseFloat(string(t.Lit), 64)
}

// IDValue returns the string representation of an identifier token
// (the literal bytes, unmodified).
func (t *Token) IDValue() string {
	return string(t.Lit)
}
// Int32Value returns the int32 value of the token or an error if the
// token literal does not denote a valid int32.
func (t *Token) Int32Value() (int32, error) {
	// Bug fix: the generated code parsed with bitSize 64 and then
	// converted with int32(v), silently truncating out-of-range values.
	// Parsing with bitSize 32 makes ParseInt return a range error instead.
	v, err := strconv.ParseInt(string(t.Lit), 10, 32)
	if err != nil {
		return 0, err
	}
	return int32(v), nil
}
// Int64Value returns the int64 value of the token or an error if the
// token literal does not denote a valid int64.
// (The original comment said "float64" — a copy-paste error.)
func (t *Token) Int64Value() (int64, error) {
	return strconv.ParseInt(string(t.Lit), 10, 64)
}
// UTF8Rune decodes the first UTF8 rune in the token literal. It returns
// an error if the literal does not begin with a valid rune.
func (t *Token) UTF8Rune() (rune, error) {
	r, size := utf8.DecodeRune(t.Lit)
	// Bug fix: DecodeRune reports failure as (RuneError, 0) or
	// (RuneError, 1), but a literal U+FFFD in the input decodes as
	// (RuneError, 3) and is perfectly valid. The original code rejected
	// that case; checking the size distinguishes the two.
	if r == utf8.RuneError && size <= 1 {
		err := fmt.Errorf("Invalid rune")
		return r, err
	}
	return r, nil
}
// StringValue returns the contents of the string literal: the token text
// with the surrounding quote bytes removed.
func (t *Token) StringValue() string {
	body := t.Lit[1 : len(t.Lit)-1]
	return string(body)
}
// TokMap registers the token types of the azschema grammar. Index 1
// ("␚") is the end-of-input marker; ids 2-4 (name, number, string) match
// the Accept values used by the generated lexer action table.
var TokMap = TokenMap{
	typeMap: []string{
		"INVALID",
		"␚",
		"name",
		"number",
		"string",
	},
	idMap: map[string]Type{
		"INVALID": 0,
		"␚":       1,
		"name":    2,
		"number":  3,
		"string":  4,
	},
}

101
schema/util/litconv.go Normal file
View File

@@ -0,0 +1,101 @@
// Code generated by gocc; DO NOT EDIT.
package util
import (
"fmt"
"strconv"
"unicode"
"unicode/utf8"
)
// Interface.
// RuneValue will convert the literal value of a scanned token to a rune.
// lit must include its surrounding quotes (e.g. `'a'` or `'\n'`); escape
// sequences are resolved by escapeCharVal. Panics if the unquoted text
// is not exactly one rune.
func RuneValue(lit []byte) rune {
	// lit[0] is the opening quote, so an escape sequence starts at lit[1].
	if lit[1] == '\\' {
		return escapeCharVal(lit)
	}
	// Plain literal: decode the single rune between the quotes and check
	// that it accounts for all the bytes inside them.
	r, size := utf8.DecodeRune(lit[1:])
	if size != len(lit)-2 {
		panic(fmt.Sprintf("Error decoding rune. Lit: %s, rune: %d, size%d\n", lit, r, size))
	}
	return r
}
// IntValue will attempt to parse a byte-slice as a signed base-10 64-bit
// integer. (The generated comment misnamed this function "UintValue".)
func IntValue(lit []byte) (int64, error) {
	s := string(lit)
	return strconv.ParseInt(s, 10, 64)
}
// UintValue will attempt to parse a byte-slice as an unsigned base-10
// 64-bit integer.
func UintValue(lit []byte) (uint64, error) {
	s := string(lit)
	return strconv.ParseUint(s, 10, 64)
}
// Helpers.

// escapeCharVal decodes the escape sequence in a quoted character
// literal (lit[1] == '\\') and returns the rune it denotes. Supported
// forms: single-char escapes (\a \b \f \n \r \t \v \\ \'), octal \nnn,
// and hex \xhh, \uhhhh, \Uhhhhhhhh. Panics on any malformed sequence or
// on a value outside the valid Unicode range.
func escapeCharVal(lit []byte) rune {
	// i = number of digits to read, base = radix, max = largest legal value.
	var i, base, max uint32
	// offset starts at 2: past the opening quote and the backslash.
	offset := 2
	switch lit[offset] {
	case 'a':
		return '\a'
	case 'b':
		return '\b'
	case 'f':
		return '\f'
	case 'n':
		return '\n'
	case 'r':
		return '\r'
	case 't':
		return '\t'
	case 'v':
		return '\v'
	case '\\':
		return '\\'
	case '\'':
		return '\''
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// Octal: the first digit is part of the value, so offset stays put.
		i, base, max = 3, 8, 255
	case 'x':
		i, base, max = 2, 16, 255
		offset++
	case 'u':
		i, base, max = 4, 16, unicode.MaxRune
		offset++
	case 'U':
		i, base, max = 8, 16, unicode.MaxRune
		offset++
	default:
		panic(fmt.Sprintf("Error decoding character literal: %s\n", lit))
	}
	// Accumulate up to i digits, stopping before the closing quote.
	var x uint32
	for ; i > 0 && offset < len(lit)-1; i-- {
		ch, size := utf8.DecodeRune(lit[offset:])
		offset += size
		d := uint32(digitVal(ch))
		if d >= base {
			panic(fmt.Sprintf("charVal(%s): illegal character (%c) in escape sequence. size=%d, offset=%d", lit, ch, size, offset))
		}
		x = x*base + d
	}
	// Reject values above max and the UTF-16 surrogate range D800-DFFF.
	if x > max || 0xD800 <= x && x < 0xE000 {
		panic(fmt.Sprintf("Error decoding escape char value. Lit:%s, offset:%d, escape sequence is invalid Unicode code point\n", lit, offset))
	}
	return rune(x)
}
// digitVal maps a (possibly hex) digit rune to its numeric value,
// returning 16 — larger than any legal digit — for anything else.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch) - '0'
	}
	if 'a' <= ch && ch <= 'f' {
		return int(ch) - 'a' + 10
	}
	if 'A' <= ch && ch <= 'F' {
		return int(ch) - 'A' + 10
	}
	return 16 // larger than any legal digit val
}

39
schema/util/rune.go Normal file
View File

@@ -0,0 +1,39 @@
// Code generated by gocc; DO NOT EDIT.
package util
import (
"fmt"
)
func RuneToString(r rune) string {
if r >= 0x20 && r < 0x7f {
return fmt.Sprintf("'%c'", r)
}
switch r {
case 0x07:
return "'\\a'"
case 0x08:
return "'\\b'"
case 0x0C:
return "'\\f'"
case 0x0A:
return "'\\n'"
case 0x0D:
return "'\\r'"
case 0x09:
return "'\\t'"
case 0x0b:
return "'\\v'"
case 0x5c:
return "'\\\\\\'"
case 0x27:
return "'\\''"
case 0x22:
return "'\\\"'"
}
if r < 0x10000 {
return fmt.Sprintf("\\u%04x", r)
}
return fmt.Sprintf("\\U%08x", r)
}