diff --git a/schema/ast/ast.go b/schema/ast/ast.go
new file mode 100644
index 0000000..f412de8
--- /dev/null
+++ b/schema/ast/ast.go
@@ -0,0 +1,164 @@
+// Package ast holds the expression tree types used by the schema grammar
+// and the constructors that build them.
+package ast
+
+import (
+	"errors"
+	"strconv"
+	"strings"
+
+	"azalea/schema/token"
+)
+
+// Schema is the root of a parsed schema: its top-level expressions.
+type Schema ExprList
+
+// ExprList is an ordered sequence of expressions.
+type ExprList []Expr
+
+// Expr is a named expression with a left and a right operand.
+type Expr struct {
+	Name  string
+	Left  *Val
+	Right *Val
+}
+
+// ValList is an ordered sequence of values.
+type ValList []Val
+
+// Val is one operand: a string literal, a number literal, a name or a
+// nested expression. String renders the first of these that is set.
+type Val struct {
+	string string
+	number string
+	name   string
+	*Expr
+}
+
+// NewExprList starts an ExprList containing expr, which must be an Expr.
+func NewExprList(expr any) (ExprList, error) {
+	return ExprList{expr.(Expr)}, nil
+}
+
+// NewStringVal wraps the literal text of a string token in a Val.
+func NewStringVal(val *token.Token) (Val, error) {
+	return Val{string: string(val.Lit)}, nil
+}
+
+// NewNumberVal wraps the literal text of a number token in a Val.
+func NewNumberVal(val *token.Token) (Val, error) {
+	return Val{number: string(val.Lit)}, nil
+}
+
+// NewNameVal wraps the literal text of a name token in a Val.
+func NewNameVal(val *token.Token) (Val, error) {
+	return Val{name: string(val.Lit)}, nil
+}
+
+// NewExprVal wraps val, which must be an Expr, in a Val.
+func NewExprVal(val any) (Val, error) {
+	expr := val.(Expr)
+	return Val{Expr: &expr}, nil
+}
+
+// AppendExpr appends expr (an Expr) to exprList (an ExprList).
+func AppendExpr(exprList, expr any) (ExprList, error) {
+	return append(exprList.(ExprList), expr.(Expr)), nil
+}
+
+// NewValList starts a ValList containing val, which must be a Val.
+func NewValList(val any) (ValList, error) {
+	return ValList{val.(Val)}, nil
+}
+
+// AppendVal appends val (a Val) to valList (a ValList).
+func AppendVal(valList, val any) (ValList, error) {
+	return append(valList.(ValList), val.(Val)), nil
+}
+
+// NewExpr builds an Expr named after the name token. left and right are
+// each either nil or a Val; a nil operand is stored as a pointer to an
+// empty Val, so Left and Right are never nil on the result.
+func NewExpr(name *token.Token, left any, right any) (Expr, error) {
+	var l, r Val
+	if left != nil {
+		l = left.(Val)
+	}
+	if right != nil {
+		r = right.(Val)
+	}
+	return Expr{Name: string(name.Lit), Left: &l, Right: &r}, nil
+}
+
+// ListExpr folds val, which must be a ValList, into a right-nested chain
+// of "." expressions: the values a b c become (. a (. b c)).
+//
+// A single value yields (. a) with an empty right operand, mirroring how
+// NewExpr stores a missing operand. An empty list is reported as an error.
+func ListExpr(val any) (Expr, error) {
+	vals := val.(ValList)
+	switch len(vals) {
+	case 0:
+		return Expr{}, errors.New("list expression needs at least one value")
+	case 1:
+		return Expr{Name: ".", Left: &vals[0], Right: &Val{}}, nil
+	}
+
+	root := Expr{Name: "."}
+	current := &root
+	last := len(vals) - 2
+	// Point at the slice elements, not at a range variable: before Go 1.22
+	// that variable is shared by every iteration, so each Left would end
+	// up aliasing the same Val.
+	for i := 0; i < last; i++ {
+		next := &Expr{Name: "."}
+		current.Left = &vals[i]
+		current.Right = &Val{Expr: next}
+		current = next
+	}
+	current.Left = &vals[last]
+	current.Right = &vals[last+1]
+	return root, nil
+}
+
+// String renders the expression in prefix form, "(name left right)".
+// A nil operand renders as nothing, leaving only its separating space.
+func (e Expr) String() string {
+	var sb strings.Builder
+	sb.WriteByte('(')
+	sb.WriteString(e.Name)
+	sb.WriteByte(' ')
+	if e.Left != nil {
+		sb.WriteString(e.Left.String())
+	}
+	sb.WriteByte(' ')
+	// Right is checked on its own; either operand may be nil.
+	if e.Right != nil {
+		sb.WriteString(e.Right.String())
+	}
+	sb.WriteByte(')')
+	return sb.String()
+}
+
+// String renders the value: string and name values as written, numbers
+// normalised to base 10, and a nested expression via Expr.String. A Val
+// with nothing set renders as "".
+func (v *Val) String() string {
+	switch {
+	case v.string != "":
+		return v.string
+	case v.number != "":
+		num, err := strconv.ParseInt(v.number, 0, 64)
+		if err != nil {
+			// Keep the literal as written rather than printing 0 for text
+			// ParseInt rejects (out of range, misplaced underscores).
+			return v.number
+		}
+		return strconv.FormatInt(num, 10)
+	case v.name != "":
+		return v.name
+	case v.Expr != nil:
+		return v.Expr.String()
+	}
+	return ""
+}
diff --git a/schema/azschema.bnf b/schema/azschema.bnf
new file mode 100644
index 0000000..84299c1
--- /dev/null
+++ b/schema/azschema.bnf
@@ -0,0 +1,45 @@
+string: '`' {.} '`' | '"' {.} '"';
+
+_bin_digit: '0' - '1';
+_oct_digit: _bin_digit | '2' - '7';
+_dec_digit: _oct_digit | '8' - '9';
+_hex_digit: _dec_digit | 'A' - 'F' | 'a' - 'f';
+number: ['-' | '+'] '0' 'b' _bin_digit {_bin_digit | '_'}
+	| ['-' | '+'] '0' 'o' _oct_digit {_oct_digit | '_'}
+	| ['-' | '+'] _dec_digit {_dec_digit | '_'}
+	| ['-' | '+'] '0' 'x' _hex_digit {_hex_digit | '_'};
+
+_name_initial: 'A' - 'Z' | 'a' - 'z' | '_' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '-' | '_' | '+' | '=' | '?' | '/' | '.' | '\'';
+_name_char: _name_initial | _dec_digit;
+name: _name_initial {_name_char};
+
+!whitespace: ' ' | '\t' | '\n' | '\r';
+!comment: ';' {.} '\n';
+
+<<
+import (
+	"azalea/schema/ast"
+	"azalea/schema/token"
+)
+>>
+Schema: ExprList;
+ExprList
+	: Expr <>
+	| ExprList Expr <>
+	;
+ValList
+	: Val <>
+	| ValList Val <>
+	;
+Val
+	: string <>
+	| number <>
+	| name <>
+	| Expr <>
+	;
+Expr
+	: "(" name Val Val ")" <>
+	| "(" name Val ")" <>
+	| "(" name ")" <>
+	| "(" "." ValList ")" <>
+	;
\ No newline at end of file
diff --git a/schema/errors/errors.go b/schema/errors/errors.go
new file mode 100644
index 0000000..770affa
--- /dev/null
+++ b/schema/errors/errors.go
@@ -0,0 +1,108 @@
+// Code generated by gocc; DO NOT EDIT.
+
+package errors
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode"
+
+	"azalea/schema/token"
+)
+
+type ErrorSymbol interface {
+}
+
+type Error struct {
+	Err            error
+	ErrorToken     *token.Token
+	ErrorSymbols   []ErrorSymbol
+	ExpectedTokens []string
+	StackTop       int
+}
+
+func (e *Error) String() string { // multi-line dump: error, token, position, expected tokens, error symbols
+	w := new(strings.Builder)
+	if e.Err != nil {
+		fmt.Fprintln(w, "Error ", e.Err)
+	} else {
+		fmt.Fprintln(w, "Error")
+	}
+	fmt.Fprintf(w, "Token: type=%d, lit=%s\n", e.ErrorToken.Type, e.ErrorToken.Lit)
+	fmt.Fprintf(w, "Pos: offset=%d, line=%d, column=%d\n", e.ErrorToken.Pos.Offset, e.ErrorToken.Pos.Line, e.ErrorToken.Pos.Column)
+	fmt.Fprint(w, "Expected one of: ")
+	for _, sym := range e.ExpectedTokens {
+		fmt.Fprint(w, string(sym), " ")
+	}
+	fmt.Fprintln(w, "ErrorSymbol:")
+	for _, sym := range e.ErrorSymbols {
+		fmt.Fprintf(w, "%v\n", sym)
+	}
+
+	return w.String()
+}
+
+func DescribeExpected(tokens []string) string { // phrases the expected-token list in English
+	switch len(tokens) {
+	case 0:
+		return "unexpected additional tokens"
+
+	case 1:
+		return "expected " + tokens[0]
+
+	case 2:
+		return "expected either " + tokens[0] + " or " + tokens[1]
+
+	case 3:
+		// Oxford-comma rules require more than 3 items in a list for the
+		// comma to appear before the 'or'.
+		return fmt.Sprintf("expected one of %s, %s or %s", tokens[0], tokens[1], tokens[2])
+
+	default:
+		// Oxford-comma separated alternatives list. NOTE(review): the append below overwrites the last element of the caller's slice in place.
+		tokens = append(tokens[:len(tokens)-1], "or "+tokens[len(tokens)-1])
+		return "expected one of " + strings.Join(tokens, ", ")
+	}
+}
+
+func DescribeToken(tok *token.Token) string { // quoted literal, or a fixed phrase for INVALID and EOF tokens
+	switch tok.Type {
+	case token.INVALID:
+		return fmt.Sprintf("unknown/invalid token %q", tok.Lit)
+	case token.EOF:
+		return "end-of-file"
+	default:
+		return fmt.Sprintf("%q", tok.Lit)
+	}
+}
+
+func (e *Error) Error() string { // one-line "[source:]line:column: error: ..." message
+	// Identify the line and column of the error in 'gnu' style so it can be understood
+	// by editors and IDEs; the user will need to prefix it with a filename.
+	text := fmt.Sprintf("%d:%d: error: ", e.ErrorToken.Pos.Line, e.ErrorToken.Pos.Column)
+
+	// See if the error token can provide us with the filename.
+	switch src := e.ErrorToken.Pos.Context.(type) {
+	case token.Sourcer:
+		text = src.Source() + ":" + text
+	}
+
+	if e.Err != nil {
+		// Custom error specified, e.g. by << nil, errors.New("missing newline") >>
+		text += e.Err.Error()
+	} else {
+		tokens := make([]string, len(e.ExpectedTokens))
+		for idx, token := range e.ExpectedTokens {
+			if !unicode.IsLetter(rune(token[0])) {
+				token = strconv.Quote(token)
+			}
+			tokens[idx] = token
+		}
+		text += DescribeExpected(tokens)
+		actual := DescribeToken(e.ErrorToken)
+		text += fmt.Sprintf("; got: %s", actual)
+	}
+
+	return text
+}
diff --git a/schema/lexer/acttab.go b/schema/lexer/acttab.go
new file mode 100644
index 0000000..0a61208
--- /dev/null
+++ b/schema/lexer/acttab.go
@@ -0,0 +1,237 @@
+// Code generated by gocc; DO NOT EDIT.
+
+package lexer
+
+import (
+	"fmt"
+
+	"azalea/schema/token"
+)
+
+// ActionTable holds one ActionRow per lexer state.
+type ActionTable [NumStates]ActionRow
+
+// ActionRow describes what the lexer does on reaching a state. Accept is the
+// token type Scan records for the state, or -1 when the state yields no
+// token; Ignore names the ignored pattern, if any, that the state completes.
+type ActionRow struct {
+	Accept token.Type
+	Ignore string
+}
+
+// String renders the row as "Accept=<type>, Ignore=<name>".
+func (a ActionRow) String() string {
+	return fmt.Sprintf("Accept=%d, Ignore=%s", a.Accept, a.Ignore)
+}
+
+// ActTab is the action table of the generated lexer, indexed by state number.
+var ActTab = ActionTable{
+	ActionRow{ // S0
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S1
+		Accept: -1,
+		Ignore: "!whitespace",
+	},
+	ActionRow{ // S2
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S3
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S4
+		Accept: 5,
+		Ignore: "",
+	},
+	ActionRow{ // S5
+		Accept: 6,
+		Ignore: "",
+	},
+	ActionRow{ // S6
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S7
+		Accept: 7,
+		Ignore: "",
+	},
+	ActionRow{ // S8
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S9
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S10
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S11
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S12
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S13
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S14
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S15
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S16
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S17
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S18
+		Accept: 2,
+		Ignore: "",
+	},
+	ActionRow{ // S19
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S20
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S21
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S22
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S23
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S24
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S25
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S26
+		Accept: 0,
+		Ignore: "",
+	},
+	ActionRow{ // S27
+		Accept: -1,
+		Ignore: "!comment",
+	},
+	ActionRow{ // S28
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S29
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S30
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S31
+		Accept: 4,
+		Ignore: "",
+	},
+	ActionRow{ // S32
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S33
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S34
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S35
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S36
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S37
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S38
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S39
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S40
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S41
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S42
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S43
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S44
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S45
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S46
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S47
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S48
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S49
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S50
+		Accept: 3,
+		Ignore: "",
+	},
+	ActionRow{ // S51
+		Accept: 3,
+		Ignore: "",
+	},
+}
diff --git a/schema/lexer/lexer.go b/schema/lexer/lexer.go
new file mode 100644
index 0000000..4761ce2
--- /dev/null
+++ b/schema/lexer/lexer.go
@@ -0,0 +1,207 @@
+// Code generated by gocc; DO NOT EDIT.
+
+package lexer
+
+import (
+	"os"
+	"unicode/utf8"
+
+	"azalea/schema/token"
+)
+
+// NoState is the "no transition" result; NumStates sizes the action and transition tables.
+const (
+	NoState    = -1
+	NumStates  = 52
+	NumSymbols = 57
+)
+
+// Lexer scans a byte slice into tokens, tracking the current offset, line
+// and column. Context is copied into the position of every token produced.
+type Lexer struct {
+	src     []byte
+	pos     int
+	line    int
+	column  int
+	Context token.Context
+}
+
+// NewLexer returns a Lexer over src positioned at offset 0, line 1, column 1.
+func NewLexer(src []byte) *Lexer {
+	lexer := &Lexer{
+		src:     src,
+		pos:     0,
+		line:    1,
+		column:  1,
+		Context: nil,
+	}
+	return lexer
+}
+
+// SourceContext is a simple instance of a token.Context which
+// contains the name of the source file.
+type SourceContext struct {
+	Filepath string
+}
+
+// Source returns the file path held by the context.
+func (s *SourceContext) Source() string {
+	return s.Filepath
+}
+
+// NewLexerFile reads the file at fpath and returns a Lexer over its contents
+// whose Context is a SourceContext naming that file.
+func NewLexerFile(fpath string) (*Lexer, error) {
+	src, err := os.ReadFile(fpath)
+	if err != nil {
+		return nil, err
+	}
+	lexer := NewLexer(src)
+	lexer.Context = &SourceContext{Filepath: fpath}
+	return lexer, nil
+}
+
+// Scan returns the next token. At end of input it returns an EOF token at
+// the current position. Otherwise it feeds runes through TransTab starting
+// from state 0 until no transition exists, recording the token type and end
+// offset each time it reaches a state whose Accept is not -1. Reaching a
+// state with a non-empty Ignore restarts the machine at the current offset,
+// which is how ignored patterns are skipped.
+func (l *Lexer) Scan() (tok *token.Token) {
+	tok = &token.Token{}
+	if l.pos >= len(l.src) {
+		tok.Type = token.EOF
+		tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
+		tok.Pos.Context = l.Context
+		return
+	}
+	start, startLine, startColumn, end := l.pos, l.line, l.column, 0
+	tok.Type = token.INVALID
+	state, rune1, size := 0, rune(-1), 0
+	for state != -1 {
+		if l.pos >= len(l.src) {
+			rune1 = -1
+		} else {
+			rune1, size = utf8.DecodeRune(l.src[l.pos:])
+			l.pos += size
+		}
+
+		nextState := -1
+		if rune1 != -1 {
+			nextState = TransTab[state](rune1)
+		}
+		state = nextState
+
+		if state != -1 {
+
+			// Advance line/column for the rune just consumed; a tab counts as four columns.
+			switch rune1 {
+			case '\n':
+				l.line++
+				l.column = 1
+			case '\r':
+				l.column = 1
+			case '\t':
+				l.column += 4
+			default:
+				l.column++
+			}
+
+			switch {
+			case ActTab[state].Accept != -1:
+				tok.Type = ActTab[state].Accept
+				end = l.pos
+			case ActTab[state].Ignore != "":
+				start, startLine, startColumn = l.pos, l.line, l.column
+				state = 0
+				if start >= len(l.src) {
+					tok.Type = token.EOF
+				}
+
+			}
+		} else {
+			// Token type is still INVALID: extend the literal to everything consumed so far.
+			if tok.Type == token.INVALID {
+				end = l.pos
+			}
+		}
+	}
+	// Rewind to the recorded end so that runes read past it are scanned again.
+	if end > start {
+		l.pos = end
+		tok.Lit = l.src[start:end]
+	} else {
+		tok.Lit = []byte{}
+	}
+	tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
+	tok.Pos.Context = l.Context
+
+	return
+}
+
+// Reset rewinds the read offset to the start of the input. NOTE(review):
+// line and column keep their current values — confirm that is intended.
+func (l *Lexer) Reset() {
+	l.pos = 0
+}
+
+/*
+Lexer symbols:
+0: '`'
+1: '`'
+2: '"'
+3: '"'
+4: '-'
+5: '+'
+6: '0'
+7: 'b'
+8: '_'
+9: '-'
+10: '+'
+11: '0'
+12: 'o'
+13: '_'
+14: '-'
+15: '+'
+16: '_'
+17: '-'
+18: '+'
+19: '0'
+20: 'x'
+21: '_'
+22: '('
+23: ')'
+24: '.'
+25: '_'
+26: '~'
+27: '!'
+28: '@'
+29: '#'
+30: '$'
+31: '%'
+32: '^'
+33: '&'
+34: '*'
+35: '-'
+36: '_'
+37: '+'
+38: '='
+39: '?'
+40: '/'
+41: '.'
+42: '''
+43: ' '
+44: '\t'
+45: '\n'
+46: '\r'
+47: ';'
+48: '\n'
+49: '0'-'1'
+50: '2'-'7'
+51: '8'-'9'
+52: 'A'-'F'
+53: 'a'-'f'
+54: 'A'-'Z'
+55: 'a'-'z'
+56: .
+*/
diff --git a/schema/lexer/transitiontable.go b/schema/lexer/transitiontable.go
new file mode 100644
index 0000000..9a4fcb2
--- /dev/null
+++ b/schema/lexer/transitiontable.go
@@ -0,0 +1,1684 @@
+// Code generated by gocc; DO NOT EDIT.
+
+package lexer
+
+/*
+Let s be the current state
+Let r be the current input rune
+transitionTable[s](r) returns the next state.
+*/ +type TransitionTable [NumStates]func(rune) int + +var TransTab = TransitionTable{ + // S0 + func(r rune) int { + switch { + case r == 9: // ['\t','\t'] + return 1 + case r == 10: // ['\n','\n'] + return 1 + case r == 13: // ['\r','\r'] + return 1 + case r == 32: // [' ',' '] + return 1 + case r == 33: // ['!','!'] + return 2 + case r == 34: // ['"','"'] + return 3 + case r == 35: // ['#','#'] + return 2 + case r == 36: // ['$','$'] + return 2 + case r == 37: // ['%','%'] + return 2 + case r == 38: // ['&','&'] + return 2 + case r == 39: // [''','''] + return 2 + case r == 40: // ['(','('] + return 4 + case r == 41: // [')',')'] + return 5 + case r == 42: // ['*','*'] + return 2 + case r == 43: // ['+','+'] + return 6 + case r == 45: // ['-','-'] + return 6 + case r == 46: // ['.','.'] + return 7 + case r == 47: // ['/','/'] + return 2 + case r == 48: // ['0','0'] + return 8 + case r == 49: // ['1','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // ['8','9'] + return 11 + case r == 59: // [';',';'] + return 12 + case r == 61: // ['=','='] + return 2 + case r == 63: // ['?','?'] + return 2 + case r == 64: // ['@','@'] + return 2 + case 65 <= r && r <= 90: // ['A','Z'] + return 2 + case r == 94: // ['^','^'] + return 2 + case r == 95: // ['_','_'] + return 2 + case r == 96: // ['`','`'] + return 13 + case 97 <= r && r <= 122: // ['a','z'] + return 2 + case r == 126: // ['~','~'] + return 2 + } + return NoState + }, + // S1 + func(r rune) int { + switch { + } + return NoState + }, + // S2 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + 
return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S3 + func(r rune) int { + switch { + case r == 34: // ['"','"'] + return 18 + default: + return 3 + } + }, + // S4 + func(r rune) int { + switch { + } + return NoState + }, + // S5 + func(r rune) int { + switch { + } + return NoState + }, + // S6 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case r == 48: // ['0','0'] + return 19 + case r == 49: // ['1','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S7 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + 
return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S8 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // ['8','9'] + return 11 + case r == 95: // ['_','_'] + return 23 + case r == 98: // ['b','b'] + return 24 + case r == 111: // ['o','o'] + return 25 + case r == 120: // ['x','x'] + return 26 + } + return NoState + }, + // S9 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // ['8','9'] + return 11 + case r == 95: // ['_','_'] + return 23 + } + return NoState + }, + // S10 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // ['8','9'] + return 11 + case r == 95: // ['_','_'] + return 23 + } + return NoState + }, + // S11 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // 
['8','9'] + return 11 + case r == 95: // ['_','_'] + return 23 + } + return NoState + }, + // S12 + func(r rune) int { + switch { + case r == 10: // ['\n','\n'] + return 27 + default: + return 12 + } + }, + // S13 + func(r rune) int { + switch { + case r == 96: // ['`','`'] + return 18 + default: + return 13 + } + }, + // S14 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S15 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case 
r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S16 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S17 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 15 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 
56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S18 + func(r rune) int { + switch { + } + return NoState + }, + // S19 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 28 + case r == 97: // ['a','a'] + return 14 + case r == 98: // ['b','b'] + return 29 + case 99 <= r && r <= 110: // ['c','n'] + return 14 + case r == 111: // ['o','o'] + return 30 + case 112 <= r && r <= 119: // ['p','w'] + return 14 + case r == 120: // ['x','x'] + return 31 + case 121 <= r && r <= 122: // ['y','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S20 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + 
case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 28 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S21 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 28 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S22 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + 
return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 28 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S23 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 9 + case 50 <= r && r <= 55: // ['2','7'] + return 10 + case 56 <= r && r <= 57: // ['8','9'] + return 11 + case r == 95: // ['_','_'] + return 23 + } + return NoState + }, + // S24 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 32 + } + return NoState + }, + // S25 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 33 + case 50 <= r && r <= 55: // ['2','7'] + return 34 + } + return NoState + }, + // S26 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S27 + func(r rune) int { + switch { + } + return NoState + }, + // S28 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: 
// ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 20 + case 50 <= r && r <= 55: // ['2','7'] + return 21 + case 56 <= r && r <= 57: // ['8','9'] + return 22 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 28 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S29 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 39 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S30 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + 
case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 40 + case 50 <= r && r <= 55: // ['2','7'] + return 41 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S31 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 <= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 14 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + 
return 14 + } + return NoState + }, + // S32 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 32 + case r == 95: // ['_','_'] + return 46 + } + return NoState + }, + // S33 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 33 + case 50 <= r && r <= 55: // ['2','7'] + return 34 + case r == 95: // ['_','_'] + return 47 + } + return NoState + }, + // S34 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 33 + case 50 <= r && r <= 55: // ['2','7'] + return 34 + case r == 95: // ['_','_'] + return 47 + } + return NoState + }, + // S35 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case r == 95: // ['_','_'] + return 48 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S36 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case r == 95: // ['_','_'] + return 48 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S37 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case r == 95: // ['_','_'] + return 48 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S38 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case r == 95: // 
['_','_'] + return 48 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S39 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 39 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 49 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S40 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 40 + case 50 <= r && r <= 55: // ['2','7'] + return 41 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r 
== 95: // ['_','_'] + return 50 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S41 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 40 + case 50 <= r && r <= 55: // ['2','7'] + return 41 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 50 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S42 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 <= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 
45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 51 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S43 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 <= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 51 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S44 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 
<= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 51 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S45 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 <= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 51 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S46 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 32 + case r == 95: // ['_','_'] + return 46 + } + return NoState + }, + // S47 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 33 + case 50 <= r && r <= 55: // 
['2','7'] + return 34 + case r == 95: // ['_','_'] + return 47 + } + return NoState + }, + // S48 + func(r rune) int { + switch { + case 48 <= r && r <= 49: // ['0','1'] + return 35 + case 50 <= r && r <= 55: // ['2','7'] + return 36 + case 56 <= r && r <= 57: // ['8','9'] + return 37 + case 65 <= r && r <= 70: // ['A','F'] + return 38 + case r == 95: // ['_','_'] + return 48 + case 97 <= r && r <= 102: // ['a','f'] + return 38 + } + return NoState + }, + // S49 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 39 + case 50 <= r && r <= 55: // ['2','7'] + return 16 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 49 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S50 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 
49: // ['0','1'] + return 40 + case 50 <= r && r <= 55: // ['2','7'] + return 41 + case 56 <= r && r <= 57: // ['8','9'] + return 17 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 90: // ['A','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 50 + case 97 <= r && r <= 122: // ['a','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, + // S51 + func(r rune) int { + switch { + case r == 33: // ['!','!'] + return 14 + case r == 35: // ['#','#'] + return 14 + case r == 36: // ['$','$'] + return 14 + case r == 37: // ['%','%'] + return 14 + case r == 38: // ['&','&'] + return 14 + case r == 39: // [''','''] + return 14 + case r == 42: // ['*','*'] + return 14 + case r == 43: // ['+','+'] + return 14 + case r == 45: // ['-','-'] + return 14 + case r == 46: // ['.','.'] + return 14 + case r == 47: // ['/','/'] + return 14 + case 48 <= r && r <= 49: // ['0','1'] + return 42 + case 50 <= r && r <= 55: // ['2','7'] + return 43 + case 56 <= r && r <= 57: // ['8','9'] + return 44 + case r == 61: // ['=','='] + return 14 + case r == 63: // ['?','?'] + return 14 + case r == 64: // ['@','@'] + return 14 + case 65 <= r && r <= 70: // ['A','F'] + return 45 + case 71 <= r && r <= 90: // ['G','Z'] + return 14 + case r == 94: // ['^','^'] + return 14 + case r == 95: // ['_','_'] + return 51 + case 97 <= r && r <= 102: // ['a','f'] + return 45 + case 103 <= r && r <= 122: // ['g','z'] + return 14 + case r == 126: // ['~','~'] + return 14 + } + return NoState + }, +} diff --git a/schema/main.go b/schema/main.go index b9e149c..98ce085 100644 --- a/schema/main.go +++ b/schema/main.go @@ -1 +1,18 @@ +//go:generate gocc -a azschema.bnf package schema + +import ( + "azalea/schema/ast" + "azalea/schema/lexer" + "azalea/schema/parser" +) + +func CreateSchema(in string) (schema ast.Schema, err error) { + s := 
lexer.NewLexer([]byte(in)) + p := parser.NewParser() + a, err := p.Parse(s) + if err == nil { + schema = ast.Schema(a.(ast.ExprList)) + } + return +} diff --git a/schema/parse.go b/schema/parse.go deleted file mode 100644 index 2328d41..0000000 --- a/schema/parse.go +++ /dev/null @@ -1,135 +0,0 @@ -package schema - -import ( - "fmt" -) - -type Node struct { - Function string - Left, Right *Node - *Token -} -type RawArgument struct { - Index uintptr - Size uintptr -} - -func (n *Node) String() string { - if n.Token != nil { - return n.Token.String() - } - return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right) -} - -func Parse(tokens [][]*Token) ([]*Node, error) { - trees := make([]*Node, len(tokens)) - for i, statement := range tokens { - node, err := parse(statement, 0) - if err != nil { - return nil, err - } - trees[i] = node - } - return trees, nil -} -func parse(statement []*Token, depth uintptr) (*Node, error) { - if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) { - return &Node{ - Function: "", - Left: nil, - Right: nil, - Token: nil, - }, nil - } - if len(statement) < 3 { - return nil, fmt.Errorf("statement too short") - } - if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType { - return nil, fmt.Errorf("malformed statement") - } - statement = statement[1 : len(statement)-1] - expressions := make([]*Node, len(statement)) - exprCounter := 0 - lastBegin := -1 - for i := 0; i < len(statement); i++ { - if lastBegin == -1 { - switch statement[i].Type { - case OpenParenTokenType: - if statement[i].Number == int64(depth)+1 { - lastBegin = i - } - break - case CloseParenTokenType: - return nil, fmt.Errorf("unexpected end of statement") - default: - expressions[exprCounter] = &Node{ - Function: "", - Left: nil, - Right: nil, - Token: statement[i], - } - exprCounter++ - break - } - } - if statement[i].Type == CloseParenTokenType 
&& statement[i].Number == int64(depth)+1 { - res, err := parse(statement[lastBegin:i+1], depth+1) - if err != nil { - return nil, err - } - expressions[exprCounter] = res - exprCounter++ - lastBegin = -1 - } - } - for i, expr := range expressions { - if expr == nil { - expressions = expressions[:i] - break - } - } - switch len(expressions) { - case 1: - node := expressions[0] - if node.Token != nil && node.Type == NameTokenType { - return &Node{ - Function: node.Value, - Left: nil, - Right: nil, - Token: nil, - }, nil - } - return node, nil - case 2, 3: - first := expressions[0] - if first.Token != nil && first.Type == NameTokenType { - var right *Node = nil - if len(expressions) == 3 { - right = expressions[2] - } - return &Node{ - Function: first.Value, - Left: expressions[1], - Right: right, - Token: nil, - }, nil - } - //fallthrough - default: - root := &Node{ - Function: ".", - } - current := root - for _, expr := range expressions[:len(expressions)-2] { - current.Left = expr - current.Right = &Node{ - Function: ".", - } - current = current.Right - } - current.Left = expressions[len(expressions)-2] - current.Right = expressions[len(expressions)-1] - return root, nil - } - return nil, fmt.Errorf("parsing error") -} diff --git a/schema/parse_test.go b/schema/parse_test.go deleted file mode 100644 index 3794489..0000000 --- a/schema/parse_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package schema - -import ( - "fmt" - "os" - "os/exec" - "strings" - "testing" - "unsafe" - - gv "github.com/dominikbraun/graph" - "github.com/dominikbraun/graph/draw" -) - -func TestParse(t *testing.T) { - in := "()" + - "(test)" + - "(test a)" + - "(test a b)" + - "(test a b c)" + - "(test (a b c))" + - "(test (a b c d))" + - "(\"hello world\")" + - "(concat \"hello\" \"world\")" + - "(+ 1 2)" - want := "( )\n" + - "(test )\n" + - "(test [n'a'] )\n" + - "(test [n'a'] [n'b'])\n" + - "(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n" + - "(test (a [n'b'] [n'c']) )\n" + - "(test (. [n'a'] (. 
[n'b'] (. [n'c'] [n'd']))) )\n" + - "[l'hello world']\n" + - "(concat [l'hello'] [l'world'])\n" + - "(+ [l1] [l2])\n" - tokens, err := Tokenize([]byte(in)) - if err != nil { - t.Fatal(err) - } - parse, err := Parse(tokens) - if err != nil { - t.Fatal(err) - } - test := strings.Builder{} - for _, line := range parse { - test.Write([]byte(fmt.Sprintf("%s\n", line))) - } - if test.String() != want { - t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want) - } - if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" { - Visualize(parse) - } -} -func hash(n *Node) uintptr { - return uintptr(unsafe.Pointer(n)) -} -func Visualize(nodes []*Node) { - g := gv.New(hash, gv.Tree(), gv.Directed()) - for _, node := range nodes { - addNode(node, g) - } - dot, _ := os.CreateTemp("", "azalea-graph-*.gv") - _ = draw.DOT(g, dot) - _ = exec.Command("dot", "-Tsvg", "-O", dot.Name()).Run() - _ = exec.Command("qimgv", dot.Name()+".svg").Run() - _ = os.Remove(dot.Name()) - _ = os.Remove(dot.Name() + ".svg") -} -func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node { - str := "" - if node.Function != "" { - str = node.Function - } else { - if node.Token != nil { - str = node.Token.String() - } else { - return nil - } - } - _ = g.AddVertex(node, gv.VertexAttribute("label", str)) - if node.Left != nil { - left := addNode(node.Left, g) - _ = g.AddEdge(hash(node), hash(left), gv.EdgeAttribute("splines", "line")) - } - if node.Right != nil { - right := addNode(node.Right, g) - _ = g.AddEdge(hash(node), hash(right), gv.EdgeAttribute("splines", "line")) - } - return node -} diff --git a/schema/parser/action.go b/schema/parser/action.go new file mode 100644 index 0000000..54bc55e --- /dev/null +++ b/schema/parser/action.go @@ -0,0 +1,51 @@ +// Code generated by gocc; DO NOT EDIT. 
+ +package parser + +import ( + "fmt" +) + +type action interface { + act() + String() string +} + +type ( + accept bool + shift int // value is next state index + reduce int // value is production index +) + +func (this accept) act() {} +func (this shift) act() {} +func (this reduce) act() {} + +func (this accept) Equal(that action) bool { + if _, ok := that.(accept); ok { + return true + } + return false +} + +func (this reduce) Equal(that action) bool { + that1, ok := that.(reduce) + if !ok { + return false + } + return this == that1 +} + +func (this shift) Equal(that action) bool { + that1, ok := that.(shift) + if !ok { + return false + } + return this == that1 +} + +func (this accept) String() string { return "accept(0)" } +func (this shift) String() string { return fmt.Sprintf("shift:%d", this) } +func (this reduce) String() string { + return fmt.Sprintf("reduce:%d(%s)", this, productionsTable[this].String) +} diff --git a/schema/parser/actiontable.go b/schema/parser/actiontable.go new file mode 100644 index 0000000..e2b5352 --- /dev/null +++ b/schema/parser/actiontable.go @@ -0,0 +1,599 @@ +// Code generated by gocc; DO NOT EDIT. + +package parser + +type ( + actionTable [numStates]actionRow + actionRow struct { + canRecover bool + actions [numSymbols]action + } +) + +var actionTab = actionTable{ + actionRow{ // S0 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + shift(4), // ( + nil, // ) + nil, // . + }, + }, + actionRow{ // S1 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + accept(true), // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + nil, // ) + nil, // . + }, + }, + actionRow{ // S2 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(1), // ␚, reduce: Schema + nil, // string + nil, // number + nil, // name + shift(4), // ( + nil, // ) + nil, // . 
+ }, + }, + actionRow{ // S3 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(2), // ␚, reduce: ExprList + nil, // string + nil, // number + nil, // name + reduce(2), // (, reduce: ExprList + nil, // ) + nil, // . + }, + }, + actionRow{ // S4 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + shift(6), // name + nil, // ( + nil, // ) + shift(7), // . + }, + }, + actionRow{ // S5 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(3), // ␚, reduce: ExprList + nil, // string + nil, // number + nil, // name + reduce(3), // (, reduce: ExprList + nil, // ) + nil, // . + }, + }, + actionRow{ // S6 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(14), // ) + nil, // . + }, + }, + actionRow{ // S7 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + nil, // ) + nil, // . + }, + }, + actionRow{ // S8 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(9), // string, reduce: Val + reduce(9), // number, reduce: Val + reduce(9), // name, reduce: Val + reduce(9), // (, reduce: Val + reduce(9), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S9 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(19), // string + shift(20), // number + shift(21), // name + shift(22), // ( + shift(23), // ) + nil, // . + }, + }, + actionRow{ // S10 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(6), // string, reduce: Val + reduce(6), // number, reduce: Val + reduce(6), // name, reduce: Val + reduce(6), // (, reduce: Val + reduce(6), // ), reduce: Val + nil, // . 
+ }, + }, + actionRow{ // S11 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(7), // string, reduce: Val + reduce(7), // number, reduce: Val + reduce(7), // name, reduce: Val + reduce(7), // (, reduce: Val + reduce(7), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S12 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(8), // string, reduce: Val + reduce(8), // number, reduce: Val + reduce(8), // name, reduce: Val + reduce(8), // (, reduce: Val + reduce(8), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S13 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + shift(24), // name + nil, // ( + nil, // ) + shift(25), // . + }, + }, + actionRow{ // S14 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(12), // ␚, reduce: Expr + nil, // string + nil, // number + nil, // name + reduce(12), // (, reduce: Expr + nil, // ) + nil, // . + }, + }, + actionRow{ // S15 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(27), // ) + nil, // . + }, + }, + actionRow{ // S16 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(4), // string, reduce: ValList + reduce(4), // number, reduce: ValList + reduce(4), // name, reduce: ValList + reduce(4), // (, reduce: ValList + reduce(4), // ), reduce: ValList + nil, // . + }, + }, + actionRow{ // S17 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(9), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S18 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + shift(28), // ) + nil, // . 
+ }, + }, + actionRow{ // S19 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(6), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S20 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(7), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S21 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(8), // ), reduce: Val + nil, // . + }, + }, + actionRow{ // S22 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + shift(29), // name + nil, // ( + nil, // ) + shift(30), // . + }, + }, + actionRow{ // S23 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(11), // ␚, reduce: Expr + nil, // string + nil, // number + nil, // name + reduce(11), // (, reduce: Expr + nil, // ) + nil, // . + }, + }, + actionRow{ // S24 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(32), // ) + nil, // . + }, + }, + actionRow{ // S25 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + nil, // ) + nil, // . + }, + }, + actionRow{ // S26 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(5), // string, reduce: ValList + reduce(5), // number, reduce: ValList + reduce(5), // name, reduce: ValList + reduce(5), // (, reduce: ValList + reduce(5), // ), reduce: ValList + nil, // . 
+ }, + }, + actionRow{ // S27 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(13), // ␚, reduce: Expr + nil, // string + nil, // number + nil, // name + reduce(13), // (, reduce: Expr + nil, // ) + nil, // . + }, + }, + actionRow{ // S28 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + reduce(10), // ␚, reduce: Expr + nil, // string + nil, // number + nil, // name + reduce(10), // (, reduce: Expr + nil, // ) + nil, // . + }, + }, + actionRow{ // S29 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(35), // ) + nil, // . + }, + }, + actionRow{ // S30 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + nil, // ) + nil, // . + }, + }, + actionRow{ // S31 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(19), // string + shift(20), // number + shift(21), // name + shift(22), // ( + shift(38), // ) + nil, // . + }, + }, + actionRow{ // S32 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(12), // string, reduce: Expr + reduce(12), // number, reduce: Expr + reduce(12), // name, reduce: Expr + reduce(12), // (, reduce: Expr + reduce(12), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S33 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(39), // ) + nil, // . + }, + }, + actionRow{ // S34 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(19), // string + shift(20), // number + shift(21), // name + shift(22), // ( + shift(41), // ) + nil, // . 
+ }, + }, + actionRow{ // S35 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(12), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S36 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + shift(10), // string + shift(11), // number + shift(12), // name + shift(13), // ( + shift(42), // ) + nil, // . + }, + }, + actionRow{ // S37 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + shift(43), // ) + nil, // . + }, + }, + actionRow{ // S38 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(11), // string, reduce: Expr + reduce(11), // number, reduce: Expr + reduce(11), // name, reduce: Expr + reduce(11), // (, reduce: Expr + reduce(11), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S39 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(13), // string, reduce: Expr + reduce(13), // number, reduce: Expr + reduce(13), // name, reduce: Expr + reduce(13), // (, reduce: Expr + reduce(13), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S40 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + shift(44), // ) + nil, // . + }, + }, + actionRow{ // S41 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(11), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S42 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(13), // ), reduce: Expr + nil, // . 
+ }, + }, + actionRow{ // S43 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + reduce(10), // string, reduce: Expr + reduce(10), // number, reduce: Expr + reduce(10), // name, reduce: Expr + reduce(10), // (, reduce: Expr + reduce(10), // ), reduce: Expr + nil, // . + }, + }, + actionRow{ // S44 + canRecover: false, + actions: [numSymbols]action{ + nil, // INVALID + nil, // ␚ + nil, // string + nil, // number + nil, // name + nil, // ( + reduce(10), // ), reduce: Expr + nil, // . + }, + }, +} diff --git a/schema/parser/context.go b/schema/parser/context.go new file mode 100644 index 0000000..3ed954a --- /dev/null +++ b/schema/parser/context.go @@ -0,0 +1,7 @@ +// Code generated by gocc; DO NOT EDIT. + +package parser + +// Parser-specific user-defined and entirely-optional context, +// accessible as '$Context' in SDT actions. +type Context interface{} diff --git a/schema/parser/gototable.go b/schema/parser/gototable.go new file mode 100644 index 0000000..b0649d8 --- /dev/null +++ b/schema/parser/gototable.go @@ -0,0 +1,373 @@ +// Code generated by gocc; DO NOT EDIT. 
+ +package parser + +const numNTSymbols = 6 + +type ( + gotoTable [numStates]gotoRow + gotoRow [numNTSymbols]int +) + +var gotoTab = gotoTable{ + gotoRow{ // S0 + -1, // S' + 1, // Schema + 2, // ExprList + -1, // ValList + -1, // Val + 3, // Expr + }, + gotoRow{ // S1 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S2 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + 5, // Expr + }, + gotoRow{ // S3 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S4 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S5 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S6 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 9, // Val + 8, // Expr + }, + gotoRow{ // S7 + -1, // S' + -1, // Schema + -1, // ExprList + 15, // ValList + 16, // Val + 8, // Expr + }, + gotoRow{ // S8 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S9 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 18, // Val + 17, // Expr + }, + gotoRow{ // S10 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S11 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S12 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S13 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S14 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S15 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 26, // Val + 8, // Expr + }, + gotoRow{ // S16 + -1, // S' + -1, // Schema + -1, // ExprList + 
-1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S17 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S18 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S19 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S20 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S21 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S22 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S23 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S24 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 31, // Val + 8, // Expr + }, + gotoRow{ // S25 + -1, // S' + -1, // Schema + -1, // ExprList + 33, // ValList + 16, // Val + 8, // Expr + }, + gotoRow{ // S26 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S27 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S28 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S29 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 34, // Val + 8, // Expr + }, + gotoRow{ // S30 + -1, // S' + -1, // Schema + -1, // ExprList + 36, // ValList + 16, // Val + 8, // Expr + }, + gotoRow{ // S31 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 37, // Val + 17, // Expr + }, + gotoRow{ // S32 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S33 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 26, // Val + 8, // Expr + }, + gotoRow{ // S34 + -1, // S' + -1, 
// Schema + -1, // ExprList + -1, // ValList + 40, // Val + 17, // Expr + }, + gotoRow{ // S35 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S36 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + 26, // Val + 8, // Expr + }, + gotoRow{ // S37 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S38 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S39 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S40 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S41 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S42 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S43 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, + gotoRow{ // S44 + -1, // S' + -1, // Schema + -1, // ExprList + -1, // ValList + -1, // Val + -1, // Expr + }, +} diff --git a/schema/parser/parser.go b/schema/parser/parser.go new file mode 100644 index 0000000..56b160c --- /dev/null +++ b/schema/parser/parser.go @@ -0,0 +1,217 @@ +// Code generated by gocc; DO NOT EDIT. 
+ +package parser + +import ( + "fmt" + "strings" + + parseError "azalea/schema/errors" + "azalea/schema/token" +) + +const ( + numProductions = 14 + numStates = 45 + numSymbols = 14 +) + +// Stack + +type stack struct { + state []int + attrib []Attrib +} + +const iNITIAL_STACK_SIZE = 100 + +func newStack() *stack { + return &stack{ + state: make([]int, 0, iNITIAL_STACK_SIZE), + attrib: make([]Attrib, 0, iNITIAL_STACK_SIZE), + } +} + +func (s *stack) reset() { + s.state = s.state[:0] + s.attrib = s.attrib[:0] +} + +func (s *stack) push(state int, a Attrib) { + s.state = append(s.state, state) + s.attrib = append(s.attrib, a) +} + +func (s *stack) top() int { + return s.state[len(s.state)-1] +} + +func (s *stack) peek(pos int) int { + return s.state[pos] +} + +func (s *stack) topIndex() int { + return len(s.state) - 1 +} + +func (s *stack) popN(items int) []Attrib { + lo, hi := len(s.state)-items, len(s.state) + + attrib := s.attrib[lo:hi] + + s.state = s.state[:lo] + s.attrib = s.attrib[:lo] + + return attrib +} + +func (s *stack) String() string { + w := new(strings.Builder) + fmt.Fprintf(w, "stack:\n") + for i, st := range s.state { + fmt.Fprintf(w, "\t%d: %d , ", i, st) + if s.attrib[i] == nil { + fmt.Fprintf(w, "nil") + } else { + switch attr := s.attrib[i].(type) { + case *token.Token: + fmt.Fprintf(w, "%s", attr.Lit) + default: + fmt.Fprintf(w, "%v", attr) + } + } + fmt.Fprintf(w, "\n") + } + return w.String() +} + +// Parser + +type Parser struct { + stack *stack + nextToken *token.Token + pos int + Context Context +} + +type Scanner interface { + Scan() (tok *token.Token) +} + +func NewParser() *Parser { + p := &Parser{stack: newStack()} + p.Reset() + return p +} + +func (p *Parser) Reset() { + p.stack.reset() + p.stack.push(0, nil) +} + +func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib *parseError.Error) { + errorAttrib = &parseError.Error{ + Err: err, + ErrorToken: p.nextToken, + ErrorSymbols: p.popNonRecoveryStates(), + 
ExpectedTokens: make([]string, 0, 8), + } + for t, action := range actionTab[p.stack.top()].actions { + if action != nil { + errorAttrib.ExpectedTokens = append(errorAttrib.ExpectedTokens, token.TokMap.Id(token.Type(t))) + } + } + + if action := actionTab[p.stack.top()].actions[token.TokMap.Type("error")]; action != nil { + p.stack.push(int(action.(shift)), errorAttrib) // action can only be shift + } else { + return + } + + if action := actionTab[p.stack.top()].actions[p.nextToken.Type]; action != nil { + recovered = true + } + for !recovered && p.nextToken.Type != token.EOF { + p.nextToken = scanner.Scan() + if action := actionTab[p.stack.top()].actions[p.nextToken.Type]; action != nil { + recovered = true + } + } + + return +} + +func (p *Parser) popNonRecoveryStates() (removedAttribs []parseError.ErrorSymbol) { + if rs, ok := p.firstRecoveryState(); ok { + errorSymbols := p.stack.popN(p.stack.topIndex() - rs) + removedAttribs = make([]parseError.ErrorSymbol, len(errorSymbols)) + for i, e := range errorSymbols { + removedAttribs[i] = e + } + } else { + removedAttribs = []parseError.ErrorSymbol{} + } + return +} + +// recoveryState points to the highest state on the stack, which can recover +func (p *Parser) firstRecoveryState() (recoveryState int, canRecover bool) { + recoveryState, canRecover = p.stack.topIndex(), actionTab[p.stack.top()].canRecover + for recoveryState > 0 && !canRecover { + recoveryState-- + canRecover = actionTab[p.stack.peek(recoveryState)].canRecover + } + return +} + +func (p *Parser) newError(err error) error { + e := &parseError.Error{ + Err: err, + StackTop: p.stack.top(), + ErrorToken: p.nextToken, + } + actRow := actionTab[p.stack.top()] + for i, t := range actRow.actions { + if t != nil { + e.ExpectedTokens = append(e.ExpectedTokens, token.TokMap.Id(token.Type(i))) + } + } + return e +} + +func (p *Parser) Parse(scanner Scanner) (res interface{}, err error) { + p.Reset() + p.nextToken = scanner.Scan() + for acc := false; !acc; { + 
action := actionTab[p.stack.top()].actions[p.nextToken.Type] + if action == nil { + if recovered, errAttrib := p.Error(nil, scanner); !recovered { + p.nextToken = errAttrib.ErrorToken + return nil, p.newError(nil) + } + if action = actionTab[p.stack.top()].actions[p.nextToken.Type]; action == nil { + panic("Error recovery led to invalid action") + } + } + + switch act := action.(type) { + case accept: + res = p.stack.popN(1)[0] + acc = true + case shift: + p.stack.push(int(act), p.nextToken) + p.nextToken = scanner.Scan() + case reduce: + prod := productionsTable[int(act)] + attrib, err := prod.ReduceFunc(p.stack.popN(prod.NumSymbols), p.Context) + if err != nil { + return nil, p.newError(err) + } else { + p.stack.push(gotoTab[p.stack.top()][prod.NTType], attrib) + } + default: + panic("unknown action: " + action.String()) + } + } + return res, nil +} diff --git a/schema/parser/productionstable.go b/schema/parser/productionstable.go new file mode 100644 index 0000000..259eb7d --- /dev/null +++ b/schema/parser/productionstable.go @@ -0,0 +1,165 @@ +// Code generated by gocc; DO NOT EDIT. 
+ +package parser + +import ( + "azalea/schema/ast" + "azalea/schema/token" +) + +type ( + ProdTab [numProductions]ProdTabEntry + ProdTabEntry struct { + String string + Id string + NTType int + Index int + NumSymbols int + ReduceFunc func([]Attrib, interface{}) (Attrib, error) + } + Attrib interface { + } +) + +var productionsTable = ProdTab{ + ProdTabEntry{ + String: `S' : Schema << >>`, + Id: "S'", + NTType: 0, + Index: 0, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return X[0], nil + }, + }, + ProdTabEntry{ + String: `Schema : ExprList << >>`, + Id: "Schema", + NTType: 1, + Index: 1, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return X[0], nil + }, + }, + ProdTabEntry{ + String: `ExprList : Expr << ast.NewExprList(X[0]) >>`, + Id: "ExprList", + NTType: 2, + Index: 2, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewExprList(X[0]) + }, + }, + ProdTabEntry{ + String: `ExprList : ExprList Expr << ast.AppendExpr(X[0], X[1]) >>`, + Id: "ExprList", + NTType: 2, + Index: 3, + NumSymbols: 2, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.AppendExpr(X[0], X[1]) + }, + }, + ProdTabEntry{ + String: `ValList : Val << ast.NewValList(X[0]) >>`, + Id: "ValList", + NTType: 3, + Index: 4, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewValList(X[0]) + }, + }, + ProdTabEntry{ + String: `ValList : ValList Val << ast.AppendVal(X[0], X[1]) >>`, + Id: "ValList", + NTType: 3, + Index: 5, + NumSymbols: 2, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.AppendVal(X[0], X[1]) + }, + }, + ProdTabEntry{ + String: `Val : string << ast.NewStringVal(X[0].(*token.Token)) >>`, + Id: "Val", + NTType: 4, + Index: 6, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewStringVal(X[0].(*token.Token)) + }, + }, + 
ProdTabEntry{ + String: `Val : number << ast.NewNumberVal(X[0].(*token.Token)) >>`, + Id: "Val", + NTType: 4, + Index: 7, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewNumberVal(X[0].(*token.Token)) + }, + }, + ProdTabEntry{ + String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`, + Id: "Val", + NTType: 4, + Index: 8, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewNameVal(X[0].(*token.Token)) + }, + }, + ProdTabEntry{ + String: `Val : Expr << ast.NewExprVal(X[0]) >>`, + Id: "Val", + NTType: 4, + Index: 9, + NumSymbols: 1, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewExprVal(X[0]) + }, + }, + ProdTabEntry{ + String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`, + Id: "Expr", + NTType: 5, + Index: 10, + NumSymbols: 5, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewExpr(X[1].(*token.Token), X[2], X[3]) + }, + }, + ProdTabEntry{ + String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`, + Id: "Expr", + NTType: 5, + Index: 11, + NumSymbols: 4, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewExpr(X[1].(*token.Token), X[2], nil) + }, + }, + ProdTabEntry{ + String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`, + Id: "Expr", + NTType: 5, + Index: 12, + NumSymbols: 3, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.NewExpr(X[1].(*token.Token), nil, nil) + }, + }, + ProdTabEntry{ + String: `Expr : "(" "." 
ValList ")" << ast.ListExpr(X[2]) >>`, + Id: "Expr", + NTType: 5, + Index: 13, + NumSymbols: 4, + ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { + return ast.ListExpr(X[2]) + }, + }, +} diff --git a/schema/schema_test.go b/schema/schema_test.go new file mode 100644 index 0000000..e153952 --- /dev/null +++ b/schema/schema_test.go @@ -0,0 +1,18 @@ +package schema + +import ( + "fmt" + "testing" +) + +func TestParser(t *testing.T) { + test := "(test)" + + "(test a)" + + "(test a b)" + + "(test \"a\" \"b\")" + + "(+ 0b1010 -0xDEAD_BEEF)" + + "(. a b c d e f g)" + + "(test (test1 \"hi\") (test2 \"hi 2\"))" + + "(test (. \"awa\" \"awawa\" \"awawawa\" \"awawawawa\"))" + fmt.Println(CreateSchema(test)) +} diff --git a/schema/token.go b/schema/token.go deleted file mode 100644 index 9d897b3..0000000 --- a/schema/token.go +++ /dev/null @@ -1,198 +0,0 @@ -package schema - -import ( - "bytes" - "errors" - "fmt" - "log" - "slices" - "strconv" - "sync" -) - -type Token struct { - Type TokenType - Number int64 - Value string -} - -type TokenType uintptr - -const ( - StringLiteralTokenType TokenType = iota - NumberLiteralTokenType - NameTokenType - OpenParenTokenType - CloseParenTokenType -) - -func (t *Token) String() string { - switch t.Type { - case StringLiteralTokenType: - return fmt.Sprintf("[l'%s']", t.Value) - case NumberLiteralTokenType: - return fmt.Sprintf("[l%d]", t.Number) - case NameTokenType: - return fmt.Sprintf("[n'%s']", t.Value) - case OpenParenTokenType: - return fmt.Sprintf("[(%d]", t.Number) - case CloseParenTokenType: - return fmt.Sprintf("[%d)]", t.Number) - } - return fmt.Sprintf("[?'%s']", t.Value) -} - -func StringLiteralToken(Value string) *Token { - return &Token{Type: StringLiteralTokenType, Value: Value} -} - -func NumberLiteralToken(Value string) *Token { - number, err := strconv.ParseInt(Value, 0, 64) - if err != nil { - log.Panicf("failed to parse '%s' as number: %s", Value, err) - } - return &Token{Type: NumberLiteralTokenType, 
Number: number} -} -func NameToken(Name string) *Token { - return &Token{Type: NameTokenType, Value: Name} -} -func OpenParenToken(Depth int) *Token { - return &Token{Type: OpenParenTokenType, Number: int64(Depth)} -} -func CloseParenToken(Depth int) *Token { - return &Token{Type: CloseParenTokenType, Number: int64(Depth)} -} - -// preprocess removes comments and newlines. -func preprocess(in []byte) ([]byte, int) { - lines := bytes.Split(in, []byte("\n")) - var wg sync.WaitGroup - length := len(lines) - wg.Add(length) - for n, l := range lines { - go func(n int, l []byte) { - defer wg.Done() - quote := false // " - grave := false // ` - - for i, c := range l { - if c == '"' && !quote && !grave { - quote = true - } - if c == '"' && quote && !grave { - quote = false - } - if c == '`' && !quote && !grave { - grave = true - } - if c == '`' && !quote && grave { - grave = false - } - if c == ';' && !(quote || grave) { - lines[n] = l[:i] - break - } - } - }(n, l) - } - wg.Wait() - return bytes.Join(lines, []byte(" ")), length -} -func Tokenize(s []byte) ([][]*Token, error) { - s, _ = preprocess(s) - var tokens = make([][]*Token, 0) - statement := 0 - token := 0 - depth := 0 - literalbegin := -1 - namebegin := -1 - quote := false - grave := false - - for i, c := range s { - if !quote && !grave { - switch c { - case '(': - if depth == 0 { - tokens = append(tokens, make([]*Token, 0)) - } - tokens[statement] = append(tokens[statement], OpenParenToken(depth)) - depth++ - token++ - break - case ')': - if namebegin != -1 { - tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i]))) - namebegin = -1 - token++ - } else if literalbegin != -1 { - tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i]))) - token++ - literalbegin = -1 - } - depth-- - if depth < 0 { - return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token)) - } - tokens[statement] = append(tokens[statement], 
CloseParenToken(depth)) - token++ - if depth == 0 { - statement++ - if statement >= len(tokens) { - slices.Grow(tokens, 1) - } - } - break - case '"': - literalbegin = i + 1 - quote = true - break - case '`': - literalbegin = i + 1 - grave = true - break - case ' ': - if namebegin != -1 { - tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i]))) - token++ - namebegin = -1 - } else if literalbegin != -1 { - tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i]))) - token++ - literalbegin = -1 - } - break - default: - if namebegin == -1 && literalbegin == -1 { - if isDigit(c) { - literalbegin = i - } else if isAllowedName(c) { - namebegin = i - } - } - } - } else if c == '"' && quote { - tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i]))) - literalbegin = -1 - quote = false - token++ - } else if c == '`' && grave { - tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i]))) - literalbegin = -1 - grave = false - token++ - } - } - return tokens, nil -} - -// isDigit checks if a character is a digit and therefore is allowed to be the start of a numeric literal. -func isDigit(c byte) bool { - return c >= '0' && c <= '9' -} - -// isAllowedName checks if a character is allowed to be the first character of a name. -// Variable names beginning with a number or containing any of the reserved characters are forbidden. -func isAllowedName(c byte) bool { - return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || (c >= '*' && c <= '/') || (c >= ':' && c <= '@') -} diff --git a/schema/token/context.go b/schema/token/context.go new file mode 100644 index 0000000..0f4e420 --- /dev/null +++ b/schema/token/context.go @@ -0,0 +1,14 @@ +// Code generated by gocc; DO NOT EDIT. + +package token + +// Context allows user-defined data to be associated with the +// lexer/scanner to be associated with each token that lexer +// produces. 
+type Context interface{} + +// Sourcer is a Context interface which presents a Source() method +// identifying e.g the filename for the current code. +type Sourcer interface { + Source() string +} diff --git a/schema/token/token.go b/schema/token/token.go new file mode 100644 index 0000000..60db0fa --- /dev/null +++ b/schema/token/token.go @@ -0,0 +1,159 @@ +// Code generated by gocc; DO NOT EDIT. + +package token + +import ( + "bytes" + "fmt" + "strconv" + "unicode/utf8" +) + +type Token struct { + Type + Lit []byte + Pos +} + +type Type int + +const ( + INVALID Type = iota + EOF +) + +type Pos struct { + Offset int + Line int + Column int + Context Context +} + +func (p Pos) String() string { + // If the context provides a filename, provide a human-readable File:Line:Column representation. + switch src := p.Context.(type) { + case Sourcer: + return fmt.Sprintf("%s:%d:%d", src.Source(), p.Line, p.Column) + default: + return fmt.Sprintf("Pos(offset=%d, line=%d, column=%d)", p.Offset, p.Line, p.Column) + } +} + +type TokenMap struct { + typeMap []string + idMap map[string]Type +} + +func (m TokenMap) Id(tok Type) string { + if int(tok) < len(m.typeMap) { + return m.typeMap[tok] + } + return "unknown" +} + +func (m TokenMap) Type(tok string) Type { + if typ, exist := m.idMap[tok]; exist { + return typ + } + return INVALID +} + +func (m TokenMap) TokenString(tok *Token) string { + return fmt.Sprintf("%s(%d,%s)", m.Id(tok.Type), tok.Type, tok.Lit) +} + +func (m TokenMap) StringType(typ Type) string { + return fmt.Sprintf("%s(%d)", m.Id(typ), typ) +} + +// Equals returns returns true if the token Type and Lit are matches. +func (t *Token) Equals(rhs interface{}) bool { + switch rhsT := rhs.(type) { + case *Token: + return t == rhsT || (t.Type == rhsT.Type && bytes.Equal(t.Lit, rhsT.Lit)) + default: + return false + } +} + +// CharLiteralValue returns the string value of the char literal. 
+func (t *Token) CharLiteralValue() string { + return string(t.Lit[1 : len(t.Lit)-1]) +} + +// Float32Value returns the float32 value of the token or an error if the token literal does not +// denote a valid float32. +func (t *Token) Float32Value() (float32, error) { + if v, err := strconv.ParseFloat(string(t.Lit), 32); err != nil { + return 0, err + } else { + return float32(v), nil + } +} + +// Float64Value returns the float64 value of the token or an error if the token literal does not +// denote a valid float64. +func (t *Token) Float64Value() (float64, error) { + return strconv.ParseFloat(string(t.Lit), 64) +} + +// IDValue returns the string representation of an identifier token. +func (t *Token) IDValue() string { + return string(t.Lit) +} + +// Int32Value returns the int32 value of the token or an error if the token literal does not +// denote a valid float64. +func (t *Token) Int32Value() (int32, error) { + if v, err := strconv.ParseInt(string(t.Lit), 10, 64); err != nil { + return 0, err + } else { + return int32(v), nil + } +} + +// Int64Value returns the int64 value of the token or an error if the token literal does not +// denote a valid float64. +func (t *Token) Int64Value() (int64, error) { + return strconv.ParseInt(string(t.Lit), 10, 64) +} + +// UTF8Rune decodes the UTF8 rune in the token literal. It returns utf8.RuneError if +// the token literal contains an invalid rune. +func (t *Token) UTF8Rune() (rune, error) { + r, _ := utf8.DecodeRune(t.Lit) + if r == utf8.RuneError { + err := fmt.Errorf("Invalid rune") + return r, err + } + return r, nil +} + +// StringValue returns the string value of the token literal. 
+func (t *Token) StringValue() string { + return string(t.Lit[1 : len(t.Lit)-1]) +} + +var TokMap = TokenMap{ + typeMap: []string{ + "INVALID", + "␚", + "string", + "number", + "name", + "(", + ")", + ".", + }, + + idMap: map[string]Type{ + "INVALID": 0, + "␚": 1, + "string": 2, + "number": 3, + "name": 4, + "(": 5, + ")": 6, + ".": 7, + }, +} diff --git a/schema/token_test.go b/schema/token_test.go deleted file mode 100644 index bedce65..0000000 --- a/schema/token_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package schema - -import ( - "strings" - "testing" -) - -func TestTokenize(t *testing.T) { - in := "(test ; test comment\n" + - "@test) ; test comment\n" + - `(test "Hello World")` + "\n" + - "; test comment 2\n" + - "(+ 1 2)\n" + - "(test `\"Hello world\"`)\n" - want := "[(0][n'test'][n'@test'][0)]\n" + - "[(0][n'test'][l'Hello World'][0)]\n" + - "[(0][n'+'][l1][l2][0)]\n" + - "[(0][n'test'][l'\"Hello world\"'][0)]\n" - tokens, _ := Tokenize([]byte(in)) - var test strings.Builder - - for _, statement := range tokens { - for _, token := range statement { - test.WriteString(token.String()) - } - test.WriteString("\n") - } - if test.String() != want { - t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want) - } -} diff --git a/schema/util/litconv.go b/schema/util/litconv.go new file mode 100644 index 0000000..16407a4 --- /dev/null +++ b/schema/util/litconv.go @@ -0,0 +1,101 @@ +// Code generated by gocc; DO NOT EDIT. + +package util + +import ( + "fmt" + "strconv" + "unicode" + "unicode/utf8" +) + +// Interface. + +// RuneValue will convert the literal value of a scanned token to a rune. +func RuneValue(lit []byte) rune { + if lit[1] == '\\' { + return escapeCharVal(lit) + } + r, size := utf8.DecodeRune(lit[1:]) + if size != len(lit)-2 { + panic(fmt.Sprintf("Error decoding rune. Lit: %s, rune: %d, size%d\n", lit, r, size)) + } + return r +} + +// UintValue will attempt to parse a byte-slice as a signed base-10 64-bit integer. 
+func IntValue(lit []byte) (int64, error) { + return strconv.ParseInt(string(lit), 10, 64) +} + +// UintValue will attempt to parse a byte-slice as an unsigned base-10 64-bit integer. +func UintValue(lit []byte) (uint64, error) { + return strconv.ParseUint(string(lit), 10, 64) +} + +// Helpers. +func escapeCharVal(lit []byte) rune { + var i, base, max uint32 + offset := 2 + switch lit[offset] { + case 'a': + return '\a' + case 'b': + return '\b' + case 'f': + return '\f' + case 'n': + return '\n' + case 'r': + return '\r' + case 't': + return '\t' + case 'v': + return '\v' + case '\\': + return '\\' + case '\'': + return '\'' + case '0', '1', '2', '3', '4', '5', '6', '7': + i, base, max = 3, 8, 255 + case 'x': + i, base, max = 2, 16, 255 + offset++ + case 'u': + i, base, max = 4, 16, unicode.MaxRune + offset++ + case 'U': + i, base, max = 8, 16, unicode.MaxRune + offset++ + default: + panic(fmt.Sprintf("Error decoding character literal: %s\n", lit)) + } + + var x uint32 + for ; i > 0 && offset < len(lit)-1; i-- { + ch, size := utf8.DecodeRune(lit[offset:]) + offset += size + d := uint32(digitVal(ch)) + if d >= base { + panic(fmt.Sprintf("charVal(%s): illegal character (%c) in escape sequence. size=%d, offset=%d", lit, ch, size, offset)) + } + x = x*base + d + } + if x > max || 0xD800 <= x && x < 0xE000 { + panic(fmt.Sprintf("Error decoding escape char value. Lit:%s, offset:%d, escape sequence is invalid Unicode code point\n", lit, offset)) + } + + return rune(x) +} + +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch) - '0' + case 'a' <= ch && ch <= 'f': + return int(ch) - 'a' + 10 + case 'A' <= ch && ch <= 'F': + return int(ch) - 'A' + 10 + } + return 16 // larger than any legal digit val +} diff --git a/schema/util/rune.go b/schema/util/rune.go new file mode 100644 index 0000000..bd8523a --- /dev/null +++ b/schema/util/rune.go @@ -0,0 +1,39 @@ +// Code generated by gocc; DO NOT EDIT. 
+ +package util + +import ( + "fmt" +) + +func RuneToString(r rune) string { + if r >= 0x20 && r < 0x7f { + return fmt.Sprintf("'%c'", r) + } + switch r { + case 0x07: + return "'\\a'" + case 0x08: + return "'\\b'" + case 0x0C: + return "'\\f'" + case 0x0A: + return "'\\n'" + case 0x0D: + return "'\\r'" + case 0x09: + return "'\\t'" + case 0x0b: + return "'\\v'" + case 0x5c: + return "'\\\\\\'" + case 0x27: + return "'\\''" + case 0x22: + return "'\\\"'" + } + if r < 0x10000 { + return fmt.Sprintf("\\u%04x", r) + } + return fmt.Sprintf("\\U%08x", r) +}