From 1fd09335cd0aa4b199dda90470dbdfee6994a1d6 Mon Sep 17 00:00:00 2001
From: mae
Date: Tue, 27 Jan 2026 21:57:38 -0600
Subject: [PATCH] feat/schema: add ast and test

---
 schema/ast/ast.go                 | 117 ++++++++++++++++++
 schema/azschema.bnf               |  31 +++--
 schema/main.go                    |  16 +++
 schema/parse.go                   | 135 --------------------
 schema/parse_test.go              |  91 --------------
 schema/parser/productionstable.go |  53 ++++----
 schema/schema_test.go             |  18 +++
 schema/token.go                   | 198 ------------------------------
 schema/token_test.go              |  31 -----
 9 files changed, 198 insertions(+), 492 deletions(-)
 create mode 100644 schema/ast/ast.go
 delete mode 100644 schema/parse.go
 delete mode 100644 schema/parse_test.go
 create mode 100644 schema/schema_test.go
 delete mode 100644 schema/token.go
 delete mode 100644 schema/token_test.go

diff --git a/schema/ast/ast.go b/schema/ast/ast.go
new file mode 100644
index 0000000..f412de8
--- /dev/null
+++ b/schema/ast/ast.go
@@ -0,0 +1,117 @@
+package ast
+
+import (
+	"azalea/schema/token"
+	"strconv"
+	"strings"
+)
+
+type Schema ExprList
+type ExprList []Expr
+type Expr struct {
+	Name  string
+	Left  *Val
+	Right *Val
+}
+type ValList []Val
+type Val struct {
+	string string
+	number string
+	name   string
+	*Expr
+}
+
+func NewExprList(expr any) (ExprList, error) {
+	return ExprList{expr.(Expr)}, nil
+}
+
+func NewStringVal(val *token.Token) (Val, error) {
+	return Val{string: string(val.Lit)}, nil
+}
+
+func NewNumberVal(val *token.Token) (Val, error) {
+	return Val{number: string(val.Lit)}, nil
+}
+
+func NewNameVal(val *token.Token) (Val, error) {
+	return Val{name: string(val.Lit)}, nil
+}
+
+func NewExprVal(val any) (Val, error) {
+	expr := val.(Expr)
+	return Val{Expr: &expr}, nil
+}
+
+func AppendExpr(exprList, expr any) (ExprList, error) {
+	return append(exprList.(ExprList), expr.(Expr)), nil
+}
+
+func NewValList(val any) (ValList, error) {
+	return ValList{val.(Val)}, nil
+}
+
+func AppendVal(valList, val any) (ValList, error) {
+	return append(valList.(ValList), val.(Val)), nil
+}
+func NewExpr(name *token.Token, left any, right any) (Expr, error) {
+	var l Val
+	var r Val
+	if left != nil {
+		l = left.(Val)
+	}
+	if right != nil {
+		r = right.(Val)
+	}
+
+	return Expr{string(name.Lit), &l, &r}, nil
+}
+func ListExpr(val any) (Expr, error) {
+	vals := val.(ValList)
+	root := Expr{
+		Name: ".",
+	}
+	current := &root
+	for i := range vals[:len(vals)-2] {
+		current.Left = &vals[i]
+		current.Right = &Val{
+			Expr: &Expr{
+				Name: ".",
+			}}
+		current = current.Right.Expr
+	}
+	current.Left = &vals[len(vals)-2]
+	current.Right = &vals[len(vals)-1]
+	return root, nil
+}
+
+func (e Expr) String() string {
+	sb := new(strings.Builder)
+	sb.WriteRune('(')
+	sb.WriteString(e.Name)
+	sb.WriteRune(' ')
+	if e.Left != nil {
+		sb.WriteString(e.Left.String())
+	}
+	sb.WriteRune(' ')
+	if e.Right != nil {
+		sb.WriteString(e.Right.String())
+	}
+	sb.WriteRune(')')
+	return sb.String()
+}
+func (v *Val) String() string {
+	if v.string != "" {
+		return v.string
+	}
+	if v.number != "" {
+		num, _ := strconv.ParseInt(v.number, 0, 64)
+		return strconv.FormatInt(num, 10)
+	}
+	if v.name != "" {
+		return v.name
+	}
+	if v.Expr != nil {
+		return v.Expr.String()
+	}
+	return ""
+}
diff --git a/schema/azschema.bnf b/schema/azschema.bnf
index 0792348..84299c1 100644
--- a/schema/azschema.bnf
+++ b/schema/azschema.bnf
@@ -16,25 +16,30 @@ name: _name_initial {_name_char};
 !whitespace: ' ' | '\t' | '\n' | '\r';
 !comment: ';' {.} '\n';
 
-<<>>
+<<
+import (
+	"azalea/schema/ast"
+	"azalea/schema/token"
+)
+>>
 
 Schema:
ExprList; ExprList - : Expr - | ExprList Expr + : Expr <> + | ExprList Expr <> ; ValList - : Val - | ValList Val + : Val <> + | ValList Val <> ; Val - : string - | number - | name - | Expr + : string <> + | number <> + | name <> + | Expr <> ; Expr - : "(" name Val Val ")" - | "(" name Val ")" - | "(" name ")" - | "(" "." ValList ")" + : "(" name Val Val ")" <> + | "(" name Val ")" <> + | "(" name ")" <> + | "(" "." ValList ")" <> ; \ No newline at end of file diff --git a/schema/main.go b/schema/main.go index 79baf59..98ce085 100644 --- a/schema/main.go +++ b/schema/main.go @@ -1,2 +1,18 @@ //go:generate gocc -a azschema.bnf package schema + +import ( + "azalea/schema/ast" + "azalea/schema/lexer" + "azalea/schema/parser" +) + +func CreateSchema(in string) (schema ast.Schema, err error) { + s := lexer.NewLexer([]byte(in)) + p := parser.NewParser() + a, err := p.Parse(s) + if err == nil { + schema = ast.Schema(a.(ast.ExprList)) + } + return +} diff --git a/schema/parse.go b/schema/parse.go deleted file mode 100644 index 2328d41..0000000 --- a/schema/parse.go +++ /dev/null @@ -1,135 +0,0 @@ -package schema - -import ( - "fmt" -) - -type Node struct { - Function string - Left, Right *Node - *Token -} -type RawArgument struct { - Index uintptr - Size uintptr -} - -func (n *Node) String() string { - if n.Token != nil { - return n.Token.String() - } - return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right) -} - -func Parse(tokens [][]*Token) ([]*Node, error) { - trees := make([]*Node, len(tokens)) - for i, statement := range tokens { - node, err := parse(statement, 0) - if err != nil { - return nil, err - } - trees[i] = node - } - return trees, nil -} -func parse(statement []*Token, depth uintptr) (*Node, error) { - if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) { - return &Node{ - Function: "", - Left: nil, - Right: nil, - Token: nil, - }, nil - } - if len(statement) < 3 { - return nil, fmt.Errorf("statement too short") - } - if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType { - return nil, fmt.Errorf("malformed statement") - } - statement = statement[1 : len(statement)-1] - expressions := make([]*Node, len(statement)) - exprCounter := 0 - lastBegin := -1 - for i := 0; i < len(statement); i++ { - if lastBegin == -1 { - switch statement[i].Type { - case OpenParenTokenType: - if statement[i].Number == int64(depth)+1 { - lastBegin = i - } - break - case CloseParenTokenType: - return nil, fmt.Errorf("unexpected end of statement") - default: - expressions[exprCounter] = &Node{ - Function: "", - Left: nil, - Right: nil, - Token: statement[i], - } - exprCounter++ - break - } - } - if statement[i].Type == CloseParenTokenType && statement[i].Number == int64(depth)+1 { - res, err := parse(statement[lastBegin:i+1], depth+1) - if err != nil { - return nil, err - } - expressions[exprCounter] = res - exprCounter++ - lastBegin = -1 - } - } - for i, expr := range expressions { - if expr == nil { - expressions = expressions[:i] - break - } - } - switch len(expressions) { - case 1: - node := expressions[0] - if node.Token != nil && node.Type == NameTokenType { - return &Node{ - Function: node.Value, - Left: nil, - Right: nil, - Token: nil, - }, nil - } - return node, nil - case 2, 3: - first := expressions[0] - if first.Token != nil && first.Type == NameTokenType { - var right *Node = nil - if len(expressions) == 3 { - right = expressions[2] - } - return &Node{ - 
Function: first.Value, - Left: expressions[1], - Right: right, - Token: nil, - }, nil - } - //fallthrough - default: - root := &Node{ - Function: ".", - } - current := root - for _, expr := range expressions[:len(expressions)-2] { - current.Left = expr - current.Right = &Node{ - Function: ".", - } - current = current.Right - } - current.Left = expressions[len(expressions)-2] - current.Right = expressions[len(expressions)-1] - return root, nil - } - return nil, fmt.Errorf("parsing error") -} diff --git a/schema/parse_test.go b/schema/parse_test.go deleted file mode 100644 index 3794489..0000000 --- a/schema/parse_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package schema - -import ( - "fmt" - "os" - "os/exec" - "strings" - "testing" - "unsafe" - - gv "github.com/dominikbraun/graph" - "github.com/dominikbraun/graph/draw" -) - -func TestParse(t *testing.T) { - in := "()" + - "(test)" + - "(test a)" + - "(test a b)" + - "(test a b c)" + - "(test (a b c))" + - "(test (a b c d))" + - "(\"hello world\")" + - "(concat \"hello\" \"world\")" + - "(+ 1 2)" - want := "( )\n" + - "(test )\n" + - "(test [n'a'] )\n" + - "(test [n'a'] [n'b'])\n" + - "(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n" + - "(test (a [n'b'] [n'c']) )\n" + - "(test (. [n'a'] (. [n'b'] (. [n'c'] [n'd']))) )\n" + - "[l'hello world']\n" + - "(concat [l'hello'] [l'world'])\n" + - "(+ [l1] [l2])\n" - tokens, err := Tokenize([]byte(in)) - if err != nil { - t.Fatal(err) - } - parse, err := Parse(tokens) - if err != nil { - t.Fatal(err) - } - test := strings.Builder{} - for _, line := range parse { - test.Write([]byte(fmt.Sprintf("%s\n", line))) - } - if test.String() != want { - t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want) - } - if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" { - Visualize(parse) - } -} -func hash(n *Node) uintptr { - return uintptr(unsafe.Pointer(n)) -} -func Visualize(nodes []*Node) { - g := gv.New(hash, gv.Tree(), gv.Directed()) - for _, node := range nodes { - addNode(node, g) - } - dot, _ := os.CreateTemp("", "azalea-graph-*.gv") - _ = draw.DOT(g, dot) - _ = exec.Command("dot", "-Tsvg", "-O", dot.Name()).Run() - _ = exec.Command("qimgv", dot.Name()+".svg").Run() - _ = os.Remove(dot.Name()) - _ = os.Remove(dot.Name() + ".svg") -} -func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node { - str := "" - if node.Function != "" { - str = node.Function - } else { - if node.Token != nil { - str = node.Token.String() - } else { - return nil - } - } - _ = g.AddVertex(node, gv.VertexAttribute("label", str)) - if node.Left != nil { - left := addNode(node.Left, g) - _ = g.AddEdge(hash(node), hash(left), gv.EdgeAttribute("splines", "line")) - } - if node.Right != nil { - right := addNode(node.Right, g) - _ = g.AddEdge(hash(node), hash(right), gv.EdgeAttribute("splines", "line")) - } - return node -} diff --git a/schema/parser/productionstable.go b/schema/parser/productionstable.go index 710f808..259eb7d 100644 --- a/schema/parser/productionstable.go +++ b/schema/parser/productionstable.go @@ -2,6 +2,11 @@ package parser +import ( + "azalea/schema/ast" + "azalea/schema/token" +) + type ( ProdTab [numProductions]ProdTabEntry ProdTabEntry struct { @@ -38,123 +43,123 @@ var productionsTable = ProdTab{ }, }, ProdTabEntry{ - String: `ExprList : Expr << >>`, + String: `ExprList : Expr << ast.NewExprList(X[0]) >>`, Id: "ExprList", NTType: 2, Index: 2, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewExprList(X[0]) }, }, ProdTabEntry{ - String: `ExprList : ExprList Expr 
<< >>`, + String: `ExprList : ExprList Expr << ast.AppendExpr(X[0], X[1]) >>`, Id: "ExprList", NTType: 2, Index: 3, NumSymbols: 2, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.AppendExpr(X[0], X[1]) }, }, ProdTabEntry{ - String: `ValList : Val << >>`, + String: `ValList : Val << ast.NewValList(X[0]) >>`, Id: "ValList", NTType: 3, Index: 4, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewValList(X[0]) }, }, ProdTabEntry{ - String: `ValList : ValList Val << >>`, + String: `ValList : ValList Val << ast.AppendVal(X[0], X[1]) >>`, Id: "ValList", NTType: 3, Index: 5, NumSymbols: 2, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.AppendVal(X[0], X[1]) }, }, ProdTabEntry{ - String: `Val : string << >>`, + String: `Val : string << ast.NewStringVal(X[0].(*token.Token)) >>`, Id: "Val", NTType: 4, Index: 6, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewStringVal(X[0].(*token.Token)) }, }, ProdTabEntry{ - String: `Val : number << >>`, + String: `Val : number << ast.NewNumberVal(X[0].(*token.Token)) >>`, Id: "Val", NTType: 4, Index: 7, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewNumberVal(X[0].(*token.Token)) }, }, ProdTabEntry{ - String: `Val : name << >>`, + String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`, Id: "Val", NTType: 4, Index: 8, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewNameVal(X[0].(*token.Token)) }, }, ProdTabEntry{ - String: `Val : Expr << >>`, + String: `Val : Expr << ast.NewExprVal(X[0]) >>`, Id: "Val", NTType: 4, Index: 9, NumSymbols: 1, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewExprVal(X[0]) }, }, ProdTabEntry{ - String: `Expr : "(" name Val Val ")" << >>`, + String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`, Id: "Expr", NTType: 5, Index: 10, NumSymbols: 5, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewExpr(X[1].(*token.Token), X[2], X[3]) }, }, ProdTabEntry{ - String: `Expr : "(" name Val ")" << >>`, + String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`, Id: "Expr", NTType: 5, Index: 11, NumSymbols: 4, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewExpr(X[1].(*token.Token), X[2], nil) }, }, ProdTabEntry{ - String: `Expr : "(" name ")" << >>`, + String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`, Id: "Expr", NTType: 5, Index: 12, NumSymbols: 3, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.NewExpr(X[1].(*token.Token), nil, nil) }, }, ProdTabEntry{ - String: `Expr : "(" "." ValList ")" << >>`, + String: `Expr : "(" "." 
ValList ")" << ast.ListExpr(X[2]) >>`, Id: "Expr", NTType: 5, Index: 13, NumSymbols: 4, ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { - return X[0], nil + return ast.ListExpr(X[2]) }, }, } diff --git a/schema/schema_test.go b/schema/schema_test.go new file mode 100644 index 0000000..e153952 --- /dev/null +++ b/schema/schema_test.go @@ -0,0 +1,18 @@ +package schema + +import ( + "fmt" + "testing" +) + +func TestParser(t *testing.T) { + test := "(test)" + + "(test a)" + + "(test a b)" + + "(test \"a\" \"b\")" + + "(+ 0b1010 -0xDEAD_BEEF)" + + "(. a b c d e f g)" + + "(test (test1 \"hi\") (test2 \"hi 2\"))" + + "(test (. \"awa\" \"awawa\" \"awawawa\" \"awawawawa\"))" + fmt.Println(CreateSchema(test)) +} diff --git a/schema/token.go b/schema/token.go deleted file mode 100644 index 9d897b3..0000000 --- a/schema/token.go +++ /dev/null @@ -1,198 +0,0 @@ -package schema - -import ( - "bytes" - "errors" - "fmt" - "log" - "slices" - "strconv" - "sync" -) - -type Token struct { - Type TokenType - Number int64 - Value string -} - -type TokenType uintptr - -const ( - StringLiteralTokenType TokenType = iota - NumberLiteralTokenType - NameTokenType - OpenParenTokenType - CloseParenTokenType -) - -func (t *Token) String() string { - switch t.Type { - case StringLiteralTokenType: - return fmt.Sprintf("[l'%s']", t.Value) - case NumberLiteralTokenType: - return fmt.Sprintf("[l%d]", t.Number) - case NameTokenType: - return fmt.Sprintf("[n'%s']", t.Value) - case OpenParenTokenType: - return fmt.Sprintf("[(%d]", t.Number) - case CloseParenTokenType: - return fmt.Sprintf("[%d)]", t.Number) - } - return fmt.Sprintf("[?'%s']", t.Value) -} - -func StringLiteralToken(Value string) *Token { - return &Token{Type: StringLiteralTokenType, Value: Value} -} - -func NumberLiteralToken(Value string) *Token { - number, err := strconv.ParseInt(Value, 0, 64) - if err != nil { - log.Panicf("failed to parse '%s' as number: %s", Value, err) - } - return &Token{Type: NumberLiteralTokenType, Number: number} -} -func NameToken(Name string) *Token { - return &Token{Type: NameTokenType, Value: Name} -} -func OpenParenToken(Depth int) *Token { - return &Token{Type: OpenParenTokenType, Number: int64(Depth)} -} -func CloseParenToken(Depth int) *Token { - return &Token{Type: CloseParenTokenType, Number: int64(Depth)} -} - -// preprocess removes comments and newlines. 
-func preprocess(in []byte) ([]byte, int) { - lines := bytes.Split(in, []byte("\n")) - var wg sync.WaitGroup - length := len(lines) - wg.Add(length) - for n, l := range lines { - go func(n int, l []byte) { - defer wg.Done() - quote := false // " - grave := false // ` - - for i, c := range l { - if c == '"' && !quote && !grave { - quote = true - } - if c == '"' && quote && !grave { - quote = false - } - if c == '`' && !quote && !grave { - grave = true - } - if c == '`' && !quote && grave { - grave = false - } - if c == ';' && !(quote || grave) { - lines[n] = l[:i] - break - } - } - }(n, l) - } - wg.Wait() - return bytes.Join(lines, []byte(" ")), length -} -func Tokenize(s []byte) ([][]*Token, error) { - s, _ = preprocess(s) - var tokens = make([][]*Token, 0) - statement := 0 - token := 0 - depth := 0 - literalbegin := -1 - namebegin := -1 - quote := false - grave := false - - for i, c := range s { - if !quote && !grave { - switch c { - case '(': - if depth == 0 { - tokens = append(tokens, make([]*Token, 0)) - } - tokens[statement] = append(tokens[statement], OpenParenToken(depth)) - depth++ - token++ - break - case ')': - if namebegin != -1 { - tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i]))) - namebegin = -1 - token++ - } else if literalbegin != -1 { - tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i]))) - token++ - literalbegin = -1 - } - depth-- - if depth < 0 { - return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token)) - } - tokens[statement] = append(tokens[statement], CloseParenToken(depth)) - token++ - if depth == 0 { - statement++ - if statement >= len(tokens) { - slices.Grow(tokens, 1) - } - } - break - case '"': - literalbegin = i + 1 - quote = true - break - case '`': - literalbegin = i + 1 - grave = true - break - case ' ': - if namebegin != -1 { - tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i]))) - token++ - namebegin = -1 - } else if literalbegin != -1 { - tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i]))) - token++ - literalbegin = -1 - } - break - default: - if namebegin == -1 && literalbegin == -1 { - if isDigit(c) { - literalbegin = i - } else if isAllowedName(c) { - namebegin = i - } - } - } - } else if c == '"' && quote { - tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i]))) - literalbegin = -1 - quote = false - token++ - } else if c == '`' && grave { - tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i]))) - literalbegin = -1 - grave = false - token++ - } - } - return tokens, nil -} - -// isDigit checks if a character is a digit and therefore is allowed to be the start of a numeric literal. -func isDigit(c byte) bool { - return c >= '0' && c <= '9' -} - -// isAllowedName checks if a character is allowed to be the first character of a name. -// Variable names beginning with a number or containing any of the reserved characters are forbidden. 
-func isAllowedName(c byte) bool {
-	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || (c >= '*' && c <= '/') || (c >= ':' && c <= '@')
-}
diff --git a/schema/token_test.go b/schema/token_test.go
deleted file mode 100644
index bedce65..0000000
--- a/schema/token_test.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package schema
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestTokenize(t *testing.T) {
-	in := "(test ; test comment\n" +
-		"@test) ; test comment\n" +
-		`(test "Hello World")` + "\n" +
-		"; test comment 2\n" +
-		"(+ 1 2)\n" +
-		"(test `\"Hello world\"`)\n"
-	want := "[(0][n'test'][n'@test'][0)]\n" +
-		"[(0][n'test'][l'Hello World'][0)]\n" +
-		"[(0][n'+'][l1][l2][0)]\n" +
-		"[(0][n'test'][l'\"Hello world\"'][0)]\n"
-	tokens, _ := Tokenize([]byte(in))
-	var test strings.Builder
-
-	for _, statement := range tokens {
-		for _, token := range statement {
-			test.WriteString(token.String())
-		}
-		test.WriteString("\n")
-	}
-	if test.String() != want {
-		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
-	}
-}
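
Usage sketch, not part of the diff above: the new entry point is schema.CreateSchema, which runs the gocc-generated lexer and parser over the input and converts the resulting ExprList into an ast.Schema. A minimal caller might look like the following; the sample input reuses forms from schema_test.go, and the "azalea" module path is assumed from the import paths in this patch.

package main

import (
	"fmt"
	"log"

	"azalea/schema"
)

func main() {
	// Sample input in the azschema s-expression syntax (borrowed from schema_test.go).
	src := `(test "a" "b")` +
		`(+ 1 2)` +
		`(test (. "x" "y" "z"))`

	parsed, err := schema.CreateSchema(src)
	if err != nil {
		log.Fatalf("schema parse failed: %v", err)
	}

	// ast.Schema is a slice of ast.Expr; each expression renders itself
	// in the parenthesized form produced by ast.Expr.String.
	for _, expr := range parsed {
		fmt.Println(expr.String())
	}
}

The error check matters: on a parse failure CreateSchema returns the zero-value Schema, so the result is only meaningful when err is nil.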