feat/schema: add ast and test

This commit is contained in:
mae
2026-01-27 21:57:38 -06:00
parent 5f9467e851
commit 1fd09335cd
9 changed files with 198 additions and 492 deletions

117
schema/ast/ast.go Normal file
View File

@@ -0,0 +1,117 @@
package ast
import (
"azalea/schema/token"
"strconv"
"strings"
)
// Schema is a parsed schema: the ordered list of its top-level expressions.
type Schema ExprList
// ExprList is an ordered sequence of expressions.
type ExprList []Expr
// Expr is a single expression node: a name applied to up to two operands.
type Expr struct {
// Name is the expression name; ListExpr uses "." for list nodes.
Name string
// Left is the first operand.
Left *Val
// Right is the second operand.
Right *Val
}
// ValList is an ordered sequence of values, as collected for a "." list expression.
type ValList []Val
// Val is one operand of an Expr. Each of the value constructors
// (NewStringVal, NewNumberVal, NewNameVal, NewExprVal) sets exactly one field;
// a Val with no field set prints as "<nil>".
type Val struct {
// string is the literal text of a string token.
string string
// number is the literal text of a number token; it is only parsed when printed.
number string
// name is the literal text of a name token.
name string
// Expr is a nested expression.
*Expr
}
// NewExprList starts a new ExprList whose only element is expr.
//
// expr must hold an Expr; any other dynamic type makes the type assertion
// panic. The returned error is always nil.
func NewExprList(expr any) (ExprList, error) {
	first := expr.(Expr)
	return ExprList{first}, nil
}
// NewStringVal wraps the literal text of a string token in a Val.
// The returned error is always nil.
func NewStringVal(val *token.Token) (Val, error) {
	var v Val
	v.string = string(val.Lit)
	return v, nil
}
// NewNumberVal wraps the literal text of a number token in a Val. The text is
// stored unparsed. The returned error is always nil.
func NewNumberVal(val *token.Token) (Val, error) {
	var v Val
	v.number = string(val.Lit)
	return v, nil
}
// NewNameVal wraps the literal text of a name token in a Val.
// The returned error is always nil.
func NewNameVal(val *token.Token) (Val, error) {
	var v Val
	v.name = string(val.Lit)
	return v, nil
}
// NewExprVal wraps a nested expression in a Val. The Val points at its own
// copy of the expression.
//
// val must hold an Expr; any other dynamic type makes the type assertion
// panic. The returned error is always nil.
func NewExprVal(val any) (Val, error) {
	nested := new(Expr)
	*nested = val.(Expr)
	return Val{Expr: nested}, nil
}
// AppendExpr returns exprList with expr appended.
//
// exprList must hold an ExprList and expr an Expr; any other dynamic type
// makes the corresponding type assertion panic. The returned error is always
// nil.
func AppendExpr(exprList, expr any) (ExprList, error) {
	list := exprList.(ExprList)
	next := expr.(Expr)
	list = append(list, next)
	return list, nil
}
// NewValList starts a new ValList whose only element is val.
//
// val must hold a Val; any other dynamic type makes the type assertion panic.
// The returned error is always nil.
func NewValList(val any) (ValList, error) {
	first := val.(Val)
	return ValList{first}, nil
}
// AppendVal returns valList with val appended.
//
// valList must hold a ValList and val a Val; any other dynamic type makes the
// corresponding type assertion panic. The returned error is always nil.
func AppendVal(valList, val any) (ValList, error) {
	list := valList.(ValList)
	next := val.(Val)
	list = append(list, next)
	return list, nil
}
// NewExpr builds the expression named by the name token with up to two
// operands.
//
// left and right are either nil (operand absent) or hold a Val; any other
// dynamic type makes the type assertion panic. Left and Right of the result
// are never nil: an absent operand is represented by a pointer to an empty
// Val. The returned error is always nil.
func NewExpr(name *token.Token, left any, right any) (Expr, error) {
	lhs, rhs := new(Val), new(Val)
	if left != nil {
		*lhs = left.(Val)
	}
	if right != nil {
		*rhs = right.(Val)
	}
	return Expr{Name: string(name.Lit), Left: lhs, Right: rhs}, nil
}
// ListExpr folds the values of a "." list into a right-nested chain of "."
// expressions, so that (. a b c) becomes (. a (. b c)).
//
// val must hold a ValList; any other dynamic type makes the type assertion
// panic. Lists shorter than two values are handled explicitly: a single value
// becomes the Left operand with an empty Val on the right, and an empty list
// yields a node with two empty Vals. As in NewExpr, Left and Right of every
// node produced here are non-nil. The returned error is always nil.
func ListExpr(val any) (Expr, error) {
	vals := val.(ValList)
	root := Expr{Name: "."}

	// Short lists cannot be split into a "last two" pair; build them directly
	// instead of slicing with a negative bound.
	switch len(vals) {
	case 0:
		root.Left, root.Right = &Val{}, &Val{}
		return root, nil
	case 1:
		root.Left, root.Right = &vals[0], &Val{}
		return root, nil
	}

	last := len(vals) - 2
	current := &root
	for i := range vals[:last] {
		// Point at the slice element, not at a range variable: before Go 1.22
		// the range variable is shared by all iterations, which would make
		// every Left alias the same Val.
		current.Left = &vals[i]
		next := &Expr{Name: "."}
		current.Right = &Val{Expr: next}
		current = next
	}
	current.Left = &vals[last]
	current.Right = &vals[last+1]
	return root, nil
}
// String renders the expression as "(name left right)". A nil operand is
// rendered as the empty string, so the two separating spaces are always
// present.
func (e Expr) String() string {
	var sb strings.Builder
	sb.WriteByte('(')
	sb.WriteString(e.Name)
	sb.WriteByte(' ')
	if e.Left != nil {
		sb.WriteString(e.Left.String())
	}
	sb.WriteByte(' ')
	// Guard on Right itself: testing Left here would dereference a nil Right
	// and would hide a Right that exists without a Left.
	if e.Right != nil {
		sb.WriteString(e.Right.String())
	}
	sb.WriteByte(')')
	return sb.String()
}
// String renders the value. The first non-empty field wins, checked in the
// order string, number, name, nested expression:
//
//   - a string or name is returned as its literal text;
//   - a number is parsed with base auto-detection and printed in decimal; if
//     it cannot be parsed as an int64 the literal text is returned unchanged
//     rather than a misleading 0;
//   - a nested expression is rendered with Expr.String.
//
// A nil receiver or a Val with no field set renders as "<nil>".
func (v *Val) String() string {
	if v == nil {
		return "<nil>"
	}
	switch {
	case v.string != "":
		return v.string
	case v.number != "":
		num, err := strconv.ParseInt(v.number, 0, 64)
		if err != nil {
			return v.number
		}
		return strconv.FormatInt(num, 10)
	case v.name != "":
		return v.name
	case v.Expr != nil:
		return v.Expr.String()
	}
	return "<nil>"
}

View File

@@ -16,25 +16,30 @@ name: _name_initial {_name_char};
!whitespace: ' ' | '\t' | '\n' | '\r';
!comment: ';' {.} '\n';
<<>>
<<
import (
"azalea/schema/ast"
"azalea/schema/token"
)
>>
Schema: ExprList;
ExprList
: Expr
| ExprList Expr
: Expr <<ast.NewExprList($0)>>
| ExprList Expr <<ast.AppendExpr($0, $1)>>
;
ValList
: Val
| ValList Val
: Val <<ast.NewValList($0)>>
| ValList Val <<ast.AppendVal($0, $1)>>
;
Val
: string
| number
| name
| Expr
: string <<ast.NewStringVal($T0)>>
| number <<ast.NewNumberVal($T0)>>
| name <<ast.NewNameVal($T0)>>
| Expr <<ast.NewExprVal($0)>>
;
Expr
: "(" name Val Val ")"
| "(" name Val ")"
| "(" name ")"
| "(" "." ValList ")"
: "(" name Val Val ")" <<ast.NewExpr($T1, $2, $3)>>
| "(" name Val ")" <<ast.NewExpr($T1, $2, nil)>>
| "(" name ")" <<ast.NewExpr($T1, nil, nil)>>
| "(" "." ValList ")" <<ast.ListExpr($2)>>
;

View File

@@ -1,2 +1,18 @@
//go:generate gocc -a azschema.bnf
package schema
import (
"azalea/schema/ast"
"azalea/schema/lexer"
"azalea/schema/parser"
)
// CreateSchema lexes and parses the schema source in and returns its AST.
//
// When parsing fails the parser's error is returned together with a zero
// Schema. On success the parser's result is asserted to be an ast.ExprList;
// any other result type makes the assertion panic.
func CreateSchema(in string) (schema ast.Schema, err error) {
	lex := lexer.NewLexer([]byte(in))
	tree, err := parser.NewParser().Parse(lex)
	if err != nil {
		return schema, err
	}
	return ast.Schema(tree.(ast.ExprList)), nil
}

View File

@@ -1,135 +0,0 @@
package schema
import (
"fmt"
)
// Node is one node of the tree built by parse: either a leaf wrapping a Token,
// or an expression with a Function name and up to two children.
type Node struct {
// Function is the expression name: "." for list nodes, "" for leaves and empty nodes.
Function string
// Left and Right are the operand subtrees; either may be nil.
Left, Right *Node
// Token is set on leaf nodes; when it is non-nil, String prints the token.
*Token
}
// RawArgument pairs an Index with a Size.
// NOTE(review): nothing else visible in this file references this type; its
// intended use is unclear — confirm before relying on it.
type RawArgument struct {
Index uintptr
Size uintptr
}
// String renders a leaf as its token and any other node as
// "(function left right)". Children are formatted with %s, so fmt decides how
// a nil child is printed.
func (n *Node) String() string {
	if tok := n.Token; tok != nil {
		return tok.String()
	}
	return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right)
}
// Parse turns each tokenized statement into a tree, preserving statement
// order. The first statement that fails to parse aborts the whole call: the
// error is returned together with a nil slice.
func Parse(tokens [][]*Token) ([]*Node, error) {
	trees := make([]*Node, 0, len(tokens))
	for _, stmt := range tokens {
		root, err := parse(stmt, 0)
		if err != nil {
			return nil, err
		}
		trees = append(trees, root)
	}
	return trees, nil
}
// parse builds a Node from one statement, recursing into nested groups.
//
// statement is the token sequence including its enclosing parens. depth is the
// nesting depth of this statement; a direct child group is recognised by paren
// tokens whose Number equals depth+1.
//
// Outcomes:
//   - an empty statement or a bare "()" yields an empty Node;
//   - fewer than three tokens yields "statement too short";
//   - a statement not wrapped in parens yields "malformed statement";
//   - one expression: a name token becomes a Function node with no operands,
//     anything else is returned as is;
//   - two or three expressions starting with a name token become a Function
//     node with Left (and Right, when present);
//   - two or three expressions NOT starting with a name token leave the switch
//     and yield "parsing error";
//   - any other count is folded into a right-nested chain of "." nodes.
func parse(statement []*Token, depth uintptr) (*Node, error) {
// Nothing to parse: return an empty node rather than an error.
if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) {
return &Node{
Function: "",
Left: nil,
Right: nil,
Token: nil,
}, nil
}
if len(statement) < 3 {
return nil, fmt.Errorf("statement too short")
}
if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType {
return nil, fmt.Errorf("malformed statement")
}
// Strip the enclosing parens; indices below refer to the inner tokens.
statement = statement[1 : len(statement)-1]
// Sized for the worst case (every inner token is its own expression); the
// unused tail is trimmed after the scan.
expressions := make([]*Node, len(statement))
exprCounter := 0
// lastBegin is the index of the open paren of the child group currently being
// skipped over, or -1 when the scan is not inside a child group.
lastBegin := -1
for i := 0; i < len(statement); i++ {
if lastBegin == -1 {
switch statement[i].Type {
case OpenParenTokenType:
// Only a paren at depth+1 opens a direct child group.
if statement[i].Number == int64(depth)+1 {
lastBegin = i
}
break
case CloseParenTokenType:
// A close paren while not inside a child group has no matching open paren.
return nil, fmt.Errorf("unexpected end of statement")
default:
// Any other token outside a child group is a leaf expression.
expressions[exprCounter] = &Node{
Function: "",
Left: nil,
Right: nil,
Token: statement[i],
}
exprCounter++
break
}
}
// The matching close paren ends the child group: parse it (parens included)
// one level deeper and record the result as a single expression.
if statement[i].Type == CloseParenTokenType && statement[i].Number == int64(depth)+1 {
res, err := parse(statement[lastBegin:i+1], depth+1)
if err != nil {
return nil, err
}
expressions[exprCounter] = res
exprCounter++
lastBegin = -1
}
}
// Trim the unused tail: the first nil entry marks the end of the collected expressions.
for i, expr := range expressions {
if expr == nil {
expressions = expressions[:i]
break
}
}
switch len(expressions) {
case 1:
node := expressions[0]
// A lone name is a call without operands.
if node.Token != nil && node.Type == NameTokenType {
return &Node{
Function: node.Value,
Left: nil,
Right: nil,
Token: nil,
}, nil
}
return node, nil
case 2, 3:
first := expressions[0]
if first.Token != nil && first.Type == NameTokenType {
var right *Node = nil
if len(expressions) == 3 {
right = expressions[2]
}
return &Node{
Function: first.Value,
Left: expressions[1],
Right: right,
Token: nil,
}, nil
}
// The fallthrough below is commented out, and Go cases do not fall through
// implicitly, so a 2- or 3-element statement that does not start with a name
// leaves the switch and reaches the "parsing error" return.
//fallthrough
default:
// Fold the expressions into a right-nested chain: (. a (. b (. c d))).
// NOTE(review): with zero collected expressions the slice bound below would
// be negative — confirm whether that state is reachable.
root := &Node{
Function: ".",
}
current := root
for _, expr := range expressions[:len(expressions)-2] {
current.Left = expr
current.Right = &Node{
Function: ".",
}
current = current.Right
}
current.Left = expressions[len(expressions)-2]
current.Right = expressions[len(expressions)-1]
return root, nil
}
return nil, fmt.Errorf("parsing error")
}

View File

@@ -1,91 +0,0 @@
package schema
import (
"fmt"
"os"
"os/exec"
"strings"
"testing"
"unsafe"
gv "github.com/dominikbraun/graph"
"github.com/dominikbraun/graph/draw"
)
// TestParse tokenizes and parses a fixed set of statements and compares the
// rendered trees, one per line, with the expected output. Setting
// AZALEA_TEST_VISUALIZE=1 additionally opens a rendering of the trees.
func TestParse(t *testing.T) {
	in := strings.Join([]string{
		"()",
		"(test)",
		"(test a)",
		"(test a b)",
		"(test a b c)",
		"(test (a b c))",
		"(test (a b c d))",
		"(\"hello world\")",
		"(concat \"hello\" \"world\")",
		"(+ 1 2)",
	}, "")
	want := strings.Join([]string{
		"( <nil> <nil>)\n",
		"(test <nil> <nil>)\n",
		"(test [n'a'] <nil>)\n",
		"(test [n'a'] [n'b'])\n",
		"(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n",
		"(test (a [n'b'] [n'c']) <nil>)\n",
		"(test (. [n'a'] (. [n'b'] (. [n'c'] [n'd']))) <nil>)\n",
		"[l'hello world']\n",
		"(concat [l'hello'] [l'world'])\n",
		"(+ [l1] [l2])\n",
	}, "")

	tokens, err := Tokenize([]byte(in))
	if err != nil {
		t.Fatal(err)
	}
	trees, err := Parse(tokens)
	if err != nil {
		t.Fatal(err)
	}

	var got strings.Builder
	for _, tree := range trees {
		fmt.Fprintf(&got, "%s\n", tree)
	}
	if rendered := got.String(); rendered != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", rendered, want)
	}

	if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" {
		Visualize(trees)
	}
}
// hash identifies a node by its address; it is the vertex hash function
// passed to the graph in Visualize.
func hash(n *Node) uintptr {
	addr := unsafe.Pointer(n)
	return uintptr(addr)
}
// Visualize renders the given trees to an SVG with the external "dot" tool
// and opens the result in the external "qimgv" viewer.
//
// It is a best-effort debugging aid and reports no errors: if the temporary
// file cannot be created, the graph cannot be written, or "dot" fails, it
// returns without showing anything. The temporary files are removed before it
// returns.
func Visualize(nodes []*Node) {
	g := gv.New(hash, gv.Tree(), gv.Directed())
	for _, node := range nodes {
		addNode(node, g)
	}

	dot, err := os.CreateTemp("", "azalea-graph-*.gv")
	if err != nil {
		return
	}
	svg := dot.Name() + ".svg"
	// Cleanup is best-effort: a leftover temp file is harmless.
	defer func() {
		_ = os.Remove(dot.Name())
		_ = os.Remove(svg)
	}()

	writeErr := draw.DOT(g, dot)
	// Close before "dot" reads the file so it sees the complete contents.
	closeErr := dot.Close()
	if writeErr != nil || closeErr != nil {
		return
	}

	if err := exec.Command("dot", "-Tsvg", "-O", dot.Name()).Run(); err != nil {
		return
	}
	// The viewer's exit status carries no information for the caller.
	_ = exec.Command("qimgv", svg).Run()
}
// addNode adds node and, recursively, its children to g, labelling each
// vertex with the node's Function or, for leaves, its token text.
//
// A node with neither a Function nor a Token is skipped and nil is returned;
// no edge is drawn to a skipped child. Otherwise node itself is returned.
// Graph errors are deliberately ignored: this only feeds the best-effort
// Visualize helper.
func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node {
	var label string
	switch {
	case node.Function != "":
		label = node.Function
	case node.Token != nil:
		label = node.Token.String()
	default:
		return nil
	}
	_ = g.AddVertex(node, gv.VertexAttribute("label", label))

	for _, child := range []*Node{node.Left, node.Right} {
		if child == nil {
			continue
		}
		// Only connect children that actually became vertices; a skipped
		// child has no vertex to attach an edge to.
		if added := addNode(child, g); added != nil {
			_ = g.AddEdge(hash(node), hash(added), gv.EdgeAttribute("splines", "line"))
		}
	}
	return node
}

View File

@@ -2,6 +2,11 @@
package parser
import (
"azalea/schema/ast"
"azalea/schema/token"
)
type (
ProdTab [numProductions]ProdTabEntry
ProdTabEntry struct {
@@ -38,123 +43,123 @@ var productionsTable = ProdTab{
},
},
ProdTabEntry{
String: `ExprList : Expr << >>`,
String: `ExprList : Expr << ast.NewExprList(X[0]) >>`,
Id: "ExprList",
NTType: 2,
Index: 2,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewExprList(X[0])
},
},
ProdTabEntry{
String: `ExprList : ExprList Expr << >>`,
String: `ExprList : ExprList Expr << ast.AppendExpr(X[0], X[1]) >>`,
Id: "ExprList",
NTType: 2,
Index: 3,
NumSymbols: 2,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.AppendExpr(X[0], X[1])
},
},
ProdTabEntry{
String: `ValList : Val << >>`,
String: `ValList : Val << ast.NewValList(X[0]) >>`,
Id: "ValList",
NTType: 3,
Index: 4,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewValList(X[0])
},
},
ProdTabEntry{
String: `ValList : ValList Val << >>`,
String: `ValList : ValList Val << ast.AppendVal(X[0], X[1]) >>`,
Id: "ValList",
NTType: 3,
Index: 5,
NumSymbols: 2,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.AppendVal(X[0], X[1])
},
},
ProdTabEntry{
String: `Val : string << >>`,
String: `Val : string << ast.NewStringVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 6,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewStringVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : number << >>`,
String: `Val : number << ast.NewNumberVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 7,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewNumberVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : name << >>`,
String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 8,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewNameVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : Expr << >>`,
String: `Val : Expr << ast.NewExprVal(X[0]) >>`,
Id: "Val",
NTType: 4,
Index: 9,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewExprVal(X[0])
},
},
ProdTabEntry{
String: `Expr : "(" name Val Val ")" << >>`,
String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`,
Id: "Expr",
NTType: 5,
Index: 10,
NumSymbols: 5,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewExpr(X[1].(*token.Token), X[2], X[3])
},
},
ProdTabEntry{
String: `Expr : "(" name Val ")" << >>`,
String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`,
Id: "Expr",
NTType: 5,
Index: 11,
NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewExpr(X[1].(*token.Token), X[2], nil)
},
},
ProdTabEntry{
String: `Expr : "(" name ")" << >>`,
String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`,
Id: "Expr",
NTType: 5,
Index: 12,
NumSymbols: 3,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.NewExpr(X[1].(*token.Token), nil, nil)
},
},
ProdTabEntry{
String: `Expr : "(" "." ValList ")" << >>`,
String: `Expr : "(" "." ValList ")" << ast.ListExpr(X[2]) >>`,
Id: "Expr",
NTType: 5,
Index: 13,
NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return X[0], nil
return ast.ListExpr(X[2])
},
},
}

18
schema/schema_test.go Normal file
View File

@@ -0,0 +1,18 @@
package schema
import (
"fmt"
"testing"
)
// TestParser feeds a sample covering every expression form of the grammar
// (bare name, one and two operands, string and number literals, "." lists and
// nested expressions) through CreateSchema. It fails if parsing fails or if
// the number of top-level expressions is wrong, and prints the parsed schema
// for inspection.
func TestParser(t *testing.T) {
	test := "(test)" +
		"(test a)" +
		"(test a b)" +
		"(test \"a\" \"b\")" +
		"(+ 0b1010 -0xDEAD_BEEF)" +
		"(. a b c d e f g)" +
		"(test (test1 \"hi\") (test2 \"hi 2\"))" +
		"(test (. \"awa\" \"awawa\" \"awawawa\" \"awawawawa\"))"

	schema, err := CreateSchema(test)
	if err != nil {
		t.Fatalf("CreateSchema: %v", err)
	}
	// The input above consists of eight top-level expressions.
	if got, want := len(schema), 8; got != want {
		t.Errorf("got %d top-level expressions, want %d", got, want)
	}
	fmt.Println(schema)
}

View File

@@ -1,198 +0,0 @@
package schema
import (
"bytes"
"errors"
"fmt"
"log"
"slices"
"strconv"
"sync"
)
// Token is a single lexical token produced by Tokenize.
type Token struct {
// Type selects which of the other fields is meaningful.
Type TokenType
// Number is the parsed value of a number literal, or the nesting depth of a paren token.
Number int64
// Value is the text of a string literal or a name.
Value string
}
// TokenType identifies the kind of a Token.
type TokenType uintptr
// The token kinds, numbered from zero in declaration order.
const (
// StringLiteralTokenType marks a string literal; the text is in Token.Value.
StringLiteralTokenType TokenType = iota
// NumberLiteralTokenType marks a number literal; the value is in Token.Number.
NumberLiteralTokenType
// NameTokenType marks a name; the text is in Token.Value.
NameTokenType
// OpenParenTokenType marks "("; the nesting depth is in Token.Number.
OpenParenTokenType
// CloseParenTokenType marks ")"; the nesting depth is in Token.Number.
CloseParenTokenType
)
// String renders the token in a compact bracketed debug form: [l'text'] for
// string literals, [l123] for number literals, [n'name'] for names, [(d] and
// [d)] for parens at depth d, and [?'text'] for an unknown type.
func (t *Token) String() string {
	var (
		format = "[?'%s']"
		arg    any = t.Value
	)
	switch t.Type {
	case StringLiteralTokenType:
		format = "[l'%s']"
	case NumberLiteralTokenType:
		format, arg = "[l%d]", t.Number
	case NameTokenType:
		format = "[n'%s']"
	case OpenParenTokenType:
		format, arg = "[(%d]", t.Number
	case CloseParenTokenType:
		format, arg = "[%d)]", t.Number
	}
	return fmt.Sprintf(format, arg)
}
// StringLiteralToken returns a string-literal token carrying Value as its text.
func StringLiteralToken(Value string) *Token {
	tok := new(Token)
	tok.Type = StringLiteralTokenType
	tok.Value = Value
	return tok
}
// NumberLiteralToken parses Value as a signed 64-bit integer, with the base
// chosen from its prefix, and returns a number-literal token holding the
// result. It panics via log.Panicf when Value cannot be parsed.
func NumberLiteralToken(Value string) *Token {
	parsed, err := strconv.ParseInt(Value, 0, 64)
	if err != nil {
		log.Panicf("failed to parse '%s' as number: %s", Value, err)
	}
	tok := &Token{Type: NumberLiteralTokenType}
	tok.Number = parsed
	return tok
}
// NameToken returns a name token carrying Name as its text.
func NameToken(Name string) *Token {
	tok := new(Token)
	tok.Type = NameTokenType
	tok.Value = Name
	return tok
}
// OpenParenToken returns an open-paren token recording its nesting Depth.
func OpenParenToken(Depth int) *Token {
	tok := new(Token)
	tok.Type = OpenParenTokenType
	tok.Number = int64(Depth)
	return tok
}
// CloseParenToken returns a close-paren token recording its nesting Depth.
func CloseParenToken(Depth int) *Token {
	tok := new(Token)
	tok.Type = CloseParenTokenType
	tok.Number = int64(Depth)
	return tok
}
// preprocess removes comments and newlines.
//
// Every line of in is cut at its first ';' that lies outside a string
// literal, and the lines are then joined with single spaces. String literals
// are delimited by '"' or '`'; inside one kind of literal the other delimiter
// is ordinary text. Literal state is tracked per line, so a literal does not
// carry over to the following line.
//
// It returns the joined bytes and the number of input lines.
func preprocess(in []byte) ([]byte, int) {
	lines := bytes.Split(in, []byte("\n"))
	length := len(lines)

	var wg sync.WaitGroup
	wg.Add(length)
	for n, l := range lines {
		// Each goroutine writes only its own lines[n], so no locking is needed.
		go func(n int, l []byte) {
			defer wg.Done()
			quote := false // inside "..."
			grave := false // inside `...`
			for i, c := range l {
				// Exactly one case runs per character, so a delimiter toggles
				// its flag once instead of being opened and closed again by
				// the same character.
				switch {
				case c == '"' && !grave:
					quote = !quote
				case c == '`' && !quote:
					grave = !grave
				case c == ';' && !quote && !grave:
					lines[n] = l[:i]
					return
				}
			}
		}(n, l)
	}
	wg.Wait()
	return bytes.Join(lines, []byte(" ")), length
}
// Tokenize splits s into statements, one token slice per top-level
// parenthesised group, after preprocess has removed comments and newlines.
//
// Paren tokens carry their nesting depth, so matching open and close parens
// have the same Number. Text between '"' or '`' delimiters becomes a string
// literal; a run starting with a digit becomes a number literal; a run
// starting with an allowed name character becomes a name. Names and numbers
// are terminated by a space or a closing paren.
//
// An error is returned for a closing paren without a matching open paren.
// Number literals are built with NumberLiteralToken, which panics on text it
// cannot parse.
func Tokenize(s []byte) ([][]*Token, error) {
s, _ = preprocess(s)
var tokens = make([][]*Token, 0)
// statement indexes the statement being filled in tokens.
statement := 0
// token counts the tokens emitted so far; it is only used in the error message.
token := 0
depth := 0
// literalbegin / namebegin hold the start index of the literal or name being
// scanned, or -1 when none is open.
literalbegin := -1
namebegin := -1
quote := false
grave := false
for i, c := range s {
if !quote && !grave {
switch c {
case '(':
// A paren at depth 0 opens a new statement.
// NOTE(review): tokens[statement] is indexed in several places below; input
// with a name or literal outside any paren looks like it would index past the
// end of tokens — confirm.
if depth == 0 {
tokens = append(tokens, make([]*Token, 0))
}
tokens[statement] = append(tokens[statement], OpenParenToken(depth))
depth++
token++
break
case ')':
// A closing paren also terminates a pending name or number.
if namebegin != -1 {
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
namebegin = -1
token++
} else if literalbegin != -1 {
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
token++
literalbegin = -1
}
depth--
if depth < 0 {
return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token))
}
tokens[statement] = append(tokens[statement], CloseParenToken(depth))
token++
// Returning to depth 0 completes the current statement.
if depth == 0 {
statement++
if statement >= len(tokens) {
// NOTE(review): the result of slices.Grow is discarded, so this call appears
// to have no effect on tokens; the next '(' at depth 0 appends the new
// statement instead.
slices.Grow(tokens, 1)
}
}
break
case '"':
// The literal text starts after the opening delimiter.
literalbegin = i + 1
quote = true
break
case '`':
literalbegin = i + 1
grave = true
break
case ' ':
// A space terminates a pending name or number.
if namebegin != -1 {
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
token++
namebegin = -1
} else if literalbegin != -1 {
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
token++
literalbegin = -1
}
break
default:
// The first character decides whether a new run is a number or a name;
// characters inside an already open run are skipped here.
if namebegin == -1 && literalbegin == -1 {
if isDigit(c) {
literalbegin = i
} else if isAllowedName(c) {
namebegin = i
}
}
}
} else if c == '"' && quote {
// Closing delimiter: emit the text between the delimiters.
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
literalbegin = -1
quote = false
token++
} else if c == '`' && grave {
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
literalbegin = -1
grave = false
token++
}
}
return tokens, nil
}
// isDigit reports whether c is an ASCII decimal digit, i.e. a character that
// may start a numeric literal.
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
// isAllowedName reports whether c may be the first character of a name:
// an ASCII letter, '_', one of the characters '*' through '/', or one of the
// characters ':' through '@'. Digits and the reserved characters (parens,
// quotes, whitespace) are rejected.
func isAllowedName(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case c == '_':
		return true
	case '*' <= c && c <= '/', ':' <= c && c <= '@':
		return true
	default:
		return false
	}
}

View File

@@ -1,31 +0,0 @@
package schema
import (
"strings"
"testing"
)
// TestTokenize checks that comments are dropped, statements may span lines,
// and both quote styles produce string literals, by comparing the rendered
// token stream (one statement per line) with the expected output. A
// tokenizer error fails the test immediately instead of being discarded.
func TestTokenize(t *testing.T) {
	in := "(test ; test comment\n" +
		"@test) ; test comment\n" +
		`(test "Hello World")` + "\n" +
		"; test comment 2\n" +
		"(+ 1 2)\n" +
		"(test `\"Hello world\"`)\n"
	want := "[(0][n'test'][n'@test'][0)]\n" +
		"[(0][n'test'][l'Hello World'][0)]\n" +
		"[(0][n'+'][l1][l2][0)]\n" +
		"[(0][n'test'][l'\"Hello world\"'][0)]\n"

	tokens, err := Tokenize([]byte(in))
	if err != nil {
		t.Fatal(err)
	}

	var test strings.Builder
	for _, statement := range tokens {
		for _, token := range statement {
			test.WriteString(token.String())
		}
		test.WriteString("\n")
	}
	if test.String() != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
	}
}