From 2566f9dbf3aa5b7782a1ef8589338d09ade70ca4 Mon Sep 17 00:00:00 2001
From: mae
Date: Fri, 23 Jan 2026 03:20:34 -0600
Subject: [PATCH] initial commit

---
 go.mod               |   5 ++
 go.sum               |   2 +
 main.go              |   5 ++
 schema/main.go       |   1 +
 schema/parse.go      | 138 +++++++++++++++++++++++++++++
 schema/parse_test.go |  91 ++++++++++++++++++++
 schema/token.go      | 198 +++++++++++++++++++++++++++++++++++++++++++
 schema/token_test.go |  31 +++++++
 8 files changed, 471 insertions(+)
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 main.go
 create mode 100644 schema/main.go
 create mode 100644 schema/parse.go
 create mode 100644 schema/parse_test.go
 create mode 100644 schema/token.go
 create mode 100644 schema/token_test.go

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..56a8b2b
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module azalea
+
+go 1.25
+
+require github.com/dominikbraun/graph v0.23.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..8d8623a
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/dominikbraun/graph v0.23.0 h1:TdZB4pPqCLFxYhdyMFb1TBdFxp8XLcJfTTBQucVPgCo=
+github.com/dominikbraun/graph v0.23.0/go.mod h1:yOjYyogZLY1LSG9E33JWZJiq5k83Qy2C6POAuiViluc=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..7905807
--- /dev/null
+++ b/main.go
@@ -0,0 +1,5 @@
+package main
+
+func main() {
+
+}
diff --git a/schema/main.go b/schema/main.go
new file mode 100644
index 0000000..b9e149c
--- /dev/null
+++ b/schema/main.go
@@ -0,0 +1 @@
+package schema
diff --git a/schema/parse.go b/schema/parse.go
new file mode 100644
index 0000000..2328d41
--- /dev/null
+++ b/schema/parse.go
@@ -0,0 +1,138 @@
+package schema
+
+import (
+	"fmt"
+)
+
+// Node is one vertex of a parsed expression tree: a Token leaf, or an application of Function to Left/Right.
+type Node struct {
+	Function string
+	Left, Right *Node
+	*Token
+}
+// RawArgument locates an argument by index and size. NOTE(review): unused anywhere in this patch — confirm intent.
+type RawArgument struct {
+	Index uintptr
+	Size uintptr
+}
+
+// String renders the node back as s-expression-like text.
+func (n *Node) String() string {
+	if n.Token != nil {
+		return n.Token.String()
+	}
+	return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right)
+}
+
+// Parse parses every tokenized statement into its expression tree.
+func Parse(tokens [][]*Token) ([]*Node, error) {
+	trees := make([]*Node, len(tokens))
+	for i, statement := range tokens {
+		node, err := parse(statement, 0)
+		if err != nil {
+			return nil, err
+		}
+		trees[i] = node
+	}
+	return trees, nil
+}
+// parse parses one parenthesized statement; depth is the nesting level of statement's outer parens.
+func parse(statement []*Token, depth uintptr) (*Node, error) {
+	if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) {
+		return &Node{
+			Function: "",
+			Left: nil,
+			Right: nil,
+			Token: nil,
+		}, nil
+	}
+	if len(statement) < 3 {
+		return nil, fmt.Errorf("statement too short")
+	}
+	if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType {
+		return nil, fmt.Errorf("malformed statement")
+	}
+	statement = statement[1 : len(statement)-1]
+	expressions := make([]*Node, len(statement))
+	exprCounter := 0
+	lastBegin := -1
+	for i := 0; i < len(statement); i++ {
+		if lastBegin == -1 {
+			switch statement[i].Type {
+			case OpenParenTokenType:
+				if statement[i].Number == int64(depth)+1 {
+					lastBegin = i
+				}
+			case CloseParenTokenType:
+				return nil, fmt.Errorf("unexpected end of statement")
+			default:
+				expressions[exprCounter] = &Node{
+					Function: "",
+					Left: nil,
+					Right: nil,
+					Token: statement[i],
+				}
+				exprCounter++
+			}
+		}
+		if statement[i].Type == CloseParenTokenType && statement[i].Number == int64(depth)+1 {
+			res, err := parse(statement[lastBegin:i+1], depth+1)
+			if err != nil {
+				return nil, err
+			}
+			expressions[exprCounter] = res
+			exprCounter++
+			lastBegin = -1
+		}
+	}
+	for i, expr := range expressions {
+		if expr == nil {
+			expressions = expressions[:i]
+			break
+		}
+	}
+	switch len(expressions) {
+	case 1:
+		node := expressions[0]
+		if node.Token != nil && node.Type == NameTokenType {
+			return &Node{
+				Function: node.Value,
+				Left: nil,
+				Right: nil,
+				Token: nil,
+			}, nil
+		}
+		return node, nil
+	case 2, 3:
+		first := expressions[0]
+		if first.Token != nil && first.Type == NameTokenType {
+			var right *Node = nil
+			if len(expressions) == 3 {
+				right = expressions[2]
+			}
+			return &Node{
+				Function: first.Value,
+				Left: expressions[1],
+				Right: right,
+				Token: nil,
+			}, nil
+		}
+		fallthrough
+	default:
+		root := &Node{
+			Function: ".",
+		}
+		current := root
+		for _, expr := range expressions[:len(expressions)-2] {
+			current.Left = expr
+			current.Right = &Node{
+				Function: ".",
+			}
+			current = current.Right
+		}
+		current.Left = expressions[len(expressions)-2]
+		current.Right = expressions[len(expressions)-1]
+		return root, nil
+	}
+	return nil, fmt.Errorf("parsing error")
+}
diff --git a/schema/parse_test.go b/schema/parse_test.go
new file mode 100644
index 0000000..3794489
--- /dev/null
+++ b/schema/parse_test.go
@@ -0,0 +1,91 @@
+package schema
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+	"unsafe"
+
+	gv "github.com/dominikbraun/graph"
+	"github.com/dominikbraun/graph/draw"
+)
+
+func TestParse(t *testing.T) {
+	in := "()" +
+		"(test)" +
+		"(test a)" +
+		"(test a b)" +
+		"(test a b c)" +
+		"(test (a b c))" +
+		"(test (a b c d))" +
+		"(\"hello world\")" +
+		"(concat \"hello\" \"world\")" +
+		"(+ 1 2)"
+	want := "( )\n" +
+		"(test )\n" +
+		"(test [n'a'] )\n" +
+		"(test [n'a'] [n'b'])\n" +
+		"(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n" +
+		"(test (a [n'b'] [n'c']) )\n" +
+		"(test (. [n'a'] (. [n'b'] (. 
[n'c'] [n'd']))) )\n" +
+		"[l'hello world']\n" +
+		"(concat [l'hello'] [l'world'])\n" +
+		"(+ [l1] [l2])\n"
+	tokens, err := Tokenize([]byte(in))
+	if err != nil {
+		t.Fatal(err)
+	}
+	parse, err := Parse(tokens)
+	if err != nil {
+		t.Fatal(err)
+	}
+	test := strings.Builder{}
+	for _, line := range parse {
+		test.Write([]byte(fmt.Sprintf("%s\n", line)))
+	}
+	if test.String() != want {
+		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
+	}
+	if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" {
+		Visualize(parse)
+	}
+}
+func hash(n *Node) uintptr {
+	return uintptr(unsafe.Pointer(n))
+}
+func Visualize(nodes []*Node) {
+	g := gv.New(hash, gv.Tree(), gv.Directed())
+	for _, node := range nodes {
+		addNode(node, g)
+	}
+	dot, _ := os.CreateTemp("", "azalea-graph-*.gv")
+	_ = draw.DOT(g, dot)
+	_ = exec.Command("dot", "-Tsvg", "-O", dot.Name()).Run()
+	_ = exec.Command("qimgv", dot.Name()+".svg").Run()
+	_ = os.Remove(dot.Name())
+	_ = os.Remove(dot.Name() + ".svg")
+}
+func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node {
+	str := ""
+	if node.Function != "" {
+		str = node.Function
+	} else if node.Token != nil {
+		str = node.Token.String()
+	} else {
+		return nil
+	}
+	_ = g.AddVertex(node, gv.VertexAttribute("label", str))
+	if node.Left != nil {
+		if left := addNode(node.Left, g); left != nil {
+			_ = g.AddEdge(hash(node), hash(left), gv.EdgeAttribute("splines", "line"))
+		}
+	}
+	if node.Right != nil {
+		if right := addNode(node.Right, g); right != nil {
+			_ = g.AddEdge(hash(node), hash(right), gv.EdgeAttribute("splines", "line"))
+		}
+	}
+	return node
+}
diff --git a/schema/token.go b/schema/token.go
new file mode 100644
index 0000000..9d897b3
--- /dev/null
+++ b/schema/token.go
@@ -0,0 +1,198 @@
+package schema
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"log"
+	"slices"
+	"strconv"
+	"sync"
+)
+
+type Token struct {
+	Type TokenType
+	Number int64
+	Value string
+}
+
+type TokenType uintptr
+
+const (
+	StringLiteralTokenType TokenType = iota
+	NumberLiteralTokenType
+	NameTokenType
+
OpenParenTokenType
+	CloseParenTokenType
+)
+
+func (t *Token) String() string {
+	switch t.Type {
+	case StringLiteralTokenType:
+		return fmt.Sprintf("[l'%s']", t.Value)
+	case NumberLiteralTokenType:
+		return fmt.Sprintf("[l%d]", t.Number)
+	case NameTokenType:
+		return fmt.Sprintf("[n'%s']", t.Value)
+	case OpenParenTokenType:
+		return fmt.Sprintf("[(%d]", t.Number)
+	case CloseParenTokenType:
+		return fmt.Sprintf("[%d)]", t.Number)
+	}
+	return fmt.Sprintf("[?'%s']", t.Value)
+}
+
+func StringLiteralToken(value string) *Token {
+	return &Token{Type: StringLiteralTokenType, Value: value}
+}
+
+func NumberLiteralToken(value string) *Token {
+	number, err := strconv.ParseInt(value, 0, 64)
+	if err != nil {
+		log.Panicf("failed to parse '%s' as number: %s", value, err)
+	}
+	return &Token{Type: NumberLiteralTokenType, Number: number}
+}
+func NameToken(name string) *Token {
+	return &Token{Type: NameTokenType, Value: name}
+}
+func OpenParenToken(depth int) *Token {
+	return &Token{Type: OpenParenTokenType, Number: int64(depth)}
+}
+func CloseParenToken(depth int) *Token {
+	return &Token{Type: CloseParenTokenType, Number: int64(depth)}
+}
+
+// preprocess removes comments and newlines.
+func preprocess(in []byte) ([]byte, int) {
+	lines := bytes.Split(in, []byte("\n"))
+	var wg sync.WaitGroup
+	length := len(lines)
+	wg.Add(length)
+	for n, l := range lines {
+		go func(n int, l []byte) {
+			defer wg.Done()
+			quote := false // "
+			grave := false // `
+
+			for i, c := range l {
+				// Chained else-ifs: as independent ifs the same
+				// character both opened and immediately closed a
+				// quote, so quote/grave state never latched.
+				if c == '"' && !quote && !grave {
+					quote = true
+				} else if c == '"' && quote && !grave {
+					quote = false
+				} else if c == '`' && !quote && !grave {
+					grave = true
+				} else if c == '`' && !quote && grave {
+					grave = false
+				}
+				if c == ';' && !(quote || grave) {
+					lines[n] = l[:i]
+					break
+				}
+			}
+		}(n, l)
+	}
+	wg.Wait()
+	return bytes.Join(lines, []byte(" ")), length
+}
+// Tokenize splits the preprocessed input into one token stream per
+// top-level parenthesized statement. It returns an error for an
+// unbalanced closing paren.
+func Tokenize(s []byte) ([][]*Token, error) {
+	s, _ = preprocess(s)
+	var tokens = make([][]*Token, 0)
+	statement := 0
+	token := 0
+	depth := 0
+	literalbegin := -1
+	namebegin := -1
+	quote := false
+	grave := false
+
+	for i, c := range s {
+		if !quote && !grave {
+			switch c {
+			case '(':
+				if depth == 0 {
+					tokens = append(tokens, make([]*Token, 0))
+				}
+				tokens[statement] = append(tokens[statement], OpenParenToken(depth))
+				depth++
+				token++
+			case ')':
+				depth--
+				if depth < 0 {
+					return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token))
+				}
+				if namebegin != -1 {
+					tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
+					namebegin = -1
+					token++
+				} else if literalbegin != -1 {
+					tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
+					token++
+					literalbegin = -1
+				}
+				tokens[statement] = append(tokens[statement], CloseParenToken(depth))
+				token++
+				if depth == 0 {
+					statement++
+					if statement >= len(tokens) {
+						// NOTE(review): Grow only reserves capacity and
+						// its result is discarded — this is a no-op; confirm.
+						slices.Grow(tokens, 1)
+					}
+				}
+			case '"':
+				literalbegin = i + 1
+				quote = true
+			case '`':
+				literalbegin = i + 1
+				grave = true
+			case ' ':
+				if namebegin != -1 {
+					tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
+					token++
+					namebegin = -1
+				} else if literalbegin != -1 {
+					tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
+					token++
+					literalbegin = -1
+				}
+			default:
+				if namebegin == -1 && literalbegin == -1 {
+					if isDigit(c) {
+						literalbegin = i
+					} else if isAllowedName(c) {
+						namebegin = i
+					}
+				}
+			}
+		} else if c == '"' && quote {
+			tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
+			literalbegin = -1
+			quote = false
+			token++
+		} else if c == '`' && grave {
+			tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
+			literalbegin = -1
+			grave = false
+			token++
+		}
+	}
+	return tokens, nil
+}
+
+// isDigit checks if a character is a digit and therefore is allowed to be the start of a numeric literal.
+func isDigit(c byte) bool {
+	return c >= '0' && c <= '9'
+}
+
+// isAllowedName checks if a character is allowed to be the first character of a name.
+// Variable names beginning with a number or containing any of the reserved characters are forbidden.
+func isAllowedName(c byte) bool {
+	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || (c >= '*' && c <= '/') || (c >= ':' && c <= '@')
+}
diff --git a/schema/token_test.go b/schema/token_test.go
new file mode 100644
index 0000000..bedce65
--- /dev/null
+++ b/schema/token_test.go
@@ -0,0 +1,34 @@
+package schema
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestTokenize(t *testing.T) {
+	in := "(test ; test comment\n" +
+		"@test) ; test comment\n" +
+		`(test "Hello World")` + "\n" +
+		"; test comment 2\n" +
+		"(+ 1 2)\n" +
+		"(test `\"Hello world\"`)\n"
+	want := "[(0][n'test'][n'@test'][0)]\n" +
+		"[(0][n'test'][l'Hello World'][0)]\n" +
+		"[(0][n'+'][l1][l2][0)]\n" +
+		"[(0][n'test'][l'\"Hello world\"'][0)]\n"
+	tokens, err := Tokenize([]byte(in))
+	if err != nil {
+		t.Fatal(err)
+	}
+	var test strings.Builder
+
+	for _, statement := range tokens {
+		for _, token := range statement {
+			test.WriteString(token.String())
+		}
+		test.WriteString("\n")
+	}
+	if test.String() != want {
+		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
+	}
+}