initial commit

This commit is contained in:
mae
2026-01-23 03:20:34 -06:00
commit 2566f9dbf3
8 changed files with 468 additions and 0 deletions

5
go.mod Normal file
View File

@@ -0,0 +1,5 @@
module azalea
go 1.25
require github.com/dominikbraun/graph v0.23.0

2
go.sum Normal file
View File

@@ -0,0 +1,2 @@
github.com/dominikbraun/graph v0.23.0 h1:TdZB4pPqCLFxYhdyMFb1TBdFxp8XLcJfTTBQucVPgCo=
github.com/dominikbraun/graph v0.23.0/go.mod h1:yOjYyogZLY1LSG9E33JWZJiq5k83Qy2C6POAuiViluc=

5
main.go Normal file
View File

@@ -0,0 +1,5 @@
package main
// main is currently an empty stub; all real logic lives in the schema
// package. Presumably this will wire the tokenizer/parser together later.
func main() {
}

1
schema/main.go Normal file
View File

@@ -0,0 +1 @@
package schema

135
schema/parse.go Normal file
View File

@@ -0,0 +1,135 @@
package schema
import (
"fmt"
)
// Node is one vertex of a parsed expression tree. An interior node carries
// a Function name with Left/Right children; a leaf node instead carries the
// embedded *Token (with Function empty and both children nil).
type Node struct {
	Function    string
	Left, Right *Node
	*Token
}
// RawArgument describes an argument by position (Index) and byte size (Size).
// NOTE(review): not referenced anywhere in this file — presumably consumed by
// later evaluation/codegen code; confirm before relying on its semantics.
type RawArgument struct {
	Index uintptr
	Size  uintptr
}
// String renders the node for debugging: a leaf prints its token's form,
// while an interior node prints as "(function left right)".
func (n *Node) String() string {
	if n.Token == nil {
		return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right)
	}
	return n.Token.String()
}
// Parse turns each tokenized statement into its expression tree, stopping
// at the first statement that fails to parse.
func Parse(tokens [][]*Token) ([]*Node, error) {
	trees := make([]*Node, 0, len(tokens))
	for _, statement := range tokens {
		tree, err := parse(statement, 0)
		if err != nil {
			return nil, err
		}
		trees = append(trees, tree)
	}
	return trees, nil
}
// parse builds one expression tree from a single parenthesized statement.
//
// statement must begin with an open paren and end with the matching close
// paren; an empty slice or a bare "()" yields an empty Node. depth is the
// nesting level of the enclosing parens: direct sub-statements are delimited
// by paren tokens whose recorded Number equals depth+1 and are parsed
// recursively.
//
// Shapes produced from the collected expressions:
//   - one expression: a lone name becomes a zero-argument function node;
//     anything else is returned as-is.
//   - two or three expressions headed by a name: a function node with Left
//     (and, for three, Right) arguments.
//   - everything else: a right-leaning list chained with the "." function.
func parse(statement []*Token, depth uintptr) (*Node, error) {
	// Empty statement or a bare "()" pair parses to the empty node.
	if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) {
		return &Node{}, nil
	}
	if len(statement) < 3 {
		return nil, fmt.Errorf("statement too short")
	}
	if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType {
		return nil, fmt.Errorf("malformed statement")
	}
	// Strip the enclosing parens; every remaining token sits at depth+1.
	statement = statement[1 : len(statement)-1]
	expressions := make([]*Node, 0, len(statement))
	lastBegin := -1 // open-paren index of an unfinished sub-statement, or -1
	for i := 0; i < len(statement); i++ {
		if lastBegin == -1 {
			switch statement[i].Type {
			case OpenParenTokenType:
				if statement[i].Number == int64(depth)+1 {
					lastBegin = i
				}
			case CloseParenTokenType:
				// A close paren with no open sub-statement is unbalanced:
				// matched closers are consumed below while lastBegin != -1.
				return nil, fmt.Errorf("unexpected end of statement")
			default:
				// Plain token outside any sub-statement: wrap it as a leaf.
				expressions = append(expressions, &Node{Token: statement[i]})
			}
		}
		// A close paren at this nesting level finishes the pending
		// sub-statement; recurse over its full token span.
		if statement[i].Type == CloseParenTokenType && statement[i].Number == int64(depth)+1 {
			res, err := parse(statement[lastBegin:i+1], depth+1)
			if err != nil {
				return nil, err
			}
			expressions = append(expressions, res)
			lastBegin = -1
		}
	}
	switch len(expressions) {
	case 0:
		// Defensive: nothing was collected (malformed nesting). The original
		// fell into the chain-building code and sliced out of range.
		return nil, fmt.Errorf("parsing error")
	case 1:
		node := expressions[0]
		// A lone name is a zero-argument function call.
		if node.Token != nil && node.Type == NameTokenType {
			return &Node{Function: node.Value}, nil
		}
		return node, nil
	case 2, 3:
		first := expressions[0]
		if first.Token != nil && first.Type == NameTokenType {
			var right *Node
			if len(expressions) == 3 {
				right = expressions[2]
			}
			return &Node{Function: first.Value, Left: expressions[1], Right: right}, nil
		}
		// Not a function application: chain as a "." list instead. The
		// original only had a "//fallthrough" comment here, so these
		// statements erroneously returned "parsing error".
		fallthrough
	default:
		// Chain all expressions into a right-leaning "." list.
		root := &Node{Function: "."}
		current := root
		for _, expr := range expressions[:len(expressions)-2] {
			current.Left = expr
			current.Right = &Node{Function: "."}
			current = current.Right
		}
		current.Left = expressions[len(expressions)-2]
		current.Right = expressions[len(expressions)-1]
		return root, nil
	}
}

91
schema/parse_test.go Normal file
View File

@@ -0,0 +1,91 @@
package schema
import (
"fmt"
"os"
"os/exec"
"strings"
"testing"
"unsafe"
gv "github.com/dominikbraun/graph"
"github.com/dominikbraun/graph/draw"
)
// TestParse checks the printed shape of the trees Parse produces for a range
// of statement forms. Set AZALEA_TEST_VISUALIZE=1 to also render the trees
// with graphviz.
func TestParse(t *testing.T) {
	in := "()" +
		"(test)" +
		"(test a)" +
		"(test a b)" +
		"(test a b c)" +
		"(test (a b c))" +
		"(test (a b c d))" +
		"(\"hello world\")" +
		"(concat \"hello\" \"world\")" +
		"(+ 1 2)"
	want := "( <nil> <nil>)\n" +
		"(test <nil> <nil>)\n" +
		"(test [n'a'] <nil>)\n" +
		"(test [n'a'] [n'b'])\n" +
		"(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n" +
		"(test (a [n'b'] [n'c']) <nil>)\n" +
		"(test (. [n'a'] (. [n'b'] (. [n'c'] [n'd']))) <nil>)\n" +
		"[l'hello world']\n" +
		"(concat [l'hello'] [l'world'])\n" +
		"(+ [l1] [l2])\n"
	tokens, err := Tokenize([]byte(in))
	if err != nil {
		t.Fatal(err)
	}
	trees, err := Parse(tokens)
	if err != nil {
		t.Fatal(err)
	}
	var got strings.Builder
	for _, tree := range trees {
		fmt.Fprintf(&got, "%s\n", tree)
	}
	if got.String() != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", got.String(), want)
	}
	if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" {
		Visualize(trees)
	}
}
// hash keys graph vertices by the node's memory address, which is unique
// among live *Node values for the duration of a Visualize call.
func hash(n *Node) uintptr {
	return uintptr(unsafe.Pointer(n))
}
// Visualize renders the parse trees to DOT, converts them to SVG with
// graphviz, opens the result in qimgv, and removes the temp files. Every
// error is deliberately ignored: this is a best-effort debugging aid.
func Visualize(nodes []*Node) {
	graph := gv.New(hash, gv.Tree(), gv.Directed())
	for _, root := range nodes {
		addNode(root, graph)
	}
	file, _ := os.CreateTemp("", "azalea-graph-*.gv")
	_ = draw.DOT(graph, file)
	_ = exec.Command("dot", "-Tsvg", "-O", file.Name()).Run()
	_ = exec.Command("qimgv", file.Name()+".svg").Run()
	_ = os.Remove(file.Name())
	_ = os.Remove(file.Name() + ".svg")
}
// addNode inserts node and its subtree into g, labeling each vertex with its
// function name (interior nodes) or token text (leaves), and returns the
// inserted node. Empty nodes — no function and no token — are skipped and
// yield nil.
func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node {
	str := ""
	if node.Function != "" {
		str = node.Function
	} else if node.Token != nil {
		str = node.Token.String()
	} else {
		// Empty node: nothing to draw.
		return nil
	}
	// Errors ignored throughout: duplicate vertices/edges are harmless here.
	_ = g.AddVertex(node, gv.VertexAttribute("label", str))
	if node.Left != nil {
		// Guard the nil child: the original added an edge to hash(nil) (key
		// 0, a vertex that never exists) when the child was an empty node.
		if left := addNode(node.Left, g); left != nil {
			_ = g.AddEdge(hash(node), hash(left), gv.EdgeAttribute("splines", "line"))
		}
	}
	if node.Right != nil {
		if right := addNode(node.Right, g); right != nil {
			_ = g.AddEdge(hash(node), hash(right), gv.EdgeAttribute("splines", "line"))
		}
	}
	return node
}

198
schema/token.go Normal file
View File

@@ -0,0 +1,198 @@
package schema
import (
"bytes"
"errors"
"fmt"
"log"
"slices"
"strconv"
"sync"
)
// Token is a single lexical unit. Which field is meaningful depends on Type:
// string and name tokens carry their text in Value, number tokens carry the
// parsed value in Number, and paren tokens record their nesting depth in
// Number.
type Token struct {
	Type   TokenType
	Number int64
	Value  string
}
// TokenType discriminates the lexical categories produced by Tokenize.
type TokenType uintptr

const (
	StringLiteralTokenType TokenType = iota // quoted string literal; text in Value
	NumberLiteralTokenType                  // numeric literal; parsed value in Number
	NameTokenType                           // identifier/operator name; text in Value
	OpenParenTokenType                      // '('; nesting depth in Number
	CloseParenTokenType                     // ')'; nesting depth in Number
)
// String renders the token in the bracketed debug form the tests rely on:
// literals as [l'...'] or [l123], names as [n'...'], and parens as [(d] /
// [d)] where d is the nesting depth. Unknown types render as [?'...'].
func (t *Token) String() string {
	switch t.Type {
	case StringLiteralTokenType:
		return "[l'" + t.Value + "']"
	case NumberLiteralTokenType:
		return "[l" + strconv.FormatInt(t.Number, 10) + "]"
	case NameTokenType:
		return "[n'" + t.Value + "']"
	case OpenParenTokenType:
		return "[(" + strconv.FormatInt(t.Number, 10) + "]"
	case CloseParenTokenType:
		return "[" + strconv.FormatInt(t.Number, 10) + ")]"
	default:
		return fmt.Sprintf("[?'%s']", t.Value)
	}
}
// StringLiteralToken builds a string-literal token holding Value verbatim.
func StringLiteralToken(Value string) *Token {
	return &Token{Type: StringLiteralTokenType, Value: Value}
}
// NumberLiteralToken parses Value into a number token. Base 0 means the
// base is inferred from the prefix (0x, 0o, 0b, or decimal). Panics when
// Value is not a valid 64-bit integer — the tokenizer calls this for any
// run starting with a digit, so malformed literals abort tokenization.
func NumberLiteralToken(Value string) *Token {
	number, err := strconv.ParseInt(Value, 0, 64)
	if err != nil {
		log.Panicf("failed to parse '%s' as number: %s", Value, err)
	}
	return &Token{Type: NumberLiteralTokenType, Number: number}
}
// NameToken builds a name (identifier/operator) token holding Name verbatim.
func NameToken(Name string) *Token {
	return &Token{Type: NameTokenType, Value: Name}
}
// OpenParenToken builds an open-paren token recording its nesting Depth.
func OpenParenToken(Depth int) *Token {
	return &Token{Type: OpenParenTokenType, Number: int64(Depth)}
}
// CloseParenToken builds a close-paren token recording its nesting Depth.
func CloseParenToken(Depth int) *Token {
	return &Token{Type: CloseParenTokenType, Number: int64(Depth)}
}
// preprocess strips line comments and flattens the input onto one line.
//
// A comment starts at a ';' outside any string literal and runs to the end
// of the line; semicolons inside "..." or `...` literals are preserved.
// Lines are processed concurrently (each goroutine touches only its own
// lines[n] slot) and then joined with single spaces. Returns the flattened
// bytes and the number of input lines.
func preprocess(in []byte) ([]byte, int) {
	lines := bytes.Split(in, []byte("\n"))
	var wg sync.WaitGroup
	length := len(lines)
	wg.Add(length)
	for n, l := range lines {
		go func(n int, l []byte) {
			defer wg.Done()
			quote := false // inside a "..." literal
			grave := false // inside a `...` literal
			for i, c := range l {
				switch {
				case c == '"' && !grave:
					// Toggle in one step. The original used two consecutive
					// ifs that set and then immediately cleared the flag on
					// the same character, so quoting never took effect and a
					// ';' inside a string literal started a comment.
					quote = !quote
				case c == '`' && !quote:
					grave = !grave
				case c == ';' && !quote && !grave:
					lines[n] = l[:i]
					return
				}
			}
		}(n, l)
	}
	wg.Wait()
	return bytes.Join(lines, []byte(" ")), length
}
// Tokenize lexes preprocessed schema source into per-statement token lists:
// each top-level parenthesized group becomes one inner slice.
//
// Names and number literals end at a space or a closing paren; string
// literals are delimited by double quotes or backticks and may freely
// contain the other delimiter. Returns an error when a closing paren has no
// matching opening paren.
func Tokenize(s []byte) ([][]*Token, error) {
	s, _ = preprocess(s)
	tokens := make([][]*Token, 0)
	statement := 0     // index of the statement currently being filled
	token := 0         // running token count (used only in error positions)
	depth := 0         // current paren nesting depth
	literalbegin := -1 // start offset of an in-progress literal, or -1
	namebegin := -1    // start offset of an in-progress name, or -1
	quote := false     // inside a "..." string literal
	grave := false     // inside a `...` string literal
	for i, c := range s {
		if !quote && !grave {
			switch c {
			case '(':
				if depth == 0 {
					// New top-level statement begins.
					tokens = append(tokens, make([]*Token, 0))
				}
				tokens[statement] = append(tokens[statement], OpenParenToken(depth))
				depth++
				token++
			case ')':
				// Flush any pending name or number literal first.
				if namebegin != -1 {
					tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
					namebegin = -1
					token++
				} else if literalbegin != -1 {
					tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
					token++
					literalbegin = -1
				}
				depth--
				if depth < 0 {
					return nil, errors.New("unexpected closing paren at [" + strconv.Itoa(statement) + "," + strconv.Itoa(token) + "]")
				}
				tokens[statement] = append(tokens[statement], CloseParenToken(depth))
				token++
				if depth == 0 {
					statement++
					if statement >= len(tokens) {
						// Reserve room for the next statement. The original
						// discarded slices.Grow's result, making this a
						// no-op; the next '(' appends anyway, so this is a
						// capacity hint, not a correctness requirement.
						tokens = slices.Grow(tokens, 1)
					}
				}
			case '"':
				literalbegin = i + 1
				quote = true
			case '`':
				literalbegin = i + 1
				grave = true
			case ' ':
				// Whitespace terminates a pending name or number literal.
				if namebegin != -1 {
					tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
					token++
					namebegin = -1
				} else if literalbegin != -1 {
					tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
					token++
					literalbegin = -1
				}
			default:
				// A digit starts a number literal; any other allowed
				// character starts a name. Mid-token characters are skipped
				// here because the token is sliced out at its terminator.
				if namebegin == -1 && literalbegin == -1 {
					if isDigit(c) {
						literalbegin = i
					} else if isAllowedName(c) {
						namebegin = i
					}
				}
			}
		} else if c == '"' && quote {
			tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
			literalbegin = -1
			quote = false
			token++
		} else if c == '`' && grave {
			tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
			literalbegin = -1
			grave = false
			token++
		}
	}
	return tokens, nil
}
// isDigit reports whether c is an ASCII decimal digit and may therefore
// start a numeric literal.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// isAllowedName reports whether c may begin a name. ASCII letters,
// underscore, and the punctuation runs '*'..'/' and ':'..'@' are allowed;
// digits and the remaining reserved characters (parens, quotes, space) are
// not.
func isAllowedName(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case c == '_':
		return true
	case '*' <= c && c <= '/':
		return true
	case ':' <= c && c <= '@':
		return true
	}
	return false
}

31
schema/token_test.go Normal file
View File

@@ -0,0 +1,31 @@
package schema
import (
"strings"
"testing"
)
// TestTokenize checks comment stripping and the rendered token stream for
// names, both string-literal quote styles, and numbers.
func TestTokenize(t *testing.T) {
	in := "(test ; test comment\n" +
		"@test) ; test comment\n" +
		`(test "Hello World")` + "\n" +
		"; test comment 2\n" +
		"(+ 1 2)\n" +
		"(test `\"Hello world\"`)\n"
	want := "[(0][n'test'][n'@test'][0)]\n" +
		"[(0][n'test'][l'Hello World'][0)]\n" +
		"[(0][n'+'][l1][l2][0)]\n" +
		"[(0][n'test'][l'\"Hello world\"'][0)]\n"
	tokens, _ := Tokenize([]byte(in))
	var got strings.Builder
	for _, statement := range tokens {
		for _, tok := range statement {
			got.WriteString(tok.String())
		}
		got.WriteByte('\n')
	}
	if got.String() != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", got.String(), want)
	}
}