initial commit
This commit is contained in:
5
go.mod
Normal file
5
go.mod
Normal file
@@ -0,0 +1,5 @@
|
||||
module azalea
|
||||
|
||||
go 1.25
|
||||
|
||||
require github.com/dominikbraun/graph v0.23.0 // indirect
|
||||
2
go.sum
Normal file
2
go.sum
Normal file
@@ -0,0 +1,2 @@
|
||||
github.com/dominikbraun/graph v0.23.0 h1:TdZB4pPqCLFxYhdyMFb1TBdFxp8XLcJfTTBQucVPgCo=
|
||||
github.com/dominikbraun/graph v0.23.0/go.mod h1:yOjYyogZLY1LSG9E33JWZJiq5k83Qy2C6POAuiViluc=
|
||||
1
schema/main.go
Normal file
1
schema/main.go
Normal file
@@ -0,0 +1 @@
|
||||
package schema
|
||||
135
schema/parse.go
Normal file
135
schema/parse.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Node is one node of a parsed syntax tree. Inner nodes carry a
// Function name and up to two children; leaf nodes carry only the
// embedded *Token (parse only ever sets Token on leaves).
type Node struct {
	Function    string
	Left, Right *Node
	*Token // non-nil only for leaf nodes
}

// RawArgument describes an argument by index and size.
// NOTE(review): unused in this file — presumably consumed elsewhere in
// the package; confirm before relying on its semantics.
type RawArgument struct {
	Index uintptr
	Size  uintptr
}
|
||||
|
||||
func (n *Node) String() string {
|
||||
if n.Token != nil {
|
||||
return n.Token.String()
|
||||
}
|
||||
return fmt.Sprintf("(%s %s %s)", n.Function, n.Left, n.Right)
|
||||
}
|
||||
|
||||
func Parse(tokens [][]*Token) ([]*Node, error) {
|
||||
trees := make([]*Node, len(tokens))
|
||||
for i, statement := range tokens {
|
||||
node, err := parse(statement, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
trees[i] = node
|
||||
}
|
||||
return trees, nil
|
||||
}
|
||||
// parse builds a tree for one parenthesized statement. depth is the
// nesting level of the statement's own parens: top-level calls pass 0,
// and nested groups are parsed recursively at depth+1.
func parse(statement []*Token, depth uintptr) (*Node, error) {
	// An empty slice or a bare "()" parses to the empty node.
	if len(statement) == 0 || (len(statement) == 2 && statement[0].Type == OpenParenTokenType && statement[1].Type == CloseParenTokenType) {
		return &Node{
			Function: "",
			Left:     nil,
			Right:    nil,
			Token:    nil,
		}, nil
	}
	// Anything shorter than "(x)" cannot be a valid statement.
	if len(statement) < 3 {
		return nil, fmt.Errorf("statement too short")
	}
	// The statement must be wrapped in an outer paren pair.
	if statement[0].Type != OpenParenTokenType || statement[len(statement)-1].Type != CloseParenTokenType {
		return nil, fmt.Errorf("malformed statement")
	}
	// Strip the outer parens and flatten the remainder into a list of
	// expressions: bare tokens become leaf nodes, and each nested paren
	// group one level deeper is parsed recursively into a subtree.
	statement = statement[1 : len(statement)-1]
	expressions := make([]*Node, len(statement))
	exprCounter := 0
	// lastBegin is the index of the open paren of the nested group
	// currently being skipped, or -1 when not inside one.
	lastBegin := -1
	for i := 0; i < len(statement); i++ {
		if lastBegin == -1 {
			switch statement[i].Type {
			case OpenParenTokenType:
				// Only a paren exactly one level deeper opens a nested
				// group here; deeper parens belong to that group.
				if statement[i].Number == int64(depth)+1 {
					lastBegin = i
				}
				break
			case CloseParenTokenType:
				// Close paren with no matching open in this scope.
				return nil, fmt.Errorf("unexpected end of statement")
			default:
				// Bare token: wrap it in a leaf node.
				expressions[exprCounter] = &Node{
					Function: "",
					Left:     nil,
					Right:    nil,
					Token:    statement[i],
				}
				exprCounter++
				break
			}
		}
		// End of the current nested group: recurse over the whole
		// group, parens included (Token.Number carries the depth).
		if statement[i].Type == CloseParenTokenType && statement[i].Number == int64(depth)+1 {
			res, err := parse(statement[lastBegin:i+1], depth+1)
			if err != nil {
				return nil, err
			}
			expressions[exprCounter] = res
			exprCounter++
			lastBegin = -1
		}
	}
	// Trim the unused tail of the pre-sized expressions slice.
	for i, expr := range expressions {
		if expr == nil {
			expressions = expressions[:i]
			break
		}
	}
	switch len(expressions) {
	case 1:
		// A lone name becomes a zero-argument function node; any other
		// single expression is returned as-is.
		node := expressions[0]
		if node.Token != nil && node.Type == NameTokenType {
			return &Node{
				Function: node.Value,
				Left:     nil,
				Right:    nil,
				Token:    nil,
			}, nil
		}
		return node, nil
	case 2, 3:
		// "(name arg)" / "(name arg arg)" becomes a function node with
		// one or two children.
		first := expressions[0]
		if first.Token != nil && first.Type == NameTokenType {
			var right *Node = nil
			if len(expressions) == 3 {
				right = expressions[2]
			}
			return &Node{
				Function: first.Value,
				Left:     expressions[1],
				Right:    right,
				Token:    nil,
			}, nil
		}
		// NOTE(review): the fallthrough is commented out, so a 2/3
		// element list NOT led by a name falls out of the switch and
		// reports "parsing error" rather than building a "." chain —
		// confirm that is intended.
		//fallthrough
	default:
		// Four or more expressions fold into a right-leaning chain of
		// "." nodes: (. e1 (. e2 (... en))).
		root := &Node{
			Function: ".",
		}
		current := root
		for _, expr := range expressions[:len(expressions)-2] {
			current.Left = expr
			current.Right = &Node{
				Function: ".",
			}
			current = current.Right
		}
		current.Left = expressions[len(expressions)-2]
		current.Right = expressions[len(expressions)-1]
		return root, nil
	}
	return nil, fmt.Errorf("parsing error")
}
|
||||
91
schema/parse_test.go
Normal file
91
schema/parse_test.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
"unsafe"
|
||||
|
||||
gv "github.com/dominikbraun/graph"
|
||||
"github.com/dominikbraun/graph/draw"
|
||||
)
|
||||
|
||||
// TestParse tokenizes a batch of statements, parses them, and compares
// each tree's String rendering against a golden transcript. Setting
// AZALEA_TEST_VISUALIZE=1 additionally draws the trees via graphviz
// (see Visualize).
func TestParse(t *testing.T) {
	// One statement per top-level parenthesized group.
	in := "()" +
		"(test)" +
		"(test a)" +
		"(test a b)" +
		"(test a b c)" +
		"(test (a b c))" +
		"(test (a b c d))" +
		"(\"hello world\")" +
		"(concat \"hello\" \"world\")" +
		"(+ 1 2)"
	// Expected renderings, one per statement: [n'…'] names, [l…]
	// literals, (f l r) function nodes, "." chains for longer lists.
	want := "( <nil> <nil>)\n" +
		"(test <nil> <nil>)\n" +
		"(test [n'a'] <nil>)\n" +
		"(test [n'a'] [n'b'])\n" +
		"(. [n'test'] (. [n'a'] (. [n'b'] [n'c'])))\n" +
		"(test (a [n'b'] [n'c']) <nil>)\n" +
		"(test (. [n'a'] (. [n'b'] (. [n'c'] [n'd']))) <nil>)\n" +
		"[l'hello world']\n" +
		"(concat [l'hello'] [l'world'])\n" +
		"(+ [l1] [l2])\n"
	tokens, err := Tokenize([]byte(in))
	if err != nil {
		t.Fatal(err)
	}
	parse, err := Parse(tokens)
	if err != nil {
		t.Fatal(err)
	}
	// Render every tree and compare the full transcript at once.
	test := strings.Builder{}
	for _, line := range parse {
		test.Write([]byte(fmt.Sprintf("%s\n", line)))
	}
	if test.String() != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
	}
	// Opt-in graphical output for local debugging only.
	if os.Getenv("AZALEA_TEST_VISUALIZE") == "1" {
		Visualize(parse)
	}
}
|
||||
// hash derives a graph-vertex key from the node's address. Pointer
// identity is sufficient here because every tree node is a distinct
// allocation; a nil node maps to 0.
func hash(n *Node) uintptr {
	return uintptr(unsafe.Pointer(n))
}
|
||||
// Visualize renders the given trees to an SVG with graphviz and opens
// it in a viewer. Errors are deliberately ignored throughout: this is
// a best-effort local debugging aid, not production code.
func Visualize(nodes []*Node) {
	g := gv.New(hash, gv.Tree(), gv.Directed())
	for _, node := range nodes {
		addNode(node, g)
	}
	// Write the DOT description to a temp file, convert it with the
	// graphviz "dot" CLI, and display it with qimgv (presumably a local
	// image viewer — both must be on PATH). Temp files are cleaned up
	// after the viewer exits.
	dot, _ := os.CreateTemp("", "azalea-graph-*.gv")
	_ = draw.DOT(g, dot)
	_ = exec.Command("dot", "-Tsvg", "-O", dot.Name()).Run()
	_ = exec.Command("qimgv", dot.Name()+".svg").Run()
	_ = os.Remove(dot.Name())
	_ = os.Remove(dot.Name() + ".svg")
}
|
||||
func addNode(node *Node, g gv.Graph[uintptr, *Node]) *Node {
|
||||
str := ""
|
||||
if node.Function != "" {
|
||||
str = node.Function
|
||||
} else {
|
||||
if node.Token != nil {
|
||||
str = node.Token.String()
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
_ = g.AddVertex(node, gv.VertexAttribute("label", str))
|
||||
if node.Left != nil {
|
||||
left := addNode(node.Left, g)
|
||||
_ = g.AddEdge(hash(node), hash(left), gv.EdgeAttribute("splines", "line"))
|
||||
}
|
||||
if node.Right != nil {
|
||||
right := addNode(node.Right, g)
|
||||
_ = g.AddEdge(hash(node), hash(right), gv.EdgeAttribute("splines", "line"))
|
||||
}
|
||||
return node
|
||||
}
|
||||
198
schema/token.go
Normal file
198
schema/token.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"slices"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Token is one lexical unit produced by Tokenize. Which fields are
// meaningful depends on Type: Value carries string/name text, Number
// carries a parsed numeric literal or, for parens, the nesting depth.
type Token struct {
	Type   TokenType
	Number int64
	Value  string
}

// TokenType discriminates the kinds of Token.
type TokenType uintptr

const (
	StringLiteralTokenType TokenType = iota // "…" or `…` literal; text in Value
	NumberLiteralTokenType                  // integer literal; parsed value in Number
	NameTokenType                           // bare identifier/operator; text in Value
	OpenParenTokenType                      // '('; nesting depth in Number
	CloseParenTokenType                     // ')'; nesting depth in Number
)
|
||||
|
||||
func (t *Token) String() string {
|
||||
switch t.Type {
|
||||
case StringLiteralTokenType:
|
||||
return fmt.Sprintf("[l'%s']", t.Value)
|
||||
case NumberLiteralTokenType:
|
||||
return fmt.Sprintf("[l%d]", t.Number)
|
||||
case NameTokenType:
|
||||
return fmt.Sprintf("[n'%s']", t.Value)
|
||||
case OpenParenTokenType:
|
||||
return fmt.Sprintf("[(%d]", t.Number)
|
||||
case CloseParenTokenType:
|
||||
return fmt.Sprintf("[%d)]", t.Number)
|
||||
}
|
||||
return fmt.Sprintf("[?'%s']", t.Value)
|
||||
}
|
||||
|
||||
func StringLiteralToken(Value string) *Token {
|
||||
return &Token{Type: StringLiteralTokenType, Value: Value}
|
||||
}
|
||||
|
||||
func NumberLiteralToken(Value string) *Token {
|
||||
number, err := strconv.ParseInt(Value, 0, 64)
|
||||
if err != nil {
|
||||
log.Panicf("failed to parse '%s' as number: %s", Value, err)
|
||||
}
|
||||
return &Token{Type: NumberLiteralTokenType, Number: number}
|
||||
}
|
||||
func NameToken(Name string) *Token {
|
||||
return &Token{Type: NameTokenType, Value: Name}
|
||||
}
|
||||
func OpenParenToken(Depth int) *Token {
|
||||
return &Token{Type: OpenParenTokenType, Number: int64(Depth)}
|
||||
}
|
||||
func CloseParenToken(Depth int) *Token {
|
||||
return &Token{Type: CloseParenTokenType, Number: int64(Depth)}
|
||||
}
|
||||
|
||||
// preprocess strips ';' comments (outside string literals) from every
// line and joins the lines with spaces. It returns the flattened input
// and the original line count.
//
// Bug fixed: the original used back-to-back ifs for the quote state, so
// a '"' set quote=true and the very next if reset it to false (same for
// '`'). The flags were therefore always false and a ';' inside a string
// literal truncated the line. Proper toggles restore the intended
// "ignore ';' inside strings" behavior.
func preprocess(in []byte) ([]byte, int) {
	lines := bytes.Split(in, []byte("\n"))
	var wg sync.WaitGroup
	length := len(lines)
	wg.Add(length)
	for n, l := range lines {
		// Lines are independent, so each is scanned concurrently; each
		// goroutine only writes its own lines[n] slot.
		go func(n int, l []byte) {
			defer wg.Done()
			quote := false // inside a "…" literal
			grave := false // inside a `…` literal

			for i, c := range l {
				switch {
				case c == '"' && !grave:
					quote = !quote
				case c == '`' && !quote:
					grave = !grave
				case c == ';' && !quote && !grave:
					// Comment start: drop the rest of the line.
					lines[n] = l[:i]
					return
				}
			}
		}(n, l)
	}
	wg.Wait()
	return bytes.Join(lines, []byte(" ")), length
}
|
||||
func Tokenize(s []byte) ([][]*Token, error) {
|
||||
s, _ = preprocess(s)
|
||||
var tokens = make([][]*Token, 0)
|
||||
statement := 0
|
||||
token := 0
|
||||
depth := 0
|
||||
literalbegin := -1
|
||||
namebegin := -1
|
||||
quote := false
|
||||
grave := false
|
||||
|
||||
for i, c := range s {
|
||||
if !quote && !grave {
|
||||
switch c {
|
||||
case '(':
|
||||
if depth == 0 {
|
||||
tokens = append(tokens, make([]*Token, 0))
|
||||
}
|
||||
tokens[statement] = append(tokens[statement], OpenParenToken(depth))
|
||||
depth++
|
||||
token++
|
||||
break
|
||||
case ')':
|
||||
if namebegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
||||
namebegin = -1
|
||||
token++
|
||||
} else if literalbegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
||||
token++
|
||||
literalbegin = -1
|
||||
}
|
||||
depth--
|
||||
if depth < 0 {
|
||||
return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token))
|
||||
}
|
||||
tokens[statement] = append(tokens[statement], CloseParenToken(depth))
|
||||
token++
|
||||
if depth == 0 {
|
||||
statement++
|
||||
if statement >= len(tokens) {
|
||||
slices.Grow(tokens, 1)
|
||||
}
|
||||
}
|
||||
break
|
||||
case '"':
|
||||
literalbegin = i + 1
|
||||
quote = true
|
||||
break
|
||||
case '`':
|
||||
literalbegin = i + 1
|
||||
grave = true
|
||||
break
|
||||
case ' ':
|
||||
if namebegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
||||
token++
|
||||
namebegin = -1
|
||||
} else if literalbegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
||||
token++
|
||||
literalbegin = -1
|
||||
}
|
||||
break
|
||||
default:
|
||||
if namebegin == -1 && literalbegin == -1 {
|
||||
if isDigit(c) {
|
||||
literalbegin = i
|
||||
} else if isAllowedName(c) {
|
||||
namebegin = i
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c == '"' && quote {
|
||||
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
||||
literalbegin = -1
|
||||
quote = false
|
||||
token++
|
||||
} else if c == '`' && grave {
|
||||
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
||||
literalbegin = -1
|
||||
grave = false
|
||||
token++
|
||||
}
|
||||
}
|
||||
return tokens, nil
|
||||
}
|
||||
|
||||
// isDigit reports whether c is an ASCII decimal digit and may
// therefore begin a numeric literal.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
||||
|
||||
// isAllowedName reports whether c may be the first character of a
// name: ASCII letters, underscore, and the punctuation runs '*'..'/'
// and ':'..'@'. Digits and reserved characters (parens, quotes,
// whitespace) are excluded, so names cannot start with a number.
func isAllowedName(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case c == '_':
		return true
	case '*' <= c && c <= '/', ':' <= c && c <= '@':
		return true
	}
	return false
}
|
||||
31
schema/token_test.go
Normal file
31
schema/token_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestTokenize checks comment stripping, name/number/string-literal
// scanning, and paren depth numbering against a golden transcript of
// Token.String output.
func TestTokenize(t *testing.T) {
	// Statements may span lines; ';' starts a comment to end of line.
	in := "(test ; test comment\n" +
		"@test) ; test comment\n" +
		`(test "Hello World")` + "\n" +
		"; test comment 2\n" +
		"(+ 1 2)\n" +
		"(test `\"Hello world\"`)\n"
	// Expected token stream, one statement per line, in the compact
	// debug notation produced by Token.String.
	want := "[(0][n'test'][n'@test'][0)]\n" +
		"[(0][n'test'][l'Hello World'][0)]\n" +
		"[(0][n'+'][l1][l2][0)]\n" +
		"[(0][n'test'][l'\"Hello world\"'][0)]\n"
	tokens, _ := Tokenize([]byte(in))
	var test strings.Builder

	// Render every statement's tokens and compare the transcript.
	for _, statement := range tokens {
		for _, token := range statement {
			test.WriteString(token.String())
		}
		test.WriteString("\n")
	}
	if test.String() != want {
		t.Errorf("\ngot:\n%s\nwant:\n%s", test.String(), want)
	}
}
|
||||
Reference in New Issue
Block a user