199 lines
4.7 KiB
Go
199 lines
4.7 KiB
Go
package schema
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"slices"
|
|
"strconv"
|
|
"sync"
|
|
)
|
|
|
|
type Token struct {
|
|
Type TokenType
|
|
Number int64
|
|
Value string
|
|
}
|
|
|
|
// TokenType enumerates the kinds of Token.
type TokenType uintptr

// The token kinds, numbered consecutively from zero.
const (
	// StringLiteralTokenType marks a quoted string literal.
	StringLiteralTokenType TokenType = iota
	// NumberLiteralTokenType marks an integer literal.
	NumberLiteralTokenType
	// NameTokenType marks a bare name.
	NameTokenType
	// OpenParenTokenType marks an opening paren.
	OpenParenTokenType
	// CloseParenTokenType marks a closing paren.
	CloseParenTokenType
)
|
|
|
|
func (t *Token) String() string {
|
|
switch t.Type {
|
|
case StringLiteralTokenType:
|
|
return fmt.Sprintf("[l'%s']", t.Value)
|
|
case NumberLiteralTokenType:
|
|
return fmt.Sprintf("[l%d]", t.Number)
|
|
case NameTokenType:
|
|
return fmt.Sprintf("[n'%s']", t.Value)
|
|
case OpenParenTokenType:
|
|
return fmt.Sprintf("[(%d]", t.Number)
|
|
case CloseParenTokenType:
|
|
return fmt.Sprintf("[%d)]", t.Number)
|
|
}
|
|
return fmt.Sprintf("[?'%s']", t.Value)
|
|
}
|
|
|
|
func StringLiteralToken(Value string) *Token {
|
|
return &Token{Type: StringLiteralTokenType, Value: Value}
|
|
}
|
|
|
|
func NumberLiteralToken(Value string) *Token {
|
|
number, err := strconv.ParseInt(Value, 0, 64)
|
|
if err != nil {
|
|
log.Panicf("failed to parse '%s' as number: %s", Value, err)
|
|
}
|
|
return &Token{Type: NumberLiteralTokenType, Number: number}
|
|
}
|
|
func NameToken(Name string) *Token {
|
|
return &Token{Type: NameTokenType, Value: Name}
|
|
}
|
|
func OpenParenToken(Depth int) *Token {
|
|
return &Token{Type: OpenParenTokenType, Number: int64(Depth)}
|
|
}
|
|
func CloseParenToken(Depth int) *Token {
|
|
return &Token{Type: CloseParenTokenType, Number: int64(Depth)}
|
|
}
|
|
|
|
// preprocess removes comments and newlines.
//
// The input is split on "\n"; on every line everything from the first `;`
// that is not inside a "..." or `...` literal is dropped, and the lines are
// then joined with single spaces. It returns the joined text together with the
// number of lines the input was split into.
//
// Literal state is tracked per line only, so a literal that spans a line break
// is not recognised as such on the following line.
func preprocess(in []byte) ([]byte, int) {
	lines := bytes.Split(in, []byte("\n"))
	length := len(lines)

	var wg sync.WaitGroup
	wg.Add(length)
	for n, l := range lines {
		// Each goroutine writes only its own element of lines, so no
		// further synchronisation is needed beyond the WaitGroup.
		go func(n int, l []byte) {
			defer wg.Done()
			lines[n] = stripComment(l)
		}(n, l)
	}
	wg.Wait()

	return bytes.Join(lines, []byte(" ")), length
}

// stripComment returns line truncated at the first `;` that lies outside a
// double-quoted or backtick-quoted literal, or line unchanged if there is none.
func stripComment(line []byte) []byte {
	quote := false // inside "..."
	grave := false // inside `...`

	for i, c := range line {
		switch c {
		case '"':
			// A double quote inside a backtick literal is plain text.
			// Toggling (rather than separate open/close checks run one
			// after the other) keeps the state set after the opener.
			if !grave {
				quote = !quote
			}
		case '`':
			// A backtick inside a double-quoted literal is plain text.
			if !quote {
				grave = !grave
			}
		case ';':
			if !quote && !grave {
				return line[:i]
			}
		}
	}
	return line
}
|
|
func Tokenize(s []byte) ([][]*Token, error) {
|
|
s, _ = preprocess(s)
|
|
var tokens = make([][]*Token, 0)
|
|
statement := 0
|
|
token := 0
|
|
depth := 0
|
|
literalbegin := -1
|
|
namebegin := -1
|
|
quote := false
|
|
grave := false
|
|
|
|
for i, c := range s {
|
|
if !quote && !grave {
|
|
switch c {
|
|
case '(':
|
|
if depth == 0 {
|
|
tokens = append(tokens, make([]*Token, 0))
|
|
}
|
|
tokens[statement] = append(tokens[statement], OpenParenToken(depth))
|
|
depth++
|
|
token++
|
|
break
|
|
case ')':
|
|
if namebegin != -1 {
|
|
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
|
namebegin = -1
|
|
token++
|
|
} else if literalbegin != -1 {
|
|
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
|
token++
|
|
literalbegin = -1
|
|
}
|
|
depth--
|
|
if depth < 0 {
|
|
return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token))
|
|
}
|
|
tokens[statement] = append(tokens[statement], CloseParenToken(depth))
|
|
token++
|
|
if depth == 0 {
|
|
statement++
|
|
if statement >= len(tokens) {
|
|
slices.Grow(tokens, 1)
|
|
}
|
|
}
|
|
break
|
|
case '"':
|
|
literalbegin = i + 1
|
|
quote = true
|
|
break
|
|
case '`':
|
|
literalbegin = i + 1
|
|
grave = true
|
|
break
|
|
case ' ':
|
|
if namebegin != -1 {
|
|
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
|
token++
|
|
namebegin = -1
|
|
} else if literalbegin != -1 {
|
|
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
|
token++
|
|
literalbegin = -1
|
|
}
|
|
break
|
|
default:
|
|
if namebegin == -1 && literalbegin == -1 {
|
|
if isDigit(c) {
|
|
literalbegin = i
|
|
} else if isAllowedName(c) {
|
|
namebegin = i
|
|
}
|
|
}
|
|
}
|
|
} else if c == '"' && quote {
|
|
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
|
literalbegin = -1
|
|
quote = false
|
|
token++
|
|
} else if c == '`' && grave {
|
|
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
|
literalbegin = -1
|
|
grave = false
|
|
token++
|
|
}
|
|
}
|
|
return tokens, nil
|
|
}
|
|
|
|
// isDigit reports whether c is an ASCII decimal digit ('0' through '9'), i.e.
// whether it may begin a numeric literal.
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
|
|
|
|
// isAllowedName reports whether c may be the first character of a name.
//
// Accepted are ASCII letters, the underscore, and the punctuation bytes in the
// ranges '*' through '/' and ':' through '@'. Digits are not accepted, so a
// name cannot begin with a number.
func isAllowedName(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case c == '_':
		return true
	case '*' <= c && c <= '/': // * + , - . /
		return true
	case ':' <= c && c <= '@': // : ; < = > ? @
		return true
	}
	return false
}
|