initial commit
This commit is contained in:
198
schema/token.go
Normal file
198
schema/token.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"slices"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type Token struct {
|
||||
Type TokenType
|
||||
Number int64
|
||||
Value string
|
||||
}
|
||||
|
||||
// TokenType identifies the kind of a Token.
type TokenType uintptr

// Token kinds, numbered consecutively from zero in declaration order.
const (
	StringLiteralTokenType TokenType = iota // text delimited by " or `
	NumberLiteralTokenType                  // integer literal
	NameTokenType                           // bare name
	OpenParenTokenType                      // '('
	CloseParenTokenType                     // ')'
)
|
||||
|
||||
func (t *Token) String() string {
|
||||
switch t.Type {
|
||||
case StringLiteralTokenType:
|
||||
return fmt.Sprintf("[l'%s']", t.Value)
|
||||
case NumberLiteralTokenType:
|
||||
return fmt.Sprintf("[l%d]", t.Number)
|
||||
case NameTokenType:
|
||||
return fmt.Sprintf("[n'%s']", t.Value)
|
||||
case OpenParenTokenType:
|
||||
return fmt.Sprintf("[(%d]", t.Number)
|
||||
case CloseParenTokenType:
|
||||
return fmt.Sprintf("[%d)]", t.Number)
|
||||
}
|
||||
return fmt.Sprintf("[?'%s']", t.Value)
|
||||
}
|
||||
|
||||
func StringLiteralToken(Value string) *Token {
|
||||
return &Token{Type: StringLiteralTokenType, Value: Value}
|
||||
}
|
||||
|
||||
func NumberLiteralToken(Value string) *Token {
|
||||
number, err := strconv.ParseInt(Value, 0, 64)
|
||||
if err != nil {
|
||||
log.Panicf("failed to parse '%s' as number: %s", Value, err)
|
||||
}
|
||||
return &Token{Type: NumberLiteralTokenType, Number: number}
|
||||
}
|
||||
func NameToken(Name string) *Token {
|
||||
return &Token{Type: NameTokenType, Value: Name}
|
||||
}
|
||||
func OpenParenToken(Depth int) *Token {
|
||||
return &Token{Type: OpenParenTokenType, Number: int64(Depth)}
|
||||
}
|
||||
func CloseParenToken(Depth int) *Token {
|
||||
return &Token{Type: CloseParenTokenType, Number: int64(Depth)}
|
||||
}
|
||||
|
||||
// preprocess removes comments and newlines.
//
// The input is split on "\n". On each line, everything from the first ';'
// that is not inside a "..." or `...` string to the end of the line is
// dropped. The lines are then joined with a single space.
//
// It returns the joined text and the number of lines the input was split
// into (1 for empty input).
//
// String state is tracked per line only: a string that is still open at the
// end of a line does not protect a ';' on the following line.
func preprocess(in []byte) ([]byte, int) {
	lines := bytes.Split(in, []byte("\n"))
	length := len(lines)

	// Each goroutine writes only its own element of lines, so no locking is
	// needed beyond waiting for all of them to finish.
	var wg sync.WaitGroup
	wg.Add(length)
	for n, l := range lines {
		go func(n int, l []byte) {
			defer wg.Done()
			if cut := commentStart(l); cut >= 0 {
				lines[n] = l[:cut]
			}
		}(n, l)
	}
	wg.Wait()

	return bytes.Join(lines, []byte(" ")), length
}

// commentStart returns the index of the first ';' in line that lies outside
// a "..." or `...` string, or -1 if the line has no comment.
func commentStart(line []byte) int {
	// open is the delimiter of the string currently being scanned, or 0 when
	// outside a string. Keeping a single state variable guarantees that one
	// character can never both open and close a string, and that the other
	// delimiter is ignored while a string is open.
	var open byte
	for i, c := range line {
		switch {
		case open != 0:
			if c == open {
				open = 0
			}
		case c == '"' || c == '`':
			open = c
		case c == ';':
			return i
		}
	}
	return -1
}
|
||||
func Tokenize(s []byte) ([][]*Token, error) {
|
||||
s, _ = preprocess(s)
|
||||
var tokens = make([][]*Token, 0)
|
||||
statement := 0
|
||||
token := 0
|
||||
depth := 0
|
||||
literalbegin := -1
|
||||
namebegin := -1
|
||||
quote := false
|
||||
grave := false
|
||||
|
||||
for i, c := range s {
|
||||
if !quote && !grave {
|
||||
switch c {
|
||||
case '(':
|
||||
if depth == 0 {
|
||||
tokens = append(tokens, make([]*Token, 0))
|
||||
}
|
||||
tokens[statement] = append(tokens[statement], OpenParenToken(depth))
|
||||
depth++
|
||||
token++
|
||||
break
|
||||
case ')':
|
||||
if namebegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
||||
namebegin = -1
|
||||
token++
|
||||
} else if literalbegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
||||
token++
|
||||
literalbegin = -1
|
||||
}
|
||||
depth--
|
||||
if depth < 0 {
|
||||
return nil, errors.New(fmt.Sprintf("unexpected closing paren at [%d,%d]", statement, token))
|
||||
}
|
||||
tokens[statement] = append(tokens[statement], CloseParenToken(depth))
|
||||
token++
|
||||
if depth == 0 {
|
||||
statement++
|
||||
if statement >= len(tokens) {
|
||||
slices.Grow(tokens, 1)
|
||||
}
|
||||
}
|
||||
break
|
||||
case '"':
|
||||
literalbegin = i + 1
|
||||
quote = true
|
||||
break
|
||||
case '`':
|
||||
literalbegin = i + 1
|
||||
grave = true
|
||||
break
|
||||
case ' ':
|
||||
if namebegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NameToken(string(s[namebegin:i])))
|
||||
token++
|
||||
namebegin = -1
|
||||
} else if literalbegin != -1 {
|
||||
tokens[statement] = append(tokens[statement], NumberLiteralToken(string(s[literalbegin:i])))
|
||||
token++
|
||||
literalbegin = -1
|
||||
}
|
||||
break
|
||||
default:
|
||||
if namebegin == -1 && literalbegin == -1 {
|
||||
if isDigit(c) {
|
||||
literalbegin = i
|
||||
} else if isAllowedName(c) {
|
||||
namebegin = i
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c == '"' && quote {
|
||||
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
||||
literalbegin = -1
|
||||
quote = false
|
||||
token++
|
||||
} else if c == '`' && grave {
|
||||
tokens[statement] = append(tokens[statement], StringLiteralToken(string(s[literalbegin:i])))
|
||||
literalbegin = -1
|
||||
grave = false
|
||||
token++
|
||||
}
|
||||
}
|
||||
return tokens, nil
|
||||
}
|
||||
|
||||
// isDigit reports whether c is an ASCII decimal digit ('0' through '9'),
// i.e. a byte that may begin a numeric literal.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
||||
|
||||
// isAllowedName reports whether c may be the first character of a name.
//
// Accepted bytes are ASCII letters, '_', and the two punctuation runs
// '*' through '/' and ':' through '@'. Digits are rejected, so a name cannot
// begin with a number. Only the single byte c is examined.
func isAllowedName(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case c == '_':
		return true
	case '*' <= c && c <= '/': // * + , - . /
		return true
	case ':' <= c && c <= '@': // : ; < = > ? @
		return true
	}
	return false
}
|
||||
Reference in New Issue
Block a user