Compare commits

...

2 Commits

Author SHA1 Message Date
mae
73fd1dd90d fix/schema: fix string literals 2026-01-28 19:52:13 -06:00
mae
dc87bef1c2 feat/schema: additional literal types 2026-01-28 02:08:12 -06:00
12 changed files with 8651 additions and 1563 deletions

View File

@@ -2,6 +2,7 @@ package ast
import (
"azalea/schema/token"
"azalea/schema/util"
"strconv"
"strings"
)
@@ -15,26 +16,45 @@ type Expr struct {
}
type ValList []Val
type Val struct {
string string
number string
name string
string *string
rune *rune
int *int64
float *float64
imaginary *complex128
name *string
*Expr
}
func NewExprList(expr any) (ExprList, error) {
return ExprList{expr.(Expr)}, nil
}
func NewStringVal(val *token.Token) (Val, error) {
return Val{string: string(val.Lit)}, nil
s, err := strconv.Unquote(string(val.Lit))
return Val{string: &s}, err
}
// NewIStringVal builds a string Val from an interpreted_string token.
//
// The token's literal text (including its surrounding quotes) is handed to
// util.InterpretString, which resolves the escape sequences. If that fails the
// zero Val is returned together with the error, so callers never observe a
// half-built value.
func NewIStringVal(val *token.Token) (Val, error) {
	s, err := util.InterpretString(string(val.Lit))
	if err != nil {
		return Val{}, err
	}
	return Val{string: &s}, nil
}
// NewRuneVal builds a rune Val from a rune token by decoding the token's
// literal bytes with util.RuneValue. It always reports a nil error.
func NewRuneVal(val *token.Token) (Val, error) {
	decoded := util.RuneValue(val.Lit)
	result := Val{rune: &decoded}
	return result, nil
}
// NewIntVal builds an integer Val from an int token.
//
// The literal is parsed with strconv.ParseInt using base 0, so the base is
// taken from the literal's own prefix and the result must fit in an int64.
// On a parse failure the zero Val is returned together with the error.
func NewIntVal(val *token.Token) (Val, error) {
	i, err := strconv.ParseInt(string(val.Lit), 0, 64)
	if err != nil {
		return Val{}, err
	}
	return Val{int: &i}, nil
}
func NewNumberVal(val *token.Token) (Val, error) {
return Val{number: string(val.Lit)}, nil
// NewFloatVal builds a floating-point Val from a float token.
//
// The literal is parsed with strconv.ParseFloat at 64-bit precision. On a
// parse failure the zero Val is returned together with the error.
func NewFloatVal(val *token.Token) (Val, error) {
	f, err := strconv.ParseFloat(string(val.Lit), 64)
	if err != nil {
		return Val{}, err
	}
	return Val{float: &f}, nil
}
// NewComplexVal builds an imaginary Val from an imaginary token.
//
// The literal is first parsed with strconv.ParseComplex. ParseComplex only
// understands floating-point syntax for the numeric part, so it rejects
// imaginary literals whose numeric part is a prefixed integer (for example
// 0x1Fi, 0b101i or 0o17i). For those, the trailing "i" is stripped and the
// remainder is parsed as a base-prefixed integer instead.
//
// If neither form parses, the zero Val is returned together with the original
// ParseComplex error.
func NewComplexVal(val *token.Token) (Val, error) {
	lit := string(val.Lit)
	c, err := strconv.ParseComplex(lit, 128)
	if err != nil {
		// Fallback for integer literals with a base prefix.
		n, intErr := strconv.ParseInt(strings.TrimSuffix(lit, "i"), 0, 64)
		if intErr != nil {
			return Val{}, err
		}
		c = complex(0, float64(n))
	}
	return Val{imaginary: &c}, nil
}
func NewNameVal(val *token.Token) (Val, error) {
return Val{name: string(val.Lit)}, nil
name := string(val.Lit)
return Val{name: &name}, nil
}
func NewExprVal(val any) (Val, error) {
@@ -42,6 +62,10 @@ func NewExprVal(val any) (Val, error) {
return Val{Expr: &expr}, nil
}
// NewExprList returns a new ExprList containing expr as its only element.
//
// expr must hold an Expr: the type assertion is unchecked and panics for any
// other dynamic type. The returned error is always nil.
func NewExprList(expr any) (ExprList, error) {
return ExprList{expr.(Expr)}, nil
}
// AppendExpr appends expr to exprList and returns the resulting list.
//
// exprList must hold an ExprList and expr must hold an Expr: both type
// assertions are unchecked and panic for any other dynamic type. The returned
// error is always nil.
func AppendExpr(exprList, expr any) (ExprList, error) {
return append(exprList.(ExprList), expr.(Expr)), nil
}
@@ -100,15 +124,23 @@ func (e Expr) String() string {
return sb.String()
}
func (v *Val) String() string {
if v.string != "" {
return v.string
if v.string != nil {
return *v.string
}
if v.number != "" {
num, _ := strconv.ParseInt(v.number, 0, 64)
return strconv.FormatInt(num, 10)
if v.rune != nil {
return string(*v.rune)
}
if v.name != "" {
return v.name
if v.int != nil {
return strconv.FormatInt(*v.int, 10)
}
if v.float != nil {
return strconv.FormatFloat(*v.float, 'g', -1, 64)
}
if v.imaginary != nil {
return strconv.FormatComplex(*v.imaginary, 'g', -1, 128)
}
if v.name != nil {
return *v.name
}
if v.Expr != nil {
return v.Expr.String()

View File

@@ -1,15 +1,42 @@
string: '`' {.} '`' | '"' {.} '"';
raw_string: '`' {.} '`';
interpreted_string: '"' {_byte_value | _little_u_value | _big_u_value | _escaped_char | ' ' - '!' | '#' - '[' | ']' - '\U0010FFFF'} '"';
_unicode_value: . | _little_u_value | _big_u_value | _escaped_char;
_byte_value: _octal_byte_value | _hex_byte_value;
_little_u_value: '\\' 'u' _hex_digit _hex_digit _hex_digit _hex_digit;
_big_u_value: '\\' 'U' _hex_digit _hex_digit _hex_digit _hex_digit _hex_digit _hex_digit _hex_digit _hex_digit;
_escaped_char: '\\' ('a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | '\'' | '"');
_octal_byte_value: '\\' _oct_digit _oct_digit _oct_digit;
_hex_byte_value: '\\' 'x' _hex_digit _hex_digit;
rune: '\'' (_unicode_value | _byte_value) '\'';
_bin_digit: '0' - '1';
_bin_digits: _bin_digit {_bin_digit | '_'};
_oct_digit: _bin_digit | '2' - '7';
_oct_digits: _oct_digit {_oct_digit | '_'};
_dec_digit: _oct_digit | '8' - '9';
_dec_digits: _dec_digit {_dec_digit | '_'};
_hex_digit: _dec_digit | 'A' - 'F' | 'a' - 'f';
number: ['-' | '+'] '0' 'b' _bin_digit {_bin_digit | '_'}
| ['-' | '+'] '0' 'o' _oct_digit {_oct_digit | '_'}
| ['-' | '+'] _dec_digit {_dec_digit | '_'}
| ['-' | '+'] '0' 'x' _hex_digit {_hex_digit | '_'};
_hex_digits: _hex_digit {_hex_digit | '_'};
_name_initial: 'A' - 'Z' | 'a' - 'z' | '_' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '-' | '_' | '+' | '=' | '?' | '/' | '.' | '\'';
_int: ['-' | '+'] '0' ('b' | 'B') _bin_digits
| ['-' | '+'] '0' ('o' | 'O') _oct_digits
| ['-' | '+'] _dec_digits
| ['-' | '+'] '0' ('x' | 'X') _hex_digits;
int: _int;
_dec_exponent: ('e' | 'E') ['+' | '-'] _dec_digits;
_dec_float: ['-' | '+'] _dec_digits '.' [_dec_digits] [_dec_exponent]
| _dec_digits _dec_exponent
| '.' _dec_digits [_dec_exponent];
_hex_exponent: ('p' | 'P') ['+' | '-'] _dec_digits;
_hex_mantissa: ['_'] _hex_digits '.' _hex_digits | ['_'] _hex_digits | '.' _hex_digits;
_hex_float: ['-' | '+'] '0' ('x' | 'X') _hex_mantissa _hex_exponent;
_float: _dec_float | _hex_float;
float: _float;
imaginary: (_dec_digits | _int | _float) 'i';
_name_initial: 'A' - 'Z' | 'a' - 'z' | '_' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '-' | '_' | '+' | '=' | '?' | '/' | '.';
_name_char: _name_initial | _dec_digit;
name: _name_initial {_name_char};
@@ -32,8 +59,12 @@ ValList
| ValList Val <<ast.AppendVal($0, $1)>>
;
Val
: string <<ast.NewStringVal($T0)>>
| number <<ast.NewNumberVal($T0)>>
: raw_string <<ast.NewStringVal($T0)>>
| interpreted_string <<ast.NewIStringVal($T0)>>
| rune <<ast.NewRuneVal($T0)>>
| int <<ast.NewIntVal($T0)>>
| float <<ast.NewFloatVal($T0)>>
| imaginary <<ast.NewComplexVal($T0)>>
| name <<ast.NewNameVal($T0)>>
| Expr <<ast.NewExprVal($0)>>
;

File diff suppressed because it is too large Load Diff

View File

@@ -11,8 +11,8 @@ import (
const (
NoState = -1
NumStates = 52
NumSymbols = 57
NumStates = 278
NumSymbols = 104
)
type Lexer struct {
@@ -133,57 +133,104 @@ Lexer symbols:
1: '`'
2: '"'
3: '"'
4: '-'
5: '+'
6: '0'
7: 'b'
8: '_'
9: '-'
10: '+'
11: '0'
12: 'o'
13: '_'
14: '-'
15: '+'
16: '_'
17: '-'
18: '+'
19: '0'
20: 'x'
21: '_'
22: '('
23: ')'
24: '.'
25: '_'
26: '~'
27: '!'
28: '@'
29: '#'
30: '$'
31: '%'
32: '^'
33: '&'
34: '*'
35: '-'
36: '_'
37: '+'
38: '='
39: '?'
40: '/'
41: '.'
42: '''
43: ' '
44: '\t'
45: '\n'
46: '\r'
47: ';'
48: '\n'
49: '0'-'1'
50: '2'-'7'
51: '8'-'9'
52: 'A'-'F'
53: 'a'-'f'
54: 'A'-'Z'
55: 'a'-'z'
56: .
4: '''
5: '''
6: 'i'
7: '('
8: ')'
9: '.'
10: '\'
11: 'u'
12: '\'
13: 'U'
14: '\'
15: 'a'
16: 'b'
17: 'f'
18: 'n'
19: 'r'
20: 't'
21: 'v'
22: '\'
23: '''
24: '"'
25: '\'
26: '\'
27: 'x'
28: '_'
29: '_'
30: '_'
31: '_'
32: '-'
33: '+'
34: '0'
35: 'b'
36: 'B'
37: '-'
38: '+'
39: '0'
40: 'o'
41: 'O'
42: '-'
43: '+'
44: '-'
45: '+'
46: '0'
47: 'x'
48: 'X'
49: 'e'
50: 'E'
51: '+'
52: '-'
53: '-'
54: '+'
55: '.'
56: '.'
57: 'p'
58: 'P'
59: '+'
60: '-'
61: '_'
62: '.'
63: '_'
64: '.'
65: '-'
66: '+'
67: '0'
68: 'x'
69: 'X'
70: '_'
71: '~'
72: '!'
73: '@'
74: '#'
75: '$'
76: '%'
77: '^'
78: '&'
79: '*'
80: '-'
81: '_'
82: '+'
83: '='
84: '?'
85: '/'
86: '.'
87: ' '
88: '\t'
89: '\n'
90: '\r'
91: ';'
92: '\n'
93: ' '-'!'
94: '#'-'['
95: ']'-\U0010ffff
96: '0'-'1'
97: '2'-'7'
98: '8'-'9'
99: 'A'-'F'
100: 'a'-'f'
101: 'A'-'Z'
102: 'a'-'z'
103: .
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -70,8 +70,8 @@ var gotoTab = gotoTable{
-1, // S'
-1, // Schema
-1, // ExprList
15, // ValList
16, // Val
19, // ValList
20, // Val
8, // Expr
},
gotoRow{ // S8
@@ -87,8 +87,8 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
18, // Val
17, // Expr
22, // Val
21, // Expr
},
gotoRow{ // S10
-1, // S'
@@ -135,8 +135,8 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
26, // Val
8, // Expr
-1, // Val
-1, // Expr
},
gotoRow{ // S16
-1, // S'
@@ -167,8 +167,8 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
34, // Val
8, // Expr
},
gotoRow{ // S20
-1, // S'
@@ -207,16 +207,16 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
31, // Val
8, // Expr
-1, // Val
-1, // Expr
},
gotoRow{ // S25
-1, // S'
-1, // Schema
-1, // ExprList
33, // ValList
16, // Val
8, // Expr
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S26
-1, // S'
@@ -247,26 +247,10 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
34, // Val
8, // Expr
-1, // Val
-1, // Expr
},
gotoRow{ // S30
-1, // S'
-1, // Schema
-1, // ExprList
36, // ValList
16, // Val
8, // Expr
},
gotoRow{ // S31
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
37, // Val
17, // Expr
},
gotoRow{ // S32
-1, // S'
-1, // Schema
-1, // ExprList
@@ -274,12 +258,28 @@ var gotoTab = gotoTable{
-1, // Val
-1, // Expr
},
gotoRow{ // S33
gotoRow{ // S31
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
26, // Val
-1, // Val
-1, // Expr
},
gotoRow{ // S32
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
39, // Val
8, // Expr
},
gotoRow{ // S33
-1, // S'
-1, // Schema
-1, // ExprList
41, // ValList
20, // Val
8, // Expr
},
gotoRow{ // S34
@@ -287,8 +287,8 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
40, // Val
17, // Expr
-1, // Val
-1, // Expr
},
gotoRow{ // S35
-1, // S'
@@ -303,32 +303,32 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
26, // Val
8, // Expr
-1, // Val
-1, // Expr
},
gotoRow{ // S37
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
42, // Val
8, // Expr
},
gotoRow{ // S38
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
44, // ValList
20, // Val
8, // Expr
},
gotoRow{ // S39
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
45, // Val
21, // Expr
},
gotoRow{ // S40
-1, // S'
@@ -343,16 +343,16 @@ var gotoTab = gotoTable{
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
34, // Val
8, // Expr
},
gotoRow{ // S42
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
48, // Val
21, // Expr
},
gotoRow{ // S43
-1, // S'
@@ -363,6 +363,70 @@ var gotoTab = gotoTable{
-1, // Expr
},
gotoRow{ // S44
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
34, // Val
8, // Expr
},
gotoRow{ // S45
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S46
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S47
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S48
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S49
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S50
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S51
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S52
-1, // S'
-1, // Schema
-1, // ExprList

View File

@@ -11,9 +11,9 @@ import (
)
const (
numProductions = 14
numStates = 45
numSymbols = 14
numProductions = 18
numStates = 53
numSymbols = 18
)
// Stack

View File

@@ -83,7 +83,7 @@ var productionsTable = ProdTab{
},
},
ProdTabEntry{
String: `Val : string << ast.NewStringVal(X[0].(*token.Token)) >>`,
String: `Val : raw_string << ast.NewStringVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 6,
@@ -93,20 +93,60 @@ var productionsTable = ProdTab{
},
},
ProdTabEntry{
String: `Val : number << ast.NewNumberVal(X[0].(*token.Token)) >>`,
String: `Val : interpreted_string << ast.NewIStringVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 7,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewNumberVal(X[0].(*token.Token))
return ast.NewIStringVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : rune << ast.NewRuneVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 8,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewRuneVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : int << ast.NewIntVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 9,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewIntVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : float << ast.NewFloatVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 10,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewFloatVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : imaginary << ast.NewComplexVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 11,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewComplexVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 8,
Index: 12,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewNameVal(X[0].(*token.Token))
@@ -116,7 +156,7 @@ var productionsTable = ProdTab{
String: `Val : Expr << ast.NewExprVal(X[0]) >>`,
Id: "Val",
NTType: 4,
Index: 9,
Index: 13,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExprVal(X[0])
@@ -126,7 +166,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`,
Id: "Expr",
NTType: 5,
Index: 10,
Index: 14,
NumSymbols: 5,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), X[2], X[3])
@@ -136,7 +176,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`,
Id: "Expr",
NTType: 5,
Index: 11,
Index: 15,
NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), X[2], nil)
@@ -146,7 +186,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`,
Id: "Expr",
NTType: 5,
Index: 12,
Index: 16,
NumSymbols: 3,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), nil, nil)
@@ -156,7 +196,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" "." ValList ")" << ast.ListExpr(X[2]) >>`,
Id: "Expr",
NTType: 5,
Index: 13,
Index: 17,
NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.ListExpr(X[2])

View File

@@ -6,13 +6,24 @@ import (
)
func TestParser(t *testing.T) {
test := "(test)" +
"(test a)" +
"(test a b)" +
"(test \"a\" \"b\")" +
"(+ 0b1010 -0xDEAD_BEEF)" +
"(. a b c d e f g)" +
"(test (test1 \"hi\") (test2 \"hi 2\"))" +
"(test (. \"awa\" \"awawa\" \"awawawa\" \"awawawawa\"))"
fmt.Println(CreateSchema(test))
testSchema(t, "(test)\n ; test comment"+
"(test a)\n"+
"(test a b)\n"+
"(test \"a\" \"b\")\n"+
"(+ 0b1010 -0xDEAD_BEEF)\n"+
"(. a b c d e f g)\n"+
"(test (test1 \"hi\") (test2 \"hi 2\"))\n"+
"(test (. \"awa\" `awawa` \"awawawa\" \"awawawawa\"))\n"+
"(test \n `new\nline`)\n"+
"(test (. 0x0.1Fp1 '\\t' 2i '\\u6767' '\\U0001F600' '\\x23' '\\043'))\n")
}
// TestInterpretString feeds testSchema one expression holding an interpreted
// ("...") string and a raw (`...`) string that contain the same kinds of
// escape sequences. It only checks that schema creation reports no error; the
// decoded values themselves are not asserted.
func TestInterpretString(t *testing.T) {
testSchema(t, "(test \"\\\" \\\\v \\u6767 \\U0001F600 \\x23 \\043 \" `\\\\ \\t \\u6767 \\U0001F600 \\x23 \\043`)")
}
// testSchema runs CreateSchema on test, fails the calling test immediately if
// it returns an error, and otherwise prints the resulting schema.
func testSchema(t *testing.T, test string) {
	// Mark this function as a helper so a failure is reported at the line of
	// the test that called it rather than inside this function.
	t.Helper()

	schema, err := CreateSchema(test)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Println(schema)
}

View File

@@ -138,8 +138,12 @@ var TokMap = TokenMap{
typeMap: []string{
"INVALID",
"␚",
"string",
"number",
"raw_string",
"interpreted_string",
"rune",
"int",
"float",
"imaginary",
"name",
"(",
")",
@@ -147,13 +151,17 @@ var TokMap = TokenMap{
},
idMap: map[string]Type{
"INVALID": 0,
"␚": 1,
"string": 2,
"number": 3,
"name": 4,
"(": 5,
")": 6,
".": 7,
"INVALID": 0,
"␚": 1,
"raw_string": 2,
"interpreted_string": 3,
"rune": 4,
"int": 5,
"float": 6,
"imaginary": 7,
"name": 8,
"(": 9,
")": 10,
".": 11,
},
}

97
schema/util/util.go Normal file
View File

@@ -0,0 +1,97 @@
package util
import (
"errors"
"strconv"
"strings"
)
// InterpretString converts the source text of an interpreted (double-quoted)
// string literal into the string value it denotes.
//
// in must include its two delimiter characters; the first and last byte are
// dropped without being inspected. Within the remaining body the following
// escape sequences are recognised:
//
//	\a \b \f \n \r \t \v \\ \"   single-character escapes
//	\uXXXX  \UXXXXXXXX           a Unicode code point, emitted as UTF-8
//	\xXX    \OOO                 a single raw byte (two hex / three octal digits)
//
// Every other rune is copied to the output. An error is returned when in is
// too short to carry both delimiters, when an escape is unknown, truncated or
// contains non-digit characters, when a \u or \U escape names a surrogate or
// a value above U+10FFFF, or when an octal escape exceeds 255.
func InterpretString(in string) (string, error) {
	if len(in) < 2 {
		return "", errors.New("string literal is missing its delimiters")
	}
	s := []rune(in[1 : len(in)-1])

	var sb strings.Builder
	for i := 0; i < len(s); {
		if s[i] != '\\' {
			sb.WriteRune(s[i])
			i++
			continue
		}
		if i == len(s)-1 {
			return "", errors.New("illegal escape character")
		}

		e := s[i+1]
		if b, ok := simpleEscape(e); ok {
			sb.WriteByte(b)
			i += 2
			continue
		}

		switch e {
		case 'u', 'U':
			width := 4
			if e == 'U' {
				width = 8
			}
			n, err := parseEscapeDigits(s, i+2, width, 16)
			if err != nil {
				return "", err
			}
			// Surrogate halves and values past U+10FFFF are not code points;
			// reject them instead of silently emitting U+FFFD.
			if n > 0x10FFFF || (n >= 0xD800 && n <= 0xDFFF) {
				return "", errors.New("escape is not a valid Unicode code point")
			}
			sb.WriteRune(rune(n))
			i += 2 + width
		case 'x':
			n, err := parseEscapeDigits(s, i+2, 2, 16)
			if err != nil {
				return "", err
			}
			// Byte escapes denote exactly one byte, not a code point.
			sb.WriteByte(byte(n))
			i += 4
		case '0', '1', '2', '3', '4', '5', '6', '7':
			n, err := parseEscapeDigits(s, i+1, 3, 8)
			if err != nil {
				return "", err
			}
			if n > 0xFF {
				return "", errors.New("octal escape value exceeds 255")
			}
			sb.WriteByte(byte(n))
			i += 4
		default:
			return "", errors.New("could not parse escape character")
		}
	}
	return sb.String(), nil
}

// simpleEscape maps the character following a backslash to the byte it stands
// for, reporting false when r is not a single-character escape.
func simpleEscape(r rune) (byte, bool) {
	switch r {
	case 'a':
		return '\a', true
	case 'b':
		return '\b', true
	case 'f':
		return '\f', true
	case 'n':
		return '\n', true
	case 'r':
		return '\r', true
	case 't':
		return '\t', true
	case 'v':
		return '\v', true
	case '\\':
		return '\\', true
	case '"':
		return '"', true
	}
	return 0, false
}

// parseEscapeDigits parses exactly width digits of the given base starting at
// s[start]. It fails, rather than panicking, when fewer than width runes
// remain; signs and underscores are rejected by strconv.ParseUint.
func parseEscapeDigits(s []rune, start, width, base int) (uint64, error) {
	if start+width > len(s) {
		return 0, errors.New("truncated escape sequence")
	}
	return strconv.ParseUint(string(s[start:start+width]), base, 32)
}