fix/schema: fix string literals

This commit is contained in:
mae
2026-01-28 19:52:13 -06:00
parent dc87bef1c2
commit 73fd1dd90d
12 changed files with 5285 additions and 3720 deletions

View File

@@ -29,6 +29,10 @@ func NewStringVal(val *token.Token) (Val, error) {
s, err := strconv.Unquote(string(val.Lit)) s, err := strconv.Unquote(string(val.Lit))
return Val{string: &s}, err return Val{string: &s}, err
} }
// NewIStringVal builds a string Val from an interpreted_string token.
// The token's literal text is handed to util.InterpretString and the
// result is wrapped in a Val. The Val is returned alongside whatever
// error util.InterpretString reported, so callers must check the error
// before using it.
func NewIStringVal(val *token.Token) (Val, error) {
	literal := string(val.Lit)
	interpreted, err := util.InterpretString(literal)
	return Val{string: &interpreted}, err
}
func NewRuneVal(val *token.Token) (Val, error) { func NewRuneVal(val *token.Token) (Val, error) {
r := util.RuneValue(val.Lit) r := util.RuneValue(val.Lit)
return Val{rune: &r}, nil return Val{rune: &r}, nil

View File

@@ -1,6 +1,5 @@
_raw_string: '`' {.} '`'; raw_string: '`' {.} '`';
_interpreted_string: '"' {.} '"'; interpreted_string: '"' {_byte_value | _little_u_value | _big_u_value | _escaped_char | ' ' - '!' | '#' - '[' | ']' - '\U0010FFFF'} '"';
string: _raw_string | _interpreted_string;
_unicode_value: . | _little_u_value | _big_u_value | _escaped_char; _unicode_value: . | _little_u_value | _big_u_value | _escaped_char;
_byte_value: _octal_byte_value | _hex_byte_value; _byte_value: _octal_byte_value | _hex_byte_value;
@@ -60,7 +59,8 @@ ValList
| ValList Val <<ast.AppendVal($0, $1)>> | ValList Val <<ast.AppendVal($0, $1)>>
; ;
Val Val
: string <<ast.NewStringVal($T0)>> : raw_string <<ast.NewStringVal($T0)>>
| interpreted_string <<ast.NewIStringVal($T0)>>
| rune <<ast.NewRuneVal($T0)>> | rune <<ast.NewRuneVal($T0)>>
| int <<ast.NewIntVal($T0)>> | int <<ast.NewIntVal($T0)>>
| float <<ast.NewFloatVal($T0)>> | float <<ast.NewFloatVal($T0)>>

File diff suppressed because it is too large Load Diff

View File

@@ -11,8 +11,8 @@ import (
const ( const (
NoState = -1 NoState = -1
NumStates = 211 NumStates = 278
NumSymbols = 101 NumSymbols = 104
) )
type Lexer struct { type Lexer struct {
@@ -129,16 +129,16 @@ func (l *Lexer) Reset() {
/* /*
Lexer symbols: Lexer symbols:
0: ''' 0: '`'
1: ''' 1: '`'
2: 'i' 2: '"'
3: '(' 3: '"'
4: ')' 4: '''
5: '.' 5: '''
6: '`' 6: 'i'
7: '`' 7: '('
8: '"' 8: ')'
9: '"' 9: '.'
10: '\' 10: '\'
11: 'u' 11: 'u'
12: '\' 12: '\'
@@ -222,12 +222,15 @@ Lexer symbols:
90: '\r' 90: '\r'
91: ';' 91: ';'
92: '\n' 92: '\n'
93: '0'-'1' 93: ' '-'!'
94: '2'-'7' 94: '#'-'['
95: '8'-'9' 95: ']'-\U0010ffff
96: 'A'-'F' 96: '0'-'1'
97: 'a'-'f' 97: '2'-'7'
98: 'A'-'Z' 98: '8'-'9'
99: 'a'-'z' 99: 'A'-'F'
100: . 100: 'a'-'f'
101: 'A'-'Z'
102: 'a'-'z'
103: .
*/ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -70,8 +70,8 @@ var gotoTab = gotoTable{
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
18, // ValList 19, // ValList
19, // Val 20, // Val
8, // Expr 8, // Expr
}, },
gotoRow{ // S8 gotoRow{ // S8
@@ -87,8 +87,8 @@ var gotoTab = gotoTable{
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
21, // Val 22, // Val
20, // Expr 21, // Expr
}, },
gotoRow{ // S10 gotoRow{ // S10
-1, // S' -1, // S'
@@ -159,16 +159,16 @@ var gotoTab = gotoTable{
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
32, // Val -1, // Val
8, // Expr -1, // Expr
}, },
gotoRow{ // S19 gotoRow{ // S19
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
-1, // Val 34, // Val
-1, // Expr 8, // Expr
}, },
gotoRow{ // S20 gotoRow{ // S20
-1, // S' -1, // S'
@@ -255,32 +255,32 @@ var gotoTab = gotoTable{
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
37, // Val -1, // Val
8, // Expr -1, // Expr
}, },
gotoRow{ // S31 gotoRow{ // S31
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
39, // ValList -1, // ValList
19, // Val -1, // Val
8, // Expr -1, // Expr
}, },
gotoRow{ // S32 gotoRow{ // S32
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
-1, // Val 39, // Val
-1, // Expr 8, // Expr
}, },
gotoRow{ // S33 gotoRow{ // S33
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList 41, // ValList
-1, // Val 20, // Val
-1, // Expr 8, // Expr
}, },
gotoRow{ // S34 gotoRow{ // S34
-1, // S' -1, // S'
@@ -295,50 +295,42 @@ var gotoTab = gotoTable{
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
40, // Val -1, // Val
8, // Expr -1, // Expr
}, },
gotoRow{ // S36 gotoRow{ // S36
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
42, // ValList -1, // ValList
19, // Val -1, // Val
8, // Expr -1, // Expr
}, },
gotoRow{ // S37 gotoRow{ // S37
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
43, // Val 42, // Val
20, // Expr 8, // Expr
}, },
gotoRow{ // S38 gotoRow{ // S38
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList 44, // ValList
-1, // Val 20, // Val
-1, // Expr 8, // Expr
}, },
gotoRow{ // S39 gotoRow{ // S39
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
32, // Val 45, // Val
8, // Expr 21, // Expr
}, },
gotoRow{ // S40 gotoRow{ // S40
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
46, // Val
20, // Expr
},
gotoRow{ // S41
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
@@ -346,13 +338,21 @@ var gotoTab = gotoTable{
-1, // Val -1, // Val
-1, // Expr -1, // Expr
}, },
gotoRow{ // S41
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
34, // Val
8, // Expr
},
gotoRow{ // S42 gotoRow{ // S42
-1, // S' -1, // S'
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
32, // Val 48, // Val
8, // Expr 21, // Expr
}, },
gotoRow{ // S43 gotoRow{ // S43
-1, // S' -1, // S'
@@ -367,8 +367,8 @@ var gotoTab = gotoTable{
-1, // Schema -1, // Schema
-1, // ExprList -1, // ExprList
-1, // ValList -1, // ValList
-1, // Val 34, // Val
-1, // Expr 8, // Expr
}, },
gotoRow{ // S45 gotoRow{ // S45
-1, // S' -1, // S'
@@ -418,4 +418,20 @@ var gotoTab = gotoTable{
-1, // Val -1, // Val
-1, // Expr -1, // Expr
}, },
gotoRow{ // S51
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
gotoRow{ // S52
-1, // S'
-1, // Schema
-1, // ExprList
-1, // ValList
-1, // Val
-1, // Expr
},
} }

View File

@@ -11,9 +11,9 @@ import (
) )
const ( const (
numProductions = 17 numProductions = 18
numStates = 51 numStates = 53
numSymbols = 17 numSymbols = 18
) )
// Stack // Stack

View File

@@ -83,7 +83,7 @@ var productionsTable = ProdTab{
}, },
}, },
ProdTabEntry{ ProdTabEntry{
String: `Val : string << ast.NewStringVal(X[0].(*token.Token)) >>`, String: `Val : raw_string << ast.NewStringVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 6, Index: 6,
@@ -93,11 +93,21 @@ var productionsTable = ProdTab{
}, },
}, },
ProdTabEntry{ ProdTabEntry{
String: `Val : rune << ast.NewRuneVal(X[0].(*token.Token)) >>`, String: `Val : interpreted_string << ast.NewIStringVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 7, Index: 7,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewIStringVal(X[0].(*token.Token))
},
},
ProdTabEntry{
String: `Val : rune << ast.NewRuneVal(X[0].(*token.Token)) >>`,
Id: "Val",
NTType: 4,
Index: 8,
NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewRuneVal(X[0].(*token.Token)) return ast.NewRuneVal(X[0].(*token.Token))
}, },
@@ -106,7 +116,7 @@ var productionsTable = ProdTab{
String: `Val : int << ast.NewIntVal(X[0].(*token.Token)) >>`, String: `Val : int << ast.NewIntVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 8, Index: 9,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewIntVal(X[0].(*token.Token)) return ast.NewIntVal(X[0].(*token.Token))
@@ -116,7 +126,7 @@ var productionsTable = ProdTab{
String: `Val : float << ast.NewFloatVal(X[0].(*token.Token)) >>`, String: `Val : float << ast.NewFloatVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 9, Index: 10,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewFloatVal(X[0].(*token.Token)) return ast.NewFloatVal(X[0].(*token.Token))
@@ -126,7 +136,7 @@ var productionsTable = ProdTab{
String: `Val : imaginary << ast.NewComplexVal(X[0].(*token.Token)) >>`, String: `Val : imaginary << ast.NewComplexVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 10, Index: 11,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewComplexVal(X[0].(*token.Token)) return ast.NewComplexVal(X[0].(*token.Token))
@@ -136,7 +146,7 @@ var productionsTable = ProdTab{
String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`, String: `Val : name << ast.NewNameVal(X[0].(*token.Token)) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 11, Index: 12,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewNameVal(X[0].(*token.Token)) return ast.NewNameVal(X[0].(*token.Token))
@@ -146,7 +156,7 @@ var productionsTable = ProdTab{
String: `Val : Expr << ast.NewExprVal(X[0]) >>`, String: `Val : Expr << ast.NewExprVal(X[0]) >>`,
Id: "Val", Id: "Val",
NTType: 4, NTType: 4,
Index: 12, Index: 13,
NumSymbols: 1, NumSymbols: 1,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExprVal(X[0]) return ast.NewExprVal(X[0])
@@ -156,7 +166,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`, String: `Expr : "(" name Val Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], X[3]) >>`,
Id: "Expr", Id: "Expr",
NTType: 5, NTType: 5,
Index: 13, Index: 14,
NumSymbols: 5, NumSymbols: 5,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), X[2], X[3]) return ast.NewExpr(X[1].(*token.Token), X[2], X[3])
@@ -166,7 +176,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`, String: `Expr : "(" name Val ")" << ast.NewExpr(X[1].(*token.Token), X[2], nil) >>`,
Id: "Expr", Id: "Expr",
NTType: 5, NTType: 5,
Index: 14, Index: 15,
NumSymbols: 4, NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), X[2], nil) return ast.NewExpr(X[1].(*token.Token), X[2], nil)
@@ -176,7 +186,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`, String: `Expr : "(" name ")" << ast.NewExpr(X[1].(*token.Token), nil, nil) >>`,
Id: "Expr", Id: "Expr",
NTType: 5, NTType: 5,
Index: 15, Index: 16,
NumSymbols: 3, NumSymbols: 3,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.NewExpr(X[1].(*token.Token), nil, nil) return ast.NewExpr(X[1].(*token.Token), nil, nil)
@@ -186,7 +196,7 @@ var productionsTable = ProdTab{
String: `Expr : "(" "." ValList ")" << ast.ListExpr(X[2]) >>`, String: `Expr : "(" "." ValList ")" << ast.ListExpr(X[2]) >>`,
Id: "Expr", Id: "Expr",
NTType: 5, NTType: 5,
Index: 16, Index: 17,
NumSymbols: 4, NumSymbols: 4,
ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) { ReduceFunc: func(X []Attrib, C interface{}) (Attrib, error) {
return ast.ListExpr(X[2]) return ast.ListExpr(X[2])

View File

@@ -6,16 +6,21 @@ import (
) )
func TestParser(t *testing.T) { func TestParser(t *testing.T) {
test := "(test)\n ; test comment" + testSchema(t, "(test)\n ; test comment"+
"(test a)\n" + "(test a)\n"+
"(test a b)\n" + "(test a b)\n"+
"(test \"a\" \"b\")\n" + "(test \"a\" \"b\")\n"+
"(+ 0b1010 -0xDEAD_BEEF)\n" + "(+ 0b1010 -0xDEAD_BEEF)\n"+
"(. a b c d e f g)\n" + "(. a b c d e f g)\n"+
"(test (test1 \"hi\") (test2 \"hi 2\"))\n" + "(test (test1 \"hi\") (test2 \"hi 2\"))\n"+
"(test (. \"awa\" `awawa` \"awawawa\" \"awawawawa\"))\n" + "(test (. \"awa\" `awawa` \"awawawa\" \"awawawawa\"))\n"+
"(test \n `new\nline`)\n" + "(test \n `new\nline`)\n"+
"(test (. 0x0.1Fp1 '\\t' 2i '\\u6767' '\\U0001F600' '\\x23' '\\043'))\n" "(test (. 0x0.1Fp1 '\\t' 2i '\\u6767' '\\U0001F600' '\\x23' '\\043'))\n")
}
// TestInterpretString runs a single expression through testSchema whose
// arguments are a double-quoted string containing escape sequences and a
// backquoted string containing the same kinds of backslash sequences.
func TestInterpretString(t *testing.T) {
	const schema = "(test \"\\\" \\\\v \\u6767 \\U0001F600 \\x23 \\043 \" `\\\\ \\t \\u6767 \\U0001F600 \\x23 \\043`)"
	testSchema(t, schema)
}
func testSchema(t *testing.T, test string) {
schema, err := CreateSchema(test) schema, err := CreateSchema(test)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)

View File

@@ -138,7 +138,8 @@ var TokMap = TokenMap{
typeMap: []string{ typeMap: []string{
"INVALID", "INVALID",
"␚", "␚",
"string", "raw_string",
"interpreted_string",
"rune", "rune",
"int", "int",
"float", "float",
@@ -150,16 +151,17 @@ var TokMap = TokenMap{
}, },
idMap: map[string]Type{ idMap: map[string]Type{
"INVALID": 0, "INVALID": 0,
"␚": 1, "␚": 1,
"string": 2, "raw_string": 2,
"rune": 3, "interpreted_string": 3,
"int": 4, "rune": 4,
"float": 5, "int": 5,
"imaginary": 6, "float": 6,
"name": 7, "imaginary": 7,
"(": 8, "name": 8,
")": 9, "(": 9,
".": 10, ")": 10,
".": 11,
}, },
} }

97
schema/util/util.go Normal file
View File

@@ -0,0 +1,97 @@
package util
import (
"errors"
"strconv"
"strings"
)
// InterpretString decodes a delimited string literal that uses Go-style
// backslash escapes and returns the resulting string.
//
// The first and last bytes of in are treated as the delimiters and are
// dropped without being inspected. Inside the body the following escapes
// are recognised:
//
//	\a \b \f \n \r \t \v \\ \"   single-character escapes
//	\uXXXX  \UXXXXXXXX           Unicode code point, written as UTF-8
//	\xXX    \OOO                 a single raw byte (hex / octal)
//
// An error (and an empty string) is returned when in is too short to hold
// both delimiters, when a backslash is the last character, when an escape
// is unknown or truncated, when its digits do not parse, when an octal
// escape exceeds 255, or when a \u/\U escape names a surrogate half or a
// value above U+10FFFF.
func InterpretString(in string) (string, error) {
	// Guard the delimiter strip: slicing in[1:len(in)-1] on a shorter input
	// would panic.
	if len(in) < 2 {
		return "", errors.New("string literal is missing its delimiters")
	}
	s := []rune(in[1 : len(in)-1])

	var sb strings.Builder
	sb.Grow(len(in) - 2) // the decoded form is never longer than the body

	for i := 0; i < len(s); {
		r := s[i]
		if r != '\\' {
			sb.WriteRune(r)
			i++
			continue
		}
		if i == len(s)-1 {
			return "", errors.New("illegal escape character")
		}

		c := s[i+1]
		if b, ok := simpleEscape(c); ok {
			sb.WriteByte(b)
			i += 2
			continue
		}

		switch c {
		case 'u', 'U':
			digits := 4
			if c == 'U' {
				digits = 8
			}
			n, err := parseEscapeDigits(s, i+2, digits, 16)
			if err != nil {
				return "", err
			}
			// Surrogate halves and values past U+10FFFF are not code points;
			// WriteRune would silently turn them into U+FFFD.
			if n > 0x10FFFF || (n >= 0xD800 && n <= 0xDFFF) {
				return "", errors.New("escape is not a valid Unicode code point")
			}
			sb.WriteRune(rune(n))
			i += 2 + digits
		case 'x':
			n, err := parseEscapeDigits(s, i+2, 2, 16)
			if err != nil {
				return "", err
			}
			// \xXX denotes one byte, not a code point, so it must not be
			// UTF-8 encoded.
			sb.WriteByte(byte(n))
			i += 4
		case '0', '1', '2', '3', '4', '5', '6', '7':
			n, err := parseEscapeDigits(s, i+1, 3, 8)
			if err != nil {
				return "", err
			}
			if n > 0xFF {
				return "", errors.New("octal escape value exceeds 255")
			}
			sb.WriteByte(byte(n))
			i += 4
		default:
			return "", errors.New("could not parse escape character")
		}
	}
	return sb.String(), nil
}

// simpleEscape maps the character following a backslash to the byte it
// stands for; ok is false when c is not a single-character escape.
func simpleEscape(c rune) (b byte, ok bool) {
	switch c {
	case 'a':
		return '\a', true
	case 'b':
		return '\b', true
	case 'f':
		return '\f', true
	case 'n':
		return '\n', true
	case 'r':
		return '\r', true
	case 't':
		return '\t', true
	case 'v':
		return '\v', true
	case '\\':
		return '\\', true
	case '"':
		return '"', true
	}
	return 0, false
}

// parseEscapeDigits parses exactly count digits of s starting at start in
// the given base, failing if fewer than count characters remain. ParseUint
// is used so that a leading sign is rejected rather than parsed.
func parseEscapeDigits(s []rune, start, count, base int) (uint64, error) {
	if start+count > len(s) {
		return 0, errors.New("truncated escape sequence")
	}
	return strconv.ParseUint(string(s[start:start+count]), base, 64)
}