// Declarations from internal/rosa/azalea/azalea.go, package azalea: a
// proof-of-concept, domain-specific language for Rosa OS software packaging.
// The file imports errors, io, strconv and text/scanner.

// idents marks the runes accepted in an identifier, indexed by rune value.
var idents = [...]bool{
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true,
	'7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	'-': true, '_': true,
}

// TokenError describes an unexpected token. Element 0 is the token that was
// expected, element 1 is the token that was found.
type TokenError [2]rune

// Error renders both tokens using [scanner.TokenString].
func (e TokenError) Error() string {
	return "expected " + scanner.TokenString(e[0]) +
		", found " + scanner.TokenString(e[1])
}

// ExprError is an unexpected token encountered while parsing an expression.
type ExprError rune

// Error renders the offending token using [scanner.TokenString].
func (e ExprError) Error() string {
	return "unexpected token " + scanner.TokenString(rune(e))
}

// must1 returns v, or panics with err if err is not nil.
func must1[T any](v T, err error) T {
	if err != nil {
		panic(err)
	}
	return v
}

// scanAs advances the scanner by one token and panics with a [TokenError] if
// that token is not expects.
func scanAs(s *scanner.Scanner, expects rune) {
	if found := s.Scan(); found != expects {
		panic(TokenError{expects, found})
	}
}

// parseInt parses the current token as a base 10 representation of a 64-bit
// signed integer. It panics with the [strconv] error if the token text is not
// such a representation.
func parseInt(s *scanner.Scanner) int64 {
	return must1(strconv.ParseInt(s.TokenText(), 10, 64))
}

// A String represents an identifier or string literal.
type String struct {
	// Value is the unquoted literal, or the identifier itself.
	Value string
	// Ident is true when Value was written as an identifier rather than as a
	// quoted string.
	Ident bool
}

// A StringSpec describes a statement evaluating down to a string value. Its
// elements are the operands of a '+' concatenation, in source order.
type StringSpec []String

// parseString unquotes the current token, which must be an interpreted or raw
// string literal. It panics with the [strconv.Unquote] error otherwise.
func parseString(s *scanner.Scanner) string {
	return must1(strconv.Unquote(s.TokenText()))
}

// appendStringSpec parses operands of a '+' concatenation and appends them to
// p, scanning until it reaches a token that does not continue the
// concatenation. That delimiter token is consumed and returned as tok.
//
// op states whether the caller has already parsed an operand, in which case
// the next token must be '+' for the concatenation to continue. When op is
// false the next token is expected to be an operand.
//
// ok is false only when op was false and the first token scanned was not an
// operand; v is then p, unchanged. A '+' consumed by this function that is not
// followed by a string literal or identifier is a syntax error and panics with
// TokenError{scanner.String, tok}, so that a dangling operator is never
// silently dropped together with the token after it.
func appendStringSpec(
	s *scanner.Scanner,
	op bool,
	p StringSpec,
) (v StringSpec, tok rune, ok bool) {
	v = p
	for {
		if op {
			// An operand precedes this point: anything other than '+' is the
			// delimiter and ends the spec.
			if tok = s.Scan(); tok != '+' {
				return v, tok, true
			}
		}

		switch tok = s.Scan(); tok {
		case scanner.String, scanner.RawString:
			v = append(v, String{Value: parseString(s)})

		case scanner.Ident:
			v = append(v, String{Value: s.TokenText(), Ident: true})

		default:
			if op {
				// The '+' consumed above has no right-hand operand.
				panic(TokenError{scanner.String, tok})
			}
			// No operand at all; the caller decides whether that is allowed.
			return v, tok, false
		}
		op = true
	}
}

// A KV holds a key/value pair.
type KV struct {
	// K is the unquoted key.
	K string
	// V is the parsed value, or nil if the pair was written without one.
	V any
}

// An Arg represents an argument of [Func].
type Arg struct {
	// K is the argument name.
	K string
	// V is the parsed argument value.
	V any
	// R is set by the parser when the name is followed by '*'.
	R bool
}

// Func is a function call or package declaration.
type Func struct {
	// Function or package identifier.
	Ident string
	// Whether this is a package declaration.
	Package bool
	// Key-value arguments.
	Args []Arg
}

// parseExpr scans and parses the next expression. A nil return indicates
// [scanner.EOF]. The boolean result reports whether the token following the
// expression has already been consumed.
+func parseExpr(s *scanner.Scanner) (any, bool) { + tok := s.Scan() + if tok == scanner.EOF { + return nil, false + } + + switch tok { + case scanner.Int: + return parseInt(s), false + + case scanner.String, scanner.RawString: + p, t, ok := appendStringSpec(s, true, StringSpec{ + {parseString(s), false}, + }) + if !ok { + panic(TokenError{scanner.String, t}) + } + return p, true + + case scanner.Ident: + var v Func + v.Ident = s.TokenText() + if v.Package = v.Ident == "package"; v.Package { + scanAs(s, scanner.Ident) + v.Ident = s.TokenText() + } + + switch tok = s.Scan(); tok { + case '{': + for { + switch tok = s.Scan(); tok { + case '}': + return v, false + + case scanner.Ident: + break + + default: + panic(TokenError{scanner.Ident, tok}) + } + + var next bool + arg := Arg{K: s.TokenText()} + switch tok = s.Scan(); tok { + case '=': + break + + case '*': + arg.R = true + scanAs(s, '=') + + default: + panic(TokenError{'=', tok}) + } + arg.V, next = parseExpr(s) + v.Args = append(v.Args, arg) + if !next { + scanAs(s, ';') + } + } + + case ';': + return StringSpec{{v.Ident, true}}, true + + case '+': + p, t, ok := appendStringSpec(s, false, StringSpec{ + {v.Ident, true}, + }) + if !ok { + panic(TokenError{scanner.String, t}) + } + return p, t != scanner.EOF + + case scanner.EOF: + return StringSpec{{v.Ident, true}}, false + + default: + panic(TokenError{'{', tok}) + } + + case '{': + var v []KV + for { + switch tok = s.Scan(); tok { + case '}': + return v, false + + case scanner.String: + p := KV{K: parseString(s)} + switch tok = s.Scan(); tok { + case ';': + break + + case ':': + var next bool + p.V, next = parseExpr(s) + if !next { + scanAs(s, ';') + } + break + + default: + panic(ExprError(tok)) + } + v = append(v, p) + + default: + panic(ExprError(tok)) + } + } + + case '[': + var v []StringSpec + for { + p, t, ok := appendStringSpec(s, false, nil) + if ok { + v = append(v, p) + } + switch t { + case scanner.String, scanner.RawString, scanner.Ident, ',': + if 
!ok { + panic(ExprError(']')) + } + continue + + case ']': + return v, false + + default: + panic(TokenError{']', t}) + } + } + + default: + panic(ExprError(tok)) + } +} + +// ScanError is the error count parsing all expressions. +type ScanError int + +func (ScanError) Error() string { + return "aborting due to scanning errors" +} + +// Parse parses expressions from r. +func Parse(r io.Reader) (e []any, err error) { + var s scanner.Scanner + s.Init(r) + + s.Mode = scanner.ScanIdents | + scanner.ScanInts | + scanner.ScanStrings | + scanner.ScanRawStrings | + scanner.ScanComments | + scanner.SkipComments + s.IsIdentRune = func(ch rune, i int) bool { + if i == 0 && ch >= '0' && ch <= '9' { + return false + } + return ch > 0 && ch < rune(len(idents)) && idents[ch] + } + + defer func() { + v := recover() + if v == nil { + return + } + + _err, ok := v.(error) + if !ok { + panic(v) + } + + if err == nil { + err = _err + return + } + err = errors.Join(err, _err) + }() + + for expr, next := parseExpr(&s); expr != nil; expr, next = parseExpr(&s) { + if next { + return e, ExprError(';') + } + e = append(e, expr) + } + + if s.ErrorCount != 0 { + err = ScanError(s.ErrorCount) + } + return +} diff --git a/internal/rosa/azalea/azalea_test.go b/internal/rosa/azalea/azalea_test.go new file mode 100644 index 00000000..da0acabe --- /dev/null +++ b/internal/rosa/azalea/azalea_test.go @@ -0,0 +1,193 @@ +package azalea_test + +import ( + "reflect" + "strings" + "testing" + "text/scanner" + + "hakurei.app/internal/rosa/azalea" +) + +func TestParse(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + data string + want []any + err error + }{ + {"invalid", "}", nil, azalea.ExprError('}')}, + {"bad args", "f }", nil, azalea.TokenError{'{', '}'}}, + {"bad sep", "f{v?}", nil, azalea.TokenError{'=', '?'}}, + {"bad ident", "f{9}", nil, azalea.TokenError{scanner.Ident, scanner.Int}}, + + {"ident string", `v`, []any{azalea.StringSpec{ + {Value: "v", Ident: true}, + }}, nil}, 
+ {"ident string concat", `v+"\xfd"`, []any{azalea.StringSpec{ + {Value: "v", Ident: true}, + {Value: "\xfd"}, + }}, nil}, + {"truncated string concat", `v+`, nil, + azalea.TokenError{scanner.String, scanner.EOF}}, + {"unexpected string concat", `v+9`, nil, + azalea.TokenError{scanner.String, scanner.Int}}, + + {"empty pairs", `{}`, []any{[]azalea.KV(nil)}, nil}, + {"short kv", `{"\x00":v;}`, []any{[]azalea.KV{ + {K: "\x00", V: azalea.StringSpec{azalea.String{Value: "v", Ident: true}}}, + }}, nil}, + {"truncated kv", `{"\x00"`, nil, azalea.ExprError(scanner.EOF)}, + {"ident kv", `{v="";}`, nil, azalea.ExprError(scanner.Ident)}, + + {"empty array", `[]`, []any{[]azalea.StringSpec(nil)}, nil}, + {"unexpected array", `[9]`, nil, + azalea.TokenError{']', scanner.Int}}, + {"short array", `[ "\x00" ]`, []any{ + []azalea.StringSpec{{{Value: "\x00"}}}, + }, nil}, + {"short array delim", `[ "\x00", ]`, []any{ + []azalea.StringSpec{{{Value: "\x00"}}}, + }, nil}, + {"missing array value", `[ "\x00", , v ]`, nil, azalea.ExprError(']')}, + {"truncated array", `[ "\x00"`, nil, + azalea.TokenError{']', scanner.EOF}}, + + {"gcc", ` +package gcc { + description = "The GNU Compiler Collection"; + website = "https://www.gnu.org/software/gcc"; + anitya = 6502; + + version* = "16.1.0"; + source = remoteTar { + url = "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"+ + "gcc-"+version+"/gcc-"+version+".tar.gz"; + checksum = "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K"; + compress = gzip; + }; + patches = [ + "musl-off64_t-loff_t.patch", + "musl-legacy-lfs.patch", + ]; + + // GCC spends most of its time in its many configure scripts, however + // it also saturates the CPU for a consequential amount of time. 
+ exclusive = true; + + exec = make { + configure = { + "disable-multilib"; + "enable-default-pie"; + "disable-nls"; + "with-gnu-as"; + "with-gnu-ld"; + "with-system-zlib"; + "enable-languages": "c,c++,go"; + "with-native-system-header-dir": "/system/include"; + "with-multilib-list": arch { + amd64 = "''"; + default = unset; + }; + }; + make = [ + "BOOT_CFLAGS='-O2 -g'", + "bootstrap", + ]; + + // This toolchain is hacked to pieces, it is not expected to ever work + // well in its current state. That does not matter as long as the + // toolchain it produces passes its own test suite. + skip-check = true; + }; + + inputs = [ + binutils, + + mpc, + zlib, + libucontext, + kernel-headers, + ]; +} +`, []any{azalea.Func{ + Ident: "gcc", + Package: true, + + Args: []azalea.Arg{ + {K: "description", V: azalea.StringSpec{{Value: "The GNU Compiler Collection"}}}, + {K: "website", V: azalea.StringSpec{{Value: "https://www.gnu.org/software/gcc"}}}, + {K: "anitya", V: int64(6502)}, + {K: "version", V: azalea.StringSpec{{Value: "16.1.0"}}, R: true}, + {K: "source", V: azalea.Func{Ident: "remoteTar", Package: false, Args: []azalea.Arg{ + {K: "url", V: azalea.StringSpec{ + {Value: "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"}, + {Value: "gcc-"}, {Value: "version", Ident: true}, + {Value: "/gcc-"}, {Value: "version", Ident: true}, + {Value: ".tar.gz"}, + }}, + {K: "checksum", V: azalea.StringSpec{ + {Value: "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K"}, + }}, + {K: "compress", V: azalea.StringSpec{{Value: "gzip", Ident: true}}}, + }}}, + {K: "patches", V: []azalea.StringSpec{ + {{Value: "musl-off64_t-loff_t.patch"}}, + {{Value: "musl-legacy-lfs.patch"}}, + }}, + {K: "exclusive", V: azalea.StringSpec{{Value: "true", Ident: true}}}, + {K: "exec", V: azalea.Func{ + Ident: "make", + + Args: []azalea.Arg{ + {K: "configure", V: []azalea.KV{ + {K: "disable-multilib"}, + {K: "enable-default-pie"}, + {K: "disable-nls"}, + {K: "with-gnu-as"}, + {K: 
"with-gnu-ld"}, + {K: "with-system-zlib"}, + {K: "enable-languages", V: azalea.StringSpec{{Value: "c,c++,go"}}}, + {K: "with-native-system-header-dir", V: azalea.StringSpec{{Value: "/system/include"}}}, + {K: "with-multilib-list", V: azalea.Func{ + Ident: "arch", + + Args: []azalea.Arg{ + {K: "amd64", V: azalea.StringSpec{{Value: "''"}}}, + {K: "default", V: azalea.StringSpec{{Value: "unset", Ident: true}}}, + }, + }}, + }}, + {K: "make", V: []azalea.StringSpec{ + {{Value: "BOOT_CFLAGS='-O2 -g'"}}, + {{Value: "bootstrap"}}, + }}, + {K: "skip-check", V: azalea.StringSpec{{Value: "true", Ident: true}}}, + }, + }}, + {K: "inputs", V: []azalea.StringSpec{ + {{Value: "binutils", Ident: true}}, + {{Value: "mpc", Ident: true}}, + {{Value: "zlib", Ident: true}}, + {{Value: "libucontext", Ident: true}}, + {{Value: "kernel-headers", Ident: true}}, + }}, + }, + }}, nil}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + p, err := azalea.Parse(strings.NewReader(tc.data)) + if !reflect.DeepEqual(p, tc.want) { + t.Errorf("Parse: %#v, want %#v", p, tc.want) + } + if !reflect.DeepEqual(err, tc.err) { + t.Errorf("Parse: error = %v, want %v", err, tc.err) + } + }) + } +}