From c2d44d593742ad4363401b8e10006877e9d93c26 Mon Sep 17 00:00:00 2001
From: Ophestra
Date: Fri, 15 May 2026 06:48:04 +0900
Subject: [PATCH] internal/rosa/azalea: ast and parser

This syntax is not final, but acts as a stopgap solution and a proof of concept.

Signed-off-by: Ophestra
---
 internal/rosa/azalea/azalea.go       | 386 +++++++++++++++++++++++++++
 internal/rosa/azalea/azalea_test.go  | 149 +++++++++++
 internal/rosa/azalea/testdata/gcc.az |  57 ++++
 3 files changed, 592 insertions(+)
 create mode 100644 internal/rosa/azalea/azalea.go
 create mode 100644 internal/rosa/azalea/azalea_test.go
 create mode 100644 internal/rosa/azalea/testdata/gcc.az

diff --git a/internal/rosa/azalea/azalea.go b/internal/rosa/azalea/azalea.go
new file mode 100644
index 00000000..3e6ff567
--- /dev/null
+++ b/internal/rosa/azalea/azalea.go
@@ -0,0 +1,386 @@
+// Package azalea implements a proof-of-concept, domain-specific language for
+// Rosa OS software packaging.
+package azalea
+
+import (
+	"errors"
+	"io"
+	"strconv"
+	"text/scanner"
+)
+
+// idents is a table, indexed by rune value, of runes accepted in an identifier.
+var idents = [...]bool{ // sized by the largest index listed; unlisted runes are false
+	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true,
+	'7': true, '8': true, '9': true,
+
+	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
+	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
+	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
+	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,
+
+	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
+	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
+	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
+	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
+
+	'-': true, '_': true, // the only punctuation accepted inside an identifier
+}
+
+// TokenError describes an unexpected token.
// TokenError describes an unexpected token. Element 0 holds the token that was
// expected and element 1 holds the token that was found.
type TokenError [2]rune

// Error renders both tokens through [scanner.TokenString].
func (e TokenError) Error() string {
	return "expected " + scanner.TokenString(e[0]) +
		", found " + scanner.TokenString(e[1])
}

// ExprError is an unexpected token encountered while parsing an expression.
type ExprError rune

// Error renders the offending token through [scanner.TokenString].
func (e ExprError) Error() string {
	return "unexpected token " + scanner.TokenString(rune(e))
}

// must1 returns v, or panics with err if err is not nil.
func must1[T any](v T, err error) T {
	if err != nil {
		panic(err)
	}
	return v
}

// parser wraps a [scanner.Scanner] and retains the current token. Its methods
// report malformed input by panicking with an error value.
type parser struct {
	s   scanner.Scanner
	tok rune
}

// scan advances the underlying scanner to the next token, storing its result.
func (p *parser) scan() rune { p.tok = p.s.Scan(); return p.tok }

// scanAs advances the scanner and panics with a [TokenError] unless the new
// token is expects.
func (p *parser) scanAs(expects rune) {
	e := TokenError{expects, p.scan()}
	if e[0] != e[1] {
		panic(e)
	}
}

// endStatement requires the ';' that terminates a statement. next reports
// whether the value parser has already scanned one token past the value, in
// which case p.tok is that token and is checked in place; otherwise the next
// token is scanned. It panics with a [TokenError] if the terminator is not ';'.
func (p *parser) endStatement(next bool) {
	if !next {
		p.scanAs(';')
		return
	}
	if p.tok != ';' {
		panic(TokenError{';', p.tok})
	}
}

// parseInt parses the current token as a base 10 representation of a 64-bit
// signed integer. It panics with the [strconv] error on failure.
func (p *parser) parseInt() int64 {
	return must1(strconv.ParseInt(p.s.TokenText(), 10, 64))
}

// A String represents an identifier or string literal.
type String struct {
	// Unquoted literal text, or the identifier as written.
	Value string
	// Whether Value was written as an identifier rather than a literal.
	Ident bool
}

// A StringSpec describes a statement evaluating down to a string value. Its
// elements are the operands of a '+' concatenation, in source order.
type StringSpec []String

// parseString unquotes the current token as a string literal. It panics with
// the [strconv] error on failure.
func (p *parser) parseString() string {
	return must1(strconv.Unquote(p.s.TokenText()))
}

// appendStringSpec parses from the next token until the end of the StringSpec,
// appending every operand to data. It always advances past the final token, so
// on return p.tok holds the first token that is not part of the StringSpec.
//
// When op is true the next token is expected to be a '+' operator; anything
// else ends the StringSpec successfully. When op is false the caller has
// already consumed the operator and an operand must follow immediately.
//
// ok is false when an operand was required but the token found was neither a
// string literal nor an identifier. This applies after every '+', not only
// the one consumed by the caller.
func (p *parser) appendStringSpec(
	op bool,
	data StringSpec,
) (v StringSpec, ok bool) {
	v = data
	for {
		if op && p.scan() != '+' {
			return v, true
		}

		switch p.scan() {
		case scanner.String, scanner.RawString:
			v = append(v, String{Value: p.parseString()})

		case scanner.Ident:
			v = append(v, String{Value: p.s.TokenText(), Ident: true})

		default:
			// A '+' precedes this position on every path, so a missing
			// operand is always an error.
			return v, false
		}
		op = true
	}
}

// A KV holds a key/value pair.
type KV struct {
	// Unquoted string literal naming the pair.
	K string
	// Parsed value, or nil when the key is terminated without a value.
	V any
}

// An Arg represents an argument of [Func].
type Arg struct {
	// One or more comma-separated identifiers sharing the value.
	K []string
	// Parsed value.
	V any
	// Set when '*' is written between the keys and '='.
	R bool
}

// Func is a function call or package declaration.
type Func struct {
	// Function or package identifier.
	Ident string
	// Whether this is a package declaration.
	Package bool
	// Key-value arguments.
	Args []Arg
}

// parseExpr scans and parses the current expression, starting at p.tok. A nil
// return indicates [scanner.EOF].
//
// The boolean result reports whether the parser has already advanced past the
// expression, i.e. whether p.tok holds the token following it. When false,
// p.tok is still the final token of the expression and the caller must scan
// to move on.
func (p *parser) parseExpr() (any, bool) {
	switch p.tok {
	case scanner.EOF:
		return nil, false

	case scanner.Int:
		return p.parseInt(), false

	case scanner.String, scanner.RawString:
		v, ok := p.appendStringSpec(true, StringSpec{
			{Value: p.parseString()},
		})
		if !ok {
			panic(TokenError{scanner.String, p.tok})
		}
		return v, true

	case scanner.Ident:
		return p.parseIdent()

	case '{':
		return p.parsePairs(), false

	case '[':
		return p.parseArray(), false

	default:
		panic(ExprError(p.tok))
	}
}

// parseIdent parses an expression starting with the identifier in p.tok: a
// [Func] when a '{' follows, otherwise a [StringSpec] whose first operand is
// the identifier. The results have the same meaning as those of parseExpr.
func (p *parser) parseIdent() (any, bool) {
	var v Func
	v.Ident = p.s.TokenText()
	if v.Package = v.Ident == "package"; v.Package {
		p.scanAs(scanner.Ident)
		v.Ident = p.s.TokenText()
	}

	switch p.scan() {
	case '{':
		v.Args = p.parseArgs()
		return v, false

	case '+':
		s, ok := p.appendStringSpec(false, StringSpec{
			{Value: v.Ident, Ident: true},
		})
		if !ok {
			panic(TokenError{scanner.String, p.tok})
		}
		return s, p.tok != scanner.EOF

	case scanner.EOF:
		return StringSpec{{Value: v.Ident, Ident: true}}, false

	default:
		// Any other token, ';' included, is left in p.tok for the caller.
		return StringSpec{{Value: v.Ident, Ident: true}}, true
	}
}

// parseArgs parses the arguments of a [Func] up to and including the closing
// '}'. The opening '{' must be the current token.
func (p *parser) parseArgs() (args []Arg) {
	for {
		switch p.scan() {
		case '}':
			return args

		case scanner.Ident:
			// first key of the next argument

		default:
			panic(TokenError{scanner.Ident, p.tok})
		}

		arg := Arg{K: []string{p.s.TokenText()}}

		// Collect further comma-separated keys. delim tracks whether a ','
		// must come before the next key.
		delim := true
	keys:
		for {
			switch p.scan() {
			case ',':
				if !delim {
					panic(ExprError(p.tok))
				}
				delim = false

			case scanner.Ident:
				if delim {
					panic(TokenError{',', p.tok})
				}
				delim = true
				arg.K = append(arg.K, p.s.TokenText())

			default:
				break keys
			}
		}

		switch p.tok {
		case '=':
			// plain assignment

		case '*':
			arg.R = true
			p.scanAs('=')

		default:
			panic(TokenError{'=', p.tok})
		}

		p.scan()
		var next bool
		arg.V, next = p.parseExpr()
		args = append(args, arg)
		p.endStatement(next)
	}
}

// parsePairs parses a list of [KV] up to and including the closing '}'. The
// opening '{' must be the current token.
func (p *parser) parsePairs() (pairs []KV) {
	for {
		switch p.scan() {
		case '}':
			return pairs

		case scanner.String:
			// key of the next pair

		default:
			panic(ExprError(p.tok))
		}

		pair := KV{K: p.parseString()}
		switch p.scan() {
		case ';':
			// key without a value

		case ':':
			p.scan()
			var next bool
			pair.V, next = p.parseExpr()
			p.endStatement(next)

		default:
			panic(ExprError(p.tok))
		}
		pairs = append(pairs, pair)
	}
}

// parseArray parses comma-separated expressions up to and including the
// closing ']'. The opening '[' must be the current token. A trailing ',' is
// accepted.
func (p *parser) parseArray() (v []any) {
	var (
		// whether a ',' must come before the next element
		delim bool
		// whether p.tok already holds the token after the last element
		next bool
	)
	for {
		if !next {
			p.scan()
		}

		switch p.tok {
		case ',':
			if !delim {
				panic(ExprError(','))
			}
			delim, next = false, false
			continue

		case ']':
			return v

		case scanner.EOF:
			panic(ExprError(scanner.EOF))

		default:
			if delim {
				panic(TokenError{',', p.tok})
			}
			delim = true
		}

		var e any
		e, next = p.parseExpr()
		v = append(v, e)
	}
}

// ScanError is the number of errors reported by the scanner while parsing all
// expressions.
type ScanError int

func (ScanError) Error() string {
	return "aborting due to scanning errors"
}

// Parse parses expressions from r.
+func Parse(r io.Reader) (e []any, err error) { // results are named so the deferred recover can set err
+	var p parser
+	p.s.Init(r)
+
+	p.s.Mode = scanner.ScanIdents |
+		scanner.ScanInts |
+		scanner.ScanStrings |
+		scanner.ScanRawStrings |
+		scanner.ScanComments |
+		scanner.SkipComments // comments are recognised but never returned as tokens
+	p.s.IsIdentRune = func(ch rune, i int) bool {
+		if i == 0 && ch >= '0' && ch <= '9' { // an identifier cannot start with a digit
+			return false
+		}
+		return ch > 0 && ch < rune(len(idents)) && idents[ch] // range check guards the table lookup
+	}
+
+	defer func() { // parser methods report malformed input by panicking with an error
+		v := recover()
+		if v == nil {
+			return
+		}
+
+		_err, ok := v.(error)
+		if !ok {
+			panic(v) // not an error value: propagate the panic unchanged
+		}
+
+		if err == nil {
+			err = _err
+			return
+		}
+		err = errors.Join(err, _err)
+	}()
+
+	p.scan() // load the first token into p.tok
+	for {
+		expr, next := p.parseExpr()
+		if expr == nil { // parseExpr returns nil at scanner.EOF
+			break
+		}
+		e = append(e, expr)
+		if !next { // p.tok still belongs to expr; advance to the following token
+			p.scan()
+		}
+	}
+
+	if p.s.ErrorCount != 0 { // errors counted by the scanner itself
+		err = ScanError(p.s.ErrorCount)
+	}
+	return
+}
diff --git a/internal/rosa/azalea/azalea_test.go b/internal/rosa/azalea/azalea_test.go
new file mode 100644
index 00000000..21a69813
--- /dev/null
+++ b/internal/rosa/azalea/azalea_test.go
@@ -0,0 +1,149 @@
+package azalea_test
+
+import (
+	_ "embed"
+	"reflect"
+	"strings"
+	"testing"
+	"text/scanner"
+
+	"hakurei.app/internal/rosa/azalea"
+)
+
+//go:embed testdata/gcc.az
+var sample string
+
+func TestParse(t *testing.T) {
+	t.Parallel()
+
+	testCases := []struct {
+		name string // subtest name
+		data string // source text handed to azalea.Parse
+		want []any  // expected expressions, compared with reflect.DeepEqual
+		err  error  // expected error, compared with reflect.DeepEqual
+	}{
+		{"invalid", "}", nil, azalea.ExprError('}')},
+		{"bad sep", "f{v?}", nil, azalea.TokenError{'=', '?'}},
+		{"bad ident", "f{9}", nil, azalea.TokenError{scanner.Ident, scanner.Int}},
+		{"share bad sep", "f { v,,v = v; }", nil, azalea.ExprError(',')},
+		{"share missing sep", "f { v v }", nil, azalea.TokenError{',', scanner.Ident}},
+
+		{"ident string", `v`, []any{azalea.StringSpec{
+			{Value: "v", Ident: true},
+		}}, nil},
+		{"ident string concat", `v+"\xfd"`, []any{azalea.StringSpec{
+			{Value: "v", Ident: true},
+			{Value: "\xfd"},
+		}}, nil},
+		{"truncated string concat", `v+`, nil,
+			azalea.TokenError{scanner.String, scanner.EOF}},
+		{"unexpected string concat", `v+9`, nil,
+			azalea.TokenError{scanner.String, scanner.Int}},
+
+		{"empty pairs", `{}`, []any{[]azalea.KV(nil)}, nil},
+		{"short kv", `{"\x00":v;}`, []any{[]azalea.KV{
+			{K: "\x00", V: azalea.StringSpec{azalea.String{Value: "v", Ident: true}}},
+		}}, nil},
+		{"truncated kv", `{"\x00"`, nil, azalea.ExprError(scanner.EOF)},
+		{"ident kv", `{v="";}`, nil, azalea.ExprError(scanner.Ident)},
+
+		{"empty array", `[]`, []any{[]any(nil)}, nil},
+		{"integer array", `[9]`, []any{[]any{int64(9)}}, nil},
+		{"short array", `[ "\x00" ]`, []any{
+			[]any{azalea.StringSpec{{Value: "\x00"}}},
+		}, nil},
+		{"short array delim", `[ "\x00", ]`, []any{
+			[]any{azalea.StringSpec{{Value: "\x00"}}},
+		}, nil},
+		{"missing array value", `[ "\x00", , v ]`, nil, azalea.ExprError(',')},
+		{"missing array delimiter", `[ v0 v1 ]`, nil, azalea.TokenError{',', scanner.Ident}},
+		{"truncated array", `[ "\x00"`, nil,
+			azalea.ExprError(scanner.EOF)},
+
+		{"gcc", sample, []any{azalea.Func{ // sample is the embedded testdata/gcc.az
+			Ident:   "gcc",
+			Package: true,
+
+			Args: []azalea.Arg{
+				{K: []string{"description"}, V: azalea.StringSpec{{Value: "The GNU Compiler Collection"}}},
+				{K: []string{"website"}, V: azalea.StringSpec{{Value: "https://www.gnu.org/software/gcc"}}},
+				{K: []string{"anitya"}, V: int64(6502)},
+				{K: []string{"version"}, V: azalea.StringSpec{{Value: "16.1.0"}}, R: true},
+				{K: []string{"source"}, V: azalea.Func{Ident: "remoteTar", Package: false, Args: []azalea.Arg{
+					{K: []string{"url"}, V: azalea.StringSpec{
+						{Value: "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"},
+						{Value: "gcc-"}, {Value: "version", Ident: true},
+						{Value: "/gcc-"}, {Value: "version", Ident: true},
+						{Value: ".tar.gz"},
+					}},
+					{K: []string{"checksum"}, V: azalea.StringSpec{
+						{Value: "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K"},
+					}},
+					{K: []string{"compress"}, V: azalea.StringSpec{{Value: "gzip", Ident: true}}},
+				}}},
+				{K: []string{"patches"}, V: []any{
+					azalea.StringSpec{{Value: "musl-off64_t-loff_t.patch"}},
+					azalea.StringSpec{{Value: "musl-legacy-lfs.patch"}},
+				}},
+				{K: []string{"exclusive"}, V: azalea.StringSpec{{Value: "true", Ident: true}}},
+				{K: []string{"exec"}, V: azalea.Func{
+					Ident: "make",
+
+					Args: []azalea.Arg{
+						{K: []string{"configure"}, V: []azalea.KV{
+							{K: "disable-multilib"},
+							{K: "enable-default-pie"},
+							{K: "disable-nls"},
+							{K: "with-gnu-as"},
+							{K: "with-gnu-ld"},
+							{K: "with-system-zlib"},
+							{K: "enable-languages", V: azalea.StringSpec{{Value: "c,c++,go"}}},
+							{K: "with-native-system-header-dir", V: azalea.StringSpec{{Value: "/system/include"}}},
+							{K: "with-multilib-list", V: azalea.Func{
+								Ident: "arch",
+
+								Args: []azalea.Arg{
+									{K: []string{"amd64", "arm64"}, V: azalea.StringSpec{{Value: "''"}}},
+									{K: []string{"default"}, V: azalea.StringSpec{{Value: "unset", Ident: true}}},
+								},
+							}},
+						}},
+						{K: []string{"make"}, V: []any{
+							azalea.StringSpec{{Value: "BOOT_CFLAGS='-O2 -g'"}},
+							azalea.Func{
+								Ident: "noop",
+								Args: []azalea.Arg{
+									{K: []string{"key"}, V: azalea.StringSpec{
+										{Value: "value", Ident: true},
+									}},
+								},
+							},
+							azalea.StringSpec{{Value: "bootstrap"}},
+						}},
+						{K: []string{"skip-check"}, V: azalea.StringSpec{{Value: "true", Ident: true}}},
+					},
+				}},
+				{K: []string{"inputs"}, V: []any{
+					azalea.StringSpec{{Value: "binutils", Ident: true}},
+					azalea.StringSpec{{Value: "mpc", Ident: true}},
+					azalea.StringSpec{{Value: "zlib", Ident: true}},
+					azalea.StringSpec{{Value: "libucontext", Ident: true}},
+					azalea.StringSpec{{Value: "kernel-headers", Ident: true}},
+				}},
+			},
+		}}, nil},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			p, err := azalea.Parse(strings.NewReader(tc.data))
+			if !reflect.DeepEqual(p, tc.want) { // both checks run, so one failure does not hide the other
+				t.Errorf("Parse: %#v, want %#v", p, tc.want)
+			}
+			if !reflect.DeepEqual(err, tc.err) {
+				t.Errorf("Parse: error = %v, want %v", err, tc.err)
+			}
+		})
+	}
+}
diff --git a/internal/rosa/azalea/testdata/gcc.az b/internal/rosa/azalea/testdata/gcc.az
new file mode 100644
index 00000000..c6e5693b
--- /dev/null
+++ b/internal/rosa/azalea/testdata/gcc.az
@@ -0,0 +1,57 @@
+package gcc {
+	description = "The GNU Compiler Collection";
+	website = "https://www.gnu.org/software/gcc";
+	anitya = 6502;
+
+	version* = "16.1.0";
+	source = remoteTar {
+		url = "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"+
+			"gcc-"+version+"/gcc-"+version+".tar.gz";
+		checksum = "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K";
+		compress = gzip;
+	};
+	patches = [
+		"musl-off64_t-loff_t.patch",
+		"musl-legacy-lfs.patch",
+	];
+
+	// GCC spends most of its time in its many configure scripts, however
+	// it also saturates the CPU for a consequential amount of time.
+	exclusive = true;
+
+	exec = make {
+		configure = {
+			"disable-multilib";
+			"enable-default-pie";
+			"disable-nls";
+			"with-gnu-as";
+			"with-gnu-ld";
+			"with-system-zlib";
+			"enable-languages": "c,c++,go";
+			"with-native-system-header-dir": "/system/include";
+			"with-multilib-list": arch {
+				amd64, arm64 = "''";
+				default = unset;
+			};
+		};
+		make = [
+			"BOOT_CFLAGS='-O2 -g'",
+			noop { key = value; },
+			"bootstrap",
+		];
+
+		// This toolchain is hacked to pieces, it is not expected to ever work
+		// well in its current state. That does not matter as long as the
+		// toolchain it produces passes its own test suite.
+		skip-check = true;
+	};
+
+	inputs = [
+		binutils,
+
+		mpc,
+		zlib,
+		libucontext,
+		kernel-headers,
+	];
+}