From d38d306147979cbc7f161d1150d6786572598497 Mon Sep 17 00:00:00 2001
From: Ophestra
Date: Fri, 15 May 2026 06:48:04 +0900
Subject: [PATCH] internal/rosa/azalea: ast and parser

This syntax is not final, but acts as a stopgap solution and a proof of concept.

Signed-off-by: Ophestra
---
 internal/rosa/azalea/azalea.go       | 348 +++++++++++++++++++++++++++
 internal/rosa/azalea/azalea_test.go  | 156 ++++++++++++
 internal/rosa/azalea/testdata/gcc.az |  57 +++++
 3 files changed, 561 insertions(+)
 create mode 100644 internal/rosa/azalea/azalea.go
 create mode 100644 internal/rosa/azalea/azalea_test.go
 create mode 100644 internal/rosa/azalea/testdata/gcc.az

diff --git a/internal/rosa/azalea/azalea.go b/internal/rosa/azalea/azalea.go
new file mode 100644
index 00000000..4872e9d1
--- /dev/null
+++ b/internal/rosa/azalea/azalea.go
@@ -0,0 +1,348 @@
+// Package azalea implements a proof-of-concept, domain-specific language for
+// Rosa OS software packaging.
+package azalea
+
+import (
+	"errors"
+	"io"
+	"strconv"
+	"text/scanner"
+)
+
+// idents is a lookup table indexed by rune: an entry is true when that rune
+// may appear in an identifier. It marks the ASCII digits, the ASCII letters,
+// '-' and '_'. The table ends at 'z', so any rune must be checked against
+// len(idents) before it is used as an index.
+var idents = func() (tab ['z' + 1]bool) {
+	for _, span := range [...][2]rune{{'0', '9'}, {'A', 'Z'}, {'a', 'z'}} {
+		for ch := span[0]; ch <= span[1]; ch++ {
+			tab[ch] = true
+		}
+	}
+	tab['-'], tab['_'] = true, true
+	return
+}()
+
+// TokenError describes an unexpected token.
+type TokenError [2]rune
+
+// Error reports the expected token, e[0], followed by the token that was
+// found instead, e[1], both rendered by [scanner.TokenString].
+func (e TokenError) Error() string {
+	want, found := scanner.TokenString(e[0]), scanner.TokenString(e[1])
+	return "expected " + want + ", found " + found
+}
+
+// ExprError is a token that cannot appear where an expression was being
+// parsed.
+type ExprError rune
+
+// Error names the offending token using [scanner.TokenString].
+func (e ExprError) Error() string {
+	tok := scanner.TokenString(rune(e))
+	return "unexpected token " + tok
+}
+
+// must1 passes v through when err is nil and panics with err otherwise.
+func must1[T any](v T, err error) T {
+	if err == nil {
+		return v
+	}
+	panic(err)
+}
+
+// parser pairs a scanner with the token it produced most recently.
+type parser struct {
+	s   scanner.Scanner
+	tok rune
+}
+
+// scan reads the next token from the scanner, remembers it in tok and
+// returns it.
+func (p *parser) scan() rune {
+	p.tok = p.s.Scan()
+	return p.tok
+}
+
+// expects panics with a [TokenError] unless the current token is expects.
+func (p *parser) expects(expects rune) {
+	if p.tok == expects {
+		return
+	}
+	panic(TokenError{expects, p.tok})
+}
+
+// scanAs reads the next token and panics with a [TokenError] unless it is
+// expects.
+func (p *parser) scanAs(expects rune) {
+	p.scan()
+	p.expects(expects)
+}
+
+// An Int is the value of an integer literal.
+type Int int64
+
+// GoString renders v as the Go expression azalea.Int(n) with n in base 10.
+func (v Int) GoString() string {
+	digits := strconv.FormatInt(int64(v), 10)
+	return "azalea.Int(" + digits + ")"
+}
+
+// parseInt converts the text of the current token to an Int, reading it as a
+// base 10, 64-bit signed integer. It panics with the [strconv.ParseInt] error
+// when the text is not such a number.
+func (p *parser) parseInt() Int {
+	n, err := strconv.ParseInt(p.s.TokenText(), 10, 64)
+	if err != nil {
+		panic(err)
+	}
+	return Int(n)
+}
+
+// A String is the content of a string literal with its quoting removed.
+type String string
+
+// GoString renders v as the Go expression azalea.String("...") with the
+// content quoted by [strconv.Quote].
+func (v String) GoString() string {
+	quoted := strconv.Quote(string(v))
+	return "azalea.String(" + quoted + ")"
+}
+
+// parseString unquotes the text of the current token with [strconv.Unquote]
+// and panics with its error when the text is not a valid literal.
+func (p *parser) parseString() String {
+	s, err := strconv.Unquote(p.s.TokenText())
+	if err != nil {
+		panic(err)
+	}
+	return String(s)
+}
+
+// An Ident holds the name of an identifier.
+type Ident string
+
+// GoString renders v as the Go expression azalea.Ident("...") with the name
+// quoted by [strconv.Quote].
+func (v Ident) GoString() string {
+	return "azalea.Ident(" + strconv.Quote(string(v)) + ")"
+}
+
+// A Val is a sequence of expressions joined by the '+' operator. Only the
+// [String] type supports concatenation; the parser itself does not check the
+// operand types.
+type Val []any
+
+// parseVal parses one expression followed by any number of '+' expression
+// pairs. It starts on the first token of the value and stops on the first
+// token that is not part of it.
+func (p *parser) parseVal() (v Val) {
+	v = append(v, p.parseExpr())
+	for p.tok == '+' {
+		p.scan()
+		v = append(v, p.parseExpr())
+	}
+	return
+}
+
+// An Array holds the values of a '[' ... ']' expression in source order.
+type Array []Val
+
+// A KV holds one entry of a '{' ... '}' pair list. V is nil for an entry
+// written as a bare key.
+type KV struct {
+	K String
+	V Val
+}
+
+// An Arg represents an argument of [Func].
+type Arg struct {
+	// K lists the argument names. Several names separated by ',' share V.
+	K []Ident
+	// V is the value following '='.
+	V Val
+	// R is set when '*' appears between the names and '='.
+	// NOTE(review): the parser only records the marker; confirm its meaning
+	// against the consumer of the AST.
+	R bool
+}
+
+// Func is a function call or package declaration.
+type Func struct {
+	// Function or package identifier.
+	Ident Ident
+	// Whether this is a package declaration.
+	Package bool
+	// Key-value arguments.
+	Args []Arg
+}
+
+// parseExpr parses the expression starting at the current token and leaves
+// the parser on the first token after it. The result is an [Int], a [String],
+// an [Ident], a [Func], a []KV or an [Array]. It panics with [ExprError] or
+// [TokenError] on malformed input.
+func (p *parser) parseExpr() any {
+	switch p.tok {
+	case scanner.Int:
+		v := p.parseInt()
+		p.scan()
+		return v
+
+	case scanner.String, scanner.RawString:
+		v := p.parseString()
+		p.scan()
+		return v
+
+	case scanner.Ident:
+		return p.parseFunc()
+
+	case '{':
+		return p.parsePairs()
+
+	case '[':
+		return p.parseArray()
+
+	default:
+		panic(ExprError(p.tok))
+	}
+}
+
+// parseFunc parses an expression that starts with an identifier: a bare
+// [Ident], or a [Func] when a '{' ... '}' argument block follows. A leading
+// "package" keyword marks the Func as a package declaration.
+func (p *parser) parseFunc() any {
+	var v Func
+	v.Ident = Ident(p.s.TokenText())
+	if v.Package = v.Ident == "package"; v.Package {
+		p.scanAs(scanner.Ident)
+		v.Ident = Ident(p.s.TokenText())
+	}
+
+	if p.scan() != '{' {
+		if v.Package {
+			// A bare Ident cannot carry the Package flag, so a declaration
+			// without a body is rejected instead of silently degrading to
+			// an identifier.
+			panic(TokenError{'{', p.tok})
+		}
+		return v.Ident
+	}
+
+	for {
+		switch p.scan() {
+		case '}':
+			p.scan()
+			return v
+
+		case scanner.Ident:
+			v.Args = append(v.Args, p.parseArg())
+
+		default:
+			panic(TokenError{scanner.Ident, p.tok})
+		}
+	}
+}
+
+// parseArg parses one argument of a [Func]. It starts on the first name and
+// returns with the parser on the terminating ';'.
+func (p *parser) parseArg() Arg {
+	arg := Arg{K: []Ident{Ident(p.s.TokenText())}}
+
+	// delim is true while the previous token was a name, i.e. while a ','
+	// is acceptable and another name is not.
+	delim := true
+keys:
+	for {
+		switch p.scan() {
+		case ',':
+			if !delim {
+				panic(ExprError(p.tok))
+			}
+			delim = false
+
+		case scanner.Ident:
+			if delim {
+				panic(TokenError{',', p.tok})
+			}
+			delim = true
+			arg.K = append(arg.K, Ident(p.s.TokenText()))
+
+		default:
+			break keys
+		}
+	}
+
+	switch p.tok {
+	case '=':
+
+	case '*':
+		arg.R = true
+		p.scanAs('=')
+
+	default:
+		panic(TokenError{'=', p.tok})
+	}
+
+	p.scan()
+	arg.V = p.parseVal()
+	p.expects(';')
+	return arg
+}
+
+// parsePairs parses a '{' ... '}' list of string keys, each either bare or
+// followed by ':' and a value, and each terminated by ';'. An empty list
+// yields a nil slice.
+func (p *parser) parsePairs() []KV {
+	var v []KV
+	for {
+		switch p.scan() {
+		case '}':
+			p.scan()
+			return v
+
+		case scanner.String:
+			pair := KV{K: p.parseString()}
+			switch p.scan() {
+			case ';':
+
+			case ':':
+				p.scan()
+				pair.V = p.parseVal()
+				p.expects(';')
+
+			default:
+				panic(ExprError(p.tok))
+			}
+			v = append(v, pair)
+
+		default:
+			panic(ExprError(p.tok))
+		}
+	}
+}
+
+// parseArray parses a '[' ... ']' list of values separated by ','. A single
+// trailing ',' is accepted and an empty list yields a nil [Array].
+func (p *parser) parseArray() Array {
+	var (
+		v Array
+		// delim is true while the previous element was a value, i.e. while
+		// a ',' is acceptable and another value is not.
+		delim bool
+	)
+	p.scan()
+	for {
+		switch p.tok {
+		case ',':
+			if !delim {
+				panic(ExprError(','))
+			}
+			p.scan()
+			delim = false
+
+		case ']':
+			p.scan()
+			return v
+
+		case scanner.EOF:
+			panic(ExprError(scanner.EOF))
+
+		default:
+			if delim {
+				panic(TokenError{',', p.tok})
+			}
+			delim = true
+			v = append(v, p.parseVal())
+		}
+	}
+}
+
+// ScanError is the number of errors reported by the scanner while parsing
+// all expressions.
+type ScanError int
+
+// Error returns a fixed message; the count is the value of the ScanError.
+func (ScanError) Error() string {
+	return "aborting due to scanning errors"
+}
+
+// Parse parses expressions from r.
+//
+// The scanner recognises identifiers, integers, quoted and raw strings, and
+// skips comments. Expressions are parsed one after another until the end of
+// input.
+//
+// A syntax error is raised as a panic inside the parser and returned here as
+// err; a panic whose value is not an error is re-raised. When the scanner
+// itself reported errors, err contains a [ScanError] holding their count,
+// joined with the syntax error if there was one. The scanner also writes its
+// messages to os.Stderr because no Error handler is installed.
+//
+// On error, e holds the expressions that were parsed before the failure.
+func Parse(r io.Reader) (e []any, err error) {
+	var p parser
+	p.s.Init(r)
+
+	p.s.Mode = scanner.ScanIdents |
+		scanner.ScanInts |
+		scanner.ScanStrings |
+		scanner.ScanRawStrings |
+		scanner.ScanComments |
+		scanner.SkipComments
+	p.s.IsIdentRune = func(ch rune, i int) bool {
+		// A leading digit starts an integer literal, never an identifier.
+		if i == 0 && ch >= '0' && ch <= '9' {
+			return false
+		}
+		// idents is shorter than the rune range, so check bounds first.
+		return ch > 0 && ch < rune(len(idents)) && idents[ch]
+	}
+
+	defer func() {
+		// Collect scanner errors on every exit path. Doing this after the
+		// parse loop would lose them whenever the parser panics, which is
+		// the usual consequence of a malformed token.
+		if p.s.ErrorCount != 0 {
+			err = ScanError(p.s.ErrorCount)
+		}
+
+		v := recover()
+		if v == nil {
+			return
+		}
+
+		perr, ok := v.(error)
+		if !ok {
+			panic(v)
+		}
+
+		if err != nil {
+			perr = errors.Join(err, perr)
+		}
+		err = perr
+	}()
+
+	p.scan()
+	for p.tok != scanner.EOF {
+		e = append(e, p.parseExpr())
+	}
+	return e, nil
+}
diff --git a/internal/rosa/azalea/azalea_test.go b/internal/rosa/azalea/azalea_test.go
new file mode 100644
index 00000000..f2dec252
--- /dev/null
+++ b/internal/rosa/azalea/azalea_test.go
@@ -0,0 +1,156 @@
+package azalea_test
+
+import (
+	_ "embed"
+	"reflect"
+	"strings"
+	"testing"
+	"text/scanner"
+
+	.
"hakurei.app/internal/rosa/azalea"
+)
+
+// sample is the content of testdata/gcc.az, embedded at build time.
+//
+//go:embed testdata/gcc.az
+var sample string
+
+// TestParse feeds the data of each case to Parse and compares both the
+// returned expressions and the returned error against the expected values
+// with reflect.DeepEqual.
+func TestParse(t *testing.T) {
+	t.Parallel()
+
+	testCases := []struct {
+		// name identifies the subtest.
+		name string
+		// data is the source text handed to Parse.
+		data string
+		// want is the expected slice of parsed expressions.
+		want []any
+		// err is the expected error, compared by value.
+		err error
+	}{
+		{"invalid", "}", nil, ExprError('}')},
+		{"bad sep", "f{v?}", nil, TokenError{'=', '?'}},
+		{"bad ident", "f{9}", nil, TokenError{scanner.Ident, scanner.Int}},
+		{"share bad sep", "f { v,,v = v; }", nil, ExprError(',')},
+		{"share missing sep", "f { v v }", nil, TokenError{',', scanner.Ident}},
+
+		{"ident", `v`, []any{Ident("v")}, nil},
+		{"concat", `f { v = v+"\xfd"+p{}+9; }`, []any{Func{
+			Ident: "f",
+
+			Args: []Arg{{K: []Ident{"v"}, V: Val{
+				Ident("v"),
+				String("\xfd"),
+				Func{Ident: "p"},
+				Int(9),
+			}}},
+		}}, nil},
+		{"truncated string concat", `f { v = v+; }`, nil,
+			ExprError(';')},
+
+		{"empty pairs", `{}`, []any{[]KV(nil)}, nil},
+		{"short kv", `{"\x00":v;}`, []any{[]KV{
+			{K: "\x00", V: Val{Ident("v")}},
+		}}, nil},
+		{"truncated kv", `{"\x00"`, nil, ExprError(scanner.EOF)},
+		{"ident kv", `{v="";}`, nil, ExprError(scanner.Ident)},
+
+		{"empty array", `[]`, []any{Array(nil)}, nil},
+		{"integer array", `[9]`, []any{Array{{Int(9)}}}, nil},
+		{"short array", `[ "\x00" ]`, []any{
+			Array{{String("\x00")}},
+		}, nil},
+		{"short array delim", `[ "\x00", ]`, []any{
+			Array{{String("\x00")}},
+		}, nil},
+		{"missing array value", `[ "\x00", , v ]`, nil, ExprError(',')},
+		{"missing array delimiter", `[ v0 v1 ]`, nil, TokenError{',', scanner.Ident}},
+		{"truncated array", `[ "\x00"`, nil,
+			ExprError(scanner.EOF)},
+
+		// The expected tree below mirrors testdata/gcc.az entry by entry.
+		{"gcc", sample, []any{Func{
+			Ident:   Ident("gcc"),
+			Package: true,
+
+			Args: []Arg{
+				{K: []Ident{Ident("description")}, V: Val{String("The GNU Compiler Collection")}},
+				{K: []Ident{Ident("website")}, V: Val{String("https://www.gnu.org/software/gcc")}},
+				{K: []Ident{Ident("anitya")}, V: Val{Int(6502)}},
+
+				{K: []Ident{Ident("version")}, V: Val{String("16.1.0")}, R: true},
+				{K: []Ident{Ident("source")}, V: Val{Func{
+					Ident: Ident("remoteTar"),
+
+					Args: []Arg{
+						{K: []Ident{Ident("url")}, V: Val{
+							String("https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"),
+							String("gcc-"),
+							Ident("version"),
+							String("/gcc-"),
+							Ident("version"),
+							String(".tar.gz"),
+						}},
+						{K: []Ident{Ident("checksum")}, V: Val{String("4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K")}},
+						{K: []Ident{Ident("compress")}, V: Val{Ident("gzip")}},
+					},
+				}}},
+				{K: []Ident{Ident("patches")}, V: Val{Array{
+					{String("musl-off64_t-loff_t.patch")},
+					{String("musl-legacy-lfs.patch")},
+				}}},
+
+				{K: []Ident{Ident("exclusive")}, V: Val{Ident("true")}},
+
+				{K: []Ident{Ident("exec")}, V: Val{Func{
+					Ident: Ident("make"),
+
+					Args: []Arg{
+						{K: []Ident{Ident("configure")}, V: Val{[]KV{
+							{K: String("disable-multilib")},
+							{K: String("enable-default-pie")},
+							{K: String("disable-nls")},
+							{K: String("with-gnu-as")},
+							{K: String("with-gnu-ld")},
+							{K: String("with-system-zlib")},
+							{K: String("enable-languages"), V: Val{String("c,c++,go")}},
+							{K: String("with-native-system-header-dir"), V: Val{String("/system/include")}},
+							{K: String("with-multilib-list"), V: Val{Func{
+								Ident: Ident("arch"),
+
+								Args: []Arg{
+									{K: []Ident{Ident("amd64"), Ident("arm64")}, V: Val{String("''")}},
+									{K: []Ident{Ident("default")}, V: Val{Ident("unset")}},
+								},
+							}}},
+						}}},
+						{K: []Ident{Ident("make")}, V: Val{Array{
+							{String("BOOT_CFLAGS='-O2 -g'")},
+							{
+								Func{Ident: Ident("noop"), Args: []Arg{{K: []Ident{Ident("key")}, V: Val{Ident("value")}}}},
+								String("\x00"),
+							},
+							{String("bootstrap")},
+						}}},
+
+						{K: []Ident{Ident("skip-check")}, V: Val{Ident("true")}},
+					},
+				}}},
+
+				{K: []Ident{Ident("inputs")}, V: Val{Array{
+					{Ident("binutils")},
+					{Ident("mpc")},
+					{Ident("zlib")},
+					{Ident("libucontext")},
+					{Ident("kernel-headers")},
+				}}},
+			},
+		}}, nil},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			p, err := Parse(strings.NewReader(tc.data))
+			// Both checks run for every case, so an error case also
+			// verifies the expressions returned alongside the error.
+			if !reflect.DeepEqual(p, tc.want) {
+				t.Errorf("Parse: %#v, want %#v", p, tc.want)
+			}
+			if !reflect.DeepEqual(err, tc.err) {
+				t.Errorf("Parse: error = %v, want %v", err, tc.err)
+			}
+		})
+	}
+}
diff --git a/internal/rosa/azalea/testdata/gcc.az b/internal/rosa/azalea/testdata/gcc.az
new file mode 100644
index 00000000..2ef069a9
--- /dev/null
+++ b/internal/rosa/azalea/testdata/gcc.az
@@ -0,0 +1,57 @@
+package gcc {
+	description = "The GNU Compiler Collection";
+	website = "https://www.gnu.org/software/gcc";
+	anitya = 6502;
+
+	version* = "16.1.0";
+	source = remoteTar {
+		url = "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"+
+			"gcc-"+version+"/gcc-"+version+".tar.gz";
+		checksum = "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K";
+		compress = gzip;
+	};
+	patches = [
+		"musl-off64_t-loff_t.patch",
+		"musl-legacy-lfs.patch",
+	];
+
+	// GCC spends most of its time in its many configure scripts, however
+	// it also saturates the CPU for a consequential amount of time.
+	exclusive = true;
+
+	exec = make {
+		configure = {
+			"disable-multilib";
+			"enable-default-pie";
+			"disable-nls";
+			"with-gnu-as";
+			"with-gnu-ld";
+			"with-system-zlib";
+			"enable-languages": "c,c++,go";
+			"with-native-system-header-dir": "/system/include";
+			"with-multilib-list": arch {
+				amd64, arm64 = "''";
+				default = unset;
+			};
+		};
+		make = [
+			"BOOT_CFLAGS='-O2 -g'",
+			noop { key = value; } + "\x00",
+			"bootstrap",
+		];
+
+		// This toolchain is hacked to pieces, it is not expected to ever work
+		// well in its current state. That does not matter as long as the
+		// toolchain it produces passes its own test suite.
+		skip-check = true;
+	};
+
+	inputs = [
+		binutils,
+
+		mpc,
+		zlib,
+		libucontext,
+		kernel-headers,
+	];
+}