internal/rosa/azalea: ast and parser
All checks were successful
Test / Create distribution (push) Successful in 1m4s
Test / Sandbox (push) Successful in 2m43s
Test / ShareFS (push) Successful in 3m40s
Test / Hakurei (push) Successful in 3m54s
Test / Sandbox (race detector) (push) Successful in 5m15s
Test / Hakurei (race detector) (push) Successful in 6m21s
Test / Flake checks (push) Successful in 1m22s

This syntax is not final, but acts as a stopgap solution and a proof of concept.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-05-15 06:48:04 +09:00
parent c32c06b2e8
commit c2d44d5937
3 changed files with 592 additions and 0 deletions

View File

@@ -0,0 +1,386 @@
// Package azalea implements a proof-of-concept, domain-specific language for
// Rosa OS software packaging.
package azalea
import (
"errors"
"io"
"strconv"
"text/scanner"
)
// idents is a lookup table indexed by rune: an entry is true when that rune
// may appear in an identifier. Accepted runes are the ASCII digits, the ASCII
// letters, '-' and '_'. The table ends at 'z', so a rune must be bounds-checked
// before it is used as an index.
var idents = func() (tbl ['z' + 1]bool) {
	for _, span := range [...][2]rune{{'0', '9'}, {'A', 'Z'}, {'a', 'z'}} {
		for ch := span[0]; ch <= span[1]; ch++ {
			tbl[ch] = true
		}
	}
	tbl['-'], tbl['_'] = true, true
	return tbl
}()
// TokenError reports a token mismatch. Element 0 is the token that was
// expected, element 1 is the token that was found instead.
type TokenError [2]rune

// Error names both tokens using [scanner.TokenString].
func (e TokenError) Error() string {
	expected, found := scanner.TokenString(e[0]), scanner.TokenString(e[1])
	return "expected " + expected + ", found " + found
}
// ExprError is the token that could not begin or continue an expression.
type ExprError rune

// Error names the offending token using [scanner.TokenString].
func (e ExprError) Error() string {
	name := scanner.TokenString(rune(e))
	return "unexpected token " + name
}
// must1 unwraps a (value, error) pair: it hands back v when err is nil and
// panics with err otherwise.
func must1[T any](v T, err error) T {
	if err == nil {
		return v
	}
	panic(err)
}
// parser pairs a [scanner.Scanner] with the token it produced most recently.
type parser struct {
	// s is the underlying scanner.
	s scanner.Scanner
	// tok is the token stored by the latest call to scan.
	tok rune
}
// scan reads the next token from the underlying scanner, records it as the
// current token and returns it.
func (p *parser) scan() rune {
	tok := p.s.Scan()
	p.tok = tok
	return tok
}
// scanAs advances to the next token and panics with a [TokenError] unless that
// token is the expected one.
func (p *parser) scanAs(expects rune) {
	if got := p.scan(); got != expects {
		panic(TokenError{expects, got})
	}
}
// parseInt interprets the text of the current token as a base 10, 64-bit
// signed integer. It panics with the [strconv.ParseInt] error if the text is
// not such a number.
func (p *parser) parseInt() int64 {
	text := p.s.TokenText()
	return must1(strconv.ParseInt(text, 10, 64))
}
type (
	// A String is a single operand of a [StringSpec]: either the contents of
	// a string literal or the name of an identifier.
	String struct {
		// Value is the unquoted literal, or the identifier text.
		Value string
		// Ident is true when Value names an identifier rather than holding
		// literal text.
		Ident bool
	}

	// A StringSpec is a sequence of [String] operands that together describe
	// a statement evaluating down to a single string value.
	StringSpec []String
)
// parseString unquotes the text of the current token, which is expected to be
// a quoted or raw string literal. It panics with the [strconv.Unquote] error
// if the text is not a valid literal.
func (p *parser) parseString() string {
	quoted := p.s.TokenText()
	return must1(strconv.Unquote(quoted))
}
// appendStringSpec appends the operands of a '+'-separated string expression
// to data, starting at the token after the current one.
//
// op selects what is expected first. When true, the next token must be a '+'
// for parsing to continue; any other token ends the expression successfully.
// When false, the caller has already consumed a '+' and an operand must
// follow.
//
// Operands are string literals, raw string literals and identifiers. The
// scanner is always advanced past the final token of the expression, so on
// return the current token is the first one that does not belong to it.
//
// ok is false when a '+' is not followed by an operand; the current token is
// then the offending one. This applies to every '+', not only one consumed by
// the caller, so a dangling operator such as `"a" + 9` is rejected instead of
// being silently dropped.
func (p *parser) appendStringSpec(
	op bool,
	data StringSpec,
) (v StringSpec, ok bool) {
	v = data
	for {
		// Anything other than '+' after an operand ends the expression.
		if op && p.scan() != '+' {
			return v, true
		}

		switch p.scan() {
		case scanner.String, scanner.RawString:
			v = append(v, String{p.parseString(), false})
		case scanner.Ident:
			v = append(v, String{p.s.TokenText(), true})
		default:
			// A '+' was consumed but no operand follows.
			return v, false
		}
		op = true
	}
}
// A KV is a single entry of a brace-delimited list of pairs.
type KV struct {
	// K is the unquoted string key.
	K string
	// V is the parsed value expression, or nil when the key has no value.
	V any
}
type (
	// An Arg is one argument of a [Func].
	Arg struct {
		// K holds the identifiers naming the argument; several
		// comma-separated identifiers may share a single value.
		K []string
		// V is the parsed value expression.
		V any
		// R is set when the '=' of the argument is preceded by '*'.
		R bool
	}

	// A Func is a function call or a package declaration.
	Func struct {
		// Ident is the function or package identifier.
		Ident string
		// Package reports whether this is a package declaration.
		Package bool
		// Args holds the arguments in source order.
		Args []Arg
	}
)
// parseExpr parses the expression that starts at the current token.
//
// The value is nil at [scanner.EOF], an int64 for an integer, a [StringSpec]
// for a string expression, a [Func] for a call or package declaration, a []KV
// for a brace-delimited pair list and a []any for a bracket-delimited array.
//
// The boolean reports whether the scanner has already been advanced past the
// expression, i.e. whether the current token is the one that follows it. When
// false, the current token is still the last token of the expression and the
// caller must scan to move on.
//
// Syntax errors are reported by panicking with a [TokenError] or [ExprError].
func (p *parser) parseExpr() (any, bool) {
	switch p.tok {
	case scanner.EOF:
		return nil, false
	case scanner.Int:
		return p.parseInt(), false
	case scanner.String, scanner.RawString:
		v, ok := p.appendStringSpec(true, StringSpec{
			{p.parseString(), false},
		})
		if !ok {
			panic(TokenError{scanner.String, p.tok})
		}
		return v, true
	case scanner.Ident:
		return p.parseIdentExpr()
	case '{':
		return p.parsePairs(), false
	case '[':
		return p.parseArray(), false
	default:
		panic(ExprError(p.tok))
	}
}

// endStatement requires the statement that was just parsed to be terminated
// by ';'. advanced is the boolean result of parseExpr: when true the current
// token is already the one following the expression and is checked in place,
// otherwise the next token is scanned and checked.
func (p *parser) endStatement(advanced bool) {
	if !advanced {
		p.scanAs(';')
		return
	}
	// Without this check any token would be accepted as a terminator here.
	if p.tok != ';' {
		panic(TokenError{';', p.tok})
	}
}

// parseIdentExpr parses an expression starting with the identifier held by
// the current token: a function call, a package declaration, or a string
// expression whose first operand is that identifier.
func (p *parser) parseIdentExpr() (any, bool) {
	var v Func
	v.Ident = p.s.TokenText()
	if v.Package = v.Ident == "package"; v.Package {
		p.scanAs(scanner.Ident)
		v.Ident = p.s.TokenText()
	}

	// A package declaration is only meaningful with a body; do not let it
	// degrade into a plain identifier string.
	if p.scan() != '{' && v.Package {
		panic(TokenError{'{', p.tok})
	}

	switch p.tok {
	case '{':
		v.Args = p.parseArgs()
		return v, false
	case '+':
		s, ok := p.appendStringSpec(false, StringSpec{
			{v.Ident, true},
		})
		if !ok {
			panic(TokenError{scanner.String, p.tok})
		}
		return s, p.tok != scanner.EOF
	case scanner.EOF:
		return StringSpec{{v.Ident, true}}, false
	default:
		// The identifier stands alone; the current token follows it.
		return StringSpec{{v.Ident, true}}, true
	}
}

// parseArgs parses the arguments of a [Func] up to and including the closing
// '}'. The current token must be the opening '{'. It returns nil when there
// are no arguments.
func (p *parser) parseArgs() []Arg {
	var args []Arg
	for {
		switch p.scan() {
		case '}':
			return args
		case scanner.Ident:
			// First key of the next argument.
		default:
			panic(TokenError{scanner.Ident, p.tok})
		}

		arg := Arg{K: []string{p.s.TokenText()}}
		// delim is true while the previous token was a key, i.e. while a
		// ',' is what may follow.
		delim := true
	keys:
		for {
			switch p.scan() {
			case ',':
				if !delim {
					panic(ExprError(p.tok))
				}
				delim = false
			case scanner.Ident:
				if delim {
					panic(TokenError{',', p.tok})
				}
				delim = true
				arg.K = append(arg.K, p.s.TokenText())
			default:
				break keys
			}
		}

		switch p.tok {
		case '=':
		case '*':
			arg.R = true
			p.scanAs('=')
		default:
			panic(TokenError{'=', p.tok})
		}

		p.scan()
		var advanced bool
		arg.V, advanced = p.parseExpr()
		args = append(args, arg)
		p.endStatement(advanced)
	}
}

// parsePairs parses a list of key/value pairs up to and including the closing
// '}'. The current token must be the opening '{'. Each entry is a quoted
// string key followed either by ';' or by ':', a value expression and ';'.
// It returns nil when the list is empty.
func (p *parser) parsePairs() []KV {
	var pairs []KV
	for {
		switch p.scan() {
		case '}':
			return pairs
		case scanner.String:
			// Key of the next pair.
		default:
			panic(ExprError(p.tok))
		}

		pair := KV{K: p.parseString()}
		switch p.scan() {
		case ';':
			// Key without a value.
		case ':':
			p.scan()
			var advanced bool
			pair.V, advanced = p.parseExpr()
			p.endStatement(advanced)
		default:
			panic(ExprError(p.tok))
		}
		pairs = append(pairs, pair)
	}
}

// parseArray parses comma-separated expressions up to and including the
// closing ']'. The current token must be the opening '['. A single trailing
// ',' is accepted. It returns nil when the array is empty.
func (p *parser) parseArray() []any {
	var (
		elems []any
		// delim is true while the previous item was an element, i.e. while
		// a ',' is what may follow.
		delim bool
		// advanced is true when the current token has not been examined yet
		// because the previous element already scanned past its own end.
		advanced bool
	)
	for {
		if !advanced {
			p.scan()
		}

		switch p.tok {
		case ',':
			if !delim {
				panic(ExprError(','))
			}
			delim, advanced = false, false
			continue
		case ']':
			return elems
		case scanner.EOF:
			panic(ExprError(scanner.EOF))
		}

		if delim {
			panic(TokenError{',', p.tok})
		}
		delim = true

		var elem any
		elem, advanced = p.parseExpr()
		elems = append(elems, elem)
	}
}
// ScanError is the number of errors the scanner counted while the input was
// being parsed.
type ScanError int

// Error returns a fixed message; the count itself is not part of it.
func (e ScanError) Error() string {
	const msg = "aborting due to scanning errors"
	return msg
}
// Parse reads r to its end and returns the top-level expressions it contains,
// in source order. See parseExpr for the possible element types.
//
// Comments are scanned and skipped. An identifier consists of the runes
// listed in idents and may not begin with a digit.
//
// On failure the returned slice is nil. A syntax error is returned as the
// value the parser panicked with, such as a [TokenError] or [ExprError]. If
// the scanner counted errors, a [ScanError] holding the count is returned;
// when both happen, the two are combined with [errors.Join] so that neither
// is lost. The scanner's Error hook is left unset.
//
// A panic with a value that is not an error is not recovered.
func Parse(r io.Reader) (e []any, err error) {
	var p parser
	p.s.Init(r)
	p.s.Mode = scanner.ScanIdents |
		scanner.ScanInts |
		scanner.ScanStrings |
		scanner.ScanRawStrings |
		scanner.ScanComments |
		scanner.SkipComments
	p.s.IsIdentRune = func(ch rune, i int) bool {
		if i == 0 && ch >= '0' && ch <= '9' {
			return false
		}
		return ch > 0 && ch < rune(len(idents)) && idents[ch]
	}

	// The parser reports syntax errors by panicking; turn them back into an
	// ordinary error result here.
	defer func() {
		v := recover()
		if v == nil {
			return
		}
		parseErr, ok := v.(error)
		if !ok {
			panic(v)
		}

		// Do not hand out expressions parsed before the failure.
		e = nil
		if p.s.ErrorCount != 0 {
			err = errors.Join(ScanError(p.s.ErrorCount), parseErr)
			return
		}
		err = parseErr
	}()

	p.scan()
	for {
		expr, advanced := p.parseExpr()
		if expr == nil {
			break
		}
		e = append(e, expr)
		if !advanced {
			p.scan()
		}
	}

	if p.s.ErrorCount != 0 {
		return nil, ScanError(p.s.ErrorCount)
	}
	return e, nil
}

View File

@@ -0,0 +1,149 @@
package azalea_test
import (
_ "embed"
"reflect"
"strings"
"testing"
"text/scanner"
"hakurei.app/internal/rosa/azalea"
)
// sample holds the contents of testdata/gcc.az, embedded at build time. It is
// the input of the "gcc" case in TestParse.
//
//go:embed testdata/gcc.az
var sample string
// TestParse feeds each input to azalea.Parse and compares both the returned
// expressions and the returned error against the expected values.
func TestParse(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name    string
		input   string
		want    []any
		wantErr error
	}{
		{"invalid", "}", nil, azalea.ExprError('}')},
		{"bad sep", "f{v?}", nil, azalea.TokenError{'=', '?'}},
		{"bad ident", "f{9}", nil, azalea.TokenError{scanner.Ident, scanner.Int}},
		{"share bad sep", "f { v,,v = v; }", nil, azalea.ExprError(',')},
		{"share missing sep", "f { v v }", nil, azalea.TokenError{',', scanner.Ident}},

		{"ident string", `v`, []any{azalea.StringSpec{
			{Value: "v", Ident: true},
		}}, nil},
		{"ident string concat", `v+"\xfd"`, []any{azalea.StringSpec{
			{Value: "v", Ident: true},
			{Value: "\xfd"},
		}}, nil},
		{"truncated string concat", `v+`, nil,
			azalea.TokenError{scanner.String, scanner.EOF}},
		{"unexpected string concat", `v+9`, nil,
			azalea.TokenError{scanner.String, scanner.Int}},

		{"empty pairs", `{}`, []any{[]azalea.KV(nil)}, nil},
		{"short kv", `{"\x00":v;}`, []any{[]azalea.KV{
			{K: "\x00", V: azalea.StringSpec{azalea.String{Value: "v", Ident: true}}},
		}}, nil},
		{"truncated kv", `{"\x00"`, nil, azalea.ExprError(scanner.EOF)},
		{"ident kv", `{v="";}`, nil, azalea.ExprError(scanner.Ident)},

		{"empty array", `[]`, []any{[]any(nil)}, nil},
		{"integer array", `[9]`, []any{[]any{int64(9)}}, nil},
		{"short array", `[ "\x00" ]`, []any{
			[]any{azalea.StringSpec{{Value: "\x00"}}},
		}, nil},
		{"short array delim", `[ "\x00", ]`, []any{
			[]any{azalea.StringSpec{{Value: "\x00"}}},
		}, nil},
		{"missing array value", `[ "\x00", , v ]`, nil, azalea.ExprError(',')},
		{"missing array delimiter", `[ v0 v1 ]`, nil, azalea.TokenError{',', scanner.Ident}},
		{"truncated array", `[ "\x00"`, nil,
			azalea.ExprError(scanner.EOF)},

		{"gcc", sample, []any{azalea.Func{
			Ident:   "gcc",
			Package: true,
			Args: []azalea.Arg{
				{K: []string{"description"}, V: azalea.StringSpec{{Value: "The GNU Compiler Collection"}}},
				{K: []string{"website"}, V: azalea.StringSpec{{Value: "https://www.gnu.org/software/gcc"}}},
				{K: []string{"anitya"}, V: int64(6502)},
				{K: []string{"version"}, V: azalea.StringSpec{{Value: "16.1.0"}}, R: true},
				{K: []string{"source"}, V: azalea.Func{Ident: "remoteTar", Package: false, Args: []azalea.Arg{
					{K: []string{"url"}, V: azalea.StringSpec{
						{Value: "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"},
						{Value: "gcc-"}, {Value: "version", Ident: true},
						{Value: "/gcc-"}, {Value: "version", Ident: true},
						{Value: ".tar.gz"},
					}},
					{K: []string{"checksum"}, V: azalea.StringSpec{
						{Value: "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K"},
					}},
					{K: []string{"compress"}, V: azalea.StringSpec{{Value: "gzip", Ident: true}}},
				}}},
				{K: []string{"patches"}, V: []any{
					azalea.StringSpec{{Value: "musl-off64_t-loff_t.patch"}},
					azalea.StringSpec{{Value: "musl-legacy-lfs.patch"}},
				}},
				{K: []string{"exclusive"}, V: azalea.StringSpec{{Value: "true", Ident: true}}},
				{K: []string{"exec"}, V: azalea.Func{
					Ident: "make",
					Args: []azalea.Arg{
						{K: []string{"configure"}, V: []azalea.KV{
							{K: "disable-multilib"},
							{K: "enable-default-pie"},
							{K: "disable-nls"},
							{K: "with-gnu-as"},
							{K: "with-gnu-ld"},
							{K: "with-system-zlib"},
							{K: "enable-languages", V: azalea.StringSpec{{Value: "c,c++,go"}}},
							{K: "with-native-system-header-dir", V: azalea.StringSpec{{Value: "/system/include"}}},
							{K: "with-multilib-list", V: azalea.Func{
								Ident: "arch",
								Args: []azalea.Arg{
									{K: []string{"amd64", "arm64"}, V: azalea.StringSpec{{Value: "''"}}},
									{K: []string{"default"}, V: azalea.StringSpec{{Value: "unset", Ident: true}}},
								},
							}},
						}},
						{K: []string{"make"}, V: []any{
							azalea.StringSpec{{Value: "BOOT_CFLAGS='-O2 -g'"}},
							azalea.Func{
								Ident: "noop",
								Args: []azalea.Arg{
									{K: []string{"key"}, V: azalea.StringSpec{
										{Value: "value", Ident: true},
									}},
								},
							},
							azalea.StringSpec{{Value: "bootstrap"}},
						}},
						{K: []string{"skip-check"}, V: azalea.StringSpec{{Value: "true", Ident: true}}},
					},
				}},
				{K: []string{"inputs"}, V: []any{
					azalea.StringSpec{{Value: "binutils", Ident: true}},
					azalea.StringSpec{{Value: "mpc", Ident: true}},
					azalea.StringSpec{{Value: "zlib", Ident: true}},
					azalea.StringSpec{{Value: "libucontext", Ident: true}},
					azalea.StringSpec{{Value: "kernel-headers", Ident: true}},
				}},
			},
		}}, nil},
	}

	for i := range cases {
		c := &cases[i]
		t.Run(c.name, func(t *testing.T) {
			t.Parallel()

			got, err := azalea.Parse(strings.NewReader(c.input))
			if !reflect.DeepEqual(got, c.want) {
				t.Errorf("Parse: %#v, want %#v", got, c.want)
			}
			if !reflect.DeepEqual(err, c.wantErr) {
				t.Errorf("Parse: error = %v, want %v", err, c.wantErr)
			}
		})
	}
}

57
internal/rosa/azalea/testdata/gcc.az vendored Normal file
View File

@@ -0,0 +1,57 @@
// Sample package declaration used as a parser fixture: the test suite embeds
// this file and compares its parse result against a hand-written tree.
// Comments are skipped by the scanner and do not appear in that tree.
package gcc {
description = "The GNU Compiler Collection";
website = "https://www.gnu.org/software/gcc";
anitya = 6502;
// A '*' before '=' sets the R flag on the parsed argument.
version* = "16.1.0";
source = remoteTar {
url = "https://ftp.tsukuba.wide.ad.jp/software/gcc/releases/"+
"gcc-"+version+"/gcc-"+version+".tar.gz";
checksum = "4ASoWbxaA2FW7PAB0zzHDPC5XnNhyaAyjtDPpGzceSLeYnEIXsNYZR3PA_Zu5P0K";
compress = gzip;
};
patches = [
"musl-off64_t-loff_t.patch",
"musl-legacy-lfs.patch",
];
// GCC spends most of its time in its many configure scripts, however
// it also saturates the CPU for a consequential amount of time.
exclusive = true;
exec = make {
configure = {
// A key terminated directly by ';' carries no value.
"disable-multilib";
"enable-default-pie";
"disable-nls";
"with-gnu-as";
"with-gnu-ld";
"with-system-zlib";
"enable-languages": "c,c++,go";
"with-native-system-header-dir": "/system/include";
"with-multilib-list": arch {
// Comma-separated keys are collected into a single argument.
amd64, arm64 = "''";
default = unset;
};
};
make = [
"BOOT_CFLAGS='-O2 -g'",
noop { key = value; },
"bootstrap",
];
// This toolchain is hacked to pieces, it is not expected to ever work
// well in its current state. That does not matter as long as the
// toolchain it produces passes its own test suite.
skip-check = true;
};
inputs = [
binutils,
mpc,
zlib,
libucontext,
kernel-headers,
];
}