hakurei/internal/app/finalise.go

package app

import (
	"bytes"
	"context"
	"encoding/gob"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"maps"
	"os"
	"os/user"
	"slices"
	"strings"
	"sync/atomic"
	"syscall"

	"hakurei.app/container"
	"hakurei.app/hst"
	"hakurei.app/internal/app/state"
	"hakurei.app/system"
	"hakurei.app/system/acl"
)
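
// newWithMessage returns a finalise step [hst.AppError] with message msg, wrapping [os.ErrInvalid].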
func newWithMessage(msg string) error { return newWithMessageError(msg, os.ErrInvalid) }
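
// newWithMessageError returns a finalise step [hst.AppError] with message msg, wrapping err.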
func newWithMessageError(msg string, err error) error {
	return &hst.AppError{Step: "finalise", Err: err, Msg: msg}
}

// An outcome is the runnable state of a hakurei container via [hst.Config].
type outcome struct {
	// initial [hst.Config] gob stream for state data;
	// this is prepared ahead of time as config is clobbered during seal creation
	ct io.WriterTo

	sys *system.I
	ctx context.Context

	container container.Params

	// Populated during outcome.finalise.
	proc *finaliseProcess
	// Whether the current process is in outcome.main.
	active atomic.Bool

	syscallDispatcher
}
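
// finalise checks config and fills in the outcome: it encodes the initial
// [hst.Config] for state tracking, builds the container params, and queues
// the required system ops on k.sys. It returns an error for invalid
// configuration and panics if called more than once.
//
// A minimal sketch of a call, assuming an outcome wired up with a concrete
// syscallDispatcher (the dispatcher name here is illustrative, not part of
// this file):
//
//	k := &outcome{syscallDispatcher: dispatcher}
//	if err := k.finalise(ctx, msg, &id, config); err != nil {
//		// surface the *hst.AppError to the user
//	}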
func (k *outcome) finalise(ctx context.Context, msg container.Msg, id *state.ID, config *hst.Config) error {
	const (
		// only used for a nil configured env map
		envAllocSize = 1 << 6
	)

	var kp finaliseProcess

	if ctx == nil || id == nil {
		// unreachable
		panic("invalid call to finalise")
	}
	if k.ctx != nil || k.proc != nil {
		// unreachable
		panic("attempting to finalise twice")
	}
	k.ctx = ctx

	if config == nil {
		return newWithMessage("invalid configuration")
	}
	if config.Home == nil {
		return newWithMessage("invalid path to home directory")
	}

	// TODO(ophestra): do not clobber during finalise
	{
		// encode initial configuration for state tracking
		ct := new(bytes.Buffer)
		if err := gob.NewEncoder(ct).Encode(config); err != nil {
			return &hst.AppError{Step: "encode initial config", Err: err}
		}
		k.ct = ct
	}

	// allowed identity range is 0 to 9999; this is checked again in hsu
	if config.Identity < 0 || config.Identity > 9999 {
		return newWithMessage(fmt.Sprintf("identity %d out of range", config.Identity))
	}
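
	// resolve supplementary group names to gid strings ahead of time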
	kp.supp = make([]string, len(config.Groups))
	for i, name := range config.Groups {
		if gid, err := k.lookupGroupId(name); err != nil {
			var unknownGroupError user.UnknownGroupError
			if errors.As(err, &unknownGroupError) {
				return newWithMessageError(fmt.Sprintf("unknown group %q", name), unknownGroupError)
			} else {
				return &hst.AppError{Step: "look up group by name", Err: err}
			}
		} else {
			kp.supp[i] = gid
		}
	}
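
	// gather the initial outcome state; fields that depend on the local
	// system are filled in later by populateLocal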
	s := outcomeState{
		ID:        id,
		Identity:  config.Identity,
		UserID:    (&Hsu{k: k}).MustIDMsg(msg),
		EnvPaths:  copyPaths(k.syscallDispatcher),
		Container: config.Container,
	}

	// permissive defaults
	if s.Container == nil {
		msg.Verbose("container configuration not supplied, PROCEED WITH CAUTION")

		if config.Shell == nil {
			config.Shell = container.AbsFHSRoot.Append("bin", "sh")
			shell, _ := k.lookupEnv("SHELL")
			if a, err := container.NewAbs(shell); err == nil {
				config.Shell = a
			}
		}

		// hsu clears the environment, so resolve paths early
		if config.Path == nil {
			if len(config.Args) > 0 {
				if p, err := k.lookPath(config.Args[0]); err != nil {
					return &hst.AppError{Step: "look up executable file", Err: err}
				} else if config.Path, err = container.NewAbs(p); err != nil {
					return newWithMessageError(err.Error(), err)
				}
			} else {
				config.Path = config.Shell
			}
		}
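
		// with no container configuration supplied, fall back to a permissive
		// profile that mirrors the host root and keeps host networking and tty access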
		conf := &hst.ContainerConfig{
			Userns:       true,
			HostNet:      true,
			HostAbstract: true,
			Tty:          true,
			Filesystem: []hst.FilesystemConfigJSON{
				// autoroot, includes the home directory
				{FilesystemConfig: &hst.FSBind{
					Target:  container.AbsFHSRoot,
					Source:  container.AbsFHSRoot,
					Write:   true,
					Special: true,
				}},
			},
		}

		// bind GPU stuff
		if config.Enablements.Unwrap()&(hst.EX11|hst.EWayland) != 0 {
			conf.Filesystem = append(conf.Filesystem, hst.FilesystemConfigJSON{FilesystemConfig: &hst.FSBind{
				Source:   container.AbsFHSDev.Append("dri"),
				Device:   true,
				Optional: true,
			}})
		}
		// opportunistically bind kvm
		conf.Filesystem = append(conf.Filesystem, hst.FilesystemConfigJSON{FilesystemConfig: &hst.FSBind{
			Source:   container.AbsFHSDev.Append("kvm"),
			Device:   true,
			Optional: true,
		}})

		// hide nscd from container if present
		nscd := container.AbsFHSVar.Append("run/nscd")
		if _, err := k.stat(nscd.String()); !errors.Is(err, fs.ErrNotExist) {
			conf.Filesystem = append(conf.Filesystem, hst.FilesystemConfigJSON{FilesystemConfig: &hst.FSEphemeral{Target: nscd}})
		}

		// do autoetc last
		conf.Filesystem = append(conf.Filesystem,
			hst.FilesystemConfigJSON{FilesystemConfig: &hst.FSBind{
				Target:  container.AbsFHSEtc,
				Source:  container.AbsFHSEtc,
				Special: true,
			}},
		)

		s.Container = conf
	}

	// late nil checks for pd behaviour
	if config.Shell == nil {
		return newWithMessage("invalid shell path")
	}
	if config.Path == nil {
		return newWithMessage("invalid program path")
	}

	// enforce bounds and default early
	kp.waitDelay = shimWaitTimeout
	if s.Container.WaitDelay <= 0 {
		kp.waitDelay += DefaultShimWaitDelay
	} else if s.Container.WaitDelay > MaxShimWaitDelay {
		kp.waitDelay += MaxShimWaitDelay
	} else {
		kp.waitDelay += s.Container.WaitDelay
	}
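
	// map the caller's real uid inside the container when requested;
	// otherwise fall back to the kernel overflow ids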
	if s.Container.MapRealUID {
		s.Mapuid, s.Mapgid = k.getuid(), k.getgid()
	} else {
		s.Mapuid, s.Mapgid = k.overflowUid(msg), k.overflowGid(msg)
	}

	// TODO(ophestra): duplicate in shim (params to shim)
	if err := s.populateLocal(k.syscallDispatcher, msg); err != nil {
		return err
	}
	kp.runDirPath, kp.identity, kp.id = s.sc.RunDirPath, s.identity, s.id

	k.sys = system.New(k.ctx, msg, s.uid.unwrap())
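
	// each op contributes to both sides of the outcome: toSystem queues
	// host-side requirements on k.sys, toContainer fills in k.container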
	{
		ops := []outcomeOp{
			// must run first
			&spParamsOp{Path: config.Path, Args: config.Args},

			// TODO(ophestra): move this late for #8 and #9
			spFilesystemOp{},

			spRuntimeOp{},
			spTmpdirOp{},
			&spAccountOp{Home: config.Home, Username: config.Username, Shell: config.Shell},
		}
		et := config.Enablements.Unwrap()
		if et&hst.EWayland != 0 {
			ops = append(ops, &spWaylandOp{})
		}
		if et&hst.EX11 != 0 {
			ops = append(ops, &spX11Op{})
		}
		if et&hst.EPulse != 0 {
			ops = append(ops, &spPulseOp{})
		}
		if et&hst.EDBus != 0 {
			ops = append(ops, &spDBusOp{})
		}
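
		// apply the system side of every op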
		stateSys := outcomeStateSys{sys: k.sys, outcomeState: &s}
		for _, op := range ops {
			if err := op.toSystem(&stateSys, config); err != nil {
				return err
			}
		}

		// TODO(ophestra): move to shim
		stateParams := outcomeStateParams{params: &k.container, outcomeState: &s}
		if s.Container.Env == nil {
			stateParams.env = make(map[string]string, envAllocSize)
		} else {
			stateParams.env = maps.Clone(s.Container.Env)
		}
		for _, op := range ops {
			if err := op.toContainer(&stateParams); err != nil {
				return err
			}
		}

		// flatten and sort env for deterministic behaviour
		k.container.Env = make([]string, 0, len(stateParams.env))
		for key, value := range stateParams.env {
			if strings.IndexByte(key, '=') != -1 {
				return &hst.AppError{Step: "flatten environment", Err: syscall.EINVAL,
					Msg: fmt.Sprintf("invalid environment variable %s", key)}
			}
			k.container.Env = append(k.container.Env, key+"="+value)
		}
		slices.Sort(k.container.Env)
	}

	// mount root read-only as the final setup Op
	// TODO(ophestra): move this to spFilesystemOp after #8 and #9
	k.container.Remount(container.AbsFHSRoot, syscall.MS_RDONLY)

	// append ExtraPerms last
	for _, p := range config.ExtraPerms {
		if p == nil || p.Path == nil {
			continue
		}
		if p.Ensure {
			k.sys.Ensure(p.Path, 0700)
		}
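
		// translate the boolean rwx flags into acl perms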
		perms := make(acl.Perms, 0, 3)
		if p.Read {
			perms = append(perms, acl.Read)
		}
		if p.Write {
			perms = append(perms, acl.Write)
		}
		if p.Execute {
			perms = append(perms, acl.Execute)
		}
		k.sys.UpdatePermType(system.User, p.Path, perms...)
	}
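
	// a non-nil proc marks this outcome as finalised,
	// which is checked at the top of this method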
	k.proc = &kp
	return nil
}