Files
hakurei/internal/pkg/exec.go
Ophestra 7bd4d7d0e6
All checks were successful
Test / Create distribution (push) Successful in 47s
Test / Sandbox (push) Successful in 2m48s
Test / ShareFS (push) Successful in 4m43s
Test / Sandbox (race detector) (push) Successful in 5m15s
Test / Hpkg (push) Successful in 5m25s
Test / Hakurei (push) Successful in 5m38s
Test / Hakurei (race detector) (push) Successful in 7m28s
Test / Flake checks (push) Successful in 1m44s
internal/pkg: support explicit overlay mount
This removes all but the /work/ auto overlay behaviour and enables much greater flexibility. This also renames ExecContainerPath to ExecPath so it is easier to type.

Signed-off-by: Ophestra <cat@gensokyo.uk>
2026-01-08 07:55:09 +09:00

371 lines
9.1 KiB
Go

package pkg
import (
"bytes"
"context"
"errors"
"os"
"runtime"
"slices"
"syscall"
"time"
"hakurei.app/container"
"hakurei.app/container/check"
"hakurei.app/container/fhs"
"hakurei.app/container/std"
"hakurei.app/message"
)
// AbsWork is the container pathname [CureContext.GetWorkDir] is mounted on.
// It is formed by appending "work/" to [fhs.AbsRoot].
var AbsWork = fhs.AbsRoot.Append("work/")
// ExecPath describes a single mount point of an exec container: a slice of
// [Artifact] and the [check.Absolute] pathname they are made available under
// in the container.
type ExecPath struct {
// Pathname in the container mount namespace.
P *check.Absolute
// Artifacts to mount on the pathname, must contain at least one [Artifact].
// If there are multiple entries or W is true, P is set up as an overlay
// mount, and entries of A must not implement [File].
A []Artifact
// Whether to make the mount point writable via an invisible tmpfs upperdir.
W bool
}
// Path builds an [ExecPath] that makes the artifacts a available on pathname,
// with the W field set to writable.
func Path(pathname *check.Absolute, writable bool, a ...Artifact) ExecPath {
	return ExecPath{
		P: pathname,
		A: a,
		W: writable,
	}
}
// MustPath behaves like [Path], except that pathname is a string which is
// converted via [check.MustAbs].
func MustPath(pathname string, writable bool, a ...Artifact) ExecPath {
	abs := check.MustAbs(pathname)
	return ExecPath{
		P: abs,
		A: a,
		W: writable,
	}
}
// An execArtifact is an [Artifact] that produces its output by running a
// program, itself part of another [Artifact], in a [container].
//
// Methods of execArtifact do not modify any struct field or underlying arrays
// referred to by slices.
type execArtifact struct {
// Caller-supplied context, the container is bounded by it.
ctx context.Context
// Caller-supplied inner mount points.
paths []ExecPath
// Caller-supplied logging facility, passed through to [container] and used
// internally to produce verbose output.
msg message.Msg
// Number of [Artifact] to concurrently cure. A value of 0 or lower is
// equivalent to the value returned by [runtime.NumCPU].
cures int
// Working directory, passed through to [container.Params].
dir *check.Absolute
// Environment, passed through to [container.Params].
env []string
// Executable pathname, passed through to [container.Params].
path *check.Absolute
// Program arguments, passed through to [container.Params].
args []string
}
// execNetArtifact is like execArtifact but implements [KnownChecksum] and has
// its resulting container keep the host net namespace.
type execNetArtifact struct {
// Caller-supplied checksum, returned as-is by the Checksum method.
checksum Checksum
// Embedded to reuse the container setup and all remaining methods.
execArtifact
}
// Compile-time check that *execNetArtifact satisfies [KnownChecksum].
var _ KnownChecksum = (*execNetArtifact)(nil)
// Checksum reports the checksum supplied by the caller at construction.
func (a *execNetArtifact) Checksum() Checksum {
	return a.checksum
}
// Kind always reports [KindExecNet].
func (a *execNetArtifact) Kind() Kind {
	return KindExecNet
}
// Params returns the caller-supplied checksum followed by the params of the
// embedded execArtifact, i.e. [Checksum] concatenated with [KindExec] params.
func (a *execNetArtifact) Params() []byte {
	sum := a.checksum[:]
	inner := a.execArtifact.Params()
	return slices.Concat(sum, inner)
}
// Cure cures all dependencies of the [Artifact], then runs the container
// described by the caller. Unlike the plain exec variant, the container
// retains host networking.
func (a *execNetArtifact) Cure(c *CureContext) error {
	const hostNet = true
	return a.cure(c, hostNet)
}
// NewExec returns a new [Artifact] bounded by ctx. Curing it first cures every
// [Artifact] found in paths, running at most cures of them concurrently, then
// makes each entry of paths available in the resulting container in the
// specified order; see [ExecPath] for how an entry is mounted. A private
// instance of /proc and /dev is made available to the container.
//
// The working and temporary directories are both created and mounted writable
// on [AbsWork] and [fhs.AbsTmp] respectively. If one or more paths target
// [AbsWork], the final such entry is set up as a writable overlay mount on
// /work for which the upperdir is the host side work directory. In this
// configuration, the W field is ignored, and the program must avoid causing
// whiteout files to be created. Cure fails if upperdir ends up with entries
// other than directory, regular or symlink.
//
// If checksum is non-nil, the resulting [Artifact] implements [KnownChecksum]
// and its container runs in the host net namespace.
//
// A cures value of 0 or lower is equivalent to the value returned by
// [runtime.NumCPU].
func NewExec(
	ctx context.Context,
	msg message.Msg,
	cures int,
	checksum *Checksum,
	dir *check.Absolute,
	env []string,
	path *check.Absolute,
	args []string,
	paths ...ExecPath,
) Artifact {
	base := execArtifact{
		ctx:   ctx,
		paths: paths,
		msg:   msg,
		cures: cures,
		dir:   dir,
		env:   env,
		path:  path,
		args:  args,
	}

	// a known checksum selects the variant that keeps host networking
	if checksum != nil {
		return &execNetArtifact{
			checksum:     *checksum,
			execArtifact: base,
		}
	}
	return &base
}
// Kind always reports [KindExec].
func (a *execArtifact) Kind() Kind {
	return KindExec
}
// Params encodes the caller-supplied paths, working directory, environment,
// executable pathname and args into a single byte slice.
//
// Each path contributes its W flag as a single byte, its pathname, a NUL, the
// identifiers of its artifacts back to back, and a closing NUL. The path list
// is followed by a NUL, then the working directory, environment, executable
// pathname and arguments, with a NUL between each of these sections.
//
// NOTE(review): entries of env and args are written without any separator
// between them, so distinct slices such as {"ab", "c"} and {"a", "bc"} encode
// to the same bytes. Changing this alters the encoding of every existing
// artifact, so it is only flagged here.
func (a *execArtifact) Params() []byte {
	var out bytes.Buffer

	for i := range a.paths {
		ep := &a.paths[i]

		// writable flag
		var flag byte
		if ep.W {
			flag = 1
		}
		out.WriteByte(flag)

		// pathname, with a placeholder standing in for a missing one
		name := "invalid P\x00"
		if ep.P != nil {
			name = ep.P.String()
		}
		out.WriteString(name)
		out.WriteByte(0)

		// identifiers of every artifact mounted on this pathname
		for _, dep := range ep.A {
			id := Ident(dep)
			out.Write(id[:])
		}
		out.WriteByte(0)
	}
	out.WriteByte(0)

	out.WriteString(a.dir.String())
	out.WriteByte(0)

	for i := range a.env {
		out.WriteString(a.env[i])
	}
	out.WriteByte(0)

	out.WriteString(a.path.String())
	out.WriteByte(0)

	for i := range a.args {
		out.WriteString(a.args[i])
	}

	return out.Bytes()
}
// Dependencies returns a newly allocated slice holding the artifacts of every
// caller-supplied [ExecPath], in the order the paths were specified.
func (a *execArtifact) Dependencies() []Artifact {
	groups := make([][]Artifact, len(a.paths))
	for i := range a.paths {
		groups[i] = a.paths[i].A
	}
	return slices.Concat(groups...)
}
// Cure cures all dependencies of the [Artifact], then runs the container
// described by the caller. The container does not keep the host net namespace.
func (a *execArtifact) Cure(c *CureContext) (err error) {
	const hostNet = false
	err = a.cure(c, hostNet)
	return
}
const (
// execWaitDelay is passed through to [container.Params] as the WaitDelay of
// every container started by an execArtifact.
execWaitDelay = 15 * time.Second
)
// cure is like Cure but allows optional host net namespace. This is used for
// the [KnownChecksum] variant where networking is allowed.
//
// It validates every caller-supplied [ExecPath], cures all of their artifacts
// using at most a.cures concurrent workers (or [runtime.NumCPU] if a.cures is
// less than 1), then sets up and runs the container.
//
// It returns [os.ErrInvalid] if any path has a nil P or an empty A, the joined
// errors of all failed cures, or the first error encountered while preparing,
// starting, serving or waiting for the container.
func (a *execArtifact) cure(c *CureContext, hostNet bool) (err error) {
	cures := a.cures
	if cures < 1 {
		cures = runtime.NumCPU()
	}

	overlayWorkIndex := -1
	type curePath struct {
		// Copied from ExecPath.P.
		dst *check.Absolute
		// Cured from ExecPath.A.
		src []*check.Absolute
	}
	paths := make([]curePath, len(a.paths))
	// Total number of artifacts across all paths. This, not len(paths), is the
	// amount of work queued below since a path may carry multiple artifacts.
	var total int
	for i, p := range a.paths {
		if p.P == nil || len(p.A) == 0 {
			return os.ErrInvalid
		}
		// the final entry targeting AbsWork wins
		if p.P.Is(AbsWork) {
			overlayWorkIndex = i
		}
		paths[i].dst = p.P
		paths[i].src = make([]*check.Absolute, len(p.A))
		total += len(p.A)
	}

	if total > 0 {
		type cureArtifact struct {
			// Index of pending Artifact in paths.
			index [2]int
			// Pending artifact.
			a Artifact
		}
		// The queue is filled before any worker is started, so it must be
		// able to hold every artifact or the sends below would block forever.
		ac := make(chan cureArtifact, total)
		for i, p := range a.paths {
			for j, d := range p.A {
				ac <- cureArtifact{[2]int{i, j}, d}
			}
		}
		// nothing else is ever queued, workers return once the queue drains
		close(ac)

		type cureRes struct {
			// Index of result in paths.
			index [2]int
			// Cured pathname.
			pathname *check.Absolute
			// Error returned by c.
			err error
		}
		res := make(chan cureRes)

		// there is no point in starting more workers than there is work
		workers := cures
		if workers > total {
			workers = total
		}
		for i := 0; i < workers; i++ {
			go func() {
				for d := range ac {
					// computing and encoding identifier is expensive
					if a.msg.IsVerbose() {
						a.msg.Verbosef("curing %s...", Encode(Ident(d.a)))
					}

					var cr cureRes
					cr.index = d.index
					cr.pathname, _, cr.err = c.Cure(d.a)
					res <- cr
				}
			}()
		}

		// Exactly one result arrives per queued artifact. Receiving all of
		// them guarantees no worker is left blocked on res and that every
		// src entry is populated when no error occurred.
		var errs []error
		for n := 0; n < total; n++ {
			cr := <-res
			if cr.err != nil {
				errs = append(errs, cr.err)
				continue
			}
			paths[cr.index[0]].src[cr.index[1]] = cr.pathname
		}
		if err = errors.Join(errs...); err != nil {
			return
		}
	}

	ctx, cancel := context.WithCancel(a.ctx)
	defer cancel()

	z := container.New(ctx, a.msg)
	z.ForwardCancel = true
	z.WaitDelay = execWaitDelay
	z.SeccompPresets |= std.PresetStrict
	z.ParentPerm = 0700
	z.HostNet = hostNet
	z.Hostname = "cure"
	if z.HostNet {
		z.Hostname = "cure-net"
	}
	z.Uid, z.Gid = (1<<10)-1, (1<<10)-1
	if a.msg.IsVerbose() {
		z.Stdout, z.Stderr = os.Stdout, os.Stderr
	}
	z.Dir, z.Env, z.Path, z.Args = a.dir, a.env, a.path, a.args
	z.Grow(len(paths) + 4)

	temp, work := c.GetTempDir(), c.GetWorkDir()
	for i, b := range paths {
		if i == overlayWorkIndex {
			// The host side work directory becomes the upperdir of the
			// overlay on AbsWork, with a scratch directory under temp passed
			// alongside it.
			if err = os.MkdirAll(work.String(), 0700); err != nil {
				return
			}
			tempWork := temp.Append(".work")
			if err = os.MkdirAll(tempWork.String(), 0700); err != nil {
				return
			}
			z.Overlay(
				AbsWork,
				work,
				tempWork,
				b.src...,
			)
			continue
		}

		switch {
		case a.paths[i].W:
			z.OverlayEphemeral(b.dst, b.src...)
		case len(b.src) == 1:
			z.Bind(b.src[0], b.dst, 0)
		default:
			z.OverlayReadonly(b.dst, b.src...)
		}
	}
	if overlayWorkIndex < 0 {
		// no caller-supplied path targets AbsWork, bind the work directory
		z.Bind(
			work,
			AbsWork,
			std.BindWritable|std.BindEnsure,
		)
	}
	z.Bind(
		c.GetTempDir(),
		fhs.AbsTmp,
		std.BindWritable|std.BindEnsure,
	)
	z.Proc(fhs.AbsProc).Dev(fhs.AbsDev, true)

	if err = z.Start(); err != nil {
		return
	}
	if err = z.Serve(); err != nil {
		return
	}
	if err = z.Wait(); err != nil {
		return
	}

	// do not allow empty directories to succeed
	//
	// Rmdir only removes the work directory if it is empty. ENOTEMPTY means
	// the program left something in it, which is the expected outcome and is
	// therefore not reported as an error.
	// NOTE(review): a successful Rmdir also leaves err nil here; presumably
	// the caller then fails on the missing work directory, confirm.
	for {
		err = syscall.Rmdir(work.String())
		if err != syscall.EINTR {
			break
		}
	}
	if errors.Is(err, syscall.ENOTEMPTY) {
		err = nil
	}
	return
}