container: improve documentation
All checks were successful
Test / Create distribution (push) Successful in 1m16s
Test / Sandbox (push) Successful in 3m2s
Test / Hakurei (push) Successful in 4m4s
Test / ShareFS (push) Successful in 4m17s
Test / Hpkg (push) Successful in 4m49s
Test / Sandbox (race detector) (push) Successful in 5m22s
Test / Hakurei (race detector) (push) Successful in 6m30s
Test / Flake checks (push) Successful in 1m48s

This change removes inconsistencies collected over time in this package.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-02-28 20:18:30 +09:00
parent 84e6922f30
commit cd9b534d6b
23 changed files with 222 additions and 97 deletions

View File

@@ -1,4 +1,5 @@
// Package container implements unprivileged Linux containers with built-in support for syscall filtering.
// Package container implements unprivileged Linux containers with built-in
// support for syscall filtering.
package container
import (
@@ -42,22 +43,25 @@ type (
SchedPolicy int
// Cgroup fd, nil to disable.
Cgroup *int
// ExtraFiles passed through to initial process in the container,
// with behaviour identical to its [exec.Cmd] counterpart.
// ExtraFiles passed through to initial process in the container, with
// behaviour identical to its [exec.Cmd] counterpart.
ExtraFiles []*os.File
// param pipe for shim and init
// Write end of a pipe connected to the init to deliver [Params].
setup *os.File
// cancels cmd
// Cancels the context passed to the underlying cmd.
cancel context.CancelFunc
// closed after Wait returns
// Closed after Wait returns. Keeps the spawning thread alive.
wait chan struct{}
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
Cancel func(cmd *exec.Cmd) error
// Custom cancellation behaviour for the underlying [exec.Cmd]. Must
// deliver [CancelSignal] before returning.
Cancel func(cmd *exec.Cmd) error
// Copied to the underlying [exec.Cmd].
WaitDelay time.Duration
cmd *exec.Cmd
@@ -286,7 +290,11 @@ func (p *Container) Start() error {
// place setup pipe before user supplied extra files, this is later restored by init
if fd, f, err := Setup(&p.cmd.ExtraFiles); err != nil {
return &StartError{true, "set up params stream", err, false, false}
return &StartError{
Fatal: true,
Step: "set up params stream",
Err: err,
}
} else {
p.setup = f
p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)}
@@ -298,10 +306,16 @@ func (p *Container) Start() error {
runtime.LockOSThread()
p.wait = make(chan struct{})
done <- func() error { // setup depending on per-thread state must happen here
// PR_SET_NO_NEW_PRIVS: depends on per-thread state but acts on all processes created from that thread
// setup depending on per-thread state must happen here
done <- func() error {
// PR_SET_NO_NEW_PRIVS: thread-directed but acts on all processes
// created from the calling thread
if err := SetNoNewPrivs(); err != nil {
return &StartError{true, "prctl(PR_SET_NO_NEW_PRIVS)", err, false, false}
return &StartError{
Fatal: true,
Step: "prctl(PR_SET_NO_NEW_PRIVS)",
Err: err,
}
}
// landlock: depends on per-thread state but acts on a process group
@@ -313,28 +327,40 @@ func (p *Container) Start() error {
if abi, err := LandlockGetABI(); err != nil {
if p.HostAbstract {
// landlock can be skipped here as it restricts access to resources
// already covered by namespaces (pid)
// landlock can be skipped here as it restricts access
// to resources already covered by namespaces (pid)
goto landlockOut
}
return &StartError{false, "get landlock ABI", err, false, false}
return &StartError{Step: "get landlock ABI", Err: err}
} else if abi < 6 {
if p.HostAbstract {
// see above comment
goto landlockOut
}
return &StartError{false, "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET", ENOSYS, true, false}
return &StartError{
Step: "kernel too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET",
Err: ENOSYS,
Origin: true,
}
} else {
p.msg.Verbosef("landlock abi version %d", abi)
}
if rulesetFd, err := rulesetAttr.Create(0); err != nil {
return &StartError{true, "create landlock ruleset", err, false, false}
return &StartError{
Fatal: true,
Step: "create landlock ruleset",
Err: err,
}
} else {
p.msg.Verbosef("enforcing landlock ruleset %s", rulesetAttr)
if err = LandlockRestrictSelf(rulesetFd, 0); err != nil {
_ = Close(rulesetFd)
return &StartError{true, "enforce landlock ruleset", err, false, false}
return &StartError{
Fatal: true,
Step: "enforce landlock ruleset",
Err: err,
}
}
if err = Close(rulesetFd); err != nil {
p.msg.Verbosef("cannot close landlock ruleset: %v", err)
@@ -346,7 +372,7 @@ func (p *Container) Start() error {
}
// sched_setscheduler: thread-directed but acts on all processes
// created from that thread
// created from the calling thread
if p.SchedPolicy > 0 {
p.msg.Verbosef("setting scheduling policy %d", p.SchedPolicy)
if err := schedSetscheduler(
@@ -364,7 +390,11 @@ func (p *Container) Start() error {
p.msg.Verbose("starting container init")
if err := p.cmd.Start(); err != nil {
return &StartError{false, "start container init", err, false, true}
return &StartError{
Step: "start container init",
Err: err,
Passthrough: true,
}
}
return nil
}()
@@ -376,6 +406,7 @@ func (p *Container) Start() error {
}
// Serve serves [Container.Params] to the container init.
//
// Serve must only be called once.
func (p *Container) Serve() error {
if p.setup == nil {
@@ -385,12 +416,21 @@ func (p *Container) Serve() error {
setup := p.setup
p.setup = nil
if err := setup.SetDeadline(time.Now().Add(initSetupTimeout)); err != nil {
return &StartError{true, "set init pipe deadline", err, false, true}
return &StartError{
Fatal: true,
Step: "set init pipe deadline",
Err: err,
Passthrough: true,
}
}
if p.Path == nil {
p.cancel()
return &StartError{false, "invalid executable pathname", EINVAL, true, false}
return &StartError{
Step: "invalid executable pathname",
Err: EINVAL,
Origin: true,
}
}
// do not transmit nil
@@ -415,7 +455,8 @@ func (p *Container) Serve() error {
return err
}
// Wait waits for the container init process to exit and releases any resources associated with the [Container].
// Wait blocks until the container init process to exit and releases any
// resources associated with the [Container].
func (p *Container) Wait() error {
if p.cmd == nil || p.cmd.Process == nil {
return EINVAL
@@ -460,11 +501,13 @@ func (p *Container) StderrPipe() (r io.ReadCloser, err error) {
}
func (p *Container) String() string {
return fmt.Sprintf("argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets))
return fmt.Sprintf(
"argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets),
)
}
// ProcessState returns the address to os.ProcessState held by the underlying [exec.Cmd].
// ProcessState returns the address of os.ProcessState held by the underlying [exec.Cmd].
func (p *Container) ProcessState() *os.ProcessState {
if p.cmd == nil {
return nil
@@ -472,7 +515,8 @@ func (p *Container) ProcessState() *os.ProcessState {
return p.cmd.ProcessState
}
// New returns the address to a new instance of [Container] that requires further initialisation before use.
// New returns the address to a new instance of [Container]. This value requires
// further initialisation before use.
func New(ctx context.Context, msg message.Msg) *Container {
if msg == nil {
msg = message.New(nil)
@@ -486,7 +530,13 @@ func New(ctx context.Context, msg message.Msg) *Container {
}
// NewCommand calls [New] and initialises the [Params.Path] and [Params.Args] fields.
func NewCommand(ctx context.Context, msg message.Msg, pathname *check.Absolute, name string, args ...string) *Container {
func NewCommand(
ctx context.Context,
msg message.Msg,
pathname *check.Absolute,
name string,
args ...string,
) *Container {
z := New(ctx, msg)
z.Path = pathname
z.Args = append([]string{name}, args...)