container: make wait4 loop available to ops
All checks were successful
Test / Create distribution (push) Successful in 38s
Test / Sandbox (push) Successful in 2m36s
Test / Sandbox (race detector) (push) Successful in 4m43s
Test / Hpkg (push) Successful in 4m44s
Test / Hakurei (push) Successful in 4m55s
Test / Hakurei (race detector) (push) Successful in 6m23s
Test / Flake checks (push) Successful in 1m32s

Due to the special nature of the init process, regular wait calls are unavailable. This change provides infrastructure to access wait4 loop state from Op.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
Ophestra 2025-12-08 21:43:26 +09:00
parent 564db6863b
commit 791a1dfa55
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
2 changed files with 114 additions and 52 deletions

View File

@ -10,6 +10,7 @@ import (
"path"
"slices"
"strconv"
"sync"
. "syscall"
"time"
@ -19,24 +20,28 @@ import (
)
const (
/* intermediate tmpfs mount point
/* intermediateHostPath is the pathname of the intermediate tmpfs mount point.
this path might seem like a weird choice, however there are many good reasons to use it:
- the contents of this path is never exposed to the container:
the tmpfs root established here effectively becomes anonymous after pivot_root
- it is safe to assume this path exists and is a directory:
this program will not work correctly without a proper /proc and neither will most others
- this path belongs to the container init:
the container init is not any more privileged or trusted than the rest of the container
- this path is only accessible by init and root:
the container init sets SUID_DUMP_DISABLE and terminates if that fails;
This path might seem like a weird choice, however there are many good reasons to use it:
- The contents of this path is never exposed to the container:
The tmpfs root established here effectively becomes anonymous after pivot_root.
- It is safe to assume this path exists and is a directory:
This program will not work correctly without a proper /proc and neither will most others.
- This path belongs to the container init:
The container init is not any more privileged or trusted than the rest of the container.
- This path is only accessible by init and root:
The container init sets SUID_DUMP_DISABLE and terminates if that fails.
it should be noted that none of this should become relevant at any point since the resulting
intermediate root tmpfs should be effectively anonymous */
It should be noted that none of this should become relevant at any point since the resulting
intermediate root tmpfs should be effectively anonymous. */
intermediateHostPath = fhs.Proc + "self/fd"
// setup params file descriptor
// setupEnv is the name of the environment variable holding the string representation of
// the read end file descriptor of the setup params pipe.
setupEnv = "HAKUREI_SETUP"
// exitUnexpectedWait4 is the exit code if wait4 returns an unexpected errno.
exitUnexpectedWait4 = 2
)
type (
@ -64,12 +69,29 @@ type (
// setupState persists context between Ops.
setupState struct {
nonrepeatable uintptr
// Whether early reaping has concluded. Must only be accessed in the wait4 loop.
processConcluded bool
// Process to syscall.WaitStatus populated in the wait4 loop. Freed after early reaping concludes.
process map[int]WaitStatus
// Synchronises access to process.
processMu sync.RWMutex
*Params
context.Context
message.Msg
}
)
// terminated returns whether the specified pid has been reaped, and its
// syscall.WaitStatus if it had. This is only usable by [Op].
func (state *setupState) terminated(pid int) (wstatus WaitStatus, ok bool) {
state.processMu.RLock()
wstatus, ok = state.process[pid]
state.processMu.RUnlock()
return
}
// Grow grows the slice Ops points to using [slices.Grow].
func (f *Ops) Grow(n int) { *f = slices.Grow(*f, n) }
@ -185,7 +207,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
ctx, cancel := context.WithCancel(context.Background())
state := &setupState{Params: &params.Params, Msg: msg, Context: ctx}
state := &setupState{process: make(map[int]WaitStatus), Params: &params.Params, Msg: msg, Context: ctx}
defer cancel()
/* early is called right before pivot_root into intermediate root;
@ -336,35 +358,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
k.umask(oldmask)
// called right before startup of initial process, all state changes to the
// current process is prohibited during late
for i, op := range *params.Ops {
// ops already checked during early setup
if err := op.late(state, k); err != nil {
if m, ok := messageFromError(err); ok {
k.fatal(msg, m)
} else {
k.fatalf(msg, "cannot complete op at index %d: %v", i, err)
}
}
}
if err := closeSetup(); err != nil {
k.fatalf(msg, "cannot close setup pipe: %v", err)
}
cmd := exec.Command(params.Path.String())
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
cmd.Args = params.Args
cmd.Env = params.Env
cmd.ExtraFiles = extraFiles
cmd.Dir = params.Dir.String()
msg.Verbosef("starting initial program %s", params.Path)
if err := k.start(cmd); err != nil {
k.fatalf(msg, "%v", err)
}
// winfo represents an exited process from wait4.
type winfo struct {
wpid int
wstatus WaitStatus
@ -374,9 +368,13 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
// when there are no longer any processes left to reap
info := make(chan winfo, 1)
// closed when the initial process has started
initialProcessStarted := make(chan struct{})
k.new(func(k syscallDispatcher) {
k.lockOSThread()
wait4:
var (
err error
wpid = -2
@ -390,7 +388,21 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
if wpid != -2 {
info <- winfo{wpid, wstatus}
if !state.processConcluded {
state.processMu.Lock()
if state.process == nil {
// early reaping has already concluded at this point
state.processConcluded = true
info <- winfo{wpid, wstatus}
} else {
// initial process has not yet been created, and the
// info channel is not yet being received from
state.process[wpid] = wstatus
}
state.processMu.Unlock()
} else {
info <- winfo{wpid, wstatus}
}
}
err = EINTR
@ -398,13 +410,54 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
wpid, err = k.wait4(-1, &wstatus, 0, nil)
}
}
if !errors.Is(err, ECHILD) {
k.printf(msg, "unexpected wait4 response: %v", err)
} else if initialProcessStarted != nil {
// initial process has not yet been reached and all daemons
// terminated or none were started in the first place
<-initialProcessStarted
initialProcessStarted = nil
goto wait4
}
close(info)
})
// called right before startup of initial process, all state changes to the
// current process is prohibited during late
for i, op := range *params.Ops {
// ops already checked during early setup
if err := op.late(state, k); err != nil {
if m, ok := messageFromError(err); ok {
k.fatal(msg, m)
} else {
k.fatalf(msg, "cannot complete op at index %d: %v", i, err)
}
}
}
// early reaping has concluded, this must happen before initial process is created
state.processMu.Lock()
state.process = nil
state.processMu.Unlock()
if err := closeSetup(); err != nil {
k.fatalf(msg, "cannot close setup pipe: %v", err)
}
cmd := exec.Command(params.Path.String())
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
cmd.Args = params.Args
cmd.Env = params.Env
cmd.ExtraFiles = extraFiles
cmd.Dir = params.Dir.String()
msg.Verbosef("starting initial process %s", params.Path)
if err := k.start(cmd); err != nil {
k.fatalf(msg, "%v", err)
}
close(initialProcessStarted)
// handle signals to dump withheld messages
sig := make(chan os.Signal, 2)
k.notify(sig, CancelSignal,
@ -413,7 +466,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
// closed after residualProcessTimeout has elapsed after initial process death
timeout := make(chan struct{})
r := 2
r := exitUnexpectedWait4
for {
select {
case s := <-sig:

View File

@ -1983,11 +1983,20 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(13)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, nil, stub.UniqueError(12)),
call("fatalf", stub.ExpectArgs{"%v", []any{stub.UniqueError(12)}}, nil, nil),
},
/* wait4 */
Tracks: []stub.Expect{{Calls: []stub.Call{
call("lockOSThread", stub.ExpectArgs{}, nil, nil),
// this terminates the goroutine at the call, preventing it from leaking while preserving behaviour
call("wait4", stub.ExpectArgs{-1, nil, 0, nil, stub.PanicExit}, 0, syscall.ECHILD),
}}},
}, nil},
{"lowlastcap signaled cancel forward error", func(k *kstub) error { initEntrypoint(k, k); return nil }, stub.Expect{
@ -2062,10 +2071,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(10)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- CancelSignal }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{"forwarding context cancellation"}}, nil, nil),
// magicWait4Signal as ret causes wait4 stub to unblock
@ -2162,10 +2171,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(7)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- syscall.SIGQUIT }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbosef", stub.ExpectArgs{"got %s, forwarding to initial process", []any{"quit"}}, nil, nil),
// magicWait4Signal as ret causes wait4 stub to unblock
@ -2262,10 +2271,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(7)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- os.Interrupt }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbosef", stub.ExpectArgs{"got %s", []any{"interrupt"}}, nil, nil),
call("beforeExit", stub.ExpectArgs{}, nil, nil),
@ -2353,10 +2362,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(5)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
@ -2448,10 +2457,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(3)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
@ -2586,10 +2595,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(1)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil),
call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
@ -2728,10 +2737,10 @@ func TestInitEntrypoint(t *testing.T) {
call("newFile", stub.ExpectArgs{uintptr(11), "extra file 1"}, (*os.File)(nil), nil),
call("newFile", stub.ExpectArgs{uintptr(12), "extra file 2"}, (*os.File)(nil), nil),
call("umask", stub.ExpectArgs{022}, 0, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(0)}}, nil, nil),
call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/bin/zsh")}}, nil, nil),
call("start", stub.ExpectArgs{"/bin/zsh", []string{"zsh", "-c", "exec vim"}, []string{"DISPLAY=:0"}, "/.hakurei"}, &os.Process{Pid: 0xcafe}, nil),
call("New", stub.ExpectArgs{}, nil, nil),
call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),
call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),