diff --git a/container/init.go b/container/init.go index c568702..b5e9eba 100644 --- a/container/init.go +++ b/container/init.go @@ -10,6 +10,7 @@ import ( "path" "slices" "strconv" + "sync" . "syscall" "time" @@ -19,24 +20,28 @@ import ( ) const ( - /* intermediate tmpfs mount point + /* intermediateHostPath is the pathname of the intermediate tmpfs mount point. - this path might seem like a weird choice, however there are many good reasons to use it: - - the contents of this path is never exposed to the container: - the tmpfs root established here effectively becomes anonymous after pivot_root - - it is safe to assume this path exists and is a directory: - this program will not work correctly without a proper /proc and neither will most others - - this path belongs to the container init: - the container init is not any more privileged or trusted than the rest of the container - - this path is only accessible by init and root: - the container init sets SUID_DUMP_DISABLE and terminates if that fails; + This path might seem like a weird choice; however, there are many good reasons to use it: + - The contents of this path are never exposed to the container: + The tmpfs root established here effectively becomes anonymous after pivot_root. + - It is safe to assume this path exists and is a directory: + This program will not work correctly without a proper /proc and neither will most others. + - This path belongs to the container init: + The container init is not any more privileged or trusted than the rest of the container. + - This path is only accessible by init and root: + The container init sets SUID_DUMP_DISABLE and terminates if that fails. - it should be noted that none of this should become relevant at any point since the resulting - intermediate root tmpfs should be effectively anonymous */ + It should be noted that none of this should become relevant at any point since the resulting + intermediate root tmpfs should be effectively anonymous. 
*/ intermediateHostPath = fhs.Proc + "self/fd" - // setup params file descriptor + // setupEnv is the name of the environment variable holding the string representation of + // the read end file descriptor of the setup params pipe. setupEnv = "HAKUREI_SETUP" + + // exitUnexpectedWait4 is the exit code if wait4 returns an unexpected errno. + exitUnexpectedWait4 = 2 ) type ( @@ -64,12 +69,29 @@ type ( // setupState persists context between Ops. setupState struct { nonrepeatable uintptr + + // Whether early reaping has concluded. Must only be accessed in the wait4 loop. + processConcluded bool + // Maps a reaped pid to its syscall.WaitStatus, populated in the wait4 loop. Freed after early reaping concludes. + process map[int]WaitStatus + // Synchronises access to process. + processMu sync.RWMutex + *Params context.Context message.Msg } ) +// terminated reports whether the specified pid has been reaped, and its +// syscall.WaitStatus if it has. It holds processMu for reading. This is only usable by [Op]. +func (state *setupState) terminated(pid int) (wstatus WaitStatus, ok bool) { + state.processMu.RLock() + wstatus, ok = state.process[pid] + state.processMu.RUnlock() + return +} + // Grow grows the slice Ops points to using [slices.Grow]. 
func (f *Ops) Grow(n int) { *f = slices.Grow(*f, n) } @@ -185,7 +207,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { } ctx, cancel := context.WithCancel(context.Background()) - state := &setupState{Params: &params.Params, Msg: msg, Context: ctx} + state := &setupState{process: make(map[int]WaitStatus), Params: &params.Params, Msg: msg, Context: ctx} defer cancel() /* early is called right before pivot_root into intermediate root; @@ -336,35 +358,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { } k.umask(oldmask) - // called right before startup of initial process, all state changes to the - // current process is prohibited during late - for i, op := range *params.Ops { - // ops already checked during early setup - if err := op.late(state, k); err != nil { - if m, ok := messageFromError(err); ok { - k.fatal(msg, m) - } else { - k.fatalf(msg, "cannot complete op at index %d: %v", i, err) - } - } - } - - if err := closeSetup(); err != nil { - k.fatalf(msg, "cannot close setup pipe: %v", err) - } - - cmd := exec.Command(params.Path.String()) - cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr - cmd.Args = params.Args - cmd.Env = params.Env - cmd.ExtraFiles = extraFiles - cmd.Dir = params.Dir.String() - - msg.Verbosef("starting initial program %s", params.Path) - if err := k.start(cmd); err != nil { - k.fatalf(msg, "%v", err) - } - + // winfo represents an exited process from wait4. 
type winfo struct { wpid int wstatus WaitStatus @@ -374,9 +368,13 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { // when there are no longer any processes left to reap info := make(chan winfo, 1) + // closed when the initial process has started + initialProcessStarted := make(chan struct{}) + k.new(func(k syscallDispatcher) { k.lockOSThread() + wait4: var ( err error wpid = -2 @@ -390,7 +388,21 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { } if wpid != -2 { - info <- winfo{wpid, wstatus} + if !state.processConcluded { + state.processMu.Lock() + if state.process == nil { + // early reaping has already concluded at this point + state.processConcluded = true + info <- winfo{wpid, wstatus} + } else { + // initial process has not yet been created, and the + // info channel is not yet being received from + state.process[wpid] = wstatus + } + state.processMu.Unlock() + } else { + info <- winfo{wpid, wstatus} + } } err = EINTR @@ -398,13 +410,54 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { wpid, err = k.wait4(-1, &wstatus, 0, nil) } } + if !errors.Is(err, ECHILD) { k.printf(msg, "unexpected wait4 response: %v", err) + } else if initialProcessStarted != nil { + // initial process has not yet been reached and all daemons + // terminated or none were started in the first place + <-initialProcessStarted + initialProcessStarted = nil + goto wait4 } close(info) }) + // called right before startup of initial process, all state changes to the + // current process is prohibited during late + for i, op := range *params.Ops { + // ops already checked during early setup + if err := op.late(state, k); err != nil { + if m, ok := messageFromError(err); ok { + k.fatal(msg, m) + } else { + k.fatalf(msg, "cannot complete op at index %d: %v", i, err) + } + } + } + // early reaping has concluded, this must happen before initial process is created + state.processMu.Lock() + state.process = nil + state.processMu.Unlock() + + if err := 
closeSetup(); err != nil { + k.fatalf(msg, "cannot close setup pipe: %v", err) + } + + cmd := exec.Command(params.Path.String()) + cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr + cmd.Args = params.Args + cmd.Env = params.Env + cmd.ExtraFiles = extraFiles + cmd.Dir = params.Dir.String() + + msg.Verbosef("starting initial program %s", params.Path) + if err := k.start(cmd); err != nil { + k.fatalf(msg, "%v", err) + } + close(initialProcessStarted) + // handle signals to dump withheld messages sig := make(chan os.Signal, 2) k.notify(sig, CancelSignal, @@ -413,7 +466,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { // closed after residualProcessTimeout has elapsed after initial process death timeout := make(chan struct{}) - r := 2 + r := exitUnexpectedWait4 for { select { case s := <-sig: diff --git a/container/init_test.go b/container/init_test.go index ca5ed74..e8daf6f 100644 --- a/container/init_test.go +++ b/container/init_test.go @@ -1983,11 +1983,20 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(13)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, nil, stub.UniqueError(12)), call("fatalf", stub.ExpectArgs{"%v", []any{stub.UniqueError(12)}}, nil, nil), }, + + /* wait4 */ + Tracks: []stub.Expect{{Calls: []stub.Call{ + call("lockOSThread", stub.ExpectArgs{}, nil, nil), + + // this terminates the goroutine at the call, preventing it from leaking while 
preserving behaviour + call("wait4", stub.ExpectArgs{-1, nil, 0, nil, stub.PanicExit}, 0, syscall.ECHILD), + }}}, }, nil}, {"lowlastcap signaled cancel forward error", func(k *kstub) error { initEntrypoint(k, k); return nil }, stub.Expect{ @@ -2062,10 +2071,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(10)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- CancelSignal }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{"forwarding context cancellation"}}, nil, nil), // magicWait4Signal as ret causes wait4 stub to unblock @@ -2162,10 +2171,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(7)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, 
([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- syscall.SIGQUIT }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbosef", stub.ExpectArgs{"got %s, forwarding to initial process", []any{"quit"}}, nil, nil), // magicWait4Signal as ret causes wait4 stub to unblock @@ -2262,10 +2271,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(7)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{func(c chan<- os.Signal) { c <- os.Interrupt }, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbosef", stub.ExpectArgs{"got %s", []any{"interrupt"}}, nil, nil), call("beforeExit", stub.ExpectArgs{}, nil, nil), @@ -2353,10 +2362,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(5)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program 
%s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), @@ -2448,10 +2457,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(3)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), @@ -2586,10 +2595,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(0x3a), "extra file 0"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(0x3b), "extra file 1"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", 
[]any{stub.UniqueError(1)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/run/current-system/sw/bin/bash")}}, nil, nil), call("start", stub.ExpectArgs{"/run/current-system/sw/bin/bash", []string{"bash", "-c", "false"}, ([]string)(nil), "/.hakurei/nonexistent"}, &os.Process{Pid: 0xbad}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), @@ -2728,10 +2737,10 @@ func TestInitEntrypoint(t *testing.T) { call("newFile", stub.ExpectArgs{uintptr(11), "extra file 1"}, (*os.File)(nil), nil), call("newFile", stub.ExpectArgs{uintptr(12), "extra file 2"}, (*os.File)(nil), nil), call("umask", stub.ExpectArgs{022}, 0, nil), + call("New", stub.ExpectArgs{}, nil, nil), call("fatalf", stub.ExpectArgs{"cannot close setup pipe: %v", []any{stub.UniqueError(0)}}, nil, nil), call("verbosef", stub.ExpectArgs{"starting initial program %s", []any{check.MustAbs("/bin/zsh")}}, nil, nil), call("start", stub.ExpectArgs{"/bin/zsh", []string{"zsh", "-c", "exec vim"}, []string{"DISPLAY=:0"}, "/.hakurei"}, &os.Process{Pid: 0xcafe}, nil), - call("New", stub.ExpectArgs{}, nil, nil), call("notify", stub.ExpectArgs{nil, []os.Signal{CancelSignal, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil), call("verbose", stub.ExpectArgs{[]any{os.ErrInvalid.Error()}}, nil, nil),