internal/outcome/process: use new store interface
All checks were successful
Test / Create distribution (push) Successful in 42s
Test / Sandbox (push) Successful in 2m26s
Test / Hakurei (push) Successful in 3m20s
Test / Hpkg (push) Successful in 4m7s
Test / Sandbox (race detector) (push) Successful in 4m15s
Test / Flake checks (push) Successful in 1m32s
Test / Hakurei (race detector) (push) Successful in 5m5s
All checks were successful
Test / Create distribution (push) Successful in 42s
Test / Sandbox (push) Successful in 2m26s
Test / Hakurei (push) Successful in 3m20s
Test / Hpkg (push) Successful in 4m7s
Test / Sandbox (race detector) (push) Successful in 4m15s
Test / Flake checks (push) Successful in 1m32s
Test / Hakurei (race detector) (push) Successful in 5m5s
This change also spawns the shim before committing system state, leaving it blocked on the setup pipe. The internal/outcome/process structure is also entirely reworked to be much more readable and less error-prone, while enabling basic performance measurements. A long-standing bug where the segment lock was not held during Commit is also resolved. Closes #19. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"iter"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
@@ -28,179 +30,6 @@ const (
|
||||
shimSetupTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
// mainState holds persistent state bound to outcome.main.
|
||||
type mainState struct {
|
||||
// done is whether beforeExit has been called already.
|
||||
done bool
|
||||
|
||||
// Populated on successful hsu startup.
|
||||
cmd *exec.Cmd
|
||||
// Cancels cmd, must be populated before cmd is populated.
|
||||
cancel context.CancelFunc
|
||||
|
||||
store store.Compat
|
||||
|
||||
k *outcome
|
||||
message.Msg
|
||||
uintptr
|
||||
}
|
||||
|
||||
// Flag bits stored in the embedded uintptr of mainState.
const (
	// mainNeedsRevert is set once Commit has succeeded,
	// meaning system setup must be reverted on exit.
	mainNeedsRevert uintptr = 1 << iota
	// mainNeedsDestroy is set once the instance state entry exists in the store,
	// meaning it must be destroyed on exit.
	mainNeedsDestroy
)
|
||||
|
||||
// beforeExit must be called immediately before a call to [os.Exit].
|
||||
func (ms mainState) beforeExit(isFault bool) {
|
||||
if ms.done {
|
||||
panic("attempting to call beforeExit twice")
|
||||
}
|
||||
ms.done = true
|
||||
defer ms.BeforeExit()
|
||||
|
||||
if isFault && ms.cancel != nil {
|
||||
ms.cancel()
|
||||
}
|
||||
|
||||
var hasErr bool
|
||||
// updates hasErr but does not terminate
|
||||
perror := func(err error, message string) {
|
||||
hasErr = true
|
||||
printMessageError(ms.GetLogger().Println, "cannot "+message+":", err)
|
||||
}
|
||||
exitCode := 1
|
||||
defer func() {
|
||||
if hasErr {
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
}()
|
||||
|
||||
// this also handles wait for a non-fault termination
|
||||
if ms.cmd != nil {
|
||||
select {
|
||||
case err := <-func() chan error { w := make(chan error, 1); go func() { w <- ms.cmd.Wait(); ms.cancel() }(); return w }():
|
||||
wstatus, ok := ms.cmd.ProcessState.Sys().(syscall.WaitStatus)
|
||||
if ok {
|
||||
if v := wstatus.ExitStatus(); v != 0 {
|
||||
hasErr = true
|
||||
exitCode = v
|
||||
}
|
||||
}
|
||||
|
||||
if ms.IsVerbose() {
|
||||
if !ok {
|
||||
if err != nil {
|
||||
ms.Verbosef("wait: %v", err)
|
||||
}
|
||||
} else {
|
||||
switch {
|
||||
case wstatus.Exited():
|
||||
ms.Verbosef("process %d exited with code %d", ms.cmd.Process.Pid, wstatus.ExitStatus())
|
||||
|
||||
case wstatus.CoreDump():
|
||||
ms.Verbosef("process %d dumped core", ms.cmd.Process.Pid)
|
||||
|
||||
case wstatus.Signaled():
|
||||
ms.Verbosef("process %d got %s", ms.cmd.Process.Pid, wstatus.Signal())
|
||||
|
||||
default:
|
||||
ms.Verbosef("process %d exited with status %#x", ms.cmd.Process.Pid, wstatus)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case <-func() chan struct{} {
|
||||
w := make(chan struct{})
|
||||
// this ties waitDone to ctx with the additional compensated timeout duration
|
||||
go func() { <-ms.k.ctx.Done(); time.Sleep(ms.k.state.Shim.WaitDelay + shimWaitTimeout); close(w) }()
|
||||
return w
|
||||
}():
|
||||
ms.Resume()
|
||||
// this is only reachable when shim did not exit within shimWaitTimeout, after its WaitDelay has elapsed.
|
||||
// This is different from the container failing to terminate within its timeout period, as that is enforced
|
||||
// by the shim. This path is instead reached when there is a lockup in shim preventing it from completing.
|
||||
ms.GetLogger().Printf("process %d did not terminate", ms.cmd.Process.Pid)
|
||||
}
|
||||
|
||||
ms.Resume()
|
||||
}
|
||||
|
||||
if ms.uintptr&mainNeedsRevert != 0 {
|
||||
if ok, err := ms.store.Do(ms.k.state.identity.unwrap(), func(c store.Cursor) {
|
||||
if ms.uintptr&mainNeedsDestroy != 0 {
|
||||
if err := c.Destroy(ms.k.state.id.unwrap()); err != nil {
|
||||
perror(err, "destroy state entry")
|
||||
}
|
||||
}
|
||||
|
||||
var rt hst.Enablement
|
||||
if states, err := c.Load(); err != nil {
|
||||
// it is impossible to continue from this point;
|
||||
// revert per-process state here to limit damage
|
||||
ec := system.Process
|
||||
if revertErr := ms.k.sys.Revert((*system.Criteria)(&ec)); revertErr != nil {
|
||||
var joinError interface {
|
||||
Unwrap() []error
|
||||
error
|
||||
}
|
||||
if !errors.As(revertErr, &joinError) || joinError == nil {
|
||||
perror(revertErr, "revert system setup")
|
||||
} else {
|
||||
for _, v := range joinError.Unwrap() {
|
||||
perror(v, "revert system setup step")
|
||||
}
|
||||
}
|
||||
}
|
||||
perror(err, "load instance states")
|
||||
} else {
|
||||
ec := system.Process
|
||||
if l := len(states); l == 0 {
|
||||
ec |= system.User
|
||||
} else {
|
||||
ms.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
|
||||
}
|
||||
|
||||
// accumulate enablements of remaining launchers
|
||||
for i, s := range states {
|
||||
if s.Config != nil {
|
||||
rt |= s.Config.Enablements.Unwrap()
|
||||
} else {
|
||||
ms.GetLogger().Printf("state entry %d does not contain config", i)
|
||||
}
|
||||
}
|
||||
|
||||
ec |= rt ^ (hst.EWayland | hst.EX11 | hst.EDBus | hst.EPulse)
|
||||
if ms.IsVerbose() {
|
||||
if ec > 0 {
|
||||
ms.Verbose("reverting operations scope", system.TypeString(ec))
|
||||
}
|
||||
}
|
||||
|
||||
if err = ms.k.sys.Revert((*system.Criteria)(&ec)); err != nil {
|
||||
perror(err, "revert system setup")
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
if ok {
|
||||
perror(err, "unlock state store")
|
||||
} else {
|
||||
perror(err, "open state store")
|
||||
}
|
||||
}
|
||||
} else if ms.uintptr&mainNeedsDestroy != 0 {
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// fatal calls printMessageError, performs necessary cleanup, followed by a call to [os.Exit](1).
|
||||
func (ms mainState) fatal(fallback string, ferr error) {
|
||||
printMessageError(ms.GetLogger().Println, fallback, ferr)
|
||||
ms.beforeExit(true)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// main carries out outcome and terminates. main does not return.
|
||||
func (k *outcome) main(msg message.Msg) {
|
||||
if k.ctx == nil || k.sys == nil || k.state == nil {
|
||||
@@ -210,61 +39,281 @@ func (k *outcome) main(msg message.Msg) {
|
||||
// read comp value early for early failure
|
||||
hsuPath := internal.MustHsuPath()
|
||||
|
||||
// ms.beforeExit required beyond this point
|
||||
ms := mainState{Msg: msg, k: k}
|
||||
|
||||
if err := k.sys.Commit(); err != nil {
|
||||
ms.fatal("cannot commit system setup:", err)
|
||||
}
|
||||
ms.uintptr |= mainNeedsRevert
|
||||
ms.store = store.NewMulti(msg, k.state.sc.RunDirPath)
|
||||
const (
|
||||
// transitions to processCommit, or processFinal on failure
|
||||
processStart = iota
|
||||
// transitions to processServe, or processLifecycle on failure
|
||||
processCommit
|
||||
// transitions to processLifecycle only
|
||||
processServe
|
||||
// transitions to processCleanup only
|
||||
processLifecycle
|
||||
// transitions to processFinal only
|
||||
processCleanup
|
||||
// execution terminates, must be the final state
|
||||
processFinal
|
||||
)
|
||||
|
||||
// for the shim process
|
||||
ctx, cancel := context.WithCancel(k.ctx)
|
||||
defer cancel()
|
||||
ms.cancel = cancel
|
||||
|
||||
// shim starts and blocks on setup payload before container is started
|
||||
var (
|
||||
// state for next iteration
|
||||
processState uintptr = processStart
|
||||
// current state, must not be mutated directly
|
||||
processStateCur uintptr = math.MaxUint
|
||||
// point in time the current iteration began
|
||||
processTime time.Time
|
||||
|
||||
// whether sys is currently in between a call to Commit and Revert
|
||||
isBeforeRevert bool
|
||||
|
||||
// initialised during processStart if successful
|
||||
handle *store.Handle
|
||||
// initialised during processServe if state is saved
|
||||
entryHandle *store.EntryHandle
|
||||
|
||||
// can be set in any state, used in processFinal
|
||||
exitCode int
|
||||
|
||||
// shim process startup time,
|
||||
// populated in processStart, accessed by processServe
|
||||
startTime time.Time
|
||||
shimPipe *os.File
|
||||
// shim process as target uid,
|
||||
// populated in processStart, accessed by processServe
|
||||
shimCmd *exec.Cmd
|
||||
// write end of shim setup pipe,
|
||||
// populated in processStart, accessed by processServe
|
||||
shimPipe *os.File
|
||||
|
||||
// perror cancels ctx and prints an error message
|
||||
perror = func(err error, message string) {
|
||||
cancel()
|
||||
if shimPipe != nil {
|
||||
if closeErr := shimPipe.Close(); closeErr != nil {
|
||||
msg.Verbose(closeErr.Error())
|
||||
}
|
||||
shimPipe = nil
|
||||
}
|
||||
if exitCode == 0 {
|
||||
exitCode = 1
|
||||
}
|
||||
printMessageError(msg.GetLogger().Println, "cannot "+message+":", err)
|
||||
}
|
||||
|
||||
// perrorFatal cancels ctx, prints an error message, and sets the next state
|
||||
perrorFatal = func(err error, message string, newState uintptr) {
|
||||
perror(err, message)
|
||||
processState = newState
|
||||
}
|
||||
)
|
||||
if cmd, f, err := k.start(ctx, msg, hsuPath, &startTime); err != nil {
|
||||
ms.fatal("cannot start shim:", err)
|
||||
panic("unreachable")
|
||||
} else {
|
||||
ms.cmd, shimPipe = cmd, f
|
||||
}
|
||||
|
||||
// this starts the container, system setup must complete before this point
|
||||
if err := serveShim(msg, shimPipe, k.state); err != nil {
|
||||
ms.fatal("cannot serve shim payload:", err)
|
||||
}
|
||||
for {
|
||||
var processTimePrev time.Time
|
||||
processTimePrev, processTime = processTime, time.Now()
|
||||
var processStatePrev uintptr
|
||||
processStatePrev, processStateCur = processStateCur, processState
|
||||
|
||||
// shim accepted setup payload, create process state
|
||||
if ok, err := ms.store.Do(k.state.identity.unwrap(), func(c store.Cursor) {
|
||||
if err := c.Save(&hst.State{
|
||||
ID: k.state.id.unwrap(),
|
||||
PID: os.Getpid(),
|
||||
ShimPID: ms.cmd.Process.Pid,
|
||||
Config: k.config,
|
||||
Time: startTime,
|
||||
}); err != nil {
|
||||
ms.fatal("cannot save state entry:", err)
|
||||
if !processTimePrev.IsZero() && processStatePrev != processLifecycle {
|
||||
msg.Verbosef("state %d took %d ms", processStatePrev, processTime.Sub(processTimePrev).Milliseconds())
|
||||
}
|
||||
}); err != nil {
|
||||
if ok {
|
||||
ms.uintptr |= mainNeedsDestroy
|
||||
ms.fatal("cannot unlock state store:", err)
|
||||
} else {
|
||||
ms.fatal("cannot open state store:", err)
|
||||
|
||||
switch processState {
|
||||
case processStart:
|
||||
if h, err := store.New(k.state.sc.RunDirPath.Append("state")).Handle(k.state.identity.unwrap()); err != nil {
|
||||
perrorFatal(err, "obtain store segment handle", processFinal)
|
||||
continue
|
||||
} else {
|
||||
handle = h
|
||||
}
|
||||
|
||||
cmd, f, err := k.start(ctx, msg, hsuPath, &startTime)
|
||||
if err != nil {
|
||||
perrorFatal(err, "start shim", processFinal)
|
||||
continue
|
||||
} else {
|
||||
shimCmd, shimPipe = cmd, f
|
||||
}
|
||||
|
||||
processState = processCommit
|
||||
|
||||
case processCommit:
|
||||
if isBeforeRevert {
|
||||
perrorFatal(newWithMessage("invalid transition to commit state"), "commit", processLifecycle)
|
||||
continue
|
||||
}
|
||||
|
||||
unlock, err := handle.Lock()
|
||||
if err != nil {
|
||||
perrorFatal(err, "acquire lock on store segment", processLifecycle)
|
||||
continue
|
||||
}
|
||||
if entryHandle, err = handle.Save(&hst.State{
|
||||
ID: k.state.id.unwrap(),
|
||||
PID: os.Getpid(),
|
||||
ShimPID: shimCmd.Process.Pid,
|
||||
Config: k.config,
|
||||
Time: startTime,
|
||||
}); err != nil {
|
||||
unlock()
|
||||
// transition here to avoid the commit/revert cycle on the doomed instance
|
||||
perrorFatal(err, "save instance state", processLifecycle)
|
||||
continue
|
||||
}
|
||||
|
||||
err = k.sys.Commit()
|
||||
unlock()
|
||||
if err != nil {
|
||||
perrorFatal(err, "commit system setup", processLifecycle)
|
||||
continue
|
||||
}
|
||||
isBeforeRevert = true
|
||||
|
||||
processState = processServe
|
||||
|
||||
case processServe:
|
||||
// this state transitions to processLifecycle only
|
||||
processState = processLifecycle
|
||||
|
||||
// this starts the container, system setup must complete before this point
|
||||
if err := serveShim(msg, shimPipe, k.state); err != nil {
|
||||
perror(err, "serve shim payload")
|
||||
continue
|
||||
} else {
|
||||
shimPipe = nil // this is already closed by serveShim
|
||||
}
|
||||
|
||||
case processLifecycle:
|
||||
// this state transitions to processCleanup only
|
||||
processState = processCleanup
|
||||
|
||||
msg.Suspend()
|
||||
select {
|
||||
case err := <-func() chan error { w := make(chan error, 1); go func() { w <- shimCmd.Wait(); cancel() }(); return w }():
|
||||
wstatus, ok := shimCmd.ProcessState.Sys().(syscall.WaitStatus)
|
||||
if ok {
|
||||
if v := wstatus.ExitStatus(); v != 0 {
|
||||
exitCode = v
|
||||
}
|
||||
}
|
||||
|
||||
if msg.IsVerbose() {
|
||||
if !ok {
|
||||
if err != nil {
|
||||
msg.Verbosef("wait: %v", err)
|
||||
}
|
||||
} else {
|
||||
switch {
|
||||
case wstatus.Exited():
|
||||
msg.Verbosef("process %d exited with code %d", shimCmd.Process.Pid, wstatus.ExitStatus())
|
||||
|
||||
case wstatus.CoreDump():
|
||||
msg.Verbosef("process %d dumped core", shimCmd.Process.Pid)
|
||||
|
||||
case wstatus.Signaled():
|
||||
msg.Verbosef("process %d got %s", shimCmd.Process.Pid, wstatus.Signal())
|
||||
|
||||
default:
|
||||
msg.Verbosef("process %d exited with status %#x", shimCmd.Process.Pid, wstatus)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case <-func() chan struct{} {
|
||||
w := make(chan struct{})
|
||||
// this ties processLifecycle to ctx with the additional compensated timeout duration
|
||||
// to allow transition to the next state on a locked up shim
|
||||
go func() { <-ctx.Done(); time.Sleep(k.state.Shim.WaitDelay + shimWaitTimeout); close(w) }()
|
||||
return w
|
||||
}():
|
||||
// this is only reachable when wait did not return within shimWaitTimeout, after its WaitDelay has elapsed.
|
||||
// This is different from the container failing to terminate within its timeout period, as that is enforced
|
||||
// by the shim. This path is instead reached when there is a lockup in shim preventing it from completing.
|
||||
msg.GetLogger().Printf("process %d did not terminate", shimCmd.Process.Pid)
|
||||
}
|
||||
msg.Resume()
|
||||
|
||||
case processCleanup:
|
||||
// this state transitions to processFinal only
|
||||
processState = processFinal
|
||||
|
||||
unlock, err := handle.Lock()
|
||||
if err != nil {
|
||||
perror(err, "acquire lock on store segment")
|
||||
}
|
||||
|
||||
if entryHandle != nil {
|
||||
if err = entryHandle.Destroy(); err != nil {
|
||||
perror(err, "destroy state entry")
|
||||
}
|
||||
}
|
||||
|
||||
if isBeforeRevert {
|
||||
ec := system.Process
|
||||
|
||||
var entries iter.Seq[*store.EntryHandle]
|
||||
if entries, _, err = handle.Entries(); err != nil {
|
||||
// it is impossible to continue from this point,
|
||||
// per-process state will be reverted to limit damage
|
||||
perror(err, "read store segment entries")
|
||||
} else {
|
||||
// accumulate enablements of remaining instances
|
||||
var (
|
||||
// alive enablement bits
|
||||
rt hst.Enablement
|
||||
// alive instance count
|
||||
n int
|
||||
)
|
||||
for eh := range entries {
|
||||
var et hst.Enablement
|
||||
if et, err = eh.Load(nil); err != nil {
|
||||
perror(err, "read state header of instance "+eh.ID.String())
|
||||
} else {
|
||||
rt |= et
|
||||
n++
|
||||
}
|
||||
}
|
||||
|
||||
if n == 0 {
|
||||
ec |= system.User
|
||||
} else {
|
||||
msg.Verbosef("found %d instances, cleaning up without user-scoped operations", n)
|
||||
}
|
||||
ec |= rt ^ (hst.EWayland | hst.EX11 | hst.EDBus | hst.EPulse)
|
||||
if msg.IsVerbose() {
|
||||
if ec > 0 {
|
||||
msg.Verbose("reverting operations scope", system.TypeString(ec))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err = k.sys.Revert((*system.Criteria)(&ec)); err != nil {
|
||||
var joinError interface {
|
||||
Unwrap() []error
|
||||
error
|
||||
}
|
||||
if !errors.As(err, &joinError) || joinError == nil {
|
||||
perror(err, "revert system setup")
|
||||
} else {
|
||||
for _, v := range joinError.Unwrap() {
|
||||
perror(v, "revert system setup step")
|
||||
}
|
||||
}
|
||||
}
|
||||
isBeforeRevert = false
|
||||
}
|
||||
unlock()
|
||||
|
||||
case processFinal:
|
||||
msg.BeforeExit()
|
||||
os.Exit(exitCode)
|
||||
|
||||
default: // not reached
|
||||
k.fatalf("invalid transition from state %d to %d", processStatePrev, processState)
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
// state in store at this point, destroy defunct state entry on termination
|
||||
ms.uintptr |= mainNeedsDestroy
|
||||
|
||||
// beforeExit ties shim process to context
|
||||
ms.beforeExit(false)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// start starts the shim via cmd/hsu.
|
||||
@@ -301,7 +350,6 @@ func (k *outcome) start(ctx context.Context, msg message.Msg,
|
||||
}
|
||||
|
||||
msg.Verbosef("setuid helper at %s", hsuPath)
|
||||
msg.Suspend()
|
||||
if err := cmd.Start(); err != nil {
|
||||
msg.Resume()
|
||||
return cmd, shimPipe, &hst.AppError{Step: "start setuid wrapper", Err: err}
|
||||
@@ -313,6 +361,10 @@ func (k *outcome) start(ctx context.Context, msg message.Msg,
|
||||
|
||||
// serveShim serves outcomeState through the shim setup pipe.
|
||||
func serveShim(msg message.Msg, shimPipe *os.File, state *outcomeState) error {
|
||||
if shimPipe == nil {
|
||||
return newWithMessage("shim pipe not available")
|
||||
}
|
||||
|
||||
if err := shimPipe.SetDeadline(time.Now().Add(shimSetupTimeout)); err != nil {
|
||||
msg.Verbose(err.Error())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user