internal/app: do not return from shim start
All checks were successful
Test / Create distribution (push) Successful in 49s
Test / Sandbox (push) Successful in 2m37s
Test / Hakurei (push) Successful in 3m32s
Test / Hpkg (push) Successful in 4m21s
Test / Hakurei (race detector) (push) Successful in 5m37s
Test / Sandbox (race detector) (push) Successful in 2m7s
Test / Flake checks (push) Successful in 1m20s
The whole RunState ugliness, and the other horrendous error handling conditions in internal/app, come from an old design proposal that kept all app containers for a user under the same daemon process. The proposal was ultimately rejected, but the implementation remained. It is removed here to rid internal/app of much of its ugliness and unreadability.

Signed-off-by: Ophestra <cat@gensokyo.uk>
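The change is easiest to see from the caller's side. Below is a minimal sketch in Go; the surrounding runApp function is hypothetical, and the commented-out old flow is reconstructed from the code removed in this commit:

// runApp is a hypothetical caller, sketched only to contrast the two designs.
func runApp(seal *Outcome) {
	// Before this commit the caller drove the exit path itself:
	//
	//	var rs RunState
	//	if err := seal.Run(&rs); err != nil { /* print, clean up, pick an exit code */ }
	//	os.Exit(rs.ExitStatus())
	//
	// After this commit Main owns the entire lifecycle: it commits system
	// setup, starts the shim, reverts changes on the way out, and never returns.
	seal.Main()
	// unreachable: Main terminates the program via os.Exit.
}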
@@ -13,7 +13,6 @@ import (
 	"time"
 
 	"hakurei.app/container"
-	"hakurei.app/hst"
 	"hakurei.app/internal"
 	"hakurei.app/internal/app/state"
 	"hakurei.app/internal/hlog"
@@ -23,85 +22,166 @@ import (
 // duration to wait for shim to exit, after container WaitDelay has elapsed.
 const shimWaitTimeout = 5 * time.Second
 
-// ErrShimTimeout is returned when shim did not exit within shimWaitTimeout, after its WaitDelay has elapsed.
-// This is different from the container failing to terminate within its timeout period, as that is enforced
-// by the shim. This error is instead returned when there is a lockup in shim preventing it from completing.
-var ErrShimTimeout = errors.New("shim did not exit")
-
-// RunState stores the outcome of a call to [Outcome.Run].
-type RunState struct {
+// mainState holds persistent state bound to [Outcome.Main].
+type mainState struct {
+	// done is whether beforeExit has been called already.
+	done bool
+
 	// Time is the exact point in time where the process was created.
 	// Location must be set to UTC.
 	//
 	// Time is nil if no process was ever created.
 	Time *time.Time
-	// RevertErr is stored by the deferred revert call.
-	RevertErr error
-	// WaitErr is the generic error value created by the standard library.
-	WaitErr error
 
-	syscall.WaitStatus
+	seal    *Outcome
+	store   state.Store
+	cancel  context.CancelFunc
+	cmd     *exec.Cmd
+	cmdWait chan error
+
+	uintptr
 }
 
-// setStart stores the current time in [RunState] once.
-func (rs *RunState) setStart() {
-	if rs.Time != nil {
-		panic("attempted to store time twice")
-	}
-	now := time.Now().UTC()
-	rs.Time = &now
-}
+const (
+	// mainNeedsRevert indicates the call to Commit has succeeded.
+	mainNeedsRevert uintptr = 1 << iota
+	// mainNeedsDestroy indicates the instance state entry is present in the store.
+	mainNeedsDestroy
+)
 
-// Run commits deferred system setup and starts the container.
-func (seal *Outcome) Run(rs *RunState) error {
-	if !seal.f.CompareAndSwap(false, true) {
-		// Run does much more than just starting a process; calling it twice, even if the first call fails, will result
-		// in inconsistent state that is impossible to clean up; return here to limit damage and hopefully give the
-		// other Run a chance to return
-		return errors.New("outcome: attempted to run twice")
-	}
-
-	if rs == nil {
-		panic("invalid state")
-	}
-
-	// read comp value early to allow for early failure
-	hsuPath := internal.MustHsuPath()
-
-	if err := seal.sys.Commit(); err != nil {
-		return err
-	}
-	store := state.NewMulti(seal.runDirPath.String())
-	deferredStoreFunc := func(c state.Cursor) error { return nil } // noop until state in store
-	defer func() {
-		var revertErr error
-		storeErr := new(StateStoreError)
-		storeErr.Inner, storeErr.DoErr = store.Do(seal.user.identity.unwrap(), func(c state.Cursor) {
-			revertErr = func() error {
-				storeErr.InnerErr = deferredStoreFunc(c)
-
-				var rt system.Enablement
-				ec := system.Process
-				if states, err := c.Load(); err != nil {
-					// revert per-process state here to limit damage
-					storeErr.OpErr = err
-					return seal.sys.Revert((*system.Criteria)(&ec))
-				} else {
-					if l := len(states); l == 0 {
-						ec |= system.User
-					} else {
-						hlog.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
-					}
-
-					// accumulate enablements of remaining launchers
-					for i, s := range states {
-						if s.Config != nil {
-							rt |= s.Config.Enablements.Unwrap()
-						} else {
-							log.Printf("state entry %d does not contain config", i)
-						}
-					}
-				}
+// beforeExit must be called immediately before a call to [os.Exit].
+func (ms mainState) beforeExit(isFault bool) {
+	if ms.done {
+		panic("attempting to call beforeExit twice")
+	}
+	ms.done = true
+	defer hlog.BeforeExit()
+
+	if isFault && ms.cancel != nil {
+		ms.cancel()
+	}
+
+	var hasErr bool
+	// updates hasErr but does not terminate
+	perror := func(err error, message string) {
+		hasErr = true
+		printMessageError("cannot "+message+":", err)
+	}
+	exitCode := 1
+	defer func() {
+		if hasErr {
+			os.Exit(exitCode)
+		}
+	}()
+
+	// this also handles wait for a non-fault termination
+	if ms.cmd != nil && ms.cmdWait != nil {
+		waitDone := make(chan struct{})
+		// TODO(ophestra): enforce this limit early so it does not have to be done twice
+		shimTimeoutCompensated := shimWaitTimeout
+		if ms.seal.waitDelay > MaxShimWaitDelay {
+			shimTimeoutCompensated += MaxShimWaitDelay
+		} else {
+			shimTimeoutCompensated += ms.seal.waitDelay
+		}
+		// this ties waitDone to ctx with the additional compensated timeout duration
+		go func() { <-ms.seal.ctx.Done(); time.Sleep(shimTimeoutCompensated); close(waitDone) }()
+
+		select {
+		case err := <-ms.cmdWait:
+			wstatus, ok := ms.cmd.ProcessState.Sys().(syscall.WaitStatus)
+			if ok {
+				if v := wstatus.ExitStatus(); v != 0 {
+					hasErr = true
+					exitCode = v
+				}
+			}
+
+			if hlog.Load() {
+				if !ok {
+					if err != nil {
+						hlog.Verbosef("wait: %v", err)
+					}
+				} else {
+					switch {
+					case wstatus.Exited():
+						hlog.Verbosef("process %d exited with code %d", ms.cmd.Process.Pid, wstatus.ExitStatus())
+
+					case wstatus.CoreDump():
+						hlog.Verbosef("process %d dumped core", ms.cmd.Process.Pid)
+
+					case wstatus.Signaled():
+						hlog.Verbosef("process %d got %s", ms.cmd.Process.Pid, wstatus.Signal())
+
+					default:
+						hlog.Verbosef("process %d exited with status %#x", ms.cmd.Process.Pid, wstatus)
+					}
+				}
+			}
+
+		case <-waitDone:
+			hlog.Resume()
+			// this is only reachable when shim did not exit within shimWaitTimeout, after its WaitDelay has elapsed.
+			// This is different from the container failing to terminate within its timeout period, as that is enforced
+			// by the shim. This path is instead reached when there is a lockup in shim preventing it from completing.
+			log.Printf("process %d did not terminate", ms.cmd.Process.Pid)
+		}
+
+		hlog.Resume()
+		if ms.seal.sync != nil {
+			if err := ms.seal.sync.Close(); err != nil {
+				perror(err, "close wayland security context")
+			}
+		}
+		if ms.seal.dbusMsg != nil {
+			ms.seal.dbusMsg()
+		}
+	}
+
+	if ms.uintptr&mainNeedsRevert != 0 {
+		if ok, err := ms.store.Do(ms.seal.user.identity.unwrap(), func(c state.Cursor) {
+			if ms.uintptr&mainNeedsDestroy != 0 {
+				if err := c.Destroy(ms.seal.id.unwrap()); err != nil {
+					perror(err, "destroy state entry")
+				}
+			}
+
+			var rt system.Enablement
+			if states, err := c.Load(); err != nil {
+				// it is impossible to continue from this point;
+				// revert per-process state here to limit damage
+				ec := system.Process
+				if revertErr := ms.seal.sys.Revert((*system.Criteria)(&ec)); revertErr != nil {
+					var joinError interface {
+						Unwrap() []error
+						error
+					}
+					if !errors.As(revertErr, &joinError) || joinError == nil {
+						perror(revertErr, "revert system setup")
+					} else {
+						for _, v := range joinError.Unwrap() {
+							perror(v, "revert system setup step")
+						}
+					}
+				}
+				perror(err, "load instance states")
+			} else {
+				ec := system.Process
+				if l := len(states); l == 0 {
+					ec |= system.User
+				} else {
+					hlog.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
+				}
+
+				// accumulate enablements of remaining launchers
+				for i, s := range states {
+					if s.Config != nil {
+						rt |= s.Config.Enablements.Unwrap()
+					} else {
+						log.Printf("state entry %d does not contain config", i)
+					}
+				}
+
 				ec |= rt ^ (system.EWayland | system.EX11 | system.EDBus | system.EPulse)
 				if hlog.Load() {
 					if ec > 0 {
@@ -109,27 +189,70 @@ func (seal *Outcome) Run(rs *RunState) error {
 					}
 				}
 
-				return seal.sys.Revert((*system.Criteria)(&ec))
-			}()
-		})
-		storeErr.save(revertErr, store.Close())
-		rs.RevertErr = storeErr.equiv("clean up")
-	}()
+				if err = ms.seal.sys.Revert((*system.Criteria)(&ec)); err != nil {
+					perror(err, "revert system setup")
+				}
+			}
+		}); err != nil {
+			if ok {
+				perror(err, "unlock state store")
+			} else {
+				perror(err, "open state store")
+			}
+		}
+	} else if ms.uintptr&mainNeedsDestroy != 0 {
+		panic("unreachable")
+	}
+
+	if ms.store != nil {
+		if err := ms.store.Close(); err != nil {
+			perror(err, "close state store")
+		}
+	}
+}
+
+// fatal calls printMessageError, performs necessary cleanup, followed by a call to [os.Exit](1).
+func (ms mainState) fatal(fallback string, ferr error) {
+	printMessageError(fallback, ferr)
+	ms.beforeExit(true)
+	os.Exit(1)
+}
+
+// Main commits deferred system setup, runs the container, reverts changes to the system, and terminates the program.
+// Main does not return.
+func (seal *Outcome) Main() {
+	if !seal.f.CompareAndSwap(false, true) {
+		panic("outcome: attempted to run twice")
+	}
+
+	// read comp value early for early failure
+	hsuPath := internal.MustHsuPath()
+
+	// ms.beforeExit required beyond this point
+	ms := &mainState{seal: seal}
+
+	if err := seal.sys.Commit(); err != nil {
+		ms.fatal("cannot commit system setup:", err)
+	}
+	ms.uintptr |= mainNeedsRevert
+	ms.store = state.NewMulti(seal.runDirPath.String())
 
 	ctx, cancel := context.WithCancel(seal.ctx)
 	defer cancel()
-	cmd := exec.CommandContext(ctx, hsuPath)
-	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
-	cmd.Dir = container.FHSRoot // container init enters final working directory
+	ms.cancel = cancel
+
+	ms.cmd = exec.CommandContext(ctx, hsuPath)
+	ms.cmd.Stdin, ms.cmd.Stdout, ms.cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
+	ms.cmd.Dir = container.FHSRoot // container init enters final working directory
 	// shim runs in the same session as monitor; see shim.go for behaviour
-	cmd.Cancel = func() error { return cmd.Process.Signal(syscall.SIGCONT) }
+	ms.cmd.Cancel = func() error { return ms.cmd.Process.Signal(syscall.SIGCONT) }
 
 	var e *gob.Encoder
-	if fd, encoder, err := container.Setup(&cmd.ExtraFiles); err != nil {
-		return &hst.AppError{Step: "create shim setup pipe", Err: err}
+	if fd, encoder, err := container.Setup(&ms.cmd.ExtraFiles); err != nil {
+		ms.fatal("cannot create shim setup pipe:", err)
 	} else {
 		e = encoder
-		cmd.Env = []string{
+		ms.cmd.Env = []string{
 			// passed through to shim by hsu
 			shimEnv + "=" + strconv.Itoa(fd),
 			// interpreted by hsu
@@ -140,102 +263,78 @@ func (seal *Outcome) Run(rs *RunState) error {
 	if len(seal.user.supp) > 0 {
 		hlog.Verbosef("attaching supplementary group ids %s", seal.user.supp)
 		// interpreted by hsu
-		cmd.Env = append(cmd.Env, "HAKUREI_GROUPS="+strings.Join(seal.user.supp, " "))
+		ms.cmd.Env = append(ms.cmd.Env, "HAKUREI_GROUPS="+strings.Join(seal.user.supp, " "))
 	}
 
 	hlog.Verbosef("setuid helper at %s", hsuPath)
 	hlog.Suspend()
-	if err := cmd.Start(); err != nil {
-		return &hst.AppError{Step: "start setuid wrapper", Err: err}
+	if err := ms.cmd.Start(); err != nil {
+		ms.fatal("cannot start setuid wrapper:", err)
 	}
-	rs.setStart()
 
-	// this prevents blocking forever on an early failure
-	waitErr, setupErr := make(chan error, 1), make(chan error, 1)
-	go func() { waitErr <- cmd.Wait(); cancel() }()
-	go func() {
-		setupErr <- e.Encode(&shimParams{
-			os.Getpid(),
-			seal.waitDelay,
-			seal.container,
-			hlog.Load(),
-		})
-	}()
+	startTime := time.Now().UTC()
+	ms.cmdWait = make(chan error, 1)
+	// this ties context back to the life of the process
+	go func() { ms.cmdWait <- ms.cmd.Wait(); cancel() }()
+	ms.Time = &startTime
 
+	// unfortunately the I/O here cannot be directly canceled;
+	// the cancellation path leads to fatal in this case so that is fine
 	select {
-	case err := <-setupErr:
+	case err := <-func() (setupErr chan error) {
+		setupErr = make(chan error, 1)
+		go func() {
+			setupErr <- e.Encode(&shimParams{
+				os.Getpid(),
+				seal.waitDelay,
+				seal.container,
+				hlog.Load(),
+			})
+		}()
+		return
+	}():
 		if err != nil {
 			hlog.Resume()
-			return &hst.AppError{Step: "transmit shim config", Err: err}
+			ms.fatal("cannot transmit shim config:", err)
 		}
 
 	case <-ctx.Done():
 		hlog.Resume()
-		return newWithMessageError("shim setup canceled", syscall.ECANCELED)
+		ms.fatal("shim context canceled:", newWithMessageError("shim setup canceled", ctx.Err()))
 	}
 
-	// returned after blocking on waitErr
-	var earlyStoreErr = new(StateStoreError)
-	{
-		// shim accepted setup payload, create process state
-		sd := state.State{
-			ID:   seal.id.unwrap(),
-			PID:  cmd.Process.Pid,
-			Time: *rs.Time,
-		}
-		earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.identity.unwrap(), func(c state.Cursor) {
-			earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
-		})
-	}
-
-	// state in store at this point, destroy defunct state entry on return
-	deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
-
-	waitTimeout := make(chan struct{})
-	// TODO(ophestra): enforce this limit early so it does not have to be done twice
-	shimTimeoutCompensated := shimWaitTimeout
-	if seal.waitDelay > MaxShimWaitDelay {
-		shimTimeoutCompensated += MaxShimWaitDelay
-	} else {
-		shimTimeoutCompensated += seal.waitDelay
-	}
-	go func() { <-seal.ctx.Done(); time.Sleep(shimTimeoutCompensated); close(waitTimeout) }()
-
-	select {
-	case rs.WaitErr = <-waitErr:
-		rs.WaitStatus = cmd.ProcessState.Sys().(syscall.WaitStatus)
-		if hlog.Load() {
-			switch {
-			case rs.Exited():
-				hlog.Verbosef("process %d exited with code %d", cmd.Process.Pid, rs.ExitStatus())
-
-			case rs.CoreDump():
-				hlog.Verbosef("process %d dumped core", cmd.Process.Pid)
-
-			case rs.Signaled():
-				hlog.Verbosef("process %d got %s", cmd.Process.Pid, rs.Signal())
-
-			default:
-				hlog.Verbosef("process %d exited with status %#x", cmd.Process.Pid, rs.WaitStatus)
-			}
-		}
-
-	case <-waitTimeout:
-		rs.WaitErr = ErrShimTimeout
-		hlog.Resume()
-		// TODO(ophestra): verify this behaviour in vm tests
-		log.Printf("process %d did not terminate", cmd.Process.Pid)
-	}
-
-	hlog.Resume()
-	if seal.sync != nil {
-		if err := seal.sync.Close(); err != nil {
-			log.Printf("cannot close wayland security context: %v", err)
-		}
-	}
-	if seal.dbusMsg != nil {
-		seal.dbusMsg()
-	}
-
-	return earlyStoreErr.equiv("save process state")
+	// shim accepted setup payload, create process state
+	if ok, err := ms.store.Do(seal.user.identity.unwrap(), func(c state.Cursor) {
+		if err := c.Save(&state.State{
+			ID:   seal.id.unwrap(),
+			PID:  ms.cmd.Process.Pid,
+			Time: *ms.Time,
+		}, seal.ct); err != nil {
+			ms.fatal("cannot save state entry:", err)
+		}
+	}); err != nil {
+		if ok {
+			ms.uintptr |= mainNeedsDestroy
+			ms.fatal("cannot unlock state store:", err)
+		} else {
+			ms.fatal("cannot open state store:", err)
+		}
+	}
+	// state in store at this point, destroy defunct state entry on termination
+	ms.uintptr |= mainNeedsDestroy
+
+	// beforeExit ties shim process to context
+	ms.beforeExit(false)
+	os.Exit(0)
 }
+
+// printMessageError prints the error message according to [container.GetErrorMessage],
+// or fallback prepended to err if an error message is not available.
+func printMessageError(fallback string, err error) {
+	m, ok := container.GetErrorMessage(err)
+	if !ok {
+		log.Println(fallback, err)
+		return
+	}
+
+	log.Print(m)
+}
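A note on the compensated timeout that appears both in the removed Run and in the new beforeExit (hence the TODO about enforcing the limit early so it is not done twice): the effective deadline is shimWaitTimeout plus the container wait delay, with the delay clamped to MaxShimWaitDelay. A sketch of that computation, using a hypothetical helper name; shimWaitTimeout and MaxShimWaitDelay are the identifiers from the diff above:

// shimDeadline is a hypothetical helper expressing the computation written
// inline in the diff: clamp waitDelay to MaxShimWaitDelay, then add the
// fixed shimWaitTimeout grace period for the shim itself to exit.
func shimDeadline(waitDelay time.Duration) time.Duration {
	if waitDelay > MaxShimWaitDelay {
		waitDelay = MaxShimWaitDelay
	}
	return shimWaitTimeout + waitDelay
}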