internal/app: compensate shim timeout
All checks were successful
Test / Create distribution (push) Successful in 34s
Test / Sandbox (push) Successful in 2m15s
Test / Hakurei (push) Successful in 3m13s
Test / Hpkg (push) Successful in 4m0s
Test / Sandbox (race detector) (push) Successful in 4m32s
Test / Hakurei (race detector) (push) Successful in 5m9s
Test / Flake checks (push) Successful in 1m23s

This timeout catches cases where the shim has somehow locked up, so it should wait out the full shim WaitDelay as well.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
Ophestra 2025-09-16 02:22:05 +09:00
parent a2a291791c
commit 16409b37a2
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
2 changed files with 20 additions and 3 deletions

View File

@ -20,8 +20,14 @@ import (
"hakurei.app/system"
)
// shimWaitTimeout is the duration to wait for shim to exit, after container WaitDelay has elapsed.
const shimWaitTimeout = 5 * time.Second
// ErrShimTimeout is returned when the shim did not exit within shimWaitTimeout, after its WaitDelay has elapsed.
// This is different from the container failing to terminate within its timeout period, as that is enforced
// by the shim. This error is instead returned when there is a lockup in the shim preventing it from completing.
var ErrShimTimeout = errors.New("shim did not exit")
// RunState stores the outcome of a call to [Outcome.Run].
type RunState struct {
// Time is the exact point in time where the process was created.
@ -186,7 +192,14 @@ func (seal *Outcome) Run(rs *RunState) error {
deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
waitTimeout := make(chan struct{})
go func() { <-seal.ctx.Done(); time.Sleep(shimWaitTimeout); close(waitTimeout) }()
// TODO(ophestra): enforce this limit early so it does not have to be done twice
shimTimeoutCompensated := shimWaitTimeout
if seal.waitDelay > MaxShimWaitDelay {
shimTimeoutCompensated += MaxShimWaitDelay
} else {
shimTimeoutCompensated += seal.waitDelay
}
go func() { <-seal.ctx.Done(); time.Sleep(shimTimeoutCompensated); close(waitTimeout) }()
select {
case rs.WaitErr = <-waitErr:
@ -206,9 +219,11 @@ func (seal *Outcome) Run(rs *RunState) error {
hlog.Verbosef("process %d exited with status %#x", cmd.Process.Pid, rs.WaitStatus)
}
}
case <-waitTimeout:
rs.WaitErr = syscall.ETIMEDOUT
rs.WaitErr = ErrShimTimeout
hlog.Resume()
// TODO(ophestra): verify this behaviour in vm tests
log.Printf("process %d did not terminate", cmd.Process.Pid)
}

View File

@ -45,7 +45,9 @@ const (
// ShimExitOrphan is returned when the shim is orphaned before monitor delivers a signal.
ShimExitOrphan = 3
// DefaultShimWaitDelay is used when WaitDelay has its zero value.
DefaultShimWaitDelay = 5 * time.Second
// MaxShimWaitDelay is used in place of WaitDelay when WaitDelay exceeds it.
MaxShimWaitDelay = 30 * time.Second
)