From 16409b37a2de7dbc112e113481a2f2c45f9f09f8 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Tue, 16 Sep 2025 02:22:05 +0900 Subject: [PATCH] internal/app: compensate shim timeout This catches cases where the shim has somehow locked up, so it should wait out the full shim WaitDelay as well. Signed-off-by: Ophestra --- internal/app/process.go | 19 +++++++++++++++++-- internal/app/shim.go | 4 +++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/internal/app/process.go b/internal/app/process.go index 50d5dc8..daa7832 100644 --- a/internal/app/process.go +++ b/internal/app/process.go @@ -20,8 +20,14 @@ import ( "hakurei.app/system" ) +// duration to wait for shim to exit, after container WaitDelay has elapsed. const shimWaitTimeout = 5 * time.Second +// ErrShimTimeout is returned when shim did not exit within shimWaitTimeout, after its WaitDelay has elapsed. +// This is different from the container failing to terminate within its timeout period, as that is enforced +// by the shim. This error is instead returned when there is a lockup in shim preventing it from completing. +var ErrShimTimeout = errors.New("shim did not exit") + // RunState stores the outcome of a call to [Outcome.Run]. type RunState struct { // Time is the exact point in time where the process was created. @@ -186,7 +192,14 @@ func (seal *Outcome) Run(rs *RunState) error { deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) } waitTimeout := make(chan struct{}) - go func() { <-seal.ctx.Done(); time.Sleep(shimWaitTimeout); close(waitTimeout) }() + // TODO(ophestra): enforce this limit early so it does not have to be done twice + shimTimeoutCompensated := shimWaitTimeout + if seal.waitDelay > MaxShimWaitDelay { + shimTimeoutCompensated += MaxShimWaitDelay + } else { + shimTimeoutCompensated += seal.waitDelay + } + go func() { <-seal.ctx.Done(); time.Sleep(shimTimeoutCompensated); close(waitTimeout) }() select { case rs.WaitErr = <-waitErr: @@ -206,9 +219,11 @@ func (seal *Outcome) Run(rs *RunState) error { hlog.Verbosef("process %d exited with status %#x", cmd.Process.Pid, rs.WaitStatus) } } + case <-waitTimeout: - rs.WaitErr = syscall.ETIMEDOUT + rs.WaitErr = ErrShimTimeout hlog.Resume() + // TODO(ophestra): verify this behaviour in vm tests log.Printf("process %d did not terminate", cmd.Process.Pid) } diff --git a/internal/app/shim.go b/internal/app/shim.go index 5eaf833..e38104c 100644 --- a/internal/app/shim.go +++ b/internal/app/shim.go @@ -45,8 +45,10 @@ const ( // ShimExitOrphan is returned when the shim is orphaned before monitor delivers a signal. ShimExitOrphan = 3 + // DefaultShimWaitDelay is used when WaitDelay has its zero value. DefaultShimWaitDelay = 5 * time.Second - MaxShimWaitDelay = 30 * time.Second + // MaxShimWaitDelay is used instead if WaitDelay exceeds its value. + MaxShimWaitDelay = 30 * time.Second ) // ShimMain is the main function of the shim process and runs as the unconstrained target user.