app: improve shim process management
All checks were successful
Test / Create distribution (push) Successful in 29s
Test / Sandbox (push) Successful in 1m52s
Test / Fortify (push) Successful in 2m45s
Test / Sandbox (race detector) (push) Successful in 2m52s
Test / Fpkg (push) Successful in 3m38s
Test / Fortify (race detector) (push) Successful in 4m14s
Test / Flake checks (push) Successful in 1m6s

This ensures SIGKILL gets delivered to the process instead of relying on the parent exiting.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
Ophestra 2025-04-04 10:57:27 +09:00
parent 12be7bc78e
commit c47ce10061
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
4 changed files with 119 additions and 170 deletions

View File

@ -36,6 +36,13 @@ type RunState struct {
WaitErr error WaitErr error
} }
func (rs *RunState) StoreTime(v time.Time) {
if rs.Time != nil {
panic("attempted to store time twice")
}
rs.Time = &v
}
// Paths contains environment-dependent paths used by fortify. // Paths contains environment-dependent paths used by fortify.
type Paths struct { type Paths struct {
// path to shared directory (usually `/tmp/fortify.%d`) // path to shared directory (usually `/tmp/fortify.%d`)

View File

@ -121,7 +121,7 @@ type StateStoreError struct {
} }
// save saves arbitrary errors in [StateStoreError] once. // save saves arbitrary errors in [StateStoreError] once.
func (e *StateStoreError) save(errs []error) { func (e *StateStoreError) save(errs ...error) {
if len(errs) == 0 || e.Err != nil { if len(errs) == 0 || e.Err != nil {
panic("invalid call to save") panic("invalid call to save")
} }

View File

@ -2,21 +2,32 @@ package app
import ( import (
"context" "context"
"encoding/gob"
"errors" "errors"
"log" "log"
"os"
"os/exec" "os/exec"
"runtime"
"strconv"
"strings"
"syscall"
"time" "time"
"git.gensokyo.uk/security/fortify/fst" "git.gensokyo.uk/security/fortify/fst"
"git.gensokyo.uk/security/fortify/internal" "git.gensokyo.uk/security/fortify/internal"
"git.gensokyo.uk/security/fortify/internal/fmsg" "git.gensokyo.uk/security/fortify/internal/fmsg"
"git.gensokyo.uk/security/fortify/internal/state" "git.gensokyo.uk/security/fortify/internal/state"
"git.gensokyo.uk/security/fortify/sandbox"
"git.gensokyo.uk/security/fortify/system" "git.gensokyo.uk/security/fortify/system"
) )
const shimSetupTimeout = 5 * time.Second const shimSetupTimeout = 5 * time.Second
func (seal *outcome) Run(rs *fst.RunState) error { func (seal *outcome) Run(rs *fst.RunState) error {
// there is some kind of action at a distance caused by scheduling if this is not also set
// causing xdg-dbus-proxy and potentially other helpers spuriously getting sent SIGKILL
runtime.LockOSThread()
if !seal.f.CompareAndSwap(false, true) { if !seal.f.CompareAndSwap(false, true) {
// run does much more than just starting a process; calling it twice, even if the first call fails, will result // run does much more than just starting a process; calling it twice, even if the first call fails, will result
// in inconsistent state that is impossible to clean up; return here to limit damage and hopefully give the // in inconsistent state that is impossible to clean up; return here to limit damage and hopefully give the
@ -28,19 +39,14 @@ func (seal *outcome) Run(rs *fst.RunState) error {
panic("invalid state") panic("invalid state")
} }
// read comp values early to allow for early failure // read comp value early to allow for early failure
fmsg.Verbosef("version %s", internal.Version()) fsuPath := internal.MustFsuPath()
fmsg.Verbosef("setuid helper at %s", internal.MustFsuPath())
/*
prepare/revert os state
*/
if err := seal.sys.Commit(seal.ctx); err != nil { if err := seal.sys.Commit(seal.ctx); err != nil {
return err return err
} }
store := state.NewMulti(seal.runDirPath) store := state.NewMulti(seal.runDirPath)
deferredStoreFunc := func(c state.Cursor) error { return nil } deferredStoreFunc := func(c state.Cursor) error { return nil } // noop until state in store
defer func() { defer func() {
var revertErr error var revertErr error
storeErr := new(StateStoreError) storeErr := new(StateStoreError)
@ -48,10 +54,6 @@ func (seal *outcome) Run(rs *fst.RunState) error {
revertErr = func() error { revertErr = func() error {
storeErr.InnerErr = deferredStoreFunc(c) storeErr.InnerErr = deferredStoreFunc(c)
/*
revert app setup transaction
*/
var rt system.Enablement var rt system.Enablement
ec := system.Process ec := system.Process
if states, err := c.Load(); err != nil { if states, err := c.Load(); err != nil {
@ -60,10 +62,9 @@ func (seal *outcome) Run(rs *fst.RunState) error {
return seal.sys.Revert((*system.Criteria)(&ec)) return seal.sys.Revert((*system.Criteria)(&ec))
} else { } else {
if l := len(states); l == 0 { if l := len(states); l == 0 {
fmsg.Verbose("no other launchers active, will clean up globals")
ec |= system.User ec |= system.User
} else { } else {
fmsg.Verbosef("found %d active launchers, cleaning up without globals", l) fmsg.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
} }
// accumulate enablements of remaining launchers // accumulate enablements of remaining launchers
@ -78,62 +79,112 @@ func (seal *outcome) Run(rs *fst.RunState) error {
ec |= rt ^ (system.EWayland | system.EX11 | system.EDBus | system.EPulse) ec |= rt ^ (system.EWayland | system.EX11 | system.EDBus | system.EPulse)
if fmsg.Load() { if fmsg.Load() {
if ec > 0 { if ec > 0 {
fmsg.Verbose("reverting operations type", system.TypeString(ec)) fmsg.Verbose("reverting operations scope", system.TypeString(ec))
} }
} }
return seal.sys.Revert((*system.Criteria)(&ec)) return seal.sys.Revert((*system.Criteria)(&ec))
}() }()
}) })
storeErr.save([]error{revertErr, store.Close()}) storeErr.save(revertErr, store.Close())
rs.RevertErr = storeErr.equiv("error returned during cleanup:") rs.RevertErr = storeErr.equiv("error during cleanup:")
}() }()
/* ctx, cancel := context.WithCancel(seal.ctx)
shim process lifecycle defer cancel()
*/ cmd := exec.CommandContext(ctx, fsuPath)
cmd.SysProcAttr = new(syscall.SysProcAttr)
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
cmd.Dir = "/" // container init enters final working directory
waitErr := make(chan error, 1) var encoder *gob.Encoder
cmd := new(shimProcess) if fd, e, err := sandbox.Setup(&cmd.ExtraFiles); err != nil {
if startTime, err := cmd.Start( return fmsg.WrapErrorSuffix(err,
seal.user.aid.String(), "cannot create shim setup pipe:")
seal.user.supp,
); err != nil {
return err
} else { } else {
// whether/when the fsu process was created encoder = e
rs.Time = startTime cmd.Env = []string{
// passed through to shim by fsu
shimEnv + "=" + strconv.Itoa(fd),
// interpreted by fsu
"FORTIFY_APP_ID=" + seal.user.aid.String(),
}
} }
ctx, cancel := context.WithTimeout(seal.ctx, shimSetupTimeout) if len(seal.user.supp) > 0 {
defer cancel() fmsg.Verbosef("attaching supplementary group ids %s", seal.user.supp)
// interpreted by fsu
cmd.Env = append(cmd.Env, "FORTIFY_GROUPS="+strings.Join(seal.user.supp, " "))
}
{
startErr := make(chan error, 1)
go func() { go func() {
waitErr <- cmd.Unwrap().Wait() runtime.LockOSThread()
// cancel shim setup in case shim died before receiving payload fmsg.Verbosef("setuid helper at %s", fsuPath)
cancel() fmsg.Suspend()
killShim := make(chan struct{})
// the parent-death signal is delivered despite the child having a different uid
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
cmd.Cancel = func() error { close(killShim); return nil }
startErr <- fmsg.WrapErrorSuffix(cmd.Start(),
"cannot start setuid wrapper:")
<-killShim
}() }()
if err := cmd.Serve(ctx, &shimParams{ if err := <-startErr; err != nil {
return err
}
rs.StoreTime(time.Now().UTC())
}
waitErr := make(chan error, 1)
go func() { waitErr <- cmd.Wait(); cancel() }()
{
encodeErr := make(chan error, 1)
go func() {
encodeErr <- encoder.Encode(&shimParams{
Container: seal.container, Container: seal.container,
Home: seal.user.data, Home: seal.user.data,
Verbose: fmsg.Load(), Verbose: fmsg.Load(),
}); err != nil { })
return err }()
select {
case err := <-encodeErr:
if err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
} }
case <-time.After(shimSetupTimeout):
return fmsg.WrapError(syscall.ETIMEDOUT,
"deadline exceeded waiting for shim")
case <-ctx.Done():
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
}
// returned after blocking on waitErr
var earlyStoreErr = new(StateStoreError)
{
// shim accepted setup payload, create process state // shim accepted setup payload, create process state
sd := state.State{ sd := state.State{
ID: seal.id.unwrap(), ID: seal.id.unwrap(),
PID: cmd.Unwrap().Process.Pid, PID: cmd.Process.Pid,
Time: *rs.Time, Time: *rs.Time,
} }
var earlyStoreErr = new(StateStoreError) // returned after blocking on waitErr
earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) { earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) {
earlyStoreErr.InnerErr = c.Save(&sd, seal.ct) earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
}) })
// destroy defunct state entry }
// state in store at this point, destroy defunct state entry on return
deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) } deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
select { select {
@ -148,22 +199,14 @@ func (seal *outcome) Run(rs *fst.RunState) error {
// store non-zero return code // store non-zero return code
rs.ExitCode = exitError.ExitCode() rs.ExitCode = exitError.ExitCode()
} else { } else {
rs.ExitCode = cmd.Unwrap().ProcessState.ExitCode() rs.ExitCode = cmd.ProcessState.ExitCode()
} }
if fmsg.Load() { if fmsg.Load() {
fmsg.Verbosef("process %d exited with exit code %d", cmd.Unwrap().Process.Pid, rs.ExitCode) fmsg.Verbosef("process %d exited with exit code %d", cmd.Process.Pid, rs.ExitCode)
} }
// this is reached when a fault makes an already running shim impossible to continue execution case <-ctx.Done():
// however a kill signal could not be delivered (should actually always happen like that since fsu) fmsg.Verbose("shim process canceled")
// the effects of this is similar to the alternative exit path and ensures shim death
case err := <-cmd.Fallback():
rs.ExitCode = 255
log.Printf("cannot terminate shim on faulted setup: %v", err)
// alternative exit path relying on shim behaviour on monitor process exit
case <-seal.ctx.Done():
fmsg.Verbose("alternative exit path selected")
} }
fmsg.Resume() fmsg.Resume()

View File

@ -2,14 +2,11 @@ package app
import ( import (
"context" "context"
"encoding/gob"
"errors" "errors"
"log" "log"
"os" "os"
"os/exec" "os/exec"
"os/signal" "os/signal"
"strconv"
"strings"
"syscall" "syscall"
"time" "time"
@ -112,101 +109,3 @@ func ShimMain() {
os.Exit(exitError.ExitCode()) os.Exit(exitError.ExitCode())
} }
} }
type shimProcess struct {
// user switcher process
cmd *exec.Cmd
// fallback exit notifier with error returned killing the process
killFallback chan error
// monitor to shim encoder
encoder *gob.Encoder
}
func (s *shimProcess) Unwrap() *exec.Cmd { return s.cmd }
func (s *shimProcess) Fallback() chan error { return s.killFallback }
func (s *shimProcess) String() string {
if s.cmd == nil {
return "(unused shim manager)"
}
return s.cmd.String()
}
func (s *shimProcess) Start(
aid string,
supp []string,
) (*time.Time, error) {
// prepare user switcher invocation
fsuPath := internal.MustFsuPath()
s.cmd = exec.Command(fsuPath)
// pass shim setup pipe
if fd, e, err := sandbox.Setup(&s.cmd.ExtraFiles); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot create shim setup pipe:")
} else {
s.encoder = e
s.cmd.Env = []string{
shimEnv + "=" + strconv.Itoa(fd),
"FORTIFY_APP_ID=" + aid,
}
}
// format fsu supplementary groups
if len(supp) > 0 {
fmsg.Verbosef("attaching supplementary group ids %s", supp)
s.cmd.Env = append(s.cmd.Env, "FORTIFY_GROUPS="+strings.Join(supp, " "))
}
s.cmd.Stdin, s.cmd.Stdout, s.cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
s.cmd.Dir = "/"
fmsg.Verbose("starting shim via fsu:", s.cmd)
// withhold messages to stderr
fmsg.Suspend()
if err := s.cmd.Start(); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot start fsu:")
}
startTime := time.Now().UTC()
return &startTime, nil
}
func (s *shimProcess) Serve(ctx context.Context, params *shimParams) error {
// kill shim if something goes wrong and an error is returned
s.killFallback = make(chan error, 1)
killShim := func() {
if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
s.killFallback <- err
}
}
defer func() { killShim() }()
encodeErr := make(chan error)
go func() { encodeErr <- s.encoder.Encode(params) }()
select {
// encode return indicates setup completion
case err := <-encodeErr:
if err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
}
killShim = func() {}
return nil
// setup canceled before payload was accepted
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.Canceled) {
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
if errors.Is(err, context.DeadlineExceeded) {
return fmsg.WrapError(syscall.ETIMEDOUT,
"deadline exceeded waiting for shim")
}
// unreachable
return err
}
}