Compare commits

...

1 Commits

Author SHA1 Message Date
c47ce10061
app: improve shim process management
All checks were successful
Test / Create distribution (push) Successful in 29s
Test / Sandbox (push) Successful in 1m52s
Test / Fortify (push) Successful in 2m45s
Test / Sandbox (race detector) (push) Successful in 2m52s
Test / Fpkg (push) Successful in 3m38s
Test / Fortify (race detector) (push) Successful in 4m14s
Test / Flake checks (push) Successful in 1m6s
This ensures SIGKILL gets delivered to the process instead of relying on the parent exiting.

Signed-off-by: Ophestra <cat@gensokyo.uk>
2025-04-05 00:09:13 +09:00
4 changed files with 119 additions and 170 deletions

View File

@ -36,6 +36,13 @@ type RunState struct {
WaitErr error
}
func (rs *RunState) StoreTime(v time.Time) {
if rs.Time != nil {
panic("attempted to store time twice")
}
rs.Time = &v
}
// Paths contains environment-dependent paths used by fortify.
type Paths struct {
// path to shared directory (usually `/tmp/fortify.%d`)

View File

@ -121,7 +121,7 @@ type StateStoreError struct {
}
// save saves arbitrary errors in [StateStoreError] once.
func (e *StateStoreError) save(errs []error) {
func (e *StateStoreError) save(errs ...error) {
if len(errs) == 0 || e.Err != nil {
panic("invalid call to save")
}

View File

@ -2,21 +2,32 @@ package app
import (
"context"
"encoding/gob"
"errors"
"log"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"git.gensokyo.uk/security/fortify/fst"
"git.gensokyo.uk/security/fortify/internal"
"git.gensokyo.uk/security/fortify/internal/fmsg"
"git.gensokyo.uk/security/fortify/internal/state"
"git.gensokyo.uk/security/fortify/sandbox"
"git.gensokyo.uk/security/fortify/system"
)
const shimSetupTimeout = 5 * time.Second
func (seal *outcome) Run(rs *fst.RunState) error {
// there is some kind of action at a distance caused by scheduling if this is not also set
// causing xdg-dbus-proxy and potentially other helpers spuriously getting sent SIGKILL
runtime.LockOSThread()
if !seal.f.CompareAndSwap(false, true) {
// run does much more than just starting a process; calling it twice, even if the first call fails, will result
// in inconsistent state that is impossible to clean up; return here to limit damage and hopefully give the
@ -28,19 +39,14 @@ func (seal *outcome) Run(rs *fst.RunState) error {
panic("invalid state")
}
// read comp values early to allow for early failure
fmsg.Verbosef("version %s", internal.Version())
fmsg.Verbosef("setuid helper at %s", internal.MustFsuPath())
/*
prepare/revert os state
*/
// read comp value early to allow for early failure
fsuPath := internal.MustFsuPath()
if err := seal.sys.Commit(seal.ctx); err != nil {
return err
}
store := state.NewMulti(seal.runDirPath)
deferredStoreFunc := func(c state.Cursor) error { return nil }
deferredStoreFunc := func(c state.Cursor) error { return nil } // noop until state in store
defer func() {
var revertErr error
storeErr := new(StateStoreError)
@ -48,10 +54,6 @@ func (seal *outcome) Run(rs *fst.RunState) error {
revertErr = func() error {
storeErr.InnerErr = deferredStoreFunc(c)
/*
revert app setup transaction
*/
var rt system.Enablement
ec := system.Process
if states, err := c.Load(); err != nil {
@ -60,10 +62,9 @@ func (seal *outcome) Run(rs *fst.RunState) error {
return seal.sys.Revert((*system.Criteria)(&ec))
} else {
if l := len(states); l == 0 {
fmsg.Verbose("no other launchers active, will clean up globals")
ec |= system.User
} else {
fmsg.Verbosef("found %d active launchers, cleaning up without globals", l)
fmsg.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
}
// accumulate enablements of remaining launchers
@ -78,62 +79,112 @@ func (seal *outcome) Run(rs *fst.RunState) error {
ec |= rt ^ (system.EWayland | system.EX11 | system.EDBus | system.EPulse)
if fmsg.Load() {
if ec > 0 {
fmsg.Verbose("reverting operations type", system.TypeString(ec))
fmsg.Verbose("reverting operations scope", system.TypeString(ec))
}
}
return seal.sys.Revert((*system.Criteria)(&ec))
}()
})
storeErr.save([]error{revertErr, store.Close()})
rs.RevertErr = storeErr.equiv("error returned during cleanup:")
storeErr.save(revertErr, store.Close())
rs.RevertErr = storeErr.equiv("error during cleanup:")
}()
/*
shim process lifecycle
*/
ctx, cancel := context.WithCancel(seal.ctx)
defer cancel()
cmd := exec.CommandContext(ctx, fsuPath)
cmd.SysProcAttr = new(syscall.SysProcAttr)
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
cmd.Dir = "/" // container init enters final working directory
var encoder *gob.Encoder
if fd, e, err := sandbox.Setup(&cmd.ExtraFiles); err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot create shim setup pipe:")
} else {
encoder = e
cmd.Env = []string{
// passed through to shim by fsu
shimEnv + "=" + strconv.Itoa(fd),
// interpreted by fsu
"FORTIFY_APP_ID=" + seal.user.aid.String(),
}
}
if len(seal.user.supp) > 0 {
fmsg.Verbosef("attaching supplementary group ids %s", seal.user.supp)
// interpreted by fsu
cmd.Env = append(cmd.Env, "FORTIFY_GROUPS="+strings.Join(seal.user.supp, " "))
}
{
startErr := make(chan error, 1)
go func() {
runtime.LockOSThread()
fmsg.Verbosef("setuid helper at %s", fsuPath)
fmsg.Suspend()
killShim := make(chan struct{})
// the parent-death signal is delivered despite the child having a different uid
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
cmd.Cancel = func() error { close(killShim); return nil }
startErr <- fmsg.WrapErrorSuffix(cmd.Start(),
"cannot start setuid wrapper:")
<-killShim
}()
if err := <-startErr; err != nil {
return err
}
rs.StoreTime(time.Now().UTC())
}
waitErr := make(chan error, 1)
cmd := new(shimProcess)
if startTime, err := cmd.Start(
seal.user.aid.String(),
seal.user.supp,
); err != nil {
return err
} else {
// whether/when the fsu process was created
rs.Time = startTime
go func() { waitErr <- cmd.Wait(); cancel() }()
{
encodeErr := make(chan error, 1)
go func() {
encodeErr <- encoder.Encode(&shimParams{
Container: seal.container,
Home: seal.user.data,
Verbose: fmsg.Load(),
})
}()
select {
case err := <-encodeErr:
if err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
}
case <-time.After(shimSetupTimeout):
return fmsg.WrapError(syscall.ETIMEDOUT,
"deadline exceeded waiting for shim")
case <-ctx.Done():
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
}
ctx, cancel := context.WithTimeout(seal.ctx, shimSetupTimeout)
defer cancel()
go func() {
waitErr <- cmd.Unwrap().Wait()
// cancel shim setup in case shim died before receiving payload
cancel()
}()
if err := cmd.Serve(ctx, &shimParams{
Container: seal.container,
Home: seal.user.data,
Verbose: fmsg.Load(),
}); err != nil {
return err
// returned after blocking on waitErr
var earlyStoreErr = new(StateStoreError)
{
// shim accepted setup payload, create process state
sd := state.State{
ID: seal.id.unwrap(),
PID: cmd.Process.Pid,
Time: *rs.Time,
}
earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) {
earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
})
}
// shim accepted setup payload, create process state
sd := state.State{
ID: seal.id.unwrap(),
PID: cmd.Unwrap().Process.Pid,
Time: *rs.Time,
}
var earlyStoreErr = new(StateStoreError) // returned after blocking on waitErr
earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) {
earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
})
// destroy defunct state entry
// state in store at this point, destroy defunct state entry on return
deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
select {
@ -148,22 +199,14 @@ func (seal *outcome) Run(rs *fst.RunState) error {
// store non-zero return code
rs.ExitCode = exitError.ExitCode()
} else {
rs.ExitCode = cmd.Unwrap().ProcessState.ExitCode()
rs.ExitCode = cmd.ProcessState.ExitCode()
}
if fmsg.Load() {
fmsg.Verbosef("process %d exited with exit code %d", cmd.Unwrap().Process.Pid, rs.ExitCode)
fmsg.Verbosef("process %d exited with exit code %d", cmd.Process.Pid, rs.ExitCode)
}
// this is reached when a fault makes an already running shim impossible to continue execution
// however a kill signal could not be delivered (should actually always happen like that since fsu)
// the effects of this is similar to the alternative exit path and ensures shim death
case err := <-cmd.Fallback():
rs.ExitCode = 255
log.Printf("cannot terminate shim on faulted setup: %v", err)
// alternative exit path relying on shim behaviour on monitor process exit
case <-seal.ctx.Done():
fmsg.Verbose("alternative exit path selected")
case <-ctx.Done():
fmsg.Verbose("shim process canceled")
}
fmsg.Resume()

View File

@ -2,14 +2,11 @@ package app
import (
"context"
"encoding/gob"
"errors"
"log"
"os"
"os/exec"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
@ -112,101 +109,3 @@ func ShimMain() {
os.Exit(exitError.ExitCode())
}
}
type shimProcess struct {
// user switcher process
cmd *exec.Cmd
// fallback exit notifier with error returned killing the process
killFallback chan error
// monitor to shim encoder
encoder *gob.Encoder
}
func (s *shimProcess) Unwrap() *exec.Cmd { return s.cmd }
func (s *shimProcess) Fallback() chan error { return s.killFallback }
func (s *shimProcess) String() string {
if s.cmd == nil {
return "(unused shim manager)"
}
return s.cmd.String()
}
func (s *shimProcess) Start(
aid string,
supp []string,
) (*time.Time, error) {
// prepare user switcher invocation
fsuPath := internal.MustFsuPath()
s.cmd = exec.Command(fsuPath)
// pass shim setup pipe
if fd, e, err := sandbox.Setup(&s.cmd.ExtraFiles); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot create shim setup pipe:")
} else {
s.encoder = e
s.cmd.Env = []string{
shimEnv + "=" + strconv.Itoa(fd),
"FORTIFY_APP_ID=" + aid,
}
}
// format fsu supplementary groups
if len(supp) > 0 {
fmsg.Verbosef("attaching supplementary group ids %s", supp)
s.cmd.Env = append(s.cmd.Env, "FORTIFY_GROUPS="+strings.Join(supp, " "))
}
s.cmd.Stdin, s.cmd.Stdout, s.cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
s.cmd.Dir = "/"
fmsg.Verbose("starting shim via fsu:", s.cmd)
// withhold messages to stderr
fmsg.Suspend()
if err := s.cmd.Start(); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot start fsu:")
}
startTime := time.Now().UTC()
return &startTime, nil
}
func (s *shimProcess) Serve(ctx context.Context, params *shimParams) error {
// kill shim if something goes wrong and an error is returned
s.killFallback = make(chan error, 1)
killShim := func() {
if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
s.killFallback <- err
}
}
defer func() { killShim() }()
encodeErr := make(chan error)
go func() { encodeErr <- s.encoder.Encode(params) }()
select {
// encode return indicates setup completion
case err := <-encodeErr:
if err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
}
killShim = func() {}
return nil
// setup canceled before payload was accepted
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.Canceled) {
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
if errors.Is(err, context.DeadlineExceeded) {
return fmsg.WrapError(syscall.ETIMEDOUT,
"deadline exceeded waiting for shim")
}
// unreachable
return err
}
}