app: improve shim process management
All checks were successful
Test / Create distribution (push) Successful in 27s
Test / Sandbox (push) Successful in 1m45s
Test / Fortify (push) Successful in 2m36s
Test / Sandbox (race detector) (push) Successful in 2m49s
Test / Fpkg (push) Successful in 3m33s
Test / Fortify (race detector) (push) Successful in 4m13s
Test / Flake checks (push) Successful in 1m6s

This ensures a signal gets delivered to the process instead of relying on parent death behaviour.

SIGCONT was chosen as it is the only signal an unprivileged process is allowed to send to processes with different credentials.

A custom signal handler is installed because the Go runtime does not expose signal information other than which signal was received, and shim must check pid to ensure reasonable behaviour.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
Ophestra 2025-04-05 23:51:39 +09:00
parent 12be7bc78e
commit e9a7cd526f
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
7 changed files with 204 additions and 203 deletions

View File

@ -13,16 +13,16 @@ func mustRunApp(ctx context.Context, config *fst.Config, beforeFail func()) {
rs := new(fst.RunState)
a := app.MustNew(ctx, std)
var code int
if sa, err := a.Seal(config); err != nil {
fmsg.PrintBaseError(err, "cannot seal app:")
rs.ExitCode = 1
code = 1
} else {
// this updates ExitCode
app.PrintRunStateErr(rs, sa.Run(rs))
code = app.PrintRunStateErr(rs, sa.Run(rs))
}
if rs.ExitCode != 0 {
if code != 0 {
beforeFail()
os.Exit(rs.ExitCode)
os.Exit(code)
}
}

View File

@ -2,6 +2,7 @@
package fst
import (
"syscall"
"time"
)
@ -28,12 +29,21 @@ type RunState struct {
//
// Time is nil if no process was ever created.
Time *time.Time
// ExitCode is the value returned by shim.
ExitCode int
// RevertErr is stored by the deferred revert call.
RevertErr error
// WaitErr is error returned by the underlying wait syscall.
// WaitErr is the generic error value created by the standard library.
WaitErr error
syscall.WaitStatus
}
// SetStart stores the current time in [RunState] once.
func (rs *RunState) SetStart() {
if rs.Time != nil {
panic("attempted to store time twice")
}
now := time.Now().UTC()
rs.Time = &now
}
// Paths contains environment-dependent paths used by fortify.

View File

@ -8,7 +8,9 @@ import (
"git.gensokyo.uk/security/fortify/internal/fmsg"
)
func PrintRunStateErr(rs *fst.RunState, runErr error) {
func PrintRunStateErr(rs *fst.RunState, runErr error) (code int) {
code = rs.ExitStatus()
if runErr != nil {
if rs.Time == nil {
fmsg.PrintBaseError(runErr, "cannot start app:")
@ -49,8 +51,8 @@ func PrintRunStateErr(rs *fst.RunState, runErr error) {
}
}
if rs.ExitCode == 0 {
rs.ExitCode = 126
if code == 0 {
code = 126
}
}
@ -97,13 +99,14 @@ func PrintRunStateErr(rs *fst.RunState, runErr error) {
}
out:
if rs.ExitCode == 0 {
rs.ExitCode = 128
if code == 0 {
code = 128
}
}
if rs.WaitErr != nil {
log.Println("inner wait failed:", rs.WaitErr)
fmsg.Verbosef("wait: %v", rs.WaitErr)
}
return
}
// StateStoreError is returned for a failed state save
@ -121,7 +124,7 @@ type StateStoreError struct {
}
// save saves arbitrary errors in [StateStoreError] once.
func (e *StateStoreError) save(errs []error) {
func (e *StateStoreError) save(errs ...error) {
if len(errs) == 0 || e.Err != nil {
panic("invalid call to save")
}

View File

@ -2,45 +2,46 @@ package app
import (
"context"
"encoding/gob"
"errors"
"log"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
"time"
"git.gensokyo.uk/security/fortify/fst"
"git.gensokyo.uk/security/fortify/internal"
"git.gensokyo.uk/security/fortify/internal/fmsg"
"git.gensokyo.uk/security/fortify/internal/state"
"git.gensokyo.uk/security/fortify/sandbox"
"git.gensokyo.uk/security/fortify/system"
)
const shimSetupTimeout = 5 * time.Second
const shimWaitTimeout = 5 * time.Second
func (seal *outcome) Run(rs *fst.RunState) error {
if !seal.f.CompareAndSwap(false, true) {
// run does much more than just starting a process; calling it twice, even if the first call fails, will result
// in inconsistent state that is impossible to clean up; return here to limit damage and hopefully give the
// other Run a chance to return
panic("attempted to run twice")
return errors.New("outcome: attempted to run twice")
}
if rs == nil {
panic("invalid state")
}
// read comp values early to allow for early failure
fmsg.Verbosef("version %s", internal.Version())
fmsg.Verbosef("setuid helper at %s", internal.MustFsuPath())
/*
prepare/revert os state
*/
// read comp value early to allow for early failure
fsuPath := internal.MustFsuPath()
if err := seal.sys.Commit(seal.ctx); err != nil {
return err
}
store := state.NewMulti(seal.runDirPath)
deferredStoreFunc := func(c state.Cursor) error { return nil }
deferredStoreFunc := func(c state.Cursor) error { return nil } // noop until state in store
defer func() {
var revertErr error
storeErr := new(StateStoreError)
@ -48,10 +49,6 @@ func (seal *outcome) Run(rs *fst.RunState) error {
revertErr = func() error {
storeErr.InnerErr = deferredStoreFunc(c)
/*
revert app setup transaction
*/
var rt system.Enablement
ec := system.Process
if states, err := c.Load(); err != nil {
@ -60,10 +57,9 @@ func (seal *outcome) Run(rs *fst.RunState) error {
return seal.sys.Revert((*system.Criteria)(&ec))
} else {
if l := len(states); l == 0 {
fmsg.Verbose("no other launchers active, will clean up globals")
ec |= system.User
} else {
fmsg.Verbosef("found %d active launchers, cleaning up without globals", l)
fmsg.Verbosef("found %d instances, cleaning up without user-scoped operations", l)
}
// accumulate enablements of remaining launchers
@ -78,92 +74,111 @@ func (seal *outcome) Run(rs *fst.RunState) error {
ec |= rt ^ (system.EWayland | system.EX11 | system.EDBus | system.EPulse)
if fmsg.Load() {
if ec > 0 {
fmsg.Verbose("reverting operations type", system.TypeString(ec))
fmsg.Verbose("reverting operations scope", system.TypeString(ec))
}
}
return seal.sys.Revert((*system.Criteria)(&ec))
}()
})
storeErr.save([]error{revertErr, store.Close()})
rs.RevertErr = storeErr.equiv("error returned during cleanup:")
storeErr.save(revertErr, store.Close())
rs.RevertErr = storeErr.equiv("error during cleanup:")
}()
/*
shim process lifecycle
*/
waitErr := make(chan error, 1)
cmd := new(shimProcess)
if startTime, err := cmd.Start(
seal.user.aid.String(),
seal.user.supp,
); err != nil {
return err
} else {
// whether/when the fsu process was created
rs.Time = startTime
}
ctx, cancel := context.WithTimeout(seal.ctx, shimSetupTimeout)
ctx, cancel := context.WithCancel(seal.ctx)
defer cancel()
cmd := exec.CommandContext(ctx, fsuPath)
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
cmd.Dir = "/" // container init enters final working directory
// shim runs in the same session as monitor; see shim.go for behaviour
cmd.Cancel = func() error { return cmd.Process.Signal(syscall.SIGCONT) }
go func() {
waitErr <- cmd.Unwrap().Wait()
// cancel shim setup in case shim died before receiving payload
cancel()
}()
if err := cmd.Serve(ctx, &shimParams{
Container: seal.container,
Home: seal.user.data,
Verbose: fmsg.Load(),
}); err != nil {
return err
var e *gob.Encoder
if fd, encoder, err := sandbox.Setup(&cmd.ExtraFiles); err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot create shim setup pipe:")
} else {
e = encoder
cmd.Env = []string{
// passed through to shim by fsu
shimEnv + "=" + strconv.Itoa(fd),
// interpreted by fsu
"FORTIFY_APP_ID=" + seal.user.aid.String(),
}
}
// shim accepted setup payload, create process state
sd := state.State{
ID: seal.id.unwrap(),
PID: cmd.Unwrap().Process.Pid,
Time: *rs.Time,
if len(seal.user.supp) > 0 {
fmsg.Verbosef("attaching supplementary group ids %s", seal.user.supp)
// interpreted by fsu
cmd.Env = append(cmd.Env, "FORTIFY_GROUPS="+strings.Join(seal.user.supp, " "))
}
var earlyStoreErr = new(StateStoreError) // returned after blocking on waitErr
earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) {
earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
})
// destroy defunct state entry
deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
fmsg.Verbosef("setuid helper at %s", fsuPath)
fmsg.Suspend()
if err := cmd.Start(); err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot start setuid wrapper:")
}
rs.SetStart()
// this prevents blocking forever on an early failure
waitErr, setupErr := make(chan error, 1), make(chan error, 1)
go func() { waitErr <- cmd.Wait(); cancel() }()
go func() { setupErr <- e.Encode(&shimParams{os.Getpid(), seal.container, seal.user.data, fmsg.Load()}) }()
select {
case err := <-waitErr: // block until fsu/shim returns
case err := <-setupErr:
if err != nil {
var exitError *exec.ExitError
if !errors.As(err, &exitError) {
// should be unreachable
rs.WaitErr = err
}
// store non-zero return code
rs.ExitCode = exitError.ExitCode()
} else {
rs.ExitCode = cmd.Unwrap().ProcessState.ExitCode()
fmsg.Resume()
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
}
case <-ctx.Done():
fmsg.Resume()
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
// returned after blocking on waitErr
var earlyStoreErr = new(StateStoreError)
{
// shim accepted setup payload, create process state
sd := state.State{
ID: seal.id.unwrap(),
PID: cmd.Process.Pid,
Time: *rs.Time,
}
earlyStoreErr.Inner, earlyStoreErr.DoErr = store.Do(seal.user.aid.unwrap(), func(c state.Cursor) {
earlyStoreErr.InnerErr = c.Save(&sd, seal.ct)
})
}
// state in store at this point, destroy defunct state entry on return
deferredStoreFunc = func(c state.Cursor) error { return c.Destroy(seal.id.unwrap()) }
waitTimeout := make(chan struct{})
go func() { <-seal.ctx.Done(); time.Sleep(shimWaitTimeout); close(waitTimeout) }()
select {
case rs.WaitErr = <-waitErr:
rs.WaitStatus = cmd.ProcessState.Sys().(syscall.WaitStatus)
if fmsg.Load() {
fmsg.Verbosef("process %d exited with exit code %d", cmd.Unwrap().Process.Pid, rs.ExitCode)
switch {
case rs.Exited():
fmsg.Verbosef("process %d exited with code %d", cmd.Process.Pid, rs.ExitStatus())
case rs.CoreDump():
fmsg.Verbosef("process %d dumped core", cmd.Process.Pid)
case rs.Signaled():
fmsg.Verbosef("process %d got %s", cmd.Process.Pid, rs.Signal())
default:
fmsg.Verbosef("process %d exited with status %#x", cmd.Process.Pid, rs.WaitStatus)
}
}
// this is reached when a fault makes an already running shim impossible to continue execution
// however a kill signal could not be delivered (should actually always happen like that since fsu)
// the effects of this is similar to the alternative exit path and ensures shim death
case err := <-cmd.Fallback():
rs.ExitCode = 255
log.Printf("cannot terminate shim on faulted setup: %v", err)
// alternative exit path relying on shim behaviour on monitor process exit
case <-seal.ctx.Done():
fmsg.Verbose("alternative exit path selected")
case <-waitTimeout:
rs.WaitErr = syscall.ETIMEDOUT
fmsg.Resume()
log.Printf("process %d did not terminate", cmd.Process.Pid)
}
fmsg.Resume()

View File

@ -2,14 +2,11 @@ package app
import (
"context"
"encoding/gob"
"errors"
"log"
"os"
"os/exec"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
@ -18,9 +15,63 @@ import (
"git.gensokyo.uk/security/fortify/sandbox"
)
/*
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
static pid_t f_shim_param_ppid = -1;
// this cannot unblock fmsg since Go code is not async-signal-safe
static void f_shim_sigaction(int sig, siginfo_t *si, void *ucontext) {
if (sig != SIGCONT || si == NULL) {
// unreachable
fprintf(stderr, "sigaction: sa_sigaction got invalid siginfo\n");
return;
}
// monitor requests shim exit
if (si->si_pid == f_shim_param_ppid)
exit(254);
fprintf(stderr, "sigaction: got SIGCONT from process %d\n", si->si_pid);
// shim orphaned before monitor delivers a signal
if (getppid() != f_shim_param_ppid)
exit(3);
}
void f_shim_setup_cont_signal(pid_t ppid) {
struct sigaction new_action = {0}, old_action = {0};
if (sigaction(SIGCONT, NULL, &old_action) != 0)
return;
if (old_action.sa_handler != SIG_DFL) {
errno = ENOTRECOVERABLE;
return;
}
new_action.sa_sigaction = f_shim_sigaction;
if (sigemptyset(&new_action.sa_mask) != 0)
return;
new_action.sa_flags = SA_ONSTACK | SA_SIGINFO;
if (sigaction(SIGCONT, &new_action, NULL) != 0)
return;
errno = 0;
f_shim_param_ppid = ppid;
}
*/
import "C"
const shimEnv = "FORTIFY_SHIM"
type shimParams struct {
// monitor pid, checked against ppid in signal handler
Monitor int
// finalised container params
Container *sandbox.Params
// path to outer home directory
@ -54,6 +105,16 @@ func ShimMain() {
} else {
internal.InstallFmsg(params.Verbose)
closeSetup = f
// the Go runtime does not expose siginfo_t so SIGCONT is handled in C to check si_pid
if _, err = C.f_shim_setup_cont_signal(C.pid_t(params.Monitor)); err != nil {
log.Fatalf("cannot install SIGCONT handler: %v", err)
}
// pdeath_signal delivery is checked as if the dying process called kill(2), see kernel/exit.c
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGCONT), 0); errno != 0 {
log.Fatalf("cannot set parent-death signal: %v", errno)
}
}
if params.Container == nil || params.Container.Ops == nil {
@ -112,101 +173,3 @@ func ShimMain() {
os.Exit(exitError.ExitCode())
}
}
type shimProcess struct {
// user switcher process
cmd *exec.Cmd
// fallback exit notifier with error returned killing the process
killFallback chan error
// monitor to shim encoder
encoder *gob.Encoder
}
func (s *shimProcess) Unwrap() *exec.Cmd { return s.cmd }
func (s *shimProcess) Fallback() chan error { return s.killFallback }
func (s *shimProcess) String() string {
if s.cmd == nil {
return "(unused shim manager)"
}
return s.cmd.String()
}
func (s *shimProcess) Start(
aid string,
supp []string,
) (*time.Time, error) {
// prepare user switcher invocation
fsuPath := internal.MustFsuPath()
s.cmd = exec.Command(fsuPath)
// pass shim setup pipe
if fd, e, err := sandbox.Setup(&s.cmd.ExtraFiles); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot create shim setup pipe:")
} else {
s.encoder = e
s.cmd.Env = []string{
shimEnv + "=" + strconv.Itoa(fd),
"FORTIFY_APP_ID=" + aid,
}
}
// format fsu supplementary groups
if len(supp) > 0 {
fmsg.Verbosef("attaching supplementary group ids %s", supp)
s.cmd.Env = append(s.cmd.Env, "FORTIFY_GROUPS="+strings.Join(supp, " "))
}
s.cmd.Stdin, s.cmd.Stdout, s.cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
s.cmd.Dir = "/"
fmsg.Verbose("starting shim via fsu:", s.cmd)
// withhold messages to stderr
fmsg.Suspend()
if err := s.cmd.Start(); err != nil {
return nil, fmsg.WrapErrorSuffix(err,
"cannot start fsu:")
}
startTime := time.Now().UTC()
return &startTime, nil
}
func (s *shimProcess) Serve(ctx context.Context, params *shimParams) error {
// kill shim if something goes wrong and an error is returned
s.killFallback = make(chan error, 1)
killShim := func() {
if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
s.killFallback <- err
}
}
defer func() { killShim() }()
encodeErr := make(chan error)
go func() { encodeErr <- s.encoder.Encode(params) }()
select {
// encode return indicates setup completion
case err := <-encodeErr:
if err != nil {
return fmsg.WrapErrorSuffix(err,
"cannot transmit shim config:")
}
killShim = func() {}
return nil
// setup canceled before payload was accepted
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.Canceled) {
return fmsg.WrapError(syscall.ECANCELED,
"shim setup canceled")
}
if errors.Is(err, context.DeadlineExceeded) {
return fmsg.WrapError(syscall.ETIMEDOUT,
"deadline exceeded waiting for shim")
}
// unreachable
return err
}
}

View File

@ -289,10 +289,10 @@ func runApp(config *fst.Config) {
rs := new(fst.RunState)
if sa, err := a.Seal(config); err != nil {
fmsg.PrintBaseError(err, "cannot seal app:")
rs.ExitCode = 1
internal.Exit(1)
} else {
// this updates ExitCode
app.PrintRunStateErr(rs, sa.Run(rs))
internal.Exit(app.PrintRunStateErr(rs, sa.Run(rs)))
}
internal.Exit(rs.ExitCode)
*(*int)(nil) = 0 // not reached
}

View File

@ -169,6 +169,16 @@ machine.send_chars("exit\n")
machine.wait_for_file("/tmp/p0-exit-ok", timeout=15)
machine.fail("getfacl --absolute-names --omit-header --numeric /run/user/1000 | grep 1000000")
# Check interrupt shim behaviour:
swaymsg("exec sh -c 'ne-foot; echo -n $? > /tmp/monitor-exit-code'")
wait_for_window(f"u0_a{aid(0)}@machine")
machine.succeed("pkill -INT -f 'fortify -v app '")
machine.wait_until_fails("pgrep foot", timeout=5)
machine.wait_for_file("/tmp/monitor-exit-code")
interrupt_exit_code = int(machine.succeed("cat /tmp/monitor-exit-code"))
if interrupt_exit_code != 254:
raise Exception(f"unexpected exit code {interrupt_exit_code}")
# Start app (foot) with Wayland enablement:
swaymsg("exec ne-foot")
wait_for_window(f"u0_a{aid(0)}@machine")