app: run in native sandbox
All checks were successful
Test / Create distribution (push) Successful in 20s
Test / Fortify (push) Successful in 2m5s
Test / Fpkg (push) Successful in 3m0s
Test / Data race detector (push) Successful in 4m12s
Test / Flake checks (push) Successful in 1m4s

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-03-25 01:52:49 +09:00
parent e732dca762
commit 5c4058d5ac
35 changed files with 949 additions and 1225 deletions

View File

@@ -4,125 +4,149 @@ import (
"errors"
"fmt"
"io/fs"
"maps"
"path"
"slices"
"syscall"
"git.gensokyo.uk/security/fortify/dbus"
"git.gensokyo.uk/security/fortify/helper/bwrap"
"git.gensokyo.uk/security/fortify/sandbox"
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
)
// SandboxConfig describes resources made available to the sandbox.
type SandboxConfig struct {
// unix hostname within sandbox
Hostname string `json:"hostname,omitempty"`
// allow userns within sandbox
UserNS bool `json:"userns,omitempty"`
// share net namespace
Net bool `json:"net,omitempty"`
// share all devices
Dev bool `json:"dev,omitempty"`
// seccomp syscall filter policy
Syscall *bwrap.SyscallPolicy `json:"syscall"`
// do not run in new session
NoNewSession bool `json:"no_new_session,omitempty"`
// map target user uid to privileged user uid in the user namespace
MapRealUID bool `json:"map_real_uid"`
// direct access to wayland socket; when this gets set no attempt is made to attach security-context-v1
// and the bare socket is mounted to the sandbox
DirectWayland bool `json:"direct_wayland,omitempty"`
type (
SandboxConfig struct {
// container hostname
Hostname string `json:"hostname,omitempty"`
// final environment variables
Env map[string]string `json:"env"`
// sandbox host filesystem access
Filesystem []*FilesystemConfig `json:"filesystem"`
// symlinks created inside the sandbox
Link [][2]string `json:"symlink"`
// read-only /etc directory
Etc string `json:"etc,omitempty"`
// automatically set up /etc symlinks
AutoEtc bool `json:"auto_etc"`
// mount tmpfs over these paths,
// runs right before [ConfinementConfig.ExtraPerms]
Override []string `json:"override"`
}
// extra seccomp flags
Seccomp seccomp.SyscallOpts `json:"seccomp"`
// allow ptrace and friends
Devel bool `json:"devel,omitempty"`
// allow userns creation in container
Userns bool `json:"userns,omitempty"`
// share host net namespace
Net bool `json:"net,omitempty"`
// expose main process tty
Tty bool `json:"tty,omitempty"`
// allow multiarch
Multiarch bool `json:"multiarch,omitempty"`
// SandboxSys encapsulates system functions used during the creation of [bwrap.Config].
type SandboxSys interface {
Getuid() int
Paths() Paths
ReadDir(name string) ([]fs.DirEntry, error)
EvalSymlinks(path string) (string, error)
// initial process environment variables
Env map[string]string `json:"env"`
// map target user uid to privileged user uid in the user namespace
MapRealUID bool `json:"map_real_uid"`
Println(v ...any)
Printf(format string, v ...any)
}
// expose all devices
Dev bool `json:"dev,omitempty"`
// container host filesystem bind mounts
Filesystem []*FilesystemConfig `json:"filesystem"`
// create symlinks inside container filesystem
Link [][2]string `json:"symlink"`
// Bwrap returns the address of the corresponding bwrap.Config to s.
// Note that remaining tmpfs entries must be queued by the caller prior to launch.
func (s *SandboxConfig) Bwrap(sys SandboxSys, uid *int) (*bwrap.Config, error) {
// direct access to wayland socket; when this gets set no attempt is made to attach security-context-v1
// and the bare socket is mounted to the sandbox
DirectWayland bool `json:"direct_wayland,omitempty"`
// read-only /etc directory
Etc string `json:"etc,omitempty"`
// automatically set up /etc symlinks
AutoEtc bool `json:"auto_etc"`
// cover these paths or create them if they do not already exist
Cover []string `json:"cover"`
}
// SandboxSys encapsulates system functions used during [sandbox.Container] initialisation.
SandboxSys interface {
Getuid() int
Getgid() int
Paths() Paths
ReadDir(name string) ([]fs.DirEntry, error)
EvalSymlinks(path string) (string, error)
Println(v ...any)
Printf(format string, v ...any)
}
// FilesystemConfig is a representation of [sandbox.BindMount].
FilesystemConfig struct {
// mount point in container, same as src if empty
Dst string `json:"dst,omitempty"`
// host filesystem path to make available to the container
Src string `json:"src"`
// do not mount filesystem read-only
Write bool `json:"write,omitempty"`
// do not disable device files
Device bool `json:"dev,omitempty"`
// fail if the bind mount cannot be established for any reason
Must bool `json:"require,omitempty"`
}
)
// ToContainer initialises [sandbox.Params] via [SandboxConfig].
// Note that remaining container setup must be queued by the [App] implementation.
func (s *SandboxConfig) ToContainer(sys SandboxSys, uid, gid *int) (*sandbox.Params, map[string]string, error) {
if s == nil {
return nil, errors.New("nil sandbox config")
return nil, nil, syscall.EBADE
}
if s.Syscall == nil {
sys.Println("syscall filter not configured, PROCEED WITH CAUTION")
}
if !s.MapRealUID {
// mapped uid defaults to 65534 to work around file ownership checks due to a bwrap limitation
*uid = 65534
} else {
// some programs fail to connect to dbus session running as a different uid, so a separate workaround
// is introduced to map priv-side caller uid in namespace
*uid = sys.Getuid()
}
conf := (&bwrap.Config{
Net: s.Net,
UserNS: s.UserNS,
UID: uid,
GID: uid,
container := &sandbox.Params{
Hostname: s.Hostname,
Clearenv: true,
SetEnv: s.Env,
Ops: new(sandbox.Ops),
Seccomp: s.Seccomp,
}
/* this is only 4 KiB of memory on a 64-bit system,
permissive defaults on NixOS results in around 100 entries
so this capacity should eliminate copies for most setups */
Filesystem: make([]bwrap.FSBuilder, 0, 256),
/* this is only 4 KiB of memory on a 64-bit system,
permissive defaults on NixOS results in around 100 entries
so this capacity should eliminate copies for most setups */
*container.Ops = slices.Grow(*container.Ops, 1<<8)
Syscall: s.Syscall,
NewSession: !s.NoNewSession,
DieWithParent: true,
AsInit: true,
if s.Devel {
container.Flags |= sandbox.FAllowDevel
}
if s.Userns {
container.Flags |= sandbox.FAllowUserns
}
if s.Net {
container.Flags |= sandbox.FAllowNet
}
if s.Tty {
container.Flags |= sandbox.FAllowTTY
}
// initialise unconditionally as Once cannot be justified
// for saving such a miniscule amount of memory
Chmod: make(bwrap.ChmodConfig),
}).
Procfs("/proc").
Tmpfs(Tmp, 4*1024)
if s.MapRealUID {
/* some programs fail to connect to dbus session running as a different uid
so this workaround is introduced to map priv-side caller uid in container */
container.Uid = sys.Getuid()
*uid = container.Uid
container.Gid = sys.Getgid()
*gid = container.Gid
} else {
*uid = sandbox.OverflowUid()
*gid = sandbox.OverflowGid()
}
container.
Proc("/proc").
Tmpfs(Tmp, 1<<12, 0755)
if !s.Dev {
conf.DevTmpfs("/dev").Mqueue("/dev/mqueue")
container.Dev("/dev").Mqueue("/dev/mqueue")
} else {
conf.Bind("/dev", "/dev", false, true, true)
container.Bind("/dev", "/dev", sandbox.BindDevice)
}
if !s.AutoEtc {
if s.Etc == "" {
conf.Dir("/etc")
} else {
conf.Bind(s.Etc, "/etc")
}
}
// retrieve paths and hide them if they're made available in the sandbox
/* retrieve paths and hide them if they're made available in the sandbox;
this feature tries to improve user experience of permissive defaults, and
to warn about issues in custom configuration; it is NOT a security feature
and should not be treated as such, ALWAYS be careful with what you bind */
var hidePaths []string
sc := sys.Paths()
hidePaths = append(hidePaths, sc.RuntimePath, sc.SharePath)
_, systemBusAddr := dbus.Address()
if entries, err := dbus.Parse([]byte(systemBusAddr)); err != nil {
return nil, err
return nil, nil, err
} else {
// there is usually only one, do not preallocate
for _, entry := range entries {
@@ -148,7 +172,7 @@ func (s *SandboxConfig) Bwrap(sys SandboxSys, uid *int) (*bwrap.Config, error) {
hidePathMatch := make([]bool, len(hidePaths))
for i := range hidePaths {
if err := evalSymlinks(sys, &hidePaths[i]); err != nil {
return nil, err
return nil, nil, err
}
}
@@ -158,19 +182,19 @@ func (s *SandboxConfig) Bwrap(sys SandboxSys, uid *int) (*bwrap.Config, error) {
}
if !path.IsAbs(c.Src) {
return nil, fmt.Errorf("src path %q is not absolute", c.Src)
return nil, nil, fmt.Errorf("src path %q is not absolute", c.Src)
}
dest := c.Dst
if c.Dst == "" {
dest = c.Src
} else if !path.IsAbs(dest) {
return nil, fmt.Errorf("dst path %q is not absolute", dest)
return nil, nil, fmt.Errorf("dst path %q is not absolute", dest)
}
srcH := c.Src
if err := evalSymlinks(sys, &srcH); err != nil {
return nil, err
return nil, nil, err
}
for i := range hidePaths {
@@ -180,54 +204,71 @@ func (s *SandboxConfig) Bwrap(sys SandboxSys, uid *int) (*bwrap.Config, error) {
}
if ok, err := deepContainsH(srcH, hidePaths[i]); err != nil {
return nil, err
return nil, nil, err
} else if ok {
hidePathMatch[i] = true
sys.Printf("hiding paths from %q", c.Src)
}
}
conf.Bind(c.Src, dest, !c.Must, c.Write, c.Device)
var flags int
if c.Write {
flags |= sandbox.BindWritable
}
if c.Device {
flags |= sandbox.BindDevice | sandbox.BindWritable
}
if !c.Must {
flags |= sandbox.BindOptional
}
container.Bind(c.Src, dest, flags)
}
// hide marked paths before setting up shares
// cover matched paths
for i, ok := range hidePathMatch {
if ok {
conf.Tmpfs(hidePaths[i], 8192)
container.Tmpfs(hidePaths[i], 1<<13, 0755)
}
}
for _, l := range s.Link {
conf.Symlink(l[0], l[1])
container.Link(l[0], l[1])
}
if s.AutoEtc {
etc := s.Etc
if etc == "" {
etc = "/etc"
// perf: this might work better if implemented as a setup op in container init
if !s.AutoEtc {
if s.Etc != "" {
container.Bind(s.Etc, "/etc", 0)
}
conf.Bind(etc, Tmp+"/etc")
} else {
etcPath := s.Etc
if etcPath == "" {
etcPath = "/etc"
}
container.
Bind(etcPath, Tmp+"/etc", 0).
Mkdir("/etc", 0700)
// link host /etc contents to prevent passwd/group from being overwritten
if d, err := sys.ReadDir(etc); err != nil {
return nil, err
// link host /etc contents to prevent dropping passwd/group bind mounts
if d, err := sys.ReadDir(etcPath); err != nil {
return nil, nil, err
} else {
for _, ent := range d {
name := ent.Name()
switch name {
n := ent.Name()
switch n {
case "passwd":
case "group":
case "mtab":
conf.Symlink("/proc/mounts", "/etc/"+name)
container.Link("/proc/mounts", "/etc/"+n)
default:
conf.Symlink(Tmp+"/etc/"+name, "/etc/"+name)
container.Link(Tmp+"/etc/"+n, "/etc/"+n)
}
}
}
}
return conf, nil
return container, maps.Clone(s.Env), nil
}
func evalSymlinks(sys SandboxSys, v *string) error {