sandbox: expose seccomp interface
All checks were successful
Test / Create distribution (push) Successful in 31s
Test / Sandbox (push) Successful in 1m59s
Test / Hakurei (push) Successful in 2m47s
Test / Sandbox (race detector) (push) Successful in 3m11s
Test / Planterette (push) Successful in 3m34s
Test / Hakurei (race detector) (push) Successful in 4m22s
Test / Flake checks (push) Successful in 1m8s

There's no point in artificially limiting and abstracting away these options. The higher-level hakurei package is responsible for providing a secure baseline and sane defaults. The sandbox package should present everything to the caller.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-07-02 04:38:28 +09:00
parent a6887f7253
commit 31aef905fa
12 changed files with 117 additions and 77 deletions

View File

@@ -17,32 +17,6 @@ import (
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
)
// HardeningFlags is a bit field of opt-outs from container hardening.
// Every flag relaxes one restriction when set; a cleared bit leaves the
// corresponding restriction in effect.
type HardeningFlags uintptr
const (
// FSyscallCompat, when set, stops the seccomp method from adding seccomp.PresetExt.
FSyscallCompat HardeningFlags = 1 << iota
// FAllowDevel, when set, stops the seccomp method from adding seccomp.PresetDenyDevel.
FAllowDevel
// FAllowUserns, when set, stops the seccomp method from adding seccomp.PresetDenyNS.
FAllowUserns
// FAllowTTY, when set, stops the seccomp method from adding seccomp.PresetDenyTTY.
// Container.Start also only requests Setsid while this flag is cleared.
FAllowTTY
// FAllowNet, when set, stops Container.Start from adding CLONE_NEWNET to the clone flags.
// It is not consulted by the seccomp method.
FAllowNet
)
// seccomp resolves the filter presets implied by flags.
// It starts from the caller-supplied presets and ORs in one additional preset
// for each of FSyscallCompat, FAllowDevel, FAllowUserns and FAllowTTY that is
// NOT set, then returns the combined value. Bits already present in presets
// are never cleared, and FAllowNet has no influence on the result.
func (flags HardeningFlags) seccomp(presets seccomp.FilterPreset) seccomp.FilterPreset {
// compat flag cleared: include the extended preset
if flags&FSyscallCompat == 0 {
presets |= seccomp.PresetExt
}
// devel flag cleared: include the deny-devel preset
if flags&FAllowDevel == 0 {
presets |= seccomp.PresetDenyDevel
}
// userns flag cleared: include the deny-namespace preset
if flags&FAllowUserns == 0 {
presets |= seccomp.PresetDenyNS
}
// tty flag cleared: include the deny-tty preset
if flags&FAllowTTY == 0 {
presets |= seccomp.PresetDenyTTY
}
return presets
}
type (
// Container represents a container environment being prepared or run.
// None of [Container] methods are safe for concurrent use.
@@ -94,17 +68,23 @@ type (
Hostname string
// Sequential container setup ops.
*Ops
// Seccomp system call filter rules.
SeccompRules []seccomp.NativeRule
// Extra seccomp flags.
SeccompFlags seccomp.ExportFlag
// Extra seccomp presets.
// Seccomp presets. Has no effect unless SeccompRules is zero-length.
SeccompPresets seccomp.FilterPreset
// Do not load seccomp program.
SeccompDisable bool
// Permission bits of newly created parent directories.
// The zero value is interpreted as 0755.
ParentPerm os.FileMode
// Do not syscall.Setsid.
RetainSession bool
// Do not [syscall.CLONE_NEWNET].
HostNet bool
// Retain CAP_SYS_ADMIN.
Privileged bool
Flags HardeningFlags
}
)
@@ -120,7 +100,7 @@ func (p *Container) Start() error {
p.cancel = cancel
var cloneFlags uintptr = CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWCGROUP
if p.Flags&FAllowNet == 0 {
if !p.HostNet {
cloneFlags |= CLONE_NEWNET
}
@@ -132,6 +112,10 @@ func (p *Container) Start() error {
p.Gid = OverflowGid()
}
if !p.RetainSession {
p.SeccompPresets |= seccomp.PresetDenyTTY
}
if p.CommandContext != nil {
p.cmd = p.CommandContext(ctx)
} else {
@@ -148,7 +132,7 @@ func (p *Container) Start() error {
}
p.cmd.Dir = "/"
p.cmd.SysProcAttr = &SysProcAttr{
Setsid: p.Flags&FAllowTTY == 0,
Setsid: !p.RetainSession,
Pdeathsig: SIGKILL,
Cloneflags: cloneFlags | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS,
@@ -211,6 +195,11 @@ func (p *Container) Serve() error {
}
}
if p.SeccompRules == nil {
// do not transmit nil
p.SeccompRules = make([]seccomp.NativeRule, 0)
}
err := setup.Encode(
&initParams{
p.Params,
@@ -229,8 +218,8 @@ func (p *Container) Serve() error {
// Wait returns the result of p.cmd.Wait and calls p.cancel (deferred) once that call has returned.
func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() }
func (p *Container) String() string {
return fmt.Sprintf("argv: %q, flags: %#x, seccomp: %#x, presets: %#x",
p.Args, p.Flags, int(p.SeccompFlags), int(p.Flags.seccomp(p.SeccompPresets)))
return fmt.Sprintf("argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets))
}
func New(ctx context.Context, name string, args ...string) *Container {

View File

@@ -36,22 +36,39 @@ func TestContainer(t *testing.T) {
}
testCases := []struct {
name string
flags sandbox.HardeningFlags
ops *sandbox.Ops
mnt []*vfs.MountInfoEntry
host string
name string
filter bool
session bool
net bool
ops *sandbox.Ops
mnt []*vfs.MountInfoEntry
host string
rules []seccomp.NativeRule
flags seccomp.ExportFlag
presets seccomp.FilterPreset
}{
{"minimal", 0, new(sandbox.Ops), nil, "test-minimal"},
{"allow", sandbox.FAllowUserns | sandbox.FAllowNet | sandbox.FAllowTTY,
new(sandbox.Ops), nil, "test-minimal"},
{"tmpfs", 0,
{"minimal", true, false, false,
new(sandbox.Ops), nil, "test-minimal",
nil, 0, seccomp.PresetStrict},
{"allow", true, true, true,
new(sandbox.Ops), nil, "test-minimal",
nil, 0, seccomp.PresetExt | seccomp.PresetDenyDevel},
{"no filter", false, true, true,
new(sandbox.Ops), nil, "test-no-filter",
nil, 0, seccomp.PresetExt},
{"custom rules", true, true, true,
new(sandbox.Ops), nil, "test-no-filter",
[]seccomp.NativeRule{
{seccomp.ScmpSyscall(syscall.SYS_SETUID), seccomp.ScmpErrno(syscall.EPERM), nil},
}, 0, seccomp.PresetExt},
{"tmpfs", true, false, false,
new(sandbox.Ops).
Tmpfs(hst.Tmp, 0, 0755),
[]*vfs.MountInfoEntry{
e("/", hst.Tmp, "rw,nosuid,nodev,relatime", "tmpfs", "tmpfs", ignore),
}, "test-tmpfs"},
{"dev", sandbox.FAllowTTY, // go test output is not a tty
}, "test-tmpfs",
nil, 0, seccomp.PresetStrict},
{"dev", true, true /* go test output is not a tty */, false,
new(sandbox.Ops).
Dev("/dev").
Mqueue("/dev/mqueue"),
@@ -65,7 +82,8 @@ func TestContainer(t *testing.T) {
e("/tty", "/dev/tty", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
e("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"),
e("/", "/dev/mqueue", "rw,nosuid,nodev,noexec,relatime", "mqueue", "mqueue", "rw"),
}, ""},
}, "",
nil, 0, seccomp.PresetStrict},
}
for _, tc := range testCases {
@@ -79,9 +97,14 @@ func TestContainer(t *testing.T) {
container.Gid = 100
container.Hostname = tc.host
container.CommandContext = commandContext
container.Flags |= tc.flags
container.Stdout, container.Stderr = os.Stdout, os.Stderr
container.Ops = tc.ops
container.SeccompRules = tc.rules
container.SeccompFlags = tc.flags | seccomp.AllowMultiarch
container.SeccompPresets = tc.presets
container.SeccompDisable = !tc.filter
container.RetainSession = tc.session
container.HostNet = tc.net
if container.Args[5] == "" {
if name, err := os.Hostname(); err != nil {
t.Fatalf("cannot get hostname: %v", err)
@@ -163,9 +186,12 @@ func e(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoE
func TestContainerString(t *testing.T) {
container := sandbox.New(t.Context(), "ldd", "/usr/bin/env")
container.Flags |= sandbox.FAllowDevel
container.SeccompFlags |= seccomp.AllowMultiarch
want := `argv: ["ldd" "/usr/bin/env"], flags: 0x2, seccomp: 0x1, presets: 0x7`
container.SeccompRules = seccomp.Preset(
seccomp.PresetExt|seccomp.PresetDenyNS|seccomp.PresetDenyTTY,
container.SeccompFlags)
container.SeccompPresets = seccomp.PresetStrict
want := `argv: ["ldd" "/usr/bin/env"], filter: true, rules: 65, flags: 0x1, presets: 0xf`
if got := container.String(); got != want {
t.Errorf("String: %s, want %s", got, want)
}

View File

@@ -229,8 +229,18 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
log.Fatalf("cannot capset: %v", err)
}
if err := seccomp.Load(seccomp.Preset(params.Flags.seccomp(params.SeccompPresets), params.SeccompFlags), params.SeccompFlags); err != nil {
log.Fatalf("cannot load syscall filter: %v", err)
if !params.SeccompDisable {
rules := params.SeccompRules
if len(rules) == 0 { // non-empty rules slice always overrides presets
msg.Verbosef("resolving presets %#x", params.SeccompPresets)
rules = seccomp.Preset(params.SeccompPresets, params.SeccompFlags)
}
if err := seccomp.Load(rules, params.SeccompFlags); err != nil {
log.Fatalf("cannot load syscall filter: %v", err)
}
msg.Verbosef("%d filter rules loaded", len(rules))
} else {
msg.Verbose("syscall filter not configured")
}
extraFiles := make([]*os.File, params.Count)

View File

@@ -205,7 +205,7 @@ func (d MountDevOp) apply(params *Params) error {
fmt.Sprintf("cannot mount devpts on %q:", devPtsPath))
}
if params.Flags&FAllowTTY != 0 {
if params.RetainSession {
var buf [8]byte
if _, _, errno := Syscall(SYS_IOCTL, 1, TIOCGWINSZ, uintptr(unsafe.Pointer(&buf[0]))); errno == 0 {
consolePath := toSysroot(path.Join(v, "console"))

View File

@@ -171,11 +171,11 @@ type ScmpDatum uint64
// Argument / Value comparison definition
type ScmpArgCmp struct {
// argument number, starting at 0
arg C.uint
Arg C.uint
// the comparison op, e.g. SCMP_CMP_*
op ScmpCompare
Op ScmpCompare
datum_a, datum_b ScmpDatum
DatumA, DatumB ScmpDatum
}
// only used for testing