sandbox: move out of internal
Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
6
sandbox/const.go
Normal file
6
sandbox/const.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package sandbox
|
||||
|
||||
const (
|
||||
PR_SET_NO_NEW_PRIVS = 0x26
|
||||
CAP_SYS_ADMIN = 0x15
|
||||
)
|
||||
232
sandbox/container.go
Normal file
232
sandbox/container.go
Normal file
@@ -0,0 +1,232 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
|
||||
)
|
||||
|
||||
type HardeningFlags uintptr
|
||||
|
||||
const (
|
||||
FSyscallCompat HardeningFlags = 1 << iota
|
||||
FAllowDevel
|
||||
FAllowUserns
|
||||
FAllowTTY
|
||||
FAllowNet
|
||||
)
|
||||
|
||||
func (flags HardeningFlags) seccomp(opts seccomp.SyscallOpts) seccomp.SyscallOpts {
|
||||
if flags&FSyscallCompat == 0 {
|
||||
opts |= seccomp.FlagExt
|
||||
}
|
||||
if flags&FAllowDevel == 0 {
|
||||
opts |= seccomp.FlagDenyDevel
|
||||
}
|
||||
if flags&FAllowUserns == 0 {
|
||||
opts |= seccomp.FlagDenyNS
|
||||
}
|
||||
if flags&FAllowTTY == 0 {
|
||||
opts |= seccomp.FlagDenyTTY
|
||||
}
|
||||
return opts
|
||||
}
|
||||
|
||||
type (
|
||||
// Container represents a container environment being prepared or run.
|
||||
// None of [Container] methods are safe for concurrent use.
|
||||
Container struct {
|
||||
// Name of initial process in the container.
|
||||
name string
|
||||
// Cgroup fd, nil to disable.
|
||||
Cgroup *int
|
||||
// ExtraFiles passed through to initial process in the container,
|
||||
// with behaviour identical to its [exec.Cmd] counterpart.
|
||||
ExtraFiles []*os.File
|
||||
|
||||
InitParams
|
||||
// Custom [exec.Cmd] initialisation function.
|
||||
CommandContext func(ctx context.Context) (cmd *exec.Cmd)
|
||||
|
||||
// param encoder for shim and init
|
||||
setup *gob.Encoder
|
||||
// cancels cmd
|
||||
cancel context.CancelFunc
|
||||
|
||||
Stdin io.Reader
|
||||
Stdout io.Writer
|
||||
Stderr io.Writer
|
||||
|
||||
Cancel func() error
|
||||
WaitDelay time.Duration
|
||||
|
||||
cmd *exec.Cmd
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
InitParams struct {
|
||||
// Working directory in the container.
|
||||
Dir string
|
||||
// Initial process environment.
|
||||
Env []string
|
||||
// Absolute path of initial process in the container. Overrides name.
|
||||
Path string
|
||||
// Initial process argv.
|
||||
Args []string
|
||||
|
||||
// Mapped Uid in user namespace.
|
||||
Uid int
|
||||
// Mapped Gid in user namespace.
|
||||
Gid int
|
||||
// Hostname value in UTS namespace.
|
||||
Hostname string
|
||||
// Sequential container setup ops.
|
||||
*Ops
|
||||
// Extra seccomp options.
|
||||
Seccomp seccomp.SyscallOpts
|
||||
|
||||
Flags HardeningFlags
|
||||
}
|
||||
|
||||
Ops []Op
|
||||
Op interface {
|
||||
apply(params *InitParams) error
|
||||
|
||||
Is(op Op) bool
|
||||
fmt.Stringer
|
||||
}
|
||||
)
|
||||
|
||||
func (p *Container) Start() error {
|
||||
if p.cmd != nil {
|
||||
return errors.New("sandbox: already started")
|
||||
}
|
||||
if p.Ops == nil || len(*p.Ops) == 0 {
|
||||
return errors.New("sandbox: starting an empty container")
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(p.ctx)
|
||||
p.cancel = cancel
|
||||
|
||||
var cloneFlags uintptr = syscall.CLONE_NEWIPC |
|
||||
syscall.CLONE_NEWUTS |
|
||||
syscall.CLONE_NEWCGROUP
|
||||
if p.Flags&FAllowNet == 0 {
|
||||
cloneFlags |= syscall.CLONE_NEWNET
|
||||
}
|
||||
|
||||
// map to overflow id to work around ownership checks
|
||||
if p.Uid < 1 {
|
||||
p.Uid = OverflowUid()
|
||||
}
|
||||
if p.Gid < 1 {
|
||||
p.Gid = OverflowGid()
|
||||
}
|
||||
|
||||
if p.CommandContext != nil {
|
||||
p.cmd = p.CommandContext(ctx)
|
||||
} else {
|
||||
p.cmd = exec.CommandContext(ctx, MustExecutable())
|
||||
p.cmd.Args = []string{"init"}
|
||||
}
|
||||
|
||||
p.cmd.Stdin, p.cmd.Stdout, p.cmd.Stderr = p.Stdin, p.Stdout, p.Stderr
|
||||
p.cmd.Cancel, p.cmd.WaitDelay = p.Cancel, p.WaitDelay
|
||||
p.cmd.Dir = "/"
|
||||
p.cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setsid: p.Flags&FAllowTTY == 0,
|
||||
Pdeathsig: syscall.SIGKILL,
|
||||
|
||||
Cloneflags: cloneFlags |
|
||||
syscall.CLONE_NEWUSER |
|
||||
syscall.CLONE_NEWPID |
|
||||
syscall.CLONE_NEWNS,
|
||||
|
||||
// remain privileged for setup
|
||||
AmbientCaps: []uintptr{CAP_SYS_ADMIN},
|
||||
|
||||
UseCgroupFD: p.Cgroup != nil,
|
||||
}
|
||||
if p.cmd.SysProcAttr.UseCgroupFD {
|
||||
p.cmd.SysProcAttr.CgroupFD = *p.Cgroup
|
||||
}
|
||||
|
||||
// place setup pipe before user supplied extra files, this is later restored by init
|
||||
if fd, e, err := Setup(&p.cmd.ExtraFiles); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot create shim setup pipe:")
|
||||
} else {
|
||||
p.setup = e
|
||||
p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)}
|
||||
}
|
||||
p.cmd.ExtraFiles = append(p.cmd.ExtraFiles, p.ExtraFiles...)
|
||||
|
||||
msg.Verbose("starting container init")
|
||||
if err := p.cmd.Start(); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Container) Serve() error {
|
||||
if p.setup == nil {
|
||||
panic("invalid serve")
|
||||
}
|
||||
|
||||
if p.Path != "" && !path.IsAbs(p.Path) {
|
||||
return msg.WrapErr(syscall.EINVAL,
|
||||
fmt.Sprintf("invalid executable path %q", p.Path))
|
||||
}
|
||||
|
||||
if p.Path == "" {
|
||||
if p.name == "" {
|
||||
p.Path = os.Getenv("SHELL")
|
||||
if !path.IsAbs(p.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
"no command specified and $SHELL is invalid")
|
||||
}
|
||||
p.name = path.Base(p.Path)
|
||||
} else if path.IsAbs(p.name) {
|
||||
p.Path = p.name
|
||||
} else if v, err := exec.LookPath(p.name); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
} else {
|
||||
p.Path = v
|
||||
}
|
||||
}
|
||||
|
||||
setup := p.setup
|
||||
p.setup = nil
|
||||
return setup.Encode(
|
||||
&initParams{
|
||||
p.InitParams,
|
||||
syscall.Getuid(),
|
||||
syscall.Getgid(),
|
||||
len(p.ExtraFiles),
|
||||
msg.IsVerbose(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() }
|
||||
|
||||
func (p *Container) String() string {
|
||||
return fmt.Sprintf("argv: %q, flags: %#x, seccomp: %#x",
|
||||
p.Args, p.Flags, int(p.Flags.seccomp(p.Seccomp)))
|
||||
}
|
||||
|
||||
func New(ctx context.Context, name string, args ...string) *Container {
|
||||
return &Container{name: name, ctx: ctx,
|
||||
InitParams: InitParams{Args: append([]string{name}, args...), Dir: "/", Ops: new(Ops)},
|
||||
}
|
||||
}
|
||||
182
sandbox/container_test.go
Normal file
182
sandbox/container_test.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package sandbox_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/fst"
|
||||
"git.gensokyo.uk/security/fortify/internal"
|
||||
"git.gensokyo.uk/security/fortify/internal/fmsg"
|
||||
"git.gensokyo.uk/security/fortify/ldd"
|
||||
"git.gensokyo.uk/security/fortify/sandbox"
|
||||
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
|
||||
check "git.gensokyo.uk/security/fortify/test/sandbox"
|
||||
)
|
||||
|
||||
func TestContainer(t *testing.T) {
|
||||
{
|
||||
oldVerbose := fmsg.Load()
|
||||
oldOutput := sandbox.GetOutput()
|
||||
internal.InstallFmsg(true)
|
||||
t.Cleanup(func() { fmsg.Store(oldVerbose) })
|
||||
t.Cleanup(func() { sandbox.SetOutput(oldOutput) })
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
flags sandbox.HardeningFlags
|
||||
ops *sandbox.Ops
|
||||
mnt []*check.Mntent
|
||||
host string
|
||||
}{
|
||||
{"minimal", 0, new(sandbox.Ops), nil, "test-minimal"},
|
||||
{"allow", sandbox.FAllowUserns | sandbox.FAllowNet | sandbox.FAllowTTY,
|
||||
new(sandbox.Ops), nil, "test-minimal"},
|
||||
{"tmpfs", 0,
|
||||
new(sandbox.Ops).
|
||||
Tmpfs(fst.Tmp, 0, 0755),
|
||||
[]*check.Mntent{
|
||||
{FSName: "tmpfs", Dir: fst.Tmp, Type: "tmpfs", Opts: "\x00"},
|
||||
}, "test-tmpfs"},
|
||||
{"dev", sandbox.FAllowTTY, // go test output is not a tty
|
||||
new(sandbox.Ops).
|
||||
Dev("/dev"),
|
||||
[]*check.Mntent{
|
||||
{FSName: "devtmpfs", Dir: "/dev", Type: "tmpfs", Opts: "\x00"},
|
||||
{FSName: "devtmpfs", Dir: "/dev/null", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devtmpfs", Dir: "/dev/zero", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devtmpfs", Dir: "/dev/full", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devtmpfs", Dir: "/dev/random", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devtmpfs", Dir: "/dev/urandom", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devtmpfs", Dir: "/dev/tty", Type: "devtmpfs", Opts: "\x00", Freq: -1, Passno: -1},
|
||||
{FSName: "devpts", Dir: "/dev/pts", Type: "devpts", Opts: "rw,nosuid,noexec,relatime,mode=620,ptmxmode=666", Freq: 0, Passno: 0},
|
||||
}, ""},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
container := sandbox.New(ctx, os.Args[0], "-test.v",
|
||||
"-test.run=TestHelperCheckContainer", "--", "check", tc.host)
|
||||
container.Uid = 1000
|
||||
container.Gid = 100
|
||||
container.Hostname = tc.host
|
||||
container.CommandContext = commandContext
|
||||
container.Flags |= tc.flags
|
||||
container.Stdout, container.Stderr = os.Stdout, os.Stderr
|
||||
container.Ops = tc.ops
|
||||
if container.Args[5] == "" {
|
||||
if name, err := os.Hostname(); err != nil {
|
||||
t.Fatalf("cannot get hostname: %v", err)
|
||||
} else {
|
||||
container.Args[5] = name
|
||||
}
|
||||
}
|
||||
|
||||
container.
|
||||
Tmpfs("/tmp", 0, 0755).
|
||||
Bind(os.Args[0], os.Args[0], 0)
|
||||
// in case test has cgo enabled
|
||||
var libPaths []string
|
||||
if entries, err := ldd.ExecFilter(ctx,
|
||||
commandContext,
|
||||
func(v []byte) []byte {
|
||||
return bytes.SplitN(v, []byte("TestHelperInit\n"), 2)[1]
|
||||
}, os.Args[0]); err != nil {
|
||||
log.Fatalf("ldd: %v", err)
|
||||
} else {
|
||||
libPaths = ldd.Path(entries)
|
||||
}
|
||||
for _, name := range libPaths {
|
||||
container.Bind(name, name, 0)
|
||||
}
|
||||
|
||||
mnt := make([]*check.Mntent, 0, 3+len(libPaths))
|
||||
mnt = append(mnt, &check.Mntent{FSName: "rootfs", Dir: "/", Type: "tmpfs", Opts: "host_passthrough"})
|
||||
mnt = append(mnt, tc.mnt...)
|
||||
mnt = append(mnt,
|
||||
&check.Mntent{FSName: "tmpfs", Dir: "/tmp", Type: "tmpfs", Opts: "host_passthrough"},
|
||||
&check.Mntent{FSName: "\x00", Dir: os.Args[0], Type: "\x00", Opts: "\x00"})
|
||||
for _, name := range libPaths {
|
||||
mnt = append(mnt, &check.Mntent{FSName: "\x00", Dir: name, Type: "\x00", Opts: "\x00", Freq: -1, Passno: -1})
|
||||
}
|
||||
mnt = append(mnt, &check.Mntent{FSName: "proc", Dir: "/proc", Type: "proc", Opts: "rw,nosuid,nodev,noexec,relatime"})
|
||||
mntentWant := new(bytes.Buffer)
|
||||
if err := json.NewEncoder(mntentWant).Encode(mnt); err != nil {
|
||||
t.Fatalf("cannot serialise mntent: %v", err)
|
||||
}
|
||||
container.Stdin = mntentWant
|
||||
|
||||
// needs /proc to check mntent
|
||||
container.Proc("/proc")
|
||||
|
||||
if err := container.Start(); err != nil {
|
||||
fmsg.PrintBaseError(err, "start:")
|
||||
t.Fatalf("cannot start container: %v", err)
|
||||
} else if err = container.Serve(); err != nil {
|
||||
fmsg.PrintBaseError(err, "serve:")
|
||||
t.Errorf("cannot serve setup params: %v", err)
|
||||
}
|
||||
if err := container.Wait(); err != nil {
|
||||
fmsg.PrintBaseError(err, "wait:")
|
||||
t.Fatalf("wait: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerString(t *testing.T) {
|
||||
container := sandbox.New(context.TODO(), "ldd", "/usr/bin/env")
|
||||
container.Flags |= sandbox.FAllowDevel
|
||||
container.Seccomp |= seccomp.FlagMultiarch
|
||||
want := `argv: ["ldd" "/usr/bin/env"], flags: 0x2, seccomp: 0x2e`
|
||||
if got := container.String(); got != want {
|
||||
t.Errorf("String: %s, want %s", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHelperInit(t *testing.T) {
|
||||
if len(os.Args) != 5 || os.Args[4] != "init" {
|
||||
return
|
||||
}
|
||||
sandbox.SetOutput(fmsg.Output{})
|
||||
sandbox.Init(fmsg.Prepare, internal.InstallFmsg)
|
||||
}
|
||||
|
||||
func TestHelperCheckContainer(t *testing.T) {
|
||||
if len(os.Args) != 6 || os.Args[4] != "check" {
|
||||
return
|
||||
}
|
||||
|
||||
t.Run("user", func(t *testing.T) {
|
||||
if uid := syscall.Getuid(); uid != 1000 {
|
||||
t.Errorf("Getuid: %d, want 1000", uid)
|
||||
}
|
||||
if gid := syscall.Getgid(); gid != 100 {
|
||||
t.Errorf("Getgid: %d, want 100", gid)
|
||||
}
|
||||
})
|
||||
t.Run("hostname", func(t *testing.T) {
|
||||
if name, err := os.Hostname(); err != nil {
|
||||
t.Fatalf("cannot get hostname: %v", err)
|
||||
} else if name != os.Args[5] {
|
||||
t.Errorf("Hostname: %q, want %q", name, os.Args[5])
|
||||
}
|
||||
})
|
||||
t.Run("seccomp", func(t *testing.T) { check.MustAssertSeccomp() })
|
||||
t.Run("mntent", func(t *testing.T) { check.MustAssertMounts("", "/proc/mounts", "/proc/self/fd/0") })
|
||||
}
|
||||
|
||||
func commandContext(ctx context.Context) *exec.Cmd {
|
||||
return exec.CommandContext(ctx, os.Args[0], "-test.v",
|
||||
"-test.run=TestHelperInit", "--", "init")
|
||||
}
|
||||
26
sandbox/executable.go
Normal file
26
sandbox/executable.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
|
||||
executable string
|
||||
executableOnce sync.Once
|
||||
)
|
||||
|
||||
func copyExecutable() {
|
||||
if name, err := os.Executable(); err != nil {
|
||||
msg.BeforeExit()
|
||||
log.Fatalf("cannot read executable path: %v", err)
|
||||
} else {
|
||||
executable = name
|
||||
}
|
||||
}
|
||||
|
||||
func MustExecutable() string {
|
||||
executableOnce.Do(copyExecutable)
|
||||
return executable
|
||||
}
|
||||
17
sandbox/executable_test.go
Normal file
17
sandbox/executable_test.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package sandbox_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/sandbox"
|
||||
)
|
||||
|
||||
func TestExecutable(t *testing.T) {
|
||||
for i := 0; i < 16; i++ {
|
||||
if got := sandbox.MustExecutable(); got != os.Args[0] {
|
||||
t.Errorf("MustExecutable: %q, want %q",
|
||||
got, os.Args[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
341
sandbox/init.go
Normal file
341
sandbox/init.go
Normal file
@@ -0,0 +1,341 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
|
||||
)
|
||||
|
||||
const (
|
||||
// time to wait for linger processes after death of initial process
|
||||
residualProcessTimeout = 5 * time.Second
|
||||
|
||||
// intermediate tmpfs mount point
|
||||
basePath = "/tmp"
|
||||
|
||||
// setup params file descriptor
|
||||
setupEnv = "FORTIFY_SETUP"
|
||||
)
|
||||
|
||||
type initParams struct {
|
||||
InitParams
|
||||
|
||||
HostUid, HostGid int
|
||||
// extra files count
|
||||
Count int
|
||||
// verbosity pass through
|
||||
Verbose bool
|
||||
}
|
||||
|
||||
func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
runtime.LockOSThread()
|
||||
prepare("init")
|
||||
|
||||
if os.Getpid() != 1 {
|
||||
log.Fatal("this process must run as pid 1")
|
||||
}
|
||||
|
||||
/*
|
||||
receive setup payload
|
||||
*/
|
||||
|
||||
var (
|
||||
params initParams
|
||||
closeSetup func() error
|
||||
setupFile *os.File
|
||||
offsetSetup int
|
||||
)
|
||||
if f, err := Receive(setupEnv, ¶ms, &setupFile); err != nil {
|
||||
if errors.Is(err, ErrInvalid) {
|
||||
log.Fatal("invalid setup descriptor")
|
||||
}
|
||||
if errors.Is(err, ErrNotSet) {
|
||||
log.Fatal("FORTIFY_SETUP not set")
|
||||
}
|
||||
|
||||
log.Fatalf("cannot decode init setup payload: %v", err)
|
||||
} else {
|
||||
if params.Ops == nil {
|
||||
log.Fatal("invalid setup parameters")
|
||||
}
|
||||
|
||||
setVerbose(params.Verbose)
|
||||
msg.Verbose("received setup parameters")
|
||||
closeSetup = f
|
||||
offsetSetup = int(setupFile.Fd() + 1)
|
||||
}
|
||||
|
||||
// write uid/gid map here so parent does not need to set dumpable
|
||||
if err := SetDumpable(SUID_DUMP_USER); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_USER: %s", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/uid_map",
|
||||
append([]byte{}, strconv.Itoa(params.Uid)+" "+strconv.Itoa(params.HostUid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/setgroups",
|
||||
[]byte("deny\n"),
|
||||
0); err != nil && !os.IsNotExist(err) {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/gid_map",
|
||||
append([]byte{}, strconv.Itoa(params.Gid)+" "+strconv.Itoa(params.HostGid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := SetDumpable(SUID_DUMP_DISABLE); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
}
|
||||
|
||||
if params.Hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(params.Hostname)); err != nil {
|
||||
log.Fatalf("cannot set hostname: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
set up mount points from intermediate root
|
||||
*/
|
||||
|
||||
if err := syscall.Mount("", "/", "",
|
||||
syscall.MS_SILENT|syscall.MS_SLAVE|syscall.MS_REC,
|
||||
""); err != nil {
|
||||
log.Fatalf("cannot make / rslave: %v", err)
|
||||
}
|
||||
|
||||
if err := syscall.Mount("rootfs", basePath, "tmpfs",
|
||||
syscall.MS_NODEV|syscall.MS_NOSUID,
|
||||
""); err != nil {
|
||||
log.Fatalf("cannot mount intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir(basePath); err != nil {
|
||||
log.Fatalf("cannot enter base path: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(sysrootDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := syscall.Mount(sysrootDir, sysrootDir, "",
|
||||
syscall.MS_SILENT|syscall.MS_MGC_VAL|syscall.MS_BIND|syscall.MS_REC,
|
||||
""); err != nil {
|
||||
log.Fatalf("cannot bind sysroot: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(hostDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := syscall.PivotRoot(basePath, hostDir); err != nil {
|
||||
log.Fatalf("cannot pivot into intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
for i, op := range *params.Ops {
|
||||
msg.Verbosef("mounting %s", op)
|
||||
if err := op.apply(¶ms.InitParams); err != nil {
|
||||
msg.PrintBaseErr(err,
|
||||
fmt.Sprintf("cannot apply op %d:", i))
|
||||
msg.BeforeExit()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pivot to sysroot
|
||||
*/
|
||||
|
||||
if err := syscall.Mount(hostDir, hostDir, "",
|
||||
syscall.MS_SILENT|syscall.MS_REC|syscall.MS_PRIVATE,
|
||||
""); err != nil {
|
||||
log.Fatalf("cannot make host root rprivate: %v", err)
|
||||
}
|
||||
if err := syscall.Unmount(hostDir, syscall.MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount host root: %v", err)
|
||||
}
|
||||
|
||||
{
|
||||
var fd int
|
||||
if err := IgnoringEINTR(func() (err error) {
|
||||
fd, err = syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
|
||||
return
|
||||
}); err != nil {
|
||||
log.Fatalf("cannot open intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir(sysrootPath); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := syscall.PivotRoot(".", "."); err != nil {
|
||||
log.Fatalf("cannot pivot into sysroot: %v", err)
|
||||
}
|
||||
if err := syscall.Fchdir(fd); err != nil {
|
||||
log.Fatalf("cannot re-enter intermediate root: %v", err)
|
||||
}
|
||||
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount intemediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := syscall.Close(fd); err != nil {
|
||||
log.Fatalf("cannot close intermediate root: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
load seccomp filter
|
||||
*/
|
||||
|
||||
if _, _, err := syscall.Syscall(PR_SET_NO_NEW_PRIVS, 1, 0, 0); err != 0 {
|
||||
log.Fatalf("prctl(PR_SET_NO_NEW_PRIVS): %v", err)
|
||||
}
|
||||
if err := seccomp.Load(params.Flags.seccomp(params.Seccomp)); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
}
|
||||
|
||||
/* at this point CAP_SYS_ADMIN can be dropped, however it is kept for now as it does not increase attack surface */
|
||||
|
||||
/*
|
||||
pass through extra files
|
||||
*/
|
||||
|
||||
extraFiles := make([]*os.File, params.Count)
|
||||
for i := range extraFiles {
|
||||
extraFiles[i] = os.NewFile(uintptr(offsetSetup+i), "extra file "+strconv.Itoa(i))
|
||||
}
|
||||
|
||||
/*
|
||||
prepare initial process
|
||||
*/
|
||||
|
||||
cmd := exec.Command(params.Path)
|
||||
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
cmd.Args = params.Args
|
||||
cmd.Env = params.Env
|
||||
cmd.ExtraFiles = extraFiles
|
||||
cmd.Dir = params.Dir
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
msg.Suspend()
|
||||
|
||||
/*
|
||||
close setup pipe
|
||||
*/
|
||||
|
||||
if err := closeSetup(); err != nil {
|
||||
log.Println("cannot close setup pipe:", err)
|
||||
// not fatal
|
||||
}
|
||||
|
||||
/*
|
||||
perform init duties
|
||||
*/
|
||||
|
||||
sig := make(chan os.Signal, 2)
|
||||
signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
type winfo struct {
|
||||
wpid int
|
||||
wstatus syscall.WaitStatus
|
||||
}
|
||||
info := make(chan winfo, 1)
|
||||
done := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
var (
|
||||
err error
|
||||
wpid = -2
|
||||
wstatus syscall.WaitStatus
|
||||
)
|
||||
|
||||
// keep going until no child process is left
|
||||
for wpid != -1 {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
if wpid != -2 {
|
||||
info <- winfo{wpid, wstatus}
|
||||
}
|
||||
|
||||
err = syscall.EINTR
|
||||
for errors.Is(err, syscall.EINTR) {
|
||||
wpid, err = syscall.Wait4(-1, &wstatus, 0, nil)
|
||||
}
|
||||
}
|
||||
if !errors.Is(err, syscall.ECHILD) {
|
||||
log.Println("unexpected wait4 response:", err)
|
||||
}
|
||||
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// closed after residualProcessTimeout has elapsed after initial process death
|
||||
timeout := make(chan struct{})
|
||||
|
||||
r := 2
|
||||
for {
|
||||
select {
|
||||
case s := <-sig:
|
||||
if msg.Resume() {
|
||||
msg.Verbosef("terminating on %s after process start", s.String())
|
||||
} else {
|
||||
msg.Verbosef("terminating on %s", s.String())
|
||||
}
|
||||
msg.BeforeExit()
|
||||
os.Exit(0)
|
||||
case w := <-info:
|
||||
if w.wpid == cmd.Process.Pid {
|
||||
// initial process exited, output is most likely available again
|
||||
msg.Resume()
|
||||
|
||||
switch {
|
||||
case w.wstatus.Exited():
|
||||
r = w.wstatus.ExitStatus()
|
||||
case w.wstatus.Signaled():
|
||||
r = 128 + int(w.wstatus.Signal())
|
||||
default:
|
||||
r = 255
|
||||
}
|
||||
|
||||
go func() {
|
||||
time.Sleep(residualProcessTimeout)
|
||||
close(timeout)
|
||||
}()
|
||||
}
|
||||
case <-done:
|
||||
msg.BeforeExit()
|
||||
os.Exit(r)
|
||||
case <-timeout:
|
||||
log.Println("timeout exceeded waiting for lingering processes")
|
||||
msg.BeforeExit()
|
||||
os.Exit(r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TryArgv0 calls [Init] if the last element of argv0 is "init".
|
||||
func TryArgv0(v Msg, prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
if len(os.Args) > 0 && path.Base(os.Args[0]) == "init" {
|
||||
msg = v
|
||||
Init(prepare, setVerbose)
|
||||
msg.BeforeExit()
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
95
sandbox/mount.go
Normal file
95
sandbox/mount.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
BindOptional = 1 << iota
|
||||
BindSource
|
||||
BindRecursive
|
||||
BindWritable
|
||||
BindDevices
|
||||
)
|
||||
|
||||
func bindMount(src, dest string, flags int) error {
|
||||
target := toSysroot(dest)
|
||||
var source string
|
||||
|
||||
if flags&BindSource == 0 {
|
||||
// this is what bwrap does, so the behaviour is kept for now,
|
||||
// however recursively resolving links might improve user experience
|
||||
if rp, err := realpathHost(src); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if flags&BindOptional != 0 {
|
||||
return nil
|
||||
} else {
|
||||
return msg.WrapErr(err,
|
||||
fmt.Sprintf("path %q does not exist", src))
|
||||
}
|
||||
}
|
||||
return msg.WrapErr(err, err.Error())
|
||||
} else {
|
||||
source = toHost(rp)
|
||||
}
|
||||
} else if flags&BindOptional != 0 {
|
||||
return msg.WrapErr(syscall.EINVAL,
|
||||
"flag source excludes optional")
|
||||
} else {
|
||||
source = toHost(src)
|
||||
}
|
||||
|
||||
if fi, err := os.Stat(source); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
} else if fi.IsDir() {
|
||||
if err = os.MkdirAll(target, 0755); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot create directory %q:", dest))
|
||||
}
|
||||
} else if err = ensureFile(target, 0444); err != nil {
|
||||
if errors.Is(err, syscall.EISDIR) {
|
||||
return msg.WrapErr(err,
|
||||
fmt.Sprintf("path %q is a directory", dest))
|
||||
}
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot create %q:", dest))
|
||||
}
|
||||
|
||||
var mf uintptr = syscall.MS_SILENT | syscall.MS_BIND
|
||||
if flags&BindRecursive != 0 {
|
||||
mf |= syscall.MS_REC
|
||||
}
|
||||
if flags&BindWritable == 0 {
|
||||
mf |= syscall.MS_RDONLY
|
||||
}
|
||||
if flags&BindDevices == 0 {
|
||||
mf |= syscall.MS_NODEV
|
||||
}
|
||||
if msg.IsVerbose() {
|
||||
if strings.TrimPrefix(source, hostPath) == strings.TrimPrefix(target, sysrootPath) {
|
||||
msg.Verbosef("resolved %q flags %#x", target, mf)
|
||||
} else {
|
||||
msg.Verbosef("resolved %q on %q flags %#x", source, target, mf)
|
||||
}
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount(source, target, "", mf, ""),
|
||||
fmt.Sprintf("cannot bind %q on %q:", src, dest))
|
||||
}
|
||||
|
||||
func mountTmpfs(fsname, name string, size int, perm os.FileMode) error {
|
||||
target := toSysroot(name)
|
||||
if err := os.MkdirAll(target, perm); err != nil {
|
||||
return err
|
||||
}
|
||||
opt := fmt.Sprintf("mode=%#o", perm)
|
||||
if size > 0 {
|
||||
opt += fmt.Sprintf(",size=%d", size)
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount(fsname, target, "tmpfs",
|
||||
syscall.MS_NOSUID|syscall.MS_NODEV, opt),
|
||||
fmt.Sprintf("cannot mount tmpfs on %q:", name))
|
||||
}
|
||||
43
sandbox/msg.go
Normal file
43
sandbox/msg.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
type Msg interface {
|
||||
IsVerbose() bool
|
||||
Verbose(v ...any)
|
||||
Verbosef(format string, v ...any)
|
||||
WrapErr(err error, a ...any) error
|
||||
PrintBaseErr(err error, fallback string)
|
||||
|
||||
Suspend()
|
||||
Resume() bool
|
||||
|
||||
BeforeExit()
|
||||
}
|
||||
|
||||
type DefaultMsg struct{ inactive atomic.Bool }
|
||||
|
||||
func (msg *DefaultMsg) IsVerbose() bool { return true }
|
||||
func (msg *DefaultMsg) Verbose(v ...any) {
|
||||
if !msg.inactive.Load() {
|
||||
log.Println(v...)
|
||||
}
|
||||
}
|
||||
func (msg *DefaultMsg) Verbosef(format string, v ...any) {
|
||||
if !msg.inactive.Load() {
|
||||
log.Printf(format, v...)
|
||||
}
|
||||
}
|
||||
|
||||
func (msg *DefaultMsg) WrapErr(err error, a ...any) error {
|
||||
log.Println(a...)
|
||||
return err
|
||||
}
|
||||
func (msg *DefaultMsg) PrintBaseErr(err error, fallback string) { log.Println(fallback, err) }
|
||||
|
||||
func (msg *DefaultMsg) Suspend() { msg.inactive.Store(true) }
|
||||
func (msg *DefaultMsg) Resume() bool { return msg.inactive.CompareAndSwap(true, false) }
|
||||
func (msg *DefaultMsg) BeforeExit() {}
|
||||
19
sandbox/output.go
Normal file
19
sandbox/output.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package sandbox
|
||||
|
||||
var msg Msg = new(DefaultMsg)
|
||||
|
||||
func GetOutput() Msg { return msg }
|
||||
func SetOutput(v Msg) {
|
||||
if v == nil {
|
||||
msg = new(DefaultMsg)
|
||||
} else {
|
||||
msg = v
|
||||
}
|
||||
}
|
||||
|
||||
func wrapErrSuffix(err error, a ...any) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return msg.WrapErr(err, append(a, err)...)
|
||||
}
|
||||
37
sandbox/overflow.go
Normal file
37
sandbox/overflow.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
|
||||
ofUid int
|
||||
ofGid int
|
||||
ofOnce sync.Once
|
||||
)
|
||||
|
||||
const (
|
||||
ofUidPath = "/proc/sys/kernel/overflowuid"
|
||||
ofGidPath = "/proc/sys/kernel/overflowgid"
|
||||
)
|
||||
|
||||
func mustReadOverflow() {
|
||||
if v, err := os.ReadFile(ofUidPath); err != nil {
|
||||
log.Fatalf("cannot read %q: %v", ofUidPath, err)
|
||||
} else if ofUid, err = strconv.Atoi(string(bytes.TrimSpace(v))); err != nil {
|
||||
log.Fatalf("cannot interpret %q: %v", ofUidPath, err)
|
||||
}
|
||||
|
||||
if v, err := os.ReadFile(ofGidPath); err != nil {
|
||||
log.Fatalf("cannot read %q: %v", ofGidPath, err)
|
||||
} else if ofGid, err = strconv.Atoi(string(bytes.TrimSpace(v))); err != nil {
|
||||
log.Fatalf("cannot interpret %q: %v", ofGidPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
func OverflowUid() int { ofOnce.Do(mustReadOverflow); return ofUid }
|
||||
func OverflowGid() int { ofOnce.Do(mustReadOverflow); return ofGid }
|
||||
47
sandbox/params.go
Normal file
47
sandbox/params.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotSet = errors.New("environment variable not set")
|
||||
ErrInvalid = errors.New("bad file descriptor")
|
||||
)
|
||||
|
||||
// Setup appends the read end of a pipe for setup params transmission and returns its fd.
|
||||
func Setup(extraFiles *[]*os.File) (int, *gob.Encoder, error) {
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
return -1, nil, err
|
||||
} else {
|
||||
fd := 3 + len(*extraFiles)
|
||||
*extraFiles = append(*extraFiles, r)
|
||||
return fd, gob.NewEncoder(w), nil
|
||||
}
|
||||
}
|
||||
|
||||
// Receive retrieves setup fd from the environment and receives params.
|
||||
func Receive(key string, e any, v **os.File) (func() error, error) {
|
||||
var setup *os.File
|
||||
|
||||
if s, ok := os.LookupEnv(key); !ok {
|
||||
return nil, ErrNotSet
|
||||
} else {
|
||||
if fd, err := strconv.Atoi(s); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
setup = os.NewFile(uintptr(fd), "setup")
|
||||
if setup == nil {
|
||||
return nil, ErrInvalid
|
||||
}
|
||||
if v != nil {
|
||||
*v = setup
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return setup.Close, gob.NewDecoder(setup).Decode(e)
|
||||
}
|
||||
75
sandbox/path.go
Normal file
75
sandbox/path.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
hostPath = "/" + hostDir
|
||||
hostDir = "host"
|
||||
sysrootPath = "/" + sysrootDir
|
||||
sysrootDir = "sysroot"
|
||||
)
|
||||
|
||||
func toSysroot(name string) string {
|
||||
name = strings.TrimLeftFunc(name, func(r rune) bool { return r == '/' })
|
||||
return path.Join(sysrootPath, name)
|
||||
}
|
||||
|
||||
func toHost(name string) string {
|
||||
name = strings.TrimLeftFunc(name, func(r rune) bool { return r == '/' })
|
||||
return path.Join(hostPath, name)
|
||||
}
|
||||
|
||||
func realpathHost(name string) (string, error) {
|
||||
source := toHost(name)
|
||||
rp, err := os.Readlink(source)
|
||||
|
||||
if err != nil {
|
||||
if errors.Is(err, syscall.EINVAL) {
|
||||
// not a symlink
|
||||
return name, nil
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
|
||||
if !path.IsAbs(rp) {
|
||||
return name, nil
|
||||
}
|
||||
msg.Verbosef("path %q resolves to %q", name, rp)
|
||||
return rp, nil
|
||||
}
|
||||
|
||||
func createFile(name string, perm os.FileMode, content []byte) error {
|
||||
if err := os.MkdirAll(path.Dir(name), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(name, syscall.O_CREAT|syscall.O_EXCL|syscall.O_WRONLY, perm)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if content != nil {
|
||||
_, err = f.Write(content)
|
||||
}
|
||||
return errors.Join(f.Close(), err)
|
||||
}
|
||||
|
||||
func ensureFile(name string, perm os.FileMode) error {
|
||||
fi, err := os.Stat(name)
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return createFile(name, perm, nil)
|
||||
}
|
||||
|
||||
if mode := fi.Mode(); mode&fs.ModeDir != 0 || mode&fs.ModeSymlink != 0 {
|
||||
err = syscall.EISDIR
|
||||
}
|
||||
return err
|
||||
}
|
||||
71
sandbox/seccomp/api.go
Normal file
71
sandbox/seccomp/api.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/helper/proc"
|
||||
)
|
||||
|
||||
// New returns an inactive Encoder instance.
|
||||
func New(opts SyscallOpts) *Encoder { return &Encoder{newExporter(opts)} }
|
||||
|
||||
// Load loads a filter into the kernel.
|
||||
func Load(opts SyscallOpts) error { return buildFilter(-1, opts) }
|
||||
|
||||
/*
|
||||
An Encoder writes a BPF program to an output stream.
|
||||
|
||||
Methods of Encoder are not safe for concurrent use.
|
||||
|
||||
An Encoder must not be copied after first use.
|
||||
*/
|
||||
type Encoder struct {
|
||||
*exporter
|
||||
}
|
||||
|
||||
func (e *Encoder) Read(p []byte) (n int, err error) {
|
||||
if err = e.prepare(); err != nil {
|
||||
return
|
||||
}
|
||||
return e.r.Read(p)
|
||||
}
|
||||
|
||||
func (e *Encoder) Close() error {
|
||||
if e.r == nil {
|
||||
return syscall.EINVAL
|
||||
}
|
||||
|
||||
// this hangs if the cgo thread fails to exit
|
||||
return errors.Join(e.closeWrite(), <-e.exportErr)
|
||||
}
|
||||
|
||||
// NewFile returns an instance of exporter implementing [proc.File].
|
||||
func NewFile(opts SyscallOpts) proc.File { return &File{opts: opts} }
|
||||
|
||||
// File implements [proc.File] and provides access to the read end of exporter pipe.
|
||||
type File struct {
|
||||
opts SyscallOpts
|
||||
proc.BaseFile
|
||||
}
|
||||
|
||||
func (f *File) ErrCount() int { return 2 }
|
||||
func (f *File) Fulfill(ctx context.Context, dispatchErr func(error)) error {
|
||||
e := newExporter(f.opts)
|
||||
if err := e.prepare(); err != nil {
|
||||
return err
|
||||
}
|
||||
f.Set(e.r)
|
||||
go func() {
|
||||
select {
|
||||
case err := <-e.exportErr:
|
||||
dispatchErr(nil)
|
||||
dispatchErr(err)
|
||||
case <-ctx.Done():
|
||||
dispatchErr(e.closeWrite())
|
||||
dispatchErr(<-e.exportErr)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
58
sandbox/seccomp/export.go
Normal file
58
sandbox/seccomp/export.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type exporter struct {
|
||||
opts SyscallOpts
|
||||
r, w *os.File
|
||||
|
||||
prepareOnce sync.Once
|
||||
prepareErr error
|
||||
closeOnce sync.Once
|
||||
closeErr error
|
||||
exportErr <-chan error
|
||||
}
|
||||
|
||||
func (e *exporter) prepare() error {
|
||||
e.prepareOnce.Do(func() {
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
e.prepareErr = err
|
||||
return
|
||||
} else {
|
||||
e.r, e.w = r, w
|
||||
}
|
||||
|
||||
ec := make(chan error, 1)
|
||||
go func(fd uintptr) {
|
||||
ec <- buildFilter(int(fd), e.opts)
|
||||
close(ec)
|
||||
_ = e.closeWrite()
|
||||
runtime.KeepAlive(e.w)
|
||||
}(e.w.Fd())
|
||||
e.exportErr = ec
|
||||
runtime.SetFinalizer(e, (*exporter).closeWrite)
|
||||
})
|
||||
return e.prepareErr
|
||||
}
|
||||
|
||||
func (e *exporter) closeWrite() error {
|
||||
e.closeOnce.Do(func() {
|
||||
if e.w == nil {
|
||||
panic("closeWrite called on invalid exporter")
|
||||
}
|
||||
e.closeErr = e.w.Close()
|
||||
|
||||
// no need for a finalizer anymore
|
||||
runtime.SetFinalizer(e, nil)
|
||||
})
|
||||
|
||||
return e.closeErr
|
||||
}
|
||||
|
||||
func newExporter(opts SyscallOpts) *exporter {
|
||||
return &exporter{opts: opts}
|
||||
}
|
||||
139
sandbox/seccomp/export_test.go
Normal file
139
sandbox/seccomp/export_test.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package seccomp_test
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"errors"
|
||||
"io"
|
||||
"slices"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
|
||||
)
|
||||
|
||||
func TestExport(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
opts seccomp.SyscallOpts
|
||||
want []byte
|
||||
wantErr bool
|
||||
}{
|
||||
{"compat", 0, []byte{
|
||||
0x95, 0xec, 0x69, 0xd0, 0x17, 0x73, 0x3e, 0x07,
|
||||
0x21, 0x60, 0xe0, 0xda, 0x80, 0xfd, 0xeb, 0xec,
|
||||
0xdf, 0x27, 0xae, 0x81, 0x66, 0xf5, 0xe2, 0xa7,
|
||||
0x31, 0x27, 0x0c, 0x98, 0xea, 0x2d, 0x29, 0x46,
|
||||
0xcb, 0x52, 0x31, 0x02, 0x90, 0x63, 0x66, 0x8a,
|
||||
0xf2, 0x15, 0x87, 0x91, 0x55, 0xda, 0x21, 0xac,
|
||||
0xa7, 0x9b, 0x07, 0x0e, 0x04, 0xc0, 0xee, 0x9a,
|
||||
0xcd, 0xf5, 0x8f, 0x55, 0xcf, 0xa8, 0x15, 0xa5,
|
||||
}, false},
|
||||
{"base", seccomp.FlagExt, []byte{
|
||||
0xdc, 0x7f, 0x2e, 0x1c, 0x5e, 0x82, 0x9b, 0x79,
|
||||
0xeb, 0xb7, 0xef, 0xc7, 0x59, 0x15, 0x0f, 0x54,
|
||||
0xa8, 0x3a, 0x75, 0xc8, 0xdf, 0x6f, 0xee, 0x4d,
|
||||
0xce, 0x5d, 0xad, 0xc4, 0x73, 0x6c, 0x58, 0x5d,
|
||||
0x4d, 0xee, 0xbf, 0xeb, 0x3c, 0x79, 0x69, 0xaf,
|
||||
0x3a, 0x07, 0x7e, 0x90, 0xb7, 0x7b, 0xb4, 0x74,
|
||||
0x1d, 0xb0, 0x5d, 0x90, 0x99, 0x7c, 0x86, 0x59,
|
||||
0xb9, 0x58, 0x91, 0x20, 0x6a, 0xc9, 0x95, 0x2d,
|
||||
}, false},
|
||||
{"everything", seccomp.FlagExt |
|
||||
seccomp.FlagDenyNS | seccomp.FlagDenyTTY | seccomp.FlagDenyDevel |
|
||||
seccomp.FlagMultiarch | seccomp.FlagLinux32 | seccomp.FlagCan |
|
||||
seccomp.FlagBluetooth, []byte{
|
||||
0xe9, 0x9d, 0xd3, 0x45, 0xe1, 0x95, 0x41, 0x34,
|
||||
0x73, 0xd3, 0xcb, 0xee, 0x07, 0xb4, 0xed, 0x57,
|
||||
0xb9, 0x08, 0xbf, 0xa8, 0x9e, 0xa2, 0x07, 0x2f,
|
||||
0xe9, 0x34, 0x82, 0x84, 0x7f, 0x50, 0xb5, 0xb7,
|
||||
0x58, 0xda, 0x17, 0xe7, 0x4c, 0xa2, 0xbb, 0xc0,
|
||||
0x08, 0x13, 0xde, 0x49, 0xa2, 0xb9, 0xbf, 0x83,
|
||||
0x4c, 0x02, 0x4e, 0xd4, 0x88, 0x50, 0xbe, 0x69,
|
||||
0xb6, 0x8a, 0x9a, 0x4c, 0x5f, 0x53, 0xa9, 0xdb,
|
||||
}, false},
|
||||
{"strict", seccomp.FlagExt |
|
||||
seccomp.FlagDenyNS | seccomp.FlagDenyTTY | seccomp.FlagDenyDevel, []byte{
|
||||
0xe8, 0x80, 0x29, 0x8d, 0xf2, 0xbd, 0x67, 0x51,
|
||||
0xd0, 0x04, 0x0f, 0xc2, 0x1b, 0xc0, 0xed, 0x4c,
|
||||
0x00, 0xf9, 0x5d, 0xc0, 0xd7, 0xba, 0x50, 0x6c,
|
||||
0x24, 0x4d, 0x8b, 0x8c, 0xf6, 0x86, 0x6d, 0xba,
|
||||
0x8e, 0xf4, 0xa3, 0x32, 0x96, 0xf2, 0x87, 0xb6,
|
||||
0x6c, 0xcc, 0xc1, 0xd7, 0x8e, 0x97, 0x02, 0x65,
|
||||
0x97, 0xf8, 0x4c, 0xc7, 0xde, 0xc1, 0x57, 0x3e,
|
||||
0x14, 0x89, 0x60, 0xfb, 0xd3, 0x5c, 0xd7, 0x35,
|
||||
}, false},
|
||||
{"strict compat", 0 |
|
||||
seccomp.FlagDenyNS | seccomp.FlagDenyTTY | seccomp.FlagDenyDevel, []byte{
|
||||
0x39, 0x87, 0x1b, 0x93, 0xff, 0xaf, 0xc8, 0xb9,
|
||||
0x79, 0xfc, 0xed, 0xc0, 0xb0, 0xc3, 0x7b, 0x9e,
|
||||
0x03, 0x92, 0x2f, 0x5b, 0x02, 0x74, 0x8d, 0xc5,
|
||||
0xc3, 0xc1, 0x7c, 0x92, 0x52, 0x7f, 0x6e, 0x02,
|
||||
0x2e, 0xde, 0x1f, 0x48, 0xbf, 0xf5, 0x92, 0x46,
|
||||
0xea, 0x45, 0x2c, 0x0d, 0x1d, 0xe5, 0x48, 0x27,
|
||||
0x80, 0x8b, 0x1a, 0x6f, 0x84, 0xf3, 0x2b, 0xbd,
|
||||
0xe1, 0xaa, 0x02, 0xae, 0x30, 0xee, 0xdc, 0xfa,
|
||||
}, false},
|
||||
}
|
||||
|
||||
buf := make([]byte, 8)
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
oldF := seccomp.GetOutput()
|
||||
seccomp.SetOutput(t.Log)
|
||||
t.Cleanup(func() { seccomp.SetOutput(oldF) })
|
||||
|
||||
e := seccomp.New(tc.opts)
|
||||
digest := sha512.New()
|
||||
|
||||
if _, err := io.CopyBuffer(digest, e, buf); (err != nil) != tc.wantErr {
|
||||
t.Errorf("Exporter: error = %v, wantErr %v", err, tc.wantErr)
|
||||
return
|
||||
}
|
||||
if err := e.Close(); err != nil {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
return
|
||||
}
|
||||
if got := digest.Sum(nil); slices.Compare(got, tc.want) != 0 {
|
||||
t.Fatalf("Export() hash = %x, want %x",
|
||||
got, tc.want)
|
||||
return
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("close without use", func(t *testing.T) {
|
||||
e := seccomp.New(0)
|
||||
if err := e.Close(); !errors.Is(err, syscall.EINVAL) {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
return
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("close partial read", func(t *testing.T) {
|
||||
e := seccomp.New(0)
|
||||
if _, err := e.Read(make([]byte, 0)); err != nil {
|
||||
t.Errorf("Read: error = %v", err)
|
||||
return
|
||||
}
|
||||
if err := e.Close(); err == nil || !errors.Is(err, syscall.ECANCELED) || !errors.Is(err, syscall.EBADF) {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
return
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkExport(b *testing.B) {
|
||||
buf := make([]byte, 8)
|
||||
for i := 0; i < b.N; i++ {
|
||||
e := seccomp.New(seccomp.FlagExt |
|
||||
seccomp.FlagDenyNS | seccomp.FlagDenyTTY | seccomp.FlagDenyDevel |
|
||||
seccomp.FlagMultiarch | seccomp.FlagLinux32 | seccomp.FlagCan |
|
||||
seccomp.FlagBluetooth)
|
||||
if _, err := io.CopyBuffer(io.Discard, e, buf); err != nil {
|
||||
b.Fatalf("cannot export: %v", err)
|
||||
}
|
||||
if err := e.Close(); err != nil {
|
||||
b.Fatalf("cannot close exporter: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
30
sandbox/seccomp/output.go
Normal file
30
sandbox/seccomp/output.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package seccomp
|
||||
|
||||
import "C"
|
||||
import "sync/atomic"
|
||||
|
||||
var printlnP atomic.Pointer[func(v ...any)]
|
||||
|
||||
func SetOutput(f func(v ...any)) {
|
||||
if f == nil {
|
||||
// avoid storing nil function
|
||||
printlnP.Store(nil)
|
||||
} else {
|
||||
printlnP.Store(&f)
|
||||
}
|
||||
}
|
||||
|
||||
func GetOutput() func(v ...any) {
|
||||
if fp := printlnP.Load(); fp == nil {
|
||||
return nil
|
||||
} else {
|
||||
return *fp
|
||||
}
|
||||
}
|
||||
|
||||
//export F_println
|
||||
func F_println(v *C.char) {
|
||||
if fp := printlnP.Load(); fp != nil {
|
||||
(*fp)(C.GoString(v))
|
||||
}
|
||||
}
|
||||
302
sandbox/seccomp/seccomp-build.c
Normal file
302
sandbox/seccomp/seccomp-build.c
Normal file
@@ -0,0 +1,302 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE // CLONE_NEWUSER
|
||||
#endif
|
||||
|
||||
#include "seccomp-build.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/personality.h>
|
||||
#include <sched.h>
|
||||
|
||||
#if (SCMP_VER_MAJOR < 2) || \
|
||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
||||
#error This package requires libseccomp >= v2.5.1
|
||||
#endif
|
||||
|
||||
struct f_syscall_act {
|
||||
int syscall;
|
||||
int m_errno;
|
||||
struct scmp_arg_cmp *arg;
|
||||
};
|
||||
|
||||
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
|
||||
#define SECCOMP_RULESET_ADD(ruleset) do { \
|
||||
if (opts & F_VERBOSE) F_println("adding seccomp ruleset \"" #ruleset "\""); \
|
||||
for (int i = 0; i < LEN(ruleset); i++) { \
|
||||
assert(ruleset[i].m_errno == EPERM || ruleset[i].m_errno == ENOSYS); \
|
||||
\
|
||||
if (ruleset[i].arg) \
|
||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), ruleset[i].syscall, 1, *ruleset[i].arg); \
|
||||
else \
|
||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), ruleset[i].syscall, 0); \
|
||||
\
|
||||
if (*ret_p == -EFAULT) { \
|
||||
res = 4; \
|
||||
goto out; \
|
||||
} else if (*ret_p < 0) { \
|
||||
res = 5; \
|
||||
goto out; \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
int32_t f_build_filter(int *ret_p, int fd, uint32_t arch, uint32_t multiarch, f_syscall_opts opts) {
|
||||
int32_t res = 0; // refer to resErr for meaning
|
||||
int allow_multiarch = opts & F_MULTIARCH;
|
||||
int allowed_personality = PER_LINUX;
|
||||
|
||||
if (opts & F_LINUX32)
|
||||
allowed_personality = PER_LINUX32;
|
||||
|
||||
// flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532
|
||||
|
||||
struct f_syscall_act deny_common[] = {
|
||||
// Block dmesg
|
||||
{SCMP_SYS(syslog), EPERM},
|
||||
// Useless old syscall
|
||||
{SCMP_SYS(uselib), EPERM},
|
||||
// Don't allow disabling accounting
|
||||
{SCMP_SYS(acct), EPERM},
|
||||
// Don't allow reading current quota use
|
||||
{SCMP_SYS(quotactl), EPERM},
|
||||
|
||||
// Don't allow access to the kernel keyring
|
||||
{SCMP_SYS(add_key), EPERM},
|
||||
{SCMP_SYS(keyctl), EPERM},
|
||||
{SCMP_SYS(request_key), EPERM},
|
||||
|
||||
// Scary VM/NUMA ops
|
||||
{SCMP_SYS(move_pages), EPERM},
|
||||
{SCMP_SYS(mbind), EPERM},
|
||||
{SCMP_SYS(get_mempolicy), EPERM},
|
||||
{SCMP_SYS(set_mempolicy), EPERM},
|
||||
{SCMP_SYS(migrate_pages), EPERM},
|
||||
};
|
||||
|
||||
// fortify: project-specific extensions
|
||||
struct f_syscall_act deny_common_ext[] = {
|
||||
// system calls for changing the system clock
|
||||
{SCMP_SYS(adjtimex), EPERM},
|
||||
{SCMP_SYS(clock_adjtime), EPERM},
|
||||
{SCMP_SYS(clock_adjtime64), EPERM},
|
||||
{SCMP_SYS(clock_settime), EPERM},
|
||||
{SCMP_SYS(clock_settime64), EPERM},
|
||||
{SCMP_SYS(settimeofday), EPERM},
|
||||
|
||||
// loading and unloading of kernel modules
|
||||
{SCMP_SYS(delete_module), EPERM},
|
||||
{SCMP_SYS(finit_module), EPERM},
|
||||
{SCMP_SYS(init_module), EPERM},
|
||||
|
||||
// system calls for rebooting and reboot preparation
|
||||
{SCMP_SYS(kexec_file_load), EPERM},
|
||||
{SCMP_SYS(kexec_load), EPERM},
|
||||
{SCMP_SYS(reboot), EPERM},
|
||||
|
||||
// system calls for enabling/disabling swap devices
|
||||
{SCMP_SYS(swapoff), EPERM},
|
||||
{SCMP_SYS(swapon), EPERM},
|
||||
};
|
||||
|
||||
struct f_syscall_act deny_ns[] = {
|
||||
// Don't allow subnamespace setups:
|
||||
{SCMP_SYS(unshare), EPERM},
|
||||
{SCMP_SYS(setns), EPERM},
|
||||
{SCMP_SYS(mount), EPERM},
|
||||
{SCMP_SYS(umount), EPERM},
|
||||
{SCMP_SYS(umount2), EPERM},
|
||||
{SCMP_SYS(pivot_root), EPERM},
|
||||
{SCMP_SYS(chroot), EPERM},
|
||||
#if defined(__s390__) || defined(__s390x__) || defined(__CRIS__)
|
||||
// Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
|
||||
// and flags arguments are reversed so the flags come second
|
||||
{SCMP_SYS(clone), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
|
||||
#else
|
||||
// Normally the flags come first
|
||||
{SCMP_SYS(clone), EPERM, &SCMP_A0(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
|
||||
#endif
|
||||
|
||||
// seccomp can't look into clone3()'s struct clone_args to check whether
|
||||
// the flags are OK, so we have no choice but to block clone3().
|
||||
// Return ENOSYS so user-space will fall back to clone().
|
||||
// (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
|
||||
{SCMP_SYS(clone3), ENOSYS},
|
||||
|
||||
// New mount manipulation APIs can also change our VFS. There's no
|
||||
// legitimate reason to do these in the sandbox, so block all of them
|
||||
// rather than thinking about which ones might be dangerous.
|
||||
// (CVE-2021-41133)
|
||||
{SCMP_SYS(open_tree), ENOSYS},
|
||||
{SCMP_SYS(move_mount), ENOSYS},
|
||||
{SCMP_SYS(fsopen), ENOSYS},
|
||||
{SCMP_SYS(fsconfig), ENOSYS},
|
||||
{SCMP_SYS(fsmount), ENOSYS},
|
||||
{SCMP_SYS(fspick), ENOSYS},
|
||||
{SCMP_SYS(mount_setattr), ENOSYS},
|
||||
};
|
||||
|
||||
// fortify: project-specific extensions
|
||||
struct f_syscall_act deny_ns_ext[] = {
|
||||
// changing file ownership
|
||||
{SCMP_SYS(chown), EPERM},
|
||||
{SCMP_SYS(chown32), EPERM},
|
||||
{SCMP_SYS(fchown), EPERM},
|
||||
{SCMP_SYS(fchown32), EPERM},
|
||||
{SCMP_SYS(fchownat), EPERM},
|
||||
{SCMP_SYS(lchown), EPERM},
|
||||
{SCMP_SYS(lchown32), EPERM},
|
||||
|
||||
// system calls for changing user ID and group ID credentials
|
||||
{SCMP_SYS(setgid), EPERM},
|
||||
{SCMP_SYS(setgid32), EPERM},
|
||||
{SCMP_SYS(setgroups), EPERM},
|
||||
{SCMP_SYS(setgroups32), EPERM},
|
||||
{SCMP_SYS(setregid), EPERM},
|
||||
{SCMP_SYS(setregid32), EPERM},
|
||||
{SCMP_SYS(setresgid), EPERM},
|
||||
{SCMP_SYS(setresgid32), EPERM},
|
||||
{SCMP_SYS(setresuid), EPERM},
|
||||
{SCMP_SYS(setresuid32), EPERM},
|
||||
{SCMP_SYS(setreuid), EPERM},
|
||||
{SCMP_SYS(setreuid32), EPERM},
|
||||
{SCMP_SYS(setuid), EPERM},
|
||||
{SCMP_SYS(setuid32), EPERM},
|
||||
};
|
||||
|
||||
struct f_syscall_act deny_tty[] = {
|
||||
// Don't allow faking input to the controlling tty (CVE-2017-5226)
|
||||
{SCMP_SYS(ioctl), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCSTI)},
|
||||
// In the unlikely event that the controlling tty is a Linux virtual
|
||||
// console (/dev/tty2 or similar), copy/paste operations have an effect
|
||||
// similar to TIOCSTI (CVE-2023-28100)
|
||||
{SCMP_SYS(ioctl), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCLINUX)},
|
||||
};
|
||||
|
||||
struct f_syscall_act deny_devel[] = {
|
||||
// Profiling operations; we expect these to be done by tools from outside
|
||||
// the sandbox. In particular perf has been the source of many CVEs.
|
||||
{SCMP_SYS(perf_event_open), EPERM},
|
||||
// Don't allow you to switch to bsd emulation or whatnot
|
||||
{SCMP_SYS(personality), EPERM, &SCMP_A0(SCMP_CMP_NE, allowed_personality)},
|
||||
|
||||
{SCMP_SYS(ptrace), EPERM}
|
||||
};
|
||||
|
||||
struct f_syscall_act deny_emu[] = {
|
||||
// modify_ldt is a historic source of interesting information leaks,
|
||||
// so it's disabled as a hardening measure.
|
||||
// However, it is required to run old 16-bit applications
|
||||
// as well as some Wine patches, so it's allowed in multiarch.
|
||||
{SCMP_SYS(modify_ldt), EPERM},
|
||||
};
|
||||
|
||||
// fortify: project-specific extensions
|
||||
struct f_syscall_act deny_emu_ext[] = {
|
||||
{SCMP_SYS(subpage_prot), ENOSYS},
|
||||
{SCMP_SYS(switch_endian), ENOSYS},
|
||||
{SCMP_SYS(vm86), ENOSYS},
|
||||
{SCMP_SYS(vm86old), ENOSYS},
|
||||
};
|
||||
|
||||
// Blocklist all but unix, inet, inet6 and netlink
|
||||
struct
|
||||
{
|
||||
int family;
|
||||
f_syscall_opts flags_mask;
|
||||
} socket_family_allowlist[] = {
|
||||
// NOTE: Keep in numerical order
|
||||
{ AF_UNSPEC, 0 },
|
||||
{ AF_LOCAL, 0 },
|
||||
{ AF_INET, 0 },
|
||||
{ AF_INET6, 0 },
|
||||
{ AF_NETLINK, 0 },
|
||||
{ AF_CAN, F_CAN },
|
||||
{ AF_BLUETOOTH, F_BLUETOOTH },
|
||||
};
|
||||
|
||||
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
|
||||
if (ctx == NULL) {
|
||||
res = 1;
|
||||
goto out;
|
||||
} else
|
||||
errno = 0;
|
||||
|
||||
// We only really need to handle arches on multiarch systems.
|
||||
// If only one arch is supported the default is fine
|
||||
if (arch != 0) {
|
||||
// This *adds* the target arch, instead of replacing the
|
||||
// native one. This is not ideal, because we'd like to only
|
||||
// allow the target arch, but we can't really disallow the
|
||||
// native arch at this point, because then bubblewrap
|
||||
// couldn't continue running.
|
||||
*ret_p = seccomp_arch_add(ctx, arch);
|
||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (allow_multiarch && multiarch != 0) {
|
||||
*ret_p = seccomp_arch_add(ctx, multiarch);
|
||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||
res = 3;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECCOMP_RULESET_ADD(deny_common);
|
||||
if (opts & F_DENY_NS) SECCOMP_RULESET_ADD(deny_ns);
|
||||
if (opts & F_DENY_TTY) SECCOMP_RULESET_ADD(deny_tty);
|
||||
if (opts & F_DENY_DEVEL) SECCOMP_RULESET_ADD(deny_devel);
|
||||
if (!allow_multiarch) SECCOMP_RULESET_ADD(deny_emu);
|
||||
if (opts & F_EXT) {
|
||||
SECCOMP_RULESET_ADD(deny_common_ext);
|
||||
if (opts & F_DENY_NS) SECCOMP_RULESET_ADD(deny_ns_ext);
|
||||
if (!allow_multiarch) SECCOMP_RULESET_ADD(deny_emu_ext);
|
||||
}
|
||||
|
||||
// Socket filtering doesn't work on e.g. i386, so ignore failures here
|
||||
// However, we need to user seccomp_rule_add_exact to avoid libseccomp doing
|
||||
// something else: https://github.com/seccomp/libseccomp/issues/8
|
||||
int last_allowed_family = -1;
|
||||
for (int i = 0; i < LEN(socket_family_allowlist); i++) {
|
||||
if (socket_family_allowlist[i].flags_mask != 0 &&
|
||||
(socket_family_allowlist[i].flags_mask & opts) != socket_family_allowlist[i].flags_mask)
|
||||
continue;
|
||||
|
||||
for (int disallowed = last_allowed_family + 1; disallowed < socket_family_allowlist[i].family; disallowed++) {
|
||||
// Blocklist the in-between valid families
|
||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1, SCMP_A0(SCMP_CMP_EQ, disallowed));
|
||||
}
|
||||
last_allowed_family = socket_family_allowlist[i].family;
|
||||
}
|
||||
// Blocklist the rest
|
||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1, SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
|
||||
|
||||
if (fd < 0) {
|
||||
*ret_p = seccomp_load(ctx);
|
||||
if (*ret_p != 0) {
|
||||
res = 7;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
*ret_p = seccomp_export_bpf(ctx, fd);
|
||||
if (*ret_p != 0) {
|
||||
res = 6;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (ctx)
|
||||
seccomp_release(ctx);
|
||||
|
||||
return res;
|
||||
}
|
||||
23
sandbox/seccomp/seccomp-build.h
Normal file
23
sandbox/seccomp/seccomp-build.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <stdint.h>
|
||||
#include <seccomp.h>
|
||||
|
||||
#if (SCMP_VER_MAJOR < 2) || \
|
||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
||||
#error This package requires libseccomp >= v2.5.1
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
F_VERBOSE = 1 << 0,
|
||||
F_EXT = 1 << 1,
|
||||
F_DENY_NS = 1 << 2,
|
||||
F_DENY_TTY = 1 << 3,
|
||||
F_DENY_DEVEL = 1 << 4,
|
||||
F_MULTIARCH = 1 << 5,
|
||||
F_LINUX32 = 1 << 6,
|
||||
F_CAN = 1 << 7,
|
||||
F_BLUETOOTH = 1 << 8,
|
||||
} f_syscall_opts;
|
||||
|
||||
extern void F_println(char *v);
|
||||
int32_t f_build_filter(int *ret_p, int fd, uint32_t arch, uint32_t multiarch, f_syscall_opts opts);
|
||||
114
sandbox/seccomp/seccomp.go
Normal file
114
sandbox/seccomp/seccomp.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package seccomp
|
||||
|
||||
/*
|
||||
#cgo linux pkg-config: --static libseccomp
|
||||
|
||||
#include "seccomp-build.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// LibraryError represents a libseccomp error.
|
||||
type LibraryError struct {
|
||||
Prefix string
|
||||
Seccomp syscall.Errno
|
||||
Errno error
|
||||
}
|
||||
|
||||
func (e *LibraryError) Error() string {
|
||||
if e.Seccomp == 0 {
|
||||
if e.Errno == nil {
|
||||
panic("invalid libseccomp error")
|
||||
}
|
||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Errno)
|
||||
}
|
||||
if e.Errno == nil {
|
||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Seccomp)
|
||||
}
|
||||
return fmt.Sprintf("%s: %s (%s)", e.Prefix, e.Seccomp, e.Errno)
|
||||
}
|
||||
|
||||
func (e *LibraryError) Is(err error) bool {
|
||||
if e == nil {
|
||||
return err == nil
|
||||
}
|
||||
if ef, ok := err.(*LibraryError); ok {
|
||||
return *e == *ef
|
||||
}
|
||||
return (e.Seccomp != 0 && errors.Is(err, e.Seccomp)) ||
|
||||
(e.Errno != nil && errors.Is(err, e.Errno))
|
||||
}
|
||||
|
||||
var resPrefix = [...]string{
|
||||
0: "",
|
||||
1: "seccomp_init failed",
|
||||
2: "seccomp_arch_add failed",
|
||||
3: "seccomp_arch_add failed (multiarch)",
|
||||
4: "internal libseccomp failure",
|
||||
5: "seccomp_rule_add failed",
|
||||
6: "seccomp_export_bpf failed",
|
||||
7: "seccomp_load failed",
|
||||
}
|
||||
|
||||
type SyscallOpts = C.f_syscall_opts
|
||||
|
||||
const (
|
||||
flagVerbose SyscallOpts = C.F_VERBOSE
|
||||
// FlagExt are project-specific extensions.
|
||||
FlagExt SyscallOpts = C.F_EXT
|
||||
// FlagDenyNS denies namespace setup syscalls.
|
||||
FlagDenyNS SyscallOpts = C.F_DENY_NS
|
||||
// FlagDenyTTY denies faking input.
|
||||
FlagDenyTTY SyscallOpts = C.F_DENY_TTY
|
||||
// FlagDenyDevel denies development-related syscalls.
|
||||
FlagDenyDevel SyscallOpts = C.F_DENY_DEVEL
|
||||
// FlagMultiarch allows multiarch/emulation.
|
||||
FlagMultiarch SyscallOpts = C.F_MULTIARCH
|
||||
// FlagLinux32 sets PER_LINUX32.
|
||||
FlagLinux32 SyscallOpts = C.F_LINUX32
|
||||
// FlagCan allows AF_CAN.
|
||||
FlagCan SyscallOpts = C.F_CAN
|
||||
// FlagBluetooth allows AF_BLUETOOTH.
|
||||
FlagBluetooth SyscallOpts = C.F_BLUETOOTH
|
||||
)
|
||||
|
||||
func buildFilter(fd int, opts SyscallOpts) error {
|
||||
var (
|
||||
arch C.uint32_t = 0
|
||||
multiarch C.uint32_t = 0
|
||||
)
|
||||
switch runtime.GOARCH {
|
||||
case "386":
|
||||
arch = C.SCMP_ARCH_X86
|
||||
case "amd64":
|
||||
arch = C.SCMP_ARCH_X86_64
|
||||
multiarch = C.SCMP_ARCH_X86
|
||||
case "arm":
|
||||
arch = C.SCMP_ARCH_ARM
|
||||
case "arm64":
|
||||
arch = C.SCMP_ARCH_AARCH64
|
||||
multiarch = C.SCMP_ARCH_ARM
|
||||
}
|
||||
|
||||
// this removes repeated transitions between C and Go execution
|
||||
// when producing log output via F_println and CPrintln is nil
|
||||
if fp := printlnP.Load(); fp != nil {
|
||||
opts |= flagVerbose
|
||||
}
|
||||
|
||||
var ret C.int
|
||||
res, err := C.f_build_filter(&ret, C.int(fd), arch, multiarch, opts)
|
||||
if prefix := resPrefix[res]; prefix != "" {
|
||||
return &LibraryError{
|
||||
prefix,
|
||||
-syscall.Errno(ret),
|
||||
err,
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
65
sandbox/seccomp/seccomp_test.go
Normal file
65
sandbox/seccomp/seccomp_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package seccomp_test
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/fortify/sandbox/seccomp"
|
||||
)
|
||||
|
||||
func TestLibraryError(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
sample *seccomp.LibraryError
|
||||
want string
|
||||
wantIs bool
|
||||
compare error
|
||||
}{
|
||||
{
|
||||
"full",
|
||||
&seccomp.LibraryError{Prefix: "seccomp_export_bpf failed", Seccomp: syscall.ECANCELED, Errno: syscall.EBADF},
|
||||
"seccomp_export_bpf failed: operation canceled (bad file descriptor)",
|
||||
true,
|
||||
&seccomp.LibraryError{Prefix: "seccomp_export_bpf failed", Seccomp: syscall.ECANCELED, Errno: syscall.EBADF},
|
||||
},
|
||||
{
|
||||
"errno only",
|
||||
&seccomp.LibraryError{Prefix: "seccomp_init failed", Errno: syscall.ENOMEM},
|
||||
"seccomp_init failed: cannot allocate memory",
|
||||
false,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"seccomp only",
|
||||
&seccomp.LibraryError{Prefix: "internal libseccomp failure", Seccomp: syscall.EFAULT},
|
||||
"internal libseccomp failure: bad address",
|
||||
true,
|
||||
syscall.EFAULT,
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if errors.Is(tc.sample, tc.compare) != tc.wantIs {
|
||||
t.Errorf("errors.Is(%#v, %#v) did not return %v",
|
||||
tc.sample, tc.compare, tc.wantIs)
|
||||
}
|
||||
|
||||
if got := tc.sample.Error(); got != tc.want {
|
||||
t.Errorf("Error: %q, want %q",
|
||||
got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("invalid", func(t *testing.T) {
|
||||
wantPanic := "invalid libseccomp error"
|
||||
defer func() {
|
||||
if r := recover(); r != wantPanic {
|
||||
t.Errorf("panic: %q, want %q", r, wantPanic)
|
||||
}
|
||||
}()
|
||||
runtime.KeepAlive(new(seccomp.LibraryError).Error())
|
||||
})
|
||||
}
|
||||
180
sandbox/sequential.go
Normal file
180
sandbox/sequential.go
Normal file
@@ -0,0 +1,180 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func init() { gob.Register(new(BindMount)) }
|
||||
|
||||
// BindMount bind mounts host path Source on container path Target.
|
||||
type BindMount struct {
|
||||
Source, Target string
|
||||
|
||||
Flags int
|
||||
}
|
||||
|
||||
func (b *BindMount) apply(*InitParams) error {
|
||||
if !path.IsAbs(b.Source) || !path.IsAbs(b.Target) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
"path is not absolute")
|
||||
}
|
||||
return bindMount(b.Source, b.Target, b.Flags)
|
||||
}
|
||||
|
||||
func (b *BindMount) Is(op Op) bool { vb, ok := op.(*BindMount); return ok && *b == *vb }
|
||||
func (b *BindMount) String() string {
|
||||
if b.Source == b.Target {
|
||||
return fmt.Sprintf("%q flags %#x", b.Source, b.Flags)
|
||||
}
|
||||
return fmt.Sprintf("%q on %q flags %#x", b.Source, b.Target, b.Flags&BindWritable)
|
||||
}
|
||||
func (f *Ops) Bind(source, target string, flags int) *Ops {
|
||||
*f = append(*f, &BindMount{source, target, flags | BindRecursive})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountProc)) }
|
||||
|
||||
// MountProc mounts a private proc instance on container Path.
|
||||
type MountProc struct {
|
||||
Path string
|
||||
}
|
||||
|
||||
func (p *MountProc) apply(*InitParams) error {
|
||||
if !path.IsAbs(p.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", p.Path))
|
||||
}
|
||||
|
||||
target := toSysroot(p.Path)
|
||||
if err := os.MkdirAll(target, 0755); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount("proc", target, "proc",
|
||||
syscall.MS_NOSUID|syscall.MS_NOEXEC|syscall.MS_NODEV, ""),
|
||||
fmt.Sprintf("cannot mount proc on %q:", p.Path))
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountDev)) }
|
||||
|
||||
// MountDev mounts dev on container Path.
|
||||
type MountDev struct {
|
||||
Path string
|
||||
}
|
||||
|
||||
func (d *MountDev) apply(params *InitParams) error {
|
||||
if !path.IsAbs(d.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", d.Path))
|
||||
}
|
||||
target := toSysroot(d.Path)
|
||||
|
||||
if err := mountTmpfs("devtmpfs", d.Path, 0, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, name := range []string{"null", "zero", "full", "random", "urandom", "tty"} {
|
||||
if err := bindMount(
|
||||
"/dev/"+name, path.Join(d.Path, name),
|
||||
BindSource|BindDevices,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for i, name := range []string{"stdin", "stdout", "stderr"} {
|
||||
if err := os.Symlink(
|
||||
"/proc/self/fd/"+string(rune(i+'0')),
|
||||
path.Join(target, name),
|
||||
); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
}
|
||||
for _, pair := range [][2]string{
|
||||
{"/proc/self/fd", "fd"},
|
||||
{"/proc/kcore", "core"},
|
||||
{"pts/ptmx", "ptmx"},
|
||||
} {
|
||||
if err := os.Symlink(pair[0], path.Join(target, pair[1])); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
devPtsPath := path.Join(target, "pts")
|
||||
for _, name := range []string{path.Join(target, "shm"), devPtsPath} {
|
||||
if err := os.Mkdir(name, 0755); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if err := syscall.Mount("devpts", devPtsPath, "devpts",
|
||||
syscall.MS_NOSUID|syscall.MS_NOEXEC,
|
||||
"newinstance,ptmxmode=0666,mode=620"); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot mount devpts on %q:", devPtsPath))
|
||||
}
|
||||
|
||||
if params.Flags&FAllowTTY != 0 {
|
||||
var buf [8]byte
|
||||
if _, _, errno := syscall.Syscall(
|
||||
syscall.SYS_IOCTL, 1, syscall.TIOCGWINSZ,
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
); errno == 0 {
|
||||
if err := bindMount(
|
||||
"/proc/self/fd/1", path.Join(d.Path, "console"),
|
||||
BindDevices,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *MountDev) Is(op Op) bool { vd, ok := op.(*MountDev); return ok && *d == *vd }
|
||||
func (d *MountDev) String() string { return fmt.Sprintf("dev on %q", d.Path) }
|
||||
func (f *Ops) Dev(dest string) *Ops {
|
||||
*f = append(*f, &MountDev{dest})
|
||||
return f
|
||||
}
|
||||
|
||||
func (p *MountProc) Is(op Op) bool { vp, ok := op.(*MountProc); return ok && *p == *vp }
|
||||
func (p *MountProc) String() string { return fmt.Sprintf("proc on %q", p.Path) }
|
||||
func (f *Ops) Proc(dest string) *Ops {
|
||||
*f = append(*f, &MountProc{dest})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountTmpfs)) }
|
||||
|
||||
// MountTmpfs mounts tmpfs on container Path.
|
||||
type MountTmpfs struct {
|
||||
Path string
|
||||
Size int
|
||||
Perm os.FileMode
|
||||
}
|
||||
|
||||
func (t *MountTmpfs) apply(*InitParams) error {
|
||||
if !path.IsAbs(t.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
}
|
||||
if t.Size < 0 || t.Size > math.MaxUint>>1 {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("size %d out of bounds", t.Size))
|
||||
}
|
||||
return mountTmpfs("tmpfs", t.Path, t.Size, t.Perm)
|
||||
}
|
||||
|
||||
func (t *MountTmpfs) Is(op Op) bool { vt, ok := op.(*MountTmpfs); return ok && *t == *vt }
|
||||
func (t *MountTmpfs) String() string { return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size) }
|
||||
func (f *Ops) Tmpfs(dest string, size int, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MountTmpfs{dest, size, perm})
|
||||
return f
|
||||
}
|
||||
41
sandbox/syscall.go
Normal file
41
sandbox/syscall.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package sandbox
|
||||
|
||||
import "syscall"
|
||||
|
||||
const (
|
||||
SUID_DUMP_DISABLE = iota
|
||||
SUID_DUMP_USER
|
||||
)
|
||||
|
||||
func SetDumpable(dumpable uintptr) error {
|
||||
// linux/sched/coredump.h
|
||||
if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_DUMPABLE, dumpable, 0); errno != 0 {
|
||||
return errno
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func SetPdeathsig(sig syscall.Signal) error {
|
||||
if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(sig), 0); errno != 0 {
|
||||
return errno
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IgnoringEINTR makes a function call and repeats it if it returns an
|
||||
// EINTR error. This appears to be required even though we install all
|
||||
// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
|
||||
// Also #20400 and #36644 are issues in which a signal handler is
|
||||
// installed without setting SA_RESTART. None of these are the common case,
|
||||
// but there are enough of them that it seems that we can't avoid
|
||||
// an EINTR loop.
|
||||
func IgnoringEINTR(fn func() error) error {
|
||||
for {
|
||||
err := fn()
|
||||
if err != syscall.EINTR {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user