forked from security/hakurei
hakurei: move container toplevel
Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -1,229 +0,0 @@
|
||||
// Package sandbox implements unprivileged Linux containers with hardening options useful for creating application sandboxes.
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strconv"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
)
|
||||
|
||||
type (
|
||||
// Container represents a container environment being prepared or run.
|
||||
// None of [Container] methods are safe for concurrent use.
|
||||
Container struct {
|
||||
// Name of initial process in the container.
|
||||
name string
|
||||
// Cgroup fd, nil to disable.
|
||||
Cgroup *int
|
||||
// ExtraFiles passed through to initial process in the container,
|
||||
// with behaviour identical to its [exec.Cmd] counterpart.
|
||||
ExtraFiles []*os.File
|
||||
|
||||
// Custom [exec.Cmd] initialisation function.
|
||||
CommandContext func(ctx context.Context) (cmd *exec.Cmd)
|
||||
|
||||
// param encoder for shim and init
|
||||
setup *gob.Encoder
|
||||
// cancels cmd
|
||||
cancel context.CancelFunc
|
||||
|
||||
Stdin io.Reader
|
||||
Stdout io.Writer
|
||||
Stderr io.Writer
|
||||
|
||||
Cancel func(cmd *exec.Cmd) error
|
||||
WaitDelay time.Duration
|
||||
|
||||
cmd *exec.Cmd
|
||||
ctx context.Context
|
||||
Params
|
||||
}
|
||||
|
||||
// Params holds container configuration and is safe to serialise.
|
||||
Params struct {
|
||||
// Working directory in the container.
|
||||
Dir string
|
||||
// Initial process environment.
|
||||
Env []string
|
||||
// Absolute path of initial process in the container. Overrides name.
|
||||
Path string
|
||||
// Initial process argv.
|
||||
Args []string
|
||||
|
||||
// Mapped Uid in user namespace.
|
||||
Uid int
|
||||
// Mapped Gid in user namespace.
|
||||
Gid int
|
||||
// Hostname value in UTS namespace.
|
||||
Hostname string
|
||||
// Sequential container setup ops.
|
||||
*Ops
|
||||
// Seccomp system call filter rules.
|
||||
SeccompRules []seccomp.NativeRule
|
||||
// Extra seccomp flags.
|
||||
SeccompFlags seccomp.ExportFlag
|
||||
// Seccomp presets. Has no effect unless SeccompRules is zero-length.
|
||||
SeccompPresets seccomp.FilterPreset
|
||||
// Do not load seccomp program.
|
||||
SeccompDisable bool
|
||||
// Permission bits of newly created parent directories.
|
||||
// The zero value is interpreted as 0755.
|
||||
ParentPerm os.FileMode
|
||||
// Do not syscall.Setsid.
|
||||
RetainSession bool
|
||||
// Do not [syscall.CLONE_NEWNET].
|
||||
HostNet bool
|
||||
// Retain CAP_SYS_ADMIN.
|
||||
Privileged bool
|
||||
}
|
||||
)
|
||||
|
||||
func (p *Container) Start() error {
|
||||
if p.cmd != nil {
|
||||
return errors.New("sandbox: already started")
|
||||
}
|
||||
if p.Ops == nil || len(*p.Ops) == 0 {
|
||||
return errors.New("sandbox: starting an empty container")
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(p.ctx)
|
||||
p.cancel = cancel
|
||||
|
||||
var cloneFlags uintptr = CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWCGROUP
|
||||
if !p.HostNet {
|
||||
cloneFlags |= CLONE_NEWNET
|
||||
}
|
||||
|
||||
// map to overflow id to work around ownership checks
|
||||
if p.Uid < 1 {
|
||||
p.Uid = OverflowUid()
|
||||
}
|
||||
if p.Gid < 1 {
|
||||
p.Gid = OverflowGid()
|
||||
}
|
||||
|
||||
if !p.RetainSession {
|
||||
p.SeccompPresets |= seccomp.PresetDenyTTY
|
||||
}
|
||||
|
||||
if p.CommandContext != nil {
|
||||
p.cmd = p.CommandContext(ctx)
|
||||
} else {
|
||||
p.cmd = exec.CommandContext(ctx, MustExecutable())
|
||||
p.cmd.Args = []string{"init"}
|
||||
}
|
||||
|
||||
p.cmd.Stdin, p.cmd.Stdout, p.cmd.Stderr = p.Stdin, p.Stdout, p.Stderr
|
||||
p.cmd.WaitDelay = p.WaitDelay
|
||||
if p.Cancel != nil {
|
||||
p.cmd.Cancel = func() error { return p.Cancel(p.cmd) }
|
||||
} else {
|
||||
p.cmd.Cancel = func() error { return p.cmd.Process.Signal(SIGTERM) }
|
||||
}
|
||||
p.cmd.Dir = "/"
|
||||
p.cmd.SysProcAttr = &SysProcAttr{
|
||||
Setsid: !p.RetainSession,
|
||||
Pdeathsig: SIGKILL,
|
||||
Cloneflags: cloneFlags | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS,
|
||||
|
||||
// remain privileged for setup
|
||||
AmbientCaps: []uintptr{CAP_SYS_ADMIN, CAP_SETPCAP},
|
||||
|
||||
UseCgroupFD: p.Cgroup != nil,
|
||||
}
|
||||
if p.cmd.SysProcAttr.UseCgroupFD {
|
||||
p.cmd.SysProcAttr.CgroupFD = *p.Cgroup
|
||||
}
|
||||
|
||||
// place setup pipe before user supplied extra files, this is later restored by init
|
||||
if fd, e, err := Setup(&p.cmd.ExtraFiles); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot create shim setup pipe:")
|
||||
} else {
|
||||
p.setup = e
|
||||
p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)}
|
||||
}
|
||||
p.cmd.ExtraFiles = append(p.cmd.ExtraFiles, p.ExtraFiles...)
|
||||
|
||||
msg.Verbose("starting container init")
|
||||
if err := p.cmd.Start(); err != nil {
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Container) Serve() error {
|
||||
if p.setup == nil {
|
||||
panic("invalid serve")
|
||||
}
|
||||
|
||||
setup := p.setup
|
||||
p.setup = nil
|
||||
|
||||
if p.Path != "" && !path.IsAbs(p.Path) {
|
||||
p.cancel()
|
||||
return msg.WrapErr(EINVAL,
|
||||
fmt.Sprintf("invalid executable path %q", p.Path))
|
||||
}
|
||||
|
||||
if p.Path == "" {
|
||||
if p.name == "" {
|
||||
p.Path = os.Getenv("SHELL")
|
||||
if !path.IsAbs(p.Path) {
|
||||
p.cancel()
|
||||
return msg.WrapErr(EBADE,
|
||||
"no command specified and $SHELL is invalid")
|
||||
}
|
||||
p.name = path.Base(p.Path)
|
||||
} else if path.IsAbs(p.name) {
|
||||
p.Path = p.name
|
||||
} else if v, err := exec.LookPath(p.name); err != nil {
|
||||
p.cancel()
|
||||
return msg.WrapErr(err, err.Error())
|
||||
} else {
|
||||
p.Path = v
|
||||
}
|
||||
}
|
||||
|
||||
if p.SeccompRules == nil {
|
||||
// do not transmit nil
|
||||
p.SeccompRules = make([]seccomp.NativeRule, 0)
|
||||
}
|
||||
|
||||
err := setup.Encode(
|
||||
&initParams{
|
||||
p.Params,
|
||||
Getuid(),
|
||||
Getgid(),
|
||||
len(p.ExtraFiles),
|
||||
msg.IsVerbose(),
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
p.cancel()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait waits for the init process command to complete and returns its result.
// The context created by Start is cancelled once Wait returns.
func (p *Container) Wait() error {
	defer p.cancel()
	return p.cmd.Wait()
}
|
||||
|
||||
func (p *Container) String() string {
|
||||
return fmt.Sprintf("argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
|
||||
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets))
|
||||
}
|
||||
|
||||
func New(ctx context.Context, name string, args ...string) *Container {
|
||||
return &Container{name: name, ctx: ctx,
|
||||
Params: Params{Args: append([]string{name}, args...), Dir: "/", Ops: new(Ops)},
|
||||
}
|
||||
}
|
||||
@@ -1,281 +0,0 @@
|
||||
package sandbox_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/ldd"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
)
|
||||
|
||||
const (
	// ignore is the placeholder for string fields of an expected mount entry
	// that should not be compared; the same value is passed to
	// EqualWithIgnore by TestHelperCheckContainer.
	ignore = "\x00"
	// ignoreV is the placeholder used by e for the numeric fields
	// (ID, Parent, Devno) of an expected mount entry.
	// NOTE(review): presumably EqualWithIgnore skips fields set to -1 — confirm in package vfs.
	ignoreV = -1
)
|
||||
|
||||
// TestContainer starts a container for every test case, re-executing the
// test binary both as container init (TestHelperInit) and as the initial
// process (TestHelperCheckContainer). The expected mount table is gob-encoded
// and handed to the initial process on stdin, which compares it against its
// own /proc/self/mountinfo.
func TestContainer(t *testing.T) {
	{
		// enable verbose output for the duration of the test and restore
		// the previous verbosity and output on cleanup
		oldVerbose := hlog.Load()
		oldOutput := sandbox.GetOutput()
		internal.InstallOutput(true)
		t.Cleanup(func() { hlog.Store(oldVerbose) })
		t.Cleanup(func() { sandbox.SetOutput(oldOutput) })
	}

	testCases := []struct {
		name string
		// filter enables the seccomp filter (inverse of SeccompDisable)
		filter bool
		// session is assigned to RetainSession
		session bool
		// net is assigned to HostNet
		net bool
		// ops are the case specific setup ops, common ops are appended below
		ops *sandbox.Ops
		// mnt are the mount entries expected to result from ops
		mnt []*vfs.MountInfoEntry
		// host is the container hostname, empty to expect the host's
		host    string
		rules   []seccomp.NativeRule
		flags   seccomp.ExportFlag
		presets seccomp.FilterPreset
	}{
		{"minimal", true, false, false,
			new(sandbox.Ops), nil, "test-minimal",
			nil, 0, seccomp.PresetStrict},
		{"allow", true, true, true,
			new(sandbox.Ops), nil, "test-minimal",
			nil, 0, seccomp.PresetExt | seccomp.PresetDenyDevel},
		{"no filter", false, true, true,
			new(sandbox.Ops), nil, "test-no-filter",
			nil, 0, seccomp.PresetExt},
		{"custom rules", true, true, true,
			new(sandbox.Ops), nil, "test-no-filter",
			[]seccomp.NativeRule{
				{seccomp.ScmpSyscall(syscall.SYS_SETUID), seccomp.ScmpErrno(syscall.EPERM), nil},
			}, 0, seccomp.PresetExt},
		{"tmpfs", true, false, false,
			new(sandbox.Ops).
				Tmpfs(hst.Tmp, 0, 0755),
			[]*vfs.MountInfoEntry{
				e("/", hst.Tmp, "rw,nosuid,nodev,relatime", "tmpfs", "tmpfs", ignore),
			}, "test-tmpfs",
			nil, 0, seccomp.PresetStrict},
		{"dev", true, true /* go test output is not a tty */, false,
			new(sandbox.Ops).
				Dev("/dev").
				Mqueue("/dev/mqueue"),
			[]*vfs.MountInfoEntry{
				e("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore),
				e("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/zero", "/dev/zero", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/full", "/dev/full", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/random", "/dev/random", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/urandom", "/dev/urandom", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/tty", "/dev/tty", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
				e("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"),
				e("/", "/dev/mqueue", "rw,nosuid,nodev,noexec,relatime", "mqueue", "mqueue", "rw"),
			}, "",
			nil, 0, seccomp.PresetStrict},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
			defer cancel()

			// argv of the initial process: Args[4] selects the helper and
			// Args[5] carries the expected hostname
			container := sandbox.New(ctx, "/usr/bin/sandbox.test", "-test.v",
				"-test.run=TestHelperCheckContainer", "--", "check", tc.host)
			container.Uid = 1000
			container.Gid = 100
			container.Hostname = tc.host
			container.CommandContext = commandContext
			container.Stdout, container.Stderr = os.Stdout, os.Stderr
			container.Ops = tc.ops
			container.SeccompRules = tc.rules
			container.SeccompFlags = tc.flags | seccomp.AllowMultiarch
			container.SeccompPresets = tc.presets
			container.SeccompDisable = !tc.filter
			container.RetainSession = tc.session
			container.HostNet = tc.net
			// an empty hostname leaves the UTS hostname untouched,
			// so the helper should observe the host's name instead
			if container.Args[5] == "" {
				if name, err := os.Hostname(); err != nil {
					t.Fatalf("cannot get hostname: %v", err)
				} else {
					container.Args[5] = name
				}
			}

			// common ops appended after the case specific ones
			container.
				Tmpfs("/tmp", 0, 0755).
				Bind(os.Args[0], os.Args[0], 0).
				Mkdir("/usr/bin", 0755).
				Link(os.Args[0], "/usr/bin/sandbox.test").
				Place("/etc/hostname", []byte(container.Args[5]))
			// in case test has cgo enabled
			var libPaths []string
			if entries, err := ldd.ExecFilter(ctx,
				commandContext,
				func(v []byte) []byte {
					// keep only the output following the helper's name line;
					// indexing panics if that line is absent
					return bytes.SplitN(v, []byte("TestHelperInit\n"), 2)[1]
				}, os.Args[0]); err != nil {
				// NOTE(review): log.Fatalf exits the test binary without
				// running cleanups; t.Fatalf may be preferable here.
				log.Fatalf("ldd: %v", err)
			} else {
				libPaths = ldd.Path(entries)
			}
			for _, name := range libPaths {
				container.Bind(name, name, 0)
			}
			// needs /proc to check mountinfo
			container.Proc("/proc")

			// expected mount table, in the same order the ops were added
			mnt := make([]*vfs.MountInfoEntry, 0, 3+len(libPaths))
			mnt = append(mnt, e("/sysroot", "/", "rw,nosuid,nodev,relatime", "tmpfs", "rootfs", ignore))
			mnt = append(mnt, tc.mnt...)
			mnt = append(mnt,
				e("/", "/tmp", "rw,nosuid,nodev,relatime", "tmpfs", "tmpfs", ignore),
				e(ignore, os.Args[0], "ro,nosuid,nodev,relatime", ignore, ignore, ignore),
				e(ignore, "/etc/hostname", "ro,nosuid,nodev,relatime", "tmpfs", "rootfs", ignore),
			)
			for _, name := range libPaths {
				mnt = append(mnt, e(ignore, name, "ro,nosuid,nodev,relatime", ignore, ignore, ignore))
			}
			mnt = append(mnt, e("/", "/proc", "rw,nosuid,nodev,noexec,relatime", "proc", "proc", "rw"))
			// the helper decodes the expected entries from its stdin
			want := new(bytes.Buffer)
			if err := gob.NewEncoder(want).Encode(mnt); err != nil {
				t.Fatalf("cannot serialise expected mount points: %v", err)
			}
			container.Stdin = want

			if err := container.Start(); err != nil {
				hlog.PrintBaseError(err, "start:")
				t.Fatalf("cannot start container: %v", err)
			} else if err = container.Serve(); err != nil {
				hlog.PrintBaseError(err, "serve:")
				t.Errorf("cannot serve setup params: %v", err)
			}
			if err := container.Wait(); err != nil {
				hlog.PrintBaseError(err, "wait:")
				t.Fatalf("wait: %v", err)
			}
		})
	}
}
|
||||
|
||||
func e(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoEntry {
|
||||
return &vfs.MountInfoEntry{
|
||||
ID: ignoreV,
|
||||
Parent: ignoreV,
|
||||
Devno: vfs.DevT{ignoreV, ignoreV},
|
||||
Root: root,
|
||||
Target: target,
|
||||
VfsOptstr: vfsOptstr,
|
||||
OptFields: []string{ignore},
|
||||
FsType: fsType,
|
||||
Source: source,
|
||||
FsOptstr: fsOptstr,
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerString(t *testing.T) {
|
||||
container := sandbox.New(t.Context(), "ldd", "/usr/bin/env")
|
||||
container.SeccompFlags |= seccomp.AllowMultiarch
|
||||
container.SeccompRules = seccomp.Preset(
|
||||
seccomp.PresetExt|seccomp.PresetDenyNS|seccomp.PresetDenyTTY,
|
||||
container.SeccompFlags)
|
||||
container.SeccompPresets = seccomp.PresetStrict
|
||||
want := `argv: ["ldd" "/usr/bin/env"], filter: true, rules: 65, flags: 0x1, presets: 0xf`
|
||||
if got := container.String(); got != want {
|
||||
t.Errorf("String: %s, want %s", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHelperInit(t *testing.T) {
|
||||
if len(os.Args) != 5 || os.Args[4] != "init" {
|
||||
return
|
||||
}
|
||||
sandbox.SetOutput(hlog.Output{})
|
||||
sandbox.Init(hlog.Prepare, internal.InstallOutput)
|
||||
}
|
||||
|
||||
func TestHelperCheckContainer(t *testing.T) {
|
||||
if len(os.Args) != 6 || os.Args[4] != "check" {
|
||||
return
|
||||
}
|
||||
|
||||
t.Run("user", func(t *testing.T) {
|
||||
if uid := syscall.Getuid(); uid != 1000 {
|
||||
t.Errorf("Getuid: %d, want 1000", uid)
|
||||
}
|
||||
if gid := syscall.Getgid(); gid != 100 {
|
||||
t.Errorf("Getgid: %d, want 100", gid)
|
||||
}
|
||||
})
|
||||
t.Run("hostname", func(t *testing.T) {
|
||||
if name, err := os.Hostname(); err != nil {
|
||||
t.Fatalf("cannot get hostname: %v", err)
|
||||
} else if name != os.Args[5] {
|
||||
t.Errorf("Hostname: %q, want %q", name, os.Args[5])
|
||||
}
|
||||
|
||||
if p, err := os.ReadFile("/etc/hostname"); err != nil {
|
||||
t.Fatalf("%v", err)
|
||||
} else if string(p) != os.Args[5] {
|
||||
t.Errorf("/etc/hostname: %q, want %q", string(p), os.Args[5])
|
||||
}
|
||||
})
|
||||
t.Run("mount", func(t *testing.T) {
|
||||
var mnt []*vfs.MountInfoEntry
|
||||
if err := gob.NewDecoder(os.Stdin).Decode(&mnt); err != nil {
|
||||
t.Fatalf("cannot receive expected mount points: %v", err)
|
||||
}
|
||||
|
||||
var d *vfs.MountInfoDecoder
|
||||
if f, err := os.Open("/proc/self/mountinfo"); err != nil {
|
||||
t.Fatalf("cannot open mountinfo: %v", err)
|
||||
} else {
|
||||
d = vfs.NewMountInfoDecoder(f)
|
||||
}
|
||||
|
||||
i := 0
|
||||
for cur := range d.Entries() {
|
||||
if i == len(mnt) {
|
||||
t.Errorf("got more than %d entries", len(mnt))
|
||||
break
|
||||
}
|
||||
|
||||
// ugly hack but should be reliable and is less likely to false negative than comparing by parsed flags
|
||||
cur.VfsOptstr = strings.TrimSuffix(cur.VfsOptstr, ",relatime")
|
||||
cur.VfsOptstr = strings.TrimSuffix(cur.VfsOptstr, ",noatime")
|
||||
mnt[i].VfsOptstr = strings.TrimSuffix(mnt[i].VfsOptstr, ",relatime")
|
||||
mnt[i].VfsOptstr = strings.TrimSuffix(mnt[i].VfsOptstr, ",noatime")
|
||||
|
||||
if !cur.EqualWithIgnore(mnt[i], "\x00") {
|
||||
t.Errorf("[FAIL] %s", cur)
|
||||
} else {
|
||||
t.Logf("[ OK ] %s", cur)
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
if err := d.Err(); err != nil {
|
||||
t.Errorf("cannot parse mountinfo: %v", err)
|
||||
}
|
||||
|
||||
if i != len(mnt) {
|
||||
t.Errorf("got %d entries, want %d", i, len(mnt))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// commandContext returns a command that re-executes the test binary so that
// only TestHelperInit runs, with "init" as the trailing argument.
func commandContext(ctx context.Context) *exec.Cmd {
	helperArgs := []string{
		"-test.v",
		"-test.run=TestHelperInit",
		"--",
		"init",
	}
	return exec.CommandContext(ctx, os.Args[0], helperArgs...)
}
|
||||
@@ -1,26 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
	// executable caches the path returned by os.Executable,
	// populated by copyExecutable.
	executable string
	// executableOnce guards the single call to copyExecutable made by MustExecutable.
	executableOnce sync.Once
)
|
||||
|
||||
func copyExecutable() {
|
||||
if name, err := os.Executable(); err != nil {
|
||||
msg.BeforeExit()
|
||||
log.Fatalf("cannot read executable path: %v", err)
|
||||
} else {
|
||||
executable = name
|
||||
}
|
||||
}
|
||||
|
||||
// MustExecutable returns the path of the current executable.
// The path is resolved once on first use and cached for later calls;
// the process exits if it cannot be determined.
func MustExecutable() string {
	executableOnce.Do(copyExecutable)
	return executable
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package sandbox_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
)
|
||||
|
||||
func TestExecutable(t *testing.T) {
|
||||
for i := 0; i < 16; i++ {
|
||||
if got := sandbox.MustExecutable(); got != os.Args[0] {
|
||||
t.Errorf("MustExecutable: %q, want %q",
|
||||
got, os.Args[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
364
sandbox/init.go
364
sandbox/init.go
@@ -1,364 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
)
|
||||
|
||||
const (
	// time to wait for lingering processes after death of initial process
	residualProcessTimeout = 5 * time.Second

	// intermediate tmpfs mount point
	basePath = "/tmp"

	// name of the environment variable holding the setup params file descriptor
	setupEnv = "HAKUREI_SETUP"
)
|
||||
|
||||
// initParams is the payload encoded by Container.Serve and received by Init
// over the setup pipe.
type initParams struct {
	Params

	// uid and gid of the parent process, used as the outside half of the
	// uid_map and gid_map entries written by Init
	HostUid, HostGid int
	// extra files count
	Count int
	// verbosity pass through
	Verbose bool
}
|
||||
|
||||
func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
runtime.LockOSThread()
|
||||
prepare("init")
|
||||
|
||||
if os.Getpid() != 1 {
|
||||
log.Fatal("this process must run as pid 1")
|
||||
}
|
||||
|
||||
var (
|
||||
params initParams
|
||||
closeSetup func() error
|
||||
setupFile *os.File
|
||||
offsetSetup int
|
||||
)
|
||||
if f, err := Receive(setupEnv, ¶ms, &setupFile); err != nil {
|
||||
if errors.Is(err, ErrInvalid) {
|
||||
log.Fatal("invalid setup descriptor")
|
||||
}
|
||||
if errors.Is(err, ErrNotSet) {
|
||||
log.Fatal("HAKUREI_SETUP not set")
|
||||
}
|
||||
|
||||
log.Fatalf("cannot decode init setup payload: %v", err)
|
||||
} else {
|
||||
if params.Ops == nil {
|
||||
log.Fatal("invalid setup parameters")
|
||||
}
|
||||
if params.ParentPerm == 0 {
|
||||
params.ParentPerm = 0755
|
||||
}
|
||||
|
||||
setVerbose(params.Verbose)
|
||||
msg.Verbose("received setup parameters")
|
||||
closeSetup = f
|
||||
offsetSetup = int(setupFile.Fd() + 1)
|
||||
}
|
||||
|
||||
// write uid/gid map here so parent does not need to set dumpable
|
||||
if err := SetDumpable(SUID_DUMP_USER); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_USER: %s", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/uid_map",
|
||||
append([]byte{}, strconv.Itoa(params.Uid)+" "+strconv.Itoa(params.HostUid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/setgroups",
|
||||
[]byte("deny\n"),
|
||||
0); err != nil && !os.IsNotExist(err) {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := os.WriteFile("/proc/self/gid_map",
|
||||
append([]byte{}, strconv.Itoa(params.Gid)+" "+strconv.Itoa(params.HostGid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := SetDumpable(SUID_DUMP_DISABLE); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
}
|
||||
|
||||
oldmask := Umask(0)
|
||||
if params.Hostname != "" {
|
||||
if err := Sethostname([]byte(params.Hostname)); err != nil {
|
||||
log.Fatalf("cannot set hostname: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// cache sysctl before pivot_root
|
||||
LastCap()
|
||||
|
||||
if err := Mount("", "/", "", MS_SILENT|MS_SLAVE|MS_REC, ""); err != nil {
|
||||
log.Fatalf("cannot make / rslave: %v", err)
|
||||
}
|
||||
|
||||
for i, op := range *params.Ops {
|
||||
if op == nil {
|
||||
log.Fatalf("invalid op %d", i)
|
||||
}
|
||||
|
||||
if err := op.early(¶ms.Params); err != nil {
|
||||
msg.PrintBaseErr(err,
|
||||
fmt.Sprintf("cannot prepare op %d:", i))
|
||||
msg.BeforeExit()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
if err := Mount("rootfs", basePath, "tmpfs", MS_NODEV|MS_NOSUID, ""); err != nil {
|
||||
log.Fatalf("cannot mount intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir(basePath); err != nil {
|
||||
log.Fatalf("cannot enter base path: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(sysrootDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := Mount(sysrootDir, sysrootDir, "", MS_SILENT|MS_MGC_VAL|MS_BIND|MS_REC, ""); err != nil {
|
||||
log.Fatalf("cannot bind sysroot: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(hostDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
// pivot_root uncovers basePath in hostDir
|
||||
if err := PivotRoot(basePath, hostDir); err != nil {
|
||||
log.Fatalf("cannot pivot into intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
for i, op := range *params.Ops {
|
||||
// ops already checked during early setup
|
||||
msg.Verbosef("%s %s", op.prefix(), op)
|
||||
if err := op.apply(¶ms.Params); err != nil {
|
||||
msg.PrintBaseErr(err,
|
||||
fmt.Sprintf("cannot apply op %d:", i))
|
||||
msg.BeforeExit()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// setup requiring host root complete at this point
|
||||
if err := Mount(hostDir, hostDir, "", MS_SILENT|MS_REC|MS_PRIVATE, ""); err != nil {
|
||||
log.Fatalf("cannot make host root rprivate: %v", err)
|
||||
}
|
||||
if err := Unmount(hostDir, MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount host root: %v", err)
|
||||
}
|
||||
|
||||
{
|
||||
var fd int
|
||||
if err := IgnoringEINTR(func() (err error) {
|
||||
fd, err = Open("/", O_DIRECTORY|O_RDONLY, 0)
|
||||
return
|
||||
}); err != nil {
|
||||
log.Fatalf("cannot open intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir(sysrootPath); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := PivotRoot(".", "."); err != nil {
|
||||
log.Fatalf("cannot pivot into sysroot: %v", err)
|
||||
}
|
||||
if err := Fchdir(fd); err != nil {
|
||||
log.Fatalf("cannot re-enter intermediate root: %v", err)
|
||||
}
|
||||
if err := Unmount(".", MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount intemediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := Close(fd); err != nil {
|
||||
log.Fatalf("cannot close intermediate root: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, _, errno := Syscall(PR_SET_NO_NEW_PRIVS, 1, 0, 0); errno != 0 {
|
||||
log.Fatalf("prctl(PR_SET_NO_NEW_PRIVS): %v", errno)
|
||||
}
|
||||
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0); errno != 0 {
|
||||
log.Fatalf("cannot clear the ambient capability set: %v", errno)
|
||||
}
|
||||
for i := uintptr(0); i <= LastCap(); i++ {
|
||||
if params.Privileged && i == CAP_SYS_ADMIN {
|
||||
continue
|
||||
}
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAPBSET_DROP, i, 0); errno != 0 {
|
||||
log.Fatalf("cannot drop capability from bonding set: %v", errno)
|
||||
}
|
||||
}
|
||||
|
||||
var keep [2]uint32
|
||||
if params.Privileged {
|
||||
keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN)
|
||||
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_SYS_ADMIN); errno != 0 {
|
||||
log.Fatalf("cannot raise CAP_SYS_ADMIN: %v", errno)
|
||||
}
|
||||
}
|
||||
if err := capset(
|
||||
&capHeader{_LINUX_CAPABILITY_VERSION_3, 0},
|
||||
&[2]capData{{0, keep[0], keep[0]}, {0, keep[1], keep[1]}},
|
||||
); err != nil {
|
||||
log.Fatalf("cannot capset: %v", err)
|
||||
}
|
||||
|
||||
if !params.SeccompDisable {
|
||||
rules := params.SeccompRules
|
||||
if len(rules) == 0 { // non-empty rules slice always overrides presets
|
||||
msg.Verbosef("resolving presets %#x", params.SeccompPresets)
|
||||
rules = seccomp.Preset(params.SeccompPresets, params.SeccompFlags)
|
||||
}
|
||||
if err := seccomp.Load(rules, params.SeccompFlags); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
}
|
||||
msg.Verbosef("%d filter rules loaded", len(rules))
|
||||
} else {
|
||||
msg.Verbose("syscall filter not configured")
|
||||
}
|
||||
|
||||
extraFiles := make([]*os.File, params.Count)
|
||||
for i := range extraFiles {
|
||||
// setup fd is placed before all extra files
|
||||
extraFiles[i] = os.NewFile(uintptr(offsetSetup+i), "extra file "+strconv.Itoa(i))
|
||||
}
|
||||
Umask(oldmask)
|
||||
|
||||
cmd := exec.Command(params.Path)
|
||||
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
|
||||
cmd.Args = params.Args
|
||||
cmd.Env = params.Env
|
||||
cmd.ExtraFiles = extraFiles
|
||||
cmd.Dir = params.Dir
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
msg.Suspend()
|
||||
|
||||
if err := closeSetup(); err != nil {
|
||||
log.Println("cannot close setup pipe:", err)
|
||||
// not fatal
|
||||
}
|
||||
|
||||
type winfo struct {
|
||||
wpid int
|
||||
wstatus WaitStatus
|
||||
}
|
||||
info := make(chan winfo, 1)
|
||||
done := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
var (
|
||||
err error
|
||||
wpid = -2
|
||||
wstatus WaitStatus
|
||||
)
|
||||
|
||||
// keep going until no child process is left
|
||||
for wpid != -1 {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
if wpid != -2 {
|
||||
info <- winfo{wpid, wstatus}
|
||||
}
|
||||
|
||||
err = EINTR
|
||||
for errors.Is(err, EINTR) {
|
||||
wpid, err = Wait4(-1, &wstatus, 0, nil)
|
||||
}
|
||||
}
|
||||
if !errors.Is(err, ECHILD) {
|
||||
log.Println("unexpected wait4 response:", err)
|
||||
}
|
||||
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// handle signals to dump withheld messages
|
||||
sig := make(chan os.Signal, 2)
|
||||
signal.Notify(sig, SIGINT, SIGTERM)
|
||||
|
||||
// closed after residualProcessTimeout has elapsed after initial process death
|
||||
timeout := make(chan struct{})
|
||||
|
||||
r := 2
|
||||
for {
|
||||
select {
|
||||
case s := <-sig:
|
||||
if msg.Resume() {
|
||||
msg.Verbosef("terminating on %s after process start", s.String())
|
||||
} else {
|
||||
msg.Verbosef("terminating on %s", s.String())
|
||||
}
|
||||
os.Exit(0)
|
||||
case w := <-info:
|
||||
if w.wpid == cmd.Process.Pid {
|
||||
// initial process exited, output is most likely available again
|
||||
msg.Resume()
|
||||
|
||||
switch {
|
||||
case w.wstatus.Exited():
|
||||
r = w.wstatus.ExitStatus()
|
||||
msg.Verbosef("initial process exited with code %d", w.wstatus.ExitStatus())
|
||||
case w.wstatus.Signaled():
|
||||
r = 128 + int(w.wstatus.Signal())
|
||||
msg.Verbosef("initial process exited with signal %s", w.wstatus.Signal())
|
||||
default:
|
||||
r = 255
|
||||
msg.Verbosef("initial process exited with status %#x", w.wstatus)
|
||||
}
|
||||
|
||||
go func() {
|
||||
time.Sleep(residualProcessTimeout)
|
||||
close(timeout)
|
||||
}()
|
||||
}
|
||||
case <-done:
|
||||
msg.BeforeExit()
|
||||
os.Exit(r)
|
||||
case <-timeout:
|
||||
log.Println("timeout exceeded waiting for lingering processes")
|
||||
msg.BeforeExit()
|
||||
os.Exit(r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TryArgv0 calls [Init] if the last element of argv0 is "init".
|
||||
func TryArgv0(v Msg, prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
if len(os.Args) > 0 && path.Base(os.Args[0]) == "init" {
|
||||
msg = v
|
||||
Init(prepare, setVerbose)
|
||||
msg.BeforeExit()
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
123
sandbox/mount.go
123
sandbox/mount.go
@@ -1,123 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
. "syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
)
|
||||
|
||||
func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) error {
|
||||
if eq {
|
||||
msg.Verbosef("resolved %q flags %#x", target, flags)
|
||||
} else {
|
||||
msg.Verbosef("resolved %q on %q flags %#x", source, target, flags)
|
||||
}
|
||||
|
||||
if err := Mount(source, target, "", MS_SILENT|MS_BIND|flags&MS_REC, ""); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot mount %q on %q:", source, target))
|
||||
}
|
||||
|
||||
var targetFinal string
|
||||
if v, err := filepath.EvalSymlinks(target); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
targetFinal = v
|
||||
if targetFinal != target {
|
||||
msg.Verbosef("target resolves to %q", targetFinal)
|
||||
}
|
||||
}
|
||||
|
||||
// final target path according to the kernel through proc
|
||||
var targetKFinal string
|
||||
{
|
||||
var destFd int
|
||||
if err := IgnoringEINTR(func() (err error) {
|
||||
destFd, err = Open(targetFinal, O_PATH|O_CLOEXEC, 0)
|
||||
return
|
||||
}); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot open %q:", targetFinal))
|
||||
}
|
||||
if v, err := os.Readlink(p.fd(destFd)); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if err = Close(destFd); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot close %q:", targetFinal))
|
||||
} else {
|
||||
targetKFinal = v
|
||||
}
|
||||
}
|
||||
|
||||
mf := MS_NOSUID | flags&MS_NODEV | flags&MS_RDONLY
|
||||
return hostProc.mountinfo(func(d *vfs.MountInfoDecoder) error {
|
||||
n, err := d.Unfold(targetKFinal)
|
||||
if err != nil {
|
||||
if errors.Is(err, ESTALE) {
|
||||
return msg.WrapErr(err,
|
||||
fmt.Sprintf("mount point %q never appeared in mountinfo", targetKFinal))
|
||||
}
|
||||
return wrapErrSuffix(err,
|
||||
"cannot unfold mount hierarchy:")
|
||||
}
|
||||
|
||||
if err = remountWithFlags(n, mf); err != nil {
|
||||
return err
|
||||
}
|
||||
if flags&MS_REC == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for cur := range n.Collective() {
|
||||
err = remountWithFlags(cur, mf)
|
||||
if err != nil && !errors.Is(err, EACCES) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func remountWithFlags(n *vfs.MountInfoNode, mf uintptr) error {
|
||||
kf, unmatched := n.Flags()
|
||||
if len(unmatched) != 0 {
|
||||
msg.Verbosef("unmatched vfs options: %q", unmatched)
|
||||
}
|
||||
|
||||
if kf&mf != mf {
|
||||
return wrapErrSuffix(
|
||||
Mount("none", n.Clean, "", MS_SILENT|MS_BIND|MS_REMOUNT|kf|mf, ""),
|
||||
fmt.Sprintf("cannot remount %q:", n.Clean))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountTmpfs(fsname, name string, size int, perm os.FileMode) error {
|
||||
target := toSysroot(name)
|
||||
if err := os.MkdirAll(target, parentPerm(perm)); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
opt := fmt.Sprintf("mode=%#o", perm)
|
||||
if size > 0 {
|
||||
opt += fmt.Sprintf(",size=%d", size)
|
||||
}
|
||||
return wrapErrSuffix(
|
||||
Mount(fsname, target, "tmpfs", MS_NOSUID|MS_NODEV, opt),
|
||||
fmt.Sprintf("cannot mount tmpfs on %q:", name))
|
||||
}
|
||||
|
||||
// parentPerm derives the permission bits for parent directories of a path
// created with perm. The owner always gets rwx; group and other get r-x only
// when perm grants the respective class at least one permission bit.
func parentPerm(perm os.FileMode) os.FileMode {
	result := os.FileMode(0700)
	if perm&0070 != 0 {
		result |= 0050
	}
	if perm&0007 != 0 {
		result |= 0005
	}
	return result
}
|
||||
@@ -1,43 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Msg is the sink this package sends verbose output and error messages to.
// [DefaultMsg] is the implementation backed by the standard logger.
type Msg interface {
	// IsVerbose reports whether verbose output is enabled.
	IsVerbose() bool
	// Verbose emits v as a verbose message.
	Verbose(v ...any)
	// Verbosef emits a verbose message built from format and v.
	Verbosef(format string, v ...any)

	// WrapErr attaches the message a to err and returns the error to pass on.
	WrapErr(err error, a ...any) error
	// PrintBaseErr reports err together with fallback.
	// NOTE(review): how fallback is used is up to the implementation;
	// DefaultMsg prints it ahead of err.
	PrintBaseErr(err error, fallback string)

	// Suspend withholds output until Resume is called.
	Suspend()
	// Resume re-enables output and reports whether it was suspended.
	Resume() bool

	// BeforeExit is presumably called ahead of process exit; DefaultMsg
	// does nothing here — confirm against callers.
	BeforeExit()
}

// DefaultMsg implements [Msg] on top of the standard logger.
// The zero value is ready for use and starts out active.
type DefaultMsg struct{ inactive atomic.Bool }

// IsVerbose always returns true.
func (m *DefaultMsg) IsVerbose() bool { return true }

// Verbose logs v with log.Println unless output is suspended.
func (m *DefaultMsg) Verbose(v ...any) {
	if m.inactive.Load() {
		return
	}
	log.Println(v...)
}

// Verbosef logs with log.Printf unless output is suspended.
func (m *DefaultMsg) Verbosef(format string, v ...any) {
	if m.inactive.Load() {
		return
	}
	log.Printf(format, v...)
}

// WrapErr logs a and returns err unchanged.
// The message is logged regardless of the suspended state.
func (m *DefaultMsg) WrapErr(err error, a ...any) error {
	log.Println(a...)
	return err
}

// PrintBaseErr logs fallback followed by err.
func (m *DefaultMsg) PrintBaseErr(err error, fallback string) {
	log.Println(fallback, err)
}

// Suspend marks output as suspended.
func (m *DefaultMsg) Suspend() {
	m.inactive.Store(true)
}

// Resume clears the suspended state and reports whether it was set.
func (m *DefaultMsg) Resume() bool {
	return m.inactive.CompareAndSwap(true, false)
}

// BeforeExit is a no-op.
func (m *DefaultMsg) BeforeExit() {}
|
||||
482
sandbox/ops.go
482
sandbox/ops.go
@@ -1,482 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
. "syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type (
|
||||
Ops []Op
|
||||
Op interface {
|
||||
// early is called in host root.
|
||||
early(params *Params) error
|
||||
// apply is called in intermediate root.
|
||||
apply(params *Params) error
|
||||
|
||||
prefix() string
|
||||
Is(op Op) bool
|
||||
fmt.Stringer
|
||||
}
|
||||
)
|
||||
|
||||
func (f *Ops) Grow(n int) { *f = slices.Grow(*f, n) }
|
||||
|
||||
func init() { gob.Register(new(BindMountOp)) }
|
||||
|
||||
// BindMountOp bind mounts host path Source on container path Target.
|
||||
type BindMountOp struct {
|
||||
Source, SourceFinal, Target string
|
||||
|
||||
Flags int
|
||||
}
|
||||
|
||||
const (
|
||||
BindOptional = 1 << iota
|
||||
BindWritable
|
||||
BindDevice
|
||||
)
|
||||
|
||||
func (b *BindMountOp) early(*Params) error {
|
||||
if !path.IsAbs(b.Source) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", b.Source))
|
||||
}
|
||||
|
||||
if v, err := filepath.EvalSymlinks(b.Source); err != nil {
|
||||
if os.IsNotExist(err) && b.Flags&BindOptional != 0 {
|
||||
b.SourceFinal = "\x00"
|
||||
return nil
|
||||
}
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
b.SourceFinal = v
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BindMountOp) apply(*Params) error {
|
||||
if b.SourceFinal == "\x00" {
|
||||
if b.Flags&BindOptional == 0 {
|
||||
// unreachable
|
||||
return EBADE
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if !path.IsAbs(b.SourceFinal) || !path.IsAbs(b.Target) {
|
||||
return msg.WrapErr(EBADE, "path is not absolute")
|
||||
}
|
||||
|
||||
source := toHost(b.SourceFinal)
|
||||
target := toSysroot(b.Target)
|
||||
|
||||
// this perm value emulates bwrap behaviour as it clears bits from 0755 based on
|
||||
// op->perms which is never set for any bind setup op so always results in 0700
|
||||
if fi, err := os.Stat(source); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if fi.IsDir() {
|
||||
if err = os.MkdirAll(target, 0700); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
} else if err = ensureFile(target, 0444, 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var flags uintptr = MS_REC
|
||||
if b.Flags&BindWritable == 0 {
|
||||
flags |= MS_RDONLY
|
||||
}
|
||||
if b.Flags&BindDevice == 0 {
|
||||
flags |= MS_NODEV
|
||||
}
|
||||
|
||||
return hostProc.bindMount(source, target, flags, b.SourceFinal == b.Target)
|
||||
}
|
||||
|
||||
func (b *BindMountOp) Is(op Op) bool { vb, ok := op.(*BindMountOp); return ok && *b == *vb }
|
||||
func (*BindMountOp) prefix() string { return "mounting" }
|
||||
func (b *BindMountOp) String() string {
|
||||
if b.Source == b.Target {
|
||||
return fmt.Sprintf("%q flags %#x", b.Source, b.Flags)
|
||||
}
|
||||
return fmt.Sprintf("%q on %q flags %#x", b.Source, b.Target, b.Flags&BindWritable)
|
||||
}
|
||||
func (f *Ops) Bind(source, target string, flags int) *Ops {
|
||||
*f = append(*f, &BindMountOp{source, "", target, flags})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountProcOp)) }
|
||||
|
||||
// MountProcOp mounts a private instance of proc.
|
||||
type MountProcOp string
|
||||
|
||||
func (p MountProcOp) early(*Params) error { return nil }
|
||||
func (p MountProcOp) apply(params *Params) error {
|
||||
v := string(p)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
|
||||
target := toSysroot(v)
|
||||
if err := os.MkdirAll(target, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return wrapErrSuffix(Mount("proc", target, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, ""),
|
||||
fmt.Sprintf("cannot mount proc on %q:", v))
|
||||
}
|
||||
|
||||
func (p MountProcOp) Is(op Op) bool { vp, ok := op.(MountProcOp); return ok && p == vp }
|
||||
func (MountProcOp) prefix() string { return "mounting" }
|
||||
func (p MountProcOp) String() string { return fmt.Sprintf("proc on %q", string(p)) }
|
||||
func (f *Ops) Proc(dest string) *Ops {
|
||||
*f = append(*f, MountProcOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountDevOp)) }
|
||||
|
||||
// MountDevOp mounts part of host dev.
|
||||
type MountDevOp string
|
||||
|
||||
func (d MountDevOp) early(*Params) error { return nil }
|
||||
func (d MountDevOp) apply(params *Params) error {
|
||||
v := string(d)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
target := toSysroot(v)
|
||||
|
||||
if err := mountTmpfs("devtmpfs", v, 0, params.ParentPerm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, name := range []string{"null", "zero", "full", "random", "urandom", "tty"} {
|
||||
targetPath := toSysroot(path.Join(v, name))
|
||||
if err := ensureFile(targetPath, 0444, params.ParentPerm); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := hostProc.bindMount(
|
||||
toHost("/dev/"+name),
|
||||
targetPath,
|
||||
0,
|
||||
true,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for i, name := range []string{"stdin", "stdout", "stderr"} {
|
||||
if err := os.Symlink(
|
||||
"/proc/self/fd/"+string(rune(i+'0')),
|
||||
path.Join(target, name),
|
||||
); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
for _, pair := range [][2]string{
|
||||
{"/proc/self/fd", "fd"},
|
||||
{"/proc/kcore", "core"},
|
||||
{"pts/ptmx", "ptmx"},
|
||||
} {
|
||||
if err := os.Symlink(pair[0], path.Join(target, pair[1])); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
|
||||
devPtsPath := path.Join(target, "pts")
|
||||
for _, name := range []string{path.Join(target, "shm"), devPtsPath} {
|
||||
if err := os.Mkdir(name, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := Mount("devpts", devPtsPath, "devpts", MS_NOSUID|MS_NOEXEC,
|
||||
"newinstance,ptmxmode=0666,mode=620"); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot mount devpts on %q:", devPtsPath))
|
||||
}
|
||||
|
||||
if params.RetainSession {
|
||||
var buf [8]byte
|
||||
if _, _, errno := Syscall(SYS_IOCTL, 1, TIOCGWINSZ, uintptr(unsafe.Pointer(&buf[0]))); errno == 0 {
|
||||
consolePath := toSysroot(path.Join(v, "console"))
|
||||
if err := ensureFile(consolePath, 0444, params.ParentPerm); err != nil {
|
||||
return err
|
||||
}
|
||||
if name, err := os.Readlink(hostProc.stdout()); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if err = hostProc.bindMount(
|
||||
toHost(name),
|
||||
consolePath,
|
||||
0,
|
||||
false,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d MountDevOp) Is(op Op) bool { vd, ok := op.(MountDevOp); return ok && d == vd }
|
||||
func (MountDevOp) prefix() string { return "mounting" }
|
||||
func (d MountDevOp) String() string { return fmt.Sprintf("dev on %q", string(d)) }
|
||||
func (f *Ops) Dev(dest string) *Ops {
|
||||
*f = append(*f, MountDevOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountMqueueOp)) }
|
||||
|
||||
// MountMqueueOp mounts a private mqueue instance on container Path.
|
||||
type MountMqueueOp string
|
||||
|
||||
func (m MountMqueueOp) early(*Params) error { return nil }
|
||||
func (m MountMqueueOp) apply(params *Params) error {
|
||||
v := string(m)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
|
||||
target := toSysroot(v)
|
||||
if err := os.MkdirAll(target, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return wrapErrSuffix(Mount("mqueue", target, "mqueue", MS_NOSUID|MS_NOEXEC|MS_NODEV, ""),
|
||||
fmt.Sprintf("cannot mount mqueue on %q:", v))
|
||||
}
|
||||
|
||||
func (m MountMqueueOp) Is(op Op) bool { vm, ok := op.(MountMqueueOp); return ok && m == vm }
|
||||
func (MountMqueueOp) prefix() string { return "mounting" }
|
||||
func (m MountMqueueOp) String() string { return fmt.Sprintf("mqueue on %q", string(m)) }
|
||||
func (f *Ops) Mqueue(dest string) *Ops {
|
||||
*f = append(*f, MountMqueueOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountTmpfsOp)) }
|
||||
|
||||
// MountTmpfsOp mounts tmpfs on container Path.
|
||||
type MountTmpfsOp struct {
|
||||
Path string
|
||||
Size int
|
||||
Perm os.FileMode
|
||||
}
|
||||
|
||||
func (t *MountTmpfsOp) early(*Params) error { return nil }
|
||||
func (t *MountTmpfsOp) apply(*Params) error {
|
||||
if !path.IsAbs(t.Path) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
}
|
||||
if t.Size < 0 || t.Size > math.MaxUint>>1 {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("size %d out of bounds", t.Size))
|
||||
}
|
||||
return mountTmpfs("tmpfs", t.Path, t.Size, t.Perm)
|
||||
}
|
||||
|
||||
func (t *MountTmpfsOp) Is(op Op) bool { vt, ok := op.(*MountTmpfsOp); return ok && *t == *vt }
|
||||
func (*MountTmpfsOp) prefix() string { return "mounting" }
|
||||
func (t *MountTmpfsOp) String() string { return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size) }
|
||||
func (f *Ops) Tmpfs(dest string, size int, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MountTmpfsOp{dest, size, perm})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(SymlinkOp)) }
|
||||
|
||||
// SymlinkOp creates a symlink in the container filesystem.
|
||||
type SymlinkOp [2]string
|
||||
|
||||
func (l *SymlinkOp) early(*Params) error {
|
||||
if strings.HasPrefix(l[0], "*") {
|
||||
l[0] = l[0][1:]
|
||||
if !path.IsAbs(l[0]) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", l[0]))
|
||||
}
|
||||
if name, err := os.Readlink(l[0]); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
l[0] = name
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (l *SymlinkOp) apply(params *Params) error {
|
||||
// symlink target is an arbitrary path value, so only validate link name here
|
||||
if !path.IsAbs(l[1]) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", l[1]))
|
||||
}
|
||||
|
||||
target := toSysroot(l[1])
|
||||
if err := os.MkdirAll(path.Dir(target), params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
if err := os.Symlink(l[0], target); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *SymlinkOp) Is(op Op) bool { vl, ok := op.(*SymlinkOp); return ok && *l == *vl }
|
||||
func (*SymlinkOp) prefix() string { return "creating" }
|
||||
func (l *SymlinkOp) String() string { return fmt.Sprintf("symlink on %q target %q", l[1], l[0]) }
|
||||
func (f *Ops) Link(target, linkName string) *Ops {
|
||||
*f = append(*f, &SymlinkOp{target, linkName})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MkdirOp)) }
|
||||
|
||||
// MkdirOp creates a directory in the container filesystem.
|
||||
type MkdirOp struct {
|
||||
Path string
|
||||
Perm os.FileMode
|
||||
}
|
||||
|
||||
func (m *MkdirOp) early(*Params) error { return nil }
|
||||
func (m *MkdirOp) apply(*Params) error {
|
||||
if !path.IsAbs(m.Path) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", m.Path))
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(toSysroot(m.Path), m.Perm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MkdirOp) Is(op Op) bool { vm, ok := op.(*MkdirOp); return ok && m == vm }
|
||||
func (*MkdirOp) prefix() string { return "creating" }
|
||||
func (m *MkdirOp) String() string { return fmt.Sprintf("directory %q perm %s", m.Path, m.Perm) }
|
||||
func (f *Ops) Mkdir(dest string, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MkdirOp{dest, perm})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(TmpfileOp)) }
|
||||
|
||||
// TmpfileOp places a file in container Path containing Data.
|
||||
type TmpfileOp struct {
|
||||
Path string
|
||||
Data []byte
|
||||
}
|
||||
|
||||
func (t *TmpfileOp) early(*Params) error { return nil }
|
||||
func (t *TmpfileOp) apply(params *Params) error {
|
||||
if !path.IsAbs(t.Path) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
}
|
||||
|
||||
var tmpPath string
|
||||
if f, err := os.CreateTemp("/", "tmp.*"); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if _, err = f.Write(t.Data); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot write to intermediate file:")
|
||||
} else if err = f.Close(); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot close intermediate file:")
|
||||
} else {
|
||||
tmpPath = f.Name()
|
||||
}
|
||||
|
||||
target := toSysroot(t.Path)
|
||||
if err := ensureFile(target, 0444, params.ParentPerm); err != nil {
|
||||
return err
|
||||
} else if err = hostProc.bindMount(
|
||||
tmpPath,
|
||||
target,
|
||||
MS_RDONLY|MS_NODEV,
|
||||
false,
|
||||
); err != nil {
|
||||
return err
|
||||
} else if err = os.Remove(tmpPath); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TmpfileOp) Is(op Op) bool {
|
||||
vt, ok := op.(*TmpfileOp)
|
||||
return ok && t.Path == vt.Path && slices.Equal(t.Data, vt.Data)
|
||||
}
|
||||
func (*TmpfileOp) prefix() string { return "placing" }
|
||||
func (t *TmpfileOp) String() string {
|
||||
return fmt.Sprintf("tmpfile %q (%d bytes)", t.Path, len(t.Data))
|
||||
}
|
||||
func (f *Ops) Place(name string, data []byte) *Ops { *f = append(*f, &TmpfileOp{name, data}); return f }
|
||||
func (f *Ops) PlaceP(name string, dataP **[]byte) *Ops {
|
||||
t := &TmpfileOp{Path: name}
|
||||
*dataP = &t.Data
|
||||
|
||||
*f = append(*f, t)
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(AutoEtcOp)) }
|
||||
|
||||
// AutoEtcOp expands host /etc into a toplevel symlink mirror with /etc semantics.
|
||||
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
||||
type AutoEtcOp struct{ Prefix string }
|
||||
|
||||
func (e *AutoEtcOp) early(*Params) error { return nil }
|
||||
func (e *AutoEtcOp) apply(*Params) error {
|
||||
const target = sysrootPath + "/etc/"
|
||||
rel := e.hostRel() + "/"
|
||||
|
||||
if err := os.MkdirAll(target, 0755); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
if d, err := os.ReadDir(toSysroot(e.hostPath())); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
for _, ent := range d {
|
||||
n := ent.Name()
|
||||
switch n {
|
||||
case ".host":
|
||||
|
||||
case "passwd":
|
||||
case "group":
|
||||
|
||||
case "mtab":
|
||||
if err = os.Symlink("/proc/mounts", target+n); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
|
||||
default:
|
||||
if err = os.Symlink(rel+n, target+n); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (e *AutoEtcOp) hostPath() string { return "/etc/" + e.hostRel() }
|
||||
func (e *AutoEtcOp) hostRel() string { return ".host/" + e.Prefix }
|
||||
|
||||
func (e *AutoEtcOp) Is(op Op) bool {
|
||||
ve, ok := op.(*AutoEtcOp)
|
||||
return ok && ((e == nil && ve == nil) || (e != nil && ve != nil && *e == *ve))
|
||||
}
|
||||
func (*AutoEtcOp) prefix() string { return "setting up" }
|
||||
func (e *AutoEtcOp) String() string { return fmt.Sprintf("auto etc %s", e.Prefix) }
|
||||
func (f *Ops) Etc(host, prefix string) *Ops {
|
||||
e := &AutoEtcOp{prefix}
|
||||
f.Mkdir("/etc", 0755)
|
||||
f.Bind(host, e.hostPath(), 0)
|
||||
*f = append(*f, e)
|
||||
return f
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
var msg Msg = new(DefaultMsg)
|
||||
|
||||
func GetOutput() Msg { return msg }
|
||||
func SetOutput(v Msg) {
|
||||
if v == nil {
|
||||
msg = new(DefaultMsg)
|
||||
} else {
|
||||
msg = v
|
||||
}
|
||||
}
|
||||
|
||||
func wrapErrSuffix(err error, a ...any) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return msg.WrapErr(err, append(a, err)...)
|
||||
}
|
||||
|
||||
func wrapErrSelf(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return msg.WrapErr(err, err.Error())
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var (
	// ErrNotSet is returned by Receive when the environment variable is absent.
	ErrNotSet = errors.New("environment variable not set")
	// ErrInvalid is returned by Receive when no file could be created from the fd.
	ErrInvalid = errors.New("bad file descriptor")
)

// Setup appends the read end of a pipe for setup params transmission and returns its fd.
// The fd is computed as 3 plus the number of files already present in
// extraFiles. The returned encoder writes to the write end of the pipe.
func Setup(extraFiles *[]*os.File) (int, *gob.Encoder, error) {
	r, w, err := os.Pipe()
	if err != nil {
		return -1, nil, err
	}

	fd := len(*extraFiles) + 3
	*extraFiles = append(*extraFiles, r)
	return fd, gob.NewEncoder(w), nil
}

// Receive retrieves setup fd from the environment and receives params.
// The environment variable key holds the fd in decimal, and e is the
// destination passed to the gob decoder. The setup file is stored in *v when
// v is not nil. Once the file has been set up, its Close method is returned
// next to the outcome of decoding, including when decoding fails.
func Receive(key string, e any, v **os.File) (func() error, error) {
	s, ok := os.LookupEnv(key)
	if !ok {
		return nil, ErrNotSet
	}

	fd, err := strconv.Atoi(s)
	if err != nil {
		return nil, err
	}

	setup := os.NewFile(uintptr(fd), "setup")
	if setup == nil {
		return nil, ErrInvalid
	}
	if v != nil {
		*v = setup
	}

	return setup.Close, gob.NewDecoder(setup).Decode(e)
}
|
||||
@@ -1,94 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
)
|
||||
|
||||
const (
	// hostPath is where the host root is found in the intermediate root.
	hostPath = "/" + hostDir
	hostDir  = "host"
	// sysrootPath is where the container root is assembled in the intermediate root.
	sysrootPath = "/" + sysrootDir
	sysrootDir  = "sysroot"
)

// toSysroot returns name joined onto sysrootPath.
// Leading slashes are dropped from name first and the result is cleaned by path.Join.
func toSysroot(name string) string {
	return path.Join(sysrootPath, strings.TrimLeft(name, "/"))
}

// toHost returns name joined onto hostPath.
// Leading slashes are dropped from name first and the result is cleaned by path.Join.
func toHost(name string) string {
	return path.Join(hostPath, strings.TrimLeft(name, "/"))
}
|
||||
|
||||
func createFile(name string, perm, pperm os.FileMode, content []byte) error {
|
||||
if err := os.MkdirAll(path.Dir(name), pperm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
f, err := os.OpenFile(name, syscall.O_CREAT|syscall.O_EXCL|syscall.O_WRONLY, perm)
|
||||
if err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
if content != nil {
|
||||
_, err = f.Write(content)
|
||||
if err != nil {
|
||||
err = wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
return errors.Join(f.Close(), err)
|
||||
}
|
||||
|
||||
func ensureFile(name string, perm, pperm os.FileMode) error {
|
||||
fi, err := os.Stat(name)
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return createFile(name, perm, pperm, nil)
|
||||
}
|
||||
|
||||
if mode := fi.Mode(); mode&fs.ModeDir != 0 || mode&fs.ModeSymlink != 0 {
|
||||
err = msg.WrapErr(syscall.EISDIR,
|
||||
fmt.Sprintf("path %q is a directory", name))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var hostProc = newProcPats(hostPath)
|
||||
|
||||
// newProcPats returns the proc paths found below prefix.
func newProcPats(prefix string) *procPaths {
	root := prefix + "/proc"
	return &procPaths{prefix: root, self: root + "/self"}
}

// procPaths holds pathnames of a proc filesystem.
type procPaths struct {
	// prefix is the pathname of proc itself.
	prefix string
	// self is the pathname of the self entry in proc.
	self string
}

// stdout returns the pathname of the fd entry for file descriptor 1.
func (p *procPaths) stdout() string { return p.fd(1) }

// fd returns the pathname of the fd entry for file descriptor fd.
func (p *procPaths) fd(fd int) string {
	return p.self + "/fd/" + strconv.Itoa(fd)
}
|
||||
func (p *procPaths) mountinfo(f func(d *vfs.MountInfoDecoder) error) error {
|
||||
if r, err := os.Open(p.self + "/mountinfo"); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
d := vfs.NewMountInfoDecoder(r)
|
||||
err0 := f(d)
|
||||
if err = r.Close(); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot close mountinfo:")
|
||||
} else if err = d.Err(); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
"cannot parse mountinfo:")
|
||||
}
|
||||
return err0
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Values that this file defines itself rather than taking from package syscall.
const (
	O_PATH = 0x200000

	PR_SET_NO_NEW_PRIVS = 0x26

	CAP_SYS_ADMIN = 0x15
	CAP_SETPCAP   = 0x8
)

// Arguments accepted by SetDumpable.
const (
	SUID_DUMP_DISABLE = iota
	SUID_DUMP_USER
)

// SetDumpable sets the dumpable attribute of the calling process to dumpable
// through prctl with PR_SET_DUMPABLE. The errno is returned on failure.
func SetDumpable(dumpable uintptr) error {
	// linux/sched/coredump.h
	_, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_DUMPABLE, dumpable, 0)
	if errno == 0 {
		return nil
	}
	return errno
}
|
||||
|
||||
const (
	_LINUX_CAPABILITY_VERSION_3 = 0x20080522

	PR_CAP_AMBIENT           = 0x2f
	PR_CAP_AMBIENT_RAISE     = 0x2
	PR_CAP_AMBIENT_CLEAR_ALL = 0x4
)

type (
	// capHeader is the header argument of capset.
	capHeader struct {
		version uint32
		pid     int32
	}

	// capData is one element of the data argument of capset.
	capData struct {
		effective   uint32
		permitted   uint32
		inheritable uint32
	}
)

// See CAP_TO_INDEX in linux/capability.h:
func capToIndex(cap uintptr) uintptr {
	return cap >> 5
}

// See CAP_TO_MASK in linux/capability.h:
func capToMask(cap uintptr) uint32 {
	return uint32(1) << (cap & 31)
}

// capset invokes the capset syscall with hdrp and datap.
// The errno is returned on failure.
func capset(hdrp *capHeader, datap *[2]capData) error {
	// the pointer conversions must stay inside the call expression
	_, _, errno := syscall.Syscall(syscall.SYS_CAPSET,
		uintptr(unsafe.Pointer(hdrp)),
		uintptr(unsafe.Pointer(&datap[0])), 0)
	if errno == 0 {
		return nil
	}
	return errno
}
|
||||
|
||||
// IgnoringEINTR calls fn until it returns something other than EINTR and
// returns that result. The retry loop is needed even though all signal
// handlers are installed with SA_RESTART: see #22838, #38033, #38836, #40846.
// Issues #20400 and #36644 are further cases, in which a signal handler is
// installed without setting SA_RESTART. None of these are the common case,
// but there are enough of them that an EINTR loop cannot be avoided.
func IgnoringEINTR(fn func() error) error {
	err := fn()
	for err == syscall.EINTR {
		err = fn()
	}
	return err
}
|
||||
@@ -1,47 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
	// values read by mustReadSysctl
	kernelOverflowuid int
	kernelOverflowgid int
	kernelCapLastCap  int

	// sysctlOnce guards the one-time call to mustReadSysctl
	sysctlOnce sync.Once
)

const (
	kernelOverflowuidPath = "/proc/sys/kernel/overflowuid"
	kernelOverflowgidPath = "/proc/sys/kernel/overflowgid"
	kernelCapLastCapPath  = "/proc/sys/kernel/cap_last_cap"
)

// mustReadSysctl reads overflowuid, overflowgid and cap_last_cap, in that
// order, into their package-level variables. A file that cannot be read or
// interpreted as an integer terminates the program via log.Fatalf.
func mustReadSysctl() {
	for _, ent := range [...]struct {
		pathname string
		dst      *int
	}{
		{kernelOverflowuidPath, &kernelOverflowuid},
		{kernelOverflowgidPath, &kernelOverflowgid},
		{kernelCapLastCapPath, &kernelCapLastCap},
	} {
		data, err := os.ReadFile(ent.pathname)
		if err != nil {
			log.Fatalf("cannot read %q: %v", ent.pathname, err)
		}
		if *ent.dst, err = strconv.Atoi(string(bytes.TrimSpace(data))); err != nil {
			log.Fatalf("cannot interpret %q: %v", ent.pathname, err)
		}
	}
}

// OverflowUid returns the value of the overflowuid sysctl, read on first use.
func OverflowUid() int {
	sysctlOnce.Do(mustReadSysctl)
	return kernelOverflowuid
}

// OverflowGid returns the value of the overflowgid sysctl, read on first use.
func OverflowGid() int {
	sysctlOnce.Do(mustReadSysctl)
	return kernelOverflowgid
}

// LastCap returns the value of the cap_last_cap sysctl, read on first use.
func LastCap() uintptr {
	sysctlOnce.Do(mustReadSysctl)
	return uintptr(kernelCapLastCap)
}
|
||||
Reference in New Issue
Block a user