From 9b1a60b5c9f7a92abb632639117d61c5cbaf74f7 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Thu, 13 Mar 2025 20:59:03 +0900 Subject: [PATCH] sandbox: native container tooling This should eventually replace bwrap. Signed-off-by: Ophestra --- flake.nix | 1 + internal/sandbox/const.go | 6 + internal/sandbox/container.go | 212 +++++++++++++++++++ internal/sandbox/container_test.go | 149 ++++++++++++++ internal/sandbox/init.go | 317 +++++++++++++++++++++++++++++ internal/sandbox/path.go | 77 +++++++ internal/sandbox/sequential.go | 169 +++++++++++++++ package.nix | 1 + 8 files changed, 932 insertions(+) create mode 100644 internal/sandbox/const.go create mode 100644 internal/sandbox/container.go create mode 100644 internal/sandbox/container_test.go create mode 100644 internal/sandbox/init.go create mode 100644 internal/sandbox/path.go create mode 100644 internal/sandbox/sequential.go diff --git a/flake.nix b/flake.nix index b1929d5..55aa2e1 100644 --- a/flake.nix +++ b/flake.nix @@ -140,6 +140,7 @@ gcc pkg-config wayland-scanner + bubblewrap ] ++ ( with pkgs.pkgsStatic; diff --git a/internal/sandbox/const.go b/internal/sandbox/const.go new file mode 100644 index 0000000..afced96 --- /dev/null +++ b/internal/sandbox/const.go @@ -0,0 +1,6 @@ +package sandbox + +const ( + PR_SET_NO_NEW_PRIVS = 0x26 + CAP_SYS_ADMIN = 0x15 +) diff --git a/internal/sandbox/container.go b/internal/sandbox/container.go new file mode 100644 index 0000000..91db06e --- /dev/null +++ b/internal/sandbox/container.go @@ -0,0 +1,212 @@ +package sandbox + +import ( + "context" + "encoding/gob" + "fmt" + "io" + "os" + "os/exec" + "path" + "strconv" + "syscall" + + "git.gensokyo.uk/security/fortify/helper/proc" + "git.gensokyo.uk/security/fortify/helper/seccomp" + "git.gensokyo.uk/security/fortify/internal" + "git.gensokyo.uk/security/fortify/internal/fmsg" +) + +type HardeningFlags uintptr + +const ( + FAllowUserns HardeningFlags = 1 << iota + FAllowTTY + FAllowNet +) + +func (flags HardeningFlags) 
seccomp(opts seccomp.SyscallOpts) seccomp.SyscallOpts { + if flags&FAllowUserns == 0 { + opts |= seccomp.FlagDenyNS + } + if flags&FAllowTTY == 0 { + opts |= seccomp.FlagDenyTTY + } + return opts +} + +type ( + // Container represents a container environment being prepared or run. + // None of [Container] methods are safe for concurrent use. + Container struct { + // Name of initial process in the container. + name string + // Cgroup fd, nil to disable. + Cgroup *int + // ExtraFiles passed through to initial process in the container, + // with behaviour identical to its [exec.Cmd] counterpart. + ExtraFiles []*os.File + + InitParams + // Custom [exec.Cmd] initialisation function. + CommandContext func(ctx context.Context) (cmd *exec.Cmd) + // mapped uid in user namespace + Uid int + // mapped gid in user namespace + Gid int + + // param encoder for shim and init + setup *gob.Encoder + // cancels cmd + cancel context.CancelFunc + + Stdin io.Reader + Stdout io.Writer + Stderr io.Writer + + cmd *exec.Cmd + ctx context.Context + } + + InitParams struct { + // Working directory in the container. + Dir string + // Initial process environment. + Env []string + // Absolute path of initial process in the container. Overrides name. + Path string + // Initial process argv. + Args []string + + // Hostname value in UTS namespace. + Hostname string + // Sequential container setup ops. + *Ops + // Extra seccomp options. 
+ Seccomp seccomp.SyscallOpts + + Flags HardeningFlags + } + + Ops []Op + Op interface { + apply() error + + Is(op Op) bool + fmt.Stringer + } +) + +func (p *Container) Start() error { + if p.cmd != nil { + panic("attempted to start twice") + } + + c, cancel := context.WithCancel(p.ctx) + p.cancel = cancel + + var cloneFlags uintptr = syscall.CLONE_NEWIPC | + syscall.CLONE_NEWUTS | + syscall.CLONE_NEWCGROUP + if p.Flags&FAllowNet == 0 { + cloneFlags |= syscall.CLONE_NEWNET + } + + // map to overflow id to work around ownership checks + if p.Uid < 1 { + p.Uid = OverflowUid() + } + if p.Gid < 1 { + p.Gid = OverflowGid() + } + + p.cmd = p.CommandContext(c) + p.cmd.Stdin, p.cmd.Stdout, p.cmd.Stderr = p.Stdin, p.Stdout, p.Stderr + p.cmd.Dir = "/" + p.cmd.SysProcAttr = &syscall.SysProcAttr{ + Setsid: p.Flags&FAllowTTY == 0, + Pdeathsig: syscall.SIGKILL, + + Cloneflags: cloneFlags | + syscall.CLONE_NEWUSER | + syscall.CLONE_NEWPID | + syscall.CLONE_NEWNS, + + UidMappings: []syscall.SysProcIDMap{{p.Uid, syscall.Getuid(), 1}}, + GidMappings: []syscall.SysProcIDMap{{p.Gid, syscall.Getgid(), 1}}, + // remain privileged for setup + AmbientCaps: []uintptr{CAP_SYS_ADMIN}, + + UseCgroupFD: p.Cgroup != nil, + } + if p.cmd.SysProcAttr.UseCgroupFD { + p.cmd.SysProcAttr.CgroupFD = *p.Cgroup + } + + // place setup pipe before user supplied extra files, this is later restored by init + if fd, e, err := proc.Setup(&p.cmd.ExtraFiles); err != nil { + return fmsg.WrapErrorSuffix(err, + "cannot create shim setup pipe:") + } else { + p.setup = e + p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)} + } + p.cmd.ExtraFiles = append(p.cmd.ExtraFiles, p.ExtraFiles...) 
+ + fmsg.Verbose("starting container init") + if err := p.cmd.Start(); err != nil { + return fmsg.WrapError(err, err.Error()) + } + return nil +} + +func (p *Container) Serve() error { + if p.setup == nil { + panic("invalid serve") + } + + if p.Path != "" && !path.IsAbs(p.Path) { + return fmsg.WrapError(syscall.EINVAL, + fmt.Sprintf("invalid executable path %q", p.Path)) + } + + if p.Path == "" { + if p.name == "" { + p.Path = os.Getenv("SHELL") + if !path.IsAbs(p.Path) { + return fmsg.WrapError(syscall.EBADE, + "no command specified and $SHELL is invalid") + } + p.name = path.Base(p.Path) + } else if path.IsAbs(p.name) { + p.Path = p.name + } else if v, err := exec.LookPath(p.name); err != nil { + return fmsg.WrapError(err, err.Error()) + } else { + p.Path = v + } + } + + setup := p.setup + p.setup = nil + return setup.Encode( + &initParams{ + p.InitParams, + len(p.ExtraFiles), + fmsg.Load(), + }, + ) +} + +func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() } + +func New(ctx context.Context, name string, args ...string) *Container { + return &Container{name: name, ctx: ctx, + InitParams: InitParams{Args: append([]string{name}, args...), Dir: "/", Ops: new(Ops)}, + CommandContext: func(ctx context.Context) (cmd *exec.Cmd) { + cmd = exec.CommandContext(ctx, internal.MustExecutable()) + cmd.Args = []string{"init"} + return + }, + } +} diff --git a/internal/sandbox/container_test.go b/internal/sandbox/container_test.go new file mode 100644 index 0000000..648e88c --- /dev/null +++ b/internal/sandbox/container_test.go @@ -0,0 +1,149 @@ +package sandbox_test + +import ( + "bytes" + "context" + "encoding/json" + "log" + "os" + "os/exec" + "path" + "slices" + "testing" + "time" + + "git.gensokyo.uk/security/fortify/fst" + "git.gensokyo.uk/security/fortify/internal" + "git.gensokyo.uk/security/fortify/internal/fmsg" + "git.gensokyo.uk/security/fortify/internal/sandbox" + "git.gensokyo.uk/security/fortify/ldd" + check 
"git.gensokyo.uk/security/fortify/test/sandbox" +) + +func TestContainer(t *testing.T) { + { + oldVerbose := fmsg.Load() + fmsg.Store(true) + t.Cleanup(func() { fmsg.Store(oldVerbose) }) + } + + testCases := []struct { + name string + ops *sandbox.Ops + mnt []*check.Mntent + host string + }{ + {"minimal", new(sandbox.Ops), nil, "test-minimal"}, + {"tmpfs", + new(sandbox.Ops). + Tmpfs(fst.Tmp, 0, 0755), + []*check.Mntent{ + {FSName: "tmpfs", Dir: fst.Tmp, Type: "tmpfs", Opts: "\x00"}, + }, "test-tmpfs"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + container := sandbox.New(ctx, os.Args[0], "-test.v", + "-test.run=TestHelperCheckContainer", "--", "check", tc.host) + container.Hostname = tc.host + container.CommandContext = func(ctx context.Context) *exec.Cmd { + return exec.CommandContext(ctx, os.Args[0], "-test.v", + "-test.run=TestHelperInit", "--", "init") + } + container.Stdout, container.Stderr = os.Stdout, os.Stderr + container.Ops = tc.ops + if container.Args[5] == "" { + if name, err := os.Hostname(); err != nil { + t.Fatalf("cannot get hostname: %v", err) + } else { + container.Args[5] = name + } + } + + container. + Tmpfs("/tmp", 0, 0755). 
+ Bind(os.Args[0], os.Args[0], 0) + // in case test has cgo enabled + var libPaths []string + if entries, err := ldd.Exec(ctx, os.Args[0]); err != nil { + log.Fatalf("ldd: %v", err) + } else { + libPathsM := make(map[string]struct{}, len(entries)) + for _, ent := range entries { + if path.IsAbs(ent.Path) { + libPathsM[path.Dir(ent.Path)] = struct{}{} + } + if path.IsAbs(ent.Name) { + libPathsM[path.Dir(ent.Name)] = struct{}{} + } + } + libPaths = make([]string, 0, len(libPathsM)) + for name := range libPathsM { + libPaths = append(libPaths, name) + } + slices.Sort(libPaths) + for _, name := range libPaths { + container.Bind(name, name, 0) + } + } + + mnt := make([]*check.Mntent, 0, 3+len(libPaths)) + mnt = append(mnt, &check.Mntent{FSName: "rootfs", Dir: "/", Type: "tmpfs", Opts: "host_passthrough"}) + mnt = append(mnt, tc.mnt...) + mnt = append(mnt, + &check.Mntent{FSName: "tmpfs", Dir: "/tmp", Type: "tmpfs", Opts: "host_passthrough"}, + &check.Mntent{FSName: "\x00", Dir: os.Args[0], Type: "\x00", Opts: "\x00"}) + for _, name := range libPaths { + mnt = append(mnt, &check.Mntent{FSName: "\x00", Dir: name, Type: "\x00", Opts: "\x00", Freq: -1, Passno: -1}) + } + mnt = append(mnt, &check.Mntent{FSName: "proc", Dir: "/proc", Type: "proc", Opts: "rw,nosuid,nodev,noexec,relatime"}) + mntentWant := new(bytes.Buffer) + if err := json.NewEncoder(mntentWant).Encode(mnt); err != nil { + t.Fatalf("cannot serialise mntent: %v", err) + } + container.Stdin = mntentWant + + // needs /proc to check mntent + container.Proc("/proc") + + if err := container.Start(); err != nil { + fmsg.PrintBaseError(err, "start:") + t.Fatalf("cannot start container: %v", err) + } else if err = container.Serve(); err != nil { + fmsg.PrintBaseError(err, "serve:") + t.Errorf("cannot serve setup params: %v", err) + } + if err := container.Wait(); err != nil { + fmsg.PrintBaseError(err, "wait:") + t.Fatalf("wait: %v", err) + } + }) + } +} + +func TestHelperInit(t *testing.T) { + if len(os.Args) != 5 || 
os.Args[4] != "init" { + return + } + sandbox.Init(internal.Exit) +} + +func TestHelperCheckContainer(t *testing.T) { + if len(os.Args) != 6 || os.Args[4] != "check" { + return + } + + t.Run("hostname", func(t *testing.T) { + if name, err := os.Hostname(); err != nil { + t.Fatalf("cannot get hostname: %v", err) + } else if name != os.Args[5] { + t.Errorf("Hostname: %q, want %q", name, os.Args[5]) + } + }) + t.Run("seccomp", func(t *testing.T) { check.MustAssertSeccomp() }) + t.Run("mntent", func(t *testing.T) { check.MustAssertMounts("", "/proc/mounts", "/proc/self/fd/0") }) +} diff --git a/internal/sandbox/init.go b/internal/sandbox/init.go new file mode 100644 index 0000000..1b9bbf8 --- /dev/null +++ b/internal/sandbox/init.go @@ -0,0 +1,317 @@ +package sandbox + +import ( + "errors" + "fmt" + "log" + "os" + "os/exec" + "os/signal" + "path" + "runtime" + "strconv" + "syscall" + "time" + + "git.gensokyo.uk/security/fortify/helper/proc" + "git.gensokyo.uk/security/fortify/helper/seccomp" + "git.gensokyo.uk/security/fortify/internal" + "git.gensokyo.uk/security/fortify/internal/fmsg" +) + +const ( + // time to wait for linger processes after death of initial process + residualProcessTimeout = 5 * time.Second + + // intermediate tmpfs mount point + basePath = "/tmp" + + // setup params file descriptor + setupEnv = "FORTIFY_SETUP" +) + +type initParams struct { + InitParams + + // extra files count + Count int + // verbosity pass through + Verbose bool +} + +func Init(exit func(code int)) { + runtime.LockOSThread() + fmsg.Prepare("init") + + if err := internal.SetDumpable(internal.SUID_DUMP_DISABLE); err != nil { + log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err) + } + + if os.Getpid() != 1 { + log.Fatal("this process must run as pid 1") + } + + /* + receive setup payload + */ + + var ( + params initParams + closeSetup func() error + setupFile *os.File + offsetSetup int + ) + if f, err := proc.Receive(setupEnv, ¶ms, &setupFile); err != nil { + if errors.Is(err, 
proc.ErrInvalid) { + log.Fatal("invalid setup descriptor") + } + if errors.Is(err, proc.ErrNotSet) { + log.Fatal("FORTIFY_SETUP not set") + } + + log.Fatalf("cannot decode init setup payload: %v", err) + } else { + fmsg.Store(params.Verbose) + fmsg.Verbose("received setup parameters") + if params.Verbose { + seccomp.CPrintln = fmsg.Verbose + } + closeSetup = f + offsetSetup = int(setupFile.Fd() + 1) + } + + if params.Hostname != "" { + if err := syscall.Sethostname([]byte(params.Hostname)); err != nil { + log.Fatalf("cannot set hostname: %v", err) + } + } + + /* + set up mount points from intermediate root + */ + + if err := syscall.Mount("", "/", "", + syscall.MS_SILENT|syscall.MS_SLAVE|syscall.MS_REC, + ""); err != nil { + log.Fatalf("cannot make / rslave: %v", err) + } + + if err := syscall.Mount("rootfs", basePath, "tmpfs", + syscall.MS_NODEV|syscall.MS_NOSUID, + ""); err != nil { + log.Fatalf("cannot mount intermediate root: %v", err) + } + if err := os.Chdir(basePath); err != nil { + log.Fatalf("cannot enter base path: %v", err) + } + + if err := os.Mkdir(sysrootDir, 0755); err != nil { + log.Fatalf("%v", err) + } + if err := syscall.Mount(sysrootDir, sysrootDir, "", + syscall.MS_SILENT|syscall.MS_MGC_VAL|syscall.MS_BIND|syscall.MS_REC, + ""); err != nil { + log.Fatalf("cannot bind sysroot: %v", err) + } + + if err := os.Mkdir(hostDir, 0755); err != nil { + log.Fatalf("%v", err) + } + if err := syscall.PivotRoot(basePath, hostDir); err != nil { + log.Fatalf("cannot pivot into intermediate root: %v", err) + } + if err := os.Chdir("/"); err != nil { + log.Fatalf("%v", err) + } + + for i, op := range *params.Ops { + fmsg.Verbosef("mounting %s", op) + if err := op.apply(); err != nil { + fmsg.PrintBaseError(err, + fmt.Sprintf("cannot apply op %d:", i)) + exit(1) + } + } + + /* + pivot to sysroot + */ + + if err := syscall.Mount(hostDir, hostDir, "", + syscall.MS_SILENT|syscall.MS_REC|syscall.MS_PRIVATE, + ""); err != nil { + log.Fatalf("cannot make host root 
rprivate: %v", err)
+	}
+	if err := syscall.Unmount(hostDir, syscall.MNT_DETACH); err != nil { // lazily detach the host root
+		log.Fatalf("cannot unmount host root: %v", err)
+	}
+
+	{
+		var fd int // handle on the intermediate root, used below to re-enter and detach it
+		if err := internal.IgnoringEINTR(func() (err error) {
+			fd, err = syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
+			return
+		}); err != nil {
+			log.Fatalf("cannot open intermediate root: %v", err)
+		}
+		if err := os.Chdir(sysrootPath); err != nil {
+			log.Fatalf("%v", err)
+		}
+
+		if err := syscall.PivotRoot(".", "."); err != nil { // same directory as new root and put_old
+			log.Fatalf("cannot pivot into sysroot: %v", err)
+		}
+		if err := syscall.Fchdir(fd); err != nil {
+			log.Fatalf("cannot re-enter intermediate root: %v", err)
+		}
+		if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
+			log.Fatalf("cannot unmount intermediate root: %v", err)
+		}
+		if err := os.Chdir("/"); err != nil {
+			log.Fatalf("%v", err)
+		}
+
+		if err := syscall.Close(fd); err != nil {
+			log.Fatalf("cannot close intermediate root: %v", err)
+		}
+	}
+
+	/*
+		set no_new_privs, then load seccomp filter
+	*/
+
+	if _, _, err := syscall.Syscall6(syscall.SYS_PRCTL, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); err != 0 { // the option is prctl's first argument, not the syscall number; unused arguments must be zero
+		log.Fatalf("prctl(PR_SET_NO_NEW_PRIVS): %v", err)
+	}
+	if err := seccomp.Load(params.Flags.seccomp(params.Seccomp)); err != nil {
+		log.Fatalf("cannot load syscall filter: %v", err)
+	}
+
+	/* at this point CAP_SYS_ADMIN can be dropped, however it is kept for now as it does not increase attack surface */
+
+	/*
+		pass through extra files
+	*/
+
+	extraFiles := make([]*os.File, params.Count)
+	for i := range extraFiles {
+		extraFiles[i] = os.NewFile(uintptr(offsetSetup+i), "extra file "+strconv.Itoa(i)) // consecutive descriptors starting at offsetSetup
+	}
+
+	/*
+		prepare initial process
+	*/
+
+	cmd := exec.Command(params.Path)
+	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} // initial process runs in its own process group
+	cmd.Args = params.Args
+	cmd.Env = params.Env
+	cmd.ExtraFiles = extraFiles
+	cmd.Dir = params.Dir
+
+	if err := cmd.Start(); err != nil {
+		log.Fatalf("%v", err)
+	}
+	fmsg.Suspend()
+
+	
/* + close setup pipe + */ + + if err := closeSetup(); err != nil { + log.Println("cannot close setup pipe:", err) + // not fatal + } + + /* + perform init duties + */ + + sig := make(chan os.Signal, 2) + signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM) + + type winfo struct { + wpid int + wstatus syscall.WaitStatus + } + info := make(chan winfo, 1) + done := make(chan struct{}) + + go func() { + var ( + err error + wpid = -2 + wstatus syscall.WaitStatus + ) + + // keep going until no child process is left + for wpid != -1 { + if err != nil { + break + } + + if wpid != -2 { + info <- winfo{wpid, wstatus} + } + + err = syscall.EINTR + for errors.Is(err, syscall.EINTR) { + wpid, err = syscall.Wait4(-1, &wstatus, 0, nil) + } + } + if !errors.Is(err, syscall.ECHILD) { + log.Println("unexpected wait4 response:", err) + } + + close(done) + }() + + // closed after residualProcessTimeout has elapsed after initial process death + timeout := make(chan struct{}) + + r := 2 + for { + select { + case s := <-sig: + if fmsg.Resume() { + fmsg.Verbosef("terminating on %s after process start", s.String()) + } else { + fmsg.Verbosef("terminating on %s", s.String()) + } + exit(0) + case w := <-info: + if w.wpid == cmd.Process.Pid { + // initial process exited, output is most likely available again + fmsg.Resume() + + switch { + case w.wstatus.Exited(): + r = w.wstatus.ExitStatus() + case w.wstatus.Signaled(): + r = 128 + int(w.wstatus.Signal()) + default: + r = 255 + } + + go func() { + time.Sleep(residualProcessTimeout) + close(timeout) + }() + } + case <-done: + exit(r) + case <-timeout: + log.Println("timeout exceeded waiting for lingering processes") + exit(r) + } + } +} + +// TryArgv0 calls [Init] if the last element of argv0 is "init". 
+func TryArgv0() { + if len(os.Args) > 0 && path.Base(os.Args[0]) == "init" { + Init(internal.Exit) + internal.Exit(0) + } +} diff --git a/internal/sandbox/path.go b/internal/sandbox/path.go new file mode 100644 index 0000000..0d49afa --- /dev/null +++ b/internal/sandbox/path.go @@ -0,0 +1,77 @@ +package sandbox + +import ( + "errors" + "io/fs" + "os" + "path" + "strings" + "syscall" + + "git.gensokyo.uk/security/fortify/internal/fmsg" +) + +const ( + hostPath = "/" + hostDir + hostDir = "host" + sysrootPath = "/" + sysrootDir + sysrootDir = "sysroot" +) + +func toSysroot(name string) string { + name = strings.TrimLeftFunc(name, func(r rune) bool { return r == '/' }) + return path.Join(sysrootPath, name) +} + +func toHost(name string) string { + name = strings.TrimLeftFunc(name, func(r rune) bool { return r == '/' }) + return path.Join(hostPath, name) +} + +func realpathHost(name string) (string, error) { + source := toHost(name) + rp, err := os.Readlink(source) + + if err != nil { + if errors.Is(err, syscall.EINVAL) { + // not a symlink + return name, nil + } + return "", err + } + + if !path.IsAbs(rp) { + return name, nil + } + fmsg.Verbosef("path %q resolves to %q", name, rp) + return rp, nil +} + +func createFile(name string, perm os.FileMode, content []byte) error { + if err := os.MkdirAll(path.Dir(name), 0755); err != nil { + return err + } + f, err := os.OpenFile(name, syscall.O_CREAT|syscall.O_EXCL|syscall.O_WRONLY, perm) + if err != nil { + return err + } + if content != nil { + _, err = f.Write(content) + } + return errors.Join(f.Close(), err) +} + +func ensureFile(name string, perm os.FileMode) error { + fi, err := os.Stat(name) + if err != nil { + if !os.IsNotExist(err) { + return err + } + return createFile(name, perm, nil) + } + + if mode := fi.Mode(); mode&fs.ModeDir != 0 || mode&fs.ModeSymlink != 0 { + err = syscall.EISDIR + } + return err +} diff --git a/internal/sandbox/sequential.go b/internal/sandbox/sequential.go new file mode 100644 index 
0000000..603d59e --- /dev/null +++ b/internal/sandbox/sequential.go @@ -0,0 +1,169 @@ +package sandbox + +import ( + "encoding/gob" + "errors" + "fmt" + "math" + "os" + "path" + "strings" + "syscall" + + "git.gensokyo.uk/security/fortify/internal/fmsg" +) + +func init() { gob.Register(new(BindMount)) } + +const ( + BindOptional = 1 << iota + BindRecursive + BindWritable + BindDevices +) + +// BindMount bind mounts host path Source on container path Target. +type BindMount struct { + Source, Target string + + Flags int +} + +func (b *BindMount) apply() error { + if !path.IsAbs(b.Source) || !path.IsAbs(b.Target) { + return syscall.EBADE + } + target := toSysroot(b.Target) + var source string + + // this is what bwrap does, so the behaviour is kept for now, + // however recursively resolving links might improve user experience + if rp, err := realpathHost(b.Source); err != nil { + if os.IsNotExist(err) { + if b.Flags&BindOptional != 0 { + return nil + } else { + return fmsg.WrapError(err, + fmt.Sprintf("path %q does not exist", b.Source)) + } + } + return fmsg.WrapError(err, err.Error()) + } else { + source = toHost(rp) + } + + if fi, err := os.Stat(source); err != nil { + return fmsg.WrapError(err, err.Error()) + } else if fi.IsDir() { + if err = os.MkdirAll(target, 0755); err != nil { + return fmsg.WrapErrorSuffix(err, + fmt.Sprintf("cannot create directory %q:", b.Target)) + } + } else if err = ensureFile(target, 0444); err != nil { + if errors.Is(err, syscall.EISDIR) { + return fmsg.WrapError(err, + fmt.Sprintf("path %q is a directory", b.Target)) + } + return fmsg.WrapErrorSuffix(err, + fmt.Sprintf("cannot create %q:", b.Target)) + } + + var flags uintptr = syscall.MS_SILENT | syscall.MS_BIND + if b.Flags&BindRecursive != 0 { + flags |= syscall.MS_REC + } + if b.Flags&BindWritable == 0 { + flags |= syscall.MS_RDONLY + } + if b.Flags&BindDevices == 0 { + flags |= syscall.MS_NODEV + } + if fmsg.Load() { + if strings.TrimPrefix(source, hostPath) == 
strings.TrimPrefix(target, sysrootPath) {
+			fmsg.Verbosef("resolved %q flags %#x", target, flags)
+		} else {
+			fmsg.Verbosef("resolved %q on %q flags %#x", source, target, flags)
+		}
+	}
+	return fmsg.WrapErrorSuffix(syscall.Mount(source, target, "", flags, ""), // NOTE(review): mount(2) documents that a fresh MS_BIND ignores flags other than MS_REC, so MS_RDONLY/MS_NODEV likely need a MS_REMOUNT|MS_BIND pass to take effect; confirm
+		fmt.Sprintf("cannot bind %q on %q:", b.Source, b.Target))
+}
+
+func (b *BindMount) Is(op Op) bool { vb, ok := op.(*BindMount); return ok && *b == *vb } // true when op is a *BindMount with identical fields
+func (b *BindMount) String() string { // describes the op for verbose output; both forms print the full flag set
+	if b.Source == b.Target {
+		return fmt.Sprintf("%q flags %#x", b.Source, b.Flags)
+	}
+	return fmt.Sprintf("%q on %q flags %#x", b.Source, b.Target, b.Flags)
+}
+func (f *Ops) Bind(source, target string, flags int) *Ops { // appends a BindMount op and returns f for chaining
+	*f = append(*f, &BindMount{source, target, flags | BindRecursive}) // BindRecursive is always set
+	return f
+}
+
+func init() { gob.Register(new(MountProc)) }
+
+// MountProc mounts a private proc instance on container Path.
+type MountProc struct {
+	Path string
+}
+
+func (p *MountProc) apply() error { // creates the target under sysroot and mounts proc on it
+	if !path.IsAbs(p.Path) {
+		return fmsg.WrapError(syscall.EBADE,
+			fmt.Sprintf("path %q is not absolute", p.Path))
+	}
+
+	target := toSysroot(p.Path)
+	if err := os.MkdirAll(target, 0755); err != nil {
+		return fmsg.WrapError(err, err.Error())
+	}
+	return fmsg.WrapErrorSuffix(syscall.Mount("proc", target, "proc",
+		syscall.MS_NOSUID|syscall.MS_NOEXEC|syscall.MS_NODEV, ""),
+		fmt.Sprintf("cannot mount proc on %q:", p.Path))
+}
+
+func (p *MountProc) Is(op Op) bool { vp, ok := op.(*MountProc); return ok && *p == *vp } // true when op is a *MountProc with the same Path
+func (p *MountProc) String() string { return fmt.Sprintf("proc on %q", p.Path) }
+func (f *Ops) Proc(dest string) *Ops { // appends a MountProc op and returns f for chaining
+	*f = append(*f, &MountProc{dest})
+	return f
+}
+
+func init() { gob.Register(new(MountTmpfs)) }
+
+// MountTmpfs mounts tmpfs on container Path. 
+type MountTmpfs struct {
+	Path string      // absolute mount point inside the container
+	Size int         // passed as the tmpfs size= option when greater than zero
+	Mode os.FileMode // passed as the tmpfs mode= option
+}
+
+func (t *MountTmpfs) apply() error { // validates the op, creates the target under sysroot and mounts tmpfs on it
+	if !path.IsAbs(t.Path) {
+		return fmsg.WrapError(syscall.EBADE,
+			fmt.Sprintf("path %q is not absolute", t.Path))
+	}
+	if t.Size < 0 || t.Size > math.MaxUint>>1 {
+		return fmsg.WrapError(syscall.EBADE,
+			fmt.Sprintf("size %d out of bounds", t.Size))
+	}
+	target := toSysroot(t.Path)
+	if err := os.MkdirAll(target, 0755); err != nil {
+		return fmsg.WrapError(err, err.Error()) // wrapped the same way as in MountProc.apply
+	}
+	opt := fmt.Sprintf("mode=%#o", t.Mode)
+	if t.Size > 0 { // size option is only emitted for a positive Size
+		opt += fmt.Sprintf(",size=%d", t.Size)
+	}
+	return fmsg.WrapErrorSuffix(syscall.Mount("tmpfs", target, "tmpfs",
+		syscall.MS_NOSUID|syscall.MS_NODEV, opt),
+		fmt.Sprintf("cannot mount tmpfs on %q:", t.Path))
+}
+
+func (t *MountTmpfs) Is(op Op) bool { vt, ok := op.(*MountTmpfs); return ok && *t == *vt } // true when op is a *MountTmpfs with identical fields
+func (t *MountTmpfs) String() string { return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size) }
+func (f *Ops) Tmpfs(dest string, size int, mode os.FileMode) *Ops { // appends a MountTmpfs op and returns f for chaining
+	*f = append(*f, &MountTmpfs{dest, size, mode})
+	return f
+}
diff --git a/package.nix b/package.nix index 3088ec3..bdf95ca 100644 --- a/package.nix +++ b/package.nix @@ -73,6 +73,7 @@ buildGoModule rec { pkg-config wayland-scanner makeBinaryWrapper + bubblewrap ]; preBuild = ''