From 14824b8a9b1e418c63482a7fafe5a27a2c347d00 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Sun, 3 Aug 2025 18:34:55 +0900 Subject: [PATCH] container/op: merge mqueue and dev Ops There is no reason to mount mqueue anywhere else, and these Ops usually follow each other. This change merges them. This helps decrease IPC overhead and also enables mounting dev readonly. Signed-off-by: Ophestra --- container/container_test.go | 18 ++++++- container/ops.go | 70 +++++++++++----------------- helper/container_test.go | 2 +- internal/app/app_nixos_linux_test.go | 2 +- internal/app/app_pd_linux_test.go | 4 +- internal/app/container_linux.go | 2 +- ldd/exec.go | 2 +- 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/container/container_test.go b/container/container_test.go index 91fa49bb..861a4dbd 100644 --- a/container/container_test.go +++ b/container/container_test.go @@ -72,8 +72,7 @@ var containerTestCases = []struct { {"dev", true, true /* go test output is not a tty */, false, false, new(container.Ops). - Dev("/dev"). - Mqueue("/dev/mqueue"), + Dev("/dev", true), []*vfs.MountInfoEntry{ ent("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore), ent("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), @@ -86,6 +85,21 @@ var containerTestCases = []struct { ent("/", "/dev/mqueue", "rw,nosuid,nodev,noexec,relatime", "mqueue", "mqueue", "rw"), }, 1971, 100, nil, 0, seccomp.PresetStrict}, + + {"dev no mqueue", true, true /* go test output is not a tty */, false, false, + new(container.Ops). + Dev("/dev", false), + []*vfs.MountInfoEntry{ + ent("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore), + ent("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/zero", "/dev/zero", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/full", "/dev/full", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/random", "/dev/random", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/urandom", "/dev/urandom", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/tty", "/dev/tty", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"), + }, + 1971, 100, nil, 0, seccomp.PresetStrict}, } func TestContainer(t *testing.T) { diff --git a/container/ops.go b/container/ops.go index b8d287fa..d64235fc 100644 --- a/container/ops.go +++ b/container/ops.go @@ -180,27 +180,29 @@ func (p MountProcOp) String() string { return fmt.Sprintf("proc on %q", string(p func init() { gob.Register(new(MountDevOp)) } // Dev appends an [Op] that mounts a subset of host /dev. -func (f *Ops) Dev(dest string) *Ops { - *f = append(*f, MountDevOp(dest)) +func (f *Ops) Dev(dest string, mqueue bool) *Ops { + *f = append(*f, &MountDevOp{dest, mqueue}) return f } -type MountDevOp string +type MountDevOp struct { + Target string + Mqueue bool +} -func (d MountDevOp) early(*Params) error { return nil } -func (d MountDevOp) apply(params *Params) error { - v := string(d) - if !path.IsAbs(v) { - return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v)) +func (d *MountDevOp) early(*Params) error { return nil } +func (d *MountDevOp) apply(params *Params) error { + if !path.IsAbs(d.Target) { + return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", d.Target)) } - target := toSysroot(v) + target := toSysroot(d.Target) if err := mountTmpfs(SourceTmpfsDevtmpfs, target, MS_NOSUID|MS_NODEV, 0, params.ParentPerm); err != nil { return err } for _, name := range []string{"null", "zero", "full", "random", "urandom", "tty"} { - targetPath := toSysroot(path.Join(v, name)) + targetPath := path.Join(target, name) if err := ensureFile(targetPath, 0444, params.ParentPerm); err != nil { return err } @@ -247,7 +249,7 @@ func (d MountDevOp) apply(params *Params) error { if params.RetainSession { var buf [8]byte if _, _, errno := Syscall(SYS_IOCTL, 1, TIOCGWINSZ, uintptr(unsafe.Pointer(&buf[0]))); errno == 0 { - consolePath := toSysroot(path.Join(v, "console")) + consolePath := path.Join(target, "console") if err := ensureFile(consolePath, 0444, params.ParentPerm); err != nil { return err } @@ -264,43 +266,27 @@ func (d MountDevOp) apply(params *Params) error { } } + if d.Mqueue { + mqueueTarget := path.Join(target, "mqueue") + if err := os.Mkdir(mqueueTarget, params.ParentPerm); err != nil { + return wrapErrSelf(err) + } + return wrapErrSuffix(Mount(SourceMqueue, mqueueTarget, FstypeMqueue, MS_NOSUID|MS_NOEXEC|MS_NODEV, zeroString), + "cannot mount mqueue:") + } + return nil } -func (d MountDevOp) Is(op Op) bool { vd, ok := op.(MountDevOp); return ok && d == vd } -func (MountDevOp) prefix() string { return "mounting" } -func (d MountDevOp) String() string { return fmt.Sprintf("dev on %q", string(d)) } - -func init() { gob.Register(new(MountMqueueOp)) } - -// Mqueue appends an [Op] that mounts a private instance of mqueue. -func (f *Ops) Mqueue(dest string) *Ops { - *f = append(*f, MountMqueueOp(dest)) - return f -} - -type MountMqueueOp string - -func (m MountMqueueOp) early(*Params) error { return nil } -func (m MountMqueueOp) apply(params *Params) error { - v := string(m) - - if !path.IsAbs(v) { - return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v)) +func (d *MountDevOp) Is(op Op) bool { vd, ok := op.(*MountDevOp); return ok && *d == *vd } +func (*MountDevOp) prefix() string { return "mounting" } +func (d *MountDevOp) String() string { + if d.Mqueue { + return fmt.Sprintf("dev on %q with mqueue", d.Target) } - - target := toSysroot(v) - if err := os.MkdirAll(target, params.ParentPerm); err != nil { - return wrapErrSelf(err) - } - return wrapErrSuffix(Mount(SourceMqueue, target, FstypeMqueue, MS_NOSUID|MS_NOEXEC|MS_NODEV, zeroString), - fmt.Sprintf("cannot mount mqueue on %q:", v)) + return fmt.Sprintf("dev on %q", d.Target) } -func (m MountMqueueOp) Is(op Op) bool { vm, ok := op.(MountMqueueOp); return ok && m == vm } -func (MountMqueueOp) prefix() string { return "mounting" } -func (m MountMqueueOp) String() string { return fmt.Sprintf("mqueue on %q", string(m)) } - func init() { gob.Register(new(MountTmpfsOp)) } // Tmpfs appends an [Op] that mounts tmpfs on container path [MountTmpfsOp.Path]. diff --git a/helper/container_test.go b/helper/container_test.go index c5ad0659..8be4bcbb 100644 --- a/helper/container_test.go +++ b/helper/container_test.go @@ -33,7 +33,7 @@ func TestContainer(t *testing.T) { testHelper(t, func(ctx context.Context, setOutput func(stdoutP, stderrP *io.Writer), stat bool) helper.Helper { return helper.New(ctx, os.Args[0], argsWt, stat, argF, func(z *container.Container) { setOutput(&z.Stdout, &z.Stderr) - z.Bind("/", "/", 0).Proc("/proc").Dev("/dev") + z.Bind("/", "/", 0).Proc("/proc").Dev("/dev", true) }, nil) }) }) diff --git a/internal/app/app_nixos_linux_test.go b/internal/app/app_nixos_linux_test.go index c11fbd81..2db3bf8a 100644 --- a/internal/app/app_nixos_linux_test.go +++ b/internal/app/app_nixos_linux_test.go @@ -118,7 +118,7 @@ var testCasesNixos = []sealTestCase{ Ops: new(container.Ops). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/bin", "/bin", 0). Bind("/usr/bin", "/usr/bin", 0). Bind("/nix/store", "/nix/store", 0). diff --git a/internal/app/app_pd_linux_test.go b/internal/app/app_pd_linux_test.go index 66cebe4f..b3d03f03 100644 --- a/internal/app/app_pd_linux_test.go +++ b/internal/app/app_pd_linux_test.go @@ -46,7 +46,7 @@ var testCasesPd = []sealTestCase{ Root("/", "4a450b6596d7bc15bd01780eb9a607ac", container.BindWritable). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional). Readonly("/var/run/nscd", 0755). Tmpfs("/run/user/1971", 8192, 0755). @@ -180,7 +180,7 @@ var testCasesPd = []sealTestCase{ Root("/", "ebf083d1b175911782d413369b64ce7c", container.BindWritable). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/dev/dri", "/dev/dri", container.BindWritable|container.BindDevice|container.BindOptional). Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional). Readonly("/var/run/nscd", 0755). diff --git a/internal/app/container_linux.go b/internal/app/container_linux.go index ce3a5154..35483018 100644 --- a/internal/app/container_linux.go +++ b/internal/app/container_linux.go @@ -85,7 +85,7 @@ func newContainer(s *hst.ContainerConfig, os sys.State, prefix string, uid, gid Tmpfs(hst.Tmp, 1<<12, 0755) if !s.Device { - params.Dev("/dev").Mqueue("/dev/mqueue") + params.Dev("/dev", true) } else { params.Bind("/dev", "/dev", container.BindWritable|container.BindDevice) } diff --git a/ldd/exec.go b/ldd/exec.go index c1593680..538d5446 100644 --- a/ldd/exec.go +++ b/ldd/exec.go @@ -28,7 +28,7 @@ func Exec(ctx context.Context, p string) ([]*Entry, error) { stdout, stderr := new(bytes.Buffer), new(bytes.Buffer) z.Stdout = stdout z.Stderr = stderr - z.Bind("/", "/", 0).Proc("/proc").Dev("/dev") + z.Bind("/", "/", 0).Proc("/proc").Dev("/dev", false) if err := z.Start(); err != nil { return nil, err