From 14824b8a9b1e418c63482a7fafe5a27a2c347d00 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Sun, 3 Aug 2025 18:34:55 +0900 Subject: [PATCH] container/op: merge mqueue and dev Ops There is no reason to mount mqueue anywhere else, and these Ops usually follow each other. This change merges them. This helps decrease IPC overhead and also enables mounting dev readonly. Signed-off-by: Ophestra --- container/container_test.go | 18 ++++++- container/ops.go | 70 +++++++++++----------------- helper/container_test.go | 2 +- internal/app/app_nixos_linux_test.go | 2 +- internal/app/app_pd_linux_test.go | 4 +- internal/app/container_linux.go | 2 +- ldd/exec.go | 2 +- 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/container/container_test.go b/container/container_test.go index 91fa49b..861a4db 100644 --- a/container/container_test.go +++ b/container/container_test.go @@ -72,8 +72,7 @@ var containerTestCases = []struct { {"dev", true, true /* go test output is not a tty */, false, false, new(container.Ops). - Dev("/dev"). - Mqueue("/dev/mqueue"), + Dev("/dev", true), []*vfs.MountInfoEntry{ ent("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore), ent("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), @@ -86,6 +85,21 @@ var containerTestCases = []struct { ent("/", "/dev/mqueue", "rw,nosuid,nodev,noexec,relatime", "mqueue", "mqueue", "rw"), }, 1971, 100, nil, 0, seccomp.PresetStrict}, + + {"dev no mqueue", true, true /* go test output is not a tty */, false, false, + new(container.Ops). + Dev("/dev", false), + []*vfs.MountInfoEntry{ + ent("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore), + ent("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/zero", "/dev/zero", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/full", "/dev/full", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/random", "/dev/random", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/urandom", "/dev/urandom", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/tty", "/dev/tty", "rw,nosuid", "devtmpfs", "devtmpfs", ignore), + ent("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"), + }, + 1971, 100, nil, 0, seccomp.PresetStrict}, } func TestContainer(t *testing.T) { diff --git a/container/ops.go b/container/ops.go index b8d287f..d64235f 100644 --- a/container/ops.go +++ b/container/ops.go @@ -180,27 +180,29 @@ func (p MountProcOp) String() string { return fmt.Sprintf("proc on %q", string(p func init() { gob.Register(new(MountDevOp)) } // Dev appends an [Op] that mounts a subset of host /dev. -func (f *Ops) Dev(dest string) *Ops { - *f = append(*f, MountDevOp(dest)) +func (f *Ops) Dev(dest string, mqueue bool) *Ops { + *f = append(*f, &MountDevOp{dest, mqueue}) return f } -type MountDevOp string +type MountDevOp struct { + Target string + Mqueue bool +} -func (d MountDevOp) early(*Params) error { return nil } -func (d MountDevOp) apply(params *Params) error { - v := string(d) - if !path.IsAbs(v) { - return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v)) +func (d *MountDevOp) early(*Params) error { return nil } +func (d *MountDevOp) apply(params *Params) error { + if !path.IsAbs(d.Target) { + return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", d.Target)) } - target := toSysroot(v) + target := toSysroot(d.Target) if err := mountTmpfs(SourceTmpfsDevtmpfs, target, MS_NOSUID|MS_NODEV, 0, params.ParentPerm); err != nil { return err } for _, name := range []string{"null", "zero", "full", "random", "urandom", "tty"} { - targetPath := toSysroot(path.Join(v, name)) + targetPath := path.Join(target, name) if err := ensureFile(targetPath, 0444, params.ParentPerm); err != nil { return err } @@ -247,7 +249,7 @@ func (d MountDevOp) apply(params *Params) error { if params.RetainSession { var buf [8]byte if _, _, errno := Syscall(SYS_IOCTL, 1, TIOCGWINSZ, uintptr(unsafe.Pointer(&buf[0]))); errno == 0 { - consolePath := toSysroot(path.Join(v, "console")) + consolePath := path.Join(target, "console") if err := ensureFile(consolePath, 0444, params.ParentPerm); err != nil { return err } @@ -264,43 +266,27 @@ func (d MountDevOp) apply(params *Params) error { } } + if d.Mqueue { + mqueueTarget := path.Join(target, "mqueue") + if err := os.Mkdir(mqueueTarget, params.ParentPerm); err != nil { + return wrapErrSelf(err) + } + return wrapErrSuffix(Mount(SourceMqueue, mqueueTarget, FstypeMqueue, MS_NOSUID|MS_NOEXEC|MS_NODEV, zeroString), + "cannot mount mqueue:") + } + return nil } -func (d MountDevOp) Is(op Op) bool { vd, ok := op.(MountDevOp); return ok && d == vd } -func (MountDevOp) prefix() string { return "mounting" } -func (d MountDevOp) String() string { return fmt.Sprintf("dev on %q", string(d)) } - -func init() { gob.Register(new(MountMqueueOp)) } - -// Mqueue appends an [Op] that mounts a private instance of mqueue. -func (f *Ops) Mqueue(dest string) *Ops { - *f = append(*f, MountMqueueOp(dest)) - return f -} - -type MountMqueueOp string - -func (m MountMqueueOp) early(*Params) error { return nil } -func (m MountMqueueOp) apply(params *Params) error { - v := string(m) - - if !path.IsAbs(v) { - return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v)) +func (d *MountDevOp) Is(op Op) bool { vd, ok := op.(*MountDevOp); return ok && *d == *vd } +func (*MountDevOp) prefix() string { return "mounting" } +func (d *MountDevOp) String() string { + if d.Mqueue { + return fmt.Sprintf("dev on %q with mqueue", d.Target) } - - target := toSysroot(v) - if err := os.MkdirAll(target, params.ParentPerm); err != nil { - return wrapErrSelf(err) - } - return wrapErrSuffix(Mount(SourceMqueue, target, FstypeMqueue, MS_NOSUID|MS_NOEXEC|MS_NODEV, zeroString), - fmt.Sprintf("cannot mount mqueue on %q:", v)) + return fmt.Sprintf("dev on %q", d.Target) } -func (m MountMqueueOp) Is(op Op) bool { vm, ok := op.(MountMqueueOp); return ok && m == vm } -func (MountMqueueOp) prefix() string { return "mounting" } -func (m MountMqueueOp) String() string { return fmt.Sprintf("mqueue on %q", string(m)) } - func init() { gob.Register(new(MountTmpfsOp)) } // Tmpfs appends an [Op] that mounts tmpfs on container path [MountTmpfsOp.Path]. diff --git a/helper/container_test.go b/helper/container_test.go index c5ad065..8be4bcb 100644 --- a/helper/container_test.go +++ b/helper/container_test.go @@ -33,7 +33,7 @@ func TestContainer(t *testing.T) { testHelper(t, func(ctx context.Context, setOutput func(stdoutP, stderrP *io.Writer), stat bool) helper.Helper { return helper.New(ctx, os.Args[0], argsWt, stat, argF, func(z *container.Container) { setOutput(&z.Stdout, &z.Stderr) - z.Bind("/", "/", 0).Proc("/proc").Dev("/dev") + z.Bind("/", "/", 0).Proc("/proc").Dev("/dev", true) }, nil) }) }) diff --git a/internal/app/app_nixos_linux_test.go b/internal/app/app_nixos_linux_test.go index c11fbd8..2db3bf8 100644 --- a/internal/app/app_nixos_linux_test.go +++ b/internal/app/app_nixos_linux_test.go @@ -118,7 +118,7 @@ var testCasesNixos = []sealTestCase{ Ops: new(container.Ops). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/bin", "/bin", 0). Bind("/usr/bin", "/usr/bin", 0). Bind("/nix/store", "/nix/store", 0). diff --git a/internal/app/app_pd_linux_test.go b/internal/app/app_pd_linux_test.go index 66cebe4..b3d03f0 100644 --- a/internal/app/app_pd_linux_test.go +++ b/internal/app/app_pd_linux_test.go @@ -46,7 +46,7 @@ var testCasesPd = []sealTestCase{ Root("/", "4a450b6596d7bc15bd01780eb9a607ac", container.BindWritable). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional). Readonly("/var/run/nscd", 0755). Tmpfs("/run/user/1971", 8192, 0755). @@ -180,7 +180,7 @@ var testCasesPd = []sealTestCase{ Root("/", "ebf083d1b175911782d413369b64ce7c", container.BindWritable). Proc("/proc"). Tmpfs(hst.Tmp, 4096, 0755). - Dev("/dev").Mqueue("/dev/mqueue"). + Dev("/dev", true). Bind("/dev/dri", "/dev/dri", container.BindWritable|container.BindDevice|container.BindOptional). Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional). Readonly("/var/run/nscd", 0755). diff --git a/internal/app/container_linux.go b/internal/app/container_linux.go index ce3a515..3548301 100644 --- a/internal/app/container_linux.go +++ b/internal/app/container_linux.go @@ -85,7 +85,7 @@ func newContainer(s *hst.ContainerConfig, os sys.State, prefix string, uid, gid Tmpfs(hst.Tmp, 1<<12, 0755) if !s.Device { - params.Dev("/dev").Mqueue("/dev/mqueue") + params.Dev("/dev", true) } else { params.Bind("/dev", "/dev", container.BindWritable|container.BindDevice) } diff --git a/ldd/exec.go b/ldd/exec.go index c159368..538d544 100644 --- a/ldd/exec.go +++ b/ldd/exec.go @@ -28,7 +28,7 @@ func Exec(ctx context.Context, p string) ([]*Entry, error) { stdout, stderr := new(bytes.Buffer), new(bytes.Buffer) z.Stdout = stdout z.Stderr = stderr - z.Bind("/", "/", 0).Proc("/proc").Dev("/dev") + z.Bind("/", "/", 0).Proc("/proc").Dev("/dev", false) if err := z.Start(); err != nil { return nil, err