From 4e856438656ea0a1b30d52975f6b5415568df9a8 Mon Sep 17 00:00:00 2001
From: Ophestra
Date: Fri, 1 Aug 2025 04:04:36 +0900
Subject: [PATCH] container: implement autoroot as setup op

This code is useful beyond just pd behaviour, and implementing it this way
also reduces IPC overhead.

Signed-off-by: Ophestra
---
 container/autoroot.go | 97 +++++++++++++++++++++++++++++++++++++++++++
 container/init.go     |  8 ++++
 2 files changed, 105 insertions(+)
 create mode 100644 container/autoroot.go

diff --git a/container/autoroot.go b/container/autoroot.go
new file mode 100644
index 0000000..b6375b8
--- /dev/null
+++ b/container/autoroot.go
@@ -0,0 +1,97 @@
+package container
+
+import (
+	"encoding/gob"
+	"fmt"
+	"os"
+	"path"
+	. "syscall"
+)
+
+func init() { gob.Register(new(AutoRootOp)) }
+
+// Root appends an [Op] that expands a directory into a toplevel bind mount mirror on container root.
+// This is not a generic setup op. It is implemented here to reduce ipc overhead.
+func (f *Ops) Root(host, prefix string, flags int) *Ops {
+	*f = append(*f, &AutoRootOp{host, prefix, flags, nil})
+	return f
+}
+
+// AutoRootOp bind mounts every entry of Host selected by [IsAutoRootBindable]
+// onto a toplevel path of the same name in the container root.
+type AutoRootOp struct {
+	Host, Prefix string
+	// passed through to bindMount
+	Flags int
+
+	// obtained during early;
+	// these wrap the underlying Op because BindMountOp is relatively complex,
+	// so duplicating that code would be unwise
+	resolved []Op
+}
+
+// early checks that Host is absolute, reads its directory entries, and runs early on a
+// [BindMountOp] for every selected entry; the prepared ops are kept for apply.
+func (r *AutoRootOp) early(params *Params) error {
+	if !path.IsAbs(r.Host) {
+		return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", r.Host))
+	}
+
+	d, err := os.ReadDir(r.Host)
+	if err != nil {
+		return wrapErrSelf(err)
+	}
+
+	r.resolved = make([]Op, 0, len(d))
+	for _, ent := range d {
+		name := ent.Name()
+		if !IsAutoRootBindable(name) {
+			continue
+		}
+
+		op := &BindMountOp{
+			Source: path.Join(r.Host, name),
+			Target: "/" + name,
+			Flags:  r.Flags,
+		}
+		if err = op.early(params); err != nil {
+			return err
+		}
+		r.resolved = append(r.resolved, op)
+	}
+	return nil
+}
+
+// apply runs the ops prepared by early in order, stopping at the first error.
+func (r *AutoRootOp) apply(params *Params) error {
+	for _, op := range r.resolved {
+		msg.Verbosef("%s %s", op.prefix(), op)
+		if err := op.apply(params); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Is reports whether op is an [AutoRootOp] with equal Host, Prefix and Flags;
+// two nil [AutoRootOp] pointers are also considered equal.
+func (r *AutoRootOp) Is(op Op) bool {
+	vr, ok := op.(*AutoRootOp)
+	return ok && ((r == nil && vr == nil) || (r != nil && vr != nil &&
+		r.Host == vr.Host && r.Prefix == vr.Prefix && r.Flags == vr.Flags))
+}
+func (*AutoRootOp) prefix() string { return "setting up" }
+func (r *AutoRootOp) String() string {
+	return fmt.Sprintf("auto root %q prefix %s flags %#x", r.Host, r.Prefix, r.Flags)
+}
+
+// IsAutoRootBindable returns whether a dir entry name is selected for AutoRoot.
+// The names proc, dev, tmp, mnt and etc are excluded; every other name is selected.
+func IsAutoRootBindable(name string) bool {
+	switch name {
+	case "proc", "dev", "tmp", "mnt", "etc":
+		return false
+	default:
+		return true
+	}
+}
diff --git a/container/init.go b/container/init.go
index d4fce6e..8b178d8 100644
--- a/container/init.go
+++ b/container/init.go
@@ -121,6 +121,10 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
 		log.Fatalf("cannot make / rslave: %v", err)
 	}
 
+	/* early is called right before pivot_root into intermediate root;
+	this step is mostly for gathering information that would otherwise be difficult to obtain
+	via library functions after pivot_root, and implementations are expected to avoid changing
+	the state of the mount namespace */
 	for i, op := range *params.Ops {
 		if op == nil {
 			log.Fatalf("invalid op %d", i)
@@ -159,6 +163,10 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
 		log.Fatalf("%v", err)
 	}
 
+	/* apply is called right after pivot_root and entering the new root;
+	this step sets up the container filesystem, and implementations are expected to keep the host root
+	and sysroot mount points intact but otherwise can do whatever they need to;
+	chdir is allowed but discouraged */
 	for i, op := range *params.Ops {
 		// ops already checked during early setup
 		msg.Verbosef("%s %s", op.prefix(), op)