forked from security/hakurei
container/ops: implement overlay op
There are significant limitations to using the overlay mount, and the implementation in the kernel is quite quirky. For now the Op is quite robust; however, a higher-level interface for it has not been decided yet. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -115,6 +116,95 @@ var containerTestCases = []struct {
|
||||
ent("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"),
|
||||
),
|
||||
1971, 100, nil, 0, seccomp.PresetStrict},
|
||||
|
||||
{"overlay", true, false, false, true,
|
||||
func(t *testing.T) (*container.Ops, context.Context) {
|
||||
tempDir := t.TempDir()
|
||||
lower0, lower1, upper, work :=
|
||||
path.Join(tempDir, "lower0"),
|
||||
path.Join(tempDir, "lower1"),
|
||||
path.Join(tempDir, "upper"),
|
||||
path.Join(tempDir, "work")
|
||||
for _, name := range []string{lower0, lower1, upper, work} {
|
||||
if err := os.Mkdir(name, 0755); err != nil {
|
||||
t.Fatalf("Mkdir: error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return new(container.Ops).
|
||||
Overlay(hst.Tmp, upper, work, lower0, lower1),
|
||||
context.WithValue(context.WithValue(context.WithValue(context.WithValue(t.Context(),
|
||||
testVal("lower1"), lower1),
|
||||
testVal("lower0"), lower0),
|
||||
testVal("work"), work),
|
||||
testVal("upper"), upper)
|
||||
},
|
||||
func(t *testing.T, ctx context.Context) []*vfs.MountInfoEntry {
|
||||
return []*vfs.MountInfoEntry{
|
||||
ent("/", hst.Tmp, "rw", "overlay", "overlay",
|
||||
"rw,lowerdir="+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("lower0")).(string))+":"+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("lower1")).(string))+
|
||||
",upperdir="+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("upper")).(string))+
|
||||
",workdir="+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("work")).(string))+
|
||||
",redirect_dir=nofollow,uuid=on,userxattr"),
|
||||
}
|
||||
},
|
||||
1 << 3, 1 << 14, nil, 0, seccomp.PresetStrict},
|
||||
|
||||
{"overlay ephemeral", true, false, false, true,
|
||||
func(t *testing.T) (*container.Ops, context.Context) {
|
||||
tempDir := t.TempDir()
|
||||
lower0, lower1 :=
|
||||
path.Join(tempDir, "lower0"),
|
||||
path.Join(tempDir, "lower1")
|
||||
for _, name := range []string{lower0, lower1} {
|
||||
if err := os.Mkdir(name, 0755); err != nil {
|
||||
t.Fatalf("Mkdir: error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return new(container.Ops).
|
||||
OverlayEphemeral(hst.Tmp, lower0, lower1),
|
||||
t.Context()
|
||||
},
|
||||
func(t *testing.T, ctx context.Context) []*vfs.MountInfoEntry {
|
||||
return []*vfs.MountInfoEntry{
|
||||
// contains random suffix
|
||||
ent("/", hst.Tmp, "rw", "overlay", "overlay", ignore),
|
||||
}
|
||||
},
|
||||
1 << 3, 1 << 14, nil, 0, seccomp.PresetStrict},
|
||||
|
||||
{"overlay readonly", true, false, false, true,
|
||||
func(t *testing.T) (*container.Ops, context.Context) {
|
||||
tempDir := t.TempDir()
|
||||
lower0, lower1 :=
|
||||
path.Join(tempDir, "lower0"),
|
||||
path.Join(tempDir, "lower1")
|
||||
for _, name := range []string{lower0, lower1} {
|
||||
if err := os.Mkdir(name, 0755); err != nil {
|
||||
t.Fatalf("Mkdir: error = %v", err)
|
||||
}
|
||||
}
|
||||
return new(container.Ops).
|
||||
OverlayReadonly(hst.Tmp, lower0, lower1),
|
||||
context.WithValue(context.WithValue(t.Context(),
|
||||
testVal("lower1"), lower1),
|
||||
testVal("lower0"), lower0)
|
||||
},
|
||||
func(t *testing.T, ctx context.Context) []*vfs.MountInfoEntry {
|
||||
return []*vfs.MountInfoEntry{
|
||||
ent("/", hst.Tmp, "rw", "overlay", "overlay",
|
||||
"ro,lowerdir="+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("lower0")).(string))+":"+
|
||||
container.InternalToHostOvlEscape(ctx.Value(testVal("lower1")).(string))+
|
||||
",redirect_dir=nofollow,userxattr"),
|
||||
}
|
||||
},
|
||||
1 << 3, 1 << 14, nil, 0, seccomp.PresetStrict},
|
||||
}
|
||||
|
||||
func TestContainer(t *testing.T) {
|
||||
|
||||
@@ -40,6 +40,9 @@ const (
|
||||
// SourceMqueue is used when mounting mqueue.
|
||||
// Note that any source value is allowed when fstype is [FstypeMqueue].
|
||||
SourceMqueue = "mqueue"
|
||||
// SourceOverlay is used when mounting overlay.
|
||||
// Note that any source value is allowed when fstype is [FstypeOverlay].
|
||||
SourceOverlay = "overlay"
|
||||
|
||||
// SourceTmpfsRootfs is used when mounting the tmpfs instance backing the intermediate root.
|
||||
SourceTmpfsRootfs = "rootfs"
|
||||
@@ -66,6 +69,29 @@ const (
|
||||
// FstypeMqueue represents the mqueue pseudo-filesystem.
|
||||
// This filesystem type is usually mounted on /dev/mqueue.
|
||||
FstypeMqueue = "mqueue"
|
||||
// FstypeOverlay represents the overlay pseudo-filesystem.
|
||||
// This filesystem type can be mounted anywhere in the container filesystem.
|
||||
FstypeOverlay = "overlay"
|
||||
|
||||
// OptionOverlayLowerdir represents the lowerdir option of the overlay pseudo-filesystem.
|
||||
// The lowerdir can be on any filesystem and does not need to be writable.
|
||||
OptionOverlayLowerdir = "lowerdir"
|
||||
// OptionOverlayUpperdir represents the upperdir option of the overlay pseudo-filesystem.
|
||||
// The upperdir is normally on a writable filesystem.
|
||||
OptionOverlayUpperdir = "upperdir"
|
||||
// OptionOverlayWorkdir represents the workdir option of the overlay pseudo-filesystem.
|
||||
// The workdir needs to be an empty directory on the same filesystem as upperdir.
|
||||
OptionOverlayWorkdir = "workdir"
|
||||
// OptionOverlayUserxattr represents the userxattr option of the overlay pseudo-filesystem.
|
||||
// Use the "user.overlay." xattr namespace instead of "trusted.overlay.".
|
||||
OptionOverlayUserxattr = "userxattr"
|
||||
|
||||
// SpecialOverlayEscape is the escape string for overlay mount options.
|
||||
SpecialOverlayEscape = `\`
|
||||
// SpecialOverlayOption is the separator string between overlay mount options.
|
||||
SpecialOverlayOption = ","
|
||||
// SpecialOverlayPath is the separator string between overlay paths.
|
||||
SpecialOverlayPath = ":"
|
||||
)
|
||||
|
||||
// bindMount mounts source on target and recursively applies flags if MS_REC is set.
|
||||
@@ -199,8 +225,8 @@ func escapeOverlayDataSegment(s string) string {
|
||||
}
|
||||
|
||||
return strings.NewReplacer(
|
||||
`\`, `\\`,
|
||||
`,`, `\,`,
|
||||
`:`, `\:`,
|
||||
SpecialOverlayEscape, SpecialOverlayEscape+SpecialOverlayEscape,
|
||||
SpecialOverlayOption, SpecialOverlayEscape+SpecialOverlayOption,
|
||||
SpecialOverlayPath, SpecialOverlayEscape+SpecialOverlayPath,
|
||||
).Replace(s)
|
||||
}
|
||||
|
||||
167
container/ops.go
167
container/ops.go
@@ -13,6 +13,17 @@ import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
	// intermediatePatternOverlayUpper is the name pattern of the directory
	// created in the intermediate root to back [MountOverlayOp.Upper] of an
	// ephemeral overlay. The directory is left in place after apply returns.
	intermediatePatternOverlayUpper = "overlay.upper.*"
	// intermediatePatternOverlayWork is the name pattern of the directory
	// created in the intermediate root to back [MountOverlayOp.Work] of an
	// ephemeral overlay. The directory is left in place after apply returns.
	intermediatePatternOverlayWork = "overlay.work.*"
	// intermediatePatternTmpfile is the name pattern of the temporary file
	// created in the intermediate root by [TmpfileOp].
	intermediatePatternTmpfile = "tmp.*"
)
|
||||
|
||||
type (
|
||||
Ops []Op
|
||||
|
||||
@@ -337,6 +348,160 @@ func (t *MountTmpfsOp) Is(op Op) bool { vt, ok := op.(*MountTmpfsOp); return ok
|
||||
// prefix returns the constant verb "mounting".
// NOTE(review): presumably combined with String when the op is reported; confirm against the caller.
func (*MountTmpfsOp) prefix() string {
	return "mounting"
}
|
||||
// String describes the op by its quoted mount path and its size.
func (t *MountTmpfsOp) String() string {
	return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size)
}
|
||||
|
||||
// Register *MountOverlayOp with gob; gob requires concrete types to be
// registered before they can be transferred as interface values.
func init() {
	gob.Register(&MountOverlayOp{})
}
|
||||
|
||||
// Overlay appends an [Op] that mounts the overlay pseudo filesystem on [MountOverlayOp.Target].
|
||||
func (f *Ops) Overlay(target, state, work string, layers ...string) *Ops {
|
||||
*f = append(*f, &MountOverlayOp{
|
||||
Target: target,
|
||||
Lower: layers,
|
||||
Upper: state,
|
||||
Work: work,
|
||||
})
|
||||
return f
|
||||
}
|
||||
|
||||
// OverlayEphemeral appends an [Op] that mounts the overlay pseudo filesystem on [MountOverlayOp.Target]
|
||||
// with an ephemeral upperdir and workdir.
|
||||
func (f *Ops) OverlayEphemeral(target string, layers ...string) *Ops {
|
||||
return f.Overlay(target, SourceTmpfsEphemeral, zeroString, layers...)
|
||||
}
|
||||
|
||||
// OverlayReadonly appends an [Op] that mounts the overlay pseudo filesystem readonly on [MountOverlayOp.Target]
|
||||
func (f *Ops) OverlayReadonly(target string, layers ...string) *Ops {
|
||||
return f.Overlay(target, zeroString, zeroString, layers...)
|
||||
}
|
||||
|
||||
// MountOverlayOp mounts the overlay pseudo-filesystem on Target.
type MountOverlayOp struct {
	// Target is the mount point; apply rejects it unless it is an absolute path.
	Target string

	// Lower holds the entries of [OptionOverlayLowerdir].
	// During early every entry is symlink-resolved, passed through toHost and
	// escaped for use in overlay mount data.
	Lower []string
	// Upper is the value of [OptionOverlayUpperdir].
	// During early it is symlink-resolved, passed through toHost and escaped,
	// unless one of the special cases below applies.
	//
	// If Work is an empty string and Upper holds the special value [SourceTmpfsEphemeral],
	// an ephemeral upperdir and workdir are set up.
	//
	// If both Work and Upper are empty strings, upperdir and workdir are omitted
	// and the overlay is mounted readonly.
	Upper string
	// Work is the value of [OptionOverlayWorkdir].
	// During early it is symlink-resolved, passed through toHost and escaped
	// unless it is empty.
	Work string

	// ephemeral is set by early when the ephemeral combination of Upper and
	// Work is detected; apply then creates both directories itself.
	ephemeral bool
}
|
||||
|
||||
func (o *MountOverlayOp) early(*Params) error {
|
||||
if o.Work == zeroString {
|
||||
switch o.Upper {
|
||||
case SourceTmpfsEphemeral: // ephemeral
|
||||
o.ephemeral = true // intermediate root not yet available
|
||||
|
||||
case zeroString: // readonly
|
||||
|
||||
default:
|
||||
return msg.WrapErr(EINVAL, fmt.Sprintf("upperdir has unexpected value %q", o.Upper))
|
||||
}
|
||||
}
|
||||
|
||||
if !o.ephemeral {
|
||||
if o.Upper != o.Work && (o.Upper == zeroString || o.Work == zeroString) {
|
||||
// unreachable
|
||||
return msg.WrapErr(ENOTRECOVERABLE, "impossible overlay state reached")
|
||||
}
|
||||
|
||||
if o.Upper != zeroString {
|
||||
if !path.IsAbs(o.Upper) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("upperdir %q is not absolute", o.Upper))
|
||||
}
|
||||
if v, err := filepath.EvalSymlinks(o.Upper); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
o.Upper = escapeOverlayDataSegment(toHost(v))
|
||||
}
|
||||
}
|
||||
|
||||
if o.Work != zeroString {
|
||||
if !path.IsAbs(o.Work) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("workdir %q is not absolute", o.Work))
|
||||
}
|
||||
if v, err := filepath.EvalSymlinks(o.Work); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
o.Work = escapeOverlayDataSegment(toHost(v))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := range o.Lower {
|
||||
if !path.IsAbs(o.Lower[i]) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("lowerdir %q is not absolute", o.Lower[i]))
|
||||
}
|
||||
|
||||
if v, err := filepath.EvalSymlinks(o.Lower[i]); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else {
|
||||
o.Lower[i] = escapeOverlayDataSegment(toHost(v))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *MountOverlayOp) apply(params *Params) error {
|
||||
if !path.IsAbs(o.Target) {
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", o.Target))
|
||||
}
|
||||
target := toSysroot(o.Target)
|
||||
if err := os.MkdirAll(target, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
|
||||
if o.ephemeral {
|
||||
var err error
|
||||
// these directories are created internally, therefore early (absolute, symlink, prefix, escape) is bypassed
|
||||
if o.Upper, err = os.MkdirTemp(FHSRoot, intermediatePatternOverlayUpper); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
if o.Work, err = os.MkdirTemp(FHSRoot, intermediatePatternOverlayWork); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
}
|
||||
|
||||
options := make([]string, 0, 4)
|
||||
|
||||
if o.Upper == zeroString && o.Work == zeroString { // readonly
|
||||
if len(o.Lower) < 2 {
|
||||
return msg.WrapErr(EINVAL, "readonly overlay requires at least two lowerdir")
|
||||
}
|
||||
// "upperdir=" and "workdir=" may be omitted. In that case the overlay will be read-only
|
||||
} else {
|
||||
if len(o.Lower) == 0 {
|
||||
return msg.WrapErr(EINVAL, "overlay requires at least one lowerdir")
|
||||
}
|
||||
options = append(options,
|
||||
OptionOverlayUpperdir+"="+o.Upper,
|
||||
OptionOverlayWorkdir+"="+o.Work)
|
||||
}
|
||||
options = append(options,
|
||||
OptionOverlayLowerdir+"="+strings.Join(o.Lower, SpecialOverlayPath),
|
||||
OptionOverlayUserxattr)
|
||||
|
||||
return wrapErrSuffix(Mount(SourceOverlay, target, FstypeOverlay, 0, strings.Join(options, SpecialOverlayOption)),
|
||||
fmt.Sprintf("cannot mount overlay on %q:", o.Target))
|
||||
}
|
||||
|
||||
func (o *MountOverlayOp) Is(op Op) bool {
|
||||
vo, ok := op.(*MountOverlayOp)
|
||||
return ok &&
|
||||
o.Target == vo.Target &&
|
||||
slices.Equal(o.Lower, vo.Lower) &&
|
||||
o.Upper == vo.Upper &&
|
||||
o.Work == vo.Work
|
||||
}
|
||||
// prefix returns the constant verb "mounting".
// NOTE(review): presumably combined with String when the op is reported; confirm against the caller.
func (*MountOverlayOp) prefix() string {
	return "mounting"
}
|
||||
func (o *MountOverlayOp) String() string {
|
||||
return fmt.Sprintf("overlay on %q with %d layers", o.Target, len(o.Lower))
|
||||
}
|
||||
|
||||
// Register *SymlinkOp with gob; gob requires concrete types to be registered
// before they can be transferred as interface values.
func init() {
	gob.Register(&SymlinkOp{})
}
|
||||
|
||||
// Link appends an [Op] that creates a symlink in the container filesystem.
|
||||
@@ -436,7 +601,7 @@ func (t *TmpfileOp) apply(params *Params) error {
|
||||
}
|
||||
|
||||
var tmpPath string
|
||||
if f, err := os.CreateTemp(FHSRoot, "tmp.*"); err != nil {
|
||||
if f, err := os.CreateTemp(FHSRoot, intermediatePatternTmpfile); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if _, err = f.Write(t.Data); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
|
||||
42
container/path_test.go
Normal file
42
container/path_test.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package container
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestToSysroot(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
want string
|
||||
}{
|
||||
{"", "/sysroot"},
|
||||
{"/", "/sysroot"},
|
||||
{"//etc///", "/sysroot/etc"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := toSysroot(tc.name); got != tc.want {
|
||||
t.Errorf("toSysroot: %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestToHost(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
want string
|
||||
}{
|
||||
{"", "/host"},
|
||||
{"/", "/host"},
|
||||
{"//etc///", "/host/etc"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := toHost(tc.name); got != tc.want {
|
||||
t.Errorf("toHost: %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// InternalToHostOvlEscape exports toHost passed to escapeOverlayDataSegment.
|
||||
func InternalToHostOvlEscape(s string) string { return escapeOverlayDataSegment(toHost(s)) }
|
||||
Reference in New Issue
Block a user