container: optionally map uid/gid 0 as init
All checks were successful
Test / Create distribution (push) Successful in 1m3s
Test / Sandbox (push) Successful in 2m48s
Test / Hakurei (push) Successful in 3m45s
Test / ShareFS (push) Successful in 3m55s
Test / Sandbox (race detector) (push) Successful in 5m15s
Test / Hakurei (race detector) (push) Successful in 6m31s
Test / Flake checks (push) Successful in 1m21s
All checks were successful
Test / Create distribution (push) Successful in 1m3s
Test / Sandbox (push) Successful in 2m48s
Test / Hakurei (push) Successful in 3m45s
Test / ShareFS (push) Successful in 3m55s
Test / Sandbox (race detector) (push) Successful in 5m15s
Test / Hakurei (race detector) (push) Successful in 6m31s
Test / Flake checks (push) Successful in 1m21s
This is unfortunately required to work around flawed APIs like binfmt_misc.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -18,6 +18,7 @@ const (
|
|||||||
CAP_SETPCAP = 0x8
|
CAP_SETPCAP = 0x8
|
||||||
CAP_NET_ADMIN = 0xc
|
CAP_NET_ADMIN = 0xc
|
||||||
CAP_DAC_OVERRIDE = 0x1
|
CAP_DAC_OVERRIDE = 0x1
|
||||||
|
CAP_SETFCAP = 0x1f
|
||||||
)
|
)
|
||||||
|
|
||||||
type (
|
type (
|
||||||
|
|||||||
@@ -91,6 +91,9 @@ type (
|
|||||||
// Time to wait for processes lingering after the initial process terminates.
|
// Time to wait for processes lingering after the initial process terminates.
|
||||||
AdoptWaitDelay time.Duration
|
AdoptWaitDelay time.Duration
|
||||||
|
|
||||||
|
// Map uid/gid 0 in the init process. Requires [FstypeProc] attached to
|
||||||
|
// [fhs.Proc] in the container filesystem.
|
||||||
|
InitAsRoot bool
|
||||||
// Mapped Uid in user namespace.
|
// Mapped Uid in user namespace.
|
||||||
Uid int
|
Uid int
|
||||||
// Mapped Gid in user namespace.
|
// Mapped Gid in user namespace.
|
||||||
@@ -286,6 +289,18 @@ func (p *Container) Start() error {
|
|||||||
if !p.HostNet {
|
if !p.HostNet {
|
||||||
p.cmd.SysProcAttr.Cloneflags |= CLONE_NEWNET
|
p.cmd.SysProcAttr.Cloneflags |= CLONE_NEWNET
|
||||||
}
|
}
|
||||||
|
if p.InitAsRoot {
|
||||||
|
p.cmd.SysProcAttr.AmbientCaps = append(p.cmd.SysProcAttr.AmbientCaps,
|
||||||
|
// mappings during init as root
|
||||||
|
CAP_SETFCAP,
|
||||||
|
)
|
||||||
|
|
||||||
|
if !p.SeccompDisable &&
|
||||||
|
len(p.SeccompRules) == 0 &&
|
||||||
|
p.SeccompPresets&std.PresetDenyNS != 0 {
|
||||||
|
return errors.New("container: as root requires late namespace creation")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// place setup pipe before user supplied extra files, this is later restored by init
|
// place setup pipe before user supplied extra files, this is later restored by init
|
||||||
if r, w, err := os.Pipe(); err != nil {
|
if r, w, err := os.Pipe(); err != nil {
|
||||||
|
|||||||
@@ -409,8 +409,11 @@ var containerTestCases = []struct {
|
|||||||
func TestContainer(t *testing.T) {
|
func TestContainer(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
var suffix string
|
||||||
|
runTests:
|
||||||
for i, tc := range containerTestCases {
|
for i, tc := range containerTestCases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
_suffix := suffix
|
||||||
|
t.Run(tc.name+_suffix, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
wantOps, wantOpsCtx := tc.ops(t)
|
wantOps, wantOpsCtx := tc.ops(t)
|
||||||
@@ -434,6 +437,8 @@ func TestContainer(t *testing.T) {
|
|||||||
c.SeccompDisable = !tc.filter
|
c.SeccompDisable = !tc.filter
|
||||||
c.RetainSession = tc.session
|
c.RetainSession = tc.session
|
||||||
c.HostNet = tc.net
|
c.HostNet = tc.net
|
||||||
|
c.InitAsRoot = _suffix != ""
|
||||||
|
c.Env = append(c.Env, "HAKUREI_TEST_SUFFIX="+_suffix)
|
||||||
if info.CanDegrade {
|
if info.CanDegrade {
|
||||||
if _, err := landlock.GetABI(); err != nil {
|
if _, err := landlock.GetABI(); err != nil {
|
||||||
if !errors.Is(err, syscall.ENOSYS) {
|
if !errors.Is(err, syscall.ENOSYS) {
|
||||||
@@ -443,6 +448,9 @@ func TestContainer(t *testing.T) {
|
|||||||
t.Log("Landlock LSM is unavailable, enabling HostAbstract")
|
t.Log("Landlock LSM is unavailable, enabling HostAbstract")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if c.InitAsRoot {
|
||||||
|
c.SeccompPresets &= ^std.PresetDenyNS
|
||||||
|
}
|
||||||
|
|
||||||
c.
|
c.
|
||||||
Readonly(check.MustAbs(pathReadonly), 0755).
|
Readonly(check.MustAbs(pathReadonly), 0755).
|
||||||
@@ -511,6 +519,11 @@ func TestContainer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if suffix == "" {
|
||||||
|
suffix = " as root"
|
||||||
|
goto runTests
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func ent(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoEntry {
|
func ent(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoEntry {
|
||||||
@@ -589,9 +602,9 @@ func testContainerCancel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestForward(t *testing.T) {
|
func TestForward(t *testing.T) {
|
||||||
testContainerCancel(t, func(c *container.Container) {
|
t.Parallel()
|
||||||
c.ForwardCancel = true
|
|
||||||
}, func(ps *os.ProcessState, waitErr error) {
|
f := func(ps *os.ProcessState, waitErr error) {
|
||||||
var exitError *exec.ExitError
|
var exitError *exec.ExitError
|
||||||
if !errors.As(waitErr, &exitError) {
|
if !errors.As(waitErr, &exitError) {
|
||||||
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
||||||
@@ -602,11 +615,26 @@ func TestForward(t *testing.T) {
|
|||||||
if code := exitError.ExitCode(); code != blockExitCodeInterrupt {
|
if code := exitError.ExitCode(); code != blockExitCodeInterrupt {
|
||||||
t.Errorf("ExitCode: %d, want %d", code, blockExitCodeInterrupt)
|
t.Errorf("ExitCode: %d, want %d", code, blockExitCodeInterrupt)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
t.Run("direct", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
testContainerCancel(t, func(c *container.Container) {
|
||||||
|
c.ForwardCancel = true
|
||||||
|
}, f)
|
||||||
|
})
|
||||||
|
t.Run("as root", func(t *testing.T) {
|
||||||
|
testContainerCancel(t, func(c *container.Container) {
|
||||||
|
c.ForwardCancel = true
|
||||||
|
c.InitAsRoot = true
|
||||||
|
c.Proc(fhs.AbsProc)
|
||||||
|
}, f)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCancel(t *testing.T) {
|
func TestCancel(t *testing.T) {
|
||||||
testContainerCancel(t, nil, func(ps *os.ProcessState, waitErr error) {
|
t.Parallel()
|
||||||
|
|
||||||
|
f := func(ps *os.ProcessState, waitErr error) {
|
||||||
wantErr := context.Canceled
|
wantErr := context.Canceled
|
||||||
if !reflect.DeepEqual(waitErr, wantErr) {
|
if !reflect.DeepEqual(waitErr, wantErr) {
|
||||||
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
||||||
@@ -619,6 +647,16 @@ func TestCancel(t *testing.T) {
|
|||||||
} else if code := ps.ExitCode(); code != 0 {
|
} else if code := ps.ExitCode(); code != 0 {
|
||||||
t.Errorf("ExitCode: %d, want %d", code, 0)
|
t.Errorf("ExitCode: %d, want %d", code, 0)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
t.Run("direct", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
testContainerCancel(t, nil, f)
|
||||||
|
})
|
||||||
|
t.Run("as root", func(t *testing.T) {
|
||||||
|
testContainerCancel(t, func(c *container.Container) {
|
||||||
|
c.InitAsRoot = true
|
||||||
|
c.Proc(fhs.AbsProc)
|
||||||
|
}, f)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -655,6 +693,8 @@ func init() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
c.Command("container", command.UsageInternal, func(args []string) error {
|
c.Command("container", command.UsageInternal, func(args []string) error {
|
||||||
|
asRoot := os.Getenv("HAKUREI_TEST_SUFFIX") == " as root"
|
||||||
|
|
||||||
if len(args) != 1 {
|
if len(args) != 1 {
|
||||||
return syscall.EINVAL
|
return syscall.EINVAL
|
||||||
}
|
}
|
||||||
@@ -672,11 +712,19 @@ func init() {
|
|||||||
return fmt.Errorf("gid: %d, want %d", gid, tc.gid)
|
return fmt.Errorf("gid: %d, want %d", gid, tc.gid)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// no attack surface increase during as root due to no_new_privs
|
||||||
|
var wantBounding uintptr = 1
|
||||||
|
asRootNot := " not"
|
||||||
|
if !asRoot {
|
||||||
|
wantBounding = 0
|
||||||
|
asRootNot = ""
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
PR_CAP_AMBIENT = 0x2f
|
PR_CAP_AMBIENT = 0x2f
|
||||||
PR_CAP_AMBIENT_IS_SET = 0x1
|
PR_CAP_AMBIENT_IS_SET = 0x1
|
||||||
)
|
)
|
||||||
for i := range container.LastCap(nil) {
|
for i := range container.LastCap(nil) + 1 {
|
||||||
r, _, errno := syscall.Syscall(
|
r, _, errno := syscall.Syscall(
|
||||||
syscall.SYS_PRCTL,
|
syscall.SYS_PRCTL,
|
||||||
PR_CAP_AMBIENT,
|
PR_CAP_AMBIENT,
|
||||||
@@ -687,7 +735,7 @@ func init() {
|
|||||||
return os.NewSyscallError("prctl", errno)
|
return os.NewSyscallError("prctl", errno)
|
||||||
}
|
}
|
||||||
if r != 0 {
|
if r != 0 {
|
||||||
return fmt.Errorf("capability %d is set", i)
|
return fmt.Errorf("capability %d in ambient set", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
r, _, errno = syscall.Syscall(
|
r, _, errno = syscall.Syscall(
|
||||||
@@ -699,8 +747,8 @@ func init() {
|
|||||||
if errno != 0 {
|
if errno != 0 {
|
||||||
return os.NewSyscallError("prctl", errno)
|
return os.NewSyscallError("prctl", errno)
|
||||||
}
|
}
|
||||||
if r != 0 {
|
if r != wantBounding {
|
||||||
return fmt.Errorf("capability %d in set", i)
|
return fmt.Errorf("capability %d%s in bounding set", i, asRootNot)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -182,13 +182,18 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
cancel()
|
cancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uid, gid := param.Uid, param.Gid
|
||||||
|
if param.InitAsRoot {
|
||||||
|
uid, gid = 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
// write uid/gid map here so parent does not need to set dumpable
|
// write uid/gid map here so parent does not need to set dumpable
|
||||||
if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil {
|
if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil {
|
||||||
k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err)
|
k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err)
|
||||||
}
|
}
|
||||||
if err := k.writeFile(
|
if err := k.writeFile(
|
||||||
fhs.Proc+"self/uid_map",
|
fhs.Proc+"self/uid_map",
|
||||||
[]byte(strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"),
|
[]byte(strconv.Itoa(uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"),
|
||||||
0,
|
0,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
k.fatalf(msg, "%v", err)
|
k.fatalf(msg, "%v", err)
|
||||||
@@ -201,7 +206,7 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
k.fatalf(msg, "%v", err)
|
k.fatalf(msg, "%v", err)
|
||||||
}
|
}
|
||||||
if err := k.writeFile(fhs.Proc+"self/gid_map",
|
if err := k.writeFile(fhs.Proc+"self/gid_map",
|
||||||
[]byte(strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"),
|
[]byte(strconv.Itoa(gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"),
|
||||||
0,
|
0,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
k.fatalf(msg, "%v", err)
|
k.fatalf(msg, "%v", err)
|
||||||
@@ -332,6 +337,9 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
if param.Privileged {
|
if param.Privileged {
|
||||||
keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP)
|
keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP)
|
||||||
}
|
}
|
||||||
|
if param.InitAsRoot {
|
||||||
|
keepCaps = append(keepCaps, CAP_SETFCAP)
|
||||||
|
}
|
||||||
|
|
||||||
if err := k.capAmbientClearAll(); err != nil {
|
if err := k.capAmbientClearAll(); err != nil {
|
||||||
k.fatalf(msg, "cannot clear the ambient capability set: %v", err)
|
k.fatalf(msg, "cannot clear the ambient capability set: %v", err)
|
||||||
@@ -487,6 +495,14 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
cmd.ExtraFiles = extraFiles
|
cmd.ExtraFiles = extraFiles
|
||||||
cmd.Dir = param.Dir.String()
|
cmd.Dir = param.Dir.String()
|
||||||
|
|
||||||
|
if param.InitAsRoot {
|
||||||
|
cmd.SysProcAttr = &SysProcAttr{
|
||||||
|
Cloneflags: CLONE_NEWUSER,
|
||||||
|
UidMappings: []SysProcIDMap{{ContainerID: param.Uid, HostID: 0, Size: 1}},
|
||||||
|
GidMappings: []SysProcIDMap{{ContainerID: param.Gid, HostID: 0, Size: 1}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
msg.Verbosef("starting initial process %s", param.Path)
|
msg.Verbosef("starting initial process %s", param.Path)
|
||||||
if err := k.start(cmd); err != nil {
|
if err := k.start(cmd); err != nil {
|
||||||
k.fatalf(msg, "%v", err)
|
k.fatalf(msg, "%v", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user