From bad66facbc1b555d8883534c27ada586fc15df9c Mon Sep 17 00:00:00 2001 From: Ophestra Date: Thu, 7 May 2026 14:14:06 +0900 Subject: [PATCH] container: improve capability handling This cleans up preserving caps for expansion and correctly sets privileged caps. Signed-off-by: Ophestra --- container/container_test.go | 1 - container/init.go | 45 ++++++++++++++++++++++++------------- container/init_test.go | 16 ++++++------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/container/container_test.go b/container/container_test.go index f34035b2..48b6e8a4 100644 --- a/container/container_test.go +++ b/container/container_test.go @@ -537,7 +537,6 @@ func testContainerCancel( containerExtra func(c *container.Container), waitCheck func(ps *os.ProcessState, waitErr error), ) { - t.Parallel() ctx, cancel := context.WithCancel(t.Context()) c := helperNewContainer(ctx, "block") diff --git a/container/init.go b/container/init.go index 76a77828..78c95439 100644 --- a/container/init.go +++ b/container/init.go @@ -186,19 +186,24 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil { k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err) } - if err := k.writeFile(fhs.Proc+"self/uid_map", - append([]byte{}, strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"...), - 0); err != nil { + if err := k.writeFile( + fhs.Proc+"self/uid_map", + []byte(strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"), + 0, + ); err != nil { k.fatalf(msg, "%v", err) } - if err := k.writeFile(fhs.Proc+"self/setgroups", + if err := k.writeFile( + fhs.Proc+"self/setgroups", []byte("deny\n"), - 0); err != nil && !os.IsNotExist(err) { + 0, + ); err != nil && !os.IsNotExist(err) { k.fatalf(msg, "%v", err) } if err := k.writeFile(fhs.Proc+"self/gid_map", - append([]byte{}, strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"...), - 0); err != nil { + []byte(strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"), + 0, + ); err != nil { k.fatalf(msg, "%v", err) } if err := k.setDumpable(ext.SUID_DUMP_DISABLE); err != nil { @@ -323,11 +328,16 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { } } + var keepCaps []uintptr + if param.Privileged { + keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP) + } + if err := k.capAmbientClearAll(); err != nil { k.fatalf(msg, "cannot clear the ambient capability set: %v", err) } - for i := uintptr(0); i <= lastcap; i++ { - if param.Privileged && i == CAP_SYS_ADMIN { + for i := range lastcap + 1 { + if slices.Contains(keepCaps, i) { continue } if err := k.capBoundingSetDrop(i); err != nil { @@ -336,20 +346,23 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) { } var keep [2]uint32 - if param.Privileged { - keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN) - - if err := k.capAmbientRaise(CAP_SYS_ADMIN); err != nil { - k.fatalf(msg, "cannot raise CAP_SYS_ADMIN: %v", err) - } + for _, c := range keepCaps { + keep[capToIndex(c)] |= capToMask(c) } + if err := k.capset( &capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, - &[2]capData{{0, keep[0], keep[0]}, {0, keep[1], keep[1]}}, + &[2]capData{{keep[0], keep[0], keep[0]}, {keep[1], keep[1], keep[1]}}, ); err != nil { k.fatalf(msg, "cannot capset: %v", err) } + for _, c := range keepCaps { + if err := k.capAmbientRaise(c); err != nil { + k.fatalf(msg, "cannot raise %#x: %v", c, err) + } + } + if !param.SeccompDisable { rules := param.SeccompRules if len(rules) == 0 { // non-empty rules slice always overrides presets diff --git a/container/init_test.go b/container/init_test.go index a27391a2..23d0a7b3 100644 --- a/container/init_test.go +++ b/container/init_test.go @@ -1624,7 +1624,6 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil), - call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil), @@ -1656,8 +1655,9 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil), + call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil), call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, stub.UniqueError(19)), - call("fatalf", stub.ExpectArgs{"cannot raise CAP_SYS_ADMIN: %v", []any{stub.UniqueError(19)}}, nil, nil), + call("fatalf", stub.ExpectArgs{"cannot raise %#x: %v", []any{uintptr(0x15), stub.UniqueError(19)}}, nil, nil), }, }, nil}, @@ -1731,7 +1731,6 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil), - call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil), @@ -1763,8 +1762,7 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil), - call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil), - call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, stub.UniqueError(17)), + call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, stub.UniqueError(17)), call("fatalf", stub.ExpectArgs{"cannot capset: %v", []any{stub.UniqueError(17)}}, nil, nil), }, }, nil}, @@ -1839,7 +1837,6 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil), - call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil), @@ -1871,8 +1868,9 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil), + call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil), call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil), - call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil), + call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil), call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, stub.UniqueError(15)), call("fatalf", stub.ExpectArgs{"cannot load syscall filter: %v", []any{stub.UniqueError(15)}}, nil, nil), @@ -2699,7 +2697,6 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil), - call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil), @@ -2731,8 +2728,9 @@ func TestInitEntrypoint(t *testing.T) { call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil), call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil), + call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil), call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil), - call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil), + call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil), call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil), call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, nil), call("verbosef", stub.ExpectArgs{"%d filter rules loaded", []any{73}}, nil, nil),