container: improve capability handling

This cleans up preserving caps for expansion and correctly sets privileged caps.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-05-07 14:14:06 +09:00
parent 4aba014eac
commit bad66facbc
3 changed files with 36 additions and 26 deletions

View File

@@ -537,7 +537,6 @@ func testContainerCancel(
containerExtra func(c *container.Container),
waitCheck func(ps *os.ProcessState, waitErr error),
) {
t.Parallel()
ctx, cancel := context.WithCancel(t.Context())
c := helperNewContainer(ctx, "block")

View File

@@ -186,19 +186,24 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil {
k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err)
}
if err := k.writeFile(fhs.Proc+"self/uid_map",
append([]byte{}, strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"...),
0); err != nil {
if err := k.writeFile(
fhs.Proc+"self/uid_map",
[]byte(strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"),
0,
); err != nil {
k.fatalf(msg, "%v", err)
}
if err := k.writeFile(fhs.Proc+"self/setgroups",
if err := k.writeFile(
fhs.Proc+"self/setgroups",
[]byte("deny\n"),
0); err != nil && !os.IsNotExist(err) {
0,
); err != nil && !os.IsNotExist(err) {
k.fatalf(msg, "%v", err)
}
if err := k.writeFile(fhs.Proc+"self/gid_map",
append([]byte{}, strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"...),
0); err != nil {
[]byte(strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"),
0,
); err != nil {
k.fatalf(msg, "%v", err)
}
if err := k.setDumpable(ext.SUID_DUMP_DISABLE); err != nil {
@@ -323,11 +328,16 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
}
var keepCaps []uintptr
if param.Privileged {
keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP)
}
if err := k.capAmbientClearAll(); err != nil {
k.fatalf(msg, "cannot clear the ambient capability set: %v", err)
}
for i := uintptr(0); i <= lastcap; i++ {
if param.Privileged && i == CAP_SYS_ADMIN {
for i := range lastcap + 1 {
if slices.Contains(keepCaps, i) {
continue
}
if err := k.capBoundingSetDrop(i); err != nil {
@@ -336,20 +346,23 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
var keep [2]uint32
if param.Privileged {
keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN)
if err := k.capAmbientRaise(CAP_SYS_ADMIN); err != nil {
k.fatalf(msg, "cannot raise CAP_SYS_ADMIN: %v", err)
}
for _, c := range keepCaps {
keep[capToIndex(c)] |= capToMask(c)
}
if err := k.capset(
&capHeader{_LINUX_CAPABILITY_VERSION_3, 0},
&[2]capData{{0, keep[0], keep[0]}, {0, keep[1], keep[1]}},
&[2]capData{{keep[0], keep[0], keep[0]}, {keep[1], keep[1], keep[1]}},
); err != nil {
k.fatalf(msg, "cannot capset: %v", err)
}
for _, c := range keepCaps {
if err := k.capAmbientRaise(c); err != nil {
k.fatalf(msg, "cannot raise %#x: %v", c, err)
}
}
if !param.SeccompDisable {
rules := param.SeccompRules
if len(rules) == 0 { // non-empty rules slice always overrides presets

View File

@@ -1624,7 +1624,6 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
@@ -1656,8 +1655,9 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, stub.UniqueError(19)),
call("fatalf", stub.ExpectArgs{"cannot raise CAP_SYS_ADMIN: %v", []any{stub.UniqueError(19)}}, nil, nil),
call("fatalf", stub.ExpectArgs{"cannot raise %#x: %v", []any{uintptr(0x15), stub.UniqueError(19)}}, nil, nil),
},
}, nil},
@@ -1731,7 +1731,6 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
@@ -1763,8 +1762,7 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, stub.UniqueError(17)),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, stub.UniqueError(17)),
call("fatalf", stub.ExpectArgs{"cannot capset: %v", []any{stub.UniqueError(17)}}, nil, nil),
},
}, nil},
@@ -1839,7 +1837,6 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
@@ -1871,8 +1868,9 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil),
call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, stub.UniqueError(15)),
call("fatalf", stub.ExpectArgs{"cannot load syscall filter: %v", []any{stub.UniqueError(15)}}, nil, nil),
@@ -2699,7 +2697,6 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
@@ -2731,8 +2728,9 @@ func TestInitEntrypoint(t *testing.T) {
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil),
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil),
call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, nil),
call("verbosef", stub.ExpectArgs{"%d filter rules loaded", []any{73}}, nil, nil),