container: improve capability handling
All checks were successful
Test / Create distribution (push) Successful in 1m4s
Test / Sandbox (push) Successful in 2m44s
Test / Hakurei (push) Successful in 4m5s
Test / ShareFS (push) Successful in 4m25s
Test / Sandbox (race detector) (push) Successful in 5m55s
Test / Hakurei (race detector) (push) Successful in 7m54s
Test / Flake checks (push) Successful in 1m35s

This cleans up how preserved capabilities are tracked, so that the list can be expanded later, and correctly sets the capabilities of privileged containers.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-05-07 14:14:06 +09:00
parent 4aba014eac
commit bad66facbc
3 changed files with 36 additions and 26 deletions

View File

@@ -186,19 +186,24 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil {
k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err)
}
if err := k.writeFile(fhs.Proc+"self/uid_map",
append([]byte{}, strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"...),
0); err != nil {
if err := k.writeFile(
fhs.Proc+"self/uid_map",
[]byte(strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"),
0,
); err != nil {
k.fatalf(msg, "%v", err)
}
if err := k.writeFile(fhs.Proc+"self/setgroups",
if err := k.writeFile(
fhs.Proc+"self/setgroups",
[]byte("deny\n"),
0); err != nil && !os.IsNotExist(err) {
0,
); err != nil && !os.IsNotExist(err) {
k.fatalf(msg, "%v", err)
}
if err := k.writeFile(fhs.Proc+"self/gid_map",
append([]byte{}, strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"...),
0); err != nil {
[]byte(strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"),
0,
); err != nil {
k.fatalf(msg, "%v", err)
}
if err := k.setDumpable(ext.SUID_DUMP_DISABLE); err != nil {
@@ -323,11 +328,16 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
}
var keepCaps []uintptr
if param.Privileged {
keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP)
}
if err := k.capAmbientClearAll(); err != nil {
k.fatalf(msg, "cannot clear the ambient capability set: %v", err)
}
for i := uintptr(0); i <= lastcap; i++ {
if param.Privileged && i == CAP_SYS_ADMIN {
for i := range lastcap + 1 {
if slices.Contains(keepCaps, i) {
continue
}
if err := k.capBoundingSetDrop(i); err != nil {
@@ -336,20 +346,23 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
}
var keep [2]uint32
if param.Privileged {
keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN)
if err := k.capAmbientRaise(CAP_SYS_ADMIN); err != nil {
k.fatalf(msg, "cannot raise CAP_SYS_ADMIN: %v", err)
}
for _, c := range keepCaps {
keep[capToIndex(c)] |= capToMask(c)
}
if err := k.capset(
&capHeader{_LINUX_CAPABILITY_VERSION_3, 0},
&[2]capData{{0, keep[0], keep[0]}, {0, keep[1], keep[1]}},
&[2]capData{{keep[0], keep[0], keep[0]}, {keep[1], keep[1], keep[1]}},
); err != nil {
k.fatalf(msg, "cannot capset: %v", err)
}
for _, c := range keepCaps {
if err := k.capAmbientRaise(c); err != nil {
k.fatalf(msg, "cannot raise %#x: %v", c, err)
}
}
if !param.SeccompDisable {
rules := param.SeccompRules
if len(rules) == 0 { // non-empty rules slice always overrides presets