container: set FD_CLOEXEC on all open files
All checks were successful
Test / Create distribution (push) Successful in 29s
Test / Sandbox (race detector) (push) Successful in 40s
Test / Hakurei (race detector) (push) Successful in 46s
Test / Hakurei (push) Successful in 47s
Test / Sandbox (push) Successful in 44s
Test / Hpkg (push) Successful in 43s
Test / Flake checks (push) Successful in 1m31s

While fd created from this side always has the FD_CLOEXEC flag, the same is not true for files left open by the parent. This change prevents those files from leaking into the container.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
Ophestra 2025-11-12 00:18:11 +09:00
parent 9dec9dbc4b
commit ac34635890
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
3 changed files with 65 additions and 21 deletions

View File

@ -11,6 +11,7 @@ import (
"os/exec" "os/exec"
"runtime" "runtime"
"strconv" "strconv"
"sync"
. "syscall" . "syscall"
"time" "time"
@ -143,11 +144,18 @@ func (e *StartError) Error() string {
// Message returns a user-facing error message. // Message returns a user-facing error message.
func (e *StartError) Message() string { func (e *StartError) Message() string {
if e.Passthrough { if e.Passthrough {
var (
numError *strconv.NumError
)
switch { switch {
case errors.As(e.Err, new(*os.PathError)), case errors.As(e.Err, new(*os.PathError)),
errors.As(e.Err, new(*os.SyscallError)): errors.As(e.Err, new(*os.SyscallError)):
return "cannot " + e.Err.Error() return "cannot " + e.Err.Error()
case errors.As(e.Err, &numError) && numError != nil:
return "cannot parse " + strconv.Quote(numError.Num) + ": " + numError.Err.Error()
default: default:
return e.Err.Error() return e.Err.Error()
} }
@ -158,6 +166,39 @@ func (e *StartError) Message() string {
return "cannot " + e.Error() return "cannot " + e.Error()
} }
// for ensureCloseOnExec
var (
closeOnExecOnce sync.Once
closeOnExecErr error
)
// ensureCloseOnExec ensures all currently open file descriptors have the syscall.FD_CLOEXEC flag set.
// This is only ran once as it is intended to handle files left open by the parent, and any file opened
// on this side should already have syscall.FD_CLOEXEC set.
func ensureCloseOnExec() error {
closeOnExecOnce.Do(func() {
const fdPrefixPath = "/proc/self/fd/"
var entries []os.DirEntry
if entries, closeOnExecErr = os.ReadDir(fdPrefixPath); closeOnExecErr != nil {
return
}
var fd int
for _, ent := range entries {
if fd, closeOnExecErr = strconv.Atoi(ent.Name()); closeOnExecErr != nil {
break // not reached
}
CloseOnExec(fd)
}
})
if closeOnExecErr == nil {
return nil
}
return &StartError{Fatal: true, Step: "set FD_CLOEXEC on all open files", Err: closeOnExecErr, Passthrough: true}
}
// Start starts the container init. The init process blocks until Serve is called. // Start starts the container init. The init process blocks until Serve is called.
func (p *Container) Start() error { func (p *Container) Start() error {
if p == nil || p.cmd == nil || if p == nil || p.cmd == nil ||
@ -168,6 +209,10 @@ func (p *Container) Start() error {
return errors.New("container: already started") return errors.New("container: already started")
} }
if err := ensureCloseOnExec(); err != nil {
return err
}
// map to overflow id to work around ownership checks // map to overflow id to work around ownership checks
if p.Uid < 1 { if p.Uid < 1 {
p.Uid = OverflowUid(p.msg) p.Uid = OverflowUid(p.msg)

View File

@ -44,8 +44,7 @@ func TestStartError(t *testing.T) {
Fatal: true, Fatal: true,
Step: "set up params stream", Step: "set up params stream",
Err: container.ErrReceiveEnv, Err: container.ErrReceiveEnv,
}, }, "set up params stream: environment variable not set",
"set up params stream: environment variable not set",
container.ErrReceiveEnv, syscall.EBADF, container.ErrReceiveEnv, syscall.EBADF,
"cannot set up params stream: environment variable not set"}, "cannot set up params stream: environment variable not set"},
@ -53,8 +52,7 @@ func TestStartError(t *testing.T) {
Fatal: true, Fatal: true,
Step: "set up params stream", Step: "set up params stream",
Err: &os.SyscallError{Syscall: "pipe2", Err: syscall.EBADF}, Err: &os.SyscallError{Syscall: "pipe2", Err: syscall.EBADF},
}, }, "set up params stream pipe2: bad file descriptor",
"set up params stream pipe2: bad file descriptor",
syscall.EBADF, os.ErrInvalid, syscall.EBADF, os.ErrInvalid,
"cannot set up params stream pipe2: bad file descriptor"}, "cannot set up params stream pipe2: bad file descriptor"},
@ -62,16 +60,14 @@ func TestStartError(t *testing.T) {
Fatal: true, Fatal: true,
Step: "prctl(PR_SET_NO_NEW_PRIVS)", Step: "prctl(PR_SET_NO_NEW_PRIVS)",
Err: syscall.EPERM, Err: syscall.EPERM,
}, }, "prctl(PR_SET_NO_NEW_PRIVS): operation not permitted",
"prctl(PR_SET_NO_NEW_PRIVS): operation not permitted",
syscall.EPERM, syscall.EACCES, syscall.EPERM, syscall.EACCES,
"cannot prctl(PR_SET_NO_NEW_PRIVS): operation not permitted"}, "cannot prctl(PR_SET_NO_NEW_PRIVS): operation not permitted"},
{"landlock abi", &container.StartError{ {"landlock abi", &container.StartError{
Step: "get landlock ABI", Step: "get landlock ABI",
Err: syscall.ENOSYS, Err: syscall.ENOSYS,
}, }, "get landlock ABI: function not implemented",
"get landlock ABI: function not implemented",
syscall.ENOSYS, syscall.ENOEXEC, syscall.ENOSYS, syscall.ENOEXEC,
"cannot get landlock ABI: function not implemented"}, "cannot get landlock ABI: function not implemented"},
@ -79,8 +75,7 @@ func TestStartError(t *testing.T) {
Step: "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET", Step: "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET",
Err: syscall.ENOSYS, Err: syscall.ENOSYS,
Origin: true, Origin: true,
}, }, "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET",
"kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET",
syscall.ENOSYS, syscall.ENOSPC, syscall.ENOSYS, syscall.ENOSPC,
"kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET"}, "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET"},
@ -88,8 +83,7 @@ func TestStartError(t *testing.T) {
Fatal: true, Fatal: true,
Step: "create landlock ruleset", Step: "create landlock ruleset",
Err: syscall.EBADFD, Err: syscall.EBADFD,
}, }, "create landlock ruleset: file descriptor in bad state",
"create landlock ruleset: file descriptor in bad state",
syscall.EBADFD, syscall.EBADF, syscall.EBADFD, syscall.EBADF,
"cannot create landlock ruleset: file descriptor in bad state"}, "cannot create landlock ruleset: file descriptor in bad state"},
@ -97,8 +91,7 @@ func TestStartError(t *testing.T) {
Fatal: true, Fatal: true,
Step: "enforce landlock ruleset", Step: "enforce landlock ruleset",
Err: syscall.ENOTRECOVERABLE, Err: syscall.ENOTRECOVERABLE,
}, }, "enforce landlock ruleset: state not recoverable",
"enforce landlock ruleset: state not recoverable",
syscall.ENOTRECOVERABLE, syscall.ETIMEDOUT, syscall.ENOTRECOVERABLE, syscall.ETIMEDOUT,
"cannot enforce landlock ruleset: state not recoverable"}, "cannot enforce landlock ruleset: state not recoverable"},
@ -109,8 +102,7 @@ func TestStartError(t *testing.T) {
Path: "/proc/nonexistent", Path: "/proc/nonexistent",
Err: syscall.ENOENT, Err: syscall.ENOENT,
}, Passthrough: true, }, Passthrough: true,
}, }, "fork/exec /proc/nonexistent: no such file or directory",
"fork/exec /proc/nonexistent: no such file or directory",
syscall.ENOENT, syscall.ENOSYS, syscall.ENOENT, syscall.ENOSYS,
"cannot fork/exec /proc/nonexistent: no such file or directory"}, "cannot fork/exec /proc/nonexistent: no such file or directory"},
@ -120,11 +112,19 @@ func TestStartError(t *testing.T) {
Syscall: "open", Syscall: "open",
Err: syscall.ENOSYS, Err: syscall.ENOSYS,
}, Passthrough: true, }, Passthrough: true,
}, }, "open: function not implemented",
"open: function not implemented",
syscall.ENOSYS, syscall.ENOENT, syscall.ENOSYS, syscall.ENOENT,
"cannot open: function not implemented"}, "cannot open: function not implemented"},
{"start FD_CLOEXEC", &container.StartError{
Fatal: true,
Step: "set FD_CLOEXEC on all open files",
Err: func() error { _, err := strconv.Atoi("invalid"); return err }(),
Passthrough: true,
}, `strconv.Atoi: parsing "invalid": invalid syntax`,
strconv.ErrSyntax, os.ErrInvalid,
`cannot parse "invalid": invalid syntax`},
{"start other", &container.StartError{ {"start other", &container.StartError{
Step: "start container init", Step: "start container init",
Err: &net.OpError{ Err: &net.OpError{
@ -132,8 +132,7 @@ func TestStartError(t *testing.T) {
Net: "unix", Net: "unix",
Err: syscall.ECONNREFUSED, Err: syscall.ECONNREFUSED,
}, Passthrough: true, }, Passthrough: true,
}, }, "dial unix: connection refused",
"dial unix: connection refused",
syscall.ECONNREFUSED, syscall.ECONNABORTED, syscall.ECONNREFUSED, syscall.ECONNABORTED,
"dial unix: connection refused"}, "dial unix: connection refused"},
} }

View File

@ -46,7 +46,7 @@ swaymsg("exec hakurei run cat")
check_filter(0, "pdlike", "cat") check_filter(0, "pdlike", "cat")
# Check fd leak: # Check fd leak:
swaymsg("exec hakurei -v run sleep infinity") swaymsg("exec exec 127</proc/cmdline && hakurei -v run sleep infinity")
pd_identity0_sleep_pid = int(machine.wait_until_succeeds("pgrep -U 10000 -x sleep", timeout=60)) pd_identity0_sleep_pid = int(machine.wait_until_succeeds("pgrep -U 10000 -x sleep", timeout=60))
print(machine.succeed(f"hakurei-test fd {pd_identity0_sleep_pid}")) print(machine.succeed(f"hakurei-test fd {pd_identity0_sleep_pid}"))
machine.succeed(f"kill -INT {pd_identity0_sleep_pid}") machine.succeed(f"kill -INT {pd_identity0_sleep_pid}")