container: set scheduling policy
All checks were successful
Test / Create distribution (push) Successful in 1m59s
Test / Hakurei (push) Successful in 10m58s
Test / ShareFS (push) Successful in 11m11s
Test / Hakurei (race detector) (push) Successful in 15m14s
Test / Sandbox (push) Successful in 4m4s
Test / Hpkg (push) Successful in 4m29s
Test / Sandbox (race detector) (push) Successful in 2m49s
Test / Flake checks (push) Successful in 1m50s

This is thread-directed so cannot be done externally. The glibc wrapper exposes this behaviour so most multithreaded programs using this is straight up incorrect.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-02-26 16:29:47 +09:00
parent 826347fe1f
commit a6160cd410
2 changed files with 65 additions and 0 deletions

View File

@@ -37,6 +37,9 @@ type (
Container struct {
// Whether the container init should stay alive after its parent terminates.
AllowOrphan bool
// Scheduling policy to set via sched_setscheduler(2). The zero value
// skips this call. Supported policies are [SCHED_BATCH], [SCHED_IDLE].
SchedPolicy int
// Cgroup fd, nil to disable.
Cgroup *int
// ExtraFiles passed through to initial process in the container,
@@ -342,6 +345,23 @@ func (p *Container) Start() error {
landlockOut:
}
// sched_setscheduler: thread-directed but acts on all processes
// created from that thread
if p.SchedPolicy > 0 {
p.msg.Verbosef("setting scheduling policy %d", p.SchedPolicy)
if err := schedSetscheduler(
0, // calling thread
p.SchedPolicy,
&schedParam{0},
); err != nil {
return &StartError{
Fatal: true,
Step: "enforce landlock ruleset",
Err: err,
}
}
}
p.msg.Verbose("starting container init")
if err := p.cmd.Start(); err != nil {
return &StartError{false, "start container init", err, false, true}

View File

@@ -3,6 +3,8 @@ package container
import (
. "syscall"
"unsafe"
"hakurei.app/container/std"
)
// Prctl manipulates various aspects of the behavior of the calling thread or process.
@@ -41,6 +43,49 @@ func Isatty(fd int) bool {
return r == 0
}
// include/uapi/linux/sched.h
const (
SCHED_NORMAL = iota
SCHED_FIFO
SCHED_RR
SCHED_BATCH
_ // SCHED_ISO: reserved but not implemented yet
SCHED_IDLE
SCHED_DEADLINE
SCHED_EXT
)
// schedParam is equivalent to struct sched_param from include/linux/sched.h.
type schedParam struct {
// sched_priority
priority std.ScmpInt
}
// schedSetscheduler sets both the scheduling policy and parameters for the
// thread whose ID is specified in tid. If tid equals zero, the scheduling
// policy and parameters of the calling thread will be set.
//
// This function is unexported because it is [very subtle to use correctly]. The
// function signature in libc is misleading: pid actually refers to a thread ID.
// The glibc wrapper for this system call ignores this semantic and exposes
// this counterintuitive behaviour.
//
// This function is only called from the container setup thread. Do not reuse
// this if you do not have something similar in place!
//
// [very subtle to use correctly]: https://www.openwall.com/lists/musl/2016/03/01/4
func schedSetscheduler(tid, policy int, param *schedParam) error {
if r, _, errno := Syscall(
SYS_SCHED_SETSCHEDULER,
uintptr(tid),
uintptr(policy),
uintptr(unsafe.Pointer(param)),
); r < 0 {
return errno
}
return nil
}
// IgnoringEINTR makes a function call and repeats it if it returns an
// EINTR error. This appears to be required even though we install all
// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.