container: improve documentation
All checks were successful
Test / Create distribution (push) Successful in 1m16s
Test / Sandbox (push) Successful in 3m2s
Test / Hakurei (push) Successful in 4m4s
Test / ShareFS (push) Successful in 4m17s
Test / Hpkg (push) Successful in 4m49s
Test / Sandbox (race detector) (push) Successful in 5m22s
Test / Hakurei (race detector) (push) Successful in 6m30s
Test / Flake checks (push) Successful in 1m48s
All checks were successful
Test / Create distribution (push) Successful in 1m16s
Test / Sandbox (push) Successful in 3m2s
Test / Hakurei (push) Successful in 4m4s
Test / ShareFS (push) Successful in 4m17s
Test / Hpkg (push) Successful in 4m49s
Test / Sandbox (race detector) (push) Successful in 5m22s
Test / Hakurei (race detector) (push) Successful in 6m30s
Test / Flake checks (push) Successful in 1m48s
This change removes inconsistencies collected over time in this package. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -10,8 +10,7 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(AutoEtcOp)) }
|
func init() { gob.Register(new(AutoEtcOp)) }
|
||||||
|
|
||||||
// Etc appends an [Op] that expands host /etc into a toplevel symlink mirror with /etc semantics.
|
// Etc is a helper for appending [AutoEtcOp] to [Ops].
|
||||||
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
|
||||||
func (f *Ops) Etc(host *check.Absolute, prefix string) *Ops {
|
func (f *Ops) Etc(host *check.Absolute, prefix string) *Ops {
|
||||||
e := &AutoEtcOp{prefix}
|
e := &AutoEtcOp{prefix}
|
||||||
f.Mkdir(fhs.AbsEtc, 0755)
|
f.Mkdir(fhs.AbsEtc, 0755)
|
||||||
@@ -20,6 +19,9 @@ func (f *Ops) Etc(host *check.Absolute, prefix string) *Ops {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AutoEtcOp expands host /etc into a toplevel symlink mirror with /etc semantics.
|
||||||
|
//
|
||||||
|
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
||||||
type AutoEtcOp struct{ Prefix string }
|
type AutoEtcOp struct{ Prefix string }
|
||||||
|
|
||||||
func (e *AutoEtcOp) Valid() bool { return e != nil }
|
func (e *AutoEtcOp) Valid() bool { return e != nil }
|
||||||
|
|||||||
@@ -11,13 +11,15 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(AutoRootOp)) }
|
func init() { gob.Register(new(AutoRootOp)) }
|
||||||
|
|
||||||
// Root appends an [Op] that expands a directory into a toplevel bind mount mirror on container root.
|
// Root is a helper for appending [AutoRootOp] to [Ops].
|
||||||
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
|
||||||
func (f *Ops) Root(host *check.Absolute, flags int) *Ops {
|
func (f *Ops) Root(host *check.Absolute, flags int) *Ops {
|
||||||
*f = append(*f, &AutoRootOp{host, flags, nil})
|
*f = append(*f, &AutoRootOp{host, flags, nil})
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AutoRootOp expands a directory into a toplevel bind mount mirror on container root.
|
||||||
|
//
|
||||||
|
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
||||||
type AutoRootOp struct {
|
type AutoRootOp struct {
|
||||||
Host *check.Absolute
|
Host *check.Absolute
|
||||||
// passed through to bindMount
|
// passed through to bindMount
|
||||||
|
|||||||
@@ -50,10 +50,16 @@ func capset(hdrp *capHeader, datap *[2]capData) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// capBoundingSetDrop drops a capability from the calling thread's capability bounding set.
|
// capBoundingSetDrop drops a capability from the calling thread's capability bounding set.
|
||||||
func capBoundingSetDrop(cap uintptr) error { return Prctl(syscall.PR_CAPBSET_DROP, cap, 0) }
|
func capBoundingSetDrop(cap uintptr) error {
|
||||||
|
return Prctl(syscall.PR_CAPBSET_DROP, cap, 0)
|
||||||
|
}
|
||||||
|
|
||||||
// capAmbientClearAll clears the ambient capability set of the calling thread.
|
// capAmbientClearAll clears the ambient capability set of the calling thread.
|
||||||
func capAmbientClearAll() error { return Prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0) }
|
func capAmbientClearAll() error {
|
||||||
|
return Prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0)
|
||||||
|
}
|
||||||
|
|
||||||
// capAmbientRaise adds to the ambient capability set of the calling thread.
|
// capAmbientRaise adds to the ambient capability set of the calling thread.
|
||||||
func capAmbientRaise(cap uintptr) error { return Prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap) }
|
func capAmbientRaise(cap uintptr) error {
|
||||||
|
return Prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap)
|
||||||
|
}
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ const (
|
|||||||
SpecialOverlayPath = ":"
|
SpecialOverlayPath = ":"
|
||||||
)
|
)
|
||||||
|
|
||||||
// EscapeOverlayDataSegment escapes a string for formatting into the data argument of an overlay mount call.
|
// EscapeOverlayDataSegment escapes a string for formatting into the data
|
||||||
|
// argument of an overlay mount system call.
|
||||||
func EscapeOverlayDataSegment(s string) string {
|
func EscapeOverlayDataSegment(s string) string {
|
||||||
if s == "" {
|
if s == "" {
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
// Package container implements unprivileged Linux containers with built-in support for syscall filtering.
|
// Package container implements unprivileged Linux containers with built-in
|
||||||
|
// support for syscall filtering.
|
||||||
package container
|
package container
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -42,22 +43,25 @@ type (
|
|||||||
SchedPolicy int
|
SchedPolicy int
|
||||||
// Cgroup fd, nil to disable.
|
// Cgroup fd, nil to disable.
|
||||||
Cgroup *int
|
Cgroup *int
|
||||||
// ExtraFiles passed through to initial process in the container,
|
// ExtraFiles passed through to initial process in the container, with
|
||||||
// with behaviour identical to its [exec.Cmd] counterpart.
|
// behaviour identical to its [exec.Cmd] counterpart.
|
||||||
ExtraFiles []*os.File
|
ExtraFiles []*os.File
|
||||||
|
|
||||||
// param pipe for shim and init
|
// Write end of a pipe connected to the init to deliver [Params].
|
||||||
setup *os.File
|
setup *os.File
|
||||||
// cancels cmd
|
// Cancels the context passed to the underlying cmd.
|
||||||
cancel context.CancelFunc
|
cancel context.CancelFunc
|
||||||
// closed after Wait returns
|
// Closed after Wait returns. Keeps the spawning thread alive.
|
||||||
wait chan struct{}
|
wait chan struct{}
|
||||||
|
|
||||||
Stdin io.Reader
|
Stdin io.Reader
|
||||||
Stdout io.Writer
|
Stdout io.Writer
|
||||||
Stderr io.Writer
|
Stderr io.Writer
|
||||||
|
|
||||||
Cancel func(cmd *exec.Cmd) error
|
// Custom cancellation behaviour for the underlying [exec.Cmd]. Must
|
||||||
|
// deliver [CancelSignal] before returning.
|
||||||
|
Cancel func(cmd *exec.Cmd) error
|
||||||
|
// Copied to the underlying [exec.Cmd].
|
||||||
WaitDelay time.Duration
|
WaitDelay time.Duration
|
||||||
|
|
||||||
cmd *exec.Cmd
|
cmd *exec.Cmd
|
||||||
@@ -286,7 +290,11 @@ func (p *Container) Start() error {
|
|||||||
|
|
||||||
// place setup pipe before user supplied extra files, this is later restored by init
|
// place setup pipe before user supplied extra files, this is later restored by init
|
||||||
if fd, f, err := Setup(&p.cmd.ExtraFiles); err != nil {
|
if fd, f, err := Setup(&p.cmd.ExtraFiles); err != nil {
|
||||||
return &StartError{true, "set up params stream", err, false, false}
|
return &StartError{
|
||||||
|
Fatal: true,
|
||||||
|
Step: "set up params stream",
|
||||||
|
Err: err,
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
p.setup = f
|
p.setup = f
|
||||||
p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)}
|
p.cmd.Env = []string{setupEnv + "=" + strconv.Itoa(fd)}
|
||||||
@@ -298,10 +306,16 @@ func (p *Container) Start() error {
|
|||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
p.wait = make(chan struct{})
|
p.wait = make(chan struct{})
|
||||||
|
|
||||||
done <- func() error { // setup depending on per-thread state must happen here
|
// setup depending on per-thread state must happen here
|
||||||
// PR_SET_NO_NEW_PRIVS: depends on per-thread state but acts on all processes created from that thread
|
done <- func() error {
|
||||||
|
// PR_SET_NO_NEW_PRIVS: thread-directed but acts on all processes
|
||||||
|
// created from the calling thread
|
||||||
if err := SetNoNewPrivs(); err != nil {
|
if err := SetNoNewPrivs(); err != nil {
|
||||||
return &StartError{true, "prctl(PR_SET_NO_NEW_PRIVS)", err, false, false}
|
return &StartError{
|
||||||
|
Fatal: true,
|
||||||
|
Step: "prctl(PR_SET_NO_NEW_PRIVS)",
|
||||||
|
Err: err,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// landlock: depends on per-thread state but acts on a process group
|
// landlock: depends on per-thread state but acts on a process group
|
||||||
@@ -313,28 +327,40 @@ func (p *Container) Start() error {
|
|||||||
|
|
||||||
if abi, err := LandlockGetABI(); err != nil {
|
if abi, err := LandlockGetABI(); err != nil {
|
||||||
if p.HostAbstract {
|
if p.HostAbstract {
|
||||||
// landlock can be skipped here as it restricts access to resources
|
// landlock can be skipped here as it restricts access
|
||||||
// already covered by namespaces (pid)
|
// to resources already covered by namespaces (pid)
|
||||||
goto landlockOut
|
goto landlockOut
|
||||||
}
|
}
|
||||||
return &StartError{false, "get landlock ABI", err, false, false}
|
return &StartError{Step: "get landlock ABI", Err: err}
|
||||||
} else if abi < 6 {
|
} else if abi < 6 {
|
||||||
if p.HostAbstract {
|
if p.HostAbstract {
|
||||||
// see above comment
|
// see above comment
|
||||||
goto landlockOut
|
goto landlockOut
|
||||||
}
|
}
|
||||||
return &StartError{false, "kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET", ENOSYS, true, false}
|
return &StartError{
|
||||||
|
Step: "kernel too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET",
|
||||||
|
Err: ENOSYS,
|
||||||
|
Origin: true,
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
p.msg.Verbosef("landlock abi version %d", abi)
|
p.msg.Verbosef("landlock abi version %d", abi)
|
||||||
}
|
}
|
||||||
|
|
||||||
if rulesetFd, err := rulesetAttr.Create(0); err != nil {
|
if rulesetFd, err := rulesetAttr.Create(0); err != nil {
|
||||||
return &StartError{true, "create landlock ruleset", err, false, false}
|
return &StartError{
|
||||||
|
Fatal: true,
|
||||||
|
Step: "create landlock ruleset",
|
||||||
|
Err: err,
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
p.msg.Verbosef("enforcing landlock ruleset %s", rulesetAttr)
|
p.msg.Verbosef("enforcing landlock ruleset %s", rulesetAttr)
|
||||||
if err = LandlockRestrictSelf(rulesetFd, 0); err != nil {
|
if err = LandlockRestrictSelf(rulesetFd, 0); err != nil {
|
||||||
_ = Close(rulesetFd)
|
_ = Close(rulesetFd)
|
||||||
return &StartError{true, "enforce landlock ruleset", err, false, false}
|
return &StartError{
|
||||||
|
Fatal: true,
|
||||||
|
Step: "enforce landlock ruleset",
|
||||||
|
Err: err,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err = Close(rulesetFd); err != nil {
|
if err = Close(rulesetFd); err != nil {
|
||||||
p.msg.Verbosef("cannot close landlock ruleset: %v", err)
|
p.msg.Verbosef("cannot close landlock ruleset: %v", err)
|
||||||
@@ -346,7 +372,7 @@ func (p *Container) Start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sched_setscheduler: thread-directed but acts on all processes
|
// sched_setscheduler: thread-directed but acts on all processes
|
||||||
// created from that thread
|
// created from the calling thread
|
||||||
if p.SchedPolicy > 0 {
|
if p.SchedPolicy > 0 {
|
||||||
p.msg.Verbosef("setting scheduling policy %d", p.SchedPolicy)
|
p.msg.Verbosef("setting scheduling policy %d", p.SchedPolicy)
|
||||||
if err := schedSetscheduler(
|
if err := schedSetscheduler(
|
||||||
@@ -364,7 +390,11 @@ func (p *Container) Start() error {
|
|||||||
|
|
||||||
p.msg.Verbose("starting container init")
|
p.msg.Verbose("starting container init")
|
||||||
if err := p.cmd.Start(); err != nil {
|
if err := p.cmd.Start(); err != nil {
|
||||||
return &StartError{false, "start container init", err, false, true}
|
return &StartError{
|
||||||
|
Step: "start container init",
|
||||||
|
Err: err,
|
||||||
|
Passthrough: true,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}()
|
}()
|
||||||
@@ -376,6 +406,7 @@ func (p *Container) Start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Serve serves [Container.Params] to the container init.
|
// Serve serves [Container.Params] to the container init.
|
||||||
|
//
|
||||||
// Serve must only be called once.
|
// Serve must only be called once.
|
||||||
func (p *Container) Serve() error {
|
func (p *Container) Serve() error {
|
||||||
if p.setup == nil {
|
if p.setup == nil {
|
||||||
@@ -385,12 +416,21 @@ func (p *Container) Serve() error {
|
|||||||
setup := p.setup
|
setup := p.setup
|
||||||
p.setup = nil
|
p.setup = nil
|
||||||
if err := setup.SetDeadline(time.Now().Add(initSetupTimeout)); err != nil {
|
if err := setup.SetDeadline(time.Now().Add(initSetupTimeout)); err != nil {
|
||||||
return &StartError{true, "set init pipe deadline", err, false, true}
|
return &StartError{
|
||||||
|
Fatal: true,
|
||||||
|
Step: "set init pipe deadline",
|
||||||
|
Err: err,
|
||||||
|
Passthrough: true,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.Path == nil {
|
if p.Path == nil {
|
||||||
p.cancel()
|
p.cancel()
|
||||||
return &StartError{false, "invalid executable pathname", EINVAL, true, false}
|
return &StartError{
|
||||||
|
Step: "invalid executable pathname",
|
||||||
|
Err: EINVAL,
|
||||||
|
Origin: true,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// do not transmit nil
|
// do not transmit nil
|
||||||
@@ -415,7 +455,8 @@ func (p *Container) Serve() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait waits for the container init process to exit and releases any resources associated with the [Container].
|
// Wait blocks until the container init process to exit and releases any
|
||||||
|
// resources associated with the [Container].
|
||||||
func (p *Container) Wait() error {
|
func (p *Container) Wait() error {
|
||||||
if p.cmd == nil || p.cmd.Process == nil {
|
if p.cmd == nil || p.cmd.Process == nil {
|
||||||
return EINVAL
|
return EINVAL
|
||||||
@@ -460,11 +501,13 @@ func (p *Container) StderrPipe() (r io.ReadCloser, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *Container) String() string {
|
func (p *Container) String() string {
|
||||||
return fmt.Sprintf("argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
|
return fmt.Sprintf(
|
||||||
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets))
|
"argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
|
||||||
|
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProcessState returns the address to os.ProcessState held by the underlying [exec.Cmd].
|
// ProcessState returns the address of os.ProcessState held by the underlying [exec.Cmd].
|
||||||
func (p *Container) ProcessState() *os.ProcessState {
|
func (p *Container) ProcessState() *os.ProcessState {
|
||||||
if p.cmd == nil {
|
if p.cmd == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -472,7 +515,8 @@ func (p *Container) ProcessState() *os.ProcessState {
|
|||||||
return p.cmd.ProcessState
|
return p.cmd.ProcessState
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns the address to a new instance of [Container] that requires further initialisation before use.
|
// New returns the address to a new instance of [Container]. This value requires
|
||||||
|
// further initialisation before use.
|
||||||
func New(ctx context.Context, msg message.Msg) *Container {
|
func New(ctx context.Context, msg message.Msg) *Container {
|
||||||
if msg == nil {
|
if msg == nil {
|
||||||
msg = message.New(nil)
|
msg = message.New(nil)
|
||||||
@@ -486,7 +530,13 @@ func New(ctx context.Context, msg message.Msg) *Container {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewCommand calls [New] and initialises the [Params.Path] and [Params.Args] fields.
|
// NewCommand calls [New] and initialises the [Params.Path] and [Params.Args] fields.
|
||||||
func NewCommand(ctx context.Context, msg message.Msg, pathname *check.Absolute, name string, args ...string) *Container {
|
func NewCommand(
|
||||||
|
ctx context.Context,
|
||||||
|
msg message.Msg,
|
||||||
|
pathname *check.Absolute,
|
||||||
|
name string,
|
||||||
|
args ...string,
|
||||||
|
) *Container {
|
||||||
z := New(ctx, msg)
|
z := New(ctx, msg)
|
||||||
z.Path = pathname
|
z.Path = pathname
|
||||||
z.Args = append([]string{name}, args...)
|
z.Args = append([]string{name}, args...)
|
||||||
|
|||||||
@@ -21,7 +21,8 @@ type osFile interface {
|
|||||||
fs.File
|
fs.File
|
||||||
}
|
}
|
||||||
|
|
||||||
// syscallDispatcher provides methods that make state-dependent system calls as part of their behaviour.
|
// syscallDispatcher provides methods that make state-dependent system calls as
|
||||||
|
// part of their behaviour.
|
||||||
type syscallDispatcher interface {
|
type syscallDispatcher interface {
|
||||||
// new starts a goroutine with a new instance of syscallDispatcher.
|
// new starts a goroutine with a new instance of syscallDispatcher.
|
||||||
// A syscallDispatcher must never be used in any goroutine other than the one owning it,
|
// A syscallDispatcher must never be used in any goroutine other than the one owning it,
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ func messageFromError(err error) (m string, ok bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// messagePrefix checks and prefixes the error message of a non-pointer error.
|
// messagePrefix checks and prefixes the error message of a non-pointer error.
|
||||||
// While this is usable for pointer errors, such use should be avoided as nil check is omitted.
|
// While this is usable for pointer errors, such use should be avoided as nil
|
||||||
|
// check is omitted.
|
||||||
func messagePrefix[T error](prefix string, err error) (string, bool) {
|
func messagePrefix[T error](prefix string, err error) (string, bool) {
|
||||||
var targetError T
|
var targetError T
|
||||||
if errors.As(err, &targetError) {
|
if errors.As(err, &targetError) {
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ const (
|
|||||||
// Tmp points to the place for small temporary files.
|
// Tmp points to the place for small temporary files.
|
||||||
Tmp = "/tmp/"
|
Tmp = "/tmp/"
|
||||||
|
|
||||||
// Run points to a "tmpfs" file system for system packages to place runtime data, socket files, and similar.
|
// Run points to a "tmpfs" file system for system packages to place runtime
|
||||||
|
// data, socket files, and similar.
|
||||||
Run = "/run/"
|
Run = "/run/"
|
||||||
// RunUser points to a directory containing per-user runtime directories,
|
// RunUser points to a directory containing per-user runtime directories,
|
||||||
// each usually individually mounted "tmpfs" instances.
|
// each usually individually mounted "tmpfs" instances.
|
||||||
@@ -17,10 +18,12 @@ const (
|
|||||||
|
|
||||||
// Usr points to vendor-supplied operating system resources.
|
// Usr points to vendor-supplied operating system resources.
|
||||||
Usr = "/usr/"
|
Usr = "/usr/"
|
||||||
// UsrBin points to binaries and executables for user commands that shall appear in the $PATH search path.
|
// UsrBin points to binaries and executables for user commands that shall
|
||||||
|
// appear in the $PATH search path.
|
||||||
UsrBin = Usr + "bin/"
|
UsrBin = Usr + "bin/"
|
||||||
|
|
||||||
// Var points to persistent, variable system data. Writable during normal system operation.
|
// Var points to persistent, variable system data. Writable during normal
|
||||||
|
// system operation.
|
||||||
Var = "/var/"
|
Var = "/var/"
|
||||||
// VarLib points to persistent system data.
|
// VarLib points to persistent system data.
|
||||||
VarLib = Var + "lib/"
|
VarLib = Var + "lib/"
|
||||||
@@ -29,12 +32,16 @@ const (
|
|||||||
|
|
||||||
// Dev points to the root directory for device nodes.
|
// Dev points to the root directory for device nodes.
|
||||||
Dev = "/dev/"
|
Dev = "/dev/"
|
||||||
// DevShm is the place for POSIX shared memory segments, as created via shm_open(3).
|
// DevShm is the place for POSIX shared memory segments, as created via
|
||||||
|
// shm_open(3).
|
||||||
DevShm = "/dev/shm/"
|
DevShm = "/dev/shm/"
|
||||||
// Proc points to a virtual kernel file system exposing the process list and other functionality.
|
// Proc points to a virtual kernel file system exposing the process list and
|
||||||
|
// other functionality.
|
||||||
Proc = "/proc/"
|
Proc = "/proc/"
|
||||||
// ProcSys points to a hierarchy below /proc/ that exposes a number of kernel tunables.
|
// ProcSys points to a hierarchy below /proc/ that exposes a number of
|
||||||
|
// kernel tunables.
|
||||||
ProcSys = Proc + "sys/"
|
ProcSys = Proc + "sys/"
|
||||||
// Sys points to a virtual kernel file system exposing discovered devices and other functionality.
|
// Sys points to a virtual kernel file system exposing discovered devices
|
||||||
|
// and other functionality.
|
||||||
Sys = "/sys/"
|
Sys = "/sys/"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -33,12 +33,12 @@ const (
|
|||||||
- This path is only accessible by init and root:
|
- This path is only accessible by init and root:
|
||||||
The container init sets SUID_DUMP_DISABLE and terminates if that fails.
|
The container init sets SUID_DUMP_DISABLE and terminates if that fails.
|
||||||
|
|
||||||
It should be noted that none of this should become relevant at any point since the resulting
|
It should be noted that none of this should become relevant at any point
|
||||||
intermediate root tmpfs should be effectively anonymous. */
|
since the resulting intermediate root tmpfs should be effectively anonymous. */
|
||||||
intermediateHostPath = fhs.Proc + "self/fd"
|
intermediateHostPath = fhs.Proc + "self/fd"
|
||||||
|
|
||||||
// setupEnv is the name of the environment variable holding the string representation of
|
// setupEnv is the name of the environment variable holding the string
|
||||||
// the read end file descriptor of the setup params pipe.
|
// representation of the read end file descriptor of the setup params pipe.
|
||||||
setupEnv = "HAKUREI_SETUP"
|
setupEnv = "HAKUREI_SETUP"
|
||||||
|
|
||||||
// exitUnexpectedWait4 is the exit code if wait4 returns an unexpected errno.
|
// exitUnexpectedWait4 is the exit code if wait4 returns an unexpected errno.
|
||||||
@@ -59,7 +59,8 @@ type (
|
|||||||
// late is called right before starting the initial process.
|
// late is called right before starting the initial process.
|
||||||
late(state *setupState, k syscallDispatcher) error
|
late(state *setupState, k syscallDispatcher) error
|
||||||
|
|
||||||
// prefix returns a log message prefix, and whether this Op prints no identifying message on its own.
|
// prefix returns a log message prefix, and whether this Op prints no
|
||||||
|
// identifying message on its own.
|
||||||
prefix() (string, bool)
|
prefix() (string, bool)
|
||||||
|
|
||||||
Is(op Op) bool
|
Is(op Op) bool
|
||||||
@@ -71,9 +72,11 @@ type (
|
|||||||
setupState struct {
|
setupState struct {
|
||||||
nonrepeatable uintptr
|
nonrepeatable uintptr
|
||||||
|
|
||||||
// Whether early reaping has concluded. Must only be accessed in the wait4 loop.
|
// Whether early reaping has concluded. Must only be accessed in the
|
||||||
|
// wait4 loop.
|
||||||
processConcluded bool
|
processConcluded bool
|
||||||
// Process to syscall.WaitStatus populated in the wait4 loop. Freed after early reaping concludes.
|
// Process to syscall.WaitStatus populated in the wait4 loop. Freed
|
||||||
|
// after early reaping concludes.
|
||||||
process map[int]WaitStatus
|
process map[int]WaitStatus
|
||||||
// Synchronises access to process.
|
// Synchronises access to process.
|
||||||
processMu sync.RWMutex
|
processMu sync.RWMutex
|
||||||
@@ -216,9 +219,10 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
/* early is called right before pivot_root into intermediate root;
|
/* early is called right before pivot_root into intermediate root;
|
||||||
this step is mostly for gathering information that would otherwise be difficult to obtain
|
this step is mostly for gathering information that would otherwise be
|
||||||
via library functions after pivot_root, and implementations are expected to avoid changing
|
difficult to obtain via library functions after pivot_root, and
|
||||||
the state of the mount namespace */
|
implementations are expected to avoid changing the state of the mount
|
||||||
|
namespace */
|
||||||
for i, op := range *params.Ops {
|
for i, op := range *params.Ops {
|
||||||
if op == nil || !op.Valid() {
|
if op == nil || !op.Valid() {
|
||||||
k.fatalf(msg, "invalid op at index %d", i)
|
k.fatalf(msg, "invalid op at index %d", i)
|
||||||
@@ -258,10 +262,10 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
|||||||
k.fatalf(msg, "cannot enter intermediate root: %v", err)
|
k.fatalf(msg, "cannot enter intermediate root: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* apply is called right after pivot_root and entering the new root;
|
/* apply is called right after pivot_root and entering the new root. This
|
||||||
this step sets up the container filesystem, and implementations are expected to keep the host root
|
step sets up the container filesystem, and implementations are expected to
|
||||||
and sysroot mount points intact but otherwise can do whatever they need to;
|
keep the host root and sysroot mount points intact but otherwise can do
|
||||||
chdir is allowed but discouraged */
|
whatever they need to. Calling chdir is allowed but discouraged. */
|
||||||
for i, op := range *params.Ops {
|
for i, op := range *params.Ops {
|
||||||
// ops already checked during early setup
|
// ops already checked during early setup
|
||||||
if prefix, ok := op.prefix(); ok {
|
if prefix, ok := op.prefix(); ok {
|
||||||
|
|||||||
@@ -12,14 +12,16 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(BindMountOp)) }
|
func init() { gob.Register(new(BindMountOp)) }
|
||||||
|
|
||||||
// Bind appends an [Op] that bind mounts host path [BindMountOp.Source] on container path [BindMountOp.Target].
|
// Bind is a helper for appending [BindMountOp] to [Ops].
|
||||||
func (f *Ops) Bind(source, target *check.Absolute, flags int) *Ops {
|
func (f *Ops) Bind(source, target *check.Absolute, flags int) *Ops {
|
||||||
*f = append(*f, &BindMountOp{nil, source, target, flags})
|
*f = append(*f, &BindMountOp{nil, source, target, flags})
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// BindMountOp bind mounts host path Source on container path Target.
|
// BindMountOp creates a bind mount from host path Source to container path Target.
|
||||||
// Note that Flags uses bits declared in this package and should not be set with constants in [syscall].
|
//
|
||||||
|
// Note that Flags uses bits declared in the [std] package and should not be set
|
||||||
|
// with constants in [syscall].
|
||||||
type BindMountOp struct {
|
type BindMountOp struct {
|
||||||
sourceFinal, Source, Target *check.Absolute
|
sourceFinal, Source, Target *check.Absolute
|
||||||
|
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ const (
|
|||||||
daemonTimeout = 5 * time.Second
|
daemonTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// Daemon appends an [Op] that starts a daemon in the container and blocks until
|
// Daemon is a helper for appending [DaemonOp] to [Ops].
|
||||||
// [DaemonOp.Target] appears.
|
|
||||||
func (f *Ops) Daemon(target, path *check.Absolute, args ...string) *Ops {
|
func (f *Ops) Daemon(target, path *check.Absolute, args ...string) *Ops {
|
||||||
*f = append(*f, &DaemonOp{target, path, args})
|
*f = append(*f, &DaemonOp{target, path, args})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -19,7 +19,9 @@ func (f *Ops) Dev(target *check.Absolute, mqueue bool) *Ops {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DevWritable appends an [Op] that mounts a writable subset of host /dev.
|
// DevWritable appends an [Op] that mounts a writable subset of host /dev.
|
||||||
// There is usually no good reason to write to /dev, so this should always be followed by a [RemountOp].
|
//
|
||||||
|
// There is usually no good reason to write to /dev, so this should always be
|
||||||
|
// followed by a [RemountOp].
|
||||||
func (f *Ops) DevWritable(target *check.Absolute, mqueue bool) *Ops {
|
func (f *Ops) DevWritable(target *check.Absolute, mqueue bool) *Ops {
|
||||||
*f = append(*f, &MountDevOp{target, mqueue, true})
|
*f = append(*f, &MountDevOp{target, mqueue, true})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(MkdirOp)) }
|
func init() { gob.Register(new(MkdirOp)) }
|
||||||
|
|
||||||
// Mkdir appends an [Op] that creates a directory in the container filesystem.
|
// Mkdir is a helper for appending [MkdirOp] to [Ops].
|
||||||
func (f *Ops) Mkdir(name *check.Absolute, perm os.FileMode) *Ops {
|
func (f *Ops) Mkdir(name *check.Absolute, perm os.FileMode) *Ops {
|
||||||
*f = append(*f, &MkdirOp{name, perm})
|
*f = append(*f, &MkdirOp{name, perm})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -54,8 +54,11 @@ func (e *OverlayArgumentError) Error() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overlay appends an [Op] that mounts the overlay pseudo filesystem on [MountOverlayOp.Target].
|
// Overlay is a helper for appending [MountOverlayOp] to [Ops].
|
||||||
func (f *Ops) Overlay(target, state, work *check.Absolute, layers ...*check.Absolute) *Ops {
|
func (f *Ops) Overlay(
|
||||||
|
target, state, work *check.Absolute,
|
||||||
|
layers ...*check.Absolute,
|
||||||
|
) *Ops {
|
||||||
*f = append(*f, &MountOverlayOp{
|
*f = append(*f, &MountOverlayOp{
|
||||||
Target: target,
|
Target: target,
|
||||||
Lower: layers,
|
Lower: layers,
|
||||||
@@ -65,13 +68,12 @@ func (f *Ops) Overlay(target, state, work *check.Absolute, layers ...*check.Abso
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// OverlayEphemeral appends an [Op] that mounts the overlay pseudo filesystem on [MountOverlayOp.Target]
|
// OverlayEphemeral appends a [MountOverlayOp] with an ephemeral upperdir and workdir.
|
||||||
// with an ephemeral upperdir and workdir.
|
|
||||||
func (f *Ops) OverlayEphemeral(target *check.Absolute, layers ...*check.Absolute) *Ops {
|
func (f *Ops) OverlayEphemeral(target *check.Absolute, layers ...*check.Absolute) *Ops {
|
||||||
return f.Overlay(target, fhs.AbsRoot, nil, layers...)
|
return f.Overlay(target, fhs.AbsRoot, nil, layers...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// OverlayReadonly appends an [Op] that mounts the overlay pseudo filesystem readonly on [MountOverlayOp.Target]
|
// OverlayReadonly appends a readonly [MountOverlayOp].
|
||||||
func (f *Ops) OverlayReadonly(target *check.Absolute, layers ...*check.Absolute) *Ops {
|
func (f *Ops) OverlayReadonly(target *check.Absolute, layers ...*check.Absolute) *Ops {
|
||||||
return f.Overlay(target, nil, nil, layers...)
|
return f.Overlay(target, nil, nil, layers...)
|
||||||
}
|
}
|
||||||
@@ -82,25 +84,34 @@ type MountOverlayOp struct {
|
|||||||
|
|
||||||
// Any filesystem, does not need to be on a writable filesystem.
|
// Any filesystem, does not need to be on a writable filesystem.
|
||||||
Lower []*check.Absolute
|
Lower []*check.Absolute
|
||||||
// formatted for [OptionOverlayLowerdir], resolved, prefixed and escaped during early
|
// Formatted for [OptionOverlayLowerdir].
|
||||||
|
//
|
||||||
|
// Resolved, prefixed and escaped during early.
|
||||||
lower []string
|
lower []string
|
||||||
|
|
||||||
// The upperdir is normally on a writable filesystem.
|
// The upperdir is normally on a writable filesystem.
|
||||||
//
|
//
|
||||||
// If Work is nil and Upper holds the special value [fhs.AbsRoot],
|
// If Work is nil and Upper holds the special value [fhs.AbsRoot], an
|
||||||
// an ephemeral upperdir and workdir will be set up.
|
// ephemeral upperdir and workdir will be set up.
|
||||||
//
|
//
|
||||||
// If both Work and Upper are nil, upperdir and workdir is omitted and the overlay is mounted readonly.
|
// If both Work and Upper are nil, upperdir and workdir is omitted and the
|
||||||
|
// overlay is mounted readonly.
|
||||||
Upper *check.Absolute
|
Upper *check.Absolute
|
||||||
// formatted for [OptionOverlayUpperdir], resolved, prefixed and escaped during early
|
// Formatted for [OptionOverlayUpperdir].
|
||||||
|
//
|
||||||
|
// Resolved, prefixed and escaped during early.
|
||||||
upper string
|
upper string
|
||||||
|
|
||||||
// The workdir needs to be an empty directory on the same filesystem as upperdir.
|
// The workdir needs to be an empty directory on the same filesystem as upperdir.
|
||||||
Work *check.Absolute
|
Work *check.Absolute
|
||||||
// formatted for [OptionOverlayWorkdir], resolved, prefixed and escaped during early
|
// Formatted for [OptionOverlayWorkdir].
|
||||||
|
//
|
||||||
|
// Resolved, prefixed and escaped during early.
|
||||||
work string
|
work string
|
||||||
|
|
||||||
ephemeral bool
|
ephemeral bool
|
||||||
|
|
||||||
// used internally for mounting to the intermediate root
|
// Used internally for mounting to the intermediate root.
|
||||||
noPrefix bool
|
noPrefix bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ const (
|
|||||||
|
|
||||||
func init() { gob.Register(new(TmpfileOp)) }
|
func init() { gob.Register(new(TmpfileOp)) }
|
||||||
|
|
||||||
// Place appends an [Op] that places a file in container path [TmpfileOp.Path] containing [TmpfileOp.Data].
|
// Place is a helper for appending [TmpfileOp] to [Ops].
|
||||||
func (f *Ops) Place(name *check.Absolute, data []byte) *Ops {
|
func (f *Ops) Place(name *check.Absolute, data []byte) *Ops {
|
||||||
*f = append(*f, &TmpfileOp{name, data})
|
*f = append(*f, &TmpfileOp{name, data})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(MountProcOp)) }
|
func init() { gob.Register(new(MountProcOp)) }
|
||||||
|
|
||||||
// Proc appends an [Op] that mounts a private instance of proc.
|
// Proc is a helper for appending [MountProcOp] to [Ops].
|
||||||
func (f *Ops) Proc(target *check.Absolute) *Ops {
|
func (f *Ops) Proc(target *check.Absolute) *Ops {
|
||||||
*f = append(*f, &MountProcOp{target})
|
*f = append(*f, &MountProcOp{target})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import (
|
|||||||
|
|
||||||
func init() { gob.Register(new(RemountOp)) }
|
func init() { gob.Register(new(RemountOp)) }
|
||||||
|
|
||||||
// Remount appends an [Op] that applies [RemountOp.Flags] on container path [RemountOp.Target].
|
// Remount is a helper for appending [RemountOp] to [Ops].
|
||||||
func (f *Ops) Remount(target *check.Absolute, flags uintptr) *Ops {
|
func (f *Ops) Remount(target *check.Absolute, flags uintptr) *Ops {
|
||||||
*f = append(*f, &RemountOp{target, flags})
|
*f = append(*f, &RemountOp{target, flags})
|
||||||
return f
|
return f
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ const (
|
|||||||
_LANDLOCK_ACCESS_FS_DELIM
|
_LANDLOCK_ACCESS_FS_DELIM
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// String returns a space-separated string of [LandlockAccessFS] flags.
|
||||||
func (f LandlockAccessFS) String() string {
|
func (f LandlockAccessFS) String() string {
|
||||||
switch f {
|
switch f {
|
||||||
case LANDLOCK_ACCESS_FS_EXECUTE:
|
case LANDLOCK_ACCESS_FS_EXECUTE:
|
||||||
@@ -116,6 +117,7 @@ const (
|
|||||||
_LANDLOCK_ACCESS_NET_DELIM
|
_LANDLOCK_ACCESS_NET_DELIM
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// String returns a space-separated string of [LandlockAccessNet] flags.
|
||||||
func (f LandlockAccessNet) String() string {
|
func (f LandlockAccessNet) String() string {
|
||||||
switch f {
|
switch f {
|
||||||
case LANDLOCK_ACCESS_NET_BIND_TCP:
|
case LANDLOCK_ACCESS_NET_BIND_TCP:
|
||||||
@@ -152,6 +154,7 @@ const (
|
|||||||
_LANDLOCK_SCOPE_DELIM
|
_LANDLOCK_SCOPE_DELIM
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// String returns a space-separated string of [LandlockScope] flags.
|
||||||
func (f LandlockScope) String() string {
|
func (f LandlockScope) String() string {
|
||||||
switch f {
|
switch f {
|
||||||
case LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET:
|
case LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET:
|
||||||
@@ -184,10 +187,12 @@ type RulesetAttr struct {
|
|||||||
HandledAccessFS LandlockAccessFS
|
HandledAccessFS LandlockAccessFS
|
||||||
// Bitmask of handled network actions.
|
// Bitmask of handled network actions.
|
||||||
HandledAccessNet LandlockAccessNet
|
HandledAccessNet LandlockAccessNet
|
||||||
// Bitmask of scopes restricting a Landlock domain from accessing outside resources (e.g. IPCs).
|
// Bitmask of scopes restricting a Landlock domain from accessing outside
|
||||||
|
// resources (e.g. IPCs).
|
||||||
Scoped LandlockScope
|
Scoped LandlockScope
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String returns a user-facing description of [RulesetAttr].
|
||||||
func (rulesetAttr *RulesetAttr) String() string {
|
func (rulesetAttr *RulesetAttr) String() string {
|
||||||
if rulesetAttr == nil {
|
if rulesetAttr == nil {
|
||||||
return "NULL"
|
return "NULL"
|
||||||
@@ -208,6 +213,7 @@ func (rulesetAttr *RulesetAttr) String() string {
|
|||||||
return strings.Join(elems, ", ")
|
return strings.Join(elems, ", ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create loads the ruleset into the kernel.
|
||||||
func (rulesetAttr *RulesetAttr) Create(flags uintptr) (fd int, err error) {
|
func (rulesetAttr *RulesetAttr) Create(flags uintptr) (fd int, err error) {
|
||||||
var pointer, size uintptr
|
var pointer, size uintptr
|
||||||
// NULL needed for abi version
|
// NULL needed for abi version
|
||||||
@@ -216,10 +222,13 @@ func (rulesetAttr *RulesetAttr) Create(flags uintptr) (fd int, err error) {
|
|||||||
size = unsafe.Sizeof(*rulesetAttr)
|
size = unsafe.Sizeof(*rulesetAttr)
|
||||||
}
|
}
|
||||||
|
|
||||||
rulesetFd, _, errno := syscall.Syscall(std.SYS_LANDLOCK_CREATE_RULESET, pointer, size, flags)
|
rulesetFd, _, errno := syscall.Syscall(
|
||||||
|
std.SYS_LANDLOCK_CREATE_RULESET,
|
||||||
|
pointer, size,
|
||||||
|
flags,
|
||||||
|
)
|
||||||
fd = int(rulesetFd)
|
fd = int(rulesetFd)
|
||||||
err = errno
|
err = errno
|
||||||
|
|
||||||
if fd < 0 {
|
if fd < 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -230,12 +239,19 @@ func (rulesetAttr *RulesetAttr) Create(flags uintptr) (fd int, err error) {
|
|||||||
return fd, nil
|
return fd, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LandlockGetABI returns the ABI version supported by the kernel.
|
||||||
func LandlockGetABI() (int, error) {
|
func LandlockGetABI() (int, error) {
|
||||||
return (*RulesetAttr)(nil).Create(LANDLOCK_CREATE_RULESET_VERSION)
|
return (*RulesetAttr)(nil).Create(LANDLOCK_CREATE_RULESET_VERSION)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LandlockRestrictSelf applies a loaded ruleset to the calling thread.
|
||||||
func LandlockRestrictSelf(rulesetFd int, flags uintptr) error {
|
func LandlockRestrictSelf(rulesetFd int, flags uintptr) error {
|
||||||
r, _, errno := syscall.Syscall(std.SYS_LANDLOCK_RESTRICT_SELF, uintptr(rulesetFd), flags, 0)
|
r, _, errno := syscall.Syscall(
|
||||||
|
std.SYS_LANDLOCK_RESTRICT_SELF,
|
||||||
|
uintptr(rulesetFd),
|
||||||
|
flags,
|
||||||
|
0,
|
||||||
|
)
|
||||||
if r != 0 {
|
if r != 0 {
|
||||||
return errno
|
return errno
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,10 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
// Nonexistent is a path that cannot exist.
|
// Nonexistent is a path that cannot exist.
|
||||||
// /proc is chosen because a system with covered /proc is unsupported by this package.
|
//
|
||||||
|
// This path can never be presented by the kernel if proc is mounted on
|
||||||
|
// /proc/. This can only exist if parts of /proc/ is covered, or proc is not
|
||||||
|
// mounted at all. Neither configuration is supported by this package.
|
||||||
Nonexistent = fhs.Proc + "nonexistent"
|
Nonexistent = fhs.Proc + "nonexistent"
|
||||||
|
|
||||||
hostPath = fhs.Root + hostDir
|
hostPath = fhs.Root + hostDir
|
||||||
|
|||||||
@@ -88,18 +88,22 @@ var resPrefix = [...]string{
|
|||||||
7: "seccomp_load failed",
|
7: "seccomp_load failed",
|
||||||
}
|
}
|
||||||
|
|
||||||
// cbAllocateBuffer is the function signature for the function handle passed to hakurei_export_filter
|
// cbAllocateBuffer is the function signature for the function handle passed to
|
||||||
// which allocates the buffer that the resulting bpf program is copied into, and writes its slice header
|
// hakurei_scmp_make_filter which allocates the buffer that the resulting bpf
|
||||||
// to a value held by the caller.
|
// program is copied into, and writes its slice header to a value held by the caller.
|
||||||
type cbAllocateBuffer = func(len C.size_t) (buf unsafe.Pointer)
|
type cbAllocateBuffer = func(len C.size_t) (buf unsafe.Pointer)
|
||||||
|
|
||||||
|
// hakurei_scmp_allocate allocates a buffer of specified size known to the
|
||||||
|
// runtime through a callback passed in a [cgo.Handle].
|
||||||
|
//
|
||||||
//export hakurei_scmp_allocate
|
//export hakurei_scmp_allocate
|
||||||
func hakurei_scmp_allocate(f C.uintptr_t, len C.size_t) (buf unsafe.Pointer) {
|
func hakurei_scmp_allocate(f C.uintptr_t, len C.size_t) (buf unsafe.Pointer) {
|
||||||
return cgo.Handle(f).Value().(cbAllocateBuffer)(len)
|
return cgo.Handle(f).Value().(cbAllocateBuffer)(len)
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeFilter generates a bpf program from a slice of [std.NativeRule] and writes the resulting byte slice to p.
|
// makeFilter generates a bpf program from a slice of [std.NativeRule] and
|
||||||
// The filter is installed to the current process if p is nil.
|
// writes the resulting byte slice to p. The filter is installed to the current
|
||||||
|
// process if p is nil.
|
||||||
func makeFilter(rules []std.NativeRule, flags ExportFlag, p *[]byte) error {
|
func makeFilter(rules []std.NativeRule, flags ExportFlag, p *[]byte) error {
|
||||||
if len(rules) == 0 {
|
if len(rules) == 0 {
|
||||||
return ErrInvalidRules
|
return ErrInvalidRules
|
||||||
@@ -170,8 +174,8 @@ func Export(rules []std.NativeRule, flags ExportFlag) (data []byte, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load generates a bpf program from a slice of [std.NativeRule] and enforces it on the current process.
|
// Load generates a bpf program from a slice of [std.NativeRule] and enforces it
|
||||||
// Errors returned by libseccomp is wrapped in [LibraryError].
|
// on the current process. Errors returned by libseccomp is wrapped in [LibraryError].
|
||||||
func Load(rules []std.NativeRule, flags ExportFlag) error { return makeFilter(rules, flags, nil) }
|
func Load(rules []std.NativeRule, flags ExportFlag) error { return makeFilter(rules, flags, nil) }
|
||||||
|
|
||||||
type (
|
type (
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package vfs
|
|||||||
|
|
||||||
import "strings"
|
import "strings"
|
||||||
|
|
||||||
|
// Unmangle reverses mangling of strings done by the kernel. Its behaviour is
|
||||||
|
// consistent with the equivalent function in util-linux.
|
||||||
func Unmangle(s string) string {
|
func Unmangle(s string) string {
|
||||||
if !strings.ContainsRune(s, '\\') {
|
if !strings.ContainsRune(s, '\\') {
|
||||||
return s
|
return s
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ var (
|
|||||||
ErrMountInfoSep = errors.New("bad optional fields separator")
|
ErrMountInfoSep = errors.New("bad optional fields separator")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A DecoderError describes a nonrecoverable error decoding a mountinfo stream.
|
||||||
type DecoderError struct {
|
type DecoderError struct {
|
||||||
Op string
|
Op string
|
||||||
Line int
|
Line int
|
||||||
@@ -51,7 +52,8 @@ func (e *DecoderError) Error() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type (
|
type (
|
||||||
// A MountInfoDecoder reads and decodes proc_pid_mountinfo(5) entries from an input stream.
|
// A MountInfoDecoder reads and decodes proc_pid_mountinfo(5) entries from
|
||||||
|
// an input stream.
|
||||||
MountInfoDecoder struct {
|
MountInfoDecoder struct {
|
||||||
s *bufio.Scanner
|
s *bufio.Scanner
|
||||||
m *MountInfo
|
m *MountInfo
|
||||||
@@ -72,13 +74,16 @@ type (
|
|||||||
MountInfoEntry struct {
|
MountInfoEntry struct {
|
||||||
// mount ID: a unique ID for the mount (may be reused after umount(2)).
|
// mount ID: a unique ID for the mount (may be reused after umount(2)).
|
||||||
ID int `json:"id"`
|
ID int `json:"id"`
|
||||||
// parent ID: the ID of the parent mount (or of self for the root of this mount namespace's mount tree).
|
// parent ID: the ID of the parent mount (or of self for the root of
|
||||||
|
// this mount namespace's mount tree).
|
||||||
Parent int `json:"parent"`
|
Parent int `json:"parent"`
|
||||||
// major:minor: the value of st_dev for files on this filesystem (see stat(2)).
|
// major:minor: the value of st_dev for files on this filesystem (see stat(2)).
|
||||||
Devno DevT `json:"devno"`
|
Devno DevT `json:"devno"`
|
||||||
// root: the pathname of the directory in the filesystem which forms the root of this mount.
|
// root: the pathname of the directory in the filesystem which forms the
|
||||||
|
// root of this mount.
|
||||||
Root string `json:"root"`
|
Root string `json:"root"`
|
||||||
// mount point: the pathname of the mount point relative to the process's root directory.
|
// mount point: the pathname of the mount point relative to the
|
||||||
|
// process's root directory.
|
||||||
Target string `json:"target"`
|
Target string `json:"target"`
|
||||||
// mount options: per-mount options (see mount(2)).
|
// mount options: per-mount options (see mount(2)).
|
||||||
VfsOptstr string `json:"vfs_optstr"`
|
VfsOptstr string `json:"vfs_optstr"`
|
||||||
@@ -126,7 +131,8 @@ func (e *MountInfoEntry) Flags() (flags uintptr, unmatched []string) {
|
|||||||
|
|
||||||
// NewMountInfoDecoder returns a new decoder that reads from r.
|
// NewMountInfoDecoder returns a new decoder that reads from r.
|
||||||
//
|
//
|
||||||
// The decoder introduces its own buffering and may read data from r beyond the mountinfo entries requested.
|
// The decoder introduces its own buffering and may read data from r beyond the
|
||||||
|
// mountinfo entries requested.
|
||||||
func NewMountInfoDecoder(r io.Reader) *MountInfoDecoder {
|
func NewMountInfoDecoder(r io.Reader) *MountInfoDecoder {
|
||||||
return &MountInfoDecoder{s: bufio.NewScanner(r)}
|
return &MountInfoDecoder{s: bufio.NewScanner(r)}
|
||||||
}
|
}
|
||||||
@@ -271,6 +277,8 @@ func parseMountInfoLine(s string, ent *MountInfoEntry) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EqualWithIgnore compares to [MountInfoEntry] values, ignoring fields that
|
||||||
|
// compare equal to ignore.
|
||||||
func (e *MountInfoEntry) EqualWithIgnore(want *MountInfoEntry, ignore string) bool {
|
func (e *MountInfoEntry) EqualWithIgnore(want *MountInfoEntry, ignore string) bool {
|
||||||
return (e.ID == want.ID || want.ID == -1) &&
|
return (e.ID == want.ID || want.ID == -1) &&
|
||||||
(e.Parent == want.Parent || want.Parent == -1) &&
|
(e.Parent == want.Parent || want.Parent == -1) &&
|
||||||
@@ -284,6 +292,8 @@ func (e *MountInfoEntry) EqualWithIgnore(want *MountInfoEntry, ignore string) bo
|
|||||||
(e.FsOptstr == want.FsOptstr || want.FsOptstr == ignore)
|
(e.FsOptstr == want.FsOptstr || want.FsOptstr == ignore)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String returns a user-facing representation of a [MountInfoEntry]. It fits
|
||||||
|
// roughly into the mountinfo format, but without mangling.
|
||||||
func (e *MountInfoEntry) String() string {
|
func (e *MountInfoEntry) String() string {
|
||||||
return fmt.Sprintf("%d %d %d:%d %s %s %s %s %s %s %s",
|
return fmt.Sprintf("%d %d %d:%d %s %s %s %s %s %s %s",
|
||||||
e.ID, e.Parent, e.Devno[0], e.Devno[1], e.Root, e.Target, e.VfsOptstr,
|
e.ID, e.Parent, e.Devno[0], e.Devno[1], e.Root, e.Target, e.VfsOptstr,
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// UnfoldTargetError is a pathname that never appeared in a mount hierarchy.
|
||||||
type UnfoldTargetError string
|
type UnfoldTargetError string
|
||||||
|
|
||||||
func (e UnfoldTargetError) Error() string {
|
func (e UnfoldTargetError) Error() string {
|
||||||
@@ -27,6 +28,7 @@ func (n *MountInfoNode) Collective() iter.Seq[*MountInfoNode] {
|
|||||||
return func(yield func(*MountInfoNode) bool) { n.visit(yield) }
|
return func(yield func(*MountInfoNode) bool) { n.visit(yield) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// visit recursively visits all visible mountinfo nodes.
|
||||||
func (n *MountInfoNode) visit(yield func(*MountInfoNode) bool) bool {
|
func (n *MountInfoNode) visit(yield func(*MountInfoNode) bool) bool {
|
||||||
if !n.Covered && !yield(n) {
|
if !n.Covered && !yield(n) {
|
||||||
return false
|
return false
|
||||||
|
|||||||
Reference in New Issue
Block a user