container: optionally isolate host abstract UNIX domain sockets via landlock
All checks were successful
Test / Create distribution (pull_request) Successful in 33s
Test / Sandbox (pull_request) Successful in 2m10s
Test / Hpkg (pull_request) Successful in 4m1s
Test / Sandbox (race detector) (pull_request) Successful in 4m19s
Test / Hakurei (pull_request) Successful in 4m55s
Test / Hakurei (race detector) (pull_request) Successful in 5m0s
Test / Create distribution (push) Successful in 27s
Test / Sandbox (race detector) (push) Successful in 44s
Test / Sandbox (push) Successful in 44s
Test / Hakurei (push) Successful in 47s
Test / Hakurei (race detector) (push) Successful in 47s
Test / Hpkg (push) Successful in 45s
Test / Flake checks (pull_request) Successful in 1m47s
Test / Flake checks (push) Successful in 1m36s

This commit is contained in:
Clayton Gilmer 2025-08-18 12:00:52 +09:00 committed by Ophestra
parent 69a4ab8105
commit 5db0714072
Signed by: cat
SSH Key Fingerprint: SHA256:gQ67O0enBZ7UdZypgtspB2FDM1g3GVw8nX0XSdcFw8Q
17 changed files with 375 additions and 9 deletions

View File

@ -28,6 +28,8 @@ type appInfo struct {
// passed through to [hst.Config]
Net bool `json:"net,omitempty"`
// passed through to [hst.Config]
Abstract bool `json:"abstract,omitempty"`
// passed through to [hst.Config]
Device bool `json:"dev,omitempty"`
// passed through to [hst.Config]
Tty bool `json:"tty,omitempty"`
@ -87,6 +89,7 @@ func (app *appInfo) toHst(pathSet *appPathSet, pathname *container.Absolute, arg
Devel: app.Devel,
Userns: app.Userns,
Net: app.Net,
Abstract: app.Abstract,
Device: app.Device,
Tty: app.Tty || flagDropShell,
MapRealUID: app.MapRealUID,

View File

@ -92,6 +92,8 @@ type (
RetainSession bool
// Do not [syscall.CLONE_NEWNET].
HostNet bool
// Do not [LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET].
HostAbstract bool
// Retain CAP_SYS_ADMIN.
Privileged bool
}
@ -185,6 +187,51 @@ func (p *Container) Start() error {
"prctl(PR_SET_NO_NEW_PRIVS):")
}
// landlock: depends on per-thread state but acts on a process group
{
rulesetAttr := &RulesetAttr{Scoped: LANDLOCK_SCOPE_SIGNAL}
if !p.HostAbstract {
rulesetAttr.Scoped |= LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET
}
if abi, err := LandlockGetABI(); err != nil {
if p.HostAbstract {
// landlock can be skipped here as it restricts access to resources
// already covered by namespaces (pid)
goto landlockOut
}
return wrapErrSuffix(err,
"landlock does not appear to be enabled:")
} else if abi < 6 {
if p.HostAbstract {
// see above comment
goto landlockOut
}
return msg.WrapErr(ENOSYS,
"kernel version too old for LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET")
} else {
msg.Verbosef("landlock abi version %d", abi)
}
if rulesetFd, err := rulesetAttr.Create(0); err != nil {
return wrapErrSuffix(err,
"cannot create landlock ruleset:")
} else {
msg.Verbosef("enforcing landlock ruleset %s", rulesetAttr)
if err = LandlockRestrictSelf(rulesetFd, 0); err != nil {
_ = Close(rulesetFd)
return wrapErrSuffix(err,
"cannot enforce landlock ruleset:")
}
if err = Close(rulesetFd); err != nil {
msg.Verbosef("cannot close landlock ruleset: %v", err)
// not fatal
}
}
landlockOut:
}
msg.Verbose("starting container init")
if err := p.cmd.Start(); err != nil {
return msg.WrapErr(err, err.Error())

239
container/landlock.go Normal file
View File

@ -0,0 +1,239 @@
package container
import (
"strings"
"syscall"
"unsafe"
"hakurei.app/container/seccomp"
)
// include/uapi/linux/landlock.h
// Flags accepted by the flags argument of [RulesetAttr.Create].
const (
// LANDLOCK_CREATE_RULESET_VERSION is the flag [LandlockGetABI] passes together
// with a nil ruleset attribute to query the abi version.
LANDLOCK_CREATE_RULESET_VERSION = 1 << iota
)
// LandlockAccessFS is a bitmask of filesystem access rights.
type LandlockAccessFS uintptr

// Each right occupies a single bit, assigned in declaration order starting
// from the least significant bit.
const (
	LANDLOCK_ACCESS_FS_EXECUTE LandlockAccessFS = 1 << iota
	LANDLOCK_ACCESS_FS_WRITE_FILE
	LANDLOCK_ACCESS_FS_READ_FILE
	LANDLOCK_ACCESS_FS_READ_DIR
	LANDLOCK_ACCESS_FS_REMOVE_DIR
	LANDLOCK_ACCESS_FS_REMOVE_FILE
	LANDLOCK_ACCESS_FS_MAKE_CHAR
	LANDLOCK_ACCESS_FS_MAKE_DIR
	LANDLOCK_ACCESS_FS_MAKE_REG
	LANDLOCK_ACCESS_FS_MAKE_SOCK
	LANDLOCK_ACCESS_FS_MAKE_FIFO
	LANDLOCK_ACCESS_FS_MAKE_BLOCK
	LANDLOCK_ACCESS_FS_MAKE_SYM
	LANDLOCK_ACCESS_FS_REFER
	LANDLOCK_ACCESS_FS_TRUNCATE
	LANDLOCK_ACCESS_FS_IOCTL_DEV

	// _LANDLOCK_ACCESS_FS_DELIM is the first bit past the last named right.
	_LANDLOCK_ACCESS_FS_DELIM
)

// String returns the names of every named right set in f, separated by a
// single space and ordered from the lowest bit upwards. It returns "NULL" when
// no named right is set; bits at or above _LANDLOCK_ACCESS_FS_DELIM are ignored.
func (f LandlockAccessFS) String() string {
	// ordered by ascending bit so the output order is stable
	table := [...]struct {
		flag LandlockAccessFS
		name string
	}{
		{LANDLOCK_ACCESS_FS_EXECUTE, "execute"},
		{LANDLOCK_ACCESS_FS_WRITE_FILE, "write_file"},
		{LANDLOCK_ACCESS_FS_READ_FILE, "read_file"},
		{LANDLOCK_ACCESS_FS_READ_DIR, "read_dir"},
		{LANDLOCK_ACCESS_FS_REMOVE_DIR, "remove_dir"},
		{LANDLOCK_ACCESS_FS_REMOVE_FILE, "remove_file"},
		{LANDLOCK_ACCESS_FS_MAKE_CHAR, "make_char"},
		{LANDLOCK_ACCESS_FS_MAKE_DIR, "make_dir"},
		{LANDLOCK_ACCESS_FS_MAKE_REG, "make_reg"},
		{LANDLOCK_ACCESS_FS_MAKE_SOCK, "make_sock"},
		{LANDLOCK_ACCESS_FS_MAKE_FIFO, "make_fifo"},
		{LANDLOCK_ACCESS_FS_MAKE_BLOCK, "make_block"},
		{LANDLOCK_ACCESS_FS_MAKE_SYM, "make_sym"},
		{LANDLOCK_ACCESS_FS_REFER, "fs_refer"},
		{LANDLOCK_ACCESS_FS_TRUNCATE, "fs_truncate"},
		{LANDLOCK_ACCESS_FS_IOCTL_DEV, "fs_ioctl_dev"},
	}

	names := make([]string, 0, len(table))
	for _, entry := range table {
		if f&entry.flag != 0 {
			names = append(names, entry.name)
		}
	}
	if len(names) == 0 {
		return "NULL"
	}
	return strings.Join(names, " ")
}
// LandlockAccessNet is a bitmask of network access rights.
type LandlockAccessNet uintptr

// Each right occupies a single bit, assigned in declaration order starting
// from the least significant bit.
const (
	LANDLOCK_ACCESS_NET_BIND_TCP LandlockAccessNet = 1 << iota
	LANDLOCK_ACCESS_NET_CONNECT_TCP

	// _LANDLOCK_ACCESS_NET_DELIM is the first bit past the last named right.
	_LANDLOCK_ACCESS_NET_DELIM
)

// String returns the names of every named right set in f, separated by a
// single space and ordered from the lowest bit upwards. It returns "NULL" when
// no named right is set; bits at or above _LANDLOCK_ACCESS_NET_DELIM are ignored.
func (f LandlockAccessNet) String() string {
	// ordered by ascending bit so the output order is stable
	table := [...]struct {
		flag LandlockAccessNet
		name string
	}{
		{LANDLOCK_ACCESS_NET_BIND_TCP, "bind_tcp"},
		{LANDLOCK_ACCESS_NET_CONNECT_TCP, "connect_tcp"},
	}

	names := make([]string, 0, len(table))
	for _, entry := range table {
		if f&entry.flag != 0 {
			names = append(names, entry.name)
		}
	}
	if len(names) == 0 {
		return "NULL"
	}
	return strings.Join(names, " ")
}
// LandlockScope is a bitmask of scopes restricting a Landlock domain from
// accessing outside resources.
type LandlockScope uintptr

// Each scope occupies a single bit, assigned in declaration order starting
// from the least significant bit.
const (
	LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET LandlockScope = 1 << iota
	LANDLOCK_SCOPE_SIGNAL

	// _LANDLOCK_SCOPE_DELIM is the first bit past the last named scope.
	_LANDLOCK_SCOPE_DELIM
)

// String returns the names of every named scope set in f, separated by a
// single space and ordered from the lowest bit upwards. It returns "NULL" when
// no named scope is set; bits at or above _LANDLOCK_SCOPE_DELIM are ignored.
func (f LandlockScope) String() string {
	// ordered by ascending bit so the output order is stable
	table := [...]struct {
		flag LandlockScope
		name string
	}{
		{LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET, "abstract_unix_socket"},
		{LANDLOCK_SCOPE_SIGNAL, "signal"},
	}

	names := make([]string, 0, len(table))
	for _, entry := range table {
		if f&entry.flag != 0 {
			names = append(names, entry.name)
		}
	}
	if len(names) == 0 {
		return "NULL"
	}
	return strings.Join(names, " ")
}
// RulesetAttr describes a landlock ruleset. [RulesetAttr.Create] passes the
// address and size of this struct directly to the ruleset creation syscall,
// so field order and field types must not be changed casually.
//
// All three fields have uintptr as their underlying type, so the size of this
// struct depends on the platform word size.
// NOTE(review): presumably only 64-bit targets yield the layout the kernel
// expects — confirm before building for a 32-bit target.
type RulesetAttr struct {
// Bitmask of handled filesystem actions.
HandledAccessFS LandlockAccessFS
// Bitmask of handled network actions.
HandledAccessNet LandlockAccessNet
// Bitmask of scopes restricting a Landlock domain from accessing outside resources (e.g. IPCs).
Scoped LandlockScope
}
// String returns a human-readable summary of the ruleset attribute.
//
// A nil receiver yields "NULL" and an attribute with no bits set yields "0".
// Otherwise every non-zero field is rendered as "fs: ...", "net: ..." and
// "scoped: ..." in that order, joined by ", ".
func (rulesetAttr *RulesetAttr) String() string {
	if rulesetAttr == nil {
		return "NULL"
	}

	var parts []string
	if fs := rulesetAttr.HandledAccessFS; fs != 0 {
		parts = append(parts, "fs: "+fs.String())
	}
	if net := rulesetAttr.HandledAccessNet; net != 0 {
		parts = append(parts, "net: "+net.String())
	}
	if scoped := rulesetAttr.Scoped; scoped != 0 {
		parts = append(parts, "scoped: "+scoped.String())
	}

	if len(parts) == 0 {
		return "0"
	}
	return strings.Join(parts, ", ")
}
// Create issues the landlock ruleset creation syscall with the receiver as its
// attribute argument and flags passed through unchanged.
//
// With a non-nil receiver the returned value is a ruleset file descriptor,
// which is marked close-on-exec before it is returned. With a nil receiver a
// NULL attribute of size zero is passed, which is the form used by
// [LandlockGetABI] to query the abi version; the returned value is then not a
// file descriptor.
//
// On failure the negative syscall result is returned together with the errno.
func (rulesetAttr *RulesetAttr) Create(flags uintptr) (fd int, err error) {
	var (
		r     uintptr
		errno syscall.Errno
	)
	if rulesetAttr == nil {
		// NULL needed for abi version
		r, _, errno = syscall.Syscall(seccomp.SYS_LANDLOCK_CREATE_RULESET, 0, 0, flags)
	} else {
		// The unsafe.Pointer to uintptr conversion must appear in the call
		// expression itself: only then is the referenced object guaranteed to
		// be retained and not moved until the syscall completes. Holding the
		// address in a uintptr variable beforehand gives no such guarantee.
		r, _, errno = syscall.Syscall(seccomp.SYS_LANDLOCK_CREATE_RULESET,
			uintptr(unsafe.Pointer(rulesetAttr)), unsafe.Sizeof(*rulesetAttr), flags)
	}

	fd = int(r)
	if fd < 0 {
		return fd, errno
	}

	if rulesetAttr != nil { // not a fd otherwise
		syscall.CloseOnExec(fd)
	}
	return fd, nil
}
// LandlockGetABI queries the landlock abi version by calling
// [RulesetAttr.Create] on a nil attribute with LANDLOCK_CREATE_RULESET_VERSION.
func LandlockGetABI() (int, error) {
	var versionProbe *RulesetAttr // nil attribute: query the version only
	return versionProbe.Create(LANDLOCK_CREATE_RULESET_VERSION)
}
// LandlockRestrictSelf issues the landlock restrict-self syscall with the
// ruleset referred to by rulesetFd and the given flags.
// It returns the errno when the syscall result is non-zero, nil otherwise.
func LandlockRestrictSelf(rulesetFd int, flags uintptr) error {
	ret, _, errno := syscall.Syscall(seccomp.SYS_LANDLOCK_RESTRICT_SELF, uintptr(rulesetFd), flags, 0)
	if ret == 0 {
		return nil
	}
	return errno
}

View File

@ -0,0 +1,61 @@
package container_test
import (
"testing"
"unsafe"
"hakurei.app/container"
)
// TestLandlockString checks the textual form of [container.RulesetAttr] for a
// nil pointer, the zero value, a single scope, a sparse mix of bits, and every
// named bit set at once.
func TestLandlockString(t *testing.T) {
	// every named bit of each mask
	const (
		everyFS = container.LANDLOCK_ACCESS_FS_EXECUTE |
			container.LANDLOCK_ACCESS_FS_WRITE_FILE |
			container.LANDLOCK_ACCESS_FS_READ_FILE |
			container.LANDLOCK_ACCESS_FS_READ_DIR |
			container.LANDLOCK_ACCESS_FS_REMOVE_DIR |
			container.LANDLOCK_ACCESS_FS_REMOVE_FILE |
			container.LANDLOCK_ACCESS_FS_MAKE_CHAR |
			container.LANDLOCK_ACCESS_FS_MAKE_DIR |
			container.LANDLOCK_ACCESS_FS_MAKE_REG |
			container.LANDLOCK_ACCESS_FS_MAKE_SOCK |
			container.LANDLOCK_ACCESS_FS_MAKE_FIFO |
			container.LANDLOCK_ACCESS_FS_MAKE_BLOCK |
			container.LANDLOCK_ACCESS_FS_MAKE_SYM |
			container.LANDLOCK_ACCESS_FS_REFER |
			container.LANDLOCK_ACCESS_FS_TRUNCATE |
			container.LANDLOCK_ACCESS_FS_IOCTL_DEV
		everyNet = container.LANDLOCK_ACCESS_NET_BIND_TCP |
			container.LANDLOCK_ACCESS_NET_CONNECT_TCP
		everyScope = container.LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
			container.LANDLOCK_SCOPE_SIGNAL
	)

	for _, tc := range []struct {
		name        string
		rulesetAttr *container.RulesetAttr
		want        string
	}{
		{name: "nil", rulesetAttr: nil, want: "NULL"},
		{name: "zero", rulesetAttr: new(container.RulesetAttr), want: "0"},
		{
			name:        "some",
			rulesetAttr: &container.RulesetAttr{Scoped: container.LANDLOCK_SCOPE_SIGNAL},
			want:        "scoped: signal",
		},
		{
			name: "set",
			rulesetAttr: &container.RulesetAttr{
				HandledAccessFS:  container.LANDLOCK_ACCESS_FS_MAKE_SYM | container.LANDLOCK_ACCESS_FS_IOCTL_DEV | container.LANDLOCK_ACCESS_FS_WRITE_FILE,
				HandledAccessNet: container.LANDLOCK_ACCESS_NET_BIND_TCP,
				Scoped:           everyScope,
			},
			want: "fs: write_file make_sym fs_ioctl_dev, net: bind_tcp, scoped: abstract_unix_socket signal",
		},
		{
			name: "all",
			rulesetAttr: &container.RulesetAttr{
				HandledAccessFS:  everyFS,
				HandledAccessNet: everyNet,
				Scoped:           everyScope,
			},
			want: "fs: execute write_file read_file read_dir remove_dir remove_file make_char make_dir make_reg make_sock make_fifo make_block make_sym fs_refer fs_truncate fs_ioctl_dev, net: bind_tcp connect_tcp, scoped: abstract_unix_socket signal",
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.rulesetAttr.String()
			if got == tc.want {
				return
			}
			t.Errorf("String: %s, want %s", got, tc.want)
		})
	}
}
// TestLandlockAttrSize guards the in-memory size of [container.RulesetAttr],
// which is handed to the kernel by address and size.
func TestLandlockAttrSize(t *testing.T) {
	const want = 24

	got := unsafe.Sizeof(container.RulesetAttr{})
	if got == want {
		return
	}
	t.Errorf("Sizeof: %d, want %d", got, want)
}

View File

@ -79,6 +79,8 @@ type (
Userns bool `json:"userns,omitempty"`
// share host net namespace
Net bool `json:"net,omitempty"`
// share abstract unix socket scope
Abstract bool `json:"abstract,omitempty"`
// allow dangerous terminal I/O
Tty bool `json:"tty,omitempty"`
// allow multiarch

View File

@ -62,6 +62,7 @@ var testCasesPd = []sealTestCase{
Remount(m("/"), syscall.MS_RDONLY),
SeccompPresets: seccomp.PresetExt | seccomp.PresetDenyDevel,
HostNet: true,
HostAbstract: true,
RetainSession: true,
ForwardCancel: true,
},
@ -203,6 +204,7 @@ var testCasesPd = []sealTestCase{
Remount(m("/"), syscall.MS_RDONLY),
SeccompPresets: seccomp.PresetExt | seccomp.PresetDenyDevel,
HostNet: true,
HostAbstract: true,
RetainSession: true,
ForwardCancel: true,
},

View File

@ -33,6 +33,7 @@ func newContainer(s *hst.ContainerConfig, os sys.State, prefix string, uid, gid
SeccompPresets: s.SeccompPresets,
RetainSession: s.Tty,
HostNet: s.Net,
HostAbstract: s.Abstract,
// the container is canceled when shim is requested to exit or receives an interrupt or termination signal;
// this behaviour is implemented in the shim

View File

@ -238,10 +238,11 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
}
conf := &hst.ContainerConfig{
Userns: true,
Net: true,
Tty: true,
AutoEtc: true,
Userns: true,
Net: true,
Abstract: true,
Tty: true,
AutoEtc: true,
AutoRoot: container.AbsFHSRoot,
RootFlags: container.BindWritable,

View File

@ -132,6 +132,7 @@ in
devel
userns
net
abstract
device
tty
multiarch

View File

@ -182,6 +182,7 @@ in
net = mkEnableOption "network access" // {
default = true;
};
abstract = mkEnableOption "abstract unix domain socket access";
nix = mkEnableOption "nix daemon access";
mapRealUid = mkEnableOption "mapping to priv-user uid";

View File

@ -64,6 +64,10 @@ func (p *Proxy) Start() error {
argF, func(z *container.Container) {
z.SeccompFlags |= seccomp.AllowMultiarch
z.SeccompPresets |= seccomp.PresetStrict
// xdg-dbus-proxy fails with scoped abstract unix sockets despite pathname socket being available
z.HostAbstract = true
z.Hostname = "hakurei-dbus"
if p.output != nil {
z.Stdout, z.Stderr = p.output, p.output

View File

@ -243,7 +243,7 @@ in
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_abstract = false;
socket_pathname = true;
};
}

View File

@ -269,7 +269,7 @@ in
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_abstract = false;
socket_pathname = false;
};
}

View File

@ -194,5 +194,9 @@
];
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_pathname = false;
};
}

View File

@ -264,7 +264,7 @@ in
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_abstract = false;
socket_pathname = false;
};
}

View File

@ -262,7 +262,7 @@ in
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_abstract = false;
socket_pathname = false;
};
}

View File

@ -275,7 +275,7 @@ in
seccomp = true;
try_socket = "/tmp/.X11-unix/X0";
socket_abstract = true;
socket_abstract = false;
socket_pathname = true;
};
}