helper/bwrap: integrate seccomp into helper interface
All checks were successful
Build / Create distribution (push) Successful in 1m36s
Test / Run NixOS test (push) Successful in 3m40s

This makes API usage much cleaner, and encapsulates all bwrap arguments in argsWt.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-01-22 01:51:10 +09:00
parent 82029948e6
commit 9a239fa1a5
21 changed files with 224 additions and 187 deletions

View File

@@ -1,6 +1,13 @@
package bwrap
import "encoding/gob"
import (
"encoding/gob"
"os"
"slices"
"strconv"
"git.gensokyo.uk/security/fortify/internal/proc"
)
type Builder interface {
Len() int
@@ -12,6 +19,11 @@ type FSBuilder interface {
Builder
}
// FDBuilder is implemented by argument builders whose arguments refer to
// open files that must be passed along as extra files.
type FDBuilder interface {
// Len returns the number of arguments Append is expected to add;
// FDArgs sums these values to pre-size the argument slice.
Len() int
// Append adds the builder's arguments to args and registers any file it
// needs in extraFiles. A non-nil error stops FDArgs from running the
// remaining builders.
Append(args *[]string, extraFiles *[]*os.File) error
}
func init() {
gob.Register(new(pairF))
gob.Register(new(stringF))
@@ -45,6 +57,33 @@ func (s stringF) Append(args *[]string) {
*args = append(*args, s[0], s[1])
}
// fileF is an FDBuilder that passes a single open file under a named flag.
type fileF struct {
// name is the flag emitted in front of the descriptor number
name string
// file is the file to pass; a nil file makes the builder emit nothing
file *os.File
}
// Len returns 2 (flag and descriptor number) when a file is set, 0 otherwise.
func (f *fileF) Len() int {
	if f.file != nil {
		return 2
	}
	return 0
}
// Append emits the flag name followed by the descriptor number of the file
// via extraFile. It is a no-op when no file is set and never fails.
func (f *fileF) Append(args *[]string, extraFiles *[]*os.File) error {
	if f.file != nil {
		extraFile(args, extraFiles, f.name, f.file)
	}
	return nil
}
// extraFile registers f through proc.ExtraFileSlice and appends name followed
// by the resulting descriptor number to args. A nil f is ignored.
func extraFile(args *[]string, extraFiles *[]*os.File, name string, f *os.File) {
	if f != nil {
		fd := proc.ExtraFileSlice(extraFiles, f)
		*args = append(*args, name, strconv.Itoa(int(fd)))
	}
}
// Args returns a slice of bwrap args corresponding to c.
func (c *Config) Args() (args []string) {
builders := []Builder{
@@ -75,3 +114,25 @@ func (c *Config) Args() (args []string) {
return
}
// FDArgs returns the bwrap args of c that refer to file descriptors: the
// seccomp filter followed by syncFd. Files backing those arguments are
// registered in extraFiles. When a builder fails, the arguments collected so
// far are returned together with the error.
func (c *Config) FDArgs(syncFd *os.File, extraFiles *[]*os.File) (args []string, err error) {
	fdBuilders := [...]FDBuilder{
		&seccompBuilder{c},
		&fileF{positionalArgs[SyncFd], syncFd},
	}

	// size both slices up front from what the builders announce
	total := 0
	for i := range fdBuilders {
		total += fdBuilders[i].Len()
	}
	args = make([]string, 0, total)
	*extraFiles = slices.Grow(*extraFiles, len(fdBuilders))

	for i := range fdBuilders {
		if err = fdBuilders[i].Append(&args, extraFiles); err != nil {
			return args, err
		}
	}
	return args, nil
}

View File

@@ -47,6 +47,10 @@ type Config struct {
// (--chmod OCTAL PATH)
Chmod ChmodConfig `json:"chmod,omitempty"`
// load and use seccomp rules from FD (not repeatable)
// (--seccomp FD)
Syscall *SyscallPolicy
// create a new terminal session
// (--new-session)
NewSession bool `json:"new_session"`
@@ -70,7 +74,6 @@ type Config struct {
--file FD DEST Copy from FD to destination DEST
--bind-data FD DEST Copy from FD to file which is bind-mounted on DEST
--ro-bind-data FD DEST Copy from FD to file which is readonly bind-mounted on DEST
--seccomp FD Load and use seccomp rules from FD (not repeatable)
--add-seccomp-fd FD Load and use seccomp rules from FD (repeatable)
--block-fd FD Block on FD until some data to read is available
--userns-block-fd FD Block on FD until the user namespace is ready

View File

@@ -0,0 +1,254 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE // CLONE_NEWUSER
#endif
#include "seccomp-export.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/personality.h>
#include <sched.h>
#if (SCMP_VER_MAJOR < 2) || \
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
#error This package requires libseccomp >= v2.5.1
#endif
// One deny rule: a syscall, the errno it fails with, and an optional
// argument condition.
struct f_syscall_act {
// syscall number, as produced by SCMP_SYS()
int syscall;
// errno handed to SCMP_ACT_ERRNO; SECCOMP_RULESET_ADD asserts EPERM or ENOSYS
int m_errno;
// single argument comparison, or NULL to match the syscall unconditionally
struct scmp_arg_cmp *arg;
};
// LEN yields the number of elements of an array; arr must be a real array,
// not a pointer.
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
// SECCOMP_RULESET_ADD adds every entry of the f_syscall_act array `ruleset`
// to the filter as an SCMP_ACT_ERRNO rule, announcing the ruleset name through
// F_println first. Entries with a non-NULL arg become rules with one argument
// comparison; the others match the syscall unconditionally.
//
// The expansion relies on names from the calling scope: `ctx` (filter
// context), `ret` (int scratch), `res` (result code) and the label `out`.
// On -EFAULT it sets res = 4; on any other negative return it sets res = 5 and
// errno = -ret; in both cases it jumps to `out`.
#define SECCOMP_RULESET_ADD(ruleset) do { \
F_println("adding seccomp ruleset \"" #ruleset "\""); \
for (int i = 0; i < LEN(ruleset); i++) { \
assert(ruleset[i].m_errno == EPERM || ruleset[i].m_errno == ENOSYS); \
\
if (ruleset[i].arg) \
ret = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), ruleset[i].syscall, 1, *ruleset[i].arg); \
else \
ret = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), ruleset[i].syscall, 0); \
\
if (ret == -EFAULT) { \
res = 4; \
goto out; \
} else if (ret < 0) { \
res = 5; \
errno = -ret; \
goto out; \
} \
} \
} while (0)
// f_tmpfile_fd creates an anonymous temporary file with tmpfile() and returns
// a file descriptor for it that the caller owns and must close.
// Returns -1 with errno set on failure; errno is 0 on success.
int f_tmpfile_fd() {
  FILE *f = tmpfile();
  if (f == NULL)
    return -1;

  // Hand out a duplicate of the descriptor so the stdio stream can be closed
  // here; returning fileno(f) directly would leak the FILE object forever.
  // The file itself stays alive through the duplicated descriptor.
  int fd = dup(fileno(f));
  int saved_errno = fd < 0 ? errno : 0;

  // fclose may touch errno; report only the outcome of the calls that matter.
  fclose(f);
  errno = saved_errno;
  return fd;
}
// f_export_bpf builds a seccomp filter whose default action is SCMP_ACT_ALLOW,
// adds the deny rules selected by opts, and writes the resulting BPF program
// to fd with seccomp_export_bpf.
//
// arch is added to the filter when non-zero; multiarch is added as well when
// arch is non-zero, opts contains F_MULTIARCH and multiarch is non-zero.
//
// Returns 0 on success, otherwise:
//   1  seccomp_init failed
//   2  seccomp_arch_add failed for arch
//   3  seccomp_arch_add failed for multiarch
//   4  seccomp_rule_add returned -EFAULT
//   5  seccomp_rule_add failed for any other reason
//   6  seccomp_export_bpf failed
// For 2, 3, 5 and 6 errno is set to the negated libseccomp return value.
int32_t f_export_bpf(int fd, uint32_t arch, uint32_t multiarch, f_syscall_opts opts) {
int32_t res = 0; // refer to resErr for meaning
int allow_multiarch = opts & F_MULTIARCH;
int allowed_personality = PER_LINUX;
if (opts & F_LINUX32)
allowed_personality = PER_LINUX32;
// flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532
// always applied, regardless of opts
struct f_syscall_act deny_common[] = {
// Block dmesg
{SCMP_SYS(syslog), EPERM},
// Useless old syscall
{SCMP_SYS(uselib), EPERM},
// Don't allow disabling accounting
{SCMP_SYS(acct), EPERM},
// Don't allow reading current quota use
{SCMP_SYS(quotactl), EPERM},
// Don't allow access to the kernel keyring
{SCMP_SYS(add_key), EPERM},
{SCMP_SYS(keyctl), EPERM},
{SCMP_SYS(request_key), EPERM},
// Scary VM/NUMA ops
{SCMP_SYS(move_pages), EPERM},
{SCMP_SYS(mbind), EPERM},
{SCMP_SYS(get_mempolicy), EPERM},
{SCMP_SYS(set_mempolicy), EPERM},
{SCMP_SYS(migrate_pages), EPERM},
};
// applied when opts contains F_DENY_NS
struct f_syscall_act deny_ns[] = {
// Don't allow subnamespace setups:
{SCMP_SYS(unshare), EPERM},
{SCMP_SYS(setns), EPERM},
{SCMP_SYS(mount), EPERM},
{SCMP_SYS(umount), EPERM},
{SCMP_SYS(umount2), EPERM},
{SCMP_SYS(pivot_root), EPERM},
{SCMP_SYS(chroot), EPERM},
#if defined(__s390__) || defined(__s390x__) || defined(__CRIS__)
// Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
// and flags arguments are reversed so the flags come second
{SCMP_SYS(clone), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
#else
// Normally the flags come first
{SCMP_SYS(clone), EPERM, &SCMP_A0(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
#endif
// seccomp can't look into clone3()'s struct clone_args to check whether
// the flags are OK, so we have no choice but to block clone3().
// Return ENOSYS so user-space will fall back to clone().
// (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
{SCMP_SYS(clone3), ENOSYS},
// New mount manipulation APIs can also change our VFS. There's no
// legitimate reason to do these in the sandbox, so block all of them
// rather than thinking about which ones might be dangerous.
// (CVE-2021-41133)
{SCMP_SYS(open_tree), ENOSYS},
{SCMP_SYS(move_mount), ENOSYS},
{SCMP_SYS(fsopen), ENOSYS},
{SCMP_SYS(fsconfig), ENOSYS},
{SCMP_SYS(fsmount), ENOSYS},
{SCMP_SYS(fspick), ENOSYS},
{SCMP_SYS(mount_setattr), ENOSYS},
};
// applied when opts contains F_DENY_TTY
struct f_syscall_act deny_tty[] = {
// Don't allow faking input to the controlling tty (CVE-2017-5226)
{SCMP_SYS(ioctl), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCSTI)},
// In the unlikely event that the controlling tty is a Linux virtual
// console (/dev/tty2 or similar), copy/paste operations have an effect
// similar to TIOCSTI (CVE-2023-28100)
{SCMP_SYS(ioctl), EPERM, &SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCLINUX)},
};
// applied when opts contains F_DENY_DEVEL
struct f_syscall_act deny_devel[] = {
// Profiling operations; we expect these to be done by tools from outside
// the sandbox. In particular perf has been the source of many CVEs.
{SCMP_SYS(perf_event_open), EPERM},
// Don't allow you to switch to bsd emulation or whatnot
{SCMP_SYS(personality), EPERM, &SCMP_A0(SCMP_CMP_NE, allowed_personality)},
{SCMP_SYS(ptrace), EPERM}
};
// Blocklist all but unix, inet, inet6 and netlink
// An entry with a non-zero flags_mask is only allowed when opts contains
// every bit of that mask.
struct
{
int family;
f_syscall_opts flags_mask;
} socket_family_allowlist[] = {
// NOTE: Keep in numerical order
{ AF_UNSPEC, 0 },
{ AF_LOCAL, 0 },
{ AF_INET, 0 },
{ AF_INET6, 0 },
{ AF_NETLINK, 0 },
{ AF_CAN, F_CAN },
{ AF_BLUETOOTH, F_BLUETOOTH },
};
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
if (ctx == NULL) {
res = 1;
goto out;
} else
errno = 0;
int ret;
// We only really need to handle arches on multiarch systems.
// If only one arch is supported the default is fine
if (arch != 0) {
// This *adds* the target arch, instead of replacing the
// native one. This is not ideal, because we'd like to only
// allow the target arch, but we can't really disallow the
// native arch at this point, because then bubblewrap
// couldn't continue running.
ret = seccomp_arch_add(ctx, arch);
if (ret < 0 && ret != -EEXIST) {
res = 2;
errno = -ret;
goto out;
}
if (allow_multiarch && multiarch != 0) {
ret = seccomp_arch_add(ctx, multiarch);
if (ret < 0 && ret != -EEXIST) {
res = 3;
errno = -ret;
goto out;
}
}
}
SECCOMP_RULESET_ADD(deny_common);
if (opts & F_DENY_NS) SECCOMP_RULESET_ADD(deny_ns);
if (opts & F_DENY_TTY) SECCOMP_RULESET_ADD(deny_tty);
if (opts & F_DENY_DEVEL) SECCOMP_RULESET_ADD(deny_devel);
if (!allow_multiarch) {
F_println("disabling modify_ldt");
// modify_ldt is a historic source of interesting information leaks,
// so it's disabled as a hardening measure.
// However, it is required to run old 16-bit applications
// as well as some Wine patches, so it's allowed in multiarch.
ret = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(modify_ldt), 0);
// -EFAULT is reported as res 4 and other failures as res 5, matching
// SECCOMP_RULESET_ADD.
if (ret == -EFAULT) {
// call fmsg here?
res = 4;
goto out;
} else if (ret < 0) {
res = 5;
errno = -ret;
goto out;
}
}
// Socket filtering doesn't work on e.g. i386, so ignore failures here
// However, we need to use seccomp_rule_add_exact to avoid libseccomp doing
// something else: https://github.com/seccomp/libseccomp/issues/8
int last_allowed_family = -1;
for (int i = 0; i < LEN(socket_family_allowlist); i++) {
// skip (and thereby leave blocked) families whose required flags are not all set
if (socket_family_allowlist[i].flags_mask != 0 &&
(socket_family_allowlist[i].flags_mask & opts) != socket_family_allowlist[i].flags_mask)
continue;
for (int disallowed = last_allowed_family + 1; disallowed < socket_family_allowlist[i].family; disallowed++) {
// Blocklist the in-between valid families
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1, SCMP_A0(SCMP_CMP_EQ, disallowed));
}
last_allowed_family = socket_family_allowlist[i].family;
}
// Blocklist the rest
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1, SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
ret = seccomp_export_bpf(ctx, fd);
if (ret != 0) {
res = 6;
errno = -ret;
goto out;
}
out:
// ctx is NULL only when seccomp_init failed
if (ctx)
seccomp_release(ctx);
return res;
}

View File

@@ -0,0 +1,22 @@
#include <stdint.h>
#include <seccomp.h>
#if (SCMP_VER_MAJOR < 2) || \
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
#error This package requires libseccomp >= v2.5.1
#endif
// Bit flags selecting what f_export_bpf puts into the filter.
typedef enum {
// add the deny_ns ruleset (namespace and mount related syscalls)
F_DENY_NS = 1 << 0,
// add the deny_tty ruleset (TIOCSTI and TIOCLINUX ioctls)
F_DENY_TTY = 1 << 1,
// add the deny_devel ruleset (perf_event_open, personality, ptrace)
F_DENY_DEVEL = 1 << 2,
// also add the multiarch architecture and leave modify_ldt allowed
F_MULTIARCH = 1 << 3,
// the personality rule of deny_devel permits PER_LINUX32 instead of PER_LINUX
F_LINUX32 = 1 << 4,
// allow AF_CAN sockets
F_CAN = 1 << 5,
// allow AF_BLUETOOTH sockets
F_BLUETOOTH = 1 << 6,
} f_syscall_opts;
// F_println is exported from the Go side of this package; the C code calls it
// to report progress messages.
extern void F_println(char *v);
// f_tmpfile_fd returns a file descriptor of a tmpfile(), or -1 on failure.
int f_tmpfile_fd();
// f_export_bpf builds the filter selected by opts and writes it to fd as BPF;
// it returns 0 on success and a non-zero result code otherwise.
int32_t f_export_bpf(int fd, uint32_t arch, uint32_t multiarch, f_syscall_opts opts);

View File

@@ -0,0 +1,95 @@
package bwrap
import (
"fmt"
"io"
"os"
"git.gensokyo.uk/security/fortify/internal/fmsg"
)
// SyscallPolicy holds the user-selectable options of the seccomp filter.
// Each field maps onto one filter flag in resolveSeccomp.
type SyscallPolicy struct {
// DenyDevel enables the "denydevel" flag
DenyDevel bool `json:"deny_devel"`
// Multiarch enables the "multiarch" flag
Multiarch bool `json:"multiarch"`
// Linux32 enables the "linux32" flag
Linux32 bool `json:"linux32"`
// Can enables the "can" flag
Can bool `json:"can"`
// Bluetooth enables the "bluetooth" flag
Bluetooth bool `json:"bluetooth"`
}
// seccompBuilder is the builder that resolves the seccomp filter of config
// into a file and emits the corresponding bwrap argument.
type seccompBuilder struct {
// config supplies the syscall policy and the settings the flags derive from
config *Config
}
// Len reports how many arguments Append adds: two (the seccomp flag and its
// descriptor number) when a syscall policy is configured, zero otherwise.
func (s *seccompBuilder) Len() int {
	// Without a policy resolveSeccomp yields no file and Append emits
	// nothing, so report zero here as well instead of over-counting.
	if s == nil || s.config == nil || s.config.Syscall == nil {
		return 0
	}
	return 2
}
// Append resolves the seccomp filter of the config into a file and emits the
// seccomp flag followed by its descriptor number. Nothing is emitted when the
// builder is nil or no filter file was resolved.
func (s *seccompBuilder) Append(args *[]string, extraFiles *[]*os.File) error {
	if s == nil {
		return nil
	}

	f, err := s.config.resolveSeccomp()
	if err != nil {
		return err
	}

	// extraFile ignores a nil file, covering the no-policy case
	extraFile(args, extraFiles, positionalArgs[Seccomp], f)
	return nil
}
// resolveSeccomp exports the seccomp filter described by c into a temporary
// file and returns that file positioned at its start.
//
// It returns (nil, nil) when no syscall policy is configured. On failure the
// temporary file is closed and (nil, err) is returned, so the caller never
// has to clean up after an error.
func (c *Config) resolveSeccomp() (*os.File, error) {
	if c.Syscall == nil {
		return nil, nil
	}

	// resolve seccomp filter opts
	var (
		opts    syscallOpts
		optd    []string
		optCond = [...]struct {
			v bool        // whether the flag applies
			o syscallOpts // flag bit
			d string      // name used in the log line
		}{
			{!c.UserNS, flagDenyNS, "denyns"},
			{c.NewSession, flagDenyTTY, "denytty"},
			{c.Syscall.DenyDevel, flagDenyDevel, "denydevel"},
			{c.Syscall.Multiarch, flagMultiarch, "multiarch"},
			{c.Syscall.Linux32, flagLinux32, "linux32"},
			{c.Syscall.Can, flagCan, "can"},
			{c.Syscall.Bluetooth, flagBluetooth, "bluetooth"},
		}
	)
	if CPrintln != nil {
		optd = make([]string, 1, len(optCond)+1)
		optd[0] = "common"
	}
	for _, opt := range optCond {
		if opt.v {
			opts |= opt.o
			// NOTE(review): names are collected only when fmsg.Verbose()
			// holds, while the line below is printed whenever CPrintln is
			// set — confirm the two conditions always agree.
			if fmsg.Verbose() {
				optd = append(optd, opt.d)
			}
		}
	}
	if CPrintln != nil {
		CPrintln(fmt.Sprintf("seccomp flags: %s", optd))
	}

	// export seccomp filter to tmpfile
	f, err := tmpfile()
	if err != nil {
		return nil, err
	}
	if err = exportAndSeek(f, opts); err != nil {
		// The export error is what matters to the caller; closing is
		// best-effort cleanup so the descriptor does not leak.
		_ = f.Close()
		return nil, err
	}
	return f, nil
}
// exportAndSeek writes the filter selected by opts into f and rewinds f to
// its start. The first error encountered is returned.
func exportAndSeek(f *os.File, opts syscallOpts) error {
	err := exportFilter(f.Fd(), opts)
	if err == nil {
		_, err = f.Seek(0, io.SeekStart)
	}
	return err
}

83
helper/bwrap/seccomp.go Normal file
View File

@@ -0,0 +1,83 @@
package bwrap
/*
#cgo linux pkg-config: --static libseccomp
#include "seccomp-export.h"
*/
import "C"
import (
"errors"
"fmt"
"os"
"runtime"
)
// CPrintln, when non-nil, receives the messages emitted by the C side through
// F_println as well as the seccomp flags line printed by resolveSeccomp.
// Leaving it nil silences both.
var CPrintln func(v ...any)
// resErr maps the result code returned by f_export_bpf to a Go error;
// index 0 (nil) means success.
var resErr = [...]error{
0: nil,
1: errors.New("seccomp_init failed"),
2: errors.New("seccomp_arch_add failed"),
3: errors.New("seccomp_arch_add failed (multiarch)"),
4: errors.New("internal libseccomp failure"),
5: errors.New("seccomp_rule_add failed"),
6: errors.New("seccomp_export_bpf failed"),
}
type (
// syscallOpts is the flag set passed to f_export_bpf, aliasing the C enum.
syscallOpts = C.f_syscall_opts
)
// Go names for the individual f_syscall_opts bits defined in seccomp-export.h.
const (
flagDenyNS syscallOpts = C.F_DENY_NS
flagDenyTTY syscallOpts = C.F_DENY_TTY
flagDenyDevel syscallOpts = C.F_DENY_DEVEL
flagMultiarch syscallOpts = C.F_MULTIARCH
flagLinux32 syscallOpts = C.F_LINUX32
flagCan syscallOpts = C.F_CAN
flagBluetooth syscallOpts = C.F_BLUETOOTH
)
// tmpfile returns a temporary file created on the C side by f_tmpfile_fd.
// The caller owns the returned file and is responsible for closing it.
func tmpfile() (*os.File, error) {
	fd, err := C.f_tmpfile_fd()
	// Decide success from the returned descriptor, not from errno: errno may
	// be left non-zero by a call that succeeded, and treating that as a
	// failure would leak the descriptor.
	if fd < 0 {
		if err == nil {
			err = errors.New("tmpfile failed")
		}
		return nil, err
	}
	return os.NewFile(uintptr(fd), "tmpfile"), nil
}
// exportFilter builds the seccomp filter selected by opts for the
// architecture this binary was compiled for and writes it to fd as BPF.
//
// A non-zero result code from the C side is translated through resErr; when
// errno was set as well, both errors are wrapped so callers can match either
// with errors.Is / errors.As.
func exportFilter(fd uintptr, opts syscallOpts) error {
	var (
		arch      C.uint32_t = 0
		multiarch C.uint32_t = 0
	)
	// Unknown architectures leave both at zero, in which case the C side
	// adds no architecture to the filter.
	switch runtime.GOARCH {
	case "386":
		arch = C.SCMP_ARCH_X86
	case "amd64":
		arch = C.SCMP_ARCH_X86_64
		multiarch = C.SCMP_ARCH_X86
	case "arm":
		arch = C.SCMP_ARCH_ARM
	case "arm64":
		arch = C.SCMP_ARCH_AARCH64
		multiarch = C.SCMP_ARCH_ARM
	}

	res, err := C.f_export_bpf(C.int(fd), arch, multiarch, opts)
	// Guard the table lookup: a code added on the C side without a matching
	// resErr entry must produce an error, not an index-out-of-range panic.
	if res < 0 || int(res) >= len(resErr) {
		if err == nil {
			return fmt.Errorf("unexpected seccomp export result %d", int(res))
		}
		return fmt.Errorf("unexpected seccomp export result %d: %w", int(res), err)
	}

	re := resErr[res]
	switch {
	case re == nil:
		return err
	case err == nil:
		return re
	default:
		return fmt.Errorf("%w: %w", re, err)
	}
}
// F_println is called from the C side to emit a message; it converts v to a
// Go string and passes it to CPrintln, doing nothing when CPrintln is nil.
//
//export F_println
func F_println(v *C.char) {
if CPrintln != nil {
CPrintln(C.GoString(v))
}
}

View File

@@ -43,6 +43,9 @@ const (
Overlay
TmpOverlay
ROOverlay
SyncFd
Seccomp
)
var positionalArgs = [...]string{
@@ -70,6 +73,9 @@ var positionalArgs = [...]string{
Overlay: "--overlay",
TmpOverlay: "--tmp-overlay",
ROOverlay: "--ro-overlay",
SyncFd: "--sync-fd",
Seccomp: "--seccomp",
}
type PermConfig[T FSBuilder] struct {