sandbox/seccomp: resolve rules natively
All checks were successful
Test / Create distribution (push) Successful in 32s
Test / Sandbox (push) Successful in 1m45s
Test / Hakurei (push) Successful in 2m49s
Test / Sandbox (race detector) (push) Successful in 3m1s
Test / Planterette (push) Successful in 3m31s
Test / Hakurei (race detector) (push) Successful in 4m18s
Test / Flake checks (push) Successful in 1m6s
All checks were successful
Test / Create distribution (push) Successful in 32s
Test / Sandbox (push) Successful in 1m45s
Test / Hakurei (push) Successful in 2m49s
Test / Sandbox (race detector) (push) Successful in 3m1s
Test / Planterette (push) Successful in 3m31s
Test / Hakurei (race detector) (push) Successful in 4m18s
Test / Flake checks (push) Successful in 1m6s
This enables loading syscall filter policies from external cross-platform config files. This also removes a significant amount of C code. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
parent
1fb453dffe
commit
1a8840bebc
@ -115,10 +115,10 @@ func (app *appInfo) toFst(pathSet *appPathSet, argv []string, flagDropShell bool
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
if app.Multiarch {
|
if app.Multiarch {
|
||||||
config.Container.Seccomp |= seccomp.FilterMultiarch
|
config.Container.SeccompFlags |= seccomp.AllowMultiarch
|
||||||
}
|
}
|
||||||
if app.Bluetooth {
|
if app.Bluetooth {
|
||||||
config.Container.Seccomp |= seccomp.FilterBluetooth
|
config.Container.SeccompFlags |= seccomp.AllowBluetooth
|
||||||
}
|
}
|
||||||
return config
|
return config
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ func withNixDaemon(
|
|||||||
Hostname: formatHostname(app.Name) + "-" + action,
|
Hostname: formatHostname(app.Name) + "-" + action,
|
||||||
Userns: true, // nix sandbox requires userns
|
Userns: true, // nix sandbox requires userns
|
||||||
Net: net,
|
Net: net,
|
||||||
Seccomp: seccomp.FilterMultiarch,
|
SeccompFlags: seccomp.AllowMultiarch,
|
||||||
Tty: dropShell,
|
Tty: dropShell,
|
||||||
Filesystem: []*hst.FilesystemConfig{
|
Filesystem: []*hst.FilesystemConfig{
|
||||||
{Src: pathSet.nixPath, Dst: "/nix", Write: true, Must: true},
|
{Src: pathSet.nixPath, Dst: "/nix", Write: true, Must: true},
|
||||||
@ -86,7 +86,7 @@ func withCacheDir(
|
|||||||
|
|
||||||
Container: &hst.ContainerConfig{
|
Container: &hst.ContainerConfig{
|
||||||
Hostname: formatHostname(app.Name) + "-" + action,
|
Hostname: formatHostname(app.Name) + "-" + action,
|
||||||
Seccomp: seccomp.FilterMultiarch,
|
SeccompFlags: seccomp.AllowMultiarch,
|
||||||
Tty: dropShell,
|
Tty: dropShell,
|
||||||
Filesystem: []*hst.FilesystemConfig{
|
Filesystem: []*hst.FilesystemConfig{
|
||||||
{Src: path.Join(workDir, "nix"), Dst: "/nix", Must: true},
|
{Src: path.Join(workDir, "nix"), Dst: "/nix", Must: true},
|
||||||
|
@ -178,7 +178,7 @@ func testProxyFinaliseStartWaitCloseString(t *testing.T, useSandbox bool) {
|
|||||||
t.Run("string", func(t *testing.T) {
|
t.Run("string", func(t *testing.T) {
|
||||||
wantSubstr := fmt.Sprintf("%s -test.run=TestHelperStub -- --args=3 --fd=4", os.Args[0])
|
wantSubstr := fmt.Sprintf("%s -test.run=TestHelperStub -- --args=3 --fd=4", os.Args[0])
|
||||||
if useSandbox {
|
if useSandbox {
|
||||||
wantSubstr = fmt.Sprintf(`argv: ["%s" "-test.run=TestHelperStub" "--" "--args=3" "--fd=4"], flags: 0x0, seccomp: 0x3e`, os.Args[0])
|
wantSubstr = fmt.Sprintf(`argv: ["%s" "-test.run=TestHelperStub" "--" "--args=3" "--fd=4"], flags: 0x0, seccomp: 0x1, presets: 0xf`, os.Args[0])
|
||||||
}
|
}
|
||||||
if got := p.String(); !strings.Contains(got, wantSubstr) {
|
if got := p.String(); !strings.Contains(got, wantSubstr) {
|
||||||
t.Errorf("String: %q, want %q",
|
t.Errorf("String: %q, want %q",
|
||||||
|
@ -66,7 +66,7 @@ func (p *Proxy) Start() error {
|
|||||||
ctx, toolPath,
|
ctx, toolPath,
|
||||||
p.final, true,
|
p.final, true,
|
||||||
argF, func(container *sandbox.Container) {
|
argF, func(container *sandbox.Container) {
|
||||||
container.Seccomp |= seccomp.FilterMultiarch
|
container.SeccompFlags |= seccomp.AllowMultiarch
|
||||||
container.Hostname = "hakurei-dbus"
|
container.Hostname = "hakurei-dbus"
|
||||||
container.CommandContext = p.CommandContext
|
container.CommandContext = p.CommandContext
|
||||||
if p.output != nil {
|
if p.output != nil {
|
||||||
|
@ -11,7 +11,9 @@ type (
|
|||||||
Hostname string `json:"hostname,omitempty"`
|
Hostname string `json:"hostname,omitempty"`
|
||||||
|
|
||||||
// extra seccomp flags
|
// extra seccomp flags
|
||||||
Seccomp seccomp.FilterOpts `json:"seccomp"`
|
SeccompFlags seccomp.PrepareFlag `json:"seccomp_flags"`
|
||||||
|
// extra seccomp presets
|
||||||
|
SeccompPresets seccomp.FilterPreset `json:"seccomp_presets"`
|
||||||
// allow ptrace and friends
|
// allow ptrace and friends
|
||||||
Devel bool `json:"devel,omitempty"`
|
Devel bool `json:"devel,omitempty"`
|
||||||
// allow userns creation in container
|
// allow userns creation in container
|
||||||
|
@ -62,7 +62,8 @@ func Template() *Config {
|
|||||||
Userns: true,
|
Userns: true,
|
||||||
Net: true,
|
Net: true,
|
||||||
Device: true,
|
Device: true,
|
||||||
Seccomp: seccomp.FilterMultiarch,
|
SeccompFlags: seccomp.AllowMultiarch,
|
||||||
|
SeccompPresets: seccomp.PresetExt,
|
||||||
Tty: true,
|
Tty: true,
|
||||||
Multiarch: true,
|
Multiarch: true,
|
||||||
MapRealUID: true,
|
MapRealUID: true,
|
||||||
|
@ -80,7 +80,8 @@ func TestTemplate(t *testing.T) {
|
|||||||
],
|
],
|
||||||
"container": {
|
"container": {
|
||||||
"hostname": "localhost",
|
"hostname": "localhost",
|
||||||
"seccomp": 32,
|
"seccomp_flags": 1,
|
||||||
|
"seccomp_presets": 1,
|
||||||
"devel": true,
|
"devel": true,
|
||||||
"userns": true,
|
"userns": true,
|
||||||
"net": true,
|
"net": true,
|
||||||
|
@ -28,7 +28,8 @@ func NewContainer(s *hst.ContainerConfig, os sys.State, uid, gid *int) (*sandbox
|
|||||||
|
|
||||||
container := &sandbox.Params{
|
container := &sandbox.Params{
|
||||||
Hostname: s.Hostname,
|
Hostname: s.Hostname,
|
||||||
Seccomp: s.Seccomp,
|
SeccompFlags: s.SeccompFlags,
|
||||||
|
SeccompPresets: s.SeccompPresets,
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -37,7 +38,7 @@ func NewContainer(s *hst.ContainerConfig, os sys.State, uid, gid *int) (*sandbox
|
|||||||
}
|
}
|
||||||
|
|
||||||
if s.Multiarch {
|
if s.Multiarch {
|
||||||
container.Seccomp |= seccomp.FilterMultiarch
|
container.SeccompFlags |= seccomp.AllowMultiarch
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.Devel {
|
if s.Devel {
|
||||||
|
@ -163,7 +163,7 @@ func ShimMain() {
|
|||||||
hlog.PrintBaseError(err, "cannot configure container:")
|
hlog.PrintBaseError(err, "cannot configure container:")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := seccomp.Load(seccomp.PresetCommon); err != nil {
|
if err := seccomp.Load(seccomp.PresetStrict, seccomp.AllowMultiarch); err != nil {
|
||||||
log.Fatalf("cannot load syscall filter: %v", err)
|
log.Fatalf("cannot load syscall filter: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ package internal
|
|||||||
import (
|
import (
|
||||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
|
||||||
"git.gensokyo.uk/security/hakurei/system"
|
"git.gensokyo.uk/security/hakurei/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -11,7 +10,4 @@ func InstallFmsg(verbose bool) {
|
|||||||
hlog.Store(verbose)
|
hlog.Store(verbose)
|
||||||
sandbox.SetOutput(hlog.Output{})
|
sandbox.SetOutput(hlog.Output{})
|
||||||
system.SetOutput(hlog.Output{})
|
system.SetOutput(hlog.Output{})
|
||||||
if verbose {
|
|
||||||
seccomp.SetOutput(hlog.Verbose)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -257,7 +257,8 @@ App
|
|||||||
],
|
],
|
||||||
"container": {
|
"container": {
|
||||||
"hostname": "localhost",
|
"hostname": "localhost",
|
||||||
"seccomp": 32,
|
"seccomp_flags": 1,
|
||||||
|
"seccomp_presets": 1,
|
||||||
"devel": true,
|
"devel": true,
|
||||||
"userns": true,
|
"userns": true,
|
||||||
"net": true,
|
"net": true,
|
||||||
@ -382,7 +383,8 @@ App
|
|||||||
],
|
],
|
||||||
"container": {
|
"container": {
|
||||||
"hostname": "localhost",
|
"hostname": "localhost",
|
||||||
"seccomp": 32,
|
"seccomp_flags": 1,
|
||||||
|
"seccomp_presets": 1,
|
||||||
"devel": true,
|
"devel": true,
|
||||||
"userns": true,
|
"userns": true,
|
||||||
"net": true,
|
"net": true,
|
||||||
@ -561,7 +563,8 @@ func Test_printPs(t *testing.T) {
|
|||||||
],
|
],
|
||||||
"container": {
|
"container": {
|
||||||
"hostname": "localhost",
|
"hostname": "localhost",
|
||||||
"seccomp": 32,
|
"seccomp_flags": 1,
|
||||||
|
"seccomp_presets": 1,
|
||||||
"devel": true,
|
"devel": true,
|
||||||
"userns": true,
|
"userns": true,
|
||||||
"net": true,
|
"net": true,
|
||||||
|
@ -27,20 +27,20 @@ const (
|
|||||||
FAllowNet
|
FAllowNet
|
||||||
)
|
)
|
||||||
|
|
||||||
func (flags HardeningFlags) seccomp(opts seccomp.FilterOpts) seccomp.FilterOpts {
|
func (flags HardeningFlags) seccomp(presets seccomp.FilterPreset) seccomp.FilterPreset {
|
||||||
if flags&FSyscallCompat == 0 {
|
if flags&FSyscallCompat == 0 {
|
||||||
opts |= seccomp.FilterExt
|
presets |= seccomp.PresetExt
|
||||||
}
|
}
|
||||||
if flags&FAllowDevel == 0 {
|
if flags&FAllowDevel == 0 {
|
||||||
opts |= seccomp.FilterDenyDevel
|
presets |= seccomp.PresetDenyDevel
|
||||||
}
|
}
|
||||||
if flags&FAllowUserns == 0 {
|
if flags&FAllowUserns == 0 {
|
||||||
opts |= seccomp.FilterDenyNS
|
presets |= seccomp.PresetDenyNS
|
||||||
}
|
}
|
||||||
if flags&FAllowTTY == 0 {
|
if flags&FAllowTTY == 0 {
|
||||||
opts |= seccomp.FilterDenyTTY
|
presets |= seccomp.PresetDenyTTY
|
||||||
}
|
}
|
||||||
return opts
|
return presets
|
||||||
}
|
}
|
||||||
|
|
||||||
type (
|
type (
|
||||||
@ -94,8 +94,10 @@ type (
|
|||||||
Hostname string
|
Hostname string
|
||||||
// Sequential container setup ops.
|
// Sequential container setup ops.
|
||||||
*Ops
|
*Ops
|
||||||
// Extra seccomp options.
|
// Extra seccomp flags.
|
||||||
Seccomp seccomp.FilterOpts
|
SeccompFlags seccomp.PrepareFlag
|
||||||
|
// Extra seccomp presets.
|
||||||
|
SeccompPresets seccomp.FilterPreset
|
||||||
// Permission bits of newly created parent directories.
|
// Permission bits of newly created parent directories.
|
||||||
// The zero value is interpreted as 0755.
|
// The zero value is interpreted as 0755.
|
||||||
ParentPerm os.FileMode
|
ParentPerm os.FileMode
|
||||||
@ -233,8 +235,8 @@ func (p *Container) Serve() error {
|
|||||||
func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() }
|
func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() }
|
||||||
|
|
||||||
func (p *Container) String() string {
|
func (p *Container) String() string {
|
||||||
return fmt.Sprintf("argv: %q, flags: %#x, seccomp: %#x",
|
return fmt.Sprintf("argv: %q, flags: %#x, seccomp: %#x, presets: %#x",
|
||||||
p.Args, p.Flags, int(p.Flags.seccomp(p.Seccomp)))
|
p.Args, p.Flags, int(p.SeccompFlags), int(p.Flags.seccomp(p.SeccompPresets)))
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(ctx context.Context, name string, args ...string) *Container {
|
func New(ctx context.Context, name string, args ...string) *Container {
|
||||||
|
@ -164,8 +164,8 @@ func e(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoE
|
|||||||
func TestContainerString(t *testing.T) {
|
func TestContainerString(t *testing.T) {
|
||||||
container := sandbox.New(t.Context(), "ldd", "/usr/bin/env")
|
container := sandbox.New(t.Context(), "ldd", "/usr/bin/env")
|
||||||
container.Flags |= sandbox.FAllowDevel
|
container.Flags |= sandbox.FAllowDevel
|
||||||
container.Seccomp |= seccomp.FilterMultiarch
|
container.SeccompFlags |= seccomp.AllowMultiarch
|
||||||
want := `argv: ["ldd" "/usr/bin/env"], flags: 0x2, seccomp: 0x2e`
|
want := `argv: ["ldd" "/usr/bin/env"], flags: 0x2, seccomp: 0x1, presets: 0x7`
|
||||||
if got := container.String(); got != want {
|
if got := container.String(); got != want {
|
||||||
t.Errorf("String: %s, want %s", got, want)
|
t.Errorf("String: %s, want %s", got, want)
|
||||||
}
|
}
|
||||||
|
@ -237,7 +237,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
|||||||
log.Fatalf("cannot capset: %v", err)
|
log.Fatalf("cannot capset: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := seccomp.Load(params.Flags.seccomp(params.Seccomp)); err != nil {
|
if err := seccomp.Load(params.Flags.seccomp(params.SeccompPresets), params.SeccompFlags); err != nil {
|
||||||
log.Fatalf("cannot load syscall filter: %v", err)
|
log.Fatalf("cannot load syscall filter: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,58 +0,0 @@
|
|||||||
package seccomp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"runtime"
|
|
||||||
"sync"
|
|
||||||
)
|
|
||||||
|
|
||||||
type exporter struct {
|
|
||||||
opts FilterOpts
|
|
||||||
r, w *os.File
|
|
||||||
|
|
||||||
prepareOnce sync.Once
|
|
||||||
prepareErr error
|
|
||||||
closeOnce sync.Once
|
|
||||||
closeErr error
|
|
||||||
exportErr <-chan error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *exporter) prepare() error {
|
|
||||||
e.prepareOnce.Do(func() {
|
|
||||||
if r, w, err := os.Pipe(); err != nil {
|
|
||||||
e.prepareErr = err
|
|
||||||
return
|
|
||||||
} else {
|
|
||||||
e.r, e.w = r, w
|
|
||||||
}
|
|
||||||
|
|
||||||
ec := make(chan error, 1)
|
|
||||||
go func(fd uintptr) {
|
|
||||||
ec <- buildFilter(int(fd), e.opts)
|
|
||||||
close(ec)
|
|
||||||
_ = e.closeWrite()
|
|
||||||
runtime.KeepAlive(e.w)
|
|
||||||
}(e.w.Fd())
|
|
||||||
e.exportErr = ec
|
|
||||||
runtime.SetFinalizer(e, (*exporter).closeWrite)
|
|
||||||
})
|
|
||||||
return e.prepareErr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *exporter) closeWrite() error {
|
|
||||||
e.closeOnce.Do(func() {
|
|
||||||
if e.w == nil {
|
|
||||||
panic("closeWrite called on invalid exporter")
|
|
||||||
}
|
|
||||||
e.closeErr = e.w.Close()
|
|
||||||
|
|
||||||
// no need for a finalizer anymore
|
|
||||||
runtime.SetFinalizer(e, nil)
|
|
||||||
})
|
|
||||||
|
|
||||||
return e.closeErr
|
|
||||||
}
|
|
||||||
|
|
||||||
func newExporter(opts FilterOpts) *exporter {
|
|
||||||
return &exporter{opts: opts}
|
|
||||||
}
|
|
130
sandbox/seccomp/libseccomp-helper.c
Normal file
130
sandbox/seccomp/libseccomp-helper.c
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE /* CLONE_NEWUSER */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "libseccomp-helper.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
|
||||||
|
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||||
|
|
||||||
|
int32_t hakurei_prepare_filter(int *ret_p, int fd, uint32_t arch,
|
||||||
|
uint32_t multiarch,
|
||||||
|
struct hakurei_syscall_rule *rules,
|
||||||
|
size_t rules_sz, hakurei_prepare_flag flags) {
|
||||||
|
int i;
|
||||||
|
int last_allowed_family;
|
||||||
|
int disallowed;
|
||||||
|
struct hakurei_syscall_rule *rule;
|
||||||
|
|
||||||
|
int32_t res = 0; /* refer to resPrefix for message */
|
||||||
|
|
||||||
|
/* Blocklist all but unspec, unix, inet, inet6 and netlink (plus can and bluetooth when enabled via flags) */
|
||||||
|
struct {
|
||||||
|
int family;
|
||||||
|
hakurei_prepare_flag flags_mask;
|
||||||
|
} socket_family_allowlist[] = {
|
||||||
|
/* NOTE: Keep in numerical order */
|
||||||
|
{AF_UNSPEC, 0},
|
||||||
|
{AF_LOCAL, 0},
|
||||||
|
{AF_INET, 0},
|
||||||
|
{AF_INET6, 0},
|
||||||
|
{AF_NETLINK, 0},
|
||||||
|
{AF_CAN, HAKUREI_PREPARE_CAN},
|
||||||
|
{AF_BLUETOOTH, HAKUREI_PREPARE_BLUETOOTH},
|
||||||
|
};
|
||||||
|
|
||||||
|
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
|
||||||
|
if (ctx == NULL) {
|
||||||
|
res = 1;
|
||||||
|
goto out;
|
||||||
|
} else
|
||||||
|
errno = 0;
|
||||||
|
|
||||||
|
/* We only really need to handle arches on multiarch systems.
|
||||||
|
* If only one arch is supported the default is fine */
|
||||||
|
if (arch != 0) {
|
||||||
|
/* This *adds* the target arch, instead of replacing the
|
||||||
|
* native one. This is not ideal, because we'd like to only
|
||||||
|
* allow the target arch, but we can't really disallow the
|
||||||
|
* native arch at this point, because then bubblewrap
|
||||||
|
* couldn't continue running. */
|
||||||
|
*ret_p = seccomp_arch_add(ctx, arch);
|
||||||
|
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||||
|
res = 2;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & HAKUREI_PREPARE_MULTIARCH && multiarch != 0) {
|
||||||
|
*ret_p = seccomp_arch_add(ctx, multiarch);
|
||||||
|
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||||
|
res = 3;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < rules_sz; i++) {
|
||||||
|
rule = &rules[i];
|
||||||
|
assert(rule->m_errno == EPERM || rule->m_errno == ENOSYS);
|
||||||
|
|
||||||
|
if (rule->arg)
|
||||||
|
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
|
||||||
|
rule->syscall, 1, *rule->arg);
|
||||||
|
else
|
||||||
|
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
|
||||||
|
rule->syscall, 0);
|
||||||
|
|
||||||
|
if (*ret_p == -EFAULT) {
|
||||||
|
res = 4;
|
||||||
|
goto out;
|
||||||
|
} else if (*ret_p < 0) {
|
||||||
|
res = 5;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Socket filtering doesn't work on e.g. i386, so ignore failures here
|
||||||
|
* However, we need to use seccomp_rule_add_exact to avoid libseccomp doing
|
||||||
|
* something else: https://github.com/seccomp/libseccomp/issues/8 */
|
||||||
|
last_allowed_family = -1;
|
||||||
|
for (i = 0; i < LEN(socket_family_allowlist); i++) {
|
||||||
|
if (socket_family_allowlist[i].flags_mask != 0 &&
|
||||||
|
(socket_family_allowlist[i].flags_mask & flags) !=
|
||||||
|
socket_family_allowlist[i].flags_mask)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (disallowed = last_allowed_family + 1;
|
||||||
|
disallowed < socket_family_allowlist[i].family; disallowed++) {
|
||||||
|
/* Blocklist the in-between valid families */
|
||||||
|
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT),
|
||||||
|
SCMP_SYS(socket), 1,
|
||||||
|
SCMP_A0(SCMP_CMP_EQ, disallowed));
|
||||||
|
}
|
||||||
|
last_allowed_family = socket_family_allowlist[i].family;
|
||||||
|
}
|
||||||
|
/* Blocklist the rest */
|
||||||
|
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1,
|
||||||
|
SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
|
||||||
|
|
||||||
|
if (fd < 0) {
|
||||||
|
*ret_p = seccomp_load(ctx);
|
||||||
|
if (*ret_p != 0) {
|
||||||
|
res = 7;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*ret_p = seccomp_export_bpf(ctx, fd);
|
||||||
|
if (*ret_p != 0) {
|
||||||
|
res = 6;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
if (ctx)
|
||||||
|
seccomp_release(ctx);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
24
sandbox/seccomp/libseccomp-helper.h
Normal file
24
sandbox/seccomp/libseccomp-helper.h
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#include <seccomp.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if (SCMP_VER_MAJOR < 2) || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
||||||
|
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
||||||
|
#error This package requires libseccomp >= v2.5.1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
HAKUREI_PREPARE_MULTIARCH = 1 << 0,
|
||||||
|
HAKUREI_PREPARE_CAN = 1 << 1,
|
||||||
|
HAKUREI_PREPARE_BLUETOOTH = 1 << 2,
|
||||||
|
} hakurei_prepare_flag;
|
||||||
|
|
||||||
|
struct hakurei_syscall_rule {
|
||||||
|
int syscall;
|
||||||
|
int m_errno;
|
||||||
|
struct scmp_arg_cmp *arg;
|
||||||
|
};
|
||||||
|
|
||||||
|
int32_t hakurei_prepare_filter(int *ret_p, int fd, uint32_t arch,
|
||||||
|
uint32_t multiarch,
|
||||||
|
struct hakurei_syscall_rule *rules,
|
||||||
|
size_t rules_sz, hakurei_prepare_flag flags);
|
183
sandbox/seccomp/libseccomp.go
Normal file
183
sandbox/seccomp/libseccomp.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
package seccomp
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo linux pkg-config: --static libseccomp
|
||||||
|
|
||||||
|
#include <libseccomp-helper.h>
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
ErrInvalidRules = errors.New("invalid native rules slice")
|
||||||
|
)
|
||||||
|
|
||||||
|
// LibraryError represents a libseccomp error.
|
||||||
|
type LibraryError struct {
|
||||||
|
Prefix string
|
||||||
|
Seccomp syscall.Errno
|
||||||
|
Errno error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *LibraryError) Error() string {
|
||||||
|
if e.Seccomp == 0 {
|
||||||
|
if e.Errno == nil {
|
||||||
|
panic("invalid libseccomp error")
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s: %s", e.Prefix, e.Errno)
|
||||||
|
}
|
||||||
|
if e.Errno == nil {
|
||||||
|
return fmt.Sprintf("%s: %s", e.Prefix, e.Seccomp)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s: %s (%s)", e.Prefix, e.Seccomp, e.Errno)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *LibraryError) Is(err error) bool {
|
||||||
|
if e == nil {
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
if ef, ok := err.(*LibraryError); ok {
|
||||||
|
return *e == *ef
|
||||||
|
}
|
||||||
|
return (e.Seccomp != 0 && errors.Is(err, e.Seccomp)) ||
|
||||||
|
(e.Errno != nil && errors.Is(err, e.Errno))
|
||||||
|
}
|
||||||
|
|
||||||
|
// A NativeRule specifies an arch-specific action taken by seccomp under certain conditions.
|
||||||
|
type NativeRule struct {
|
||||||
|
// Syscall is the arch-dependent syscall number to act against.
|
||||||
|
Syscall C.int
|
||||||
|
// Errno is the errno value to return when the condition is satisfied.
|
||||||
|
Errno C.int
|
||||||
|
// Arg is the optional struct scmp_arg_cmp passed to libseccomp.
|
||||||
|
Arg *ScmpArgCmp
|
||||||
|
}
|
||||||
|
|
||||||
|
type PrepareFlag = C.hakurei_prepare_flag
|
||||||
|
|
||||||
|
const (
|
||||||
|
// AllowMultiarch allows multiarch/emulation.
|
||||||
|
AllowMultiarch PrepareFlag = C.HAKUREI_PREPARE_MULTIARCH
|
||||||
|
// AllowCAN allows AF_CAN.
|
||||||
|
AllowCAN PrepareFlag = C.HAKUREI_PREPARE_CAN
|
||||||
|
// AllowBluetooth allows AF_BLUETOOTH.
|
||||||
|
AllowBluetooth PrepareFlag = C.HAKUREI_PREPARE_BLUETOOTH
|
||||||
|
)
|
||||||
|
|
||||||
|
var resPrefix = [...]string{
|
||||||
|
0: "",
|
||||||
|
1: "seccomp_init failed",
|
||||||
|
2: "seccomp_arch_add failed",
|
||||||
|
3: "seccomp_arch_add failed (multiarch)",
|
||||||
|
4: "internal libseccomp failure",
|
||||||
|
5: "seccomp_rule_add failed",
|
||||||
|
6: "seccomp_export_bpf failed",
|
||||||
|
7: "seccomp_load failed",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepare streams filter contents to fd, or installs it to the current process if fd < 0.
|
||||||
|
func Prepare(fd int, rules []NativeRule, flags PrepareFlag) error {
|
||||||
|
if len(rules) == 0 {
|
||||||
|
return ErrInvalidRules
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
arch C.uint32_t = 0
|
||||||
|
multiarch C.uint32_t = 0
|
||||||
|
)
|
||||||
|
switch runtime.GOARCH {
|
||||||
|
case "386":
|
||||||
|
arch = C.SCMP_ARCH_X86
|
||||||
|
case "amd64":
|
||||||
|
arch = C.SCMP_ARCH_X86_64
|
||||||
|
multiarch = C.SCMP_ARCH_X86
|
||||||
|
case "arm":
|
||||||
|
arch = C.SCMP_ARCH_ARM
|
||||||
|
case "arm64":
|
||||||
|
arch = C.SCMP_ARCH_AARCH64
|
||||||
|
multiarch = C.SCMP_ARCH_ARM
|
||||||
|
}
|
||||||
|
|
||||||
|
var ret C.int
|
||||||
|
|
||||||
|
rulesPinner := new(runtime.Pinner)
|
||||||
|
for i := range rules {
|
||||||
|
rule := &rules[i]
|
||||||
|
rulesPinner.Pin(rule)
|
||||||
|
if rule.Arg != nil {
|
||||||
|
rulesPinner.Pin(rule.Arg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res, err := C.hakurei_prepare_filter(
|
||||||
|
&ret, C.int(fd),
|
||||||
|
arch, multiarch,
|
||||||
|
(*C.struct_hakurei_syscall_rule)(unsafe.Pointer(&rules[0])),
|
||||||
|
C.size_t(len(rules)),
|
||||||
|
flags,
|
||||||
|
)
|
||||||
|
rulesPinner.Unpin()
|
||||||
|
|
||||||
|
if prefix := resPrefix[res]; prefix != "" {
|
||||||
|
return &LibraryError{
|
||||||
|
prefix,
|
||||||
|
-syscall.Errno(ret),
|
||||||
|
err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ScmpCompare is the equivalent of scmp_compare;
|
||||||
|
// Comparison operators
|
||||||
|
type ScmpCompare = C.enum_scmp_compare
|
||||||
|
|
||||||
|
const (
|
||||||
|
_SCMP_CMP_MIN = C._SCMP_CMP_MIN
|
||||||
|
|
||||||
|
// not equal
|
||||||
|
SCMP_CMP_NE = C.SCMP_CMP_NE
|
||||||
|
// less than
|
||||||
|
SCMP_CMP_LT = C.SCMP_CMP_LT
|
||||||
|
// less than or equal
|
||||||
|
SCMP_CMP_LE = C.SCMP_CMP_LE
|
||||||
|
// equal
|
||||||
|
SCMP_CMP_EQ = C.SCMP_CMP_EQ
|
||||||
|
// greater than or equal
|
||||||
|
SCMP_CMP_GE = C.SCMP_CMP_GE
|
||||||
|
// greater than
|
||||||
|
SCMP_CMP_GT = C.SCMP_CMP_GT
|
||||||
|
// masked equality
|
||||||
|
SCMP_CMP_MASKED_EQ = C.SCMP_CMP_MASKED_EQ
|
||||||
|
|
||||||
|
_SCMP_CMP_MAX = C._SCMP_CMP_MAX
|
||||||
|
)
|
||||||
|
|
||||||
|
// ScmpDatum is the equivalent of scmp_datum_t;
|
||||||
|
// Argument datum
|
||||||
|
type ScmpDatum uint64
|
||||||
|
|
||||||
|
// ScmpArgCmp is the equivalent of struct scmp_arg_cmp;
|
||||||
|
// Argument / Value comparison definition
|
||||||
|
type ScmpArgCmp struct {
|
||||||
|
// argument number, starting at 0
|
||||||
|
arg C.uint
|
||||||
|
// the comparison op, e.g. SCMP_CMP_*
|
||||||
|
op ScmpCompare
|
||||||
|
|
||||||
|
datum_a, datum_b ScmpDatum
|
||||||
|
}
|
||||||
|
|
||||||
|
// only used for testing
|
||||||
|
func syscallResolveName(s string) (trap int) {
|
||||||
|
v := C.CString(s)
|
||||||
|
trap = int(C.seccomp_syscall_resolve_name(v))
|
||||||
|
C.free(unsafe.Pointer(v))
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
@ -8,17 +8,18 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
. "git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestExport(t *testing.T) {
|
func TestExport(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
name string
|
name string
|
||||||
opts seccomp.FilterOpts
|
presets FilterPreset
|
||||||
|
flags PrepareFlag
|
||||||
want []byte
|
want []byte
|
||||||
wantErr bool
|
wantErr bool
|
||||||
}{
|
}{
|
||||||
{"compat", 0, []byte{
|
{"compat", 0, 0, []byte{
|
||||||
0x95, 0xec, 0x69, 0xd0, 0x17, 0x73, 0x3e, 0x07,
|
0x95, 0xec, 0x69, 0xd0, 0x17, 0x73, 0x3e, 0x07,
|
||||||
0x21, 0x60, 0xe0, 0xda, 0x80, 0xfd, 0xeb, 0xec,
|
0x21, 0x60, 0xe0, 0xda, 0x80, 0xfd, 0xeb, 0xec,
|
||||||
0xdf, 0x27, 0xae, 0x81, 0x66, 0xf5, 0xe2, 0xa7,
|
0xdf, 0x27, 0xae, 0x81, 0x66, 0xf5, 0xe2, 0xa7,
|
||||||
@ -28,7 +29,7 @@ func TestExport(t *testing.T) {
|
|||||||
0xa7, 0x9b, 0x07, 0x0e, 0x04, 0xc0, 0xee, 0x9a,
|
0xa7, 0x9b, 0x07, 0x0e, 0x04, 0xc0, 0xee, 0x9a,
|
||||||
0xcd, 0xf5, 0x8f, 0x55, 0xcf, 0xa8, 0x15, 0xa5,
|
0xcd, 0xf5, 0x8f, 0x55, 0xcf, 0xa8, 0x15, 0xa5,
|
||||||
}, false},
|
}, false},
|
||||||
{"base", seccomp.FilterExt, []byte{
|
{"base", PresetExt, 0, []byte{
|
||||||
0xdc, 0x7f, 0x2e, 0x1c, 0x5e, 0x82, 0x9b, 0x79,
|
0xdc, 0x7f, 0x2e, 0x1c, 0x5e, 0x82, 0x9b, 0x79,
|
||||||
0xeb, 0xb7, 0xef, 0xc7, 0x59, 0x15, 0x0f, 0x54,
|
0xeb, 0xb7, 0xef, 0xc7, 0x59, 0x15, 0x0f, 0x54,
|
||||||
0xa8, 0x3a, 0x75, 0xc8, 0xdf, 0x6f, 0xee, 0x4d,
|
0xa8, 0x3a, 0x75, 0xc8, 0xdf, 0x6f, 0xee, 0x4d,
|
||||||
@ -38,10 +39,10 @@ func TestExport(t *testing.T) {
|
|||||||
0x1d, 0xb0, 0x5d, 0x90, 0x99, 0x7c, 0x86, 0x59,
|
0x1d, 0xb0, 0x5d, 0x90, 0x99, 0x7c, 0x86, 0x59,
|
||||||
0xb9, 0x58, 0x91, 0x20, 0x6a, 0xc9, 0x95, 0x2d,
|
0xb9, 0x58, 0x91, 0x20, 0x6a, 0xc9, 0x95, 0x2d,
|
||||||
}, false},
|
}, false},
|
||||||
{"everything", seccomp.FilterExt |
|
{"everything", PresetExt |
|
||||||
seccomp.FilterDenyNS | seccomp.FilterDenyTTY | seccomp.FilterDenyDevel |
|
PresetDenyNS | PresetDenyTTY | PresetDenyDevel |
|
||||||
seccomp.FilterMultiarch | seccomp.FilterLinux32 | seccomp.FilterCan |
|
PresetLinux32, AllowMultiarch | AllowCAN |
|
||||||
seccomp.FilterBluetooth, []byte{
|
AllowBluetooth, []byte{
|
||||||
0xe9, 0x9d, 0xd3, 0x45, 0xe1, 0x95, 0x41, 0x34,
|
0xe9, 0x9d, 0xd3, 0x45, 0xe1, 0x95, 0x41, 0x34,
|
||||||
0x73, 0xd3, 0xcb, 0xee, 0x07, 0xb4, 0xed, 0x57,
|
0x73, 0xd3, 0xcb, 0xee, 0x07, 0xb4, 0xed, 0x57,
|
||||||
0xb9, 0x08, 0xbf, 0xa8, 0x9e, 0xa2, 0x07, 0x2f,
|
0xb9, 0x08, 0xbf, 0xa8, 0x9e, 0xa2, 0x07, 0x2f,
|
||||||
@ -51,7 +52,7 @@ func TestExport(t *testing.T) {
|
|||||||
0x4c, 0x02, 0x4e, 0xd4, 0x88, 0x50, 0xbe, 0x69,
|
0x4c, 0x02, 0x4e, 0xd4, 0x88, 0x50, 0xbe, 0x69,
|
||||||
0xb6, 0x8a, 0x9a, 0x4c, 0x5f, 0x53, 0xa9, 0xdb,
|
0xb6, 0x8a, 0x9a, 0x4c, 0x5f, 0x53, 0xa9, 0xdb,
|
||||||
}, false},
|
}, false},
|
||||||
{"strict", seccomp.PresetStrict, []byte{
|
{"strict", PresetStrict, 0, []byte{
|
||||||
0xe8, 0x80, 0x29, 0x8d, 0xf2, 0xbd, 0x67, 0x51,
|
0xe8, 0x80, 0x29, 0x8d, 0xf2, 0xbd, 0x67, 0x51,
|
||||||
0xd0, 0x04, 0x0f, 0xc2, 0x1b, 0xc0, 0xed, 0x4c,
|
0xd0, 0x04, 0x0f, 0xc2, 0x1b, 0xc0, 0xed, 0x4c,
|
||||||
0x00, 0xf9, 0x5d, 0xc0, 0xd7, 0xba, 0x50, 0x6c,
|
0x00, 0xf9, 0x5d, 0xc0, 0xd7, 0xba, 0x50, 0x6c,
|
||||||
@ -62,7 +63,7 @@ func TestExport(t *testing.T) {
|
|||||||
0x14, 0x89, 0x60, 0xfb, 0xd3, 0x5c, 0xd7, 0x35,
|
0x14, 0x89, 0x60, 0xfb, 0xd3, 0x5c, 0xd7, 0x35,
|
||||||
}, false},
|
}, false},
|
||||||
{"strict compat", 0 |
|
{"strict compat", 0 |
|
||||||
seccomp.FilterDenyNS | seccomp.FilterDenyTTY | seccomp.FilterDenyDevel, []byte{
|
PresetDenyNS | PresetDenyTTY | PresetDenyDevel, 0, []byte{
|
||||||
0x39, 0x87, 0x1b, 0x93, 0xff, 0xaf, 0xc8, 0xb9,
|
0x39, 0x87, 0x1b, 0x93, 0xff, 0xaf, 0xc8, 0xb9,
|
||||||
0x79, 0xfc, 0xed, 0xc0, 0xb0, 0xc3, 0x7b, 0x9e,
|
0x79, 0xfc, 0xed, 0xc0, 0xb0, 0xc3, 0x7b, 0x9e,
|
||||||
0x03, 0x92, 0x2f, 0x5b, 0x02, 0x74, 0x8d, 0xc5,
|
0x03, 0x92, 0x2f, 0x5b, 0x02, 0x74, 0x8d, 0xc5,
|
||||||
@ -72,7 +73,7 @@ func TestExport(t *testing.T) {
|
|||||||
0x80, 0x8b, 0x1a, 0x6f, 0x84, 0xf3, 0x2b, 0xbd,
|
0x80, 0x8b, 0x1a, 0x6f, 0x84, 0xf3, 0x2b, 0xbd,
|
||||||
0xe1, 0xaa, 0x02, 0xae, 0x30, 0xee, 0xdc, 0xfa,
|
0xe1, 0xaa, 0x02, 0xae, 0x30, 0xee, 0xdc, 0xfa,
|
||||||
}, false},
|
}, false},
|
||||||
{"hakurei default", seccomp.FilterExt | seccomp.FilterDenyDevel, []byte{
|
{"hakurei default", PresetExt | PresetDenyDevel, 0, []byte{
|
||||||
0xc6, 0x98, 0xb0, 0x81, 0xff, 0x95, 0x7a, 0xfe,
|
0xc6, 0x98, 0xb0, 0x81, 0xff, 0x95, 0x7a, 0xfe,
|
||||||
0x17, 0xa6, 0xd9, 0x43, 0x74, 0x53, 0x7d, 0x37,
|
0x17, 0xa6, 0xd9, 0x43, 0x74, 0x53, 0x7d, 0x37,
|
||||||
0xf2, 0xa6, 0x3f, 0x6f, 0x9d, 0xd7, 0x5d, 0xa7,
|
0xf2, 0xa6, 0x3f, 0x6f, 0x9d, 0xd7, 0x5d, 0xa7,
|
||||||
@ -87,11 +88,7 @@ func TestExport(t *testing.T) {
|
|||||||
buf := make([]byte, 8)
|
buf := make([]byte, 8)
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
oldF := seccomp.GetOutput()
|
e := New(tc.presets, tc.flags)
|
||||||
seccomp.SetOutput(t.Log)
|
|
||||||
t.Cleanup(func() { seccomp.SetOutput(oldF) })
|
|
||||||
|
|
||||||
e := seccomp.New(tc.opts)
|
|
||||||
digest := sha512.New()
|
digest := sha512.New()
|
||||||
|
|
||||||
if _, err := io.CopyBuffer(digest, e, buf); (err != nil) != tc.wantErr {
|
if _, err := io.CopyBuffer(digest, e, buf); (err != nil) != tc.wantErr {
|
||||||
@ -100,7 +97,6 @@ func TestExport(t *testing.T) {
|
|||||||
}
|
}
|
||||||
if err := e.Close(); err != nil {
|
if err := e.Close(); err != nil {
|
||||||
t.Errorf("Close: error = %v", err)
|
t.Errorf("Close: error = %v", err)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
if got := digest.Sum(nil); !slices.Equal(got, tc.want) {
|
if got := digest.Sum(nil); !slices.Equal(got, tc.want) {
|
||||||
t.Fatalf("Export() hash = %x, want %x",
|
t.Fatalf("Export() hash = %x, want %x",
|
||||||
@ -111,7 +107,7 @@ func TestExport(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t.Run("close without use", func(t *testing.T) {
|
t.Run("close without use", func(t *testing.T) {
|
||||||
e := seccomp.New(0)
|
e := New(0, 0)
|
||||||
if err := e.Close(); !errors.Is(err, syscall.EINVAL) {
|
if err := e.Close(); !errors.Is(err, syscall.EINVAL) {
|
||||||
t.Errorf("Close: error = %v", err)
|
t.Errorf("Close: error = %v", err)
|
||||||
return
|
return
|
||||||
@ -119,7 +115,7 @@ func TestExport(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("close partial read", func(t *testing.T) {
|
t.Run("close partial read", func(t *testing.T) {
|
||||||
e := seccomp.New(0)
|
e := New(0, 0)
|
||||||
if _, err := e.Read(nil); err != nil {
|
if _, err := e.Read(nil); err != nil {
|
||||||
t.Errorf("Read: error = %v", err)
|
t.Errorf("Read: error = %v", err)
|
||||||
return
|
return
|
||||||
@ -137,10 +133,9 @@ func TestExport(t *testing.T) {
|
|||||||
func BenchmarkExport(b *testing.B) {
|
func BenchmarkExport(b *testing.B) {
|
||||||
buf := make([]byte, 8)
|
buf := make([]byte, 8)
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
e := seccomp.New(seccomp.FilterExt |
|
e := New(PresetExt|
|
||||||
seccomp.FilterDenyNS | seccomp.FilterDenyTTY | seccomp.FilterDenyDevel |
|
PresetDenyNS|PresetDenyTTY|PresetDenyDevel|PresetLinux32,
|
||||||
seccomp.FilterMultiarch | seccomp.FilterLinux32 | seccomp.FilterCan |
|
AllowMultiarch|AllowCAN|AllowBluetooth)
|
||||||
seccomp.FilterBluetooth)
|
|
||||||
if _, err := io.CopyBuffer(io.Discard, e, buf); err != nil {
|
if _, err := io.CopyBuffer(io.Discard, e, buf); err != nil {
|
||||||
b.Fatalf("cannot export: %v", err)
|
b.Fatalf("cannot export: %v", err)
|
||||||
}
|
}
|
@ -1,30 +0,0 @@
|
|||||||
package seccomp
|
|
||||||
|
|
||||||
import "C"
|
|
||||||
import "sync/atomic"
|
|
||||||
|
|
||||||
var printlnP atomic.Pointer[func(v ...any)]
|
|
||||||
|
|
||||||
func SetOutput(f func(v ...any)) {
|
|
||||||
if f == nil {
|
|
||||||
// avoid storing nil function
|
|
||||||
printlnP.Store(nil)
|
|
||||||
} else {
|
|
||||||
printlnP.Store(&f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetOutput() func(v ...any) {
|
|
||||||
if fp := printlnP.Load(); fp == nil {
|
|
||||||
return nil
|
|
||||||
} else {
|
|
||||||
return *fp
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//export hakurei_println
|
|
||||||
func hakurei_println(v *C.char) {
|
|
||||||
if fp := printlnP.Load(); fp != nil {
|
|
||||||
(*fp)(C.GoString(v))
|
|
||||||
}
|
|
||||||
}
|
|
230
sandbox/seccomp/presets.go
Normal file
230
sandbox/seccomp/presets.go
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
package seccomp
|
||||||
|
|
||||||
|
/* flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532 */
|
||||||
|
|
||||||
|
import "C"
|
||||||
|
import (
|
||||||
|
. "syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FilterPreset int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// PresetExt are project-specific extensions.
|
||||||
|
PresetExt FilterPreset = 1 << iota
|
||||||
|
// PresetDenyNS denies namespace setup syscalls.
|
||||||
|
PresetDenyNS
|
||||||
|
// PresetDenyTTY denies faking input.
|
||||||
|
PresetDenyTTY
|
||||||
|
// PresetDenyDevel denies development-related syscalls.
|
||||||
|
PresetDenyDevel
|
||||||
|
// PresetLinux32 sets PER_LINUX32.
|
||||||
|
PresetLinux32
|
||||||
|
)
|
||||||
|
|
||||||
|
func preparePreset(fd int, presets FilterPreset, flags PrepareFlag) error {
|
||||||
|
allowedPersonality := PER_LINUX
|
||||||
|
if presets&PresetLinux32 != 0 {
|
||||||
|
allowedPersonality = PER_LINUX32
|
||||||
|
}
|
||||||
|
presetDevelFinal := presetDevel(ScmpDatum(allowedPersonality))
|
||||||
|
|
||||||
|
l := len(presetCommon)
|
||||||
|
if presets&PresetDenyNS != 0 {
|
||||||
|
l += len(presetNamespace)
|
||||||
|
}
|
||||||
|
if presets&PresetDenyTTY != 0 {
|
||||||
|
l += len(presetTTY)
|
||||||
|
}
|
||||||
|
if presets&PresetDenyDevel != 0 {
|
||||||
|
l += len(presetDevelFinal)
|
||||||
|
}
|
||||||
|
if flags&AllowMultiarch == 0 {
|
||||||
|
l += len(presetEmu)
|
||||||
|
}
|
||||||
|
if presets&PresetExt != 0 {
|
||||||
|
l += len(presetCommonExt)
|
||||||
|
if presets&PresetDenyNS != 0 {
|
||||||
|
l += len(presetNamespaceExt)
|
||||||
|
}
|
||||||
|
if flags&AllowMultiarch == 0 {
|
||||||
|
l += len(presetEmuExt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rules := make([]NativeRule, 0, l)
|
||||||
|
rules = append(rules, presetCommon...)
|
||||||
|
if presets&PresetDenyNS != 0 {
|
||||||
|
rules = append(rules, presetNamespace...)
|
||||||
|
}
|
||||||
|
if presets&PresetDenyTTY != 0 {
|
||||||
|
rules = append(rules, presetTTY...)
|
||||||
|
}
|
||||||
|
if presets&PresetDenyDevel != 0 {
|
||||||
|
rules = append(rules, presetDevelFinal...)
|
||||||
|
}
|
||||||
|
if flags&AllowMultiarch == 0 {
|
||||||
|
rules = append(rules, presetEmu...)
|
||||||
|
}
|
||||||
|
if presets&PresetExt != 0 {
|
||||||
|
rules = append(rules, presetCommonExt...)
|
||||||
|
if presets&PresetDenyNS != 0 {
|
||||||
|
rules = append(rules, presetNamespaceExt...)
|
||||||
|
}
|
||||||
|
if flags&AllowMultiarch == 0 {
|
||||||
|
rules = append(rules, presetEmuExt...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Prepare(fd, rules, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
presetCommon = []NativeRule{
|
||||||
|
/* Block dmesg */
|
||||||
|
{C.int(SYS_SYSLOG), C.int(EPERM), nil},
|
||||||
|
/* Useless old syscall */
|
||||||
|
{C.int(SYS_USELIB), C.int(EPERM), nil},
|
||||||
|
/* Don't allow disabling accounting */
|
||||||
|
{C.int(SYS_ACCT), C.int(EPERM), nil},
|
||||||
|
/* Don't allow reading current quota use */
|
||||||
|
{C.int(SYS_QUOTACTL), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* Don't allow access to the kernel keyring */
|
||||||
|
{C.int(SYS_ADD_KEY), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_KEYCTL), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_REQUEST_KEY), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* Scary VM/NUMA ops */
|
||||||
|
{C.int(SYS_MOVE_PAGES), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_MBIND), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_GET_MEMPOLICY), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SET_MEMPOLICY), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_MIGRATE_PAGES), C.int(EPERM), nil},
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hakurei: project-specific extensions */
|
||||||
|
presetCommonExt = []NativeRule{
|
||||||
|
/* system calls for changing the system clock */
|
||||||
|
{C.int(SYS_ADJTIMEX), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CLOCK_ADJTIME), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CLOCK_ADJTIME64), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CLOCK_SETTIME), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CLOCK_SETTIME64), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETTIMEOFDAY), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* loading and unloading of kernel modules */
|
||||||
|
{C.int(SYS_DELETE_MODULE), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_FINIT_MODULE), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_INIT_MODULE), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* system calls for rebooting and reboot preparation */
|
||||||
|
{C.int(SYS_KEXEC_FILE_LOAD), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_KEXEC_LOAD), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_REBOOT), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* system calls for enabling/disabling swap devices */
|
||||||
|
{C.int(SYS_SWAPOFF), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SWAPON), C.int(EPERM), nil},
|
||||||
|
}
|
||||||
|
|
||||||
|
presetNamespace = []NativeRule{
|
||||||
|
/* Don't allow subnamespace setups: */
|
||||||
|
{C.int(SYS_UNSHARE), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETNS), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_MOUNT), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_UMOUNT), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_UMOUNT2), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_PIVOT_ROOT), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CHROOT), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CLONE), C.int(EPERM),
|
||||||
|
&ScmpArgCmp{cloneArg, SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER}},
|
||||||
|
|
||||||
|
/* seccomp can't look into clone3()'s struct clone_args to check whether
|
||||||
|
* the flags are OK, so we have no choice but to block clone3().
|
||||||
|
* Return ENOSYS so user-space will fall back to clone().
|
||||||
|
* (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
|
||||||
|
*/
|
||||||
|
{C.int(SYS_CLONE3), C.int(ENOSYS), nil},
|
||||||
|
|
||||||
|
/* New mount manipulation APIs can also change our VFS. There's no
|
||||||
|
* legitimate reason to do these in the sandbox, so block all of them
|
||||||
|
* rather than thinking about which ones might be dangerous.
|
||||||
|
* (CVE-2021-41133) */
|
||||||
|
{C.int(SYS_OPEN_TREE), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_MOVE_MOUNT), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_FSOPEN), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_FSCONFIG), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_FSMOUNT), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_FSPICK), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_MOUNT_SETATTR), C.int(ENOSYS), nil},
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hakurei: project-specific extensions */
|
||||||
|
presetNamespaceExt = []NativeRule{
|
||||||
|
/* changing file ownership */
|
||||||
|
{C.int(SYS_CHOWN), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_CHOWN32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_FCHOWN), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_FCHOWN32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_FCHOWNAT), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_LCHOWN), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_LCHOWN32), C.int(EPERM), nil},
|
||||||
|
|
||||||
|
/* system calls for changing user ID and group ID credentials */
|
||||||
|
{C.int(SYS_SETGID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETGID32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETGROUPS), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETGROUPS32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETREGID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETREGID32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETRESGID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETRESGID32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETRESUID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETRESUID32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETREUID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETREUID32), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETUID), C.int(EPERM), nil},
|
||||||
|
{C.int(SYS_SETUID32), C.int(EPERM), nil},
|
||||||
|
}
|
||||||
|
|
||||||
|
presetTTY = []NativeRule{
|
||||||
|
/* Don't allow faking input to the controlling tty (CVE-2017-5226) */
|
||||||
|
{C.int(SYS_IOCTL), C.int(EPERM),
|
||||||
|
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCSTI}},
|
||||||
|
/* In the unlikely event that the controlling tty is a Linux virtual
|
||||||
|
* console (/dev/tty2 or similar), copy/paste operations have an effect
|
||||||
|
* similar to TIOCSTI (CVE-2023-28100) */
|
||||||
|
{C.int(SYS_IOCTL), C.int(EPERM),
|
||||||
|
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCLINUX}},
|
||||||
|
}
|
||||||
|
|
||||||
|
presetEmu = []NativeRule{
|
||||||
|
/* modify_ldt is a historic source of interesting information leaks,
|
||||||
|
* so it's disabled as a hardening measure.
|
||||||
|
* However, it is required to run old 16-bit applications
|
||||||
|
* as well as some Wine patches, so it's allowed in multiarch. */
|
||||||
|
{C.int(SYS_MODIFY_LDT), C.int(EPERM), nil},
|
||||||
|
}
|
||||||
|
|
||||||
|
/* hakurei: project-specific extensions */
|
||||||
|
presetEmuExt = []NativeRule{
|
||||||
|
{C.int(SYS_SUBPAGE_PROT), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_SWITCH_ENDIAN), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_VM86), C.int(ENOSYS), nil},
|
||||||
|
{C.int(SYS_VM86OLD), C.int(ENOSYS), nil},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func presetDevel(allowedPersonality ScmpDatum) []NativeRule {
|
||||||
|
return []NativeRule{
|
||||||
|
/* Profiling operations; we expect these to be done by tools from outside
|
||||||
|
* the sandbox. In particular perf has been the source of many CVEs. */
|
||||||
|
{C.int(SYS_PERF_EVENT_OPEN), C.int(EPERM), nil},
|
||||||
|
/* Don't allow you to switch to bsd emulation or whatnot */
|
||||||
|
{C.int(SYS_PERSONALITY), C.int(EPERM),
|
||||||
|
&ScmpArgCmp{0, SCMP_CMP_NE, allowedPersonality, 0}},
|
||||||
|
|
||||||
|
{C.int(SYS_PTRACE), C.int(EPERM), nil},
|
||||||
|
}
|
||||||
|
}
|
7
sandbox/seccomp/presets_clone_backwards2.go
Normal file
7
sandbox/seccomp/presets_clone_backwards2.go
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
//go:build s390 || s390x
|
||||||
|
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
/* Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
|
||||||
|
* and flags arguments are reversed so the flags come second */
|
||||||
|
const cloneArg = 1
|
6
sandbox/seccomp/presets_clone_generic.go
Normal file
6
sandbox/seccomp/presets_clone_generic.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
//go:build !s390 && !s390x
|
||||||
|
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
/* Normally the flags come first */
|
||||||
|
const cloneArg = 0
|
@ -9,15 +9,18 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
PresetStrict = FilterExt | FilterDenyNS | FilterDenyTTY | FilterDenyDevel
|
PresetStrict = PresetExt | PresetDenyNS | PresetDenyTTY | PresetDenyDevel
|
||||||
PresetCommon = PresetStrict | FilterMultiarch
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// New returns an inactive Encoder instance.
|
// New returns an inactive Encoder instance.
|
||||||
func New(opts FilterOpts) *Encoder { return &Encoder{newExporter(opts)} }
|
func New(presets FilterPreset, flags PrepareFlag) *Encoder {
|
||||||
|
return &Encoder{newExporter(presets, flags)}
|
||||||
|
}
|
||||||
|
|
||||||
// Load loads a filter into the kernel.
|
// Load loads a filter into the kernel.
|
||||||
func Load(opts FilterOpts) error { return buildFilter(-1, opts) }
|
func Load(presets FilterPreset, flags PrepareFlag) error {
|
||||||
|
return preparePreset(-1, presets, flags)
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
An Encoder writes a BPF program to an output stream.
|
An Encoder writes a BPF program to an output stream.
|
||||||
@ -47,17 +50,20 @@ func (e *Encoder) Close() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewFile returns an instance of exporter implementing [proc.File].
|
// NewFile returns an instance of exporter implementing [proc.File].
|
||||||
func NewFile(opts FilterOpts) proc.File { return &File{opts: opts} }
|
func NewFile(presets FilterPreset, flags PrepareFlag) proc.File {
|
||||||
|
return &File{presets: presets, flags: flags}
|
||||||
|
}
|
||||||
|
|
||||||
// File implements [proc.File] and provides access to the read end of exporter pipe.
|
// File implements [proc.File] and provides access to the read end of exporter pipe.
|
||||||
type File struct {
|
type File struct {
|
||||||
opts FilterOpts
|
presets FilterPreset
|
||||||
|
flags PrepareFlag
|
||||||
proc.BaseFile
|
proc.BaseFile
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *File) ErrCount() int { return 2 }
|
func (f *File) ErrCount() int { return 2 }
|
||||||
func (f *File) Fulfill(ctx context.Context, dispatchErr func(error)) error {
|
func (f *File) Fulfill(ctx context.Context, dispatchErr func(error)) error {
|
||||||
e := newExporter(f.opts)
|
e := newExporter(f.presets, f.flags)
|
||||||
if err := e.prepare(); err != nil {
|
if err := e.prepare(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
@ -1,321 +0,0 @@
|
|||||||
#ifndef _GNU_SOURCE
|
|
||||||
#define _GNU_SOURCE /* CLONE_NEWUSER */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "seccomp-build.h"
|
|
||||||
#include <assert.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/personality.h>
|
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <sys/syscall.h>
|
|
||||||
|
|
||||||
#if (SCMP_VER_MAJOR < 2) || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
|
||||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
|
||||||
#error This package requires libseccomp >= v2.5.1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct hakurei_syscall_act {
|
|
||||||
int syscall;
|
|
||||||
int m_errno;
|
|
||||||
struct scmp_arg_cmp *arg;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
|
|
||||||
|
|
||||||
#define SECCOMP_RULESET_ADD(ruleset) \
|
|
||||||
do { \
|
|
||||||
if (opts & HAKUREI_VERBOSE) \
|
|
||||||
hakurei_println("adding seccomp ruleset \"" #ruleset "\""); \
|
|
||||||
for (int i = 0; i < LEN(ruleset); i++) { \
|
|
||||||
assert(ruleset[i].m_errno == EPERM || ruleset[i].m_errno == ENOSYS); \
|
|
||||||
\
|
|
||||||
if (ruleset[i].arg) \
|
|
||||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), \
|
|
||||||
ruleset[i].syscall, 1, *ruleset[i].arg); \
|
|
||||||
else \
|
|
||||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ruleset[i].m_errno), \
|
|
||||||
ruleset[i].syscall, 0); \
|
|
||||||
\
|
|
||||||
if (*ret_p == -EFAULT) { \
|
|
||||||
res = 4; \
|
|
||||||
goto out; \
|
|
||||||
} else if (*ret_p < 0) { \
|
|
||||||
res = 5; \
|
|
||||||
goto out; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
int32_t hakurei_build_filter(int *ret_p, int fd, uint32_t arch,
|
|
||||||
uint32_t multiarch, hakurei_filter_opts opts) {
|
|
||||||
int32_t res = 0; /* refer to resPrefix for message */
|
|
||||||
int allow_multiarch = opts & HAKUREI_MULTIARCH;
|
|
||||||
int allowed_personality = PER_LINUX;
|
|
||||||
|
|
||||||
if (opts & HAKUREI_LINUX32)
|
|
||||||
allowed_personality = PER_LINUX32;
|
|
||||||
|
|
||||||
/* flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532 */
|
|
||||||
|
|
||||||
struct hakurei_syscall_act deny_common[] = {
|
|
||||||
/* Block dmesg */
|
|
||||||
{SCMP_SYS(syslog), EPERM},
|
|
||||||
/* Useless old syscall */
|
|
||||||
{SCMP_SYS(uselib), EPERM},
|
|
||||||
/* Don't allow disabling accounting */
|
|
||||||
{SCMP_SYS(acct), EPERM},
|
|
||||||
/* Don't allow reading current quota use */
|
|
||||||
{SCMP_SYS(quotactl), EPERM},
|
|
||||||
|
|
||||||
/* Don't allow access to the kernel keyring */
|
|
||||||
{SCMP_SYS(add_key), EPERM},
|
|
||||||
{SCMP_SYS(keyctl), EPERM},
|
|
||||||
{SCMP_SYS(request_key), EPERM},
|
|
||||||
|
|
||||||
/* Scary VM/NUMA ops */
|
|
||||||
{SCMP_SYS(move_pages), EPERM},
|
|
||||||
{SCMP_SYS(mbind), EPERM},
|
|
||||||
{SCMP_SYS(get_mempolicy), EPERM},
|
|
||||||
{SCMP_SYS(set_mempolicy), EPERM},
|
|
||||||
{SCMP_SYS(migrate_pages), EPERM},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* hakurei: project-specific extensions */
|
|
||||||
struct hakurei_syscall_act deny_common_ext[] = {
|
|
||||||
/* system calls for changing the system clock */
|
|
||||||
{SCMP_SYS(adjtimex), EPERM},
|
|
||||||
{SCMP_SYS(clock_adjtime), EPERM},
|
|
||||||
{SCMP_SYS(clock_adjtime64), EPERM},
|
|
||||||
{SCMP_SYS(clock_settime), EPERM},
|
|
||||||
{SCMP_SYS(clock_settime64), EPERM},
|
|
||||||
{SCMP_SYS(settimeofday), EPERM},
|
|
||||||
|
|
||||||
/* loading and unloading of kernel modules */
|
|
||||||
{SCMP_SYS(delete_module), EPERM},
|
|
||||||
{SCMP_SYS(finit_module), EPERM},
|
|
||||||
{SCMP_SYS(init_module), EPERM},
|
|
||||||
|
|
||||||
/* system calls for rebooting and reboot preparation */
|
|
||||||
{SCMP_SYS(kexec_file_load), EPERM},
|
|
||||||
{SCMP_SYS(kexec_load), EPERM},
|
|
||||||
{SCMP_SYS(reboot), EPERM},
|
|
||||||
|
|
||||||
/* system calls for enabling/disabling swap devices */
|
|
||||||
{SCMP_SYS(swapoff), EPERM},
|
|
||||||
{SCMP_SYS(swapon), EPERM},
|
|
||||||
};
|
|
||||||
|
|
||||||
struct hakurei_syscall_act deny_ns[] = {
|
|
||||||
/* Don't allow subnamespace setups: */
|
|
||||||
{SCMP_SYS(unshare), EPERM},
|
|
||||||
{SCMP_SYS(setns), EPERM},
|
|
||||||
{SCMP_SYS(mount), EPERM},
|
|
||||||
{SCMP_SYS(umount), EPERM},
|
|
||||||
{SCMP_SYS(umount2), EPERM},
|
|
||||||
{SCMP_SYS(pivot_root), EPERM},
|
|
||||||
{SCMP_SYS(chroot), EPERM},
|
|
||||||
#if defined(__s390__) || defined(__s390x__) || defined(__CRIS__)
|
|
||||||
/* Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
|
|
||||||
* and flags arguments are reversed so the flags come second */
|
|
||||||
{SCMP_SYS(clone), EPERM,
|
|
||||||
&SCMP_A1(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
|
|
||||||
#else
|
|
||||||
/* Normally the flags come first */
|
|
||||||
{SCMP_SYS(clone), EPERM,
|
|
||||||
&SCMP_A0(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)},
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* seccomp can't look into clone3()'s struct clone_args to check whether
|
|
||||||
* the flags are OK, so we have no choice but to block clone3().
|
|
||||||
* Return ENOSYS so user-space will fall back to clone().
|
|
||||||
* (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
|
|
||||||
*/
|
|
||||||
{SCMP_SYS(clone3), ENOSYS},
|
|
||||||
|
|
||||||
/* New mount manipulation APIs can also change our VFS. There's no
|
|
||||||
* legitimate reason to do these in the sandbox, so block all of them
|
|
||||||
* rather than thinking about which ones might be dangerous.
|
|
||||||
* (CVE-2021-41133) */
|
|
||||||
{SCMP_SYS(open_tree), ENOSYS},
|
|
||||||
{SCMP_SYS(move_mount), ENOSYS},
|
|
||||||
{SCMP_SYS(fsopen), ENOSYS},
|
|
||||||
{SCMP_SYS(fsconfig), ENOSYS},
|
|
||||||
{SCMP_SYS(fsmount), ENOSYS},
|
|
||||||
{SCMP_SYS(fspick), ENOSYS},
|
|
||||||
{SCMP_SYS(mount_setattr), ENOSYS},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* hakurei: project-specific extensions */
|
|
||||||
struct hakurei_syscall_act deny_ns_ext[] = {
|
|
||||||
/* changing file ownership */
|
|
||||||
{SCMP_SYS(chown), EPERM},
|
|
||||||
{SCMP_SYS(chown32), EPERM},
|
|
||||||
{SCMP_SYS(fchown), EPERM},
|
|
||||||
{SCMP_SYS(fchown32), EPERM},
|
|
||||||
{SCMP_SYS(fchownat), EPERM},
|
|
||||||
{SCMP_SYS(lchown), EPERM},
|
|
||||||
{SCMP_SYS(lchown32), EPERM},
|
|
||||||
|
|
||||||
/* system calls for changing user ID and group ID credentials */
|
|
||||||
{SCMP_SYS(setgid), EPERM},
|
|
||||||
{SCMP_SYS(setgid32), EPERM},
|
|
||||||
{SCMP_SYS(setgroups), EPERM},
|
|
||||||
{SCMP_SYS(setgroups32), EPERM},
|
|
||||||
{SCMP_SYS(setregid), EPERM},
|
|
||||||
{SCMP_SYS(setregid32), EPERM},
|
|
||||||
{SCMP_SYS(setresgid), EPERM},
|
|
||||||
{SCMP_SYS(setresgid32), EPERM},
|
|
||||||
{SCMP_SYS(setresuid), EPERM},
|
|
||||||
{SCMP_SYS(setresuid32), EPERM},
|
|
||||||
{SCMP_SYS(setreuid), EPERM},
|
|
||||||
{SCMP_SYS(setreuid32), EPERM},
|
|
||||||
{SCMP_SYS(setuid), EPERM},
|
|
||||||
{SCMP_SYS(setuid32), EPERM},
|
|
||||||
};
|
|
||||||
|
|
||||||
struct hakurei_syscall_act deny_tty[] = {
|
|
||||||
/* Don't allow faking input to the controlling tty (CVE-2017-5226) */
|
|
||||||
{SCMP_SYS(ioctl), EPERM,
|
|
||||||
&SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCSTI)},
|
|
||||||
/* In the unlikely event that the controlling tty is a Linux virtual
|
|
||||||
* console (/dev/tty2 or similar), copy/paste operations have an effect
|
|
||||||
* similar to TIOCSTI (CVE-2023-28100) */
|
|
||||||
{SCMP_SYS(ioctl), EPERM,
|
|
||||||
&SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, (int)TIOCLINUX)},
|
|
||||||
};
|
|
||||||
|
|
||||||
struct hakurei_syscall_act deny_devel[] = {
|
|
||||||
/* Profiling operations; we expect these to be done by tools from outside
|
|
||||||
* the sandbox. In particular perf has been the source of many CVEs. */
|
|
||||||
{SCMP_SYS(perf_event_open), EPERM},
|
|
||||||
/* Don't allow you to switch to bsd emulation or whatnot */
|
|
||||||
{SCMP_SYS(personality), EPERM,
|
|
||||||
&SCMP_A0(SCMP_CMP_NE, allowed_personality)},
|
|
||||||
|
|
||||||
{SCMP_SYS(ptrace), EPERM}};
|
|
||||||
|
|
||||||
struct hakurei_syscall_act deny_emu[] = {
|
|
||||||
/* modify_ldt is a historic source of interesting information leaks,
|
|
||||||
* so it's disabled as a hardening measure.
|
|
||||||
* However, it is required to run old 16-bit applications
|
|
||||||
* as well as some Wine patches, so it's allowed in multiarch. */
|
|
||||||
{SCMP_SYS(modify_ldt), EPERM},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* hakurei: project-specific extensions */
|
|
||||||
struct hakurei_syscall_act deny_emu_ext[] = {
|
|
||||||
{SCMP_SYS(subpage_prot), ENOSYS},
|
|
||||||
{SCMP_SYS(switch_endian), ENOSYS},
|
|
||||||
{SCMP_SYS(vm86), ENOSYS},
|
|
||||||
{SCMP_SYS(vm86old), ENOSYS},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Blocklist all but unix, inet, inet6 and netlink */
|
|
||||||
struct {
|
|
||||||
int family;
|
|
||||||
hakurei_filter_opts flags_mask;
|
|
||||||
} socket_family_allowlist[] = {
|
|
||||||
/* NOTE: Keep in numerical order */
|
|
||||||
{AF_UNSPEC, 0},
|
|
||||||
{AF_LOCAL, 0},
|
|
||||||
{AF_INET, 0},
|
|
||||||
{AF_INET6, 0},
|
|
||||||
{AF_NETLINK, 0},
|
|
||||||
{AF_CAN, HAKUREI_CAN},
|
|
||||||
{AF_BLUETOOTH, HAKUREI_BLUETOOTH},
|
|
||||||
};
|
|
||||||
|
|
||||||
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
|
|
||||||
if (ctx == NULL) {
|
|
||||||
res = 1;
|
|
||||||
goto out;
|
|
||||||
} else
|
|
||||||
errno = 0;
|
|
||||||
|
|
||||||
/* We only really need to handle arches on multiarch systems.
|
|
||||||
* If only one arch is supported the default is fine */
|
|
||||||
if (arch != 0) {
|
|
||||||
/* This *adds* the target arch, instead of replacing the
|
|
||||||
* native one. This is not ideal, because we'd like to only
|
|
||||||
* allow the target arch, but we can't really disallow the
|
|
||||||
* native arch at this point, because then bubblewrap
|
|
||||||
* couldn't continue running. */
|
|
||||||
*ret_p = seccomp_arch_add(ctx, arch);
|
|
||||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
|
||||||
res = 2;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allow_multiarch && multiarch != 0) {
|
|
||||||
*ret_p = seccomp_arch_add(ctx, multiarch);
|
|
||||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
|
||||||
res = 3;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SECCOMP_RULESET_ADD(deny_common);
|
|
||||||
if (opts & HAKUREI_DENY_NS)
|
|
||||||
SECCOMP_RULESET_ADD(deny_ns);
|
|
||||||
if (opts & HAKUREI_DENY_TTY)
|
|
||||||
SECCOMP_RULESET_ADD(deny_tty);
|
|
||||||
if (opts & HAKUREI_DENY_DEVEL)
|
|
||||||
SECCOMP_RULESET_ADD(deny_devel);
|
|
||||||
if (!allow_multiarch)
|
|
||||||
SECCOMP_RULESET_ADD(deny_emu);
|
|
||||||
if (opts & HAKUREI_EXT) {
|
|
||||||
SECCOMP_RULESET_ADD(deny_common_ext);
|
|
||||||
if (opts & HAKUREI_DENY_NS)
|
|
||||||
SECCOMP_RULESET_ADD(deny_ns_ext);
|
|
||||||
if (!allow_multiarch)
|
|
||||||
SECCOMP_RULESET_ADD(deny_emu_ext);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Socket filtering doesn't work on e.g. i386, so ignore failures here
|
|
||||||
* However, we need to use seccomp_rule_add_exact to avoid libseccomp doing
|
|
||||||
* something else: https://github.com/seccomp/libseccomp/issues/8 */
|
|
||||||
int last_allowed_family = -1;
|
|
||||||
for (int i = 0; i < LEN(socket_family_allowlist); i++) {
|
|
||||||
if (socket_family_allowlist[i].flags_mask != 0 &&
|
|
||||||
(socket_family_allowlist[i].flags_mask & opts) !=
|
|
||||||
socket_family_allowlist[i].flags_mask)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (int disallowed = last_allowed_family + 1;
|
|
||||||
disallowed < socket_family_allowlist[i].family; disallowed++) {
|
|
||||||
/* Blocklist the in-between valid families */
|
|
||||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT),
|
|
||||||
SCMP_SYS(socket), 1,
|
|
||||||
SCMP_A0(SCMP_CMP_EQ, disallowed));
|
|
||||||
}
|
|
||||||
last_allowed_family = socket_family_allowlist[i].family;
|
|
||||||
}
|
|
||||||
/* Blocklist the rest */
|
|
||||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1,
|
|
||||||
SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
|
|
||||||
|
|
||||||
if (fd < 0) {
|
|
||||||
*ret_p = seccomp_load(ctx);
|
|
||||||
if (*ret_p != 0) {
|
|
||||||
res = 7;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*ret_p = seccomp_export_bpf(ctx, fd);
|
|
||||||
if (*ret_p != 0) {
|
|
||||||
res = 6;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
if (ctx)
|
|
||||||
seccomp_release(ctx);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
@ -1,23 +0,0 @@
|
|||||||
#include <seccomp.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#if (SCMP_VER_MAJOR < 2) || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
|
||||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
|
||||||
#error This package requires libseccomp >= v2.5.1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
HAKUREI_VERBOSE = 1 << 0,
|
|
||||||
HAKUREI_EXT = 1 << 1,
|
|
||||||
HAKUREI_DENY_NS = 1 << 2,
|
|
||||||
HAKUREI_DENY_TTY = 1 << 3,
|
|
||||||
HAKUREI_DENY_DEVEL = 1 << 4,
|
|
||||||
HAKUREI_MULTIARCH = 1 << 5,
|
|
||||||
HAKUREI_LINUX32 = 1 << 6,
|
|
||||||
HAKUREI_CAN = 1 << 7,
|
|
||||||
HAKUREI_BLUETOOTH = 1 << 8,
|
|
||||||
} hakurei_filter_opts;
|
|
||||||
|
|
||||||
extern void hakurei_println(char *v);
|
|
||||||
int32_t hakurei_build_filter(int *ret_p, int fd, uint32_t arch,
|
|
||||||
uint32_t multiarch, hakurei_filter_opts opts);
|
|
@ -1,125 +1,60 @@
|
|||||||
// Package seccomp provides filter presets and high level wrappers around libseccomp.
|
// Package seccomp provides high level wrappers around libseccomp.
|
||||||
package seccomp
|
package seccomp
|
||||||
|
|
||||||
/*
|
|
||||||
#cgo linux pkg-config: --static libseccomp
|
|
||||||
|
|
||||||
#include "seccomp-build.h"
|
|
||||||
*/
|
|
||||||
import "C"
|
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"os"
|
||||||
"fmt"
|
|
||||||
"runtime"
|
"runtime"
|
||||||
"syscall"
|
"sync"
|
||||||
"unsafe"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// LibraryError represents a libseccomp error.
|
type exporter struct {
|
||||||
type LibraryError struct {
|
presets FilterPreset
|
||||||
Prefix string
|
flags PrepareFlag
|
||||||
Seccomp syscall.Errno
|
r, w *os.File
|
||||||
Errno error
|
|
||||||
|
prepareOnce sync.Once
|
||||||
|
prepareErr error
|
||||||
|
closeOnce sync.Once
|
||||||
|
closeErr error
|
||||||
|
exportErr <-chan error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *LibraryError) Error() string {
|
func (e *exporter) prepare() error {
|
||||||
if e.Seccomp == 0 {
|
e.prepareOnce.Do(func() {
|
||||||
if e.Errno == nil {
|
if r, w, err := os.Pipe(); err != nil {
|
||||||
panic("invalid libseccomp error")
|
e.prepareErr = err
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Errno)
|
|
||||||
}
|
|
||||||
if e.Errno == nil {
|
|
||||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Seccomp)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s: %s (%s)", e.Prefix, e.Seccomp, e.Errno)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *LibraryError) Is(err error) bool {
|
|
||||||
if e == nil {
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
if ef, ok := err.(*LibraryError); ok {
|
|
||||||
return *e == *ef
|
|
||||||
}
|
|
||||||
return (e.Seccomp != 0 && errors.Is(err, e.Seccomp)) ||
|
|
||||||
(e.Errno != nil && errors.Is(err, e.Errno))
|
|
||||||
}
|
|
||||||
|
|
||||||
var resPrefix = [...]string{
|
|
||||||
0: "",
|
|
||||||
1: "seccomp_init failed",
|
|
||||||
2: "seccomp_arch_add failed",
|
|
||||||
3: "seccomp_arch_add failed (multiarch)",
|
|
||||||
4: "internal libseccomp failure",
|
|
||||||
5: "seccomp_rule_add failed",
|
|
||||||
6: "seccomp_export_bpf failed",
|
|
||||||
7: "seccomp_load failed",
|
|
||||||
}
|
|
||||||
|
|
||||||
type FilterOpts = C.hakurei_filter_opts
|
|
||||||
|
|
||||||
// Filter option bits; each constant mirrors the C macro of the same purpose.
const (
|
|
||||||
// filterVerbose is unexported: buildFilter sets it itself when a println
// hook is installed (printlnP is non-nil).
filterVerbose FilterOpts = C.HAKUREI_VERBOSE
|
|
||||||
// FilterExt are project-specific extensions.
|
|
||||||
FilterExt FilterOpts = C.HAKUREI_EXT
|
|
||||||
// FilterDenyNS denies namespace setup syscalls.
|
|
||||||
FilterDenyNS FilterOpts = C.HAKUREI_DENY_NS
|
|
||||||
// FilterDenyTTY denies faking input.
|
|
||||||
FilterDenyTTY FilterOpts = C.HAKUREI_DENY_TTY
|
|
||||||
// FilterDenyDevel denies development-related syscalls.
|
|
||||||
FilterDenyDevel FilterOpts = C.HAKUREI_DENY_DEVEL
|
|
||||||
// FilterMultiarch allows multiarch/emulation.
|
|
||||||
FilterMultiarch FilterOpts = C.HAKUREI_MULTIARCH
|
|
||||||
// FilterLinux32 sets PER_LINUX32.
|
|
||||||
FilterLinux32 FilterOpts = C.HAKUREI_LINUX32
|
|
||||||
// FilterCan allows AF_CAN.
|
|
||||||
FilterCan FilterOpts = C.HAKUREI_CAN
|
|
||||||
// FilterBluetooth allows AF_BLUETOOTH.
|
|
||||||
FilterBluetooth FilterOpts = C.HAKUREI_BLUETOOTH
|
|
||||||
)
|
|
||||||
|
|
||||||
// buildFilter invokes C.hakurei_build_filter with fd and opts.
//
// The native architecture token, and the secondary (multiarch) token where
// one is listed, are selected from runtime.GOARCH; for any GOARCH not listed
// in the switch both stay zero. fd is passed through to the C side unchanged
// (presumably the descriptor the compiled filter is exported to; confirm
// against the C implementation).
//
// If the result code maps to a non-empty resPrefix entry, a *LibraryError is
// returned carrying that prefix, the negated ret value as Seccomp, and the
// errno reported by the cgo call as Errno. Otherwise the cgo errno value is
// returned as is.
func buildFilter(fd int, opts FilterOpts) error {
|
|
||||||
var (
|
|
||||||
arch C.uint32_t = 0
|
|
||||||
multiarch C.uint32_t = 0
|
|
||||||
)
|
|
||||||
switch runtime.GOARCH {
|
|
||||||
case "386":
|
|
||||||
arch = C.SCMP_ARCH_X86
|
|
||||||
case "amd64":
|
|
||||||
arch = C.SCMP_ARCH_X86_64
|
|
||||||
multiarch = C.SCMP_ARCH_X86
|
|
||||||
case "arm":
|
|
||||||
arch = C.SCMP_ARCH_ARM
|
|
||||||
case "arm64":
|
|
||||||
arch = C.SCMP_ARCH_AARCH64
|
|
||||||
multiarch = C.SCMP_ARCH_ARM
|
|
||||||
}
|
|
||||||
|
|
||||||
// this removes repeated transitions between C and Go execution
|
|
||||||
// when producing log output via hakurei_println and CPrintln is nil
|
|
||||||
if fp := printlnP.Load(); fp != nil {
|
|
||||||
opts |= filterVerbose
|
|
||||||
}
|
|
||||||
|
|
||||||
// ret receives the library return code through the out-parameter; the
// second result of the cgo call is the C errno observed after the call.
var ret C.int
|
|
||||||
res, err := C.hakurei_build_filter(&ret, C.int(fd), arch, multiarch, opts)
|
|
||||||
// NOTE(review): res indexes resPrefix without a bounds check; a result
// outside 0..7 would panic here.
if prefix := resPrefix[res]; prefix != "" {
|
|
||||||
return &LibraryError{
|
|
||||||
prefix,
|
|
||||||
-syscall.Errno(ret),
|
|
||||||
err,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// NOTE(review): on the res == 0 path the errno from the cgo call is still
// returned; confirm the C side leaves errno at zero on success.
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// only used for testing
|
|
||||||
func syscallResolveName(s string) (trap int) {
|
|
||||||
v := C.CString(s)
|
|
||||||
trap = int(C.seccomp_syscall_resolve_name(v))
|
|
||||||
C.free(unsafe.Pointer(v))
|
|
||||||
return
|
return
|
||||||
|
} else {
|
||||||
|
e.r, e.w = r, w
|
||||||
|
}
|
||||||
|
|
||||||
|
ec := make(chan error, 1)
|
||||||
|
go func(fd uintptr) {
|
||||||
|
ec <- preparePreset(int(fd), e.presets, e.flags)
|
||||||
|
close(ec)
|
||||||
|
_ = e.closeWrite()
|
||||||
|
runtime.KeepAlive(e.w)
|
||||||
|
}(e.w.Fd())
|
||||||
|
e.exportErr = ec
|
||||||
|
runtime.SetFinalizer(e, (*exporter).closeWrite)
|
||||||
|
})
|
||||||
|
return e.prepareErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// closeWrite closes the write end of the pipe (e.w) at most once, guarded by
// closeOnce, and clears the finalizer set on e. Every call, including repeated
// ones, returns the error from that single Close.
//
// It panics if e.w is nil on the first call, i.e. when no pipe was ever set
// up for this exporter.
func (e *exporter) closeWrite() error {
|
||||||
|
e.closeOnce.Do(func() {
|
||||||
|
if e.w == nil {
|
||||||
|
panic("closeWrite called on invalid exporter")
|
||||||
|
}
|
||||||
|
e.closeErr = e.w.Close()
|
||||||
|
|
||||||
|
// no need for a finalizer anymore
|
||||||
|
runtime.SetFinalizer(e, nil)
|
||||||
|
})
|
||||||
|
|
||||||
|
return e.closeErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// newExporter returns an exporter that records presets and flags. Nothing
// else is initialised here: all remaining fields keep their zero values.
func newExporter(presets FilterPreset, flags PrepareFlag) *exporter {
|
||||||
|
return &exporter{presets: presets, flags: flags}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user