hakurei: move container helpers toplevel

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-07-02 21:31:29 +09:00
parent a1d98823f8
commit eec021cc4b
36 changed files with 21 additions and 21 deletions

View File

@@ -1,130 +0,0 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* CLONE_NEWUSER */
#endif
#include "libseccomp-helper.h"
#include <assert.h>
#include <errno.h>
#include <sys/socket.h>
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
int32_t hakurei_export_filter(int *ret_p, int fd, uint32_t arch,
uint32_t multiarch,
struct hakurei_syscall_rule *rules,
size_t rules_sz, hakurei_export_flag flags) {
int i;
int last_allowed_family;
int disallowed;
struct hakurei_syscall_rule *rule;
int32_t res = 0; /* refer to resPrefix for message */
/* Blocklist all but unix, inet, inet6 and netlink */
struct {
int family;
hakurei_export_flag flags_mask;
} socket_family_allowlist[] = {
/* NOTE: Keep in numerical order */
{AF_UNSPEC, 0},
{AF_LOCAL, 0},
{AF_INET, 0},
{AF_INET6, 0},
{AF_NETLINK, 0},
{AF_CAN, HAKUREI_EXPORT_CAN},
{AF_BLUETOOTH, HAKUREI_EXPORT_BLUETOOTH},
};
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
if (ctx == NULL) {
res = 1;
goto out;
} else
errno = 0;
/* We only really need to handle arches on multiarch systems.
* If only one arch is supported the default is fine */
if (arch != 0) {
/* This *adds* the target arch, instead of replacing the
* native one. This is not ideal, because we'd like to only
* allow the target arch, but we can't really disallow the
* native arch at this point, because then bubblewrap
* couldn't continue running. */
*ret_p = seccomp_arch_add(ctx, arch);
if (*ret_p < 0 && *ret_p != -EEXIST) {
res = 2;
goto out;
}
if (flags & HAKUREI_EXPORT_MULTIARCH && multiarch != 0) {
*ret_p = seccomp_arch_add(ctx, multiarch);
if (*ret_p < 0 && *ret_p != -EEXIST) {
res = 3;
goto out;
}
}
}
for (i = 0; i < rules_sz; i++) {
rule = &rules[i];
assert(rule->m_errno == EPERM || rule->m_errno == ENOSYS);
if (rule->arg)
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
rule->syscall, 1, *rule->arg);
else
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
rule->syscall, 0);
if (*ret_p == -EFAULT) {
res = 4;
goto out;
} else if (*ret_p < 0) {
res = 5;
goto out;
}
}
/* Socket filtering doesn't work on e.g. i386, so ignore failures here
* However, we need to user seccomp_rule_add_exact to avoid libseccomp doing
* something else: https://github.com/seccomp/libseccomp/issues/8 */
last_allowed_family = -1;
for (i = 0; i < LEN(socket_family_allowlist); i++) {
if (socket_family_allowlist[i].flags_mask != 0 &&
(socket_family_allowlist[i].flags_mask & flags) !=
socket_family_allowlist[i].flags_mask)
continue;
for (disallowed = last_allowed_family + 1;
disallowed < socket_family_allowlist[i].family; disallowed++) {
/* Blocklist the in-between valid families */
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT),
SCMP_SYS(socket), 1,
SCMP_A0(SCMP_CMP_EQ, disallowed));
}
last_allowed_family = socket_family_allowlist[i].family;
}
/* Blocklist the rest */
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1,
SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
if (fd < 0) {
*ret_p = seccomp_load(ctx);
if (*ret_p != 0) {
res = 7;
goto out;
}
} else {
*ret_p = seccomp_export_bpf(ctx, fd);
if (*ret_p != 0) {
res = 6;
goto out;
}
}
out:
if (ctx)
seccomp_release(ctx);
return res;
}

View File

@@ -1,24 +0,0 @@
#include <seccomp.h>
#include <stdint.h>
#if (SCMP_VER_MAJOR < 2) || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
#error This package requires libseccomp >= v2.5.1
#endif
typedef enum {
HAKUREI_EXPORT_MULTIARCH = 1 << 0,
HAKUREI_EXPORT_CAN = 1 << 1,
HAKUREI_EXPORT_BLUETOOTH = 1 << 2,
} hakurei_export_flag;
struct hakurei_syscall_rule {
int syscall;
int m_errno;
struct scmp_arg_cmp *arg;
};
int32_t hakurei_export_filter(int *ret_p, int fd, uint32_t arch,
uint32_t multiarch,
struct hakurei_syscall_rule *rules,
size_t rules_sz, hakurei_export_flag flags);

View File

@@ -1,188 +0,0 @@
package seccomp
/*
#cgo linux pkg-config: --static libseccomp
#include <libseccomp-helper.h>
*/
import "C"
import (
"errors"
"fmt"
"runtime"
"syscall"
"unsafe"
)
var (
ErrInvalidRules = errors.New("invalid native rules slice")
)
// LibraryError represents a libseccomp error.
type LibraryError struct {
Prefix string
Seccomp syscall.Errno
Errno error
}
func (e *LibraryError) Error() string {
if e.Seccomp == 0 {
if e.Errno == nil {
panic("invalid libseccomp error")
}
return fmt.Sprintf("%s: %s", e.Prefix, e.Errno)
}
if e.Errno == nil {
return fmt.Sprintf("%s: %s", e.Prefix, e.Seccomp)
}
return fmt.Sprintf("%s: %s (%s)", e.Prefix, e.Seccomp, e.Errno)
}
func (e *LibraryError) Is(err error) bool {
if e == nil {
return err == nil
}
if ef, ok := err.(*LibraryError); ok {
return *e == *ef
}
return (e.Seccomp != 0 && errors.Is(err, e.Seccomp)) ||
(e.Errno != nil && errors.Is(err, e.Errno))
}
type (
ScmpSyscall = C.int
ScmpErrno = C.int
)
// A NativeRule specifies an arch-specific action taken by seccomp under certain conditions.
type NativeRule struct {
// Syscall is the arch-dependent syscall number to act against.
Syscall ScmpSyscall
// Errno is the errno value to return when the condition is satisfied.
Errno ScmpErrno
// Arg is the optional struct scmp_arg_cmp passed to libseccomp.
Arg *ScmpArgCmp
}
type ExportFlag = C.hakurei_export_flag
const (
// AllowMultiarch allows multiarch/emulation.
AllowMultiarch ExportFlag = C.HAKUREI_EXPORT_MULTIARCH
// AllowCAN allows AF_CAN.
AllowCAN ExportFlag = C.HAKUREI_EXPORT_CAN
// AllowBluetooth allows AF_BLUETOOTH.
AllowBluetooth ExportFlag = C.HAKUREI_EXPORT_BLUETOOTH
)
var resPrefix = [...]string{
0: "",
1: "seccomp_init failed",
2: "seccomp_arch_add failed",
3: "seccomp_arch_add failed (multiarch)",
4: "internal libseccomp failure",
5: "seccomp_rule_add failed",
6: "seccomp_export_bpf failed",
7: "seccomp_load failed",
}
// Export streams filter contents to fd, or installs it to the current process if fd < 0.
func Export(fd int, rules []NativeRule, flags ExportFlag) error {
if len(rules) == 0 {
return ErrInvalidRules
}
var (
arch C.uint32_t = 0
multiarch C.uint32_t = 0
)
switch runtime.GOARCH {
case "386":
arch = C.SCMP_ARCH_X86
case "amd64":
arch = C.SCMP_ARCH_X86_64
multiarch = C.SCMP_ARCH_X86
case "arm":
arch = C.SCMP_ARCH_ARM
case "arm64":
arch = C.SCMP_ARCH_AARCH64
multiarch = C.SCMP_ARCH_ARM
}
var ret C.int
rulesPinner := new(runtime.Pinner)
for i := range rules {
rule := &rules[i]
rulesPinner.Pin(rule)
if rule.Arg != nil {
rulesPinner.Pin(rule.Arg)
}
}
res, err := C.hakurei_export_filter(
&ret, C.int(fd),
arch, multiarch,
(*C.struct_hakurei_syscall_rule)(unsafe.Pointer(&rules[0])),
C.size_t(len(rules)),
flags,
)
rulesPinner.Unpin()
if prefix := resPrefix[res]; prefix != "" {
return &LibraryError{
prefix,
-syscall.Errno(ret),
err,
}
}
return err
}
// ScmpCompare is the equivalent of scmp_compare;
// Comparison operators
type ScmpCompare = C.enum_scmp_compare
const (
_SCMP_CMP_MIN = C._SCMP_CMP_MIN
// not equal
SCMP_CMP_NE = C.SCMP_CMP_NE
// less than
SCMP_CMP_LT = C.SCMP_CMP_LT
// less than or equal
SCMP_CMP_LE = C.SCMP_CMP_LE
// equal
SCMP_CMP_EQ = C.SCMP_CMP_EQ
// greater than or equal
SCMP_CMP_GE = C.SCMP_CMP_GE
// greater than
SCMP_CMP_GT = C.SCMP_CMP_GT
// masked equality
SCMP_CMP_MASKED_EQ = C.SCMP_CMP_MASKED_EQ
_SCMP_CMP_MAX = C._SCMP_CMP_MAX
)
// ScmpDatum is the equivalent of scmp_datum_t;
// Argument datum
type ScmpDatum uint64
// ScmpArgCmp is the equivalent of struct scmp_arg_cmp;
// Argument / Value comparison definition
type ScmpArgCmp struct {
// argument number, starting at 0
Arg C.uint
// the comparison op, e.g. SCMP_CMP_*
Op ScmpCompare
DatumA, DatumB ScmpDatum
}
// only used for testing
func syscallResolveName(s string) (trap int) {
v := C.CString(s)
trap = int(C.seccomp_syscall_resolve_name(v))
C.free(unsafe.Pointer(v))
return
}

View File

@@ -1,147 +0,0 @@
package seccomp_test
import (
"crypto/sha512"
"errors"
"io"
"slices"
"syscall"
"testing"
. "git.gensokyo.uk/security/hakurei/sandbox/seccomp"
)
func TestExport(t *testing.T) {
testCases := []struct {
name string
presets FilterPreset
flags ExportFlag
want []byte
wantErr bool
}{
{"compat", 0, 0, []byte{
0x95, 0xec, 0x69, 0xd0, 0x17, 0x73, 0x3e, 0x07,
0x21, 0x60, 0xe0, 0xda, 0x80, 0xfd, 0xeb, 0xec,
0xdf, 0x27, 0xae, 0x81, 0x66, 0xf5, 0xe2, 0xa7,
0x31, 0x27, 0x0c, 0x98, 0xea, 0x2d, 0x29, 0x46,
0xcb, 0x52, 0x31, 0x02, 0x90, 0x63, 0x66, 0x8a,
0xf2, 0x15, 0x87, 0x91, 0x55, 0xda, 0x21, 0xac,
0xa7, 0x9b, 0x07, 0x0e, 0x04, 0xc0, 0xee, 0x9a,
0xcd, 0xf5, 0x8f, 0x55, 0xcf, 0xa8, 0x15, 0xa5,
}, false},
{"base", PresetExt, 0, []byte{
0xdc, 0x7f, 0x2e, 0x1c, 0x5e, 0x82, 0x9b, 0x79,
0xeb, 0xb7, 0xef, 0xc7, 0x59, 0x15, 0x0f, 0x54,
0xa8, 0x3a, 0x75, 0xc8, 0xdf, 0x6f, 0xee, 0x4d,
0xce, 0x5d, 0xad, 0xc4, 0x73, 0x6c, 0x58, 0x5d,
0x4d, 0xee, 0xbf, 0xeb, 0x3c, 0x79, 0x69, 0xaf,
0x3a, 0x07, 0x7e, 0x90, 0xb7, 0x7b, 0xb4, 0x74,
0x1d, 0xb0, 0x5d, 0x90, 0x99, 0x7c, 0x86, 0x59,
0xb9, 0x58, 0x91, 0x20, 0x6a, 0xc9, 0x95, 0x2d,
}, false},
{"everything", PresetExt |
PresetDenyNS | PresetDenyTTY | PresetDenyDevel |
PresetLinux32, AllowMultiarch | AllowCAN |
AllowBluetooth, []byte{
0xe9, 0x9d, 0xd3, 0x45, 0xe1, 0x95, 0x41, 0x34,
0x73, 0xd3, 0xcb, 0xee, 0x07, 0xb4, 0xed, 0x57,
0xb9, 0x08, 0xbf, 0xa8, 0x9e, 0xa2, 0x07, 0x2f,
0xe9, 0x34, 0x82, 0x84, 0x7f, 0x50, 0xb5, 0xb7,
0x58, 0xda, 0x17, 0xe7, 0x4c, 0xa2, 0xbb, 0xc0,
0x08, 0x13, 0xde, 0x49, 0xa2, 0xb9, 0xbf, 0x83,
0x4c, 0x02, 0x4e, 0xd4, 0x88, 0x50, 0xbe, 0x69,
0xb6, 0x8a, 0x9a, 0x4c, 0x5f, 0x53, 0xa9, 0xdb,
}, false},
{"strict", PresetStrict, 0, []byte{
0xe8, 0x80, 0x29, 0x8d, 0xf2, 0xbd, 0x67, 0x51,
0xd0, 0x04, 0x0f, 0xc2, 0x1b, 0xc0, 0xed, 0x4c,
0x00, 0xf9, 0x5d, 0xc0, 0xd7, 0xba, 0x50, 0x6c,
0x24, 0x4d, 0x8b, 0x8c, 0xf6, 0x86, 0x6d, 0xba,
0x8e, 0xf4, 0xa3, 0x32, 0x96, 0xf2, 0x87, 0xb6,
0x6c, 0xcc, 0xc1, 0xd7, 0x8e, 0x97, 0x02, 0x65,
0x97, 0xf8, 0x4c, 0xc7, 0xde, 0xc1, 0x57, 0x3e,
0x14, 0x89, 0x60, 0xfb, 0xd3, 0x5c, 0xd7, 0x35,
}, false},
{"strict compat", 0 |
PresetDenyNS | PresetDenyTTY | PresetDenyDevel, 0, []byte{
0x39, 0x87, 0x1b, 0x93, 0xff, 0xaf, 0xc8, 0xb9,
0x79, 0xfc, 0xed, 0xc0, 0xb0, 0xc3, 0x7b, 0x9e,
0x03, 0x92, 0x2f, 0x5b, 0x02, 0x74, 0x8d, 0xc5,
0xc3, 0xc1, 0x7c, 0x92, 0x52, 0x7f, 0x6e, 0x02,
0x2e, 0xde, 0x1f, 0x48, 0xbf, 0xf5, 0x92, 0x46,
0xea, 0x45, 0x2c, 0x0d, 0x1d, 0xe5, 0x48, 0x27,
0x80, 0x8b, 0x1a, 0x6f, 0x84, 0xf3, 0x2b, 0xbd,
0xe1, 0xaa, 0x02, 0xae, 0x30, 0xee, 0xdc, 0xfa,
}, false},
{"hakurei default", PresetExt | PresetDenyDevel, 0, []byte{
0xc6, 0x98, 0xb0, 0x81, 0xff, 0x95, 0x7a, 0xfe,
0x17, 0xa6, 0xd9, 0x43, 0x74, 0x53, 0x7d, 0x37,
0xf2, 0xa6, 0x3f, 0x6f, 0x9d, 0xd7, 0x5d, 0xa7,
0x54, 0x65, 0x42, 0x40, 0x7a, 0x9e, 0x32, 0x47,
0x6e, 0xbd, 0xa3, 0x31, 0x2b, 0xa7, 0x78, 0x5d,
0x7f, 0x61, 0x85, 0x42, 0xbc, 0xfa, 0xf2, 0x7c,
0xa2, 0x7d, 0xcc, 0x2d, 0xdd, 0xba, 0x85, 0x20,
0x69, 0xd2, 0x8b, 0xcf, 0xe8, 0xca, 0xd3, 0x9a,
}, false},
}
buf := make([]byte, 8)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
e := New(Preset(tc.presets, tc.flags), tc.flags)
digest := sha512.New()
if _, err := io.CopyBuffer(digest, e, buf); (err != nil) != tc.wantErr {
t.Errorf("Exporter: error = %v, wantErr %v", err, tc.wantErr)
return
}
if err := e.Close(); err != nil {
t.Errorf("Close: error = %v", err)
}
if got := digest.Sum(nil); !slices.Equal(got, tc.want) {
t.Fatalf("Export() hash = %x, want %x",
got, tc.want)
return
}
})
}
t.Run("close without use", func(t *testing.T) {
e := New(Preset(0, 0), 0)
if err := e.Close(); !errors.Is(err, syscall.EINVAL) {
t.Errorf("Close: error = %v", err)
return
}
})
t.Run("close partial read", func(t *testing.T) {
e := New(Preset(0, 0), 0)
if _, err := e.Read(nil); err != nil {
t.Errorf("Read: error = %v", err)
return
}
// the underlying implementation uses buffered io, so the outcome of this is nondeterministic;
// that is not harmful however, so both outcomes are checked for here
if err := e.Close(); err != nil &&
(!errors.Is(err, syscall.ECANCELED) || !errors.Is(err, syscall.EBADF)) {
t.Errorf("Close: error = %v", err)
return
}
})
}
func BenchmarkExport(b *testing.B) {
buf := make([]byte, 8)
for i := 0; i < b.N; i++ {
e := New(
Preset(PresetExt|PresetDenyNS|PresetDenyTTY|PresetDenyDevel|PresetLinux32,
AllowMultiarch|AllowCAN|AllowBluetooth),
AllowMultiarch|AllowCAN|AllowBluetooth)
if _, err := io.CopyBuffer(io.Discard, e, buf); err != nil {
b.Fatalf("cannot export: %v", err)
}
if err := e.Close(); err != nil {
b.Fatalf("cannot close exporter: %v", err)
}
}
}

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
use strict;
my $command = "mksysnum_linux.pl ". join(' ', @ARGV);
print <<EOF;
// $command
// Code generated by the command above; DO NOT EDIT.
package seccomp
import . "syscall"
var syscallNum = map[string]int{
EOF
my $offset = 0;
my $state = -1;
sub fmt {
my ($name, $num) = @_;
if($num > 999){
# ignore deprecated syscalls that are no longer implemented
# https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/asm-generic/unistd.h?id=refs/heads/master#n716
return;
}
(my $name_upper = $name) =~ y/a-z/A-Z/;
$num = $num + $offset;
if($num > 302){ # not wired in Go standard library
if($state < 0){
print " \"$name\": SYS_$name_upper,\n";
}
else{
print " SYS_$name_upper = $num;\n";
}
}
elsif($state < 0){
print " \"$name\": SYS_$name_upper,\n";
}
else{
return;
}
}
GENERATE:
my $prev;
open(GCC, "gcc -E -dD $ARGV[0] |") || die "can't run gcc";
while(<GCC>){
if(/^#define __NR_Linux\s+([0-9]+)/){
# mips/mips64: extract offset
$offset = $1;
}
elsif(/^#define __NR_syscalls\s+/) {
# ignore redefinitions of __NR_syscalls
}
elsif(/^#define __NR_(\w+)\s+([0-9]+)/){
$prev = $2;
fmt($1, $2);
}
elsif(/^#define __NR3264_(\w+)\s+([0-9]+)/){
$prev = $2;
fmt($1, $2);
}
elsif(/^#define __NR_(\w+)\s+\(\w+\+\s*([0-9]+)\)/){
fmt($1, $prev+$2)
}
elsif(/^#define __NR_(\w+)\s+\(__NR_Linux \+ ([0-9]+)/){
fmt($1, $2);
}
}
if($state < 0){
$state = $state + 1;
print "}\n\nconst (\n";
goto GENERATE;
}
print ")";

View File

@@ -1,229 +0,0 @@
package seccomp
/* flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532 */
import (
. "syscall"
)
type FilterPreset int
const (
// PresetExt are project-specific extensions.
PresetExt FilterPreset = 1 << iota
// PresetDenyNS denies namespace setup syscalls.
PresetDenyNS
// PresetDenyTTY denies faking input.
PresetDenyTTY
// PresetDenyDevel denies development-related syscalls.
PresetDenyDevel
// PresetLinux32 sets PER_LINUX32.
PresetLinux32
)
func Preset(presets FilterPreset, flags ExportFlag) (rules []NativeRule) {
allowedPersonality := PER_LINUX
if presets&PresetLinux32 != 0 {
allowedPersonality = PER_LINUX32
}
presetDevelFinal := presetDevel(ScmpDatum(allowedPersonality))
l := len(presetCommon)
if presets&PresetDenyNS != 0 {
l += len(presetNamespace)
}
if presets&PresetDenyTTY != 0 {
l += len(presetTTY)
}
if presets&PresetDenyDevel != 0 {
l += len(presetDevelFinal)
}
if flags&AllowMultiarch == 0 {
l += len(presetEmu)
}
if presets&PresetExt != 0 {
l += len(presetCommonExt)
if presets&PresetDenyNS != 0 {
l += len(presetNamespaceExt)
}
if flags&AllowMultiarch == 0 {
l += len(presetEmuExt)
}
}
rules = make([]NativeRule, 0, l)
rules = append(rules, presetCommon...)
if presets&PresetDenyNS != 0 {
rules = append(rules, presetNamespace...)
}
if presets&PresetDenyTTY != 0 {
rules = append(rules, presetTTY...)
}
if presets&PresetDenyDevel != 0 {
rules = append(rules, presetDevelFinal...)
}
if flags&AllowMultiarch == 0 {
rules = append(rules, presetEmu...)
}
if presets&PresetExt != 0 {
rules = append(rules, presetCommonExt...)
if presets&PresetDenyNS != 0 {
rules = append(rules, presetNamespaceExt...)
}
if flags&AllowMultiarch == 0 {
rules = append(rules, presetEmuExt...)
}
}
return
}
var (
presetCommon = []NativeRule{
/* Block dmesg */
{ScmpSyscall(SYS_SYSLOG), ScmpErrno(EPERM), nil},
/* Useless old syscall */
{ScmpSyscall(SYS_USELIB), ScmpErrno(EPERM), nil},
/* Don't allow disabling accounting */
{ScmpSyscall(SYS_ACCT), ScmpErrno(EPERM), nil},
/* Don't allow reading current quota use */
{ScmpSyscall(SYS_QUOTACTL), ScmpErrno(EPERM), nil},
/* Don't allow access to the kernel keyring */
{ScmpSyscall(SYS_ADD_KEY), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_KEYCTL), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_REQUEST_KEY), ScmpErrno(EPERM), nil},
/* Scary VM/NUMA ops */
{ScmpSyscall(SYS_MOVE_PAGES), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_MBIND), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_GET_MEMPOLICY), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SET_MEMPOLICY), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_MIGRATE_PAGES), ScmpErrno(EPERM), nil},
}
/* hakurei: project-specific extensions */
presetCommonExt = []NativeRule{
/* system calls for changing the system clock */
{ScmpSyscall(SYS_ADJTIMEX), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CLOCK_ADJTIME), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CLOCK_ADJTIME64), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CLOCK_SETTIME), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CLOCK_SETTIME64), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETTIMEOFDAY), ScmpErrno(EPERM), nil},
/* loading and unloading of kernel modules */
{ScmpSyscall(SYS_DELETE_MODULE), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_FINIT_MODULE), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_INIT_MODULE), ScmpErrno(EPERM), nil},
/* system calls for rebooting and reboot preparation */
{ScmpSyscall(SYS_KEXEC_FILE_LOAD), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_KEXEC_LOAD), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_REBOOT), ScmpErrno(EPERM), nil},
/* system calls for enabling/disabling swap devices */
{ScmpSyscall(SYS_SWAPOFF), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SWAPON), ScmpErrno(EPERM), nil},
}
presetNamespace = []NativeRule{
/* Don't allow subnamespace setups: */
{ScmpSyscall(SYS_UNSHARE), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETNS), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_MOUNT), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_UMOUNT), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_UMOUNT2), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_PIVOT_ROOT), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CHROOT), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CLONE), ScmpErrno(EPERM),
&ScmpArgCmp{cloneArg, SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER}},
/* seccomp can't look into clone3()'s struct clone_args to check whether
* the flags are OK, so we have no choice but to block clone3().
* Return ENOSYS so user-space will fall back to clone().
* (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
*/
{ScmpSyscall(SYS_CLONE3), ScmpErrno(ENOSYS), nil},
/* New mount manipulation APIs can also change our VFS. There's no
* legitimate reason to do these in the sandbox, so block all of them
* rather than thinking about which ones might be dangerous.
* (CVE-2021-41133) */
{ScmpSyscall(SYS_OPEN_TREE), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_MOVE_MOUNT), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_FSOPEN), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_FSCONFIG), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_FSMOUNT), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_FSPICK), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_MOUNT_SETATTR), ScmpErrno(ENOSYS), nil},
}
/* hakurei: project-specific extensions */
presetNamespaceExt = []NativeRule{
/* changing file ownership */
{ScmpSyscall(SYS_CHOWN), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_CHOWN32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_FCHOWN), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_FCHOWN32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_FCHOWNAT), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_LCHOWN), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_LCHOWN32), ScmpErrno(EPERM), nil},
/* system calls for changing user ID and group ID credentials */
{ScmpSyscall(SYS_SETGID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETGID32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETGROUPS), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETGROUPS32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETREGID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETREGID32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETRESGID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETRESGID32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETRESUID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETRESUID32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETREUID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETREUID32), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETUID), ScmpErrno(EPERM), nil},
{ScmpSyscall(SYS_SETUID32), ScmpErrno(EPERM), nil},
}
presetTTY = []NativeRule{
/* Don't allow faking input to the controlling tty (CVE-2017-5226) */
{ScmpSyscall(SYS_IOCTL), ScmpErrno(EPERM),
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCSTI}},
/* In the unlikely event that the controlling tty is a Linux virtual
* console (/dev/tty2 or similar), copy/paste operations have an effect
* similar to TIOCSTI (CVE-2023-28100) */
{ScmpSyscall(SYS_IOCTL), ScmpErrno(EPERM),
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCLINUX}},
}
presetEmu = []NativeRule{
/* modify_ldt is a historic source of interesting information leaks,
* so it's disabled as a hardening measure.
* However, it is required to run old 16-bit applications
* as well as some Wine patches, so it's allowed in multiarch. */
{ScmpSyscall(SYS_MODIFY_LDT), ScmpErrno(EPERM), nil},
}
/* hakurei: project-specific extensions */
presetEmuExt = []NativeRule{
{ScmpSyscall(SYS_SUBPAGE_PROT), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_SWITCH_ENDIAN), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_VM86), ScmpErrno(ENOSYS), nil},
{ScmpSyscall(SYS_VM86OLD), ScmpErrno(ENOSYS), nil},
}
)
func presetDevel(allowedPersonality ScmpDatum) []NativeRule {
return []NativeRule{
/* Profiling operations; we expect these to be done by tools from outside
* the sandbox. In particular perf has been the source of many CVEs. */
{ScmpSyscall(SYS_PERF_EVENT_OPEN), ScmpErrno(EPERM), nil},
/* Don't allow you to switch to bsd emulation or whatnot */
{ScmpSyscall(SYS_PERSONALITY), ScmpErrno(EPERM),
&ScmpArgCmp{0, SCMP_CMP_NE, allowedPersonality, 0}},
{ScmpSyscall(SYS_PTRACE), ScmpErrno(EPERM), nil},
}
}

View File

@@ -1,7 +0,0 @@
//go:build s390 || s390x
package seccomp
/* Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
* and flags arguments are reversed so the flags come second */
const cloneArg = 1

View File

@@ -1,6 +0,0 @@
//go:build !s390 && !s390x
package seccomp
/* Normally the flags come first */
const cloneArg = 0

View File

@@ -1,78 +0,0 @@
package seccomp
import (
"context"
"errors"
"syscall"
"git.gensokyo.uk/security/hakurei/helper/proc"
)
const (
PresetStrict = PresetExt | PresetDenyNS | PresetDenyTTY | PresetDenyDevel
)
// New returns an inactive Encoder instance.
func New(rules []NativeRule, flags ExportFlag) *Encoder { return &Encoder{newExporter(rules, flags)} }
// Load loads a filter into the kernel.
func Load(rules []NativeRule, flags ExportFlag) error { return Export(-1, rules, flags) }
/*
An Encoder writes a BPF program to an output stream.
Methods of Encoder are not safe for concurrent use.
An Encoder must not be copied after first use.
*/
type Encoder struct {
*exporter
}
func (e *Encoder) Read(p []byte) (n int, err error) {
if err = e.prepare(); err != nil {
return
}
return e.r.Read(p)
}
func (e *Encoder) Close() error {
if e.r == nil {
return syscall.EINVAL
}
// this hangs if the cgo thread fails to exit
return errors.Join(e.closeWrite(), <-e.exportErr)
}
// NewFile returns an instance of exporter implementing [proc.File].
func NewFile(rules []NativeRule, flags ExportFlag) proc.File {
return &File{rules: rules, flags: flags}
}
// File implements [proc.File] and provides access to the read end of exporter pipe.
type File struct {
rules []NativeRule
flags ExportFlag
proc.BaseFile
}
func (f *File) ErrCount() int { return 2 }
func (f *File) Fulfill(ctx context.Context, dispatchErr func(error)) error {
e := newExporter(f.rules, f.flags)
if err := e.prepare(); err != nil {
return err
}
f.Set(e.r)
go func() {
select {
case err := <-e.exportErr:
dispatchErr(nil)
dispatchErr(err)
case <-ctx.Done():
dispatchErr(e.closeWrite())
dispatchErr(<-e.exportErr)
}
}()
return nil
}

View File

@@ -1,60 +0,0 @@
// Package seccomp provides high level wrappers around libseccomp.
package seccomp
import (
"os"
"runtime"
"sync"
)
type exporter struct {
rules []NativeRule
flags ExportFlag
r, w *os.File
prepareOnce sync.Once
prepareErr error
closeOnce sync.Once
closeErr error
exportErr <-chan error
}
func (e *exporter) prepare() error {
e.prepareOnce.Do(func() {
if r, w, err := os.Pipe(); err != nil {
e.prepareErr = err
return
} else {
e.r, e.w = r, w
}
ec := make(chan error, 1)
go func(fd uintptr) {
ec <- Export(int(fd), e.rules, e.flags)
close(ec)
_ = e.closeWrite()
runtime.KeepAlive(e.w)
}(e.w.Fd())
e.exportErr = ec
runtime.SetFinalizer(e, (*exporter).closeWrite)
})
return e.prepareErr
}
func (e *exporter) closeWrite() error {
e.closeOnce.Do(func() {
if e.w == nil {
panic("closeWrite called on invalid exporter")
}
e.closeErr = e.w.Close()
// no need for a finalizer anymore
runtime.SetFinalizer(e, nil)
})
return e.closeErr
}
func newExporter(rules []NativeRule, flags ExportFlag) *exporter {
return &exporter{rules: rules, flags: flags}
}

View File

@@ -1,65 +0,0 @@
package seccomp_test
import (
"errors"
"runtime"
"syscall"
"testing"
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
)
func TestLibraryError(t *testing.T) {
testCases := []struct {
name string
sample *seccomp.LibraryError
want string
wantIs bool
compare error
}{
{
"full",
&seccomp.LibraryError{Prefix: "seccomp_export_bpf failed", Seccomp: syscall.ECANCELED, Errno: syscall.EBADF},
"seccomp_export_bpf failed: operation canceled (bad file descriptor)",
true,
&seccomp.LibraryError{Prefix: "seccomp_export_bpf failed", Seccomp: syscall.ECANCELED, Errno: syscall.EBADF},
},
{
"errno only",
&seccomp.LibraryError{Prefix: "seccomp_init failed", Errno: syscall.ENOMEM},
"seccomp_init failed: cannot allocate memory",
false,
nil,
},
{
"seccomp only",
&seccomp.LibraryError{Prefix: "internal libseccomp failure", Seccomp: syscall.EFAULT},
"internal libseccomp failure: bad address",
true,
syscall.EFAULT,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if errors.Is(tc.sample, tc.compare) != tc.wantIs {
t.Errorf("errors.Is(%#v, %#v) did not return %v",
tc.sample, tc.compare, tc.wantIs)
}
if got := tc.sample.Error(); got != tc.want {
t.Errorf("Error: %q, want %q",
got, tc.want)
}
})
}
t.Run("invalid", func(t *testing.T) {
wantPanic := "invalid libseccomp error"
defer func() {
if r := recover(); r != wantPanic {
t.Errorf("panic: %q, want %q", r, wantPanic)
}
}()
runtime.KeepAlive(new(seccomp.LibraryError).Error())
})
}

View File

@@ -1,28 +0,0 @@
package seccomp
import "iter"
// Syscalls returns an iterator over all wired syscalls.
func Syscalls() iter.Seq2[string, int] {
return func(yield func(string, int) bool) {
for name, num := range syscallNum {
if !yield(name, num) {
return
}
}
for name, num := range syscallNumExtra {
if !yield(name, num) {
return
}
}
}
}
// SyscallResolveName resolves a syscall number from its string representation.
func SyscallResolveName(name string) (num int, ok bool) {
if num, ok = syscallNum[name]; ok {
return
}
num, ok = syscallNumExtra[name]
return
}

View File

@@ -1,54 +0,0 @@
package seccomp
/*
#cgo linux pkg-config: --static libseccomp
#include <seccomp.h>
#include <sys/personality.h>
*/
import "C"
const (
PER_LINUX = C.PER_LINUX
PER_LINUX32 = C.PER_LINUX32
)
var syscallNumExtra = map[string]int{
"umount": SYS_UMOUNT,
"subpage_prot": SYS_SUBPAGE_PROT,
"switch_endian": SYS_SWITCH_ENDIAN,
"vm86": SYS_VM86,
"vm86old": SYS_VM86OLD,
"clock_adjtime64": SYS_CLOCK_ADJTIME64,
"clock_settime64": SYS_CLOCK_SETTIME64,
"chown32": SYS_CHOWN32,
"fchown32": SYS_FCHOWN32,
"lchown32": SYS_LCHOWN32,
"setgid32": SYS_SETGID32,
"setgroups32": SYS_SETGROUPS32,
"setregid32": SYS_SETREGID32,
"setresgid32": SYS_SETRESGID32,
"setresuid32": SYS_SETRESUID32,
"setreuid32": SYS_SETREUID32,
"setuid32": SYS_SETUID32,
}
const (
SYS_UMOUNT = C.__SNR_umount
SYS_SUBPAGE_PROT = C.__SNR_subpage_prot
SYS_SWITCH_ENDIAN = C.__SNR_switch_endian
SYS_VM86 = C.__SNR_vm86
SYS_VM86OLD = C.__SNR_vm86old
SYS_CLOCK_ADJTIME64 = C.__SNR_clock_adjtime64
SYS_CLOCK_SETTIME64 = C.__SNR_clock_settime64
SYS_CHOWN32 = C.__SNR_chown32
SYS_FCHOWN32 = C.__SNR_fchown32
SYS_LCHOWN32 = C.__SNR_lchown32
SYS_SETGID32 = C.__SNR_setgid32
SYS_SETGROUPS32 = C.__SNR_setgroups32
SYS_SETREGID32 = C.__SNR_setregid32
SYS_SETRESGID32 = C.__SNR_setresgid32
SYS_SETRESUID32 = C.__SNR_setresuid32
SYS_SETREUID32 = C.__SNR_setreuid32
SYS_SETUID32 = C.__SNR_setuid32
)

View File

@@ -1,459 +0,0 @@
// mksysnum_linux.pl /usr/include/asm/unistd_64.h
// Code generated by the command above; DO NOT EDIT.
package seccomp
import . "syscall"
var syscallNum = map[string]int{
"read": SYS_READ,
"write": SYS_WRITE,
"open": SYS_OPEN,
"close": SYS_CLOSE,
"stat": SYS_STAT,
"fstat": SYS_FSTAT,
"lstat": SYS_LSTAT,
"poll": SYS_POLL,
"lseek": SYS_LSEEK,
"mmap": SYS_MMAP,
"mprotect": SYS_MPROTECT,
"munmap": SYS_MUNMAP,
"brk": SYS_BRK,
"rt_sigaction": SYS_RT_SIGACTION,
"rt_sigprocmask": SYS_RT_SIGPROCMASK,
"rt_sigreturn": SYS_RT_SIGRETURN,
"ioctl": SYS_IOCTL,
"pread64": SYS_PREAD64,
"pwrite64": SYS_PWRITE64,
"readv": SYS_READV,
"writev": SYS_WRITEV,
"access": SYS_ACCESS,
"pipe": SYS_PIPE,
"select": SYS_SELECT,
"sched_yield": SYS_SCHED_YIELD,
"mremap": SYS_MREMAP,
"msync": SYS_MSYNC,
"mincore": SYS_MINCORE,
"madvise": SYS_MADVISE,
"shmget": SYS_SHMGET,
"shmat": SYS_SHMAT,
"shmctl": SYS_SHMCTL,
"dup": SYS_DUP,
"dup2": SYS_DUP2,
"pause": SYS_PAUSE,
"nanosleep": SYS_NANOSLEEP,
"getitimer": SYS_GETITIMER,
"alarm": SYS_ALARM,
"setitimer": SYS_SETITIMER,
"getpid": SYS_GETPID,
"sendfile": SYS_SENDFILE,
"socket": SYS_SOCKET,
"connect": SYS_CONNECT,
"accept": SYS_ACCEPT,
"sendto": SYS_SENDTO,
"recvfrom": SYS_RECVFROM,
"sendmsg": SYS_SENDMSG,
"recvmsg": SYS_RECVMSG,
"shutdown": SYS_SHUTDOWN,
"bind": SYS_BIND,
"listen": SYS_LISTEN,
"getsockname": SYS_GETSOCKNAME,
"getpeername": SYS_GETPEERNAME,
"socketpair": SYS_SOCKETPAIR,
"setsockopt": SYS_SETSOCKOPT,
"getsockopt": SYS_GETSOCKOPT,
"clone": SYS_CLONE,
"fork": SYS_FORK,
"vfork": SYS_VFORK,
"execve": SYS_EXECVE,
"exit": SYS_EXIT,
"wait4": SYS_WAIT4,
"kill": SYS_KILL,
"uname": SYS_UNAME,
"semget": SYS_SEMGET,
"semop": SYS_SEMOP,
"semctl": SYS_SEMCTL,
"shmdt": SYS_SHMDT,
"msgget": SYS_MSGGET,
"msgsnd": SYS_MSGSND,
"msgrcv": SYS_MSGRCV,
"msgctl": SYS_MSGCTL,
"fcntl": SYS_FCNTL,
"flock": SYS_FLOCK,
"fsync": SYS_FSYNC,
"fdatasync": SYS_FDATASYNC,
"truncate": SYS_TRUNCATE,
"ftruncate": SYS_FTRUNCATE,
"getdents": SYS_GETDENTS,
"getcwd": SYS_GETCWD,
"chdir": SYS_CHDIR,
"fchdir": SYS_FCHDIR,
"rename": SYS_RENAME,
"mkdir": SYS_MKDIR,
"rmdir": SYS_RMDIR,
"creat": SYS_CREAT,
"link": SYS_LINK,
"unlink": SYS_UNLINK,
"symlink": SYS_SYMLINK,
"readlink": SYS_READLINK,
"chmod": SYS_CHMOD,
"fchmod": SYS_FCHMOD,
"chown": SYS_CHOWN,
"fchown": SYS_FCHOWN,
"lchown": SYS_LCHOWN,
"umask": SYS_UMASK,
"gettimeofday": SYS_GETTIMEOFDAY,
"getrlimit": SYS_GETRLIMIT,
"getrusage": SYS_GETRUSAGE,
"sysinfo": SYS_SYSINFO,
"times": SYS_TIMES,
"ptrace": SYS_PTRACE,
"getuid": SYS_GETUID,
"syslog": SYS_SYSLOG,
"getgid": SYS_GETGID,
"setuid": SYS_SETUID,
"setgid": SYS_SETGID,
"geteuid": SYS_GETEUID,
"getegid": SYS_GETEGID,
"setpgid": SYS_SETPGID,
"getppid": SYS_GETPPID,
"getpgrp": SYS_GETPGRP,
"setsid": SYS_SETSID,
"setreuid": SYS_SETREUID,
"setregid": SYS_SETREGID,
"getgroups": SYS_GETGROUPS,
"setgroups": SYS_SETGROUPS,
"setresuid": SYS_SETRESUID,
"getresuid": SYS_GETRESUID,
"setresgid": SYS_SETRESGID,
"getresgid": SYS_GETRESGID,
"getpgid": SYS_GETPGID,
"setfsuid": SYS_SETFSUID,
"setfsgid": SYS_SETFSGID,
"getsid": SYS_GETSID,
"capget": SYS_CAPGET,
"capset": SYS_CAPSET,
"rt_sigpending": SYS_RT_SIGPENDING,
"rt_sigtimedwait": SYS_RT_SIGTIMEDWAIT,
"rt_sigqueueinfo": SYS_RT_SIGQUEUEINFO,
"rt_sigsuspend": SYS_RT_SIGSUSPEND,
"sigaltstack": SYS_SIGALTSTACK,
"utime": SYS_UTIME,
"mknod": SYS_MKNOD,
"uselib": SYS_USELIB,
"personality": SYS_PERSONALITY,
"ustat": SYS_USTAT,
"statfs": SYS_STATFS,
"fstatfs": SYS_FSTATFS,
"sysfs": SYS_SYSFS,
"getpriority": SYS_GETPRIORITY,
"setpriority": SYS_SETPRIORITY,
"sched_setparam": SYS_SCHED_SETPARAM,
"sched_getparam": SYS_SCHED_GETPARAM,
"sched_setscheduler": SYS_SCHED_SETSCHEDULER,
"sched_getscheduler": SYS_SCHED_GETSCHEDULER,
"sched_get_priority_max": SYS_SCHED_GET_PRIORITY_MAX,
"sched_get_priority_min": SYS_SCHED_GET_PRIORITY_MIN,
"sched_rr_get_interval": SYS_SCHED_RR_GET_INTERVAL,
"mlock": SYS_MLOCK,
"munlock": SYS_MUNLOCK,
"mlockall": SYS_MLOCKALL,
"munlockall": SYS_MUNLOCKALL,
"vhangup": SYS_VHANGUP,
"modify_ldt": SYS_MODIFY_LDT,
"pivot_root": SYS_PIVOT_ROOT,
"_sysctl": SYS__SYSCTL,
"prctl": SYS_PRCTL,
"arch_prctl": SYS_ARCH_PRCTL,
"adjtimex": SYS_ADJTIMEX,
"setrlimit": SYS_SETRLIMIT,
"chroot": SYS_CHROOT,
"sync": SYS_SYNC,
"acct": SYS_ACCT,
"settimeofday": SYS_SETTIMEOFDAY,
"mount": SYS_MOUNT,
"umount2": SYS_UMOUNT2,
"swapon": SYS_SWAPON,
"swapoff": SYS_SWAPOFF,
"reboot": SYS_REBOOT,
"sethostname": SYS_SETHOSTNAME,
"setdomainname": SYS_SETDOMAINNAME,
"iopl": SYS_IOPL,
"ioperm": SYS_IOPERM,
"create_module": SYS_CREATE_MODULE,
"init_module": SYS_INIT_MODULE,
"delete_module": SYS_DELETE_MODULE,
"get_kernel_syms": SYS_GET_KERNEL_SYMS,
"query_module": SYS_QUERY_MODULE,
"quotactl": SYS_QUOTACTL,
"nfsservctl": SYS_NFSSERVCTL,
"getpmsg": SYS_GETPMSG,
"putpmsg": SYS_PUTPMSG,
"afs_syscall": SYS_AFS_SYSCALL,
"tuxcall": SYS_TUXCALL,
"security": SYS_SECURITY,
"gettid": SYS_GETTID,
"readahead": SYS_READAHEAD,
"setxattr": SYS_SETXATTR,
"lsetxattr": SYS_LSETXATTR,
"fsetxattr": SYS_FSETXATTR,
"getxattr": SYS_GETXATTR,
"lgetxattr": SYS_LGETXATTR,
"fgetxattr": SYS_FGETXATTR,
"listxattr": SYS_LISTXATTR,
"llistxattr": SYS_LLISTXATTR,
"flistxattr": SYS_FLISTXATTR,
"removexattr": SYS_REMOVEXATTR,
"lremovexattr": SYS_LREMOVEXATTR,
"fremovexattr": SYS_FREMOVEXATTR,
"tkill": SYS_TKILL,
"time": SYS_TIME,
"futex": SYS_FUTEX,
"sched_setaffinity": SYS_SCHED_SETAFFINITY,
"sched_getaffinity": SYS_SCHED_GETAFFINITY,
"set_thread_area": SYS_SET_THREAD_AREA,
"io_setup": SYS_IO_SETUP,
"io_destroy": SYS_IO_DESTROY,
"io_getevents": SYS_IO_GETEVENTS,
"io_submit": SYS_IO_SUBMIT,
"io_cancel": SYS_IO_CANCEL,
"get_thread_area": SYS_GET_THREAD_AREA,
"lookup_dcookie": SYS_LOOKUP_DCOOKIE,
"epoll_create": SYS_EPOLL_CREATE,
"epoll_ctl_old": SYS_EPOLL_CTL_OLD,
"epoll_wait_old": SYS_EPOLL_WAIT_OLD,
"remap_file_pages": SYS_REMAP_FILE_PAGES,
"getdents64": SYS_GETDENTS64,
"set_tid_address": SYS_SET_TID_ADDRESS,
"restart_syscall": SYS_RESTART_SYSCALL,
"semtimedop": SYS_SEMTIMEDOP,
"fadvise64": SYS_FADVISE64,
"timer_create": SYS_TIMER_CREATE,
"timer_settime": SYS_TIMER_SETTIME,
"timer_gettime": SYS_TIMER_GETTIME,
"timer_getoverrun": SYS_TIMER_GETOVERRUN,
"timer_delete": SYS_TIMER_DELETE,
"clock_settime": SYS_CLOCK_SETTIME,
"clock_gettime": SYS_CLOCK_GETTIME,
"clock_getres": SYS_CLOCK_GETRES,
"clock_nanosleep": SYS_CLOCK_NANOSLEEP,
"exit_group": SYS_EXIT_GROUP,
"epoll_wait": SYS_EPOLL_WAIT,
"epoll_ctl": SYS_EPOLL_CTL,
"tgkill": SYS_TGKILL,
"utimes": SYS_UTIMES,
"vserver": SYS_VSERVER,
"mbind": SYS_MBIND,
"set_mempolicy": SYS_SET_MEMPOLICY,
"get_mempolicy": SYS_GET_MEMPOLICY,
"mq_open": SYS_MQ_OPEN,
"mq_unlink": SYS_MQ_UNLINK,
"mq_timedsend": SYS_MQ_TIMEDSEND,
"mq_timedreceive": SYS_MQ_TIMEDRECEIVE,
"mq_notify": SYS_MQ_NOTIFY,
"mq_getsetattr": SYS_MQ_GETSETATTR,
"kexec_load": SYS_KEXEC_LOAD,
"waitid": SYS_WAITID,
"add_key": SYS_ADD_KEY,
"request_key": SYS_REQUEST_KEY,
"keyctl": SYS_KEYCTL,
"ioprio_set": SYS_IOPRIO_SET,
"ioprio_get": SYS_IOPRIO_GET,
"inotify_init": SYS_INOTIFY_INIT,
"inotify_add_watch": SYS_INOTIFY_ADD_WATCH,
"inotify_rm_watch": SYS_INOTIFY_RM_WATCH,
"migrate_pages": SYS_MIGRATE_PAGES,
"openat": SYS_OPENAT,
"mkdirat": SYS_MKDIRAT,
"mknodat": SYS_MKNODAT,
"fchownat": SYS_FCHOWNAT,
"futimesat": SYS_FUTIMESAT,
"newfstatat": SYS_NEWFSTATAT,
"unlinkat": SYS_UNLINKAT,
"renameat": SYS_RENAMEAT,
"linkat": SYS_LINKAT,
"symlinkat": SYS_SYMLINKAT,
"readlinkat": SYS_READLINKAT,
"fchmodat": SYS_FCHMODAT,
"faccessat": SYS_FACCESSAT,
"pselect6": SYS_PSELECT6,
"ppoll": SYS_PPOLL,
"unshare": SYS_UNSHARE,
"set_robust_list": SYS_SET_ROBUST_LIST,
"get_robust_list": SYS_GET_ROBUST_LIST,
"splice": SYS_SPLICE,
"tee": SYS_TEE,
"sync_file_range": SYS_SYNC_FILE_RANGE,
"vmsplice": SYS_VMSPLICE,
"move_pages": SYS_MOVE_PAGES,
"utimensat": SYS_UTIMENSAT,
"epoll_pwait": SYS_EPOLL_PWAIT,
"signalfd": SYS_SIGNALFD,
"timerfd_create": SYS_TIMERFD_CREATE,
"eventfd": SYS_EVENTFD,
"fallocate": SYS_FALLOCATE,
"timerfd_settime": SYS_TIMERFD_SETTIME,
"timerfd_gettime": SYS_TIMERFD_GETTIME,
"accept4": SYS_ACCEPT4,
"signalfd4": SYS_SIGNALFD4,
"eventfd2": SYS_EVENTFD2,
"epoll_create1": SYS_EPOLL_CREATE1,
"dup3": SYS_DUP3,
"pipe2": SYS_PIPE2,
"inotify_init1": SYS_INOTIFY_INIT1,
"preadv": SYS_PREADV,
"pwritev": SYS_PWRITEV,
"rt_tgsigqueueinfo": SYS_RT_TGSIGQUEUEINFO,
"perf_event_open": SYS_PERF_EVENT_OPEN,
"recvmmsg": SYS_RECVMMSG,
"fanotify_init": SYS_FANOTIFY_INIT,
"fanotify_mark": SYS_FANOTIFY_MARK,
"prlimit64": SYS_PRLIMIT64,
"name_to_handle_at": SYS_NAME_TO_HANDLE_AT,
"open_by_handle_at": SYS_OPEN_BY_HANDLE_AT,
"clock_adjtime": SYS_CLOCK_ADJTIME,
"syncfs": SYS_SYNCFS,
"sendmmsg": SYS_SENDMMSG,
"setns": SYS_SETNS,
"getcpu": SYS_GETCPU,
"process_vm_readv": SYS_PROCESS_VM_READV,
"process_vm_writev": SYS_PROCESS_VM_WRITEV,
"kcmp": SYS_KCMP,
"finit_module": SYS_FINIT_MODULE,
"sched_setattr": SYS_SCHED_SETATTR,
"sched_getattr": SYS_SCHED_GETATTR,
"renameat2": SYS_RENAMEAT2,
"seccomp": SYS_SECCOMP,
"getrandom": SYS_GETRANDOM,
"memfd_create": SYS_MEMFD_CREATE,
"kexec_file_load": SYS_KEXEC_FILE_LOAD,
"bpf": SYS_BPF,
"execveat": SYS_EXECVEAT,
"userfaultfd": SYS_USERFAULTFD,
"membarrier": SYS_MEMBARRIER,
"mlock2": SYS_MLOCK2,
"copy_file_range": SYS_COPY_FILE_RANGE,
"preadv2": SYS_PREADV2,
"pwritev2": SYS_PWRITEV2,
"pkey_mprotect": SYS_PKEY_MPROTECT,
"pkey_alloc": SYS_PKEY_ALLOC,
"pkey_free": SYS_PKEY_FREE,
"statx": SYS_STATX,
"io_pgetevents": SYS_IO_PGETEVENTS,
"rseq": SYS_RSEQ,
"uretprobe": SYS_URETPROBE,
"pidfd_send_signal": SYS_PIDFD_SEND_SIGNAL,
"io_uring_setup": SYS_IO_URING_SETUP,
"io_uring_enter": SYS_IO_URING_ENTER,
"io_uring_register": SYS_IO_URING_REGISTER,
"open_tree": SYS_OPEN_TREE,
"move_mount": SYS_MOVE_MOUNT,
"fsopen": SYS_FSOPEN,
"fsconfig": SYS_FSCONFIG,
"fsmount": SYS_FSMOUNT,
"fspick": SYS_FSPICK,
"pidfd_open": SYS_PIDFD_OPEN,
"clone3": SYS_CLONE3,
"close_range": SYS_CLOSE_RANGE,
"openat2": SYS_OPENAT2,
"pidfd_getfd": SYS_PIDFD_GETFD,
"faccessat2": SYS_FACCESSAT2,
"process_madvise": SYS_PROCESS_MADVISE,
"epoll_pwait2": SYS_EPOLL_PWAIT2,
"mount_setattr": SYS_MOUNT_SETATTR,
"quotactl_fd": SYS_QUOTACTL_FD,
"landlock_create_ruleset": SYS_LANDLOCK_CREATE_RULESET,
"landlock_add_rule": SYS_LANDLOCK_ADD_RULE,
"landlock_restrict_self": SYS_LANDLOCK_RESTRICT_SELF,
"memfd_secret": SYS_MEMFD_SECRET,
"process_mrelease": SYS_PROCESS_MRELEASE,
"futex_waitv": SYS_FUTEX_WAITV,
"set_mempolicy_home_node": SYS_SET_MEMPOLICY_HOME_NODE,
"cachestat": SYS_CACHESTAT,
"fchmodat2": SYS_FCHMODAT2,
"map_shadow_stack": SYS_MAP_SHADOW_STACK,
"futex_wake": SYS_FUTEX_WAKE,
"futex_wait": SYS_FUTEX_WAIT,
"futex_requeue": SYS_FUTEX_REQUEUE,
"statmount": SYS_STATMOUNT,
"listmount": SYS_LISTMOUNT,
"lsm_get_self_attr": SYS_LSM_GET_SELF_ATTR,
"lsm_set_self_attr": SYS_LSM_SET_SELF_ATTR,
"lsm_list_modules": SYS_LSM_LIST_MODULES,
"mseal": SYS_MSEAL,
}
const (
SYS_NAME_TO_HANDLE_AT = 303
SYS_OPEN_BY_HANDLE_AT = 304
SYS_CLOCK_ADJTIME = 305
SYS_SYNCFS = 306
SYS_SENDMMSG = 307
SYS_SETNS = 308
SYS_GETCPU = 309
SYS_PROCESS_VM_READV = 310
SYS_PROCESS_VM_WRITEV = 311
SYS_KCMP = 312
SYS_FINIT_MODULE = 313
SYS_SCHED_SETATTR = 314
SYS_SCHED_GETATTR = 315
SYS_RENAMEAT2 = 316
SYS_SECCOMP = 317
SYS_GETRANDOM = 318
SYS_MEMFD_CREATE = 319
SYS_KEXEC_FILE_LOAD = 320
SYS_BPF = 321
SYS_EXECVEAT = 322
SYS_USERFAULTFD = 323
SYS_MEMBARRIER = 324
SYS_MLOCK2 = 325
SYS_COPY_FILE_RANGE = 326
SYS_PREADV2 = 327
SYS_PWRITEV2 = 328
SYS_PKEY_MPROTECT = 329
SYS_PKEY_ALLOC = 330
SYS_PKEY_FREE = 331
SYS_STATX = 332
SYS_IO_PGETEVENTS = 333
SYS_RSEQ = 334
SYS_URETPROBE = 335
SYS_PIDFD_SEND_SIGNAL = 424
SYS_IO_URING_SETUP = 425
SYS_IO_URING_ENTER = 426
SYS_IO_URING_REGISTER = 427
SYS_OPEN_TREE = 428
SYS_MOVE_MOUNT = 429
SYS_FSOPEN = 430
SYS_FSCONFIG = 431
SYS_FSMOUNT = 432
SYS_FSPICK = 433
SYS_PIDFD_OPEN = 434
SYS_CLONE3 = 435
SYS_CLOSE_RANGE = 436
SYS_OPENAT2 = 437
SYS_PIDFD_GETFD = 438
SYS_FACCESSAT2 = 439
SYS_PROCESS_MADVISE = 440
SYS_EPOLL_PWAIT2 = 441
SYS_MOUNT_SETATTR = 442
SYS_QUOTACTL_FD = 443
SYS_LANDLOCK_CREATE_RULESET = 444
SYS_LANDLOCK_ADD_RULE = 445
SYS_LANDLOCK_RESTRICT_SELF = 446
SYS_MEMFD_SECRET = 447
SYS_PROCESS_MRELEASE = 448
SYS_FUTEX_WAITV = 449
SYS_SET_MEMPOLICY_HOME_NODE = 450
SYS_CACHESTAT = 451
SYS_FCHMODAT2 = 452
SYS_MAP_SHADOW_STACK = 453
SYS_FUTEX_WAKE = 454
SYS_FUTEX_WAIT = 455
SYS_FUTEX_REQUEUE = 456
SYS_STATMOUNT = 457
SYS_LISTMOUNT = 458
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
SYS_MSEAL = 462
)

View File

@@ -1,20 +0,0 @@
package seccomp
import (
"testing"
)
func TestSyscallResolveName(t *testing.T) {
for name, want := range Syscalls() {
t.Run(name, func(t *testing.T) {
if got := syscallResolveName(name); got != want {
t.Errorf("syscallResolveName(%q) = %d, want %d",
name, got, want)
}
if got, ok := SyscallResolveName(name); !ok || got != want {
t.Errorf("SyscallResolveName(%q) = %d, want %d",
name, got, want)
}
})
}
}

View File

@@ -1,30 +0,0 @@
package vfs
import "strings"
func Unmangle(s string) string {
if !strings.ContainsRune(s, '\\') {
return s
}
v := make([]byte, len(s))
var (
j int
c byte
)
for i := 0; i < len(s); i++ {
c = s[i]
if c == '\\' && len(s) > i+3 &&
(s[i+1] == '0' || s[i+1] == '1') &&
(s[i+2] >= '0' && s[i+2] <= '7') &&
(s[i+3] >= '0' && s[i+3] <= '7') {
c = ((s[i+1] - '0') << 6) |
((s[i+2] - '0') << 3) |
(s[i+3] - '0')
i += 3
}
v[j] = c
j++
}
return string(v[:j])
}

View File

@@ -1,27 +0,0 @@
package vfs_test
import (
"testing"
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
)
func TestUnmangle(t *testing.T) {
testCases := []struct {
want string
sample string
}{
{`\, `, `\134\054\040`},
{`(10) source -- maybe empty string`, `(10)\040source\040--\040maybe empty string`},
}
for _, tc := range testCases {
t.Run(tc.want, func(t *testing.T) {
got := vfs.Unmangle(tc.sample)
if got != tc.want {
t.Errorf("Unmangle: %q, want %q",
got, tc.want)
}
})
}
}

View File

@@ -1,260 +0,0 @@
// Package vfs provides bindings and iterators over proc_pid_mountinfo(5).
package vfs
import (
"bufio"
"errors"
"fmt"
"io"
"iter"
"slices"
"strconv"
"strings"
"syscall"
)
const (
MS_NOSYMFOLLOW = 0x100
)
var (
ErrMountInfoFields = errors.New("unexpected field count")
ErrMountInfoEmpty = errors.New("unexpected empty field")
ErrMountInfoDevno = errors.New("bad maj:min field")
ErrMountInfoSep = errors.New("bad optional fields separator")
)
type (
// A MountInfoDecoder reads and decodes proc_pid_mountinfo(5) entries from an input stream.
MountInfoDecoder struct {
s *bufio.Scanner
m *MountInfo
current *MountInfo
parseErr error
complete bool
}
// MountInfo represents the contents of a proc_pid_mountinfo(5) document.
MountInfo struct {
Next *MountInfo
MountInfoEntry
}
// MountInfoEntry represents a proc_pid_mountinfo(5) entry.
MountInfoEntry struct {
// mount ID: a unique ID for the mount (may be reused after umount(2)).
ID int `json:"id"`
// parent ID: the ID of the parent mount (or of self for the root of this mount namespace's mount tree).
Parent int `json:"parent"`
// major:minor: the value of st_dev for files on this filesystem (see stat(2)).
Devno DevT `json:"devno"`
// root: the pathname of the directory in the filesystem which forms the root of this mount.
Root string `json:"root"`
// mount point: the pathname of the mount point relative to the process's root directory.
Target string `json:"target"`
// mount options: per-mount options (see mount(2)).
VfsOptstr string `json:"vfs_optstr"`
// optional fields: zero or more fields of the form "tag[:value]"; see below.
// separator: the end of the optional fields is marked by a single hyphen.
OptFields []string `json:"opt_fields"`
// filesystem type: the filesystem type in the form "type[.subtype]".
FsType string `json:"fstype"`
// mount source: filesystem-specific information or "none".
Source string `json:"source"`
// super options: per-superblock options (see mount(2)).
FsOptstr string `json:"fs_optstr"`
}
DevT [2]int
)
// Flags interprets VfsOptstr and returns the resulting flags and unmatched options.
func (e *MountInfoEntry) Flags() (flags uintptr, unmatched []string) {
for _, s := range strings.Split(e.VfsOptstr, ",") {
switch s {
case "rw":
case "ro":
flags |= syscall.MS_RDONLY
case "nosuid":
flags |= syscall.MS_NOSUID
case "nodev":
flags |= syscall.MS_NODEV
case "noexec":
flags |= syscall.MS_NOEXEC
case "nosymfollow":
flags |= MS_NOSYMFOLLOW
case "noatime":
flags |= syscall.MS_NOATIME
case "nodiratime":
flags |= syscall.MS_NODIRATIME
case "relatime":
flags |= syscall.MS_RELATIME
default:
unmatched = append(unmatched, s)
}
}
return
}
// NewMountInfoDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may read data from r beyond the mountinfo entries requested.
func NewMountInfoDecoder(r io.Reader) *MountInfoDecoder {
return &MountInfoDecoder{s: bufio.NewScanner(r)}
}
func (d *MountInfoDecoder) Decode(v **MountInfo) (err error) {
for d.scan() {
}
err = d.Err()
if err == nil {
*v = d.m
}
return
}
// Entries returns an iterator over mountinfo entries.
func (d *MountInfoDecoder) Entries() iter.Seq[*MountInfoEntry] {
return func(yield func(*MountInfoEntry) bool) {
for cur := d.m; cur != nil; cur = cur.Next {
if !yield(&cur.MountInfoEntry) {
return
}
}
for d.scan() {
if !yield(&d.current.MountInfoEntry) {
return
}
}
}
}
func (d *MountInfoDecoder) Err() error {
if err := d.s.Err(); err != nil {
return err
}
return d.parseErr
}
func (d *MountInfoDecoder) scan() bool {
if d.complete {
return false
}
if !d.s.Scan() {
d.complete = true
return false
}
m := new(MountInfo)
if err := parseMountInfoLine(d.s.Text(), &m.MountInfoEntry); err != nil {
d.parseErr = err
d.complete = true
return false
}
if d.current == nil {
d.m = m
d.current = d.m
} else {
d.current.Next = m
d.current = d.current.Next
}
return true
}
func parseMountInfoLine(s string, ent *MountInfoEntry) error {
// prevent proceeding with misaligned fields due to optional fields
f := strings.Split(s, " ")
if len(f) < 10 {
return ErrMountInfoFields
}
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
// (1) id
if id, err := strconv.Atoi(f[0]); err != nil { // 0
return err
} else {
ent.ID = id
}
// (2) parent
if parent, err := strconv.Atoi(f[1]); err != nil { // 1
return err
} else {
ent.Parent = parent
}
// (3) maj:min
if n, err := fmt.Sscanf(f[2], "%d:%d", &ent.Devno[0], &ent.Devno[1]); err != nil {
return err
} else if n != 2 {
// unreachable
return ErrMountInfoDevno
}
// (4) mountroot
ent.Root = Unmangle(f[3])
if ent.Root == "" {
return ErrMountInfoEmpty
}
// (5) target
ent.Target = Unmangle(f[4])
if ent.Target == "" {
return ErrMountInfoEmpty
}
// (6) vfs options (fs-independent)
ent.VfsOptstr = Unmangle(f[5])
if ent.VfsOptstr == "" {
return ErrMountInfoEmpty
}
// (7) optional fields, terminated by " - "
i := len(f) - 4
ent.OptFields = f[6:i]
// (8) optional fields end marker
if f[i] != "-" {
return ErrMountInfoSep
}
i++
// (9) FS type
ent.FsType = Unmangle(f[i])
if ent.FsType == "" {
return ErrMountInfoEmpty
}
i++
// (10) source -- maybe empty string
ent.Source = Unmangle(f[i])
i++
// (11) fs options (fs specific)
ent.FsOptstr = Unmangle(f[i])
return nil
}
func (e *MountInfoEntry) EqualWithIgnore(want *MountInfoEntry, ignore string) bool {
return (e.ID == want.ID || want.ID == -1) &&
(e.Parent == want.Parent || want.Parent == -1) &&
(e.Devno == want.Devno || (want.Devno[0] == -1 && want.Devno[1] == -1)) &&
(e.Root == want.Root || want.Root == ignore) &&
(e.Target == want.Target || want.Target == ignore) &&
(e.VfsOptstr == want.VfsOptstr || want.VfsOptstr == ignore) &&
(slices.Equal(e.OptFields, want.OptFields) || (len(want.OptFields) == 1 && want.OptFields[0] == ignore)) &&
(e.FsType == want.FsType || want.FsType == ignore) &&
(e.Source == want.Source || want.Source == ignore) &&
(e.FsOptstr == want.FsOptstr || want.FsOptstr == ignore)
}
func (e *MountInfoEntry) String() string {
return fmt.Sprintf("%d %d %d:%d %s %s %s %s %s %s %s",
e.ID, e.Parent, e.Devno[0], e.Devno[1], e.Root, e.Target, e.VfsOptstr,
strings.Join(append(e.OptFields, "-"), " "), e.FsType, e.Source, e.FsOptstr)
}

View File

@@ -1,404 +0,0 @@
package vfs_test
import (
"encoding/json"
"errors"
"iter"
"path"
"reflect"
"slices"
"strconv"
"strings"
"syscall"
"testing"
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
)
func TestMountInfo(t *testing.T) {
testCases := []mountInfoTest{
{"count", sampleMountinfoBase + `
21 20 0:53/ /mnt/test rw,relatime - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoFields, "", nil, nil, nil},
{"sep", sampleMountinfoBase + `
21 20 0:53 / /mnt/test rw,relatime shared:212 _ tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoSep, "", nil, nil, nil},
{"id", sampleMountinfoBase + `
id 20 0:53 / /mnt/test rw,relatime shared:212 - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
strconv.ErrSyntax, "", nil, nil, nil},
{"parent", sampleMountinfoBase + `
21 parent 0:53 / /mnt/test rw,relatime shared:212 - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
strconv.ErrSyntax, "", nil, nil, nil},
{"devno", sampleMountinfoBase + `
21 20 053 / /mnt/test rw,relatime shared:212 - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
nil, "unexpected EOF", nil, nil, nil},
{"maj", sampleMountinfoBase + `
21 20 maj:53 / /mnt/test rw,relatime shared:212 - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
nil, "expected integer", nil, nil, nil},
{"min", sampleMountinfoBase + `
21 20 0:min / /mnt/test rw,relatime shared:212 - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
nil, "expected integer", nil, nil, nil},
{"mountroot", sampleMountinfoBase + `
21 20 0:53 /mnt/test rw,relatime - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoEmpty, "", nil, nil, nil},
{"target", sampleMountinfoBase + `
21 20 0:53 / rw,relatime - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoEmpty, "", nil, nil, nil},
{"vfs options", sampleMountinfoBase + `
21 20 0:53 / /mnt/test - tmpfs rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoEmpty, "", nil, nil, nil},
{"FS type", sampleMountinfoBase + `
21 20 0:53 / /mnt/test rw,relatime - rw
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755`,
vfs.ErrMountInfoEmpty, "", nil, nil, nil},
{"base", sampleMountinfoBase, nil, "", []*wantMountInfo{
m(15, 20, 0, 3, "/", "/proc", "rw,relatime", o(), "proc", "/proc", "rw", syscall.MS_RELATIME, nil),
m(16, 20, 0, 15, "/", "/sys", "rw,relatime", o(), "sysfs", "/sys", "rw", syscall.MS_RELATIME, nil),
m(17, 20, 0, 5, "/", "/dev", "rw,relatime", o(), "devtmpfs", "udev", "rw,size=1983516k,nr_inodes=495879,mode=755", syscall.MS_RELATIME, nil),
m(18, 17, 0, 10, "/", "/dev/pts", "rw,relatime", o(), "devpts", "devpts", "rw,gid=5,mode=620,ptmxmode=000", syscall.MS_RELATIME, nil),
m(19, 17, 0, 16, "/", "/dev/shm", "rw,relatime", o(), "tmpfs", "tmpfs", "rw", syscall.MS_RELATIME, nil),
m(20, 1, 8, 4, "/", "/", "ro,noatime,nodiratime,meow", o(), "ext3", "/dev/sda4", "rw,errors=continue,user_xattr,acl,barrier=0,data=ordered", syscall.MS_RDONLY|syscall.MS_NOATIME|syscall.MS_NODIRATIME, []string{"meow"}),
},
mn(20, 1, 8, 4, "/", "/", "ro,noatime,nodiratime,meow", o(), "ext3", "/dev/sda4", "rw,errors=continue,user_xattr,acl,barrier=0,data=ordered", false,
mn(15, 20, 0, 3, "/", "/proc", "rw,relatime", o(), "proc", "/proc", "rw", false, nil,
mn(16, 20, 0, 15, "/", "/sys", "rw,relatime", o(), "sysfs", "/sys", "rw", false, nil,
mn(17, 20, 0, 5, "/", "/dev", "rw,relatime", o(), "devtmpfs", "udev", "rw,size=1983516k,nr_inodes=495879,mode=755", false,
mn(18, 17, 0, 10, "/", "/dev/pts", "rw,relatime", o(), "devpts", "devpts", "rw,gid=5,mode=620,ptmxmode=000", false, nil,
mn(19, 17, 0, 16, "/", "/dev/shm", "rw,relatime", o(), "tmpfs", "tmpfs", "rw", false, nil, nil)),
nil))), nil), func(n *vfs.MountInfoNode) []*vfs.MountInfoNode {
return []*vfs.MountInfoNode{
n,
n.FirstChild,
n.FirstChild.NextSibling,
n.FirstChild.NextSibling.NextSibling,
n.FirstChild.NextSibling.NextSibling.FirstChild,
n.FirstChild.NextSibling.NextSibling.FirstChild.NextSibling,
}
}},
{"sample", sampleMountinfo, nil, "", []*wantMountInfo{
m(15, 20, 0, 3, "/", "/proc", "rw,relatime", o(), "proc", "/proc", "rw", syscall.MS_RELATIME, nil),
m(16, 20, 0, 15, "/", "/sys", "rw,relatime", o(), "sysfs", "/sys", "rw", syscall.MS_RELATIME, nil),
m(17, 20, 0, 5, "/", "/dev", "rw,relatime", o(), "devtmpfs", "udev", "rw,size=1983516k,nr_inodes=495879,mode=755", syscall.MS_RELATIME, nil),
m(18, 17, 0, 10, "/", "/dev/pts", "rw,relatime", o(), "devpts", "devpts", "rw,gid=5,mode=620,ptmxmode=000", syscall.MS_RELATIME, nil),
m(19, 17, 0, 16, "/", "/dev/shm", "rw,relatime", o(), "tmpfs", "tmpfs", "rw", syscall.MS_RELATIME, nil),
m(20, 1, 8, 4, "/", "/", "rw,noatime", o(), "ext3", "/dev/sda4", "rw,errors=continue,user_xattr,acl,barrier=0,data=ordered", syscall.MS_NOATIME, nil),
m(21, 16, 0, 17, "/", "/sys/fs/cgroup", "rw,nosuid,nodev,noexec,relatime", o(), "tmpfs", "tmpfs", "rw,mode=755", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(22, 21, 0, 18, "/", "/sys/fs/cgroup/systemd", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(23, 21, 0, 19, "/", "/sys/fs/cgroup/cpuset", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,cpuset", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(24, 21, 0, 20, "/", "/sys/fs/cgroup/ns", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,ns", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(25, 21, 0, 21, "/", "/sys/fs/cgroup/cpu", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,cpu", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(26, 21, 0, 22, "/", "/sys/fs/cgroup/cpuacct", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,cpuacct", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(27, 21, 0, 23, "/", "/sys/fs/cgroup/memory", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,memory", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(28, 21, 0, 24, "/", "/sys/fs/cgroup/devices", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,devices", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(29, 21, 0, 25, "/", "/sys/fs/cgroup/freezer", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,freezer", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(30, 21, 0, 26, "/", "/sys/fs/cgroup/net_cls", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,net_cls", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(31, 21, 0, 27, "/", "/sys/fs/cgroup/blkio", "rw,nosuid,nodev,noexec,relatime", o(), "cgroup", "cgroup", "rw,blkio", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC|syscall.MS_RELATIME, nil),
m(32, 16, 0, 28, "/", "/sys/kernel/security", "rw,relatime", o(), "autofs", "systemd-1", "rw,fd=22,pgrp=1,timeout=300,minproto=5,maxproto=5,direct", syscall.MS_RELATIME, nil),
m(33, 17, 0, 29, "/", "/dev/hugepages", "rw,relatime", o(), "autofs", "systemd-1", "rw,fd=23,pgrp=1,timeout=300,minproto=5,maxproto=5,direct", syscall.MS_RELATIME, nil),
m(34, 16, 0, 30, "/", "/sys/kernel/debug", "rw,relatime", o(), "autofs", "systemd-1", "rw,fd=24,pgrp=1,timeout=300,minproto=5,maxproto=5,direct", syscall.MS_RELATIME, nil),
m(35, 15, 0, 31, "/", "/proc/sys/fs/binfmt_misc", "rw,relatime", o(), "autofs", "systemd-1", "rw,fd=25,pgrp=1,timeout=300,minproto=5,maxproto=5,direct", syscall.MS_RELATIME, nil),
m(36, 17, 0, 32, "/", "/dev/mqueue", "rw,relatime", o(), "autofs", "systemd-1", "rw,fd=26,pgrp=1,timeout=300,minproto=5,maxproto=5,direct", syscall.MS_RELATIME, nil),
m(37, 15, 0, 14, "/", "/proc/bus/usb", "rw,relatime", o(), "usbfs", "/proc/bus/usb", "rw", syscall.MS_RELATIME, nil),
m(38, 33, 0, 33, "/", "/dev/hugepages", "rw,relatime", o(), "hugetlbfs", "hugetlbfs", "rw", syscall.MS_RELATIME, nil),
m(39, 36, 0, 12, "/", "/dev/mqueue", "rw,relatime", o(), "mqueue", "mqueue", "rw", syscall.MS_RELATIME, nil),
m(40, 20, 8, 6, "/", "/boot", "rw,noatime", o(), "ext3", "/dev/sda6", "rw,errors=continue,barrier=0,data=ordered", syscall.MS_NOATIME, nil),
m(41, 20, 253, 0, "/", "/home/kzak", "rw,noatime", o(), "ext4", "/dev/mapper/kzak-home", "rw,barrier=1,data=ordered", syscall.MS_NOATIME, nil),
m(42, 35, 0, 34, "/", "/proc/sys/fs/binfmt_misc", "rw,relatime", o(), "binfmt_misc", "none", "rw", syscall.MS_RELATIME, nil),
m(43, 16, 0, 35, "/", "/sys/fs/fuse/connections", "rw,relatime", o(), "fusectl", "fusectl", "rw", syscall.MS_RELATIME, nil),
m(44, 41, 0, 36, "/", "/home/kzak/.gvfs", "rw,nosuid,nodev,relatime", o(), "fuse.gvfs-fuse-daemon", "gvfs-fuse-daemon", "rw,user_id=500,group_id=500", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_RELATIME, nil),
m(45, 20, 0, 37, "/", "/var/lib/nfs/rpc_pipefs", "rw,relatime", o(), "rpc_pipefs", "sunrpc", "rw", syscall.MS_RELATIME, nil),
m(47, 20, 0, 38, "/", "/mnt/sounds", "rw,relatime", o(), "cifs", "//foo.home/bar/", "rw,unc=\\\\foo.home\\bar,username=kzak,domain=SRGROUP,uid=0,noforceuid,gid=0,noforcegid,addr=192.168.111.1,posixpaths,serverino,acl,rsize=16384,wsize=57344", syscall.MS_RELATIME, nil),
m(49, 20, 0, 56, "/", "/mnt/test/foobar", "rw,relatime,nosymfollow", o("shared:323"), "tmpfs", "tmpfs", "rw", syscall.MS_RELATIME|vfs.MS_NOSYMFOLLOW, nil),
}, nil, nil},
{"sample nosrc", sampleMountinfoNoSrc, nil, "", []*wantMountInfo{
m(15, 20, 0, 3, "/", "/proc", "rw,relatime", o(), "proc", "/proc", "rw", syscall.MS_RELATIME, nil),
m(16, 20, 0, 15, "/", "/sys", "rw,relatime", o(), "sysfs", "/sys", "rw", syscall.MS_RELATIME, nil),
m(17, 20, 0, 5, "/", "/dev", "rw,relatime", o(), "devtmpfs", "udev", "rw,size=1983516k,nr_inodes=495879,mode=755", syscall.MS_RELATIME, nil),
m(18, 17, 0, 10, "/", "/dev/pts", "rw,relatime", o(), "devpts", "devpts", "rw,gid=5,mode=620,ptmxmode=000", syscall.MS_RELATIME, nil),
m(19, 17, 0, 16, "/", "/dev/shm", "rw,relatime", o(), "tmpfs", "tmpfs", "rw", syscall.MS_RELATIME, nil),
m(20, 1, 8, 4, "/", "/", "rw,noatime", o(), "ext3", "/dev/sda4", "rw,errors=continue,user_xattr,acl,barrier=0,data=ordered", syscall.MS_NOATIME, nil),
m(21, 20, 0, 53, "/", "/mnt/test", "rw,relatime", o("shared:212"), "tmpfs", "", "rw", syscall.MS_RELATIME, nil),
}, nil, nil},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Run("decode", func(t *testing.T) {
var got *vfs.MountInfo
d := vfs.NewMountInfoDecoder(strings.NewReader(tc.sample))
err := d.Decode(&got)
tc.check(t, d, "Decode",
func(yield func(*vfs.MountInfoEntry) bool) {
for cur := got; cur != nil; cur = cur.Next {
if !yield(&cur.MountInfoEntry) {
return
}
}
}, func() error { return err })
t.Run("reuse", func(t *testing.T) {
tc.check(t, d, "Entries",
d.Entries(), d.Err)
})
})
t.Run("iter", func(t *testing.T) {
d := vfs.NewMountInfoDecoder(strings.NewReader(tc.sample))
tc.check(t, d, "Entries",
d.Entries(), d.Err)
t.Run("reuse", func(t *testing.T) {
tc.check(t, d, "Entries",
d.Entries(), d.Err)
})
})
t.Run("yield", func(t *testing.T) {
d := vfs.NewMountInfoDecoder(strings.NewReader(tc.sample))
v := false
d.Entries()(func(entry *vfs.MountInfoEntry) bool { v = !v; return v })
d.Entries()(func(entry *vfs.MountInfoEntry) bool { return false })
tc.check(t, d, "Entries",
d.Entries(), d.Err)
t.Run("reuse", func(t *testing.T) {
tc.check(t, d, "Entries",
d.Entries(), d.Err)
})
})
})
}
}
type mountInfoTest struct {
name string
sample string
wantErr error
wantError string
want []*wantMountInfo
wantNode *vfs.MountInfoNode
wantCollectF func(n *vfs.MountInfoNode) []*vfs.MountInfoNode
}
func (tc *mountInfoTest) check(t *testing.T, d *vfs.MountInfoDecoder, funcName string,
got iter.Seq[*vfs.MountInfoEntry], gotErr func() error) {
i := 0
for cur := range got {
if i == len(tc.want) {
if funcName != "Decode" && (tc.wantErr != nil || tc.wantError != "") {
continue
}
t.Errorf("%s: got more than %d entries", funcName, len(tc.want))
break
}
if !reflect.DeepEqual(cur, &tc.want[i].MountInfoEntry) {
t.Errorf("%s: entry %d\ngot: %#v\nwant: %#v",
funcName, i, cur, tc.want[i])
}
flags, unmatched := cur.Flags()
if flags != tc.want[i].flags {
t.Errorf("Flags(%q): %#x, want %#x",
cur.VfsOptstr, flags, tc.want[i].flags)
}
if !slices.Equal(unmatched, tc.want[i].unmatched) {
t.Errorf("Flags(%q): unmatched = %#q, want %#q",
cur.VfsOptstr, unmatched, tc.want[i].unmatched)
}
i++
}
if i != len(tc.want) {
t.Errorf("%s: got %d entries, want %d", funcName, i, len(tc.want))
}
if tc.wantErr == nil && tc.wantError == "" && tc.wantCollectF != nil {
t.Run("unfold", func(t *testing.T) {
n, err := d.Unfold("/")
if err != nil {
t.Errorf("Unfold: error = %v", err)
} else {
t.Run("stop", func(t *testing.T) {
v := false
n.Collective()(func(node *vfs.MountInfoNode) bool { v = !v; return v })
})
if !reflect.DeepEqual(n, tc.wantNode) {
t.Errorf("Unfold: %s, want %s",
mustMarshal(n), mustMarshal(tc.wantNode))
}
t.Run("collective", func(t *testing.T) {
wantCollect := tc.wantCollectF(n)
if gotCollect := slices.Collect(n.Collective()); !reflect.DeepEqual(gotCollect, wantCollect) {
t.Errorf("Collective: \ngot %#v\nwant %#v",
gotCollect, wantCollect)
}
})
}
})
} else if tc.wantNode != nil || tc.wantCollectF != nil {
panic("invalid test case")
} else if _, err := d.Unfold("/"); !errors.Is(err, tc.wantErr) {
if tc.wantError == "" {
t.Errorf("Unfold: error = %v, wantErr %v",
err, tc.wantErr)
} else if err != nil && err.Error() != tc.wantError {
t.Errorf("Unfold: error = %q, wantError %q",
err, tc.wantError)
}
}
if err := gotErr(); !errors.Is(err, tc.wantErr) {
if tc.wantError == "" {
t.Errorf("%s: error = %v, wantErr %v",
funcName, err, tc.wantErr)
} else if err != nil && err.Error() != tc.wantError {
t.Errorf("%s: error = %q, wantError %q",
funcName, err, tc.wantError)
}
}
}
func mustMarshal(v any) string {
p, err := json.Marshal(v)
if err != nil {
panic(err.Error())
}
return string(p)
}
type wantMountInfo struct {
vfs.MountInfoEntry
flags uintptr
unmatched []string
}
func m(
id, parent, maj, min int, root, target, vfsOptstr string, optFields []string, fsType, source, fsOptstr string,
flags uintptr, unmatched []string,
) *wantMountInfo {
return &wantMountInfo{
vfs.MountInfoEntry{
ID: id,
Parent: parent,
Devno: vfs.DevT{maj, min},
Root: root,
Target: target,
VfsOptstr: vfsOptstr,
OptFields: optFields,
FsType: fsType,
Source: source,
FsOptstr: fsOptstr,
}, flags, unmatched,
}
}
func mn(
id, parent, maj, min int, root, target, vfsOptstr string, optFields []string, fsType, source, fsOptstr string,
covered bool, firstChild, nextSibling *vfs.MountInfoNode,
) *vfs.MountInfoNode {
return &vfs.MountInfoNode{
MountInfoEntry: &vfs.MountInfoEntry{
ID: id,
Parent: parent,
Devno: vfs.DevT{maj, min},
Root: root,
Target: target,
VfsOptstr: vfsOptstr,
OptFields: optFields,
FsType: fsType,
Source: source,
FsOptstr: fsOptstr,
},
FirstChild: firstChild,
NextSibling: nextSibling,
Clean: path.Clean(target),
Covered: covered,
}
}
func o(field ...string) []string {
if field == nil {
return []string{}
}
return field
}
const (
sampleMountinfoBase = `15 20 0:3 / /proc rw,relatime - proc /proc rw
16 20 0:15 / /sys rw,relatime - sysfs /sys rw
17 20 0:5 / /dev rw,relatime - devtmpfs udev rw,size=1983516k,nr_inodes=495879,mode=755
18 17 0:10 / /dev/pts rw,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
19 17 0:16 / /dev/shm rw,relatime - tmpfs tmpfs rw
20 1 8:4 / / ro,noatime,nodiratime,meow - ext3 /dev/sda4 rw,errors=continue,user_xattr,acl,barrier=0,data=ordered`
sampleMountinfo = `15 20 0:3 / /proc rw,relatime - proc /proc rw
16 20 0:15 / /sys rw,relatime - sysfs /sys rw
17 20 0:5 / /dev rw,relatime - devtmpfs udev rw,size=1983516k,nr_inodes=495879,mode=755
18 17 0:10 / /dev/pts rw,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
19 17 0:16 / /dev/shm rw,relatime - tmpfs tmpfs rw
20 1 8:4 / / rw,noatime - ext3 /dev/sda4 rw,errors=continue,user_xattr,acl,barrier=0,data=ordered
21 16 0:17 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
22 21 0:18 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
23 21 0:19 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
24 21 0:20 / /sys/fs/cgroup/ns rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,ns
25 21 0:21 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
26 21 0:22 / /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
27 21 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
28 21 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
29 21 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
30 21 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls
31 21 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
32 16 0:28 / /sys/kernel/security rw,relatime - autofs systemd-1 rw,fd=22,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
33 17 0:29 / /dev/hugepages rw,relatime - autofs systemd-1 rw,fd=23,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
34 16 0:30 / /sys/kernel/debug rw,relatime - autofs systemd-1 rw,fd=24,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
35 15 0:31 / /proc/sys/fs/binfmt_misc rw,relatime - autofs systemd-1 rw,fd=25,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
36 17 0:32 / /dev/mqueue rw,relatime - autofs systemd-1 rw,fd=26,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
37 15 0:14 / /proc/bus/usb rw,relatime - usbfs /proc/bus/usb rw
38 33 0:33 / /dev/hugepages rw,relatime - hugetlbfs hugetlbfs rw
39 36 0:12 / /dev/mqueue rw,relatime - mqueue mqueue rw
40 20 8:6 / /boot rw,noatime - ext3 /dev/sda6 rw,errors=continue,barrier=0,data=ordered
41 20 253:0 / /home/kzak rw,noatime - ext4 /dev/mapper/kzak-home rw,barrier=1,data=ordered
42 35 0:34 / /proc/sys/fs/binfmt_misc rw,relatime - binfmt_misc none rw
43 16 0:35 / /sys/fs/fuse/connections rw,relatime - fusectl fusectl rw
44 41 0:36 / /home/kzak/.gvfs rw,nosuid,nodev,relatime - fuse.gvfs-fuse-daemon gvfs-fuse-daemon rw,user_id=500,group_id=500
45 20 0:37 / /var/lib/nfs/rpc_pipefs rw,relatime - rpc_pipefs sunrpc rw
47 20 0:38 / /mnt/sounds rw,relatime - cifs //foo.home/bar/ rw,unc=\\foo.home\bar,username=kzak,domain=SRGROUP,uid=0,noforceuid,gid=0,noforcegid,addr=192.168.111.1,posixpaths,serverino,acl,rsize=16384,wsize=57344
49 20 0:56 / /mnt/test/foobar rw,relatime,nosymfollow shared:323 - tmpfs tmpfs rw`
sampleMountinfoNoSrc = `15 20 0:3 / /proc rw,relatime - proc /proc rw
16 20 0:15 / /sys rw,relatime - sysfs /sys rw
17 20 0:5 / /dev rw,relatime - devtmpfs udev rw,size=1983516k,nr_inodes=495879,mode=755
18 17 0:10 / /dev/pts rw,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
19 17 0:16 / /dev/shm rw,relatime - tmpfs tmpfs rw
20 1 8:4 / / rw,noatime - ext3 /dev/sda4 rw,errors=continue,user_xattr,acl,barrier=0,data=ordered
21 20 0:53 / /mnt/test rw,relatime shared:212 - tmpfs rw`
)

View File

@@ -1,107 +0,0 @@
package vfs
import (
"iter"
"path"
"strings"
"syscall"
)
// MountInfoNode positions a [MountInfoEntry] in its mount hierarchy.
type MountInfoNode struct {
*MountInfoEntry
FirstChild *MountInfoNode `json:"first_child"`
NextSibling *MountInfoNode `json:"next_sibling"`
Clean string `json:"clean"`
Covered bool `json:"covered"`
}
// Collective returns an iterator over visible mountinfo nodes.
func (n *MountInfoNode) Collective() iter.Seq[*MountInfoNode] {
return func(yield func(*MountInfoNode) bool) { n.visit(yield) }
}
func (n *MountInfoNode) visit(yield func(*MountInfoNode) bool) bool {
if !n.Covered && !yield(n) {
return false
}
for cur := n.FirstChild; cur != nil; cur = cur.NextSibling {
if !cur.visit(yield) {
return false
}
}
return true
}
// Unfold unfolds the mount hierarchy and resolves covered paths.
func (d *MountInfoDecoder) Unfold(target string) (*MountInfoNode, error) {
targetClean := path.Clean(target)
var mountinfoSize int
for range d.Entries() {
mountinfoSize++
}
if err := d.Err(); err != nil {
return nil, err
}
mountinfo := make([]*MountInfoNode, mountinfoSize)
// mount ID to index lookup
idIndex := make(map[int]int, mountinfoSize)
// final entry to match target
targetIndex := -1
{
i := 0
for ent := range d.Entries() {
mountinfo[i] = &MountInfoNode{Clean: path.Clean(ent.Target), MountInfoEntry: ent}
idIndex[ent.ID] = i
if mountinfo[i].Clean == targetClean {
targetIndex = i
}
i++
}
}
if targetIndex == -1 {
return nil, syscall.ESTALE
}
for _, cur := range mountinfo {
var parent *MountInfoNode
if p, ok := idIndex[cur.Parent]; !ok {
continue
} else {
parent = mountinfo[p]
}
if !strings.HasPrefix(cur.Clean, targetClean) {
continue
}
if parent.Clean == cur.Clean {
parent.Covered = true
}
covered := false
nsp := &parent.FirstChild
for s := parent.FirstChild; s != nil; s = s.NextSibling {
if strings.HasPrefix(cur.Clean, s.Clean) {
covered = true
break
}
if strings.HasPrefix(s.Clean, cur.Clean) {
*nsp = s.NextSibling
} else {
nsp = &s.NextSibling
}
}
if covered {
continue
}
*nsp = cur
}
return mountinfo[targetIndex], nil
}

View File

@@ -1,93 +0,0 @@
package vfs_test
import (
"errors"
"reflect"
"slices"
"strings"
"syscall"
"testing"
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
)
func TestUnfold(t *testing.T) {
testCases := []struct {
name string
sample string
target string
wantErr error
want *vfs.MountInfoNode
wantCollectF func(n *vfs.MountInfoNode) []*vfs.MountInfoNode
wantCollectN []string
}{
{
"no match",
sampleMountinfoBase,
"/mnt",
syscall.ESTALE, nil, nil, nil,
},
{
"cover",
`33 1 0:33 / / rw,relatime shared:1 - tmpfs impure rw,size=16777216k,mode=755
37 33 0:32 / /proc rw,nosuid,nodev,noexec,relatime shared:41 - proc proc rw
551 33 0:121 / /mnt rw,relatime shared:666 - tmpfs tmpfs rw
595 551 0:123 / /mnt rw,relatime shared:990 - tmpfs tmpfs rw
611 595 0:142 / /mnt/etc rw,relatime shared:1112 - tmpfs tmpfs rw
625 644 0:142 /passwd /mnt/etc/passwd rw,relatime shared:1112 - tmpfs tmpfs rw
641 625 0:33 /etc/passwd /mnt/etc/passwd rw,relatime shared:1 - tmpfs impure rw,size=16777216k,mode=755
644 611 0:33 /etc/passwd /mnt/etc/passwd rw,relatime shared:1 - tmpfs impure rw,size=16777216k,mode=755
`, "/mnt", nil,
mn(595, 551, 0, 123, "/", "/mnt", "rw,relatime", o("shared:990"), "tmpfs", "tmpfs", "rw", false,
mn(611, 595, 0, 142, "/", "/mnt/etc", "rw,relatime", o("shared:1112"), "tmpfs", "tmpfs", "rw", false,
mn(644, 611, 0, 33, "/etc/passwd", "/mnt/etc/passwd", "rw,relatime", o("shared:1"), "tmpfs", "impure", "rw,size=16777216k,mode=755", true,
mn(625, 644, 0, 142, "/passwd", "/mnt/etc/passwd", "rw,relatime", o("shared:1112"), "tmpfs", "tmpfs", "rw", true,
mn(641, 625, 0, 33, "/etc/passwd", "/mnt/etc/passwd", "rw,relatime", o("shared:1"), "tmpfs", "impure", "rw,size=16777216k,mode=755", false,
nil, nil), nil), nil), nil), nil), func(n *vfs.MountInfoNode) []*vfs.MountInfoNode {
return []*vfs.MountInfoNode{n, n.FirstChild, n.FirstChild.FirstChild.FirstChild.FirstChild}
}, []string{"/mnt", "/mnt/etc", "/mnt/etc/passwd"},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
d := vfs.NewMountInfoDecoder(strings.NewReader(tc.sample))
got, err := d.Unfold(tc.target)
if !errors.Is(err, tc.wantErr) {
t.Errorf("Unfold: error = %v, wantErr %v",
err, tc.wantErr)
}
if !reflect.DeepEqual(got, tc.want) {
t.Errorf("Unfold:\ngot %s\nwant %s",
mustMarshal(got), mustMarshal(tc.want))
}
if err == nil && tc.wantCollectF != nil {
t.Run("collective", func(t *testing.T) {
wantCollect := tc.wantCollectF(got)
gotCollect := slices.Collect(got.Collective())
if !reflect.DeepEqual(gotCollect, wantCollect) {
t.Errorf("Collective: \ngot %#v\nwant %#v",
gotCollect, wantCollect)
}
t.Run("target", func(t *testing.T) {
gotCollectN := slices.Collect[string](func(yield func(v string) bool) {
for _, cur := range gotCollect {
if !yield(cur.Clean) {
return
}
}
})
if !reflect.DeepEqual(gotCollectN, tc.wantCollectN) {
t.Errorf("Collective: got %q, want %q",
gotCollectN, tc.wantCollectN)
}
})
})
}
})
}
}