Compare commits
52 Commits
0e957cc9c1
...
v0.1.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
cb513bb1cd
|
|||
|
f7bd28118c
|
|||
|
940ee00ffe
|
|||
|
b43d104680
|
|||
|
ddf48a6c22
|
|||
|
a0f499e30a
|
|||
|
d6b07f12ff
|
|||
|
65fe09caf9
|
|||
|
a1e5f020f4
|
|||
|
bd3fa53a55
|
|||
|
625632c593
|
|||
|
e71ae3b8c5
|
|||
|
9d7a19d162
|
|||
|
6ba19a7ba5
|
|||
|
749a2779f5
|
|||
|
e574042d76
|
|||
|
2b44493e8a
|
|||
|
c30dd4e630
|
|||
|
d90da1c8f5
|
|||
|
5853d7700f
|
|||
|
d5c7523726
|
|||
|
ddfcc51b91
|
|||
|
8ebedbd88a
|
|||
|
84e8142a2d
|
|||
|
2c7b7ad845
|
|||
|
72c2b66fc0
|
|||
|
356b42a406
|
|||
|
d9b6d48e7c
|
|||
|
087959e81b
|
|||
|
e6967b8bbb
|
|||
|
d2f9a9b83b
|
|||
|
1b5ecd9eaf
|
|||
|
82561d62b6
|
|||
|
eec021cc4b
|
|||
|
a1d98823f8
|
|||
|
255b77d91d
|
|||
|
f84ec5a3f8
|
|||
|
eb22a8bcc1
|
|||
|
31aef905fa
|
|||
|
a6887f7253
|
|||
|
69bd581af7
|
|||
|
26b7afc890
|
|||
|
d5532aade0
|
|||
|
0c5409aec7
|
|||
|
1a8840bebc
|
|||
|
1fb453dffe
|
|||
|
e03d702d08
|
|||
|
241dc964a6
|
|||
|
8ef71e14d5
|
|||
|
972f4006f0
|
|||
|
9a8a047908
|
|||
|
863bf69ad3
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -26,7 +26,7 @@ go.work.sum
|
||||
.vscode
|
||||
|
||||
# go generate
|
||||
security-context-v1-protocol.*
|
||||
/cmd/hakurei/LICENSE
|
||||
|
||||
# release
|
||||
/dist/hakurei-*
|
||||
2
LICENSE
2
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2024 Ophestra Umiker
|
||||
Copyright (c) 2024-2025 Ophestra
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
@@ -7,8 +7,12 @@
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://pkg.go.dev/git.gensokyo.uk/security/hakurei"><img src="https://pkg.go.dev/badge/git.gensokyo.uk/security/hakurei.svg" alt="Go Reference" /></a>
|
||||
<a href="https://goreportcard.com/report/git.gensokyo.uk/security/hakurei"><img src="https://goreportcard.com/badge/git.gensokyo.uk/security/hakurei" alt="Go Report Card" /></a>
|
||||
<a href="https://pkg.go.dev/hakurei.app"><img src="https://pkg.go.dev/badge/hakurei.app.svg" alt="Go Reference" /></a>
|
||||
<a href="https://git.gensokyo.uk/security/hakurei/actions"><img src="https://git.gensokyo.uk/security/hakurei/actions/workflows/test.yml/badge.svg?branch=staging&style=flat-square" alt="Gitea Workflow Status" /></a>
|
||||
<br/>
|
||||
<a href="https://git.gensokyo.uk/security/hakurei/releases"><img src="https://img.shields.io/gitea/v/release/security/hakurei?gitea_url=https%3A%2F%2Fgit.gensokyo.uk&color=purple" alt="Release" /></a>
|
||||
<a href="https://goreportcard.com/report/hakurei.app"><img src="https://goreportcard.com/badge/hakurei.app" alt="Go Report Card" /></a>
|
||||
<a href="https://hakurei.app"><img src="https://img.shields.io/website?url=https%3A%2F%2Fhakurei.app" alt="Website" /></a>
|
||||
</p>
|
||||
|
||||
Hakurei is a tool for running sandboxed graphical applications as dedicated subordinate users on the Linux kernel.
|
||||
|
||||
@@ -2,8 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -15,63 +13,26 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/command"
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app/instance"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/state"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/command"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/app"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
var (
|
||||
errSuccess = errors.New("success")
|
||||
|
||||
//go:embed LICENSE
|
||||
license string
|
||||
)
|
||||
|
||||
func init() { hlog.Prepare("hakurei") }
|
||||
|
||||
var std sys.State = new(sys.Std)
|
||||
|
||||
func main() {
|
||||
// early init path, skips root check and duplicate PR_SET_DUMPABLE
|
||||
sandbox.TryArgv0(hlog.Output{}, hlog.Prepare, internal.InstallFmsg)
|
||||
|
||||
if err := sandbox.SetDumpable(sandbox.SUID_DUMP_DISABLE); err != nil {
|
||||
log.Printf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
// not fatal: this program runs as the privileged user
|
||||
}
|
||||
|
||||
if os.Geteuid() == 0 {
|
||||
log.Fatal("this program must not run as root")
|
||||
}
|
||||
|
||||
buildCommand(os.Stderr).MustParse(os.Args[1:], func(err error) {
|
||||
hlog.Verbosef("command returned %v", err)
|
||||
if errors.Is(err, errSuccess) {
|
||||
hlog.BeforeExit()
|
||||
os.Exit(0)
|
||||
}
|
||||
})
|
||||
log.Fatal("unreachable")
|
||||
}
|
||||
|
||||
func buildCommand(out io.Writer) command.Command {
|
||||
var (
|
||||
flagVerbose bool
|
||||
flagJSON bool
|
||||
)
|
||||
c := command.New(out, log.Printf, "hakurei", func([]string) error { internal.InstallFmsg(flagVerbose); return nil }).
|
||||
c := command.New(out, log.Printf, "hakurei", func([]string) error { internal.InstallOutput(flagVerbose); return nil }).
|
||||
Flag(&flagVerbose, "v", command.BoolFlag(false), "Increase log verbosity").
|
||||
Flag(&flagJSON, "json", command.BoolFlag(false), "Serialise output in JSON when applicable")
|
||||
|
||||
c.Command("shim", command.UsageInternal, func([]string) error { instance.ShimMain(); return errSuccess })
|
||||
c.Command("shim", command.UsageInternal, func([]string) error { app.ShimMain(); return errSuccess })
|
||||
|
||||
c.Command("app", "Load app from configuration file", func(args []string) error {
|
||||
if len(args) < 1 {
|
||||
@@ -282,14 +243,14 @@ func runApp(config *hst.Config) {
|
||||
ctx, stop := signal.NotifyContext(context.Background(),
|
||||
syscall.SIGINT, syscall.SIGTERM)
|
||||
defer stop() // unreachable
|
||||
a := instance.MustNew(instance.ISetuid, ctx, std)
|
||||
a := app.MustNew(ctx, std)
|
||||
|
||||
rs := new(app.RunState)
|
||||
if sa, err := a.Seal(config); err != nil {
|
||||
hlog.PrintBaseError(err, "cannot seal app:")
|
||||
internal.Exit(1)
|
||||
} else {
|
||||
internal.Exit(instance.PrintRunStateErr(instance.ISetuid, rs, sa.Run(rs)))
|
||||
internal.Exit(app.PrintRunStateErr(rs, sa.Run(rs)))
|
||||
}
|
||||
|
||||
*(*int)(nil) = 0 // not reached
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"flag"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/command"
|
||||
"hakurei.app/command"
|
||||
)
|
||||
|
||||
func TestHelp(t *testing.T) {
|
||||
51
cmd/hakurei/main.go
Normal file
51
cmd/hakurei/main.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package main
|
||||
|
||||
// this works around go:embed '..' limitation
|
||||
//go:generate cp ../../LICENSE .
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/internal/sys"
|
||||
)
|
||||
|
||||
var (
|
||||
errSuccess = errors.New("success")
|
||||
|
||||
//go:embed LICENSE
|
||||
license string
|
||||
)
|
||||
|
||||
func init() { hlog.Prepare("hakurei") }
|
||||
|
||||
var std sys.State = new(sys.Std)
|
||||
|
||||
func main() {
|
||||
// early init path, skips root check and duplicate PR_SET_DUMPABLE
|
||||
container.TryArgv0(hlog.Output{}, hlog.Prepare, internal.InstallOutput)
|
||||
|
||||
if err := container.SetDumpable(container.SUID_DUMP_DISABLE); err != nil {
|
||||
log.Printf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
// not fatal: this program runs as the privileged user
|
||||
}
|
||||
|
||||
if os.Geteuid() == 0 {
|
||||
log.Fatal("this program must not run as root")
|
||||
}
|
||||
|
||||
buildCommand(os.Stderr).MustParse(os.Args[1:], func(err error) {
|
||||
hlog.Verbosef("command returned %v", err)
|
||||
if errors.Is(err, errSuccess) {
|
||||
hlog.BeforeExit()
|
||||
os.Exit(0)
|
||||
}
|
||||
// this catches faulty command handlers that fail to return before this point
|
||||
})
|
||||
log.Fatal("unreachable")
|
||||
}
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/state"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
func tryPath(name string) (config *hst.Config) {
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/state"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
func printShowSystem(output io.Writer, short, flagJSON bool) {
|
||||
@@ -264,7 +264,7 @@ func printPs(output io.Writer, now time.Time, s state.Store, short, flagJSON boo
|
||||
as = strconv.Itoa(e.Config.Identity)
|
||||
id := e.Config.ID
|
||||
if id == "" {
|
||||
id = "uk.gensokyo.hakurei." + e.s[:8]
|
||||
id = "app.hakurei." + e.s[:8]
|
||||
}
|
||||
as += " (" + id + ")"
|
||||
}
|
||||
@@ -5,14 +5,13 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/state"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
var (
|
||||
testID = app.ID{
|
||||
testID = state.ID{
|
||||
0x8e, 0x2c, 0x76, 0xb0,
|
||||
0x66, 0xda, 0xbe, 0x57,
|
||||
0x4c, 0xf0, 0x73, 0xbd,
|
||||
@@ -257,7 +256,10 @@ App
|
||||
],
|
||||
"container": {
|
||||
"hostname": "localhost",
|
||||
"seccomp": 32,
|
||||
"wait_delay": -1,
|
||||
"seccomp_flags": 1,
|
||||
"seccomp_presets": 1,
|
||||
"seccomp_compat": true,
|
||||
"devel": true,
|
||||
"userns": true,
|
||||
"net": true,
|
||||
@@ -382,7 +384,10 @@ App
|
||||
],
|
||||
"container": {
|
||||
"hostname": "localhost",
|
||||
"seccomp": 32,
|
||||
"wait_delay": -1,
|
||||
"seccomp_flags": 1,
|
||||
"seccomp_presets": 1,
|
||||
"seccomp_compat": true,
|
||||
"devel": true,
|
||||
"userns": true,
|
||||
"net": true,
|
||||
@@ -458,10 +463,10 @@ func Test_printPs(t *testing.T) {
|
||||
{"no entries", make(state.Entries), false, false, " Instance PID Application Uptime\n"},
|
||||
{"no entries short", make(state.Entries), true, false, ""},
|
||||
{"nil instance", state.Entries{testID: nil}, false, false, " Instance PID Application Uptime\n"},
|
||||
{"state corruption", state.Entries{app.ID{}: testState}, false, false, " Instance PID Application Uptime\n"},
|
||||
{"state corruption", state.Entries{state.ID{}: testState}, false, false, " Instance PID Application Uptime\n"},
|
||||
|
||||
{"valid pd", state.Entries{testID: &state.State{ID: testID, PID: 1 << 8, Config: new(hst.Config), Time: testAppTime}}, false, false, ` Instance PID Application Uptime
|
||||
8e2c76b0 256 0 (uk.gensokyo.hakurei.8e2c76b0) 1h2m32s
|
||||
{"valid pd", state.Entries{testID: &state.State{ID: testID, PID: 1 << 8, Config: new(hst.Config), Time: testAppTime}}, false, false, ` Instance PID Application Uptime
|
||||
8e2c76b0 256 0 (app.hakurei.8e2c76b0) 1h2m32s
|
||||
`},
|
||||
|
||||
{"valid", state.Entries{testID: testState}, false, false, ` Instance PID Application Uptime
|
||||
@@ -561,7 +566,10 @@ func Test_printPs(t *testing.T) {
|
||||
],
|
||||
"container": {
|
||||
"hostname": "localhost",
|
||||
"seccomp": 32,
|
||||
"wait_delay": -1,
|
||||
"seccomp_flags": 1,
|
||||
"seccomp_presets": 1,
|
||||
"seccomp_compat": true,
|
||||
"devel": true,
|
||||
"userns": true,
|
||||
"net": true,
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
type appInfo struct {
|
||||
@@ -115,10 +115,10 @@ func (app *appInfo) toFst(pathSet *appPathSet, argv []string, flagDropShell bool
|
||||
},
|
||||
}
|
||||
if app.Multiarch {
|
||||
config.Container.Seccomp |= seccomp.FilterMultiarch
|
||||
config.Container.SeccompFlags |= seccomp.AllowMultiarch
|
||||
}
|
||||
if app.Bluetooth {
|
||||
config.Container.Seccomp |= seccomp.FilterBluetooth
|
||||
config.Container.SeccompFlags |= seccomp.AllowBluetooth
|
||||
}
|
||||
return config
|
||||
}
|
||||
|
||||
@@ -215,15 +215,14 @@ stdenv.mkDerivation {
|
||||
# create binary cache
|
||||
closureInfo="${
|
||||
closureInfo {
|
||||
rootPaths =
|
||||
[
|
||||
homeManagerConfiguration.activationPackage
|
||||
launcher
|
||||
]
|
||||
++ optionals gpu [
|
||||
mesaWrappers
|
||||
nixGL
|
||||
];
|
||||
rootPaths = [
|
||||
homeManagerConfiguration.activationPackage
|
||||
launcher
|
||||
]
|
||||
++ optionals gpu [
|
||||
mesaWrappers
|
||||
nixGL
|
||||
];
|
||||
}
|
||||
}"
|
||||
echo "copying application paths..."
|
||||
|
||||
@@ -10,10 +10,10 @@ import (
|
||||
"path"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/command"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"hakurei.app/command"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
const shellPath = "/run/current-system/sw/bin/bash"
|
||||
@@ -42,7 +42,7 @@ func main() {
|
||||
flagVerbose bool
|
||||
flagDropShell bool
|
||||
)
|
||||
c := command.New(os.Stderr, log.Printf, "planterette", func([]string) error { internal.InstallFmsg(flagVerbose); return nil }).
|
||||
c := command.New(os.Stderr, log.Printf, "planterette", func([]string) error { internal.InstallOutput(flagVerbose); return nil }).
|
||||
Flag(&flagVerbose, "v", command.BoolFlag(false), "Print debug messages to the console").
|
||||
Flag(&flagDropShell, "s", command.BoolFlag(false), "Drop to a shell in place of next hakurei action")
|
||||
|
||||
|
||||
@@ -8,8 +8,8 @@ import (
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
var hakureiPath = internal.MustHakureiPath()
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
)
|
||||
|
||||
func withNixDaemon(
|
||||
@@ -43,11 +43,11 @@ func withNixDaemon(
|
||||
Identity: app.Identity,
|
||||
|
||||
Container: &hst.ContainerConfig{
|
||||
Hostname: formatHostname(app.Name) + "-" + action,
|
||||
Userns: true, // nix sandbox requires userns
|
||||
Net: net,
|
||||
Seccomp: seccomp.FilterMultiarch,
|
||||
Tty: dropShell,
|
||||
Hostname: formatHostname(app.Name) + "-" + action,
|
||||
Userns: true, // nix sandbox requires userns
|
||||
Net: net,
|
||||
SeccompFlags: seccomp.AllowMultiarch,
|
||||
Tty: dropShell,
|
||||
Filesystem: []*hst.FilesystemConfig{
|
||||
{Src: pathSet.nixPath, Dst: "/nix", Write: true, Must: true},
|
||||
},
|
||||
@@ -85,9 +85,9 @@ func withCacheDir(
|
||||
Identity: app.Identity,
|
||||
|
||||
Container: &hst.ContainerConfig{
|
||||
Hostname: formatHostname(app.Name) + "-" + action,
|
||||
Seccomp: seccomp.FilterMultiarch,
|
||||
Tty: dropShell,
|
||||
Hostname: formatHostname(app.Name) + "-" + action,
|
||||
SeccompFlags: seccomp.AllowMultiarch,
|
||||
Tty: dropShell,
|
||||
Filesystem: []*hst.FilesystemConfig{
|
||||
{Src: path.Join(workDir, "nix"), Dst: "/nix", Must: true},
|
||||
{Src: workDir, Dst: path.Join(hst.Tmp, "bundle"), Must: true},
|
||||
|
||||
@@ -3,7 +3,7 @@ package command_test
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/command"
|
||||
"hakurei.app/command"
|
||||
)
|
||||
|
||||
func TestBuild(t *testing.T) {
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/command"
|
||||
"hakurei.app/command"
|
||||
)
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
|
||||
45
container/capability.go
Normal file
45
container/capability.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
_LINUX_CAPABILITY_VERSION_3 = 0x20080522
|
||||
|
||||
PR_CAP_AMBIENT = 0x2f
|
||||
PR_CAP_AMBIENT_RAISE = 0x2
|
||||
PR_CAP_AMBIENT_CLEAR_ALL = 0x4
|
||||
|
||||
CAP_SYS_ADMIN = 0x15
|
||||
CAP_SETPCAP = 0x8
|
||||
)
|
||||
|
||||
type (
|
||||
capHeader struct {
|
||||
version uint32
|
||||
pid int32
|
||||
}
|
||||
|
||||
capData struct {
|
||||
effective uint32
|
||||
permitted uint32
|
||||
inheritable uint32
|
||||
}
|
||||
)
|
||||
|
||||
// See CAP_TO_INDEX in linux/capability.h:
|
||||
func capToIndex(cap uintptr) uintptr { return cap >> 5 }
|
||||
|
||||
// See CAP_TO_MASK in linux/capability.h:
|
||||
func capToMask(cap uintptr) uint32 { return 1 << uint(cap&31) }
|
||||
|
||||
func capset(hdrp *capHeader, datap *[2]capData) error {
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_CAPSET,
|
||||
uintptr(unsafe.Pointer(hdrp)),
|
||||
uintptr(unsafe.Pointer(&datap[0])), 0); errno != 0 {
|
||||
return errno
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
// Package sandbox implements unprivileged Linux container with hardening options useful for creating application sandboxes.
|
||||
package sandbox
|
||||
// Package container implements unprivileged Linux containers with built-in support for syscall filtering.
|
||||
package container
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -11,37 +11,21 @@ import (
|
||||
"os/exec"
|
||||
"path"
|
||||
"strconv"
|
||||
"syscall"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
type HardeningFlags uintptr
|
||||
|
||||
const (
|
||||
FSyscallCompat HardeningFlags = 1 << iota
|
||||
FAllowDevel
|
||||
FAllowUserns
|
||||
FAllowTTY
|
||||
FAllowNet
|
||||
)
|
||||
// Nonexistent is a path that cannot exist.
|
||||
// /proc is chosen because a system with covered /proc is unsupported by this package.
|
||||
Nonexistent = "/proc/nonexistent"
|
||||
|
||||
func (flags HardeningFlags) seccomp(opts seccomp.FilterOpts) seccomp.FilterOpts {
|
||||
if flags&FSyscallCompat == 0 {
|
||||
opts |= seccomp.FilterExt
|
||||
}
|
||||
if flags&FAllowDevel == 0 {
|
||||
opts |= seccomp.FilterDenyDevel
|
||||
}
|
||||
if flags&FAllowUserns == 0 {
|
||||
opts |= seccomp.FilterDenyNS
|
||||
}
|
||||
if flags&FAllowTTY == 0 {
|
||||
opts |= seccomp.FilterDenyTTY
|
||||
}
|
||||
return opts
|
||||
}
|
||||
// CancelSignal is the signal expected by container init on context cancel.
|
||||
// A custom [Container.Cancel] function must eventually deliver this signal.
|
||||
CancelSignal = SIGTERM
|
||||
)
|
||||
|
||||
type (
|
||||
// Container represents a container environment being prepared or run.
|
||||
@@ -55,9 +39,6 @@ type (
|
||||
// with behaviour identical to its [exec.Cmd] counterpart.
|
||||
ExtraFiles []*os.File
|
||||
|
||||
// Custom [exec.Cmd] initialisation function.
|
||||
CommandContext func(ctx context.Context) (cmd *exec.Cmd)
|
||||
|
||||
// param encoder for shim and init
|
||||
setup *gob.Encoder
|
||||
// cancels cmd
|
||||
@@ -85,6 +66,10 @@ type (
|
||||
Path string
|
||||
// Initial process argv.
|
||||
Args []string
|
||||
// Deliver SIGINT to the initial process on context cancellation.
|
||||
ForwardCancel bool
|
||||
// time to wait for linger processes after death of initial process
|
||||
AdoptWaitDelay time.Duration
|
||||
|
||||
// Mapped Uid in user namespace.
|
||||
Uid int
|
||||
@@ -94,34 +79,43 @@ type (
|
||||
Hostname string
|
||||
// Sequential container setup ops.
|
||||
*Ops
|
||||
// Extra seccomp options.
|
||||
Seccomp seccomp.FilterOpts
|
||||
|
||||
// Seccomp system call filter rules.
|
||||
SeccompRules []seccomp.NativeRule
|
||||
// Extra seccomp flags.
|
||||
SeccompFlags seccomp.ExportFlag
|
||||
// Seccomp presets. Has no effect unless SeccompRules is zero-length.
|
||||
SeccompPresets seccomp.FilterPreset
|
||||
// Do not load seccomp program.
|
||||
SeccompDisable bool
|
||||
|
||||
// Permission bits of newly created parent directories.
|
||||
// The zero value is interpreted as 0755.
|
||||
ParentPerm os.FileMode
|
||||
// Do not syscall.Setsid.
|
||||
RetainSession bool
|
||||
// Do not [syscall.CLONE_NEWNET].
|
||||
HostNet bool
|
||||
// Retain CAP_SYS_ADMIN.
|
||||
Privileged bool
|
||||
|
||||
Flags HardeningFlags
|
||||
}
|
||||
)
|
||||
|
||||
// Start starts the container init. The init process blocks until Serve is called.
|
||||
func (p *Container) Start() error {
|
||||
if p.cmd != nil {
|
||||
return errors.New("sandbox: already started")
|
||||
return errors.New("container: already started")
|
||||
}
|
||||
if p.Ops == nil || len(*p.Ops) == 0 {
|
||||
return errors.New("sandbox: starting an empty container")
|
||||
return errors.New("container: starting an empty container")
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(p.ctx)
|
||||
p.cancel = cancel
|
||||
|
||||
var cloneFlags uintptr = syscall.CLONE_NEWIPC |
|
||||
syscall.CLONE_NEWUTS |
|
||||
syscall.CLONE_NEWCGROUP
|
||||
if p.Flags&FAllowNet == 0 {
|
||||
cloneFlags |= syscall.CLONE_NEWNET
|
||||
var cloneFlags uintptr = CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWCGROUP
|
||||
if !p.HostNet {
|
||||
cloneFlags |= CLONE_NEWNET
|
||||
}
|
||||
|
||||
// map to overflow id to work around ownership checks
|
||||
@@ -132,29 +126,32 @@ func (p *Container) Start() error {
|
||||
p.Gid = OverflowGid()
|
||||
}
|
||||
|
||||
if p.CommandContext != nil {
|
||||
p.cmd = p.CommandContext(ctx)
|
||||
} else {
|
||||
p.cmd = exec.CommandContext(ctx, MustExecutable())
|
||||
p.cmd.Args = []string{"init"}
|
||||
if !p.RetainSession {
|
||||
p.SeccompPresets |= seccomp.PresetDenyTTY
|
||||
}
|
||||
|
||||
if p.AdoptWaitDelay == 0 {
|
||||
p.AdoptWaitDelay = 5 * time.Second
|
||||
}
|
||||
// to allow disabling this behaviour
|
||||
if p.AdoptWaitDelay < 0 {
|
||||
p.AdoptWaitDelay = 0
|
||||
}
|
||||
|
||||
p.cmd = exec.CommandContext(ctx, MustExecutable())
|
||||
p.cmd.Args = []string{initName}
|
||||
p.cmd.Stdin, p.cmd.Stdout, p.cmd.Stderr = p.Stdin, p.Stdout, p.Stderr
|
||||
p.cmd.WaitDelay = p.WaitDelay
|
||||
if p.Cancel != nil {
|
||||
p.cmd.Cancel = func() error { return p.Cancel(p.cmd) }
|
||||
} else {
|
||||
p.cmd.Cancel = func() error { return p.cmd.Process.Signal(syscall.SIGTERM) }
|
||||
p.cmd.Cancel = func() error { return p.cmd.Process.Signal(CancelSignal) }
|
||||
}
|
||||
p.cmd.Dir = "/"
|
||||
p.cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setsid: p.Flags&FAllowTTY == 0,
|
||||
Pdeathsig: syscall.SIGKILL,
|
||||
|
||||
Cloneflags: cloneFlags |
|
||||
syscall.CLONE_NEWUSER |
|
||||
syscall.CLONE_NEWPID |
|
||||
syscall.CLONE_NEWNS,
|
||||
p.cmd.SysProcAttr = &SysProcAttr{
|
||||
Setsid: !p.RetainSession,
|
||||
Pdeathsig: SIGKILL,
|
||||
Cloneflags: cloneFlags | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS,
|
||||
|
||||
// remain privileged for setup
|
||||
AmbientCaps: []uintptr{CAP_SYS_ADMIN, CAP_SETPCAP},
|
||||
@@ -182,6 +179,8 @@ func (p *Container) Start() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Serve serves [Container.Params] to the container init.
|
||||
// Serve must only be called once.
|
||||
func (p *Container) Serve() error {
|
||||
if p.setup == nil {
|
||||
panic("invalid serve")
|
||||
@@ -192,7 +191,7 @@ func (p *Container) Serve() error {
|
||||
|
||||
if p.Path != "" && !path.IsAbs(p.Path) {
|
||||
p.cancel()
|
||||
return msg.WrapErr(syscall.EINVAL,
|
||||
return msg.WrapErr(EINVAL,
|
||||
fmt.Sprintf("invalid executable path %q", p.Path))
|
||||
}
|
||||
|
||||
@@ -201,7 +200,7 @@ func (p *Container) Serve() error {
|
||||
p.Path = os.Getenv("SHELL")
|
||||
if !path.IsAbs(p.Path) {
|
||||
p.cancel()
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
return msg.WrapErr(EBADE,
|
||||
"no command specified and $SHELL is invalid")
|
||||
}
|
||||
p.name = path.Base(p.Path)
|
||||
@@ -215,11 +214,16 @@ func (p *Container) Serve() error {
|
||||
}
|
||||
}
|
||||
|
||||
if p.SeccompRules == nil {
|
||||
// do not transmit nil
|
||||
p.SeccompRules = make([]seccomp.NativeRule, 0)
|
||||
}
|
||||
|
||||
err := setup.Encode(
|
||||
&initParams{
|
||||
p.Params,
|
||||
syscall.Getuid(),
|
||||
syscall.Getgid(),
|
||||
Getuid(),
|
||||
Getgid(),
|
||||
len(p.ExtraFiles),
|
||||
msg.IsVerbose(),
|
||||
},
|
||||
@@ -230,11 +234,20 @@ func (p *Container) Serve() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait waits for the container init process to exit.
|
||||
func (p *Container) Wait() error { defer p.cancel(); return p.cmd.Wait() }
|
||||
|
||||
func (p *Container) String() string {
|
||||
return fmt.Sprintf("argv: %q, flags: %#x, seccomp: %#x",
|
||||
p.Args, p.Flags, int(p.Flags.seccomp(p.Seccomp)))
|
||||
return fmt.Sprintf("argv: %q, filter: %v, rules: %d, flags: %#x, presets: %#x",
|
||||
p.Args, !p.SeccompDisable, len(p.SeccompRules), int(p.SeccompFlags), int(p.SeccompPresets))
|
||||
}
|
||||
|
||||
// ProcessState returns the address to os.ProcessState held by the underlying [exec.Cmd].
|
||||
func (p *Container) ProcessState() *os.ProcessState {
|
||||
if p.cmd == nil {
|
||||
return nil
|
||||
}
|
||||
return p.cmd.ProcessState
|
||||
}
|
||||
|
||||
func New(ctx context.Context, name string, args ...string) *Container {
|
||||
368
container/container_test.go
Normal file
368
container/container_test.go
Normal file
@@ -0,0 +1,368 @@
|
||||
package container_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"hakurei.app/command"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/container/vfs"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
const (
|
||||
ignore = "\x00"
|
||||
ignoreV = -1
|
||||
|
||||
pathWantMnt = "/etc/hakurei/want-mnt"
|
||||
)
|
||||
|
||||
var containerTestCases = []struct {
|
||||
name string
|
||||
filter bool
|
||||
session bool
|
||||
net bool
|
||||
ops *container.Ops
|
||||
|
||||
mnt []*vfs.MountInfoEntry
|
||||
uid int
|
||||
gid int
|
||||
|
||||
rules []seccomp.NativeRule
|
||||
flags seccomp.ExportFlag
|
||||
presets seccomp.FilterPreset
|
||||
}{
|
||||
{"minimal", true, false, false,
|
||||
new(container.Ops), nil,
|
||||
1000, 100, nil, 0, seccomp.PresetStrict},
|
||||
{"allow", true, true, true,
|
||||
new(container.Ops), nil,
|
||||
1000, 100, nil, 0, seccomp.PresetExt | seccomp.PresetDenyDevel},
|
||||
{"no filter", false, true, true,
|
||||
new(container.Ops), nil,
|
||||
1000, 100, nil, 0, seccomp.PresetExt},
|
||||
{"custom rules", true, true, true,
|
||||
new(container.Ops), nil,
|
||||
1, 31, []seccomp.NativeRule{{seccomp.ScmpSyscall(syscall.SYS_SETUID), seccomp.ScmpErrno(syscall.EPERM), nil}}, 0, seccomp.PresetExt},
|
||||
{"tmpfs", true, false, false,
|
||||
new(container.Ops).
|
||||
Tmpfs(hst.Tmp, 0, 0755),
|
||||
[]*vfs.MountInfoEntry{
|
||||
ent("/", hst.Tmp, "rw,nosuid,nodev,relatime", "tmpfs", "tmpfs", ignore),
|
||||
},
|
||||
9, 9, nil, 0, seccomp.PresetStrict},
|
||||
{"dev", true, true /* go test output is not a tty */, false,
|
||||
new(container.Ops).
|
||||
Dev("/dev").
|
||||
Mqueue("/dev/mqueue"),
|
||||
[]*vfs.MountInfoEntry{
|
||||
ent("/", "/dev", "rw,nosuid,nodev,relatime", "tmpfs", "devtmpfs", ignore),
|
||||
ent("/null", "/dev/null", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/zero", "/dev/zero", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/full", "/dev/full", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/random", "/dev/random", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/urandom", "/dev/urandom", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/tty", "/dev/tty", "rw,nosuid", "devtmpfs", "devtmpfs", ignore),
|
||||
ent("/", "/dev/pts", "rw,nosuid,noexec,relatime", "devpts", "devpts", "rw,mode=620,ptmxmode=666"),
|
||||
ent("/", "/dev/mqueue", "rw,nosuid,nodev,noexec,relatime", "mqueue", "mqueue", "rw"),
|
||||
},
|
||||
1971, 100, nil, 0, seccomp.PresetStrict},
|
||||
}
|
||||
|
||||
func TestContainer(t *testing.T) {
|
||||
{
|
||||
oldVerbose := hlog.Load()
|
||||
oldOutput := container.GetOutput()
|
||||
internal.InstallOutput(true)
|
||||
t.Cleanup(func() { hlog.Store(oldVerbose) })
|
||||
t.Cleanup(func() { container.SetOutput(oldOutput) })
|
||||
}
|
||||
|
||||
t.Run("cancel", testContainerCancel(nil, func(t *testing.T, c *container.Container) {
|
||||
wantErr := context.Canceled
|
||||
wantExitCode := 0
|
||||
if err := c.Wait(); !errors.Is(err, wantErr) {
|
||||
hlog.PrintBaseError(err, "wait:")
|
||||
t.Errorf("Wait: error = %v, want %v", err, wantErr)
|
||||
}
|
||||
if ps := c.ProcessState(); ps == nil {
|
||||
t.Errorf("ProcessState unexpectedly returned nil")
|
||||
} else if code := ps.ExitCode(); code != wantExitCode {
|
||||
t.Errorf("ExitCode: %d, want %d", code, wantExitCode)
|
||||
}
|
||||
}))
|
||||
|
||||
t.Run("forward", testContainerCancel(func(c *container.Container) {
|
||||
c.ForwardCancel = true
|
||||
}, func(t *testing.T, c *container.Container) {
|
||||
var exitError *exec.ExitError
|
||||
if err := c.Wait(); !errors.As(err, &exitError) {
|
||||
hlog.PrintBaseError(err, "wait:")
|
||||
t.Errorf("Wait: error = %v", err)
|
||||
}
|
||||
if code := exitError.ExitCode(); code != blockExitCodeInterrupt {
|
||||
t.Errorf("ExitCode: %d, want %d", code, blockExitCodeInterrupt)
|
||||
}
|
||||
}))
|
||||
|
||||
for i, tc := range containerTestCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(t.Context(), helperDefaultTimeout)
|
||||
defer cancel()
|
||||
|
||||
var libPaths []string
|
||||
c := helperNewContainerLibPaths(ctx, &libPaths, "container", strconv.Itoa(i))
|
||||
c.Uid = tc.uid
|
||||
c.Gid = tc.gid
|
||||
c.Hostname = hostnameFromTestCase(tc.name)
|
||||
c.Stdout, c.Stderr = os.Stdout, os.Stderr
|
||||
c.WaitDelay = helperDefaultTimeout
|
||||
*c.Ops = append(*c.Ops, *tc.ops...)
|
||||
c.SeccompRules = tc.rules
|
||||
c.SeccompFlags = tc.flags | seccomp.AllowMultiarch
|
||||
c.SeccompPresets = tc.presets
|
||||
c.SeccompDisable = !tc.filter
|
||||
c.RetainSession = tc.session
|
||||
c.HostNet = tc.net
|
||||
|
||||
c.
|
||||
Tmpfs("/tmp", 0, 0755).
|
||||
Place("/etc/hostname", []byte(c.Hostname))
|
||||
// needs /proc to check mountinfo
|
||||
c.Proc("/proc")
|
||||
|
||||
// mountinfo cannot be resolved directly by helper due to libPaths nondeterminism
|
||||
mnt := make([]*vfs.MountInfoEntry, 0, 3+len(libPaths))
|
||||
mnt = append(mnt,
|
||||
ent("/sysroot", "/", "rw,nosuid,nodev,relatime", "tmpfs", "rootfs", ignore),
|
||||
// Bind(os.Args[0], helperInnerPath, 0)
|
||||
ent(ignore, helperInnerPath, "ro,nosuid,nodev,relatime", ignore, ignore, ignore),
|
||||
)
|
||||
for _, name := range libPaths {
|
||||
// Bind(name, name, 0)
|
||||
mnt = append(mnt, ent(ignore, name, "ro,nosuid,nodev,relatime", ignore, ignore, ignore))
|
||||
}
|
||||
mnt = append(mnt, tc.mnt...)
|
||||
mnt = append(mnt,
|
||||
// Tmpfs("/tmp", 0, 0755)
|
||||
ent("/", "/tmp", "rw,nosuid,nodev,relatime", "tmpfs", "tmpfs", ignore),
|
||||
// Place("/etc/hostname", []byte(hostname))
|
||||
ent(ignore, "/etc/hostname", "ro,nosuid,nodev,relatime", "tmpfs", "rootfs", ignore),
|
||||
// Proc("/proc")
|
||||
ent("/", "/proc", "rw,nosuid,nodev,noexec,relatime", "proc", "proc", "rw"),
|
||||
// Place(pathWantMnt, want.Bytes())
|
||||
ent(ignore, pathWantMnt, "ro,nosuid,nodev,relatime", "tmpfs", "rootfs", ignore),
|
||||
)
|
||||
want := new(bytes.Buffer)
|
||||
if err := gob.NewEncoder(want).Encode(mnt); err != nil {
|
||||
t.Fatalf("cannot serialise expected mount points: %v", err)
|
||||
}
|
||||
c.Place(pathWantMnt, want.Bytes())
|
||||
|
||||
if err := c.Start(); err != nil {
|
||||
hlog.PrintBaseError(err, "start:")
|
||||
t.Fatalf("cannot start container: %v", err)
|
||||
} else if err = c.Serve(); err != nil {
|
||||
hlog.PrintBaseError(err, "serve:")
|
||||
t.Errorf("cannot serve setup params: %v", err)
|
||||
}
|
||||
if err := c.Wait(); err != nil {
|
||||
hlog.PrintBaseError(err, "wait:")
|
||||
t.Fatalf("wait: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func ent(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoEntry {
|
||||
return &vfs.MountInfoEntry{
|
||||
ID: ignoreV,
|
||||
Parent: ignoreV,
|
||||
Devno: vfs.DevT{ignoreV, ignoreV},
|
||||
Root: root,
|
||||
Target: target,
|
||||
VfsOptstr: vfsOptstr,
|
||||
OptFields: []string{ignore},
|
||||
FsType: fsType,
|
||||
Source: source,
|
||||
FsOptstr: fsOptstr,
|
||||
}
|
||||
}
|
||||
|
||||
func hostnameFromTestCase(name string) string {
|
||||
return "test-" + strings.Join(strings.Fields(name), "-")
|
||||
}
|
||||
|
||||
func testContainerCancel(
|
||||
containerExtra func(c *container.Container),
|
||||
waitCheck func(t *testing.T, c *container.Container),
|
||||
) func(t *testing.T) {
|
||||
return func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(t.Context(), helperDefaultTimeout)
|
||||
|
||||
c := helperNewContainer(ctx, "block")
|
||||
c.Stdout, c.Stderr = os.Stdout, os.Stderr
|
||||
c.WaitDelay = helperDefaultTimeout
|
||||
if containerExtra != nil {
|
||||
containerExtra(c)
|
||||
}
|
||||
|
||||
ready := make(chan struct{})
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
t.Fatalf("cannot pipe: %v", err)
|
||||
} else {
|
||||
c.ExtraFiles = append(c.ExtraFiles, w)
|
||||
go func() {
|
||||
defer close(ready)
|
||||
if _, err = r.Read(make([]byte, 1)); err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if err := c.Start(); err != nil {
|
||||
hlog.PrintBaseError(err, "start:")
|
||||
t.Fatalf("cannot start container: %v", err)
|
||||
} else if err = c.Serve(); err != nil {
|
||||
hlog.PrintBaseError(err, "serve:")
|
||||
t.Errorf("cannot serve setup params: %v", err)
|
||||
}
|
||||
<-ready
|
||||
cancel()
|
||||
waitCheck(t, c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerString(t *testing.T) {
|
||||
c := container.New(t.Context(), "ldd", "/usr/bin/env")
|
||||
c.SeccompFlags |= seccomp.AllowMultiarch
|
||||
c.SeccompRules = seccomp.Preset(
|
||||
seccomp.PresetExt|seccomp.PresetDenyNS|seccomp.PresetDenyTTY,
|
||||
c.SeccompFlags)
|
||||
c.SeccompPresets = seccomp.PresetStrict
|
||||
want := `argv: ["ldd" "/usr/bin/env"], filter: true, rules: 65, flags: 0x1, presets: 0xf`
|
||||
if got := c.String(); got != want {
|
||||
t.Errorf("String: %s, want %s", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
blockExitCodeInterrupt = 2
|
||||
)
|
||||
|
||||
func init() {
|
||||
helperCommands = append(helperCommands, func(c command.Command) {
|
||||
c.Command("block", command.UsageInternal, func(args []string) error {
|
||||
if _, err := os.NewFile(3, "sync").Write([]byte{0}); err != nil {
|
||||
return fmt.Errorf("write to sync pipe: %v", err)
|
||||
}
|
||||
{
|
||||
sig := make(chan os.Signal, 1)
|
||||
signal.Notify(sig, os.Interrupt)
|
||||
go func() { <-sig; os.Exit(blockExitCodeInterrupt) }()
|
||||
}
|
||||
select {}
|
||||
})
|
||||
|
||||
c.Command("container", command.UsageInternal, func(args []string) error {
|
||||
if len(args) != 1 {
|
||||
return syscall.EINVAL
|
||||
}
|
||||
tc := containerTestCases[0]
|
||||
if i, err := strconv.Atoi(args[0]); err != nil {
|
||||
return fmt.Errorf("cannot parse test case index: %v", err)
|
||||
} else {
|
||||
tc = containerTestCases[i]
|
||||
}
|
||||
|
||||
if uid := syscall.Getuid(); uid != tc.uid {
|
||||
return fmt.Errorf("uid: %d, want %d", uid, tc.uid)
|
||||
}
|
||||
if gid := syscall.Getgid(); gid != tc.gid {
|
||||
return fmt.Errorf("gid: %d, want %d", gid, tc.gid)
|
||||
}
|
||||
|
||||
wantHost := hostnameFromTestCase(tc.name)
|
||||
if host, err := os.Hostname(); err != nil {
|
||||
return fmt.Errorf("cannot get hostname: %v", err)
|
||||
} else if host != wantHost {
|
||||
return fmt.Errorf("hostname: %q, want %q", host, wantHost)
|
||||
}
|
||||
|
||||
if p, err := os.ReadFile("/etc/hostname"); err != nil {
|
||||
return fmt.Errorf("cannot read /etc/hostname: %v", err)
|
||||
} else if string(p) != wantHost {
|
||||
return fmt.Errorf("/etc/hostname: %q, want %q", string(p), wantHost)
|
||||
}
|
||||
|
||||
{
|
||||
var fail bool
|
||||
|
||||
var mnt []*vfs.MountInfoEntry
|
||||
if f, err := os.Open(pathWantMnt); err != nil {
|
||||
return fmt.Errorf("cannot open expected mount points: %v", err)
|
||||
} else if err = gob.NewDecoder(f).Decode(&mnt); err != nil {
|
||||
return fmt.Errorf("cannot parse expected mount points: %v", err)
|
||||
} else if err = f.Close(); err != nil {
|
||||
return fmt.Errorf("cannot close expected mount points: %v", err)
|
||||
}
|
||||
|
||||
var d *vfs.MountInfoDecoder
|
||||
if f, err := os.Open("/proc/self/mountinfo"); err != nil {
|
||||
return fmt.Errorf("cannot open mountinfo: %v", err)
|
||||
} else {
|
||||
d = vfs.NewMountInfoDecoder(f)
|
||||
}
|
||||
|
||||
i := 0
|
||||
for cur := range d.Entries() {
|
||||
if i == len(mnt) {
|
||||
return fmt.Errorf("got more than %d entries", len(mnt))
|
||||
}
|
||||
|
||||
// ugly hack but should be reliable and is less likely to false negative than comparing by parsed flags
|
||||
cur.VfsOptstr = strings.TrimSuffix(cur.VfsOptstr, ",relatime")
|
||||
cur.VfsOptstr = strings.TrimSuffix(cur.VfsOptstr, ",noatime")
|
||||
mnt[i].VfsOptstr = strings.TrimSuffix(mnt[i].VfsOptstr, ",relatime")
|
||||
mnt[i].VfsOptstr = strings.TrimSuffix(mnt[i].VfsOptstr, ",noatime")
|
||||
|
||||
if !cur.EqualWithIgnore(mnt[i], "\x00") {
|
||||
fail = true
|
||||
log.Printf("[FAIL] %s", cur)
|
||||
} else {
|
||||
log.Printf("[ OK ] %s", cur)
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
if err := d.Err(); err != nil {
|
||||
return fmt.Errorf("cannot parse mountinfo: %v", err)
|
||||
}
|
||||
|
||||
if i != len(mnt) {
|
||||
return fmt.Errorf("got %d entries, want %d", i, len(mnt))
|
||||
}
|
||||
|
||||
if fail {
|
||||
return errors.New("one or more mountinfo entries do not match")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
})
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"log"
|
||||
@@ -1,15 +1,15 @@
|
||||
package sandbox_test
|
||||
package container_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"hakurei.app/container"
|
||||
)
|
||||
|
||||
func TestExecutable(t *testing.T) {
|
||||
for i := 0; i < 16; i++ {
|
||||
if got := sandbox.MustExecutable(); got != os.Args[0] {
|
||||
if got := container.MustExecutable(); got != os.Args[0] {
|
||||
t.Errorf("MustExecutable: %q, want %q",
|
||||
got, os.Args[0])
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"errors"
|
||||
@@ -10,18 +10,28 @@ import (
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"syscall"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
const (
|
||||
// time to wait for linger processes after death of initial process
|
||||
residualProcessTimeout = 5 * time.Second
|
||||
/* intermediate tmpfs mount point
|
||||
|
||||
// intermediate tmpfs mount point
|
||||
basePath = "/tmp"
|
||||
this path might seem like a weird choice, however there are many good reasons to use it:
|
||||
- the contents of this path is never exposed to the container:
|
||||
the tmpfs root established here effectively becomes anonymous after pivot_root
|
||||
- it is safe to assume this path exists and is a directory:
|
||||
this program will not work correctly without a proper /proc and neither will most others
|
||||
- this path belongs to the container init:
|
||||
the container init is not any more privileged or trusted than the rest of the container
|
||||
- this path is only accessible by init and root:
|
||||
the container init sets SUID_DUMP_DISABLE and terminates if that fails;
|
||||
|
||||
it should be noted that none of this should become relevant at any point since the resulting
|
||||
intermediate root tmpfs should be effectively anonymous */
|
||||
intermediateHostPath = "/proc/self/fd"
|
||||
|
||||
// setup params file descriptor
|
||||
setupEnv = "HAKUREI_SETUP"
|
||||
@@ -97,9 +107,9 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
}
|
||||
|
||||
oldmask := syscall.Umask(0)
|
||||
oldmask := Umask(0)
|
||||
if params.Hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(params.Hostname)); err != nil {
|
||||
if err := Sethostname([]byte(params.Hostname)); err != nil {
|
||||
log.Fatalf("cannot set hostname: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -107,9 +117,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
// cache sysctl before pivot_root
|
||||
LastCap()
|
||||
|
||||
if err := syscall.Mount("", "/", "",
|
||||
syscall.MS_SILENT|syscall.MS_SLAVE|syscall.MS_REC,
|
||||
""); err != nil {
|
||||
if err := Mount("", "/", "", MS_SILENT|MS_SLAVE|MS_REC, ""); err != nil {
|
||||
log.Fatalf("cannot make / rslave: %v", err)
|
||||
}
|
||||
|
||||
@@ -126,29 +134,25 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
}
|
||||
}
|
||||
|
||||
if err := syscall.Mount("rootfs", basePath, "tmpfs",
|
||||
syscall.MS_NODEV|syscall.MS_NOSUID,
|
||||
""); err != nil {
|
||||
if err := Mount("rootfs", intermediateHostPath, "tmpfs", MS_NODEV|MS_NOSUID, ""); err != nil {
|
||||
log.Fatalf("cannot mount intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir(basePath); err != nil {
|
||||
if err := os.Chdir(intermediateHostPath); err != nil {
|
||||
log.Fatalf("cannot enter base path: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(sysrootDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
if err := syscall.Mount(sysrootDir, sysrootDir, "",
|
||||
syscall.MS_SILENT|syscall.MS_MGC_VAL|syscall.MS_BIND|syscall.MS_REC,
|
||||
""); err != nil {
|
||||
if err := Mount(sysrootDir, sysrootDir, "", MS_SILENT|MS_MGC_VAL|MS_BIND|MS_REC, ""); err != nil {
|
||||
log.Fatalf("cannot bind sysroot: %v", err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(hostDir, 0755); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
// pivot_root uncovers basePath in hostDir
|
||||
if err := syscall.PivotRoot(basePath, hostDir); err != nil {
|
||||
// pivot_root uncovers intermediateHostPath in hostDir
|
||||
if err := PivotRoot(intermediateHostPath, hostDir); err != nil {
|
||||
log.Fatalf("cannot pivot into intermediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
@@ -167,19 +171,17 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
}
|
||||
|
||||
// setup requiring host root complete at this point
|
||||
if err := syscall.Mount(hostDir, hostDir, "",
|
||||
syscall.MS_SILENT|syscall.MS_REC|syscall.MS_PRIVATE,
|
||||
""); err != nil {
|
||||
if err := Mount(hostDir, hostDir, "", MS_SILENT|MS_REC|MS_PRIVATE, ""); err != nil {
|
||||
log.Fatalf("cannot make host root rprivate: %v", err)
|
||||
}
|
||||
if err := syscall.Unmount(hostDir, syscall.MNT_DETACH); err != nil {
|
||||
if err := Unmount(hostDir, MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount host root: %v", err)
|
||||
}
|
||||
|
||||
{
|
||||
var fd int
|
||||
if err := IgnoringEINTR(func() (err error) {
|
||||
fd, err = syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
|
||||
fd, err = Open("/", O_DIRECTORY|O_RDONLY, 0)
|
||||
return
|
||||
}); err != nil {
|
||||
log.Fatalf("cannot open intermediate root: %v", err)
|
||||
@@ -188,36 +190,36 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := syscall.PivotRoot(".", "."); err != nil {
|
||||
if err := PivotRoot(".", "."); err != nil {
|
||||
log.Fatalf("cannot pivot into sysroot: %v", err)
|
||||
}
|
||||
if err := syscall.Fchdir(fd); err != nil {
|
||||
if err := Fchdir(fd); err != nil {
|
||||
log.Fatalf("cannot re-enter intermediate root: %v", err)
|
||||
}
|
||||
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
|
||||
if err := Unmount(".", MNT_DETACH); err != nil {
|
||||
log.Fatalf("cannot unmount intemediate root: %v", err)
|
||||
}
|
||||
if err := os.Chdir("/"); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
if err := syscall.Close(fd); err != nil {
|
||||
if err := Close(fd); err != nil {
|
||||
log.Fatalf("cannot close intermediate root: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, _, errno := syscall.Syscall(PR_SET_NO_NEW_PRIVS, 1, 0, 0); errno != 0 {
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_SET_NO_NEW_PRIVS, 1, 0); errno != 0 {
|
||||
log.Fatalf("prctl(PR_SET_NO_NEW_PRIVS): %v", errno)
|
||||
}
|
||||
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0); errno != 0 {
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0); errno != 0 {
|
||||
log.Fatalf("cannot clear the ambient capability set: %v", errno)
|
||||
}
|
||||
for i := uintptr(0); i <= LastCap(); i++ {
|
||||
if params.Privileged && i == CAP_SYS_ADMIN {
|
||||
continue
|
||||
}
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_CAPBSET_DROP, i, 0); errno != 0 {
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAPBSET_DROP, i, 0); errno != 0 {
|
||||
log.Fatalf("cannot drop capability from bonding set: %v", errno)
|
||||
}
|
||||
}
|
||||
@@ -226,7 +228,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
if params.Privileged {
|
||||
keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN)
|
||||
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_SYS_ADMIN); errno != 0 {
|
||||
if _, _, errno := Syscall(SYS_PRCTL, PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_SYS_ADMIN); errno != 0 {
|
||||
log.Fatalf("cannot raise CAP_SYS_ADMIN: %v", errno)
|
||||
}
|
||||
}
|
||||
@@ -237,8 +239,18 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
log.Fatalf("cannot capset: %v", err)
|
||||
}
|
||||
|
||||
if err := seccomp.Load(params.Flags.seccomp(params.Seccomp)); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
if !params.SeccompDisable {
|
||||
rules := params.SeccompRules
|
||||
if len(rules) == 0 { // non-empty rules slice always overrides presets
|
||||
msg.Verbosef("resolving presets %#x", params.SeccompPresets)
|
||||
rules = seccomp.Preset(params.SeccompPresets, params.SeccompFlags)
|
||||
}
|
||||
if err := seccomp.Load(rules, params.SeccompFlags); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
}
|
||||
msg.Verbosef("%d filter rules loaded", len(rules))
|
||||
} else {
|
||||
msg.Verbose("syscall filter not configured")
|
||||
}
|
||||
|
||||
extraFiles := make([]*os.File, params.Count)
|
||||
@@ -246,7 +258,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
// setup fd is placed before all extra files
|
||||
extraFiles[i] = os.NewFile(uintptr(offsetSetup+i), "extra file "+strconv.Itoa(i))
|
||||
}
|
||||
syscall.Umask(oldmask)
|
||||
Umask(oldmask)
|
||||
|
||||
cmd := exec.Command(params.Path)
|
||||
cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
|
||||
@@ -255,19 +267,20 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
cmd.ExtraFiles = extraFiles
|
||||
cmd.Dir = params.Dir
|
||||
|
||||
msg.Verbosef("starting initial program %s", params.Path)
|
||||
if err := cmd.Start(); err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
msg.Suspend()
|
||||
|
||||
if err := closeSetup(); err != nil {
|
||||
log.Println("cannot close setup pipe:", err)
|
||||
log.Printf("cannot close setup pipe: %v", err)
|
||||
// not fatal
|
||||
}
|
||||
|
||||
type winfo struct {
|
||||
wpid int
|
||||
wstatus syscall.WaitStatus
|
||||
wstatus WaitStatus
|
||||
}
|
||||
info := make(chan winfo, 1)
|
||||
done := make(chan struct{})
|
||||
@@ -276,7 +289,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
var (
|
||||
err error
|
||||
wpid = -2
|
||||
wstatus syscall.WaitStatus
|
||||
wstatus WaitStatus
|
||||
)
|
||||
|
||||
// keep going until no child process is left
|
||||
@@ -289,13 +302,13 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
info <- winfo{wpid, wstatus}
|
||||
}
|
||||
|
||||
err = syscall.EINTR
|
||||
for errors.Is(err, syscall.EINTR) {
|
||||
wpid, err = syscall.Wait4(-1, &wstatus, 0, nil)
|
||||
err = EINTR
|
||||
for errors.Is(err, EINTR) {
|
||||
wpid, err = Wait4(-1, &wstatus, 0, nil)
|
||||
}
|
||||
}
|
||||
if !errors.Is(err, syscall.ECHILD) {
|
||||
log.Println("unexpected wait4 response:", err)
|
||||
if !errors.Is(err, ECHILD) {
|
||||
log.Printf("unexpected wait4 response: %v", err)
|
||||
}
|
||||
|
||||
close(done)
|
||||
@@ -303,7 +316,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
|
||||
// handle signals to dump withheld messages
|
||||
sig := make(chan os.Signal, 2)
|
||||
signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
|
||||
signal.Notify(sig, os.Interrupt, CancelSignal)
|
||||
|
||||
// closed after residualProcessTimeout has elapsed after initial process death
|
||||
timeout := make(chan struct{})
|
||||
@@ -313,9 +326,16 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
select {
|
||||
case s := <-sig:
|
||||
if msg.Resume() {
|
||||
msg.Verbosef("terminating on %s after process start", s.String())
|
||||
msg.Verbosef("%s after process start", s.String())
|
||||
} else {
|
||||
msg.Verbosef("terminating on %s", s.String())
|
||||
msg.Verbosef("got %s", s.String())
|
||||
}
|
||||
if s == CancelSignal && params.ForwardCancel && cmd.Process != nil {
|
||||
msg.Verbose("forwarding context cancellation")
|
||||
if err := cmd.Process.Signal(os.Interrupt); err != nil {
|
||||
log.Printf("cannot forward cancellation: %v", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
os.Exit(0)
|
||||
case w := <-info:
|
||||
@@ -335,10 +355,7 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
msg.Verbosef("initial process exited with status %#x", w.wstatus)
|
||||
}
|
||||
|
||||
go func() {
|
||||
time.Sleep(residualProcessTimeout)
|
||||
close(timeout)
|
||||
}()
|
||||
go func() { time.Sleep(params.AdoptWaitDelay); close(timeout) }()
|
||||
}
|
||||
case <-done:
|
||||
msg.BeforeExit()
|
||||
@@ -351,9 +368,11 @@ func Init(prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
}
|
||||
}
|
||||
|
||||
const initName = "init"
|
||||
|
||||
// TryArgv0 calls [Init] if the last element of argv0 is "init".
|
||||
func TryArgv0(v Msg, prepare func(prefix string), setVerbose func(verbose bool)) {
|
||||
if len(os.Args) > 0 && path.Base(os.Args[0]) == "init" {
|
||||
if len(os.Args) > 0 && path.Base(os.Args[0]) == initName {
|
||||
msg = v
|
||||
Init(prepare, setVerbose)
|
||||
msg.BeforeExit()
|
||||
69
container/init_test.go
Normal file
69
container/init_test.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package container_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"hakurei.app/command"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/ldd"
|
||||
)
|
||||
|
||||
const (
|
||||
envDoCheck = "HAKUREI_TEST_DO_CHECK"
|
||||
|
||||
helperDefaultTimeout = 5 * time.Second
|
||||
helperInnerPath = "/usr/bin/helper"
|
||||
)
|
||||
|
||||
var helperCommands []func(c command.Command)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
container.TryArgv0(hlog.Output{}, hlog.Prepare, internal.InstallOutput)
|
||||
|
||||
if os.Getenv(envDoCheck) == "1" {
|
||||
c := command.New(os.Stderr, log.Printf, "helper", func(args []string) error {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("helper: ")
|
||||
return nil
|
||||
})
|
||||
for _, f := range helperCommands {
|
||||
f(c)
|
||||
}
|
||||
c.MustParse(os.Args[1:], func(err error) {
|
||||
if err != nil {
|
||||
log.Fatal(err.Error())
|
||||
}
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func helperNewContainerLibPaths(ctx context.Context, libPaths *[]string, args ...string) (c *container.Container) {
|
||||
c = container.New(ctx, helperInnerPath, args...)
|
||||
c.Env = append(c.Env, envDoCheck+"=1")
|
||||
c.Bind(os.Args[0], helperInnerPath, 0)
|
||||
|
||||
// in case test has cgo enabled
|
||||
if entries, err := ldd.Exec(ctx, os.Args[0]); err != nil {
|
||||
log.Fatalf("ldd: %v", err)
|
||||
} else {
|
||||
*libPaths = ldd.Path(entries)
|
||||
}
|
||||
for _, name := range *libPaths {
|
||||
c.Bind(name, name, 0)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func helperNewContainer(ctx context.Context, args ...string) (c *container.Container) {
|
||||
return helperNewContainerLibPaths(ctx, new([]string), args...)
|
||||
}
|
||||
@@ -1,13 +1,13 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
. "syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
"hakurei.app/container/vfs"
|
||||
)
|
||||
|
||||
func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) error {
|
||||
@@ -17,8 +17,7 @@ func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) err
|
||||
msg.Verbosef("resolved %q on %q flags %#x", source, target, flags)
|
||||
}
|
||||
|
||||
if err := syscall.Mount(source, target, "",
|
||||
syscall.MS_SILENT|syscall.MS_BIND|flags&syscall.MS_REC, ""); err != nil {
|
||||
if err := Mount(source, target, "", MS_SILENT|MS_BIND|flags&MS_REC, ""); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot mount %q on %q:", source, target))
|
||||
}
|
||||
@@ -38,7 +37,7 @@ func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) err
|
||||
{
|
||||
var destFd int
|
||||
if err := IgnoringEINTR(func() (err error) {
|
||||
destFd, err = syscall.Open(targetFinal, O_PATH|syscall.O_CLOEXEC, 0)
|
||||
destFd, err = Open(targetFinal, O_PATH|O_CLOEXEC, 0)
|
||||
return
|
||||
}); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
@@ -46,7 +45,7 @@ func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) err
|
||||
}
|
||||
if v, err := os.Readlink(p.fd(destFd)); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
} else if err = syscall.Close(destFd); err != nil {
|
||||
} else if err = Close(destFd); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot close %q:", targetFinal))
|
||||
} else {
|
||||
@@ -54,11 +53,11 @@ func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) err
|
||||
}
|
||||
}
|
||||
|
||||
mf := syscall.MS_NOSUID | flags&syscall.MS_NODEV | flags&syscall.MS_RDONLY
|
||||
mf := MS_NOSUID | flags&MS_NODEV | flags&MS_RDONLY
|
||||
return hostProc.mountinfo(func(d *vfs.MountInfoDecoder) error {
|
||||
n, err := d.Unfold(targetKFinal)
|
||||
if err != nil {
|
||||
if errors.Is(err, syscall.ESTALE) {
|
||||
if errors.Is(err, ESTALE) {
|
||||
return msg.WrapErr(err,
|
||||
fmt.Sprintf("mount point %q never appeared in mountinfo", targetKFinal))
|
||||
}
|
||||
@@ -69,13 +68,13 @@ func (p *procPaths) bindMount(source, target string, flags uintptr, eq bool) err
|
||||
if err = remountWithFlags(n, mf); err != nil {
|
||||
return err
|
||||
}
|
||||
if flags&syscall.MS_REC == 0 {
|
||||
if flags&MS_REC == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for cur := range n.Collective() {
|
||||
err = remountWithFlags(cur, mf)
|
||||
if err != nil && !errors.Is(err, syscall.EACCES) {
|
||||
if err != nil && !errors.Is(err, EACCES) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -91,9 +90,8 @@ func remountWithFlags(n *vfs.MountInfoNode, mf uintptr) error {
|
||||
}
|
||||
|
||||
if kf&mf != mf {
|
||||
return wrapErrSuffix(syscall.Mount("none", n.Clean, "",
|
||||
syscall.MS_SILENT|syscall.MS_BIND|syscall.MS_REMOUNT|kf|mf,
|
||||
""),
|
||||
return wrapErrSuffix(
|
||||
Mount("none", n.Clean, "", MS_SILENT|MS_BIND|MS_REMOUNT|kf|mf, ""),
|
||||
fmt.Sprintf("cannot remount %q:", n.Clean))
|
||||
}
|
||||
return nil
|
||||
@@ -108,8 +106,8 @@ func mountTmpfs(fsname, name string, size int, perm os.FileMode) error {
|
||||
if size > 0 {
|
||||
opt += fmt.Sprintf(",size=%d", size)
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount(fsname, target, "tmpfs",
|
||||
syscall.MS_NOSUID|syscall.MS_NODEV, opt),
|
||||
return wrapErrSuffix(
|
||||
Mount(fsname, target, "tmpfs", MS_NOSUID|MS_NODEV, opt),
|
||||
fmt.Sprintf("cannot mount tmpfs on %q:", name))
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"log"
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
@@ -9,13 +9,16 @@ import (
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"syscall"
|
||||
. "syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type (
|
||||
Ops []Op
|
||||
Op interface {
|
||||
|
||||
// Op is a generic setup step ran inside the container init.
|
||||
// Implementations of this interface are sent as a stream of gobs.
|
||||
Op interface {
|
||||
// early is called in host root.
|
||||
early(params *Params) error
|
||||
// apply is called in intermediate root.
|
||||
@@ -27,27 +30,35 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
// Grow grows the slice Ops points to using [slices.Grow].
|
||||
func (f *Ops) Grow(n int) { *f = slices.Grow(*f, n) }
|
||||
|
||||
func init() { gob.Register(new(BindMount)) }
|
||||
func init() { gob.Register(new(BindMountOp)) }
|
||||
|
||||
// BindMount bind mounts host path Source on container path Target.
|
||||
type BindMount struct {
|
||||
// Bind appends an [Op] that bind mounts host path [BindMountOp.Source] on container path [BindMountOp.Target].
|
||||
func (f *Ops) Bind(source, target string, flags int) *Ops {
|
||||
*f = append(*f, &BindMountOp{source, "", target, flags})
|
||||
return f
|
||||
}
|
||||
|
||||
type BindMountOp struct {
|
||||
Source, SourceFinal, Target string
|
||||
|
||||
Flags int
|
||||
}
|
||||
|
||||
const (
|
||||
// BindOptional skips nonexistent host paths.
|
||||
BindOptional = 1 << iota
|
||||
// BindWritable mounts filesystem read-write.
|
||||
BindWritable
|
||||
// BindDevice allows access to devices (special files) on this filesystem.
|
||||
BindDevice
|
||||
)
|
||||
|
||||
func (b *BindMount) early(*Params) error {
|
||||
func (b *BindMountOp) early(*Params) error {
|
||||
if !path.IsAbs(b.Source) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", b.Source))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", b.Source))
|
||||
}
|
||||
|
||||
if v, err := filepath.EvalSymlinks(b.Source); err != nil {
|
||||
@@ -62,18 +73,17 @@ func (b *BindMount) early(*Params) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BindMount) apply(*Params) error {
|
||||
func (b *BindMountOp) apply(*Params) error {
|
||||
if b.SourceFinal == "\x00" {
|
||||
if b.Flags&BindOptional == 0 {
|
||||
// unreachable
|
||||
return syscall.EBADE
|
||||
return EBADE
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if !path.IsAbs(b.SourceFinal) || !path.IsAbs(b.Target) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
"path is not absolute")
|
||||
return msg.WrapErr(EBADE, "path is not absolute")
|
||||
}
|
||||
|
||||
source := toHost(b.SourceFinal)
|
||||
@@ -91,73 +101,72 @@ func (b *BindMount) apply(*Params) error {
|
||||
return err
|
||||
}
|
||||
|
||||
var flags uintptr = syscall.MS_REC
|
||||
var flags uintptr = MS_REC
|
||||
if b.Flags&BindWritable == 0 {
|
||||
flags |= syscall.MS_RDONLY
|
||||
flags |= MS_RDONLY
|
||||
}
|
||||
if b.Flags&BindDevice == 0 {
|
||||
flags |= syscall.MS_NODEV
|
||||
flags |= MS_NODEV
|
||||
}
|
||||
|
||||
return hostProc.bindMount(source, target, flags, b.SourceFinal == b.Target)
|
||||
}
|
||||
|
||||
func (b *BindMount) Is(op Op) bool { vb, ok := op.(*BindMount); return ok && *b == *vb }
|
||||
func (*BindMount) prefix() string { return "mounting" }
|
||||
func (b *BindMount) String() string {
|
||||
func (b *BindMountOp) Is(op Op) bool { vb, ok := op.(*BindMountOp); return ok && *b == *vb }
|
||||
func (*BindMountOp) prefix() string { return "mounting" }
|
||||
func (b *BindMountOp) String() string {
|
||||
if b.Source == b.Target {
|
||||
return fmt.Sprintf("%q flags %#x", b.Source, b.Flags)
|
||||
}
|
||||
return fmt.Sprintf("%q on %q flags %#x", b.Source, b.Target, b.Flags&BindWritable)
|
||||
}
|
||||
func (f *Ops) Bind(source, target string, flags int) *Ops {
|
||||
*f = append(*f, &BindMount{source, "", target, flags})
|
||||
|
||||
func init() { gob.Register(new(MountProcOp)) }
|
||||
|
||||
// Proc appends an [Op] that mounts a private instance of proc.
|
||||
func (f *Ops) Proc(dest string) *Ops {
|
||||
*f = append(*f, MountProcOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountProc)) }
|
||||
type MountProcOp string
|
||||
|
||||
// MountProc mounts a private instance of proc.
|
||||
type MountProc string
|
||||
|
||||
func (p MountProc) early(*Params) error { return nil }
|
||||
func (p MountProc) apply(params *Params) error {
|
||||
func (p MountProcOp) early(*Params) error { return nil }
|
||||
func (p MountProcOp) apply(params *Params) error {
|
||||
v := string(p)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", v))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
|
||||
target := toSysroot(v)
|
||||
if err := os.MkdirAll(target, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount("proc", target, "proc",
|
||||
syscall.MS_NOSUID|syscall.MS_NOEXEC|syscall.MS_NODEV, ""),
|
||||
return wrapErrSuffix(Mount("proc", target, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, ""),
|
||||
fmt.Sprintf("cannot mount proc on %q:", v))
|
||||
}
|
||||
|
||||
func (p MountProc) Is(op Op) bool { vp, ok := op.(MountProc); return ok && p == vp }
|
||||
func (MountProc) prefix() string { return "mounting" }
|
||||
func (p MountProc) String() string { return fmt.Sprintf("proc on %q", string(p)) }
|
||||
func (f *Ops) Proc(dest string) *Ops {
|
||||
*f = append(*f, MountProc(dest))
|
||||
func (p MountProcOp) Is(op Op) bool { vp, ok := op.(MountProcOp); return ok && p == vp }
|
||||
func (MountProcOp) prefix() string { return "mounting" }
|
||||
func (p MountProcOp) String() string { return fmt.Sprintf("proc on %q", string(p)) }
|
||||
|
||||
func init() { gob.Register(new(MountDevOp)) }
|
||||
|
||||
// Dev appends an [Op] that mounts a subset of host /dev.
|
||||
func (f *Ops) Dev(dest string) *Ops {
|
||||
*f = append(*f, MountDevOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountDev)) }
|
||||
type MountDevOp string
|
||||
|
||||
// MountDev mounts part of host dev.
|
||||
type MountDev string
|
||||
|
||||
func (d MountDev) early(*Params) error { return nil }
|
||||
func (d MountDev) apply(params *Params) error {
|
||||
func (d MountDevOp) early(*Params) error { return nil }
|
||||
func (d MountDevOp) apply(params *Params) error {
|
||||
v := string(d)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", v))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
target := toSysroot(v)
|
||||
|
||||
@@ -204,19 +213,15 @@ func (d MountDev) apply(params *Params) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := syscall.Mount("devpts", devPtsPath, "devpts",
|
||||
syscall.MS_NOSUID|syscall.MS_NOEXEC,
|
||||
if err := Mount("devpts", devPtsPath, "devpts", MS_NOSUID|MS_NOEXEC,
|
||||
"newinstance,ptmxmode=0666,mode=620"); err != nil {
|
||||
return wrapErrSuffix(err,
|
||||
fmt.Sprintf("cannot mount devpts on %q:", devPtsPath))
|
||||
}
|
||||
|
||||
if params.Flags&FAllowTTY != 0 {
|
||||
if params.RetainSession {
|
||||
var buf [8]byte
|
||||
if _, _, errno := syscall.Syscall(
|
||||
syscall.SYS_IOCTL, 1, syscall.TIOCGWINSZ,
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
); errno == 0 {
|
||||
if _, _, errno := Syscall(SYS_IOCTL, 1, TIOCGWINSZ, uintptr(unsafe.Pointer(&buf[0]))); errno == 0 {
|
||||
consolePath := toSysroot(path.Join(v, "console"))
|
||||
if err := ensureFile(consolePath, 0444, params.ParentPerm); err != nil {
|
||||
return err
|
||||
@@ -237,86 +242,84 @@ func (d MountDev) apply(params *Params) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d MountDev) Is(op Op) bool { vd, ok := op.(MountDev); return ok && d == vd }
|
||||
func (MountDev) prefix() string { return "mounting" }
|
||||
func (d MountDev) String() string { return fmt.Sprintf("dev on %q", string(d)) }
|
||||
func (f *Ops) Dev(dest string) *Ops {
|
||||
*f = append(*f, MountDev(dest))
|
||||
func (d MountDevOp) Is(op Op) bool { vd, ok := op.(MountDevOp); return ok && d == vd }
|
||||
func (MountDevOp) prefix() string { return "mounting" }
|
||||
func (d MountDevOp) String() string { return fmt.Sprintf("dev on %q", string(d)) }
|
||||
|
||||
func init() { gob.Register(new(MountMqueueOp)) }
|
||||
|
||||
// Mqueue appends an [Op] that mounts a private instance of mqueue.
|
||||
func (f *Ops) Mqueue(dest string) *Ops {
|
||||
*f = append(*f, MountMqueueOp(dest))
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountMqueue)) }
|
||||
type MountMqueueOp string
|
||||
|
||||
// MountMqueue mounts a private mqueue instance on container Path.
|
||||
type MountMqueue string
|
||||
|
||||
func (m MountMqueue) early(*Params) error { return nil }
|
||||
func (m MountMqueue) apply(params *Params) error {
|
||||
func (m MountMqueueOp) early(*Params) error { return nil }
|
||||
func (m MountMqueueOp) apply(params *Params) error {
|
||||
v := string(m)
|
||||
|
||||
if !path.IsAbs(v) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", v))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", v))
|
||||
}
|
||||
|
||||
target := toSysroot(v)
|
||||
if err := os.MkdirAll(target, params.ParentPerm); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
}
|
||||
return wrapErrSuffix(syscall.Mount("mqueue", target, "mqueue",
|
||||
syscall.MS_NOSUID|syscall.MS_NOEXEC|syscall.MS_NODEV, ""),
|
||||
return wrapErrSuffix(Mount("mqueue", target, "mqueue", MS_NOSUID|MS_NOEXEC|MS_NODEV, ""),
|
||||
fmt.Sprintf("cannot mount mqueue on %q:", v))
|
||||
}
|
||||
|
||||
func (m MountMqueue) Is(op Op) bool { vm, ok := op.(MountMqueue); return ok && m == vm }
|
||||
func (MountMqueue) prefix() string { return "mounting" }
|
||||
func (m MountMqueue) String() string { return fmt.Sprintf("mqueue on %q", string(m)) }
|
||||
func (f *Ops) Mqueue(dest string) *Ops {
|
||||
*f = append(*f, MountMqueue(dest))
|
||||
func (m MountMqueueOp) Is(op Op) bool { vm, ok := op.(MountMqueueOp); return ok && m == vm }
|
||||
func (MountMqueueOp) prefix() string { return "mounting" }
|
||||
func (m MountMqueueOp) String() string { return fmt.Sprintf("mqueue on %q", string(m)) }
|
||||
|
||||
func init() { gob.Register(new(MountTmpfsOp)) }
|
||||
|
||||
// Tmpfs appends an [Op] that mounts tmpfs on container path [MountTmpfsOp.Path].
|
||||
func (f *Ops) Tmpfs(dest string, size int, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MountTmpfsOp{dest, size, perm})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(MountTmpfs)) }
|
||||
|
||||
// MountTmpfs mounts tmpfs on container Path.
|
||||
type MountTmpfs struct {
|
||||
type MountTmpfsOp struct {
|
||||
Path string
|
||||
Size int
|
||||
Perm os.FileMode
|
||||
}
|
||||
|
||||
func (t *MountTmpfs) early(*Params) error { return nil }
|
||||
func (t *MountTmpfs) apply(*Params) error {
|
||||
func (t *MountTmpfsOp) early(*Params) error { return nil }
|
||||
func (t *MountTmpfsOp) apply(*Params) error {
|
||||
if !path.IsAbs(t.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
}
|
||||
if t.Size < 0 || t.Size > math.MaxUint>>1 {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("size %d out of bounds", t.Size))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("size %d out of bounds", t.Size))
|
||||
}
|
||||
return mountTmpfs("tmpfs", t.Path, t.Size, t.Perm)
|
||||
}
|
||||
|
||||
func (t *MountTmpfs) Is(op Op) bool { vt, ok := op.(*MountTmpfs); return ok && *t == *vt }
|
||||
func (*MountTmpfs) prefix() string { return "mounting" }
|
||||
func (t *MountTmpfs) String() string { return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size) }
|
||||
func (f *Ops) Tmpfs(dest string, size int, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MountTmpfs{dest, size, perm})
|
||||
func (t *MountTmpfsOp) Is(op Op) bool { vt, ok := op.(*MountTmpfsOp); return ok && *t == *vt }
|
||||
func (*MountTmpfsOp) prefix() string { return "mounting" }
|
||||
func (t *MountTmpfsOp) String() string { return fmt.Sprintf("tmpfs on %q size %d", t.Path, t.Size) }
|
||||
|
||||
func init() { gob.Register(new(SymlinkOp)) }
|
||||
|
||||
// Link appends an [Op] that creates a symlink in the container filesystem.
|
||||
func (f *Ops) Link(target, linkName string) *Ops {
|
||||
*f = append(*f, &SymlinkOp{target, linkName})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(Symlink)) }
|
||||
type SymlinkOp [2]string
|
||||
|
||||
// Symlink creates a symlink in the container filesystem.
|
||||
type Symlink [2]string
|
||||
|
||||
func (l *Symlink) early(*Params) error {
|
||||
func (l *SymlinkOp) early(*Params) error {
|
||||
if strings.HasPrefix(l[0], "*") {
|
||||
l[0] = l[0][1:]
|
||||
if !path.IsAbs(l[0]) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", l[0]))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", l[0]))
|
||||
}
|
||||
if name, err := os.Readlink(l[0]); err != nil {
|
||||
return wrapErrSelf(err)
|
||||
@@ -326,11 +329,10 @@ func (l *Symlink) early(*Params) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (l *Symlink) apply(params *Params) error {
|
||||
func (l *SymlinkOp) apply(params *Params) error {
|
||||
// symlink target is an arbitrary path value, so only validate link name here
|
||||
if !path.IsAbs(l[1]) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", l[1]))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", l[1]))
|
||||
}
|
||||
|
||||
target := toSysroot(l[1])
|
||||
@@ -343,27 +345,27 @@ func (l *Symlink) apply(params *Params) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *Symlink) Is(op Op) bool { vl, ok := op.(*Symlink); return ok && *l == *vl }
|
||||
func (*Symlink) prefix() string { return "creating" }
|
||||
func (l *Symlink) String() string { return fmt.Sprintf("symlink on %q target %q", l[1], l[0]) }
|
||||
func (f *Ops) Link(target, linkName string) *Ops {
|
||||
*f = append(*f, &Symlink{target, linkName})
|
||||
func (l *SymlinkOp) Is(op Op) bool { vl, ok := op.(*SymlinkOp); return ok && *l == *vl }
|
||||
func (*SymlinkOp) prefix() string { return "creating" }
|
||||
func (l *SymlinkOp) String() string { return fmt.Sprintf("symlink on %q target %q", l[1], l[0]) }
|
||||
|
||||
func init() { gob.Register(new(MkdirOp)) }
|
||||
|
||||
// Mkdir appends an [Op] that creates a directory in the container filesystem.
|
||||
func (f *Ops) Mkdir(dest string, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &MkdirOp{dest, perm})
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(Mkdir)) }
|
||||
|
||||
// Mkdir creates a directory in the container filesystem.
|
||||
type Mkdir struct {
|
||||
type MkdirOp struct {
|
||||
Path string
|
||||
Perm os.FileMode
|
||||
}
|
||||
|
||||
func (m *Mkdir) early(*Params) error { return nil }
|
||||
func (m *Mkdir) apply(*Params) error {
|
||||
func (m *MkdirOp) early(*Params) error { return nil }
|
||||
func (m *MkdirOp) apply(*Params) error {
|
||||
if !path.IsAbs(m.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", m.Path))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", m.Path))
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(toSysroot(m.Path), m.Perm); err != nil {
|
||||
@@ -372,27 +374,33 @@ func (m *Mkdir) apply(*Params) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Mkdir) Is(op Op) bool { vm, ok := op.(*Mkdir); return ok && m == vm }
|
||||
func (*Mkdir) prefix() string { return "creating" }
|
||||
func (m *Mkdir) String() string { return fmt.Sprintf("directory %q perm %s", m.Path, m.Perm) }
|
||||
func (f *Ops) Mkdir(dest string, perm os.FileMode) *Ops {
|
||||
*f = append(*f, &Mkdir{dest, perm})
|
||||
func (m *MkdirOp) Is(op Op) bool { vm, ok := op.(*MkdirOp); return ok && m == vm }
|
||||
func (*MkdirOp) prefix() string { return "creating" }
|
||||
func (m *MkdirOp) String() string { return fmt.Sprintf("directory %q perm %s", m.Path, m.Perm) }
|
||||
|
||||
func init() { gob.Register(new(TmpfileOp)) }
|
||||
|
||||
// Place appends an [Op] that places a file in container path [TmpfileOp.Path] containing [TmpfileOp.Data].
|
||||
func (f *Ops) Place(name string, data []byte) *Ops { *f = append(*f, &TmpfileOp{name, data}); return f }
|
||||
|
||||
// PlaceP is like Place but writes the address of [TmpfileOp.Data] to the pointer dataP points to.
|
||||
func (f *Ops) PlaceP(name string, dataP **[]byte) *Ops {
|
||||
t := &TmpfileOp{Path: name}
|
||||
*dataP = &t.Data
|
||||
|
||||
*f = append(*f, t)
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(Tmpfile)) }
|
||||
|
||||
// Tmpfile places a file in container Path containing Data.
|
||||
type Tmpfile struct {
|
||||
type TmpfileOp struct {
|
||||
Path string
|
||||
Data []byte
|
||||
}
|
||||
|
||||
func (t *Tmpfile) early(*Params) error { return nil }
|
||||
func (t *Tmpfile) apply(params *Params) error {
|
||||
func (t *TmpfileOp) early(*Params) error { return nil }
|
||||
func (t *TmpfileOp) apply(params *Params) error {
|
||||
if !path.IsAbs(t.Path) {
|
||||
return msg.WrapErr(syscall.EBADE,
|
||||
fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
return msg.WrapErr(EBADE, fmt.Sprintf("path %q is not absolute", t.Path))
|
||||
}
|
||||
|
||||
var tmpPath string
|
||||
@@ -414,7 +422,7 @@ func (t *Tmpfile) apply(params *Params) error {
|
||||
} else if err = hostProc.bindMount(
|
||||
tmpPath,
|
||||
target,
|
||||
syscall.MS_RDONLY|syscall.MS_NODEV,
|
||||
MS_RDONLY|MS_NODEV,
|
||||
false,
|
||||
); err != nil {
|
||||
return err
|
||||
@@ -424,31 +432,31 @@ func (t *Tmpfile) apply(params *Params) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Tmpfile) Is(op Op) bool {
|
||||
vt, ok := op.(*Tmpfile)
|
||||
func (t *TmpfileOp) Is(op Op) bool {
|
||||
vt, ok := op.(*TmpfileOp)
|
||||
return ok && t.Path == vt.Path && slices.Equal(t.Data, vt.Data)
|
||||
}
|
||||
func (*Tmpfile) prefix() string { return "placing" }
|
||||
func (t *Tmpfile) String() string {
|
||||
func (*TmpfileOp) prefix() string { return "placing" }
|
||||
func (t *TmpfileOp) String() string {
|
||||
return fmt.Sprintf("tmpfile %q (%d bytes)", t.Path, len(t.Data))
|
||||
}
|
||||
func (f *Ops) Place(name string, data []byte) *Ops { *f = append(*f, &Tmpfile{name, data}); return f }
|
||||
func (f *Ops) PlaceP(name string, dataP **[]byte) *Ops {
|
||||
t := &Tmpfile{Path: name}
|
||||
*dataP = &t.Data
|
||||
|
||||
*f = append(*f, t)
|
||||
func init() { gob.Register(new(AutoEtcOp)) }
|
||||
|
||||
// Etc appends an [Op] that expands host /etc into a toplevel symlink mirror with /etc semantics.
|
||||
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
||||
func (f *Ops) Etc(host, prefix string) *Ops {
|
||||
e := &AutoEtcOp{prefix}
|
||||
f.Mkdir("/etc", 0755)
|
||||
f.Bind(host, e.hostPath(), 0)
|
||||
*f = append(*f, e)
|
||||
return f
|
||||
}
|
||||
|
||||
func init() { gob.Register(new(AutoEtc)) }
|
||||
type AutoEtcOp struct{ Prefix string }
|
||||
|
||||
// AutoEtc expands host /etc into a toplevel symlink mirror with /etc semantics.
|
||||
// This is not a generic setup op. It is implemented here to reduce ipc overhead.
|
||||
type AutoEtc struct{ Prefix string }
|
||||
|
||||
func (e *AutoEtc) early(*Params) error { return nil }
|
||||
func (e *AutoEtc) apply(*Params) error {
|
||||
func (e *AutoEtcOp) early(*Params) error { return nil }
|
||||
func (e *AutoEtcOp) apply(*Params) error {
|
||||
const target = sysrootPath + "/etc/"
|
||||
rel := e.hostRel() + "/"
|
||||
|
||||
@@ -481,19 +489,12 @@ func (e *AutoEtc) apply(*Params) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
func (e *AutoEtc) hostPath() string { return "/etc/" + e.hostRel() }
|
||||
func (e *AutoEtc) hostRel() string { return ".host/" + e.Prefix }
|
||||
func (e *AutoEtcOp) hostPath() string { return "/etc/" + e.hostRel() }
|
||||
func (e *AutoEtcOp) hostRel() string { return ".host/" + e.Prefix }
|
||||
|
||||
func (e *AutoEtc) Is(op Op) bool {
|
||||
ve, ok := op.(*AutoEtc)
|
||||
func (e *AutoEtcOp) Is(op Op) bool {
|
||||
ve, ok := op.(*AutoEtcOp)
|
||||
return ok && ((e == nil && ve == nil) || (e != nil && ve != nil && *e == *ve))
|
||||
}
|
||||
func (*AutoEtc) prefix() string { return "setting up" }
|
||||
func (e *AutoEtc) String() string { return fmt.Sprintf("auto etc %s", e.Prefix) }
|
||||
func (f *Ops) Etc(host, prefix string) *Ops {
|
||||
e := &AutoEtc{prefix}
|
||||
f.Mkdir("/etc", 0755)
|
||||
f.Bind(host, e.hostPath(), 0)
|
||||
*f = append(*f, e)
|
||||
return f
|
||||
}
|
||||
func (*AutoEtcOp) prefix() string { return "setting up" }
|
||||
func (e *AutoEtcOp) String() string { return fmt.Sprintf("auto etc %s", e.Prefix) }
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
var msg Msg = new(DefaultMsg)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"errors"
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
"hakurei.app/container/vfs"
|
||||
)
|
||||
|
||||
const (
|
||||
24
container/seccomp/hash_amd64_test.go
Normal file
24
container/seccomp/hash_amd64_test.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package seccomp_test
|
||||
|
||||
import . "hakurei.app/container/seccomp"
|
||||
|
||||
var bpfExpected = bpfLookup{
|
||||
{AllowMultiarch | AllowCAN |
|
||||
AllowBluetooth, PresetExt |
|
||||
PresetDenyNS | PresetDenyTTY | PresetDenyDevel |
|
||||
PresetLinux32}: toHash(
|
||||
"e99dd345e195413473d3cbee07b4ed57b908bfa89ea2072fe93482847f50b5b758da17e74ca2bbc00813de49a2b9bf834c024ed48850be69b68a9a4c5f53a9db"),
|
||||
|
||||
{0, 0}: toHash(
|
||||
"95ec69d017733e072160e0da80fdebecdf27ae8166f5e2a731270c98ea2d2946cb5231029063668af215879155da21aca79b070e04c0ee9acdf58f55cfa815a5"),
|
||||
{0, PresetExt}: toHash(
|
||||
"dc7f2e1c5e829b79ebb7efc759150f54a83a75c8df6fee4dce5dadc4736c585d4deebfeb3c7969af3a077e90b77bb4741db05d90997c8659b95891206ac9952d"),
|
||||
{0, PresetStrict}: toHash(
|
||||
"e880298df2bd6751d0040fc21bc0ed4c00f95dc0d7ba506c244d8b8cf6866dba8ef4a33296f287b66cccc1d78e97026597f84cc7dec1573e148960fbd35cd735"),
|
||||
{0, PresetDenyNS | PresetDenyTTY | PresetDenyDevel}: toHash(
|
||||
"39871b93ffafc8b979fcedc0b0c37b9e03922f5b02748dc5c3c17c92527f6e022ede1f48bff59246ea452c0d1de54827808b1a6f84f32bbde1aa02ae30eedcfa"),
|
||||
{0, PresetExt | PresetDenyDevel}: toHash(
|
||||
"c698b081ff957afe17a6d94374537d37f2a63f6f9dd75da7546542407a9e32476ebda3312ba7785d7f618542bcfaf27ca27dcc2dddba852069d28bcfe8cad39a"),
|
||||
{0, PresetExt | PresetDenyNS | PresetDenyDevel}: toHash(
|
||||
"0b76007476c1c9e25dbf674c29fdf609a1656a70063e49327654e1b5360ad3da06e1a3e32bf80e961c5516ad83d4b9e7e9bde876a93797e27627d2555c25858b"),
|
||||
}
|
||||
24
container/seccomp/hash_arm64_test.go
Normal file
24
container/seccomp/hash_arm64_test.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package seccomp_test
|
||||
|
||||
import . "hakurei.app/container/seccomp"
|
||||
|
||||
var bpfExpected = bpfLookup{
|
||||
{AllowMultiarch | AllowCAN |
|
||||
AllowBluetooth, PresetExt |
|
||||
PresetDenyNS | PresetDenyTTY | PresetDenyDevel |
|
||||
PresetLinux32}: toHash(
|
||||
"1431c013f2ddac3adae577821cb5d351b1514e7c754d62346ddffd31f46ea02fb368e46e3f8104f81019617e721fe687ddd83f1e79580622ccc991da12622170"),
|
||||
|
||||
{0, 0}: toHash(
|
||||
"450c21210dbf124dfa7ae56d0130f9c2e24b26f5bce8795ee75766c75850438ff9e7d91c5e73d63bbe51a5d4b06c2a0791c4de2903b2b9805f16265318183235"),
|
||||
{0, PresetExt}: toHash(
|
||||
"d971d0f2d30f54ac920fc6d84df2be279e9fd28cf2d48be775d7fdbd790b750e1369401cd3bb8bcf9ba3adb91874fe9792d9e3f62209b8ee59c9fdd2ddd10c7b"),
|
||||
{0, PresetStrict}: toHash(
|
||||
"79318538a3dc851314b6bd96f10d5861acb2aa7e13cb8de0619d0f6a76709d67f01ef3fd67e195862b02f9711e5b769bc4d1eb4fc0dfc41a723c89c968a93297"),
|
||||
{0, PresetDenyNS | PresetDenyTTY | PresetDenyDevel}: toHash(
|
||||
"228286c2f5df8e44463be0a57b91977b7f38b63b09e5d98dfabe5c61545b8f9ac3e5ea3d86df55d7edf2ce61875f0a5a85c0ab82800bef178c42533e8bdc9a6c"),
|
||||
{0, PresetExt | PresetDenyDevel}: toHash(
|
||||
"433ce9b911282d6dcc8029319fb79b816b60d5a795ec8fc94344dd027614d68f023166a91bb881faaeeedd26e3d89474e141e5a69a97e93b8984ca8f14999980"),
|
||||
{0, PresetExt | PresetDenyNS | PresetDenyDevel}: toHash(
|
||||
"cf1f4dc87436ba8ec95d268b663a6397bb0b4a5ac64d8557e6cc529d8b0f6f65dad3a92b62ed29d85eee9c6dde1267757a4d0f86032e8a45ca1bceadfa34cf5e"),
|
||||
}
|
||||
28
container/seccomp/hash_test.go
Normal file
28
container/seccomp/hash_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package seccomp_test
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
|
||||
"hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
type (
|
||||
bpfPreset = struct {
|
||||
seccomp.ExportFlag
|
||||
seccomp.FilterPreset
|
||||
}
|
||||
bpfLookup map[bpfPreset][]byte
|
||||
)
|
||||
|
||||
func toHash(s string) []byte {
|
||||
if len(s) != 128 {
|
||||
panic("bad sha512 string length")
|
||||
}
|
||||
if v, err := hex.DecodeString(s); err != nil {
|
||||
panic(err.Error())
|
||||
} else if len(v) != 64 {
|
||||
panic("unreachable")
|
||||
} else {
|
||||
return v
|
||||
}
|
||||
}
|
||||
130
container/seccomp/libseccomp-helper.c
Normal file
130
container/seccomp/libseccomp-helper.c
Normal file
@@ -0,0 +1,130 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE /* CLONE_NEWUSER */
|
||||
#endif
|
||||
|
||||
#include "libseccomp-helper.h"
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
|
||||
int32_t hakurei_export_filter(int *ret_p, int fd, uint32_t arch,
|
||||
uint32_t multiarch,
|
||||
struct hakurei_syscall_rule *rules,
|
||||
size_t rules_sz, hakurei_export_flag flags) {
|
||||
int i;
|
||||
int last_allowed_family;
|
||||
int disallowed;
|
||||
struct hakurei_syscall_rule *rule;
|
||||
|
||||
int32_t res = 0; /* refer to resPrefix for message */
|
||||
|
||||
/* Blocklist all but unix, inet, inet6 and netlink */
|
||||
struct {
|
||||
int family;
|
||||
hakurei_export_flag flags_mask;
|
||||
} socket_family_allowlist[] = {
|
||||
/* NOTE: Keep in numerical order */
|
||||
{AF_UNSPEC, 0},
|
||||
{AF_LOCAL, 0},
|
||||
{AF_INET, 0},
|
||||
{AF_INET6, 0},
|
||||
{AF_NETLINK, 0},
|
||||
{AF_CAN, HAKUREI_EXPORT_CAN},
|
||||
{AF_BLUETOOTH, HAKUREI_EXPORT_BLUETOOTH},
|
||||
};
|
||||
|
||||
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
|
||||
if (ctx == NULL) {
|
||||
res = 1;
|
||||
goto out;
|
||||
} else
|
||||
errno = 0;
|
||||
|
||||
/* We only really need to handle arches on multiarch systems.
|
||||
* If only one arch is supported the default is fine */
|
||||
if (arch != 0) {
|
||||
/* This *adds* the target arch, instead of replacing the
|
||||
* native one. This is not ideal, because we'd like to only
|
||||
* allow the target arch, but we can't really disallow the
|
||||
* native arch at this point, because then bubblewrap
|
||||
* couldn't continue running. */
|
||||
*ret_p = seccomp_arch_add(ctx, arch);
|
||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (flags & HAKUREI_EXPORT_MULTIARCH && multiarch != 0) {
|
||||
*ret_p = seccomp_arch_add(ctx, multiarch);
|
||||
if (*ret_p < 0 && *ret_p != -EEXIST) {
|
||||
res = 3;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < rules_sz; i++) {
|
||||
rule = &rules[i];
|
||||
assert(rule->m_errno == EPERM || rule->m_errno == ENOSYS);
|
||||
|
||||
if (rule->arg)
|
||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
|
||||
rule->syscall, 1, *rule->arg);
|
||||
else
|
||||
*ret_p = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(rule->m_errno),
|
||||
rule->syscall, 0);
|
||||
|
||||
if (*ret_p == -EFAULT) {
|
||||
res = 4;
|
||||
goto out;
|
||||
} else if (*ret_p < 0) {
|
||||
res = 5;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Socket filtering doesn't work on e.g. i386, so ignore failures here
|
||||
* However, we need to user seccomp_rule_add_exact to avoid libseccomp doing
|
||||
* something else: https://github.com/seccomp/libseccomp/issues/8 */
|
||||
last_allowed_family = -1;
|
||||
for (i = 0; i < LEN(socket_family_allowlist); i++) {
|
||||
if (socket_family_allowlist[i].flags_mask != 0 &&
|
||||
(socket_family_allowlist[i].flags_mask & flags) !=
|
||||
socket_family_allowlist[i].flags_mask)
|
||||
continue;
|
||||
|
||||
for (disallowed = last_allowed_family + 1;
|
||||
disallowed < socket_family_allowlist[i].family; disallowed++) {
|
||||
/* Blocklist the in-between valid families */
|
||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT),
|
||||
SCMP_SYS(socket), 1,
|
||||
SCMP_A0(SCMP_CMP_EQ, disallowed));
|
||||
}
|
||||
last_allowed_family = socket_family_allowlist[i].family;
|
||||
}
|
||||
/* Blocklist the rest */
|
||||
seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EAFNOSUPPORT), SCMP_SYS(socket), 1,
|
||||
SCMP_A0(SCMP_CMP_GE, last_allowed_family + 1));
|
||||
|
||||
if (fd < 0) {
|
||||
*ret_p = seccomp_load(ctx);
|
||||
if (*ret_p != 0) {
|
||||
res = 7;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
*ret_p = seccomp_export_bpf(ctx, fd);
|
||||
if (*ret_p != 0) {
|
||||
res = 6;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (ctx)
|
||||
seccomp_release(ctx);
|
||||
|
||||
return res;
|
||||
}
|
||||
24
container/seccomp/libseccomp-helper.h
Normal file
24
container/seccomp/libseccomp-helper.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#include <seccomp.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if (SCMP_VER_MAJOR < 2) || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) || \
|
||||
(SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 5 && SCMP_VER_MICRO < 1)
|
||||
#error This package requires libseccomp >= v2.5.1
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
HAKUREI_EXPORT_MULTIARCH = 1 << 0,
|
||||
HAKUREI_EXPORT_CAN = 1 << 1,
|
||||
HAKUREI_EXPORT_BLUETOOTH = 1 << 2,
|
||||
} hakurei_export_flag;
|
||||
|
||||
struct hakurei_syscall_rule {
|
||||
int syscall;
|
||||
int m_errno;
|
||||
struct scmp_arg_cmp *arg;
|
||||
};
|
||||
|
||||
int32_t hakurei_export_filter(int *ret_p, int fd, uint32_t arch,
|
||||
uint32_t multiarch,
|
||||
struct hakurei_syscall_rule *rules,
|
||||
size_t rules_sz, hakurei_export_flag flags);
|
||||
194
container/seccomp/libseccomp.go
Normal file
194
container/seccomp/libseccomp.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package seccomp
|
||||
|
||||
/*
|
||||
#cgo linux pkg-config: --static libseccomp
|
||||
|
||||
#include <libseccomp-helper.h>
|
||||
#include <sys/personality.h>
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
PER_LINUX = C.PER_LINUX
|
||||
PER_LINUX32 = C.PER_LINUX32
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInvalidRules = errors.New("invalid native rules slice")
|
||||
)
|
||||
|
||||
// LibraryError represents a libseccomp error.
|
||||
type LibraryError struct {
|
||||
Prefix string
|
||||
Seccomp syscall.Errno
|
||||
Errno error
|
||||
}
|
||||
|
||||
func (e *LibraryError) Error() string {
|
||||
if e.Seccomp == 0 {
|
||||
if e.Errno == nil {
|
||||
panic("invalid libseccomp error")
|
||||
}
|
||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Errno)
|
||||
}
|
||||
if e.Errno == nil {
|
||||
return fmt.Sprintf("%s: %s", e.Prefix, e.Seccomp)
|
||||
}
|
||||
return fmt.Sprintf("%s: %s (%s)", e.Prefix, e.Seccomp, e.Errno)
|
||||
}
|
||||
|
||||
func (e *LibraryError) Is(err error) bool {
|
||||
if e == nil {
|
||||
return err == nil
|
||||
}
|
||||
if ef, ok := err.(*LibraryError); ok {
|
||||
return *e == *ef
|
||||
}
|
||||
return (e.Seccomp != 0 && errors.Is(err, e.Seccomp)) ||
|
||||
(e.Errno != nil && errors.Is(err, e.Errno))
|
||||
}
|
||||
|
||||
type (
|
||||
ScmpSyscall = C.int
|
||||
ScmpErrno = C.int
|
||||
)
|
||||
|
||||
// A NativeRule specifies an arch-specific action taken by seccomp under certain conditions.
|
||||
type NativeRule struct {
|
||||
// Syscall is the arch-dependent syscall number to act against.
|
||||
Syscall ScmpSyscall
|
||||
// Errno is the errno value to return when the condition is satisfied.
|
||||
Errno ScmpErrno
|
||||
// Arg is the optional struct scmp_arg_cmp passed to libseccomp.
|
||||
Arg *ScmpArgCmp
|
||||
}
|
||||
|
||||
type ExportFlag = C.hakurei_export_flag
|
||||
|
||||
const (
|
||||
// AllowMultiarch allows multiarch/emulation.
|
||||
AllowMultiarch ExportFlag = C.HAKUREI_EXPORT_MULTIARCH
|
||||
// AllowCAN allows AF_CAN.
|
||||
AllowCAN ExportFlag = C.HAKUREI_EXPORT_CAN
|
||||
// AllowBluetooth allows AF_BLUETOOTH.
|
||||
AllowBluetooth ExportFlag = C.HAKUREI_EXPORT_BLUETOOTH
|
||||
)
|
||||
|
||||
var resPrefix = [...]string{
|
||||
0: "",
|
||||
1: "seccomp_init failed",
|
||||
2: "seccomp_arch_add failed",
|
||||
3: "seccomp_arch_add failed (multiarch)",
|
||||
4: "internal libseccomp failure",
|
||||
5: "seccomp_rule_add failed",
|
||||
6: "seccomp_export_bpf failed",
|
||||
7: "seccomp_load failed",
|
||||
}
|
||||
|
||||
// Export streams filter contents to fd, or installs it to the current process if fd < 0.
|
||||
func Export(fd int, rules []NativeRule, flags ExportFlag) error {
|
||||
if len(rules) == 0 {
|
||||
return ErrInvalidRules
|
||||
}
|
||||
|
||||
var (
|
||||
arch C.uint32_t = 0
|
||||
multiarch C.uint32_t = 0
|
||||
)
|
||||
switch runtime.GOARCH {
|
||||
case "386":
|
||||
arch = C.SCMP_ARCH_X86
|
||||
case "amd64":
|
||||
arch = C.SCMP_ARCH_X86_64
|
||||
multiarch = C.SCMP_ARCH_X86
|
||||
case "arm":
|
||||
arch = C.SCMP_ARCH_ARM
|
||||
case "arm64":
|
||||
arch = C.SCMP_ARCH_AARCH64
|
||||
multiarch = C.SCMP_ARCH_ARM
|
||||
}
|
||||
|
||||
var ret C.int
|
||||
|
||||
rulesPinner := new(runtime.Pinner)
|
||||
for i := range rules {
|
||||
rule := &rules[i]
|
||||
rulesPinner.Pin(rule)
|
||||
if rule.Arg != nil {
|
||||
rulesPinner.Pin(rule.Arg)
|
||||
}
|
||||
}
|
||||
res, err := C.hakurei_export_filter(
|
||||
&ret, C.int(fd),
|
||||
arch, multiarch,
|
||||
(*C.struct_hakurei_syscall_rule)(unsafe.Pointer(&rules[0])),
|
||||
C.size_t(len(rules)),
|
||||
flags,
|
||||
)
|
||||
rulesPinner.Unpin()
|
||||
|
||||
if prefix := resPrefix[res]; prefix != "" {
|
||||
return &LibraryError{
|
||||
prefix,
|
||||
-syscall.Errno(ret),
|
||||
err,
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// ScmpCompare is the equivalent of scmp_compare;
|
||||
// Comparison operators
|
||||
type ScmpCompare = C.enum_scmp_compare
|
||||
|
||||
const (
|
||||
_SCMP_CMP_MIN = C._SCMP_CMP_MIN
|
||||
|
||||
// not equal
|
||||
SCMP_CMP_NE = C.SCMP_CMP_NE
|
||||
// less than
|
||||
SCMP_CMP_LT = C.SCMP_CMP_LT
|
||||
// less than or equal
|
||||
SCMP_CMP_LE = C.SCMP_CMP_LE
|
||||
// equal
|
||||
SCMP_CMP_EQ = C.SCMP_CMP_EQ
|
||||
// greater than or equal
|
||||
SCMP_CMP_GE = C.SCMP_CMP_GE
|
||||
// greater than
|
||||
SCMP_CMP_GT = C.SCMP_CMP_GT
|
||||
// masked equality
|
||||
SCMP_CMP_MASKED_EQ = C.SCMP_CMP_MASKED_EQ
|
||||
|
||||
_SCMP_CMP_MAX = C._SCMP_CMP_MAX
|
||||
)
|
||||
|
||||
// ScmpDatum is the equivalent of scmp_datum_t;
|
||||
// Argument datum
|
||||
type ScmpDatum uint64
|
||||
|
||||
// ScmpArgCmp is the equivalent of struct scmp_arg_cmp;
|
||||
// Argument / Value comparison definition
|
||||
type ScmpArgCmp struct {
|
||||
// argument number, starting at 0
|
||||
Arg C.uint
|
||||
// the comparison op, e.g. SCMP_CMP_*
|
||||
Op ScmpCompare
|
||||
|
||||
DatumA, DatumB ScmpDatum
|
||||
}
|
||||
|
||||
// only used for testing
|
||||
func syscallResolveName(s string) (trap int) {
|
||||
v := C.CString(s)
|
||||
trap = int(C.seccomp_syscall_resolve_name(v))
|
||||
C.free(unsafe.Pointer(v))
|
||||
|
||||
return
|
||||
}
|
||||
94
container/seccomp/libseccomp_test.go
Normal file
94
container/seccomp/libseccomp_test.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package seccomp_test
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"errors"
|
||||
"io"
|
||||
"slices"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
. "hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
func TestExport(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
flags ExportFlag
|
||||
presets FilterPreset
|
||||
wantErr bool
|
||||
}{
|
||||
{"everything", AllowMultiarch | AllowCAN |
|
||||
AllowBluetooth, PresetExt |
|
||||
PresetDenyNS | PresetDenyTTY | PresetDenyDevel |
|
||||
PresetLinux32, false},
|
||||
|
||||
{"compat", 0, 0, false},
|
||||
{"base", 0, PresetExt, false},
|
||||
{"strict", 0, PresetStrict, false},
|
||||
{"strict compat", 0, PresetDenyNS | PresetDenyTTY | PresetDenyDevel, false},
|
||||
{"hakurei default", 0, PresetExt | PresetDenyDevel, false},
|
||||
{"hakurei tty", 0, PresetExt | PresetDenyNS | PresetDenyDevel, false},
|
||||
}
|
||||
|
||||
buf := make([]byte, 8)
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
e := New(Preset(tc.presets, tc.flags), tc.flags)
|
||||
want := bpfExpected[bpfPreset{tc.flags, tc.presets}]
|
||||
digest := sha512.New()
|
||||
|
||||
if _, err := io.CopyBuffer(digest, e, buf); (err != nil) != tc.wantErr {
|
||||
t.Errorf("Exporter: error = %v, wantErr %v", err, tc.wantErr)
|
||||
return
|
||||
}
|
||||
if err := e.Close(); err != nil {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
}
|
||||
if got := digest.Sum(nil); !slices.Equal(got, want) {
|
||||
t.Fatalf("Export() hash = %x, want %x",
|
||||
got, want)
|
||||
return
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("close without use", func(t *testing.T) {
|
||||
e := New(Preset(0, 0), 0)
|
||||
if err := e.Close(); !errors.Is(err, syscall.EINVAL) {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
return
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("close partial read", func(t *testing.T) {
|
||||
e := New(Preset(0, 0), 0)
|
||||
if _, err := e.Read(nil); err != nil {
|
||||
t.Errorf("Read: error = %v", err)
|
||||
return
|
||||
}
|
||||
// the underlying implementation uses buffered io, so the outcome of this is nondeterministic;
|
||||
// that is not harmful however, so both outcomes are checked for here
|
||||
if err := e.Close(); err != nil &&
|
||||
(!errors.Is(err, syscall.ECANCELED) || !errors.Is(err, syscall.EBADF)) {
|
||||
t.Errorf("Close: error = %v", err)
|
||||
return
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkExport(b *testing.B) {
|
||||
buf := make([]byte, 8)
|
||||
for b.Loop() {
|
||||
e := New(
|
||||
Preset(PresetExt|PresetDenyNS|PresetDenyTTY|PresetDenyDevel|PresetLinux32,
|
||||
AllowMultiarch|AllowCAN|AllowBluetooth),
|
||||
AllowMultiarch|AllowCAN|AllowBluetooth)
|
||||
if _, err := io.CopyBuffer(io.Discard, e, buf); err != nil {
|
||||
b.Fatalf("cannot export: %v", err)
|
||||
}
|
||||
if err := e.Close(); err != nil {
|
||||
b.Fatalf("cannot close exporter: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
89
container/seccomp/mksysnum_linux.pl
Executable file
89
container/seccomp/mksysnum_linux.pl
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env perl
|
||||
# Copyright 2009 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
use strict;
|
||||
use POSIX ();
|
||||
|
||||
my $command = "mksysnum_linux.pl ". join(' ', @ARGV);
|
||||
my $uname_arch = (POSIX::uname)[4];
|
||||
my %syscall_cutoff_arch = (
|
||||
"x86_64" => 302,
|
||||
"aarch64" => 281,
|
||||
);
|
||||
|
||||
print <<EOF;
|
||||
// $command
|
||||
// Code generated by the command above; DO NOT EDIT.
|
||||
|
||||
package seccomp
|
||||
|
||||
import . "syscall"
|
||||
|
||||
var syscallNum = map[string]int{
|
||||
EOF
|
||||
|
||||
my $offset = 0;
|
||||
my $state = -1;
|
||||
|
||||
sub fmt {
|
||||
my ($name, $num) = @_;
|
||||
if($num > 999){
|
||||
# ignore deprecated syscalls that are no longer implemented
|
||||
# https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/asm-generic/unistd.h?id=refs/heads/master#n716
|
||||
return;
|
||||
}
|
||||
(my $name_upper = $name) =~ y/a-z/A-Z/;
|
||||
$num = $num + $offset;
|
||||
if($num > $syscall_cutoff_arch{$uname_arch}){ # not wired in Go standard library
|
||||
if($state < 0){
|
||||
print " \"$name\": SYS_$name_upper,\n";
|
||||
}
|
||||
else{
|
||||
print " SYS_$name_upper = $num;\n";
|
||||
}
|
||||
}
|
||||
elsif($state < 0){
|
||||
print " \"$name\": SYS_$name_upper,\n";
|
||||
}
|
||||
else{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
GENERATE:
|
||||
|
||||
my $prev;
|
||||
open(GCC, "gcc -E -dD $ARGV[0] |") || die "can't run gcc";
|
||||
while(<GCC>){
|
||||
if(/^#define __NR_Linux\s+([0-9]+)/){
|
||||
# mips/mips64: extract offset
|
||||
$offset = $1;
|
||||
}
|
||||
elsif(/^#define __NR_syscalls\s+/) {
|
||||
# ignore redefinitions of __NR_syscalls
|
||||
}
|
||||
elsif(/^#define __NR_(\w+)\s+([0-9]+)/){
|
||||
$prev = $2;
|
||||
fmt($1, $2);
|
||||
}
|
||||
elsif(/^#define __NR3264_(\w+)\s+([0-9]+)/){
|
||||
$prev = $2;
|
||||
fmt($1, $2);
|
||||
}
|
||||
elsif(/^#define __NR_(\w+)\s+\(\w+\+\s*([0-9]+)\)/){
|
||||
fmt($1, $prev+$2)
|
||||
}
|
||||
elsif(/^#define __NR_(\w+)\s+\(__NR_Linux \+ ([0-9]+)/){
|
||||
fmt($1, $2);
|
||||
}
|
||||
}
|
||||
|
||||
if($state < 0){
|
||||
$state = $state + 1;
|
||||
print "}\n\nconst (\n";
|
||||
goto GENERATE;
|
||||
}
|
||||
|
||||
print ")";
|
||||
229
container/seccomp/presets.go
Normal file
229
container/seccomp/presets.go
Normal file
@@ -0,0 +1,229 @@
|
||||
package seccomp
|
||||
|
||||
/* flatpak commit 4c3bf179e2e4a2a298cd1db1d045adaf3f564532 */
|
||||
|
||||
import (
|
||||
. "syscall"
|
||||
)
|
||||
|
||||
type FilterPreset int
|
||||
|
||||
const (
|
||||
// PresetExt are project-specific extensions.
|
||||
PresetExt FilterPreset = 1 << iota
|
||||
// PresetDenyNS denies namespace setup syscalls.
|
||||
PresetDenyNS
|
||||
// PresetDenyTTY denies faking input.
|
||||
PresetDenyTTY
|
||||
// PresetDenyDevel denies development-related syscalls.
|
||||
PresetDenyDevel
|
||||
// PresetLinux32 sets PER_LINUX32.
|
||||
PresetLinux32
|
||||
)
|
||||
|
||||
func Preset(presets FilterPreset, flags ExportFlag) (rules []NativeRule) {
|
||||
allowedPersonality := PER_LINUX
|
||||
if presets&PresetLinux32 != 0 {
|
||||
allowedPersonality = PER_LINUX32
|
||||
}
|
||||
presetDevelFinal := presetDevel(ScmpDatum(allowedPersonality))
|
||||
|
||||
l := len(presetCommon)
|
||||
if presets&PresetDenyNS != 0 {
|
||||
l += len(presetNamespace)
|
||||
}
|
||||
if presets&PresetDenyTTY != 0 {
|
||||
l += len(presetTTY)
|
||||
}
|
||||
if presets&PresetDenyDevel != 0 {
|
||||
l += len(presetDevelFinal)
|
||||
}
|
||||
if flags&AllowMultiarch == 0 {
|
||||
l += len(presetEmu)
|
||||
}
|
||||
if presets&PresetExt != 0 {
|
||||
l += len(presetCommonExt)
|
||||
if presets&PresetDenyNS != 0 {
|
||||
l += len(presetNamespaceExt)
|
||||
}
|
||||
if flags&AllowMultiarch == 0 {
|
||||
l += len(presetEmuExt)
|
||||
}
|
||||
}
|
||||
|
||||
rules = make([]NativeRule, 0, l)
|
||||
rules = append(rules, presetCommon...)
|
||||
if presets&PresetDenyNS != 0 {
|
||||
rules = append(rules, presetNamespace...)
|
||||
}
|
||||
if presets&PresetDenyTTY != 0 {
|
||||
rules = append(rules, presetTTY...)
|
||||
}
|
||||
if presets&PresetDenyDevel != 0 {
|
||||
rules = append(rules, presetDevelFinal...)
|
||||
}
|
||||
if flags&AllowMultiarch == 0 {
|
||||
rules = append(rules, presetEmu...)
|
||||
}
|
||||
if presets&PresetExt != 0 {
|
||||
rules = append(rules, presetCommonExt...)
|
||||
if presets&PresetDenyNS != 0 {
|
||||
rules = append(rules, presetNamespaceExt...)
|
||||
}
|
||||
if flags&AllowMultiarch == 0 {
|
||||
rules = append(rules, presetEmuExt...)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
presetCommon = []NativeRule{
|
||||
/* Block dmesg */
|
||||
{ScmpSyscall(SYS_SYSLOG), ScmpErrno(EPERM), nil},
|
||||
/* Useless old syscall */
|
||||
{ScmpSyscall(SYS_USELIB), ScmpErrno(EPERM), nil},
|
||||
/* Don't allow disabling accounting */
|
||||
{ScmpSyscall(SYS_ACCT), ScmpErrno(EPERM), nil},
|
||||
/* Don't allow reading current quota use */
|
||||
{ScmpSyscall(SYS_QUOTACTL), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* Don't allow access to the kernel keyring */
|
||||
{ScmpSyscall(SYS_ADD_KEY), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_KEYCTL), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_REQUEST_KEY), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* Scary VM/NUMA ops */
|
||||
{ScmpSyscall(SYS_MOVE_PAGES), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_MBIND), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_GET_MEMPOLICY), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SET_MEMPOLICY), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_MIGRATE_PAGES), ScmpErrno(EPERM), nil},
|
||||
}
|
||||
|
||||
/* hakurei: project-specific extensions */
|
||||
presetCommonExt = []NativeRule{
|
||||
/* system calls for changing the system clock */
|
||||
{ScmpSyscall(SYS_ADJTIMEX), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CLOCK_ADJTIME), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CLOCK_ADJTIME64), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CLOCK_SETTIME), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CLOCK_SETTIME64), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETTIMEOFDAY), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* loading and unloading of kernel modules */
|
||||
{ScmpSyscall(SYS_DELETE_MODULE), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_FINIT_MODULE), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_INIT_MODULE), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* system calls for rebooting and reboot preparation */
|
||||
{ScmpSyscall(SYS_KEXEC_FILE_LOAD), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_KEXEC_LOAD), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_REBOOT), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* system calls for enabling/disabling swap devices */
|
||||
{ScmpSyscall(SYS_SWAPOFF), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SWAPON), ScmpErrno(EPERM), nil},
|
||||
}
|
||||
|
||||
presetNamespace = []NativeRule{
|
||||
/* Don't allow subnamespace setups: */
|
||||
{ScmpSyscall(SYS_UNSHARE), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETNS), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_MOUNT), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_UMOUNT), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_UMOUNT2), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_PIVOT_ROOT), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CHROOT), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CLONE), ScmpErrno(EPERM),
|
||||
&ScmpArgCmp{cloneArg, SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER}},
|
||||
|
||||
/* seccomp can't look into clone3()'s struct clone_args to check whether
|
||||
* the flags are OK, so we have no choice but to block clone3().
|
||||
* Return ENOSYS so user-space will fall back to clone().
|
||||
* (CVE-2021-41133; see also https://github.com/moby/moby/commit/9f6b562d)
|
||||
*/
|
||||
{ScmpSyscall(SYS_CLONE3), ScmpErrno(ENOSYS), nil},
|
||||
|
||||
/* New mount manipulation APIs can also change our VFS. There's no
|
||||
* legitimate reason to do these in the sandbox, so block all of them
|
||||
* rather than thinking about which ones might be dangerous.
|
||||
* (CVE-2021-41133) */
|
||||
{ScmpSyscall(SYS_OPEN_TREE), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_MOVE_MOUNT), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_FSOPEN), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_FSCONFIG), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_FSMOUNT), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_FSPICK), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_MOUNT_SETATTR), ScmpErrno(ENOSYS), nil},
|
||||
}
|
||||
|
||||
/* hakurei: project-specific extensions */
|
||||
presetNamespaceExt = []NativeRule{
|
||||
/* changing file ownership */
|
||||
{ScmpSyscall(SYS_CHOWN), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_CHOWN32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_FCHOWN), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_FCHOWN32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_FCHOWNAT), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_LCHOWN), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_LCHOWN32), ScmpErrno(EPERM), nil},
|
||||
|
||||
/* system calls for changing user ID and group ID credentials */
|
||||
{ScmpSyscall(SYS_SETGID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETGID32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETGROUPS), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETGROUPS32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETREGID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETREGID32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETRESGID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETRESGID32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETRESUID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETRESUID32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETREUID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETREUID32), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETUID), ScmpErrno(EPERM), nil},
|
||||
{ScmpSyscall(SYS_SETUID32), ScmpErrno(EPERM), nil},
|
||||
}
|
||||
|
||||
presetTTY = []NativeRule{
|
||||
/* Don't allow faking input to the controlling tty (CVE-2017-5226) */
|
||||
{ScmpSyscall(SYS_IOCTL), ScmpErrno(EPERM),
|
||||
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCSTI}},
|
||||
/* In the unlikely event that the controlling tty is a Linux virtual
|
||||
* console (/dev/tty2 or similar), copy/paste operations have an effect
|
||||
* similar to TIOCSTI (CVE-2023-28100) */
|
||||
{ScmpSyscall(SYS_IOCTL), ScmpErrno(EPERM),
|
||||
&ScmpArgCmp{1, SCMP_CMP_MASKED_EQ, 0xFFFFFFFF, TIOCLINUX}},
|
||||
}
|
||||
|
||||
presetEmu = []NativeRule{
|
||||
/* modify_ldt is a historic source of interesting information leaks,
|
||||
* so it's disabled as a hardening measure.
|
||||
* However, it is required to run old 16-bit applications
|
||||
* as well as some Wine patches, so it's allowed in multiarch. */
|
||||
{ScmpSyscall(SYS_MODIFY_LDT), ScmpErrno(EPERM), nil},
|
||||
}
|
||||
|
||||
/* hakurei: project-specific extensions */
|
||||
presetEmuExt = []NativeRule{
|
||||
{ScmpSyscall(SYS_SUBPAGE_PROT), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_SWITCH_ENDIAN), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_VM86), ScmpErrno(ENOSYS), nil},
|
||||
{ScmpSyscall(SYS_VM86OLD), ScmpErrno(ENOSYS), nil},
|
||||
}
|
||||
)
|
||||
|
||||
func presetDevel(allowedPersonality ScmpDatum) []NativeRule {
|
||||
return []NativeRule{
|
||||
/* Profiling operations; we expect these to be done by tools from outside
|
||||
* the sandbox. In particular perf has been the source of many CVEs. */
|
||||
{ScmpSyscall(SYS_PERF_EVENT_OPEN), ScmpErrno(EPERM), nil},
|
||||
/* Don't allow you to switch to bsd emulation or whatnot */
|
||||
{ScmpSyscall(SYS_PERSONALITY), ScmpErrno(EPERM),
|
||||
&ScmpArgCmp{0, SCMP_CMP_NE, allowedPersonality, 0}},
|
||||
|
||||
{ScmpSyscall(SYS_PTRACE), ScmpErrno(EPERM), nil},
|
||||
}
|
||||
}
|
||||
7
container/seccomp/presets_clone_backwards2.go
Normal file
7
container/seccomp/presets_clone_backwards2.go
Normal file
@@ -0,0 +1,7 @@
|
||||
//go:build s390 || s390x
|
||||
|
||||
package seccomp
|
||||
|
||||
/* Architectures with CONFIG_CLONE_BACKWARDS2: the child stack
|
||||
* and flags arguments are reversed so the flags come second */
|
||||
const cloneArg = 1
|
||||
6
container/seccomp/presets_clone_generic.go
Normal file
6
container/seccomp/presets_clone_generic.go
Normal file
@@ -0,0 +1,6 @@
|
||||
//go:build !s390 && !s390x
|
||||
|
||||
package seccomp
|
||||
|
||||
/* Normally the flags come first */
|
||||
const cloneArg = 0
|
||||
@@ -5,19 +5,18 @@ import (
|
||||
"errors"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper/proc"
|
||||
"hakurei.app/helper/proc"
|
||||
)
|
||||
|
||||
const (
|
||||
PresetStrict = FilterExt | FilterDenyNS | FilterDenyTTY | FilterDenyDevel
|
||||
PresetCommon = PresetStrict | FilterMultiarch
|
||||
PresetStrict = PresetExt | PresetDenyNS | PresetDenyTTY | PresetDenyDevel
|
||||
)
|
||||
|
||||
// New returns an inactive Encoder instance.
|
||||
func New(opts FilterOpts) *Encoder { return &Encoder{newExporter(opts)} }
|
||||
func New(rules []NativeRule, flags ExportFlag) *Encoder { return &Encoder{newExporter(rules, flags)} }
|
||||
|
||||
// Load loads a filter into the kernel.
|
||||
func Load(opts FilterOpts) error { return buildFilter(-1, opts) }
|
||||
func Load(rules []NativeRule, flags ExportFlag) error { return Export(-1, rules, flags) }
|
||||
|
||||
/*
|
||||
An Encoder writes a BPF program to an output stream.
|
||||
@@ -47,17 +46,20 @@ func (e *Encoder) Close() error {
|
||||
}
|
||||
|
||||
// NewFile returns an instance of exporter implementing [proc.File].
|
||||
func NewFile(opts FilterOpts) proc.File { return &File{opts: opts} }
|
||||
func NewFile(rules []NativeRule, flags ExportFlag) proc.File {
|
||||
return &File{rules: rules, flags: flags}
|
||||
}
|
||||
|
||||
// File implements [proc.File] and provides access to the read end of exporter pipe.
|
||||
type File struct {
|
||||
opts FilterOpts
|
||||
rules []NativeRule
|
||||
flags ExportFlag
|
||||
proc.BaseFile
|
||||
}
|
||||
|
||||
func (f *File) ErrCount() int { return 2 }
|
||||
func (f *File) Fulfill(ctx context.Context, dispatchErr func(error)) error {
|
||||
e := newExporter(f.opts)
|
||||
e := newExporter(f.rules, f.flags)
|
||||
if err := e.prepare(); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
// Package seccomp provides high level wrappers around libseccomp.
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
@@ -7,8 +8,9 @@ import (
|
||||
)
|
||||
|
||||
type exporter struct {
|
||||
opts FilterOpts
|
||||
r, w *os.File
|
||||
rules []NativeRule
|
||||
flags ExportFlag
|
||||
r, w *os.File
|
||||
|
||||
prepareOnce sync.Once
|
||||
prepareErr error
|
||||
@@ -28,7 +30,7 @@ func (e *exporter) prepare() error {
|
||||
|
||||
ec := make(chan error, 1)
|
||||
go func(fd uintptr) {
|
||||
ec <- buildFilter(int(fd), e.opts)
|
||||
ec <- Export(int(fd), e.rules, e.flags)
|
||||
close(ec)
|
||||
_ = e.closeWrite()
|
||||
runtime.KeepAlive(e.w)
|
||||
@@ -53,6 +55,6 @@ func (e *exporter) closeWrite() error {
|
||||
return e.closeErr
|
||||
}
|
||||
|
||||
func newExporter(opts FilterOpts) *exporter {
|
||||
return &exporter{opts: opts}
|
||||
func newExporter(rules []NativeRule, flags ExportFlag) *exporter {
|
||||
return &exporter{rules: rules, flags: flags}
|
||||
}
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
func TestLibraryError(t *testing.T) {
|
||||
28
container/seccomp/syscall.go
Normal file
28
container/seccomp/syscall.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package seccomp
|
||||
|
||||
import "iter"
|
||||
|
||||
// Syscalls returns an iterator over all wired syscalls.
|
||||
func Syscalls() iter.Seq2[string, int] {
|
||||
return func(yield func(string, int) bool) {
|
||||
for name, num := range syscallNum {
|
||||
if !yield(name, num) {
|
||||
return
|
||||
}
|
||||
}
|
||||
for name, num := range syscallNumExtra {
|
||||
if !yield(name, num) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SyscallResolveName resolves a syscall number from its string representation.
|
||||
func SyscallResolveName(name string) (num int, ok bool) {
|
||||
if num, ok = syscallNum[name]; ok {
|
||||
return
|
||||
}
|
||||
num, ok = syscallNumExtra[name]
|
||||
return
|
||||
}
|
||||
48
container/seccomp/syscall_extra_linux_amd64.go
Normal file
48
container/seccomp/syscall_extra_linux_amd64.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package seccomp
|
||||
|
||||
/*
|
||||
#cgo linux pkg-config: --static libseccomp
|
||||
|
||||
#include <seccomp.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
var syscallNumExtra = map[string]int{
|
||||
"umount": SYS_UMOUNT,
|
||||
"subpage_prot": SYS_SUBPAGE_PROT,
|
||||
"switch_endian": SYS_SWITCH_ENDIAN,
|
||||
"vm86": SYS_VM86,
|
||||
"vm86old": SYS_VM86OLD,
|
||||
"clock_adjtime64": SYS_CLOCK_ADJTIME64,
|
||||
"clock_settime64": SYS_CLOCK_SETTIME64,
|
||||
"chown32": SYS_CHOWN32,
|
||||
"fchown32": SYS_FCHOWN32,
|
||||
"lchown32": SYS_LCHOWN32,
|
||||
"setgid32": SYS_SETGID32,
|
||||
"setgroups32": SYS_SETGROUPS32,
|
||||
"setregid32": SYS_SETREGID32,
|
||||
"setresgid32": SYS_SETRESGID32,
|
||||
"setresuid32": SYS_SETRESUID32,
|
||||
"setreuid32": SYS_SETREUID32,
|
||||
"setuid32": SYS_SETUID32,
|
||||
}
|
||||
|
||||
const (
|
||||
SYS_UMOUNT = C.__SNR_umount
|
||||
SYS_SUBPAGE_PROT = C.__SNR_subpage_prot
|
||||
SYS_SWITCH_ENDIAN = C.__SNR_switch_endian
|
||||
SYS_VM86 = C.__SNR_vm86
|
||||
SYS_VM86OLD = C.__SNR_vm86old
|
||||
SYS_CLOCK_ADJTIME64 = C.__SNR_clock_adjtime64
|
||||
SYS_CLOCK_SETTIME64 = C.__SNR_clock_settime64
|
||||
SYS_CHOWN32 = C.__SNR_chown32
|
||||
SYS_FCHOWN32 = C.__SNR_fchown32
|
||||
SYS_LCHOWN32 = C.__SNR_lchown32
|
||||
SYS_SETGID32 = C.__SNR_setgid32
|
||||
SYS_SETGROUPS32 = C.__SNR_setgroups32
|
||||
SYS_SETREGID32 = C.__SNR_setregid32
|
||||
SYS_SETRESGID32 = C.__SNR_setresgid32
|
||||
SYS_SETRESUID32 = C.__SNR_setresuid32
|
||||
SYS_SETREUID32 = C.__SNR_setreuid32
|
||||
SYS_SETUID32 = C.__SNR_setuid32
|
||||
)
|
||||
61
container/seccomp/syscall_extra_linux_arm64.go
Normal file
61
container/seccomp/syscall_extra_linux_arm64.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package seccomp
|
||||
|
||||
/*
|
||||
#cgo linux pkg-config: --static libseccomp
|
||||
|
||||
#include <seccomp.h>
|
||||
*/
|
||||
import "C"
|
||||
import "syscall"
|
||||
|
||||
const (
|
||||
SYS_NEWFSTATAT = syscall.SYS_FSTATAT
|
||||
)
|
||||
|
||||
var syscallNumExtra = map[string]int{
|
||||
"uselib": SYS_USELIB,
|
||||
"clock_adjtime64": SYS_CLOCK_ADJTIME64,
|
||||
"clock_settime64": SYS_CLOCK_SETTIME64,
|
||||
"umount": SYS_UMOUNT,
|
||||
"chown": SYS_CHOWN,
|
||||
"chown32": SYS_CHOWN32,
|
||||
"fchown32": SYS_FCHOWN32,
|
||||
"lchown": SYS_LCHOWN,
|
||||
"lchown32": SYS_LCHOWN32,
|
||||
"setgid32": SYS_SETGID32,
|
||||
"setgroups32": SYS_SETGROUPS32,
|
||||
"setregid32": SYS_SETREGID32,
|
||||
"setresgid32": SYS_SETRESGID32,
|
||||
"setresuid32": SYS_SETRESUID32,
|
||||
"setreuid32": SYS_SETREUID32,
|
||||
"setuid32": SYS_SETUID32,
|
||||
"modify_ldt": SYS_MODIFY_LDT,
|
||||
"subpage_prot": SYS_SUBPAGE_PROT,
|
||||
"switch_endian": SYS_SWITCH_ENDIAN,
|
||||
"vm86": SYS_VM86,
|
||||
"vm86old": SYS_VM86OLD,
|
||||
}
|
||||
|
||||
const (
|
||||
SYS_USELIB = C.__SNR_uselib
|
||||
SYS_CLOCK_ADJTIME64 = C.__SNR_clock_adjtime64
|
||||
SYS_CLOCK_SETTIME64 = C.__SNR_clock_settime64
|
||||
SYS_UMOUNT = C.__SNR_umount
|
||||
SYS_CHOWN = C.__SNR_chown
|
||||
SYS_CHOWN32 = C.__SNR_chown32
|
||||
SYS_FCHOWN32 = C.__SNR_fchown32
|
||||
SYS_LCHOWN = C.__SNR_lchown
|
||||
SYS_LCHOWN32 = C.__SNR_lchown32
|
||||
SYS_SETGID32 = C.__SNR_setgid32
|
||||
SYS_SETGROUPS32 = C.__SNR_setgroups32
|
||||
SYS_SETREGID32 = C.__SNR_setregid32
|
||||
SYS_SETRESGID32 = C.__SNR_setresgid32
|
||||
SYS_SETRESUID32 = C.__SNR_setresuid32
|
||||
SYS_SETREUID32 = C.__SNR_setreuid32
|
||||
SYS_SETUID32 = C.__SNR_setuid32
|
||||
SYS_MODIFY_LDT = C.__SNR_modify_ldt
|
||||
SYS_SUBPAGE_PROT = C.__SNR_subpage_prot
|
||||
SYS_SWITCH_ENDIAN = C.__SNR_switch_endian
|
||||
SYS_VM86 = C.__SNR_vm86
|
||||
SYS_VM86OLD = C.__SNR_vm86old
|
||||
)
|
||||
459
container/seccomp/syscall_linux_amd64.go
Normal file
459
container/seccomp/syscall_linux_amd64.go
Normal file
@@ -0,0 +1,459 @@
|
||||
// mksysnum_linux.pl /usr/include/asm/unistd_64.h
|
||||
// Code generated by the command above; DO NOT EDIT.
|
||||
|
||||
package seccomp
|
||||
|
||||
import . "syscall"
|
||||
|
||||
var syscallNum = map[string]int{
|
||||
"read": SYS_READ,
|
||||
"write": SYS_WRITE,
|
||||
"open": SYS_OPEN,
|
||||
"close": SYS_CLOSE,
|
||||
"stat": SYS_STAT,
|
||||
"fstat": SYS_FSTAT,
|
||||
"lstat": SYS_LSTAT,
|
||||
"poll": SYS_POLL,
|
||||
"lseek": SYS_LSEEK,
|
||||
"mmap": SYS_MMAP,
|
||||
"mprotect": SYS_MPROTECT,
|
||||
"munmap": SYS_MUNMAP,
|
||||
"brk": SYS_BRK,
|
||||
"rt_sigaction": SYS_RT_SIGACTION,
|
||||
"rt_sigprocmask": SYS_RT_SIGPROCMASK,
|
||||
"rt_sigreturn": SYS_RT_SIGRETURN,
|
||||
"ioctl": SYS_IOCTL,
|
||||
"pread64": SYS_PREAD64,
|
||||
"pwrite64": SYS_PWRITE64,
|
||||
"readv": SYS_READV,
|
||||
"writev": SYS_WRITEV,
|
||||
"access": SYS_ACCESS,
|
||||
"pipe": SYS_PIPE,
|
||||
"select": SYS_SELECT,
|
||||
"sched_yield": SYS_SCHED_YIELD,
|
||||
"mremap": SYS_MREMAP,
|
||||
"msync": SYS_MSYNC,
|
||||
"mincore": SYS_MINCORE,
|
||||
"madvise": SYS_MADVISE,
|
||||
"shmget": SYS_SHMGET,
|
||||
"shmat": SYS_SHMAT,
|
||||
"shmctl": SYS_SHMCTL,
|
||||
"dup": SYS_DUP,
|
||||
"dup2": SYS_DUP2,
|
||||
"pause": SYS_PAUSE,
|
||||
"nanosleep": SYS_NANOSLEEP,
|
||||
"getitimer": SYS_GETITIMER,
|
||||
"alarm": SYS_ALARM,
|
||||
"setitimer": SYS_SETITIMER,
|
||||
"getpid": SYS_GETPID,
|
||||
"sendfile": SYS_SENDFILE,
|
||||
"socket": SYS_SOCKET,
|
||||
"connect": SYS_CONNECT,
|
||||
"accept": SYS_ACCEPT,
|
||||
"sendto": SYS_SENDTO,
|
||||
"recvfrom": SYS_RECVFROM,
|
||||
"sendmsg": SYS_SENDMSG,
|
||||
"recvmsg": SYS_RECVMSG,
|
||||
"shutdown": SYS_SHUTDOWN,
|
||||
"bind": SYS_BIND,
|
||||
"listen": SYS_LISTEN,
|
||||
"getsockname": SYS_GETSOCKNAME,
|
||||
"getpeername": SYS_GETPEERNAME,
|
||||
"socketpair": SYS_SOCKETPAIR,
|
||||
"setsockopt": SYS_SETSOCKOPT,
|
||||
"getsockopt": SYS_GETSOCKOPT,
|
||||
"clone": SYS_CLONE,
|
||||
"fork": SYS_FORK,
|
||||
"vfork": SYS_VFORK,
|
||||
"execve": SYS_EXECVE,
|
||||
"exit": SYS_EXIT,
|
||||
"wait4": SYS_WAIT4,
|
||||
"kill": SYS_KILL,
|
||||
"uname": SYS_UNAME,
|
||||
"semget": SYS_SEMGET,
|
||||
"semop": SYS_SEMOP,
|
||||
"semctl": SYS_SEMCTL,
|
||||
"shmdt": SYS_SHMDT,
|
||||
"msgget": SYS_MSGGET,
|
||||
"msgsnd": SYS_MSGSND,
|
||||
"msgrcv": SYS_MSGRCV,
|
||||
"msgctl": SYS_MSGCTL,
|
||||
"fcntl": SYS_FCNTL,
|
||||
"flock": SYS_FLOCK,
|
||||
"fsync": SYS_FSYNC,
|
||||
"fdatasync": SYS_FDATASYNC,
|
||||
"truncate": SYS_TRUNCATE,
|
||||
"ftruncate": SYS_FTRUNCATE,
|
||||
"getdents": SYS_GETDENTS,
|
||||
"getcwd": SYS_GETCWD,
|
||||
"chdir": SYS_CHDIR,
|
||||
"fchdir": SYS_FCHDIR,
|
||||
"rename": SYS_RENAME,
|
||||
"mkdir": SYS_MKDIR,
|
||||
"rmdir": SYS_RMDIR,
|
||||
"creat": SYS_CREAT,
|
||||
"link": SYS_LINK,
|
||||
"unlink": SYS_UNLINK,
|
||||
"symlink": SYS_SYMLINK,
|
||||
"readlink": SYS_READLINK,
|
||||
"chmod": SYS_CHMOD,
|
||||
"fchmod": SYS_FCHMOD,
|
||||
"chown": SYS_CHOWN,
|
||||
"fchown": SYS_FCHOWN,
|
||||
"lchown": SYS_LCHOWN,
|
||||
"umask": SYS_UMASK,
|
||||
"gettimeofday": SYS_GETTIMEOFDAY,
|
||||
"getrlimit": SYS_GETRLIMIT,
|
||||
"getrusage": SYS_GETRUSAGE,
|
||||
"sysinfo": SYS_SYSINFO,
|
||||
"times": SYS_TIMES,
|
||||
"ptrace": SYS_PTRACE,
|
||||
"getuid": SYS_GETUID,
|
||||
"syslog": SYS_SYSLOG,
|
||||
"getgid": SYS_GETGID,
|
||||
"setuid": SYS_SETUID,
|
||||
"setgid": SYS_SETGID,
|
||||
"geteuid": SYS_GETEUID,
|
||||
"getegid": SYS_GETEGID,
|
||||
"setpgid": SYS_SETPGID,
|
||||
"getppid": SYS_GETPPID,
|
||||
"getpgrp": SYS_GETPGRP,
|
||||
"setsid": SYS_SETSID,
|
||||
"setreuid": SYS_SETREUID,
|
||||
"setregid": SYS_SETREGID,
|
||||
"getgroups": SYS_GETGROUPS,
|
||||
"setgroups": SYS_SETGROUPS,
|
||||
"setresuid": SYS_SETRESUID,
|
||||
"getresuid": SYS_GETRESUID,
|
||||
"setresgid": SYS_SETRESGID,
|
||||
"getresgid": SYS_GETRESGID,
|
||||
"getpgid": SYS_GETPGID,
|
||||
"setfsuid": SYS_SETFSUID,
|
||||
"setfsgid": SYS_SETFSGID,
|
||||
"getsid": SYS_GETSID,
|
||||
"capget": SYS_CAPGET,
|
||||
"capset": SYS_CAPSET,
|
||||
"rt_sigpending": SYS_RT_SIGPENDING,
|
||||
"rt_sigtimedwait": SYS_RT_SIGTIMEDWAIT,
|
||||
"rt_sigqueueinfo": SYS_RT_SIGQUEUEINFO,
|
||||
"rt_sigsuspend": SYS_RT_SIGSUSPEND,
|
||||
"sigaltstack": SYS_SIGALTSTACK,
|
||||
"utime": SYS_UTIME,
|
||||
"mknod": SYS_MKNOD,
|
||||
"uselib": SYS_USELIB,
|
||||
"personality": SYS_PERSONALITY,
|
||||
"ustat": SYS_USTAT,
|
||||
"statfs": SYS_STATFS,
|
||||
"fstatfs": SYS_FSTATFS,
|
||||
"sysfs": SYS_SYSFS,
|
||||
"getpriority": SYS_GETPRIORITY,
|
||||
"setpriority": SYS_SETPRIORITY,
|
||||
"sched_setparam": SYS_SCHED_SETPARAM,
|
||||
"sched_getparam": SYS_SCHED_GETPARAM,
|
||||
"sched_setscheduler": SYS_SCHED_SETSCHEDULER,
|
||||
"sched_getscheduler": SYS_SCHED_GETSCHEDULER,
|
||||
"sched_get_priority_max": SYS_SCHED_GET_PRIORITY_MAX,
|
||||
"sched_get_priority_min": SYS_SCHED_GET_PRIORITY_MIN,
|
||||
"sched_rr_get_interval": SYS_SCHED_RR_GET_INTERVAL,
|
||||
"mlock": SYS_MLOCK,
|
||||
"munlock": SYS_MUNLOCK,
|
||||
"mlockall": SYS_MLOCKALL,
|
||||
"munlockall": SYS_MUNLOCKALL,
|
||||
"vhangup": SYS_VHANGUP,
|
||||
"modify_ldt": SYS_MODIFY_LDT,
|
||||
"pivot_root": SYS_PIVOT_ROOT,
|
||||
"_sysctl": SYS__SYSCTL,
|
||||
"prctl": SYS_PRCTL,
|
||||
"arch_prctl": SYS_ARCH_PRCTL,
|
||||
"adjtimex": SYS_ADJTIMEX,
|
||||
"setrlimit": SYS_SETRLIMIT,
|
||||
"chroot": SYS_CHROOT,
|
||||
"sync": SYS_SYNC,
|
||||
"acct": SYS_ACCT,
|
||||
"settimeofday": SYS_SETTIMEOFDAY,
|
||||
"mount": SYS_MOUNT,
|
||||
"umount2": SYS_UMOUNT2,
|
||||
"swapon": SYS_SWAPON,
|
||||
"swapoff": SYS_SWAPOFF,
|
||||
"reboot": SYS_REBOOT,
|
||||
"sethostname": SYS_SETHOSTNAME,
|
||||
"setdomainname": SYS_SETDOMAINNAME,
|
||||
"iopl": SYS_IOPL,
|
||||
"ioperm": SYS_IOPERM,
|
||||
"create_module": SYS_CREATE_MODULE,
|
||||
"init_module": SYS_INIT_MODULE,
|
||||
"delete_module": SYS_DELETE_MODULE,
|
||||
"get_kernel_syms": SYS_GET_KERNEL_SYMS,
|
||||
"query_module": SYS_QUERY_MODULE,
|
||||
"quotactl": SYS_QUOTACTL,
|
||||
"nfsservctl": SYS_NFSSERVCTL,
|
||||
"getpmsg": SYS_GETPMSG,
|
||||
"putpmsg": SYS_PUTPMSG,
|
||||
"afs_syscall": SYS_AFS_SYSCALL,
|
||||
"tuxcall": SYS_TUXCALL,
|
||||
"security": SYS_SECURITY,
|
||||
"gettid": SYS_GETTID,
|
||||
"readahead": SYS_READAHEAD,
|
||||
"setxattr": SYS_SETXATTR,
|
||||
"lsetxattr": SYS_LSETXATTR,
|
||||
"fsetxattr": SYS_FSETXATTR,
|
||||
"getxattr": SYS_GETXATTR,
|
||||
"lgetxattr": SYS_LGETXATTR,
|
||||
"fgetxattr": SYS_FGETXATTR,
|
||||
"listxattr": SYS_LISTXATTR,
|
||||
"llistxattr": SYS_LLISTXATTR,
|
||||
"flistxattr": SYS_FLISTXATTR,
|
||||
"removexattr": SYS_REMOVEXATTR,
|
||||
"lremovexattr": SYS_LREMOVEXATTR,
|
||||
"fremovexattr": SYS_FREMOVEXATTR,
|
||||
"tkill": SYS_TKILL,
|
||||
"time": SYS_TIME,
|
||||
"futex": SYS_FUTEX,
|
||||
"sched_setaffinity": SYS_SCHED_SETAFFINITY,
|
||||
"sched_getaffinity": SYS_SCHED_GETAFFINITY,
|
||||
"set_thread_area": SYS_SET_THREAD_AREA,
|
||||
"io_setup": SYS_IO_SETUP,
|
||||
"io_destroy": SYS_IO_DESTROY,
|
||||
"io_getevents": SYS_IO_GETEVENTS,
|
||||
"io_submit": SYS_IO_SUBMIT,
|
||||
"io_cancel": SYS_IO_CANCEL,
|
||||
"get_thread_area": SYS_GET_THREAD_AREA,
|
||||
"lookup_dcookie": SYS_LOOKUP_DCOOKIE,
|
||||
"epoll_create": SYS_EPOLL_CREATE,
|
||||
"epoll_ctl_old": SYS_EPOLL_CTL_OLD,
|
||||
"epoll_wait_old": SYS_EPOLL_WAIT_OLD,
|
||||
"remap_file_pages": SYS_REMAP_FILE_PAGES,
|
||||
"getdents64": SYS_GETDENTS64,
|
||||
"set_tid_address": SYS_SET_TID_ADDRESS,
|
||||
"restart_syscall": SYS_RESTART_SYSCALL,
|
||||
"semtimedop": SYS_SEMTIMEDOP,
|
||||
"fadvise64": SYS_FADVISE64,
|
||||
"timer_create": SYS_TIMER_CREATE,
|
||||
"timer_settime": SYS_TIMER_SETTIME,
|
||||
"timer_gettime": SYS_TIMER_GETTIME,
|
||||
"timer_getoverrun": SYS_TIMER_GETOVERRUN,
|
||||
"timer_delete": SYS_TIMER_DELETE,
|
||||
"clock_settime": SYS_CLOCK_SETTIME,
|
||||
"clock_gettime": SYS_CLOCK_GETTIME,
|
||||
"clock_getres": SYS_CLOCK_GETRES,
|
||||
"clock_nanosleep": SYS_CLOCK_NANOSLEEP,
|
||||
"exit_group": SYS_EXIT_GROUP,
|
||||
"epoll_wait": SYS_EPOLL_WAIT,
|
||||
"epoll_ctl": SYS_EPOLL_CTL,
|
||||
"tgkill": SYS_TGKILL,
|
||||
"utimes": SYS_UTIMES,
|
||||
"vserver": SYS_VSERVER,
|
||||
"mbind": SYS_MBIND,
|
||||
"set_mempolicy": SYS_SET_MEMPOLICY,
|
||||
"get_mempolicy": SYS_GET_MEMPOLICY,
|
||||
"mq_open": SYS_MQ_OPEN,
|
||||
"mq_unlink": SYS_MQ_UNLINK,
|
||||
"mq_timedsend": SYS_MQ_TIMEDSEND,
|
||||
"mq_timedreceive": SYS_MQ_TIMEDRECEIVE,
|
||||
"mq_notify": SYS_MQ_NOTIFY,
|
||||
"mq_getsetattr": SYS_MQ_GETSETATTR,
|
||||
"kexec_load": SYS_KEXEC_LOAD,
|
||||
"waitid": SYS_WAITID,
|
||||
"add_key": SYS_ADD_KEY,
|
||||
"request_key": SYS_REQUEST_KEY,
|
||||
"keyctl": SYS_KEYCTL,
|
||||
"ioprio_set": SYS_IOPRIO_SET,
|
||||
"ioprio_get": SYS_IOPRIO_GET,
|
||||
"inotify_init": SYS_INOTIFY_INIT,
|
||||
"inotify_add_watch": SYS_INOTIFY_ADD_WATCH,
|
||||
"inotify_rm_watch": SYS_INOTIFY_RM_WATCH,
|
||||
"migrate_pages": SYS_MIGRATE_PAGES,
|
||||
"openat": SYS_OPENAT,
|
||||
"mkdirat": SYS_MKDIRAT,
|
||||
"mknodat": SYS_MKNODAT,
|
||||
"fchownat": SYS_FCHOWNAT,
|
||||
"futimesat": SYS_FUTIMESAT,
|
||||
"newfstatat": SYS_NEWFSTATAT,
|
||||
"unlinkat": SYS_UNLINKAT,
|
||||
"renameat": SYS_RENAMEAT,
|
||||
"linkat": SYS_LINKAT,
|
||||
"symlinkat": SYS_SYMLINKAT,
|
||||
"readlinkat": SYS_READLINKAT,
|
||||
"fchmodat": SYS_FCHMODAT,
|
||||
"faccessat": SYS_FACCESSAT,
|
||||
"pselect6": SYS_PSELECT6,
|
||||
"ppoll": SYS_PPOLL,
|
||||
"unshare": SYS_UNSHARE,
|
||||
"set_robust_list": SYS_SET_ROBUST_LIST,
|
||||
"get_robust_list": SYS_GET_ROBUST_LIST,
|
||||
"splice": SYS_SPLICE,
|
||||
"tee": SYS_TEE,
|
||||
"sync_file_range": SYS_SYNC_FILE_RANGE,
|
||||
"vmsplice": SYS_VMSPLICE,
|
||||
"move_pages": SYS_MOVE_PAGES,
|
||||
"utimensat": SYS_UTIMENSAT,
|
||||
"epoll_pwait": SYS_EPOLL_PWAIT,
|
||||
"signalfd": SYS_SIGNALFD,
|
||||
"timerfd_create": SYS_TIMERFD_CREATE,
|
||||
"eventfd": SYS_EVENTFD,
|
||||
"fallocate": SYS_FALLOCATE,
|
||||
"timerfd_settime": SYS_TIMERFD_SETTIME,
|
||||
"timerfd_gettime": SYS_TIMERFD_GETTIME,
|
||||
"accept4": SYS_ACCEPT4,
|
||||
"signalfd4": SYS_SIGNALFD4,
|
||||
"eventfd2": SYS_EVENTFD2,
|
||||
"epoll_create1": SYS_EPOLL_CREATE1,
|
||||
"dup3": SYS_DUP3,
|
||||
"pipe2": SYS_PIPE2,
|
||||
"inotify_init1": SYS_INOTIFY_INIT1,
|
||||
"preadv": SYS_PREADV,
|
||||
"pwritev": SYS_PWRITEV,
|
||||
"rt_tgsigqueueinfo": SYS_RT_TGSIGQUEUEINFO,
|
||||
"perf_event_open": SYS_PERF_EVENT_OPEN,
|
||||
"recvmmsg": SYS_RECVMMSG,
|
||||
"fanotify_init": SYS_FANOTIFY_INIT,
|
||||
"fanotify_mark": SYS_FANOTIFY_MARK,
|
||||
"prlimit64": SYS_PRLIMIT64,
|
||||
"name_to_handle_at": SYS_NAME_TO_HANDLE_AT,
|
||||
"open_by_handle_at": SYS_OPEN_BY_HANDLE_AT,
|
||||
"clock_adjtime": SYS_CLOCK_ADJTIME,
|
||||
"syncfs": SYS_SYNCFS,
|
||||
"sendmmsg": SYS_SENDMMSG,
|
||||
"setns": SYS_SETNS,
|
||||
"getcpu": SYS_GETCPU,
|
||||
"process_vm_readv": SYS_PROCESS_VM_READV,
|
||||
"process_vm_writev": SYS_PROCESS_VM_WRITEV,
|
||||
"kcmp": SYS_KCMP,
|
||||
"finit_module": SYS_FINIT_MODULE,
|
||||
"sched_setattr": SYS_SCHED_SETATTR,
|
||||
"sched_getattr": SYS_SCHED_GETATTR,
|
||||
"renameat2": SYS_RENAMEAT2,
|
||||
"seccomp": SYS_SECCOMP,
|
||||
"getrandom": SYS_GETRANDOM,
|
||||
"memfd_create": SYS_MEMFD_CREATE,
|
||||
"kexec_file_load": SYS_KEXEC_FILE_LOAD,
|
||||
"bpf": SYS_BPF,
|
||||
"execveat": SYS_EXECVEAT,
|
||||
"userfaultfd": SYS_USERFAULTFD,
|
||||
"membarrier": SYS_MEMBARRIER,
|
||||
"mlock2": SYS_MLOCK2,
|
||||
"copy_file_range": SYS_COPY_FILE_RANGE,
|
||||
"preadv2": SYS_PREADV2,
|
||||
"pwritev2": SYS_PWRITEV2,
|
||||
"pkey_mprotect": SYS_PKEY_MPROTECT,
|
||||
"pkey_alloc": SYS_PKEY_ALLOC,
|
||||
"pkey_free": SYS_PKEY_FREE,
|
||||
"statx": SYS_STATX,
|
||||
"io_pgetevents": SYS_IO_PGETEVENTS,
|
||||
"rseq": SYS_RSEQ,
|
||||
"uretprobe": SYS_URETPROBE,
|
||||
"pidfd_send_signal": SYS_PIDFD_SEND_SIGNAL,
|
||||
"io_uring_setup": SYS_IO_URING_SETUP,
|
||||
"io_uring_enter": SYS_IO_URING_ENTER,
|
||||
"io_uring_register": SYS_IO_URING_REGISTER,
|
||||
"open_tree": SYS_OPEN_TREE,
|
||||
"move_mount": SYS_MOVE_MOUNT,
|
||||
"fsopen": SYS_FSOPEN,
|
||||
"fsconfig": SYS_FSCONFIG,
|
||||
"fsmount": SYS_FSMOUNT,
|
||||
"fspick": SYS_FSPICK,
|
||||
"pidfd_open": SYS_PIDFD_OPEN,
|
||||
"clone3": SYS_CLONE3,
|
||||
"close_range": SYS_CLOSE_RANGE,
|
||||
"openat2": SYS_OPENAT2,
|
||||
"pidfd_getfd": SYS_PIDFD_GETFD,
|
||||
"faccessat2": SYS_FACCESSAT2,
|
||||
"process_madvise": SYS_PROCESS_MADVISE,
|
||||
"epoll_pwait2": SYS_EPOLL_PWAIT2,
|
||||
"mount_setattr": SYS_MOUNT_SETATTR,
|
||||
"quotactl_fd": SYS_QUOTACTL_FD,
|
||||
"landlock_create_ruleset": SYS_LANDLOCK_CREATE_RULESET,
|
||||
"landlock_add_rule": SYS_LANDLOCK_ADD_RULE,
|
||||
"landlock_restrict_self": SYS_LANDLOCK_RESTRICT_SELF,
|
||||
"memfd_secret": SYS_MEMFD_SECRET,
|
||||
"process_mrelease": SYS_PROCESS_MRELEASE,
|
||||
"futex_waitv": SYS_FUTEX_WAITV,
|
||||
"set_mempolicy_home_node": SYS_SET_MEMPOLICY_HOME_NODE,
|
||||
"cachestat": SYS_CACHESTAT,
|
||||
"fchmodat2": SYS_FCHMODAT2,
|
||||
"map_shadow_stack": SYS_MAP_SHADOW_STACK,
|
||||
"futex_wake": SYS_FUTEX_WAKE,
|
||||
"futex_wait": SYS_FUTEX_WAIT,
|
||||
"futex_requeue": SYS_FUTEX_REQUEUE,
|
||||
"statmount": SYS_STATMOUNT,
|
||||
"listmount": SYS_LISTMOUNT,
|
||||
"lsm_get_self_attr": SYS_LSM_GET_SELF_ATTR,
|
||||
"lsm_set_self_attr": SYS_LSM_SET_SELF_ATTR,
|
||||
"lsm_list_modules": SYS_LSM_LIST_MODULES,
|
||||
"mseal": SYS_MSEAL,
|
||||
}
|
||||
|
||||
const (
|
||||
SYS_NAME_TO_HANDLE_AT = 303
|
||||
SYS_OPEN_BY_HANDLE_AT = 304
|
||||
SYS_CLOCK_ADJTIME = 305
|
||||
SYS_SYNCFS = 306
|
||||
SYS_SENDMMSG = 307
|
||||
SYS_SETNS = 308
|
||||
SYS_GETCPU = 309
|
||||
SYS_PROCESS_VM_READV = 310
|
||||
SYS_PROCESS_VM_WRITEV = 311
|
||||
SYS_KCMP = 312
|
||||
SYS_FINIT_MODULE = 313
|
||||
SYS_SCHED_SETATTR = 314
|
||||
SYS_SCHED_GETATTR = 315
|
||||
SYS_RENAMEAT2 = 316
|
||||
SYS_SECCOMP = 317
|
||||
SYS_GETRANDOM = 318
|
||||
SYS_MEMFD_CREATE = 319
|
||||
SYS_KEXEC_FILE_LOAD = 320
|
||||
SYS_BPF = 321
|
||||
SYS_EXECVEAT = 322
|
||||
SYS_USERFAULTFD = 323
|
||||
SYS_MEMBARRIER = 324
|
||||
SYS_MLOCK2 = 325
|
||||
SYS_COPY_FILE_RANGE = 326
|
||||
SYS_PREADV2 = 327
|
||||
SYS_PWRITEV2 = 328
|
||||
SYS_PKEY_MPROTECT = 329
|
||||
SYS_PKEY_ALLOC = 330
|
||||
SYS_PKEY_FREE = 331
|
||||
SYS_STATX = 332
|
||||
SYS_IO_PGETEVENTS = 333
|
||||
SYS_RSEQ = 334
|
||||
SYS_URETPROBE = 335
|
||||
SYS_PIDFD_SEND_SIGNAL = 424
|
||||
SYS_IO_URING_SETUP = 425
|
||||
SYS_IO_URING_ENTER = 426
|
||||
SYS_IO_URING_REGISTER = 427
|
||||
SYS_OPEN_TREE = 428
|
||||
SYS_MOVE_MOUNT = 429
|
||||
SYS_FSOPEN = 430
|
||||
SYS_FSCONFIG = 431
|
||||
SYS_FSMOUNT = 432
|
||||
SYS_FSPICK = 433
|
||||
SYS_PIDFD_OPEN = 434
|
||||
SYS_CLONE3 = 435
|
||||
SYS_CLOSE_RANGE = 436
|
||||
SYS_OPENAT2 = 437
|
||||
SYS_PIDFD_GETFD = 438
|
||||
SYS_FACCESSAT2 = 439
|
||||
SYS_PROCESS_MADVISE = 440
|
||||
SYS_EPOLL_PWAIT2 = 441
|
||||
SYS_MOUNT_SETATTR = 442
|
||||
SYS_QUOTACTL_FD = 443
|
||||
SYS_LANDLOCK_CREATE_RULESET = 444
|
||||
SYS_LANDLOCK_ADD_RULE = 445
|
||||
SYS_LANDLOCK_RESTRICT_SELF = 446
|
||||
SYS_MEMFD_SECRET = 447
|
||||
SYS_PROCESS_MRELEASE = 448
|
||||
SYS_FUTEX_WAITV = 449
|
||||
SYS_SET_MEMPOLICY_HOME_NODE = 450
|
||||
SYS_CACHESTAT = 451
|
||||
SYS_FCHMODAT2 = 452
|
||||
SYS_MAP_SHADOW_STACK = 453
|
||||
SYS_FUTEX_WAKE = 454
|
||||
SYS_FUTEX_WAIT = 455
|
||||
SYS_FUTEX_REQUEUE = 456
|
||||
SYS_STATMOUNT = 457
|
||||
SYS_LISTMOUNT = 458
|
||||
SYS_LSM_GET_SELF_ATTR = 459
|
||||
SYS_LSM_SET_SELF_ATTR = 460
|
||||
SYS_LSM_LIST_MODULES = 461
|
||||
SYS_MSEAL = 462
|
||||
)
|
||||
382
container/seccomp/syscall_linux_arm64.go
Normal file
382
container/seccomp/syscall_linux_arm64.go
Normal file
@@ -0,0 +1,382 @@
|
||||
// mksysnum_linux.pl /usr/include/asm/unistd_64.h
|
||||
// Code generated by the command above; DO NOT EDIT.
|
||||
|
||||
package seccomp
|
||||
|
||||
import . "syscall"
|
||||
|
||||
var syscallNum = map[string]int{
|
||||
"io_setup": SYS_IO_SETUP,
|
||||
"io_destroy": SYS_IO_DESTROY,
|
||||
"io_submit": SYS_IO_SUBMIT,
|
||||
"io_cancel": SYS_IO_CANCEL,
|
||||
"io_getevents": SYS_IO_GETEVENTS,
|
||||
"setxattr": SYS_SETXATTR,
|
||||
"lsetxattr": SYS_LSETXATTR,
|
||||
"fsetxattr": SYS_FSETXATTR,
|
||||
"getxattr": SYS_GETXATTR,
|
||||
"lgetxattr": SYS_LGETXATTR,
|
||||
"fgetxattr": SYS_FGETXATTR,
|
||||
"listxattr": SYS_LISTXATTR,
|
||||
"llistxattr": SYS_LLISTXATTR,
|
||||
"flistxattr": SYS_FLISTXATTR,
|
||||
"removexattr": SYS_REMOVEXATTR,
|
||||
"lremovexattr": SYS_LREMOVEXATTR,
|
||||
"fremovexattr": SYS_FREMOVEXATTR,
|
||||
"getcwd": SYS_GETCWD,
|
||||
"lookup_dcookie": SYS_LOOKUP_DCOOKIE,
|
||||
"eventfd2": SYS_EVENTFD2,
|
||||
"epoll_create1": SYS_EPOLL_CREATE1,
|
||||
"epoll_ctl": SYS_EPOLL_CTL,
|
||||
"epoll_pwait": SYS_EPOLL_PWAIT,
|
||||
"dup": SYS_DUP,
|
||||
"dup3": SYS_DUP3,
|
||||
"fcntl": SYS_FCNTL,
|
||||
"inotify_init1": SYS_INOTIFY_INIT1,
|
||||
"inotify_add_watch": SYS_INOTIFY_ADD_WATCH,
|
||||
"inotify_rm_watch": SYS_INOTIFY_RM_WATCH,
|
||||
"ioctl": SYS_IOCTL,
|
||||
"ioprio_set": SYS_IOPRIO_SET,
|
||||
"ioprio_get": SYS_IOPRIO_GET,
|
||||
"flock": SYS_FLOCK,
|
||||
"mknodat": SYS_MKNODAT,
|
||||
"mkdirat": SYS_MKDIRAT,
|
||||
"unlinkat": SYS_UNLINKAT,
|
||||
"symlinkat": SYS_SYMLINKAT,
|
||||
"linkat": SYS_LINKAT,
|
||||
"renameat": SYS_RENAMEAT,
|
||||
"umount2": SYS_UMOUNT2,
|
||||
"mount": SYS_MOUNT,
|
||||
"pivot_root": SYS_PIVOT_ROOT,
|
||||
"nfsservctl": SYS_NFSSERVCTL,
|
||||
"statfs": SYS_STATFS,
|
||||
"fstatfs": SYS_FSTATFS,
|
||||
"truncate": SYS_TRUNCATE,
|
||||
"ftruncate": SYS_FTRUNCATE,
|
||||
"fallocate": SYS_FALLOCATE,
|
||||
"faccessat": SYS_FACCESSAT,
|
||||
"chdir": SYS_CHDIR,
|
||||
"fchdir": SYS_FCHDIR,
|
||||
"chroot": SYS_CHROOT,
|
||||
"fchmod": SYS_FCHMOD,
|
||||
"fchmodat": SYS_FCHMODAT,
|
||||
"fchownat": SYS_FCHOWNAT,
|
||||
"fchown": SYS_FCHOWN,
|
||||
"openat": SYS_OPENAT,
|
||||
"close": SYS_CLOSE,
|
||||
"vhangup": SYS_VHANGUP,
|
||||
"pipe2": SYS_PIPE2,
|
||||
"quotactl": SYS_QUOTACTL,
|
||||
"getdents64": SYS_GETDENTS64,
|
||||
"lseek": SYS_LSEEK,
|
||||
"read": SYS_READ,
|
||||
"write": SYS_WRITE,
|
||||
"readv": SYS_READV,
|
||||
"writev": SYS_WRITEV,
|
||||
"pread64": SYS_PREAD64,
|
||||
"pwrite64": SYS_PWRITE64,
|
||||
"preadv": SYS_PREADV,
|
||||
"pwritev": SYS_PWRITEV,
|
||||
"sendfile": SYS_SENDFILE,
|
||||
"pselect6": SYS_PSELECT6,
|
||||
"ppoll": SYS_PPOLL,
|
||||
"signalfd4": SYS_SIGNALFD4,
|
||||
"vmsplice": SYS_VMSPLICE,
|
||||
"splice": SYS_SPLICE,
|
||||
"tee": SYS_TEE,
|
||||
"readlinkat": SYS_READLINKAT,
|
||||
"newfstatat": SYS_NEWFSTATAT,
|
||||
"fstat": SYS_FSTAT,
|
||||
"sync": SYS_SYNC,
|
||||
"fsync": SYS_FSYNC,
|
||||
"fdatasync": SYS_FDATASYNC,
|
||||
"sync_file_range": SYS_SYNC_FILE_RANGE,
|
||||
"timerfd_create": SYS_TIMERFD_CREATE,
|
||||
"timerfd_settime": SYS_TIMERFD_SETTIME,
|
||||
"timerfd_gettime": SYS_TIMERFD_GETTIME,
|
||||
"utimensat": SYS_UTIMENSAT,
|
||||
"acct": SYS_ACCT,
|
||||
"capget": SYS_CAPGET,
|
||||
"capset": SYS_CAPSET,
|
||||
"personality": SYS_PERSONALITY,
|
||||
"exit": SYS_EXIT,
|
||||
"exit_group": SYS_EXIT_GROUP,
|
||||
"waitid": SYS_WAITID,
|
||||
"set_tid_address": SYS_SET_TID_ADDRESS,
|
||||
"unshare": SYS_UNSHARE,
|
||||
"futex": SYS_FUTEX,
|
||||
"set_robust_list": SYS_SET_ROBUST_LIST,
|
||||
"get_robust_list": SYS_GET_ROBUST_LIST,
|
||||
"nanosleep": SYS_NANOSLEEP,
|
||||
"getitimer": SYS_GETITIMER,
|
||||
"setitimer": SYS_SETITIMER,
|
||||
"kexec_load": SYS_KEXEC_LOAD,
|
||||
"init_module": SYS_INIT_MODULE,
|
||||
"delete_module": SYS_DELETE_MODULE,
|
||||
"timer_create": SYS_TIMER_CREATE,
|
||||
"timer_gettime": SYS_TIMER_GETTIME,
|
||||
"timer_getoverrun": SYS_TIMER_GETOVERRUN,
|
||||
"timer_settime": SYS_TIMER_SETTIME,
|
||||
"timer_delete": SYS_TIMER_DELETE,
|
||||
"clock_settime": SYS_CLOCK_SETTIME,
|
||||
"clock_gettime": SYS_CLOCK_GETTIME,
|
||||
"clock_getres": SYS_CLOCK_GETRES,
|
||||
"clock_nanosleep": SYS_CLOCK_NANOSLEEP,
|
||||
"syslog": SYS_SYSLOG,
|
||||
"ptrace": SYS_PTRACE,
|
||||
"sched_setparam": SYS_SCHED_SETPARAM,
|
||||
"sched_setscheduler": SYS_SCHED_SETSCHEDULER,
|
||||
"sched_getscheduler": SYS_SCHED_GETSCHEDULER,
|
||||
"sched_getparam": SYS_SCHED_GETPARAM,
|
||||
"sched_setaffinity": SYS_SCHED_SETAFFINITY,
|
||||
"sched_getaffinity": SYS_SCHED_GETAFFINITY,
|
||||
"sched_yield": SYS_SCHED_YIELD,
|
||||
"sched_get_priority_max": SYS_SCHED_GET_PRIORITY_MAX,
|
||||
"sched_get_priority_min": SYS_SCHED_GET_PRIORITY_MIN,
|
||||
"sched_rr_get_interval": SYS_SCHED_RR_GET_INTERVAL,
|
||||
"restart_syscall": SYS_RESTART_SYSCALL,
|
||||
"kill": SYS_KILL,
|
||||
"tkill": SYS_TKILL,
|
||||
"tgkill": SYS_TGKILL,
|
||||
"sigaltstack": SYS_SIGALTSTACK,
|
||||
"rt_sigsuspend": SYS_RT_SIGSUSPEND,
|
||||
"rt_sigaction": SYS_RT_SIGACTION,
|
||||
"rt_sigprocmask": SYS_RT_SIGPROCMASK,
|
||||
"rt_sigpending": SYS_RT_SIGPENDING,
|
||||
"rt_sigtimedwait": SYS_RT_SIGTIMEDWAIT,
|
||||
"rt_sigqueueinfo": SYS_RT_SIGQUEUEINFO,
|
||||
"rt_sigreturn": SYS_RT_SIGRETURN,
|
||||
"setpriority": SYS_SETPRIORITY,
|
||||
"getpriority": SYS_GETPRIORITY,
|
||||
"reboot": SYS_REBOOT,
|
||||
"setregid": SYS_SETREGID,
|
||||
"setgid": SYS_SETGID,
|
||||
"setreuid": SYS_SETREUID,
|
||||
"setuid": SYS_SETUID,
|
||||
"setresuid": SYS_SETRESUID,
|
||||
"getresuid": SYS_GETRESUID,
|
||||
"setresgid": SYS_SETRESGID,
|
||||
"getresgid": SYS_GETRESGID,
|
||||
"setfsuid": SYS_SETFSUID,
|
||||
"setfsgid": SYS_SETFSGID,
|
||||
"times": SYS_TIMES,
|
||||
"setpgid": SYS_SETPGID,
|
||||
"getpgid": SYS_GETPGID,
|
||||
"getsid": SYS_GETSID,
|
||||
"setsid": SYS_SETSID,
|
||||
"getgroups": SYS_GETGROUPS,
|
||||
"setgroups": SYS_SETGROUPS,
|
||||
"uname": SYS_UNAME,
|
||||
"sethostname": SYS_SETHOSTNAME,
|
||||
"setdomainname": SYS_SETDOMAINNAME,
|
||||
"getrlimit": SYS_GETRLIMIT,
|
||||
"setrlimit": SYS_SETRLIMIT,
|
||||
"getrusage": SYS_GETRUSAGE,
|
||||
"umask": SYS_UMASK,
|
||||
"prctl": SYS_PRCTL,
|
||||
"getcpu": SYS_GETCPU,
|
||||
"gettimeofday": SYS_GETTIMEOFDAY,
|
||||
"settimeofday": SYS_SETTIMEOFDAY,
|
||||
"adjtimex": SYS_ADJTIMEX,
|
||||
"getpid": SYS_GETPID,
|
||||
"getppid": SYS_GETPPID,
|
||||
"getuid": SYS_GETUID,
|
||||
"geteuid": SYS_GETEUID,
|
||||
"getgid": SYS_GETGID,
|
||||
"getegid": SYS_GETEGID,
|
||||
"gettid": SYS_GETTID,
|
||||
"sysinfo": SYS_SYSINFO,
|
||||
"mq_open": SYS_MQ_OPEN,
|
||||
"mq_unlink": SYS_MQ_UNLINK,
|
||||
"mq_timedsend": SYS_MQ_TIMEDSEND,
|
||||
"mq_timedreceive": SYS_MQ_TIMEDRECEIVE,
|
||||
"mq_notify": SYS_MQ_NOTIFY,
|
||||
"mq_getsetattr": SYS_MQ_GETSETATTR,
|
||||
"msgget": SYS_MSGGET,
|
||||
"msgctl": SYS_MSGCTL,
|
||||
"msgrcv": SYS_MSGRCV,
|
||||
"msgsnd": SYS_MSGSND,
|
||||
"semget": SYS_SEMGET,
|
||||
"semctl": SYS_SEMCTL,
|
||||
"semtimedop": SYS_SEMTIMEDOP,
|
||||
"semop": SYS_SEMOP,
|
||||
"shmget": SYS_SHMGET,
|
||||
"shmctl": SYS_SHMCTL,
|
||||
"shmat": SYS_SHMAT,
|
||||
"shmdt": SYS_SHMDT,
|
||||
"socket": SYS_SOCKET,
|
||||
"socketpair": SYS_SOCKETPAIR,
|
||||
"bind": SYS_BIND,
|
||||
"listen": SYS_LISTEN,
|
||||
"accept": SYS_ACCEPT,
|
||||
"connect": SYS_CONNECT,
|
||||
"getsockname": SYS_GETSOCKNAME,
|
||||
"getpeername": SYS_GETPEERNAME,
|
||||
"sendto": SYS_SENDTO,
|
||||
"recvfrom": SYS_RECVFROM,
|
||||
"setsockopt": SYS_SETSOCKOPT,
|
||||
"getsockopt": SYS_GETSOCKOPT,
|
||||
"shutdown": SYS_SHUTDOWN,
|
||||
"sendmsg": SYS_SENDMSG,
|
||||
"recvmsg": SYS_RECVMSG,
|
||||
"readahead": SYS_READAHEAD,
|
||||
"brk": SYS_BRK,
|
||||
"munmap": SYS_MUNMAP,
|
||||
"mremap": SYS_MREMAP,
|
||||
"add_key": SYS_ADD_KEY,
|
||||
"request_key": SYS_REQUEST_KEY,
|
||||
"keyctl": SYS_KEYCTL,
|
||||
"clone": SYS_CLONE,
|
||||
"execve": SYS_EXECVE,
|
||||
"mmap": SYS_MMAP,
|
||||
"fadvise64": SYS_FADVISE64,
|
||||
"swapon": SYS_SWAPON,
|
||||
"swapoff": SYS_SWAPOFF,
|
||||
"mprotect": SYS_MPROTECT,
|
||||
"msync": SYS_MSYNC,
|
||||
"mlock": SYS_MLOCK,
|
||||
"munlock": SYS_MUNLOCK,
|
||||
"mlockall": SYS_MLOCKALL,
|
||||
"munlockall": SYS_MUNLOCKALL,
|
||||
"mincore": SYS_MINCORE,
|
||||
"madvise": SYS_MADVISE,
|
||||
"remap_file_pages": SYS_REMAP_FILE_PAGES,
|
||||
"mbind": SYS_MBIND,
|
||||
"get_mempolicy": SYS_GET_MEMPOLICY,
|
||||
"set_mempolicy": SYS_SET_MEMPOLICY,
|
||||
"migrate_pages": SYS_MIGRATE_PAGES,
|
||||
"move_pages": SYS_MOVE_PAGES,
|
||||
"rt_tgsigqueueinfo": SYS_RT_TGSIGQUEUEINFO,
|
||||
"perf_event_open": SYS_PERF_EVENT_OPEN,
|
||||
"accept4": SYS_ACCEPT4,
|
||||
"recvmmsg": SYS_RECVMMSG,
|
||||
"wait4": SYS_WAIT4,
|
||||
"prlimit64": SYS_PRLIMIT64,
|
||||
"fanotify_init": SYS_FANOTIFY_INIT,
|
||||
"fanotify_mark": SYS_FANOTIFY_MARK,
|
||||
"name_to_handle_at": SYS_NAME_TO_HANDLE_AT,
|
||||
"open_by_handle_at": SYS_OPEN_BY_HANDLE_AT,
|
||||
"clock_adjtime": SYS_CLOCK_ADJTIME,
|
||||
"syncfs": SYS_SYNCFS,
|
||||
"setns": SYS_SETNS,
|
||||
"sendmmsg": SYS_SENDMMSG,
|
||||
"process_vm_readv": SYS_PROCESS_VM_READV,
|
||||
"process_vm_writev": SYS_PROCESS_VM_WRITEV,
|
||||
"kcmp": SYS_KCMP,
|
||||
"finit_module": SYS_FINIT_MODULE,
|
||||
"sched_setattr": SYS_SCHED_SETATTR,
|
||||
"sched_getattr": SYS_SCHED_GETATTR,
|
||||
"renameat2": SYS_RENAMEAT2,
|
||||
"seccomp": SYS_SECCOMP,
|
||||
"getrandom": SYS_GETRANDOM,
|
||||
"memfd_create": SYS_MEMFD_CREATE,
|
||||
"bpf": SYS_BPF,
|
||||
"execveat": SYS_EXECVEAT,
|
||||
"userfaultfd": SYS_USERFAULTFD,
|
||||
"membarrier": SYS_MEMBARRIER,
|
||||
"mlock2": SYS_MLOCK2,
|
||||
"copy_file_range": SYS_COPY_FILE_RANGE,
|
||||
"preadv2": SYS_PREADV2,
|
||||
"pwritev2": SYS_PWRITEV2,
|
||||
"pkey_mprotect": SYS_PKEY_MPROTECT,
|
||||
"pkey_alloc": SYS_PKEY_ALLOC,
|
||||
"pkey_free": SYS_PKEY_FREE,
|
||||
"statx": SYS_STATX,
|
||||
"io_pgetevents": SYS_IO_PGETEVENTS,
|
||||
"rseq": SYS_RSEQ,
|
||||
"kexec_file_load": SYS_KEXEC_FILE_LOAD,
|
||||
"pidfd_send_signal": SYS_PIDFD_SEND_SIGNAL,
|
||||
"io_uring_setup": SYS_IO_URING_SETUP,
|
||||
"io_uring_enter": SYS_IO_URING_ENTER,
|
||||
"io_uring_register": SYS_IO_URING_REGISTER,
|
||||
"open_tree": SYS_OPEN_TREE,
|
||||
"move_mount": SYS_MOVE_MOUNT,
|
||||
"fsopen": SYS_FSOPEN,
|
||||
"fsconfig": SYS_FSCONFIG,
|
||||
"fsmount": SYS_FSMOUNT,
|
||||
"fspick": SYS_FSPICK,
|
||||
"pidfd_open": SYS_PIDFD_OPEN,
|
||||
"clone3": SYS_CLONE3,
|
||||
"close_range": SYS_CLOSE_RANGE,
|
||||
"openat2": SYS_OPENAT2,
|
||||
"pidfd_getfd": SYS_PIDFD_GETFD,
|
||||
"faccessat2": SYS_FACCESSAT2,
|
||||
"process_madvise": SYS_PROCESS_MADVISE,
|
||||
"epoll_pwait2": SYS_EPOLL_PWAIT2,
|
||||
"mount_setattr": SYS_MOUNT_SETATTR,
|
||||
"quotactl_fd": SYS_QUOTACTL_FD,
|
||||
"landlock_create_ruleset": SYS_LANDLOCK_CREATE_RULESET,
|
||||
"landlock_add_rule": SYS_LANDLOCK_ADD_RULE,
|
||||
"landlock_restrict_self": SYS_LANDLOCK_RESTRICT_SELF,
|
||||
"memfd_secret": SYS_MEMFD_SECRET,
|
||||
"process_mrelease": SYS_PROCESS_MRELEASE,
|
||||
"futex_waitv": SYS_FUTEX_WAITV,
|
||||
"set_mempolicy_home_node": SYS_SET_MEMPOLICY_HOME_NODE,
|
||||
"cachestat": SYS_CACHESTAT,
|
||||
"fchmodat2": SYS_FCHMODAT2,
|
||||
"map_shadow_stack": SYS_MAP_SHADOW_STACK,
|
||||
"futex_wake": SYS_FUTEX_WAKE,
|
||||
"futex_wait": SYS_FUTEX_WAIT,
|
||||
"futex_requeue": SYS_FUTEX_REQUEUE,
|
||||
"statmount": SYS_STATMOUNT,
|
||||
"listmount": SYS_LISTMOUNT,
|
||||
"lsm_get_self_attr": SYS_LSM_GET_SELF_ATTR,
|
||||
"lsm_set_self_attr": SYS_LSM_SET_SELF_ATTR,
|
||||
"lsm_list_modules": SYS_LSM_LIST_MODULES,
|
||||
"mseal": SYS_MSEAL,
|
||||
}
|
||||
|
||||
const (
|
||||
SYS_USERFAULTFD = 282
|
||||
SYS_MEMBARRIER = 283
|
||||
SYS_MLOCK2 = 284
|
||||
SYS_COPY_FILE_RANGE = 285
|
||||
SYS_PREADV2 = 286
|
||||
SYS_PWRITEV2 = 287
|
||||
SYS_PKEY_MPROTECT = 288
|
||||
SYS_PKEY_ALLOC = 289
|
||||
SYS_PKEY_FREE = 290
|
||||
SYS_STATX = 291
|
||||
SYS_IO_PGETEVENTS = 292
|
||||
SYS_RSEQ = 293
|
||||
SYS_KEXEC_FILE_LOAD = 294
|
||||
SYS_PIDFD_SEND_SIGNAL = 424
|
||||
SYS_IO_URING_SETUP = 425
|
||||
SYS_IO_URING_ENTER = 426
|
||||
SYS_IO_URING_REGISTER = 427
|
||||
SYS_OPEN_TREE = 428
|
||||
SYS_MOVE_MOUNT = 429
|
||||
SYS_FSOPEN = 430
|
||||
SYS_FSCONFIG = 431
|
||||
SYS_FSMOUNT = 432
|
||||
SYS_FSPICK = 433
|
||||
SYS_PIDFD_OPEN = 434
|
||||
SYS_CLONE3 = 435
|
||||
SYS_CLOSE_RANGE = 436
|
||||
SYS_OPENAT2 = 437
|
||||
SYS_PIDFD_GETFD = 438
|
||||
SYS_FACCESSAT2 = 439
|
||||
SYS_PROCESS_MADVISE = 440
|
||||
SYS_EPOLL_PWAIT2 = 441
|
||||
SYS_MOUNT_SETATTR = 442
|
||||
SYS_QUOTACTL_FD = 443
|
||||
SYS_LANDLOCK_CREATE_RULESET = 444
|
||||
SYS_LANDLOCK_ADD_RULE = 445
|
||||
SYS_LANDLOCK_RESTRICT_SELF = 446
|
||||
SYS_MEMFD_SECRET = 447
|
||||
SYS_PROCESS_MRELEASE = 448
|
||||
SYS_FUTEX_WAITV = 449
|
||||
SYS_SET_MEMPOLICY_HOME_NODE = 450
|
||||
SYS_CACHESTAT = 451
|
||||
SYS_FCHMODAT2 = 452
|
||||
SYS_MAP_SHADOW_STACK = 453
|
||||
SYS_FUTEX_WAKE = 454
|
||||
SYS_FUTEX_WAIT = 455
|
||||
SYS_FUTEX_REQUEUE = 456
|
||||
SYS_STATMOUNT = 457
|
||||
SYS_LISTMOUNT = 458
|
||||
SYS_LSM_GET_SELF_ATTR = 459
|
||||
SYS_LSM_SET_SELF_ATTR = 460
|
||||
SYS_LSM_LIST_MODULES = 461
|
||||
SYS_MSEAL = 462
|
||||
)
|
||||
20
container/seccomp/syscall_test.go
Normal file
20
container/seccomp/syscall_test.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSyscallResolveName(t *testing.T) {
|
||||
for name, want := range Syscalls() {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
if got := syscallResolveName(name); got != want {
|
||||
t.Errorf("syscallResolveName(%q) = %d, want %d",
|
||||
name, got, want)
|
||||
}
|
||||
if got, ok := SyscallResolveName(name); !ok || got != want {
|
||||
t.Errorf("SyscallResolveName(%q) = %d, want %d",
|
||||
name, got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
35
container/syscall.go
Normal file
35
container/syscall.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
SUID_DUMP_DISABLE = iota
|
||||
SUID_DUMP_USER
|
||||
)
|
||||
|
||||
func SetDumpable(dumpable uintptr) error {
|
||||
// linux/sched/coredump.h
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_DUMPABLE, dumpable, 0); errno != 0 {
|
||||
return errno
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IgnoringEINTR makes a function call and repeats it if it returns an
|
||||
// EINTR error. This appears to be required even though we install all
|
||||
// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
|
||||
// Also #20400 and #36644 are issues in which a signal handler is
|
||||
// installed without setting SA_RESTART. None of these are the common case,
|
||||
// but there are enough of them that it seems that we can't avoid
|
||||
// an EINTR loop.
|
||||
func IgnoringEINTR(fn func() error) error {
|
||||
for {
|
||||
err := fn()
|
||||
if err != syscall.EINTR {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
7
container/syscall_amd64.go
Normal file
7
container/syscall_amd64.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package container
|
||||
|
||||
const (
|
||||
O_PATH = 0x200000
|
||||
|
||||
PR_SET_NO_NEW_PRIVS = 0x26
|
||||
)
|
||||
@@ -1,4 +1,4 @@
|
||||
package sandbox
|
||||
package container
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -3,7 +3,7 @@ package vfs_test
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
"hakurei.app/container/vfs"
|
||||
)
|
||||
|
||||
func TestUnmangle(t *testing.T) {
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
"hakurei.app/container/vfs"
|
||||
)
|
||||
|
||||
func TestMountInfo(t *testing.T) {
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/vfs"
|
||||
"hakurei.app/container/vfs"
|
||||
)
|
||||
|
||||
func TestUnfold(t *testing.T) {
|
||||
@@ -1,9 +0,0 @@
|
||||
package dbus_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
)
|
||||
|
||||
func TestHelperStub(t *testing.T) { helper.InternalHelperStub() }
|
||||
8
dist/release.sh
vendored
8
dist/release.sh
vendored
@@ -10,11 +10,11 @@ cp -rv "dist/comp" "${out}"
|
||||
|
||||
go generate ./...
|
||||
go build -trimpath -v -o "${out}/bin/" -ldflags "-s -w -buildid= -extldflags '-static'
|
||||
-X git.gensokyo.uk/security/hakurei/internal.version=${VERSION}
|
||||
-X git.gensokyo.uk/security/hakurei/internal.hakurei=/usr/bin/hakurei
|
||||
-X git.gensokyo.uk/security/hakurei/internal.hsu=/usr/bin/hsu
|
||||
-X hakurei.app/internal.version=${VERSION}
|
||||
-X hakurei.app/internal.hmain=/usr/bin/hakurei
|
||||
-X hakurei.app/internal.hsu=/usr/bin/hsu
|
||||
-X main.hmain=/usr/bin/hakurei" ./...
|
||||
|
||||
rm -f "./${out}.tar.gz" && tar -C dist -czf "${out}.tar.gz" "${pname}"
|
||||
rm -rf "./${out}"
|
||||
(cd dist && sha512sum "${pname}.tar.gz" > "${pname}.tar.gz.sha512")
|
||||
(cd dist && sha512sum "${pname}.tar.gz" > "${pname}.tar.gz.sha512")
|
||||
|
||||
12
flake.lock
generated
12
flake.lock
generated
@@ -7,11 +7,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1748665073,
|
||||
"narHash": "sha256-RMhjnPKWtCoIIHiuR9QKD7xfsKb3agxzMfJY8V9MOew=",
|
||||
"lastModified": 1753479839,
|
||||
"narHash": "sha256-E/rPVh7vyPMJUFl2NAew+zibNGfVbANr8BP8nLRbLkQ=",
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"rev": "282e1e029cb6ab4811114fc85110613d72771dea",
|
||||
"rev": "0b9bf983db4d064764084cd6748efb1ab8297d1e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -23,11 +23,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1749024892,
|
||||
"narHash": "sha256-OGcDEz60TXQC+gVz5sdtgGJdKVYr6rwdzQKuZAJQpCA=",
|
||||
"lastModified": 1753345091,
|
||||
"narHash": "sha256-CdX2Rtvp5I8HGu9swBmYuq+ILwRxpXdJwlpg8jvN4tU=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "8f1b52b04f2cb6e5ead50bd28d76528a2f0380ef",
|
||||
"rev": "3ff0e34b1383648053bba8ed03f201d3466f90c9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
18
flake.nix
18
flake.nix
@@ -184,6 +184,24 @@
|
||||
exec cat ${docText} > options.md
|
||||
'';
|
||||
};
|
||||
|
||||
generateSyscallTable =
|
||||
let
|
||||
GOARCH = {
|
||||
x86_64-linux = "amd64";
|
||||
aarch64-linux = "arm64";
|
||||
};
|
||||
in
|
||||
pkgs.mkShell {
|
||||
shellHook = "exec ${pkgs.writeShellScript "generate-syscall-table" ''
|
||||
set -e
|
||||
${pkgs.perl}/bin/perl \
|
||||
container/seccomp/mksysnum_linux.pl \
|
||||
${pkgs.linuxHeaders}/include/asm/unistd_64.h | \
|
||||
${pkgs.go}/bin/gofmt > \
|
||||
container/seccomp/syscall_linux_${GOARCH.${system}}.go
|
||||
''}";
|
||||
};
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
2
go.mod
2
go.mod
@@ -1,3 +1,3 @@
|
||||
module git.gensokyo.uk/security/hakurei
|
||||
module hakurei.app
|
||||
|
||||
go 1.24
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
"hakurei.app/helper"
|
||||
)
|
||||
|
||||
func TestArgsString(t *testing.T) {
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper/proc"
|
||||
"hakurei.app/helper/proc"
|
||||
)
|
||||
|
||||
// NewDirect initialises a new direct Helper instance with wt as the null-terminated argument writer.
|
||||
|
||||
@@ -8,12 +8,13 @@ import (
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/helper"
|
||||
)
|
||||
|
||||
func TestCmd(t *testing.T) {
|
||||
t.Run("start non-existent helper path", func(t *testing.T) {
|
||||
h := helper.NewDirect(t.Context(), "/proc/nonexistent", argsWt, false, argF, nil, nil)
|
||||
h := helper.NewDirect(t.Context(), container.Nonexistent, argsWt, false, argF, nil, nil)
|
||||
|
||||
if err := h.Start(); !errors.Is(err, os.ErrNotExist) {
|
||||
t.Errorf("Start: error = %v, wantErr %v",
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper/proc"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/helper/proc"
|
||||
)
|
||||
|
||||
// New initialises a Helper instance with wt as the null-terminated argument writer.
|
||||
@@ -20,13 +20,13 @@ func New(
|
||||
wt io.WriterTo,
|
||||
stat bool,
|
||||
argF func(argsFd, statFd int) []string,
|
||||
cmdF func(container *sandbox.Container),
|
||||
cmdF func(z *container.Container),
|
||||
extraFiles []*os.File,
|
||||
) Helper {
|
||||
var args []string
|
||||
h := new(helperContainer)
|
||||
h.helperFiles, args = newHelperFiles(ctx, wt, stat, argF, extraFiles)
|
||||
h.Container = sandbox.New(ctx, name, args...)
|
||||
h.Container = container.New(ctx, name, args...)
|
||||
h.WaitDelay = WaitDelay
|
||||
if cmdF != nil {
|
||||
cmdF(h.Container)
|
||||
@@ -40,7 +40,7 @@ type helperContainer struct {
|
||||
|
||||
mu sync.Mutex
|
||||
*helperFiles
|
||||
*sandbox.Container
|
||||
*container.Container
|
||||
}
|
||||
|
||||
func (h *helperContainer) Start() error {
|
||||
|
||||
@@ -4,20 +4,17 @@ import (
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/helper"
|
||||
)
|
||||
|
||||
func TestContainer(t *testing.T) {
|
||||
t.Run("start empty container", func(t *testing.T) {
|
||||
h := helper.New(t.Context(), "/nonexistent", argsWt, false, argF, nil, nil)
|
||||
h := helper.New(t.Context(), container.Nonexistent, argsWt, false, argF, nil, nil)
|
||||
|
||||
wantErr := "sandbox: starting an empty container"
|
||||
wantErr := "container: starting an empty container"
|
||||
if err := h.Start(); err == nil || err.Error() != wantErr {
|
||||
t.Errorf("Start: error = %v, wantErr %q",
|
||||
err, wantErr)
|
||||
@@ -34,24 +31,10 @@ func TestContainer(t *testing.T) {
|
||||
|
||||
t.Run("implementation compliance", func(t *testing.T) {
|
||||
testHelper(t, func(ctx context.Context, setOutput func(stdoutP, stderrP *io.Writer), stat bool) helper.Helper {
|
||||
return helper.New(ctx, os.Args[0], argsWt, stat, argF, func(container *sandbox.Container) {
|
||||
setOutput(&container.Stdout, &container.Stderr)
|
||||
container.CommandContext = func(ctx context.Context) (cmd *exec.Cmd) {
|
||||
return exec.CommandContext(ctx, os.Args[0], "-test.v",
|
||||
"-test.run=TestHelperInit", "--", "init")
|
||||
}
|
||||
container.Bind("/", "/", 0)
|
||||
container.Proc("/proc")
|
||||
container.Dev("/dev")
|
||||
return helper.New(ctx, os.Args[0], argsWt, stat, argF, func(z *container.Container) {
|
||||
setOutput(&z.Stdout, &z.Stderr)
|
||||
z.Bind("/", "/", 0).Proc("/proc").Dev("/dev")
|
||||
}, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestHelperInit(t *testing.T) {
|
||||
if len(os.Args) != 5 || os.Args[4] != "init" {
|
||||
return
|
||||
}
|
||||
sandbox.SetOutput(hlog.Output{})
|
||||
sandbox.Init(hlog.Prepare, func(bool) { internal.InstallFmsg(false) })
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper/proc"
|
||||
"hakurei.app/helper/proc"
|
||||
)
|
||||
|
||||
var WaitDelay = 2 * time.Second
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
"hakurei.app/helper"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -38,7 +38,6 @@ func argF(argsFd, statFd int) []string {
|
||||
|
||||
func argFChecked(argsFd, statFd int) (args []string) {
|
||||
args = make([]string, 0, 6)
|
||||
args = append(args, "-test.run=TestHelperStub", "--")
|
||||
if argsFd > -1 {
|
||||
args = append(args, "--args", strconv.Itoa(argsFd))
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ func InternalHelperStub() {
|
||||
sp = v
|
||||
}
|
||||
|
||||
genericStub(flagRestoreFiles(3, ap, sp))
|
||||
genericStub(flagRestoreFiles(1, ap, sp))
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
@@ -1,9 +1,17 @@
|
||||
package helper_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/helper"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/helper"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
func TestHelperStub(t *testing.T) { helper.InternalHelperStub() }
|
||||
func TestMain(m *testing.M) {
|
||||
container.TryArgv0(hlog.Output{}, hlog.Prepare, internal.InstallOutput)
|
||||
helper.InternalHelperStub()
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
package hst
|
||||
|
||||
import (
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
const Tmp = "/.hakurei"
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package hst
|
||||
|
||||
import (
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"time"
|
||||
|
||||
"hakurei.app/container/seccomp"
|
||||
)
|
||||
|
||||
type (
|
||||
@@ -10,8 +12,16 @@ type (
|
||||
// container hostname
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
|
||||
// duration to wait for after interrupting a container's initial process in nanoseconds;
|
||||
// a negative value causes the container to be terminated immediately on cancellation
|
||||
WaitDelay time.Duration `json:"wait_delay,omitempty"`
|
||||
|
||||
// extra seccomp flags
|
||||
Seccomp seccomp.FilterOpts `json:"seccomp"`
|
||||
SeccompFlags seccomp.ExportFlag `json:"seccomp_flags"`
|
||||
// extra seccomp presets
|
||||
SeccompPresets seccomp.FilterPreset `json:"seccomp_presets"`
|
||||
// disable project-specific filter extensions
|
||||
SeccompCompat bool `json:"seccomp_compat,omitempty"`
|
||||
// allow ptrace and friends
|
||||
Devel bool `json:"devel,omitempty"`
|
||||
// allow userns creation in container
|
||||
|
||||
11
hst/paths.go
Normal file
11
hst/paths.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package hst
|
||||
|
||||
// Paths contains environment-dependent paths used by hakurei.
|
||||
type Paths struct {
|
||||
// path to shared directory (usually `/tmp/hakurei.%d`)
|
||||
SharePath string `json:"share_path"`
|
||||
// XDG_RUNTIME_DIR value (usually `/run/user/%d`)
|
||||
RuntimePath string `json:"runtime_path"`
|
||||
// application runtime directory (usually `/run/user/%d/hakurei`)
|
||||
RunDirPath string `json:"run_dir_path"`
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
package hst
|
||||
|
||||
import (
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
// Template returns a fully populated instance of Config.
|
||||
@@ -57,15 +57,18 @@ func Template() *Config {
|
||||
Groups: []string{"video", "dialout", "plugdev"},
|
||||
|
||||
Container: &ContainerConfig{
|
||||
Hostname: "localhost",
|
||||
Devel: true,
|
||||
Userns: true,
|
||||
Net: true,
|
||||
Device: true,
|
||||
Seccomp: seccomp.FilterMultiarch,
|
||||
Tty: true,
|
||||
Multiarch: true,
|
||||
MapRealUID: true,
|
||||
Hostname: "localhost",
|
||||
Devel: true,
|
||||
Userns: true,
|
||||
Net: true,
|
||||
Device: true,
|
||||
WaitDelay: -1,
|
||||
SeccompFlags: seccomp.AllowMultiarch,
|
||||
SeccompPresets: seccomp.PresetExt,
|
||||
SeccompCompat: true,
|
||||
Tty: true,
|
||||
Multiarch: true,
|
||||
MapRealUID: true,
|
||||
// example API credentials pulled from Google Chrome
|
||||
// DO NOT USE THESE IN A REAL BROWSER
|
||||
Env: map[string]string{
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"hakurei.app/hst"
|
||||
)
|
||||
|
||||
func TestTemplate(t *testing.T) {
|
||||
@@ -80,7 +80,10 @@ func TestTemplate(t *testing.T) {
|
||||
],
|
||||
"container": {
|
||||
"hostname": "localhost",
|
||||
"seccomp": 32,
|
||||
"wait_delay": -1,
|
||||
"seccomp_flags": 1,
|
||||
"seccomp_presets": 1,
|
||||
"seccomp_compat": true,
|
||||
"devel": true,
|
||||
"userns": true,
|
||||
"net": true,
|
||||
|
||||
@@ -2,15 +2,19 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/sys"
|
||||
)
|
||||
|
||||
type App interface {
|
||||
// ID returns a copy of [ID] held by App.
|
||||
ID() ID
|
||||
ID() state.ID
|
||||
|
||||
// Seal determines the outcome of config as a [SealedApp].
|
||||
// The value of config might be overwritten and must not be used again.
|
||||
@@ -48,12 +52,10 @@ func (rs *RunState) SetStart() {
|
||||
rs.Time = &now
|
||||
}
|
||||
|
||||
// Paths contains environment-dependent paths used by hakurei.
|
||||
type Paths struct {
|
||||
// path to shared directory (usually `/tmp/hakurei.%d`)
|
||||
SharePath string `json:"share_path"`
|
||||
// XDG_RUNTIME_DIR value (usually `/run/user/%d`)
|
||||
RuntimePath string `json:"runtime_path"`
|
||||
// application runtime directory (usually `/run/user/%d/hakurei`)
|
||||
RunDirPath string `json:"run_dir_path"`
|
||||
func MustNew(ctx context.Context, os sys.State) App {
|
||||
a, err := New(ctx, os)
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create app: %v", err)
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
package setuid
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
. "git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/internal/sys"
|
||||
)
|
||||
|
||||
func New(ctx context.Context, os sys.State) (App, error) {
|
||||
@@ -16,15 +16,15 @@ func New(ctx context.Context, os sys.State) (App, error) {
|
||||
a.sys = os
|
||||
a.ctx = ctx
|
||||
|
||||
id := new(ID)
|
||||
err := NewAppID(id)
|
||||
id := new(state.ID)
|
||||
err := state.NewAppID(id)
|
||||
a.id = newID(id)
|
||||
|
||||
return a, err
|
||||
}
|
||||
|
||||
type app struct {
|
||||
id *stringPair[ID]
|
||||
id *stringPair[state.ID]
|
||||
sys sys.State
|
||||
ctx context.Context
|
||||
|
||||
@@ -32,7 +32,7 @@ type app struct {
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
func (a *app) ID() ID { a.mu.RLock(); defer a.mu.RUnlock(); return a.id.unwrap() }
|
||||
func (a *app) ID() state.ID { a.mu.RLock(); defer a.mu.RUnlock(); return a.id.unwrap() }
|
||||
|
||||
func (a *app) String() string {
|
||||
if a == nil {
|
||||
@@ -1,4 +1,4 @@
|
||||
package setuid_test
|
||||
package app_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
@@ -7,21 +7,21 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app/internal/setuid"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/sys"
|
||||
"hakurei.app/system"
|
||||
)
|
||||
|
||||
type sealTestCase struct {
|
||||
name string
|
||||
os sys.State
|
||||
config *hst.Config
|
||||
id app.ID
|
||||
id state.ID
|
||||
wantSys *system.I
|
||||
wantContainer *sandbox.Params
|
||||
wantContainer *container.Params
|
||||
}
|
||||
|
||||
func TestApp(t *testing.T) {
|
||||
@@ -29,17 +29,17 @@ func TestApp(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
a := setuid.NewWithID(tc.id, tc.os)
|
||||
a := app.NewWithID(tc.id, tc.os)
|
||||
var (
|
||||
gotSys *system.I
|
||||
gotContainer *sandbox.Params
|
||||
gotContainer *container.Params
|
||||
)
|
||||
if !t.Run("seal", func(t *testing.T) {
|
||||
if sa, err := a.Seal(tc.config); err != nil {
|
||||
t.Errorf("Seal: error = %v", err)
|
||||
return
|
||||
} else {
|
||||
gotSys, gotContainer = setuid.AppIParams(a, sa)
|
||||
gotSys, gotContainer = app.AppIParams(a, sa)
|
||||
}
|
||||
}) {
|
||||
return
|
||||
@@ -1,12 +1,13 @@
|
||||
package setuid_test
|
||||
package app_test
|
||||
|
||||
import (
|
||||
"git.gensokyo.uk/security/hakurei/acl"
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/acl"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
var testCasesNixos = []sealTestCase{
|
||||
@@ -51,7 +52,7 @@ var testCasesNixos = []sealTestCase{
|
||||
Data: "/var/lib/persist/module/hakurei/0/1",
|
||||
Identity: 1, Groups: []string{},
|
||||
},
|
||||
app.ID{
|
||||
state.ID{
|
||||
0x8e, 0x2c, 0x76, 0xb0,
|
||||
0x66, 0xda, 0xbe, 0x57,
|
||||
0x4c, 0xf0, 0x73, 0xbd,
|
||||
@@ -93,13 +94,12 @@ var testCasesNixos = []sealTestCase{
|
||||
}).
|
||||
UpdatePerm("/tmp/hakurei.1971/8e2c76b066dabe574cf073bdb46eb5c1/bus", acl.Read, acl.Write).
|
||||
UpdatePerm("/tmp/hakurei.1971/8e2c76b066dabe574cf073bdb46eb5c1/system_bus_socket", acl.Read, acl.Write),
|
||||
&sandbox.Params{
|
||||
Uid: 1971,
|
||||
Gid: 100,
|
||||
Flags: sandbox.FAllowNet | sandbox.FAllowUserns,
|
||||
Dir: "/var/lib/persist/module/hakurei/0/1",
|
||||
Path: "/nix/store/yqivzpzzn7z5x0lq9hmbzygh45d8rhqd-chromium-start",
|
||||
Args: []string{"/nix/store/yqivzpzzn7z5x0lq9hmbzygh45d8rhqd-chromium-start"},
|
||||
&container.Params{
|
||||
Uid: 1971,
|
||||
Gid: 100,
|
||||
Dir: "/var/lib/persist/module/hakurei/0/1",
|
||||
Path: "/nix/store/yqivzpzzn7z5x0lq9hmbzygh45d8rhqd-chromium-start",
|
||||
Args: []string{"/nix/store/yqivzpzzn7z5x0lq9hmbzygh45d8rhqd-chromium-start"},
|
||||
Env: []string{
|
||||
"DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1971/bus",
|
||||
"DBUS_SYSTEM_BUS_ADDRESS=unix:path=/run/dbus/system_bus_socket",
|
||||
@@ -114,7 +114,7 @@ var testCasesNixos = []sealTestCase{
|
||||
"XDG_SESSION_CLASS=user",
|
||||
"XDG_SESSION_TYPE=tty",
|
||||
},
|
||||
Ops: new(sandbox.Ops).
|
||||
Ops: new(container.Ops).
|
||||
Proc("/proc").
|
||||
Tmpfs(hst.Tmp, 4096, 0755).
|
||||
Dev("/dev").Mqueue("/dev/mqueue").
|
||||
@@ -122,18 +122,18 @@ var testCasesNixos = []sealTestCase{
|
||||
Bind("/usr/bin", "/usr/bin", 0).
|
||||
Bind("/nix/store", "/nix/store", 0).
|
||||
Bind("/run/current-system", "/run/current-system", 0).
|
||||
Bind("/sys/block", "/sys/block", sandbox.BindOptional).
|
||||
Bind("/sys/bus", "/sys/bus", sandbox.BindOptional).
|
||||
Bind("/sys/class", "/sys/class", sandbox.BindOptional).
|
||||
Bind("/sys/dev", "/sys/dev", sandbox.BindOptional).
|
||||
Bind("/sys/devices", "/sys/devices", sandbox.BindOptional).
|
||||
Bind("/sys/block", "/sys/block", container.BindOptional).
|
||||
Bind("/sys/bus", "/sys/bus", container.BindOptional).
|
||||
Bind("/sys/class", "/sys/class", container.BindOptional).
|
||||
Bind("/sys/dev", "/sys/dev", container.BindOptional).
|
||||
Bind("/sys/devices", "/sys/devices", container.BindOptional).
|
||||
Bind("/run/opengl-driver", "/run/opengl-driver", 0).
|
||||
Bind("/dev/dri", "/dev/dri", sandbox.BindDevice|sandbox.BindWritable|sandbox.BindOptional).
|
||||
Bind("/dev/dri", "/dev/dri", container.BindDevice|container.BindWritable|container.BindOptional).
|
||||
Etc("/etc", "8e2c76b066dabe574cf073bdb46eb5c1").
|
||||
Tmpfs("/run/user", 4096, 0755).
|
||||
Bind("/tmp/hakurei.1971/runtime/1", "/run/user/1971", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/1", "/tmp", sandbox.BindWritable).
|
||||
Bind("/var/lib/persist/module/hakurei/0/1", "/var/lib/persist/module/hakurei/0/1", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/runtime/1", "/run/user/1971", container.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/1", "/tmp", container.BindWritable).
|
||||
Bind("/var/lib/persist/module/hakurei/0/1", "/var/lib/persist/module/hakurei/0/1", container.BindWritable).
|
||||
Place("/etc/passwd", []byte("u0_a1:x:1971:100:Hakurei:/var/lib/persist/module/hakurei/0/1:/run/current-system/sw/bin/zsh\n")).
|
||||
Place("/etc/group", []byte("hakurei:x:100:\n")).
|
||||
Bind("/run/user/1971/wayland-0", "/run/user/1971/wayland-0", 0).
|
||||
@@ -142,6 +142,9 @@ var testCasesNixos = []sealTestCase{
|
||||
Bind("/tmp/hakurei.1971/8e2c76b066dabe574cf073bdb46eb5c1/bus", "/run/user/1971/bus", 0).
|
||||
Bind("/tmp/hakurei.1971/8e2c76b066dabe574cf073bdb46eb5c1/system_bus_socket", "/run/dbus/system_bus_socket", 0).
|
||||
Tmpfs("/var/run/nscd", 8192, 0755),
|
||||
SeccompPresets: seccomp.PresetExt | seccomp.PresetDenyTTY | seccomp.PresetDenyDevel,
|
||||
HostNet: true,
|
||||
ForwardCancel: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -1,21 +1,22 @@
|
||||
package setuid_test
|
||||
package app_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/acl"
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/acl"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
var testCasesPd = []sealTestCase{
|
||||
{
|
||||
"nixos permissive defaults no enablements", new(stubNixOS),
|
||||
&hst.Config{Username: "chronos", Data: "/home/chronos"},
|
||||
app.ID{
|
||||
state.ID{
|
||||
0x4a, 0x45, 0x0b, 0x65,
|
||||
0x96, 0xd7, 0xbc, 0x15,
|
||||
0xbd, 0x01, 0x78, 0x0e,
|
||||
@@ -27,11 +28,10 @@ var testCasesPd = []sealTestCase{
|
||||
Ensure("/tmp/hakurei.1971/runtime/0", 0700).UpdatePermType(system.User, "/tmp/hakurei.1971/runtime/0", acl.Read, acl.Write, acl.Execute).
|
||||
Ensure("/tmp/hakurei.1971/tmpdir", 0700).UpdatePermType(system.User, "/tmp/hakurei.1971/tmpdir", acl.Execute).
|
||||
Ensure("/tmp/hakurei.1971/tmpdir/0", 01700).UpdatePermType(system.User, "/tmp/hakurei.1971/tmpdir/0", acl.Read, acl.Write, acl.Execute),
|
||||
&sandbox.Params{
|
||||
Flags: sandbox.FAllowNet | sandbox.FAllowUserns | sandbox.FAllowTTY,
|
||||
Dir: "/home/chronos",
|
||||
Path: "/run/current-system/sw/bin/zsh",
|
||||
Args: []string{"/run/current-system/sw/bin/zsh"},
|
||||
&container.Params{
|
||||
Dir: "/home/chronos",
|
||||
Path: "/run/current-system/sw/bin/zsh",
|
||||
Args: []string{"/run/current-system/sw/bin/zsh"},
|
||||
Env: []string{
|
||||
"HOME=/home/chronos",
|
||||
"SHELL=/run/current-system/sw/bin/zsh",
|
||||
@@ -41,33 +41,37 @@ var testCasesPd = []sealTestCase{
|
||||
"XDG_SESSION_CLASS=user",
|
||||
"XDG_SESSION_TYPE=tty",
|
||||
},
|
||||
Ops: new(sandbox.Ops).
|
||||
Ops: new(container.Ops).
|
||||
Proc("/proc").
|
||||
Tmpfs(hst.Tmp, 4096, 0755).
|
||||
Dev("/dev").Mqueue("/dev/mqueue").
|
||||
Bind("/bin", "/bin", sandbox.BindWritable).
|
||||
Bind("/boot", "/boot", sandbox.BindWritable).
|
||||
Bind("/home", "/home", sandbox.BindWritable).
|
||||
Bind("/lib", "/lib", sandbox.BindWritable).
|
||||
Bind("/lib64", "/lib64", sandbox.BindWritable).
|
||||
Bind("/nix", "/nix", sandbox.BindWritable).
|
||||
Bind("/root", "/root", sandbox.BindWritable).
|
||||
Bind("/run", "/run", sandbox.BindWritable).
|
||||
Bind("/srv", "/srv", sandbox.BindWritable).
|
||||
Bind("/sys", "/sys", sandbox.BindWritable).
|
||||
Bind("/usr", "/usr", sandbox.BindWritable).
|
||||
Bind("/var", "/var", sandbox.BindWritable).
|
||||
Bind("/dev/kvm", "/dev/kvm", sandbox.BindWritable|sandbox.BindDevice|sandbox.BindOptional).
|
||||
Bind("/bin", "/bin", container.BindWritable).
|
||||
Bind("/boot", "/boot", container.BindWritable).
|
||||
Bind("/home", "/home", container.BindWritable).
|
||||
Bind("/lib", "/lib", container.BindWritable).
|
||||
Bind("/lib64", "/lib64", container.BindWritable).
|
||||
Bind("/nix", "/nix", container.BindWritable).
|
||||
Bind("/root", "/root", container.BindWritable).
|
||||
Bind("/run", "/run", container.BindWritable).
|
||||
Bind("/srv", "/srv", container.BindWritable).
|
||||
Bind("/sys", "/sys", container.BindWritable).
|
||||
Bind("/usr", "/usr", container.BindWritable).
|
||||
Bind("/var", "/var", container.BindWritable).
|
||||
Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional).
|
||||
Tmpfs("/run/user/1971", 8192, 0755).
|
||||
Tmpfs("/run/dbus", 8192, 0755).
|
||||
Etc("/etc", "4a450b6596d7bc15bd01780eb9a607ac").
|
||||
Tmpfs("/run/user", 4096, 0755).
|
||||
Bind("/tmp/hakurei.1971/runtime/0", "/run/user/65534", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/0", "/tmp", sandbox.BindWritable).
|
||||
Bind("/home/chronos", "/home/chronos", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/runtime/0", "/run/user/65534", container.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/0", "/tmp", container.BindWritable).
|
||||
Bind("/home/chronos", "/home/chronos", container.BindWritable).
|
||||
Place("/etc/passwd", []byte("chronos:x:65534:65534:Hakurei:/home/chronos:/run/current-system/sw/bin/zsh\n")).
|
||||
Place("/etc/group", []byte("hakurei:x:65534:\n")).
|
||||
Tmpfs("/var/run/nscd", 8192, 0755),
|
||||
SeccompPresets: seccomp.PresetExt | seccomp.PresetDenyDevel,
|
||||
HostNet: true,
|
||||
RetainSession: true,
|
||||
ForwardCancel: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -112,7 +116,7 @@ var testCasesPd = []sealTestCase{
|
||||
},
|
||||
Enablements: system.EWayland | system.EDBus | system.EPulse,
|
||||
},
|
||||
app.ID{
|
||||
state.ID{
|
||||
0xeb, 0xf0, 0x83, 0xd1,
|
||||
0xb1, 0x75, 0x91, 0x17,
|
||||
0x82, 0xd4, 0x13, 0x36,
|
||||
@@ -163,11 +167,10 @@ var testCasesPd = []sealTestCase{
|
||||
}).
|
||||
UpdatePerm("/tmp/hakurei.1971/ebf083d1b175911782d413369b64ce7c/bus", acl.Read, acl.Write).
|
||||
UpdatePerm("/tmp/hakurei.1971/ebf083d1b175911782d413369b64ce7c/system_bus_socket", acl.Read, acl.Write),
|
||||
&sandbox.Params{
|
||||
Flags: sandbox.FAllowNet | sandbox.FAllowUserns | sandbox.FAllowTTY,
|
||||
Dir: "/home/chronos",
|
||||
Path: "/run/current-system/sw/bin/zsh",
|
||||
Args: []string{"zsh", "-c", "exec chromium "},
|
||||
&container.Params{
|
||||
Dir: "/home/chronos",
|
||||
Path: "/run/current-system/sw/bin/zsh",
|
||||
Args: []string{"zsh", "-c", "exec chromium "},
|
||||
Env: []string{
|
||||
"DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/65534/bus",
|
||||
"DBUS_SYSTEM_BUS_ADDRESS=unix:path=/run/dbus/system_bus_socket",
|
||||
@@ -182,31 +185,31 @@ var testCasesPd = []sealTestCase{
|
||||
"XDG_SESSION_CLASS=user",
|
||||
"XDG_SESSION_TYPE=tty",
|
||||
},
|
||||
Ops: new(sandbox.Ops).
|
||||
Ops: new(container.Ops).
|
||||
Proc("/proc").
|
||||
Tmpfs(hst.Tmp, 4096, 0755).
|
||||
Dev("/dev").Mqueue("/dev/mqueue").
|
||||
Bind("/bin", "/bin", sandbox.BindWritable).
|
||||
Bind("/boot", "/boot", sandbox.BindWritable).
|
||||
Bind("/home", "/home", sandbox.BindWritable).
|
||||
Bind("/lib", "/lib", sandbox.BindWritable).
|
||||
Bind("/lib64", "/lib64", sandbox.BindWritable).
|
||||
Bind("/nix", "/nix", sandbox.BindWritable).
|
||||
Bind("/root", "/root", sandbox.BindWritable).
|
||||
Bind("/run", "/run", sandbox.BindWritable).
|
||||
Bind("/srv", "/srv", sandbox.BindWritable).
|
||||
Bind("/sys", "/sys", sandbox.BindWritable).
|
||||
Bind("/usr", "/usr", sandbox.BindWritable).
|
||||
Bind("/var", "/var", sandbox.BindWritable).
|
||||
Bind("/dev/dri", "/dev/dri", sandbox.BindWritable|sandbox.BindDevice|sandbox.BindOptional).
|
||||
Bind("/dev/kvm", "/dev/kvm", sandbox.BindWritable|sandbox.BindDevice|sandbox.BindOptional).
|
||||
Bind("/bin", "/bin", container.BindWritable).
|
||||
Bind("/boot", "/boot", container.BindWritable).
|
||||
Bind("/home", "/home", container.BindWritable).
|
||||
Bind("/lib", "/lib", container.BindWritable).
|
||||
Bind("/lib64", "/lib64", container.BindWritable).
|
||||
Bind("/nix", "/nix", container.BindWritable).
|
||||
Bind("/root", "/root", container.BindWritable).
|
||||
Bind("/run", "/run", container.BindWritable).
|
||||
Bind("/srv", "/srv", container.BindWritable).
|
||||
Bind("/sys", "/sys", container.BindWritable).
|
||||
Bind("/usr", "/usr", container.BindWritable).
|
||||
Bind("/var", "/var", container.BindWritable).
|
||||
Bind("/dev/dri", "/dev/dri", container.BindWritable|container.BindDevice|container.BindOptional).
|
||||
Bind("/dev/kvm", "/dev/kvm", container.BindWritable|container.BindDevice|container.BindOptional).
|
||||
Tmpfs("/run/user/1971", 8192, 0755).
|
||||
Tmpfs("/run/dbus", 8192, 0755).
|
||||
Etc("/etc", "ebf083d1b175911782d413369b64ce7c").
|
||||
Tmpfs("/run/user", 4096, 0755).
|
||||
Bind("/tmp/hakurei.1971/runtime/9", "/run/user/65534", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/9", "/tmp", sandbox.BindWritable).
|
||||
Bind("/home/chronos", "/home/chronos", sandbox.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/runtime/9", "/run/user/65534", container.BindWritable).
|
||||
Bind("/tmp/hakurei.1971/tmpdir/9", "/tmp", container.BindWritable).
|
||||
Bind("/home/chronos", "/home/chronos", container.BindWritable).
|
||||
Place("/etc/passwd", []byte("chronos:x:65534:65534:Hakurei:/home/chronos:/run/current-system/sw/bin/zsh\n")).
|
||||
Place("/etc/group", []byte("hakurei:x:65534:\n")).
|
||||
Bind("/tmp/hakurei.1971/ebf083d1b175911782d413369b64ce7c/wayland", "/run/user/65534/wayland-0", 0).
|
||||
@@ -215,6 +218,10 @@ var testCasesPd = []sealTestCase{
|
||||
Bind("/tmp/hakurei.1971/ebf083d1b175911782d413369b64ce7c/bus", "/run/user/65534/bus", 0).
|
||||
Bind("/tmp/hakurei.1971/ebf083d1b175911782d413369b64ce7c/system_bus_socket", "/run/dbus/system_bus_socket", 0).
|
||||
Tmpfs("/var/run/nscd", 8192, 0755),
|
||||
SeccompPresets: seccomp.PresetExt | seccomp.PresetDenyDevel,
|
||||
HostNet: true,
|
||||
RetainSession: true,
|
||||
ForwardCancel: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package setuid_test
|
||||
package app_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"os/user"
|
||||
"strconv"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"hakurei.app/hst"
|
||||
)
|
||||
|
||||
// fs methods are not implemented using a real FS
|
||||
@@ -125,8 +125,8 @@ func (s *stubNixOS) Open(name string) (fs.File, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *stubNixOS) Paths() app.Paths {
|
||||
return app.Paths{
|
||||
func (s *stubNixOS) Paths() hst.Paths {
|
||||
return hst.Paths{
|
||||
SharePath: "/tmp/hakurei.1971",
|
||||
RuntimePath: "/run/user/1971",
|
||||
RunDirPath: "/run/user/1971/hakurei",
|
||||
@@ -1,4 +1,4 @@
|
||||
package common
|
||||
package app
|
||||
|
||||
import (
|
||||
"errors"
|
||||
@@ -8,71 +8,78 @@ import (
|
||||
"path"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal/sys"
|
||||
"hakurei.app/system/dbus"
|
||||
)
|
||||
|
||||
// in practice there should be less than 30 entries added by the runtime;
|
||||
// allocating slightly more as a margin for future expansion
|
||||
const preallocateOpsCount = 1 << 5
|
||||
|
||||
// NewContainer initialises [sandbox.Params] via [hst.ContainerConfig].
|
||||
// newContainer initialises [container.Params] via [hst.ContainerConfig].
|
||||
// Note that remaining container setup must be queued by the caller.
|
||||
func NewContainer(s *hst.ContainerConfig, os sys.State, uid, gid *int) (*sandbox.Params, map[string]string, error) {
|
||||
func newContainer(s *hst.ContainerConfig, os sys.State, uid, gid *int) (*container.Params, map[string]string, error) {
|
||||
if s == nil {
|
||||
return nil, nil, syscall.EBADE
|
||||
}
|
||||
|
||||
container := &sandbox.Params{
|
||||
Hostname: s.Hostname,
|
||||
Seccomp: s.Seccomp,
|
||||
params := &container.Params{
|
||||
Hostname: s.Hostname,
|
||||
SeccompFlags: s.SeccompFlags,
|
||||
SeccompPresets: s.SeccompPresets,
|
||||
RetainSession: s.Tty,
|
||||
HostNet: s.Net,
|
||||
|
||||
// the container is canceled when shim is requested to exit or receives an interrupt or termination signal;
|
||||
// this behaviour is implemented in the shim
|
||||
ForwardCancel: s.WaitDelay >= 0,
|
||||
}
|
||||
|
||||
{
|
||||
ops := make(sandbox.Ops, 0, preallocateOpsCount+len(s.Filesystem)+len(s.Link)+len(s.Cover))
|
||||
container.Ops = &ops
|
||||
ops := make(container.Ops, 0, preallocateOpsCount+len(s.Filesystem)+len(s.Link)+len(s.Cover))
|
||||
params.Ops = &ops
|
||||
}
|
||||
|
||||
if s.Multiarch {
|
||||
container.Seccomp |= seccomp.FilterMultiarch
|
||||
params.SeccompFlags |= seccomp.AllowMultiarch
|
||||
}
|
||||
|
||||
if s.Devel {
|
||||
container.Flags |= sandbox.FAllowDevel
|
||||
if !s.SeccompCompat {
|
||||
params.SeccompPresets |= seccomp.PresetExt
|
||||
}
|
||||
if s.Userns {
|
||||
container.Flags |= sandbox.FAllowUserns
|
||||
if !s.Devel {
|
||||
params.SeccompPresets |= seccomp.PresetDenyDevel
|
||||
}
|
||||
if s.Net {
|
||||
container.Flags |= sandbox.FAllowNet
|
||||
if !s.Userns {
|
||||
params.SeccompPresets |= seccomp.PresetDenyNS
|
||||
}
|
||||
if s.Tty {
|
||||
container.Flags |= sandbox.FAllowTTY
|
||||
if !s.Tty {
|
||||
params.SeccompPresets |= seccomp.PresetDenyTTY
|
||||
}
|
||||
|
||||
if s.MapRealUID {
|
||||
/* some programs fail to connect to dbus session running as a different uid
|
||||
so this workaround is introduced to map priv-side caller uid in container */
|
||||
container.Uid = os.Getuid()
|
||||
*uid = container.Uid
|
||||
container.Gid = os.Getgid()
|
||||
*gid = container.Gid
|
||||
params.Uid = os.Getuid()
|
||||
*uid = params.Uid
|
||||
params.Gid = os.Getgid()
|
||||
*gid = params.Gid
|
||||
} else {
|
||||
*uid = sandbox.OverflowUid()
|
||||
*gid = sandbox.OverflowGid()
|
||||
*uid = container.OverflowUid()
|
||||
*gid = container.OverflowGid()
|
||||
}
|
||||
|
||||
container.
|
||||
params.
|
||||
Proc("/proc").
|
||||
Tmpfs(hst.Tmp, 1<<12, 0755)
|
||||
|
||||
if !s.Device {
|
||||
container.Dev("/dev").Mqueue("/dev/mqueue")
|
||||
params.Dev("/dev").Mqueue("/dev/mqueue")
|
||||
} else {
|
||||
container.Bind("/dev", "/dev", sandbox.BindWritable|sandbox.BindDevice)
|
||||
params.Bind("/dev", "/dev", container.BindWritable|container.BindDevice)
|
||||
}
|
||||
|
||||
/* retrieve paths and hide them if they're made available in the sandbox;
|
||||
@@ -151,29 +158,29 @@ func NewContainer(s *hst.ContainerConfig, os sys.State, uid, gid *int) (*sandbox
|
||||
|
||||
var flags int
|
||||
if c.Write {
|
||||
flags |= sandbox.BindWritable
|
||||
flags |= container.BindWritable
|
||||
}
|
||||
if c.Device {
|
||||
flags |= sandbox.BindDevice | sandbox.BindWritable
|
||||
flags |= container.BindDevice | container.BindWritable
|
||||
}
|
||||
if !c.Must {
|
||||
flags |= sandbox.BindOptional
|
||||
flags |= container.BindOptional
|
||||
}
|
||||
container.Bind(c.Src, dest, flags)
|
||||
params.Bind(c.Src, dest, flags)
|
||||
}
|
||||
|
||||
// cover matched paths
|
||||
for i, ok := range hidePathMatch {
|
||||
if ok {
|
||||
container.Tmpfs(hidePaths[i], 1<<13, 0755)
|
||||
params.Tmpfs(hidePaths[i], 1<<13, 0755)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range s.Link {
|
||||
container.Link(l[0], l[1])
|
||||
params.Link(l[0], l[1])
|
||||
}
|
||||
|
||||
return container, maps.Clone(s.Env), nil
|
||||
return params, maps.Clone(s.Env), nil
|
||||
}
|
||||
|
||||
func evalSymlinks(os sys.State, v *string) error {
|
||||
@@ -1,11 +1,10 @@
|
||||
package setuid
|
||||
package app
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
|
||||
. "git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
func PrintRunStateErr(rs *RunState, runErr error) (code int) {
|
||||
24
internal/app/export_linux_test.go
Normal file
24
internal/app/export_linux_test.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/sys"
|
||||
"hakurei.app/system"
|
||||
)
|
||||
|
||||
func NewWithID(id state.ID, os sys.State) App {
|
||||
a := new(app)
|
||||
a.id = newID(&id)
|
||||
a.sys = os
|
||||
return a
|
||||
}
|
||||
|
||||
func AppIParams(a App, sa SealedApp) (*system.I, *container.Params) {
|
||||
v := a.(*app)
|
||||
seal := sa.(*outcome)
|
||||
if v.outcome != seal || v.id != seal.id {
|
||||
panic("broken app/outcome link")
|
||||
}
|
||||
return seal.sys, seal.container
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package instance
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app/internal/setuid"
|
||||
)
|
||||
|
||||
func PrintRunStateErr(whence int, rs *app.RunState, runErr error) (code int) {
|
||||
switch whence {
|
||||
case ISetuid:
|
||||
return setuid.PrintRunStateErr(rs, runErr)
|
||||
default:
|
||||
panic(syscall.EINVAL)
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
// Package instance exposes cross-package implementation details and provides constructors for builtin implementations.
|
||||
package instance
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"syscall"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app/internal/setuid"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
)
|
||||
|
||||
const (
|
||||
ISetuid = iota
|
||||
)
|
||||
|
||||
func New(whence int, ctx context.Context, os sys.State) (app.App, error) {
|
||||
switch whence {
|
||||
case ISetuid:
|
||||
return setuid.New(ctx, os)
|
||||
default:
|
||||
return nil, syscall.EINVAL
|
||||
}
|
||||
}
|
||||
|
||||
func MustNew(whence int, ctx context.Context, os sys.State) app.App {
|
||||
a, err := New(whence, ctx, os)
|
||||
if err != nil {
|
||||
log.Fatalf("cannot create app: %v", err)
|
||||
}
|
||||
return a
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
package instance
|
||||
|
||||
import "git.gensokyo.uk/security/hakurei/internal/app/internal/setuid"
|
||||
|
||||
// ShimMain is the main function of the shim process and runs as the unconstrained target user.
|
||||
func ShimMain() { setuid.ShimMain() }
|
||||
@@ -1,24 +0,0 @@
|
||||
package setuid
|
||||
|
||||
import (
|
||||
. "git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
)
|
||||
|
||||
func NewWithID(id ID, os sys.State) App {
|
||||
a := new(app)
|
||||
a.id = newID(&id)
|
||||
a.sys = os
|
||||
return a
|
||||
}
|
||||
|
||||
func AppIParams(a App, sa SealedApp) (*system.I, *sandbox.Params) {
|
||||
v := a.(*app)
|
||||
seal := sa.(*outcome)
|
||||
if v.outcome != seal || v.id != seal.id {
|
||||
panic("broken app/outcome link")
|
||||
}
|
||||
return seal.sys, seal.container
|
||||
}
|
||||
@@ -1,181 +0,0 @@
|
||||
package setuid
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/seccomp"
|
||||
)
|
||||
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
|
||||
static pid_t hakurei_shim_param_ppid = -1;
|
||||
|
||||
// this cannot unblock hlog since Go code is not async-signal-safe
|
||||
static void hakurei_shim_sigaction(int sig, siginfo_t *si, void *ucontext) {
|
||||
if (sig != SIGCONT || si == NULL) {
|
||||
// unreachable
|
||||
fprintf(stderr, "sigaction: sa_sigaction got invalid siginfo\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// monitor requests shim exit
|
||||
if (si->si_pid == hakurei_shim_param_ppid)
|
||||
exit(254);
|
||||
|
||||
fprintf(stderr, "sigaction: got SIGCONT from process %d\n", si->si_pid);
|
||||
|
||||
// shim orphaned before monitor delivers a signal
|
||||
if (getppid() != hakurei_shim_param_ppid)
|
||||
exit(3);
|
||||
}
|
||||
|
||||
void hakurei_shim_setup_cont_signal(pid_t ppid) {
|
||||
struct sigaction new_action = {0}, old_action = {0};
|
||||
if (sigaction(SIGCONT, NULL, &old_action) != 0)
|
||||
return;
|
||||
if (old_action.sa_handler != SIG_DFL) {
|
||||
errno = ENOTRECOVERABLE;
|
||||
return;
|
||||
}
|
||||
|
||||
new_action.sa_sigaction = hakurei_shim_sigaction;
|
||||
if (sigemptyset(&new_action.sa_mask) != 0)
|
||||
return;
|
||||
new_action.sa_flags = SA_ONSTACK | SA_SIGINFO;
|
||||
|
||||
if (sigaction(SIGCONT, &new_action, NULL) != 0)
|
||||
return;
|
||||
|
||||
errno = 0;
|
||||
hakurei_shim_param_ppid = ppid;
|
||||
}
|
||||
*/
|
||||
import "C"
|
||||
|
||||
const shimEnv = "HAKUREI_SHIM"
|
||||
|
||||
type shimParams struct {
|
||||
// monitor pid, checked against ppid in signal handler
|
||||
Monitor int
|
||||
|
||||
// finalised container params
|
||||
Container *sandbox.Params
|
||||
// path to outer home directory
|
||||
Home string
|
||||
|
||||
// verbosity pass through
|
||||
Verbose bool
|
||||
}
|
||||
|
||||
// ShimMain is the main function of the shim process and runs as the unconstrained target user.
|
||||
func ShimMain() {
|
||||
hlog.Prepare("shim")
|
||||
|
||||
if err := sandbox.SetDumpable(sandbox.SUID_DUMP_DISABLE); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
}
|
||||
|
||||
var (
|
||||
params shimParams
|
||||
closeSetup func() error
|
||||
)
|
||||
if f, err := sandbox.Receive(shimEnv, ¶ms, nil); err != nil {
|
||||
if errors.Is(err, sandbox.ErrInvalid) {
|
||||
log.Fatal("invalid config descriptor")
|
||||
}
|
||||
if errors.Is(err, sandbox.ErrNotSet) {
|
||||
log.Fatal("HAKUREI_SHIM not set")
|
||||
}
|
||||
|
||||
log.Fatalf("cannot receive shim setup params: %v", err)
|
||||
} else {
|
||||
internal.InstallFmsg(params.Verbose)
|
||||
closeSetup = f
|
||||
|
||||
// the Go runtime does not expose siginfo_t so SIGCONT is handled in C to check si_pid
|
||||
if _, err = C.hakurei_shim_setup_cont_signal(C.pid_t(params.Monitor)); err != nil {
|
||||
log.Fatalf("cannot install SIGCONT handler: %v", err)
|
||||
}
|
||||
|
||||
// pdeath_signal delivery is checked as if the dying process called kill(2), see kernel/exit.c
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGCONT), 0); errno != 0 {
|
||||
log.Fatalf("cannot set parent-death signal: %v", errno)
|
||||
}
|
||||
}
|
||||
|
||||
if params.Container == nil || params.Container.Ops == nil {
|
||||
log.Fatal("invalid container params")
|
||||
}
|
||||
|
||||
// close setup socket
|
||||
if err := closeSetup(); err != nil {
|
||||
log.Printf("cannot close setup pipe: %v", err)
|
||||
// not fatal
|
||||
}
|
||||
|
||||
// ensure home directory as target user
|
||||
if s, err := os.Stat(params.Home); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if err = os.Mkdir(params.Home, 0700); err != nil {
|
||||
log.Fatalf("cannot create home directory: %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Fatalf("cannot access home directory: %v", err)
|
||||
}
|
||||
|
||||
// home directory is created, proceed
|
||||
} else if !s.IsDir() {
|
||||
log.Fatalf("path %q is not a directory", params.Home)
|
||||
}
|
||||
|
||||
var name string
|
||||
if len(params.Container.Args) > 0 {
|
||||
name = params.Container.Args[0]
|
||||
}
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop() // unreachable
|
||||
container := sandbox.New(ctx, name)
|
||||
container.Params = *params.Container
|
||||
container.Stdin, container.Stdout, container.Stderr = os.Stdin, os.Stdout, os.Stderr
|
||||
container.Cancel = func(cmd *exec.Cmd) error { return cmd.Process.Signal(os.Interrupt) }
|
||||
container.WaitDelay = 2 * time.Second
|
||||
|
||||
if err := container.Start(); err != nil {
|
||||
hlog.PrintBaseError(err, "cannot start container:")
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := container.Serve(); err != nil {
|
||||
hlog.PrintBaseError(err, "cannot configure container:")
|
||||
}
|
||||
|
||||
if err := seccomp.Load(seccomp.PresetCommon); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
}
|
||||
|
||||
if err := container.Wait(); err != nil {
|
||||
var exitError *exec.ExitError
|
||||
if !errors.As(err, &exitError) {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
os.Exit(2)
|
||||
}
|
||||
log.Printf("wait: %v", err)
|
||||
os.Exit(127)
|
||||
}
|
||||
os.Exit(exitError.ExitCode())
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package common
|
||||
package app
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
@@ -1,4 +1,4 @@
|
||||
package common
|
||||
package app
|
||||
|
||||
import (
|
||||
"testing"
|
||||
@@ -1,4 +1,4 @@
|
||||
package setuid
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -12,12 +12,11 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
. "git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/state"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/system"
|
||||
)
|
||||
|
||||
const shimWaitTimeout = 5 * time.Second
|
||||
@@ -94,7 +93,7 @@ func (seal *outcome) Run(rs *RunState) error {
|
||||
cmd.Cancel = func() error { return cmd.Process.Signal(syscall.SIGCONT) }
|
||||
|
||||
var e *gob.Encoder
|
||||
if fd, encoder, err := sandbox.Setup(&cmd.ExtraFiles); err != nil {
|
||||
if fd, encoder, err := container.Setup(&cmd.ExtraFiles); err != nil {
|
||||
return hlog.WrapErrSuffix(err,
|
||||
"cannot create shim setup pipe:")
|
||||
} else {
|
||||
@@ -124,7 +123,15 @@ func (seal *outcome) Run(rs *RunState) error {
|
||||
// this prevents blocking forever on an early failure
|
||||
waitErr, setupErr := make(chan error, 1), make(chan error, 1)
|
||||
go func() { waitErr <- cmd.Wait(); cancel() }()
|
||||
go func() { setupErr <- e.Encode(&shimParams{os.Getpid(), seal.container, seal.user.data, hlog.Load()}) }()
|
||||
go func() {
|
||||
setupErr <- e.Encode(&shimParams{
|
||||
os.Getpid(),
|
||||
seal.waitDelay,
|
||||
seal.container,
|
||||
seal.user.data,
|
||||
hlog.Load(),
|
||||
})
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-setupErr:
|
||||
@@ -1,4 +1,4 @@
|
||||
package setuid
|
||||
package app
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -15,18 +15,18 @@ import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.gensokyo.uk/security/hakurei/acl"
|
||||
"git.gensokyo.uk/security/hakurei/dbus"
|
||||
"git.gensokyo.uk/security/hakurei/hst"
|
||||
"git.gensokyo.uk/security/hakurei/internal"
|
||||
. "git.gensokyo.uk/security/hakurei/internal/app"
|
||||
"git.gensokyo.uk/security/hakurei/internal/app/instance/common"
|
||||
"git.gensokyo.uk/security/hakurei/internal/hlog"
|
||||
"git.gensokyo.uk/security/hakurei/internal/sys"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox"
|
||||
"git.gensokyo.uk/security/hakurei/sandbox/wl"
|
||||
"git.gensokyo.uk/security/hakurei/system"
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/hst"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/app/state"
|
||||
"hakurei.app/internal/hlog"
|
||||
"hakurei.app/internal/sys"
|
||||
"hakurei.app/system"
|
||||
"hakurei.app/system/acl"
|
||||
"hakurei.app/system/dbus"
|
||||
"hakurei.app/system/wayland"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -66,7 +66,7 @@ var posixUsername = regexp.MustCompilePOSIX("^[a-z_]([A-Za-z0-9_-]{0,31}|[A-Za-z
|
||||
// outcome stores copies of various parts of [hst.Config]
|
||||
type outcome struct {
|
||||
// copied from initialising [app]
|
||||
id *stringPair[ID]
|
||||
id *stringPair[state.ID]
|
||||
// copied from [sys.State] response
|
||||
runDirPath string
|
||||
|
||||
@@ -80,7 +80,8 @@ type outcome struct {
|
||||
sys *system.I
|
||||
ctx context.Context
|
||||
|
||||
container *sandbox.Params
|
||||
waitDelay time.Duration
|
||||
container *container.Params
|
||||
env map[string]string
|
||||
sync *os.File
|
||||
|
||||
@@ -97,7 +98,7 @@ type shareHost struct {
|
||||
runtimeSharePath string
|
||||
|
||||
seal *outcome
|
||||
sc Paths
|
||||
sc hst.Paths
|
||||
}
|
||||
|
||||
// ensureRuntimeDir must be called if direct access to paths within XDG_RUNTIME_DIR is required
|
||||
@@ -183,7 +184,7 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
if seal.user.username == "" {
|
||||
seal.user.username = "chronos"
|
||||
} else if !posixUsername.MatchString(seal.user.username) ||
|
||||
len(seal.user.username) >= internal.Sysconf_SC_LOGIN_NAME_MAX() {
|
||||
len(seal.user.username) >= internal.Sysconf(internal.SC_LOGIN_NAME_MAX) {
|
||||
return hlog.WrapErr(ErrName,
|
||||
fmt.Sprintf("invalid user name %q", seal.user.username))
|
||||
}
|
||||
@@ -281,7 +282,8 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
{
|
||||
var uid, gid int
|
||||
var err error
|
||||
seal.container, seal.env, err = common.NewContainer(config.Container, sys, &uid, &gid)
|
||||
seal.container, seal.env, err = newContainer(config.Container, sys, &uid, &gid)
|
||||
seal.waitDelay = config.Container.WaitDelay
|
||||
if err != nil {
|
||||
return hlog.WrapErrSuffix(err,
|
||||
"cannot initialise container configuration:")
|
||||
@@ -334,7 +336,7 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
seal.sys.Ensure(runtimeDirInst, 0700)
|
||||
seal.sys.UpdatePermType(system.User, runtimeDirInst, acl.Read, acl.Write, acl.Execute)
|
||||
seal.container.Tmpfs("/run/user", 1<<12, 0755)
|
||||
seal.container.Bind(runtimeDirInst, innerRuntimeDir, sandbox.BindWritable)
|
||||
seal.container.Bind(runtimeDirInst, innerRuntimeDir, container.BindWritable)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -345,7 +347,7 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
seal.sys.Ensure(tmpdirInst, 01700)
|
||||
seal.sys.UpdatePermType(system.User, tmpdirInst, acl.Read, acl.Write, acl.Execute)
|
||||
// mount inner /tmp from share so it shares persistence and storage behaviour of host /tmp
|
||||
seal.container.Bind(tmpdirInst, "/tmp", sandbox.BindWritable)
|
||||
seal.container.Bind(tmpdirInst, "/tmp", container.BindWritable)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -357,7 +359,7 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
if seal.user.username != "" {
|
||||
username = seal.user.username
|
||||
}
|
||||
seal.container.Bind(seal.user.data, homeDir, sandbox.BindWritable)
|
||||
seal.container.Bind(seal.user.data, homeDir, container.BindWritable)
|
||||
seal.container.Dir = homeDir
|
||||
seal.env["HOME"] = homeDir
|
||||
seal.env["USER"] = username
|
||||
@@ -377,23 +379,23 @@ func (seal *outcome) finalise(ctx context.Context, sys sys.State, config *hst.Co
|
||||
if config.Enablements&system.EWayland != 0 {
|
||||
// outer wayland socket (usually `/run/user/%d/wayland-%d`)
|
||||
var socketPath string
|
||||
if name, ok := sys.LookupEnv(wl.WaylandDisplay); !ok {
|
||||
hlog.Verbose(wl.WaylandDisplay + " is not set, assuming " + wl.FallbackName)
|
||||
socketPath = path.Join(share.sc.RuntimePath, wl.FallbackName)
|
||||
if name, ok := sys.LookupEnv(wayland.WaylandDisplay); !ok {
|
||||
hlog.Verbose(wayland.WaylandDisplay + " is not set, assuming " + wayland.FallbackName)
|
||||
socketPath = path.Join(share.sc.RuntimePath, wayland.FallbackName)
|
||||
} else if !path.IsAbs(name) {
|
||||
socketPath = path.Join(share.sc.RuntimePath, name)
|
||||
} else {
|
||||
socketPath = name
|
||||
}
|
||||
|
||||
innerPath := path.Join(innerRuntimeDir, wl.FallbackName)
|
||||
seal.env[wl.WaylandDisplay] = wl.FallbackName
|
||||
innerPath := path.Join(innerRuntimeDir, wayland.FallbackName)
|
||||
seal.env[wayland.WaylandDisplay] = wayland.FallbackName
|
||||
|
||||
if !config.DirectWayland { // set up security-context-v1
|
||||
appID := config.ID
|
||||
if appID == "" {
|
||||
// use instance ID in case app id is not set
|
||||
appID = "uk.gensokyo.hakurei." + seal.id.String()
|
||||
appID = "app.hakurei." + seal.id.String()
|
||||
}
|
||||
// downstream socket paths
|
||||
outerPath := path.Join(share.instance(), "wayland")
|
||||
65
internal/app/shim-signal.c
Normal file
65
internal/app/shim-signal.c
Normal file
@@ -0,0 +1,65 @@
|
||||
#include "shim-signal.h"
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static pid_t hakurei_shim_param_ppid = -1;
|
||||
static int hakurei_shim_fd = -1;
|
||||
|
||||
static ssize_t hakurei_shim_write(const void *buf, size_t count) {
|
||||
int savedErrno = errno;
|
||||
ssize_t ret = write(hakurei_shim_fd, buf, count);
|
||||
if (ret == -1 && errno != EAGAIN)
|
||||
exit(EXIT_FAILURE);
|
||||
errno = savedErrno;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* see shim_linux.go for handling of the value */
|
||||
static void hakurei_shim_sigaction(int sig, siginfo_t *si, void *ucontext) {
|
||||
if (sig != SIGCONT || si == NULL) {
|
||||
/* unreachable */
|
||||
hakurei_shim_write("\2", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (si->si_pid == hakurei_shim_param_ppid) {
|
||||
/* monitor requests shim exit */
|
||||
hakurei_shim_write("\0", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* unexpected si_pid */
|
||||
hakurei_shim_write("\3", 1);
|
||||
|
||||
if (getppid() != hakurei_shim_param_ppid)
|
||||
/* shim orphaned before monitor delivers a signal */
|
||||
hakurei_shim_write("\1", 1);
|
||||
}
|
||||
|
||||
void hakurei_shim_setup_cont_signal(pid_t ppid, int fd) {
|
||||
if (hakurei_shim_param_ppid != -1 || hakurei_shim_fd != -1)
|
||||
*(int *)NULL = 0; /* unreachable */
|
||||
|
||||
struct sigaction new_action = {0}, old_action = {0};
|
||||
if (sigaction(SIGCONT, NULL, &old_action) != 0)
|
||||
return;
|
||||
if (old_action.sa_handler != SIG_DFL) {
|
||||
errno = ENOTRECOVERABLE;
|
||||
return;
|
||||
}
|
||||
|
||||
new_action.sa_sigaction = hakurei_shim_sigaction;
|
||||
if (sigemptyset(&new_action.sa_mask) != 0)
|
||||
return;
|
||||
new_action.sa_flags = SA_ONSTACK | SA_SIGINFO;
|
||||
|
||||
if (sigaction(SIGCONT, &new_action, NULL) != 0)
|
||||
return;
|
||||
|
||||
errno = 0;
|
||||
hakurei_shim_param_ppid = ppid;
|
||||
hakurei_shim_fd = fd;
|
||||
}
|
||||
3
internal/app/shim-signal.h
Normal file
3
internal/app/shim-signal.h
Normal file
@@ -0,0 +1,3 @@
|
||||
#include <signal.h>
|
||||
|
||||
void hakurei_shim_setup_cont_signal(pid_t ppid, int fd);
|
||||
204
internal/app/shim_linux.go
Normal file
204
internal/app/shim_linux.go
Normal file
@@ -0,0 +1,204 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"hakurei.app/container"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/internal"
|
||||
"hakurei.app/internal/hlog"
|
||||
)
|
||||
|
||||
//#include "shim-signal.h"
|
||||
import "C"
|
||||
|
||||
const shimEnv = "HAKUREI_SHIM"
|
||||
|
||||
type shimParams struct {
|
||||
// monitor pid, checked against ppid in signal handler
|
||||
Monitor int
|
||||
|
||||
// duration to wait for after interrupting a container's initial process before the container is killed;
|
||||
// zero value defaults to [DefaultShimWaitDelay], values exceeding [MaxShimWaitDelay] becomes [MaxShimWaitDelay]
|
||||
WaitDelay time.Duration
|
||||
|
||||
// finalised container params
|
||||
Container *container.Params
|
||||
// path to outer home directory
|
||||
Home string
|
||||
|
||||
// verbosity pass through
|
||||
Verbose bool
|
||||
}
|
||||
|
||||
const (
|
||||
// ShimExitRequest is returned when the monitor process requests shim exit.
|
||||
ShimExitRequest = 254
|
||||
// ShimExitOrphan is returned when the shim is orphaned before monitor delivers a signal.
|
||||
ShimExitOrphan = 3
|
||||
|
||||
DefaultShimWaitDelay = 5 * time.Second
|
||||
MaxShimWaitDelay = 30 * time.Second
|
||||
)
|
||||
|
||||
// ShimMain is the main function of the shim process and runs as the unconstrained target user.
|
||||
func ShimMain() {
|
||||
hlog.Prepare("shim")
|
||||
|
||||
if err := container.SetDumpable(container.SUID_DUMP_DISABLE); err != nil {
|
||||
log.Fatalf("cannot set SUID_DUMP_DISABLE: %s", err)
|
||||
}
|
||||
|
||||
var (
|
||||
params shimParams
|
||||
closeSetup func() error
|
||||
)
|
||||
if f, err := container.Receive(shimEnv, ¶ms, nil); err != nil {
|
||||
if errors.Is(err, container.ErrInvalid) {
|
||||
log.Fatal("invalid config descriptor")
|
||||
}
|
||||
if errors.Is(err, container.ErrNotSet) {
|
||||
log.Fatal("HAKUREI_SHIM not set")
|
||||
}
|
||||
|
||||
log.Fatalf("cannot receive shim setup params: %v", err)
|
||||
} else {
|
||||
internal.InstallOutput(params.Verbose)
|
||||
closeSetup = f
|
||||
}
|
||||
|
||||
var signalPipe io.ReadCloser
|
||||
// the Go runtime does not expose siginfo_t so SIGCONT is handled in C to check si_pid
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
log.Fatalf("cannot pipe: %v", err)
|
||||
} else if _, err = C.hakurei_shim_setup_cont_signal(C.pid_t(params.Monitor), C.int(w.Fd())); err != nil {
|
||||
log.Fatalf("cannot install SIGCONT handler: %v", err)
|
||||
} else {
|
||||
defer runtime.KeepAlive(w)
|
||||
signalPipe = r
|
||||
}
|
||||
|
||||
// pdeath_signal delivery is checked as if the dying process called kill(2), see kernel/exit.c
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGCONT), 0); errno != 0 {
|
||||
log.Fatalf("cannot set parent-death signal: %v", errno)
|
||||
}
|
||||
|
||||
// signal handler outcome
|
||||
var cancelContainer atomic.Pointer[context.CancelFunc]
|
||||
go func() {
|
||||
buf := make([]byte, 1)
|
||||
for {
|
||||
if _, err := signalPipe.Read(buf); err != nil {
|
||||
log.Fatalf("cannot read from signal pipe: %v", err)
|
||||
}
|
||||
|
||||
switch buf[0] {
|
||||
case 0: // got SIGCONT from monitor: shim exit requested
|
||||
if fp := cancelContainer.Load(); params.Container.ForwardCancel && fp != nil && *fp != nil {
|
||||
(*fp)()
|
||||
// shim now bound by ShimWaitDelay, implemented below
|
||||
continue
|
||||
}
|
||||
|
||||
// setup has not completed, terminate immediately
|
||||
hlog.Resume()
|
||||
os.Exit(ShimExitRequest)
|
||||
return
|
||||
|
||||
case 1: // got SIGCONT after adoption: monitor died before delivering signal
|
||||
hlog.BeforeExit()
|
||||
os.Exit(ShimExitOrphan)
|
||||
return
|
||||
|
||||
case 2: // unreachable
|
||||
log.Println("sa_sigaction got invalid siginfo")
|
||||
|
||||
case 3: // got SIGCONT from unexpected process: hopefully the terminal driver
|
||||
log.Println("got SIGCONT from unexpected process")
|
||||
|
||||
default: // unreachable
|
||||
log.Fatalf("got invalid message %d from signal handler", buf[0])
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if params.Container == nil || params.Container.Ops == nil {
|
||||
log.Fatal("invalid container params")
|
||||
}
|
||||
|
||||
// close setup socket
|
||||
if err := closeSetup(); err != nil {
|
||||
log.Printf("cannot close setup pipe: %v", err)
|
||||
// not fatal
|
||||
}
|
||||
|
||||
// ensure home directory as target user
|
||||
if s, err := os.Stat(params.Home); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if err = os.Mkdir(params.Home, 0700); err != nil {
|
||||
log.Fatalf("cannot create home directory: %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Fatalf("cannot access home directory: %v", err)
|
||||
}
|
||||
|
||||
// home directory is created, proceed
|
||||
} else if !s.IsDir() {
|
||||
log.Fatalf("path %q is not a directory", params.Home)
|
||||
}
|
||||
|
||||
var name string
|
||||
if len(params.Container.Args) > 0 {
|
||||
name = params.Container.Args[0]
|
||||
}
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
cancelContainer.Store(&stop)
|
||||
z := container.New(ctx, name)
|
||||
z.Params = *params.Container
|
||||
z.Stdin, z.Stdout, z.Stderr = os.Stdin, os.Stdout, os.Stderr
|
||||
|
||||
z.WaitDelay = params.WaitDelay
|
||||
if z.WaitDelay == 0 {
|
||||
z.WaitDelay = DefaultShimWaitDelay
|
||||
}
|
||||
if z.WaitDelay > MaxShimWaitDelay {
|
||||
z.WaitDelay = MaxShimWaitDelay
|
||||
}
|
||||
|
||||
if err := z.Start(); err != nil {
|
||||
hlog.PrintBaseError(err, "cannot start container:")
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := z.Serve(); err != nil {
|
||||
hlog.PrintBaseError(err, "cannot configure container:")
|
||||
}
|
||||
|
||||
if err := seccomp.Load(
|
||||
seccomp.Preset(seccomp.PresetStrict, seccomp.AllowMultiarch),
|
||||
seccomp.AllowMultiarch,
|
||||
); err != nil {
|
||||
log.Fatalf("cannot load syscall filter: %v", err)
|
||||
}
|
||||
|
||||
if err := z.Wait(); err != nil {
|
||||
var exitError *exec.ExitError
|
||||
if !errors.As(err, &exitError) {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
os.Exit(2)
|
||||
}
|
||||
log.Printf("wait: %v", err)
|
||||
os.Exit(127)
|
||||
}
|
||||
os.Exit(exitError.ExitCode())
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package app
|
||||
package state
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user