Compare commits
7 Commits
9aad98d409
...
8a3c3d145a
| Author | SHA1 | Date | |
|---|---|---|---|
|
8a3c3d145a
|
|||
|
575ef307ad
|
|||
|
d4144fcf7f
|
|||
|
bad66facbc
|
|||
|
4aba014eac
|
|||
|
779ba994ce
|
|||
|
917be2de93
|
46
container/binfmt.go
Normal file
46
container/binfmt.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"hakurei.app/check"
|
||||
)
|
||||
|
||||
// escapeBinfmt escapes magic/mask sequences in a [BinfmtEntry].
|
||||
func escapeBinfmt(buf *strings.Builder, s string) string {
|
||||
const lowerhex = "0123456789abcdef"
|
||||
|
||||
buf.Reset()
|
||||
for _, c := range unsafe.Slice(unsafe.StringData(s), len(s)) {
|
||||
switch c {
|
||||
case 0, '\\', ':':
|
||||
buf.WriteString(`\x`)
|
||||
buf.WriteByte(lowerhex[c>>4])
|
||||
buf.WriteByte(lowerhex[c&0xf])
|
||||
|
||||
default:
|
||||
buf.WriteByte(c)
|
||||
}
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// BinfmtEntry is an entry to be registered by the init process.
|
||||
type BinfmtEntry struct {
|
||||
// The offset of the magic/mask in the file, counted in bytes.
|
||||
Offset byte
|
||||
// The byte sequence binfmt_misc is matching for.
|
||||
Magic string
|
||||
// An (optional, defaults to all 0xff) mask.
|
||||
Mask string
|
||||
// The program that should be invoked with the binary as first argument.
|
||||
Interpreter *check.Absolute
|
||||
}
|
||||
|
||||
// Valid returns whether e can be registered into the kernel.
|
||||
func (e *BinfmtEntry) Valid() bool {
|
||||
return e != nil &&
|
||||
int(e.Offset)+max(len(e.Magic), len(e.Mask)) < 128 &&
|
||||
e.Interpreter != nil && len(e.Interpreter.String()) < 128
|
||||
}
|
||||
62
container/binfmt_test.go
Normal file
62
container/binfmt_test.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"hakurei.app/fhs"
|
||||
)
|
||||
|
||||
func TestEscapeBinfmt(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
magic string
|
||||
want string
|
||||
}{
|
||||
{"packed DOS applications", "\x0eDEX", "\x0eDEX"},
|
||||
|
||||
{"riscv64 magic",
|
||||
"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00",
|
||||
"\x7fELF\x02\x01\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\x02\\x00\xf3\\x00"},
|
||||
{"riscv64 mask",
|
||||
"\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff",
|
||||
"\xff\xff\xff\xff\xff\xff\xff\\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
got := escapeBinfmt(new(strings.Builder), tc.magic)
|
||||
if got != tc.want {
|
||||
t.Errorf("escapeBinfmt: %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinfmtEntry(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
e BinfmtEntry
|
||||
valid bool
|
||||
}{
|
||||
{"zero", BinfmtEntry{}, false},
|
||||
{"large offset", BinfmtEntry{Offset: 128}, false},
|
||||
{"long magic", BinfmtEntry{Magic: strings.Repeat("\x00", 128)}, false},
|
||||
{"long mask", BinfmtEntry{Mask: strings.Repeat("\x00", 128)}, false},
|
||||
{"valid", BinfmtEntry{Interpreter: fhs.AbsRoot}, true},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if tc.e.Valid() != tc.valid {
|
||||
t.Errorf("Valid: %v", !tc.valid)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,7 @@ const (
|
||||
CAP_SETPCAP = 0x8
|
||||
CAP_NET_ADMIN = 0xc
|
||||
CAP_DAC_OVERRIDE = 0x1
|
||||
CAP_SETFCAP = 0x1f
|
||||
)
|
||||
|
||||
type (
|
||||
|
||||
@@ -91,12 +91,20 @@ type (
|
||||
// Time to wait for processes lingering after the initial process terminates.
|
||||
AdoptWaitDelay time.Duration
|
||||
|
||||
// Map uid/gid 0 in the init process. Requires [FstypeProc] attached to
|
||||
// [fhs.Proc] in the container filesystem.
|
||||
InitAsRoot bool
|
||||
// Mapped Uid in user namespace.
|
||||
Uid int
|
||||
// Mapped Gid in user namespace.
|
||||
Gid int
|
||||
// Hostname value in UTS namespace.
|
||||
Hostname string
|
||||
// Register binfmt_misc entries.
|
||||
Binfmt []BinfmtEntry
|
||||
// Alternative pathname to attach binfmt_misc filesystem. The zero value
|
||||
// requires [FstypeProc] to be made available at [fhs.Proc].
|
||||
BinfmtPath *check.Absolute
|
||||
// Sequential container setup ops.
|
||||
*Ops
|
||||
|
||||
@@ -216,6 +224,9 @@ func (p *Container) Start() error {
|
||||
if p.cmd.Process != nil {
|
||||
return errors.New("container: already started")
|
||||
}
|
||||
if !p.InitAsRoot && len(p.Binfmt) > 0 {
|
||||
return errors.New("container: init as root required, but not enabled")
|
||||
}
|
||||
|
||||
if err := ensureCloseOnExec(); err != nil {
|
||||
return err
|
||||
@@ -286,6 +297,18 @@ func (p *Container) Start() error {
|
||||
if !p.HostNet {
|
||||
p.cmd.SysProcAttr.Cloneflags |= CLONE_NEWNET
|
||||
}
|
||||
if p.InitAsRoot {
|
||||
p.cmd.SysProcAttr.AmbientCaps = append(p.cmd.SysProcAttr.AmbientCaps,
|
||||
// mappings during init as root
|
||||
CAP_SETFCAP,
|
||||
)
|
||||
|
||||
if !p.SeccompDisable &&
|
||||
len(p.SeccompRules) == 0 &&
|
||||
p.SeccompPresets&std.PresetDenyNS != 0 {
|
||||
return errors.New("container: as root requires late namespace creation")
|
||||
}
|
||||
}
|
||||
|
||||
// place setup pipe before user supplied extra files, this is later restored by init
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
|
||||
@@ -16,7 +16,8 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
_ "unsafe" // for go:linkname
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"hakurei.app/check"
|
||||
"hakurei.app/command"
|
||||
@@ -408,39 +409,11 @@ var containerTestCases = []struct {
|
||||
func TestContainer(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("cancel", testContainerCancel(nil, func(t *testing.T, c *container.Container) {
|
||||
wantErr := context.Canceled
|
||||
wantExitCode := 0
|
||||
if err := c.Wait(); !reflect.DeepEqual(err, wantErr) {
|
||||
if m, ok := container.InternalMessageFromError(err); ok {
|
||||
t.Error(m)
|
||||
}
|
||||
t.Errorf("Wait: error = %#v, want %#v", err, wantErr)
|
||||
}
|
||||
if ps := c.ProcessState(); ps == nil {
|
||||
t.Errorf("ProcessState unexpectedly returned nil")
|
||||
} else if code := ps.ExitCode(); code != wantExitCode {
|
||||
t.Errorf("ExitCode: %d, want %d", code, wantExitCode)
|
||||
}
|
||||
}))
|
||||
|
||||
t.Run("forward", testContainerCancel(func(c *container.Container) {
|
||||
c.ForwardCancel = true
|
||||
}, func(t *testing.T, c *container.Container) {
|
||||
var exitError *exec.ExitError
|
||||
if err := c.Wait(); !errors.As(err, &exitError) {
|
||||
if m, ok := container.InternalMessageFromError(err); ok {
|
||||
t.Error(m)
|
||||
}
|
||||
t.Errorf("Wait: error = %v", err)
|
||||
}
|
||||
if code := exitError.ExitCode(); code != blockExitCodeInterrupt {
|
||||
t.Errorf("ExitCode: %d, want %d", code, blockExitCodeInterrupt)
|
||||
}
|
||||
}))
|
||||
|
||||
var suffix string
|
||||
runTests:
|
||||
for i, tc := range containerTestCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
_suffix := suffix
|
||||
t.Run(tc.name+_suffix, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
wantOps, wantOpsCtx := tc.ops(t)
|
||||
@@ -464,6 +437,8 @@ func TestContainer(t *testing.T) {
|
||||
c.SeccompDisable = !tc.filter
|
||||
c.RetainSession = tc.session
|
||||
c.HostNet = tc.net
|
||||
c.InitAsRoot = _suffix != ""
|
||||
c.Env = append(c.Env, "HAKUREI_TEST_SUFFIX="+_suffix)
|
||||
if info.CanDegrade {
|
||||
if _, err := landlock.GetABI(); err != nil {
|
||||
if !errors.Is(err, syscall.ENOSYS) {
|
||||
@@ -473,6 +448,9 @@ func TestContainer(t *testing.T) {
|
||||
t.Log("Landlock LSM is unavailable, enabling HostAbstract")
|
||||
}
|
||||
}
|
||||
if c.InitAsRoot {
|
||||
c.SeccompPresets &= ^std.PresetDenyNS
|
||||
}
|
||||
|
||||
c.
|
||||
Readonly(check.MustAbs(pathReadonly), 0755).
|
||||
@@ -541,6 +519,11 @@ func TestContainer(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if suffix == "" {
|
||||
suffix = " as root"
|
||||
goto runTests
|
||||
}
|
||||
}
|
||||
|
||||
func ent(root, target, vfsOptstr, fsType, source, fsOptstr string) *vfs.MountInfoEntry {
|
||||
@@ -563,11 +546,10 @@ func hostnameFromTestCase(name string) string {
|
||||
}
|
||||
|
||||
func testContainerCancel(
|
||||
t *testing.T,
|
||||
containerExtra func(c *container.Container),
|
||||
waitCheck func(t *testing.T, c *container.Container),
|
||||
) func(t *testing.T) {
|
||||
return func(t *testing.T) {
|
||||
t.Parallel()
|
||||
waitCheck func(ps *os.ProcessState, waitErr error),
|
||||
) {
|
||||
ctx, cancel := context.WithCancel(t.Context())
|
||||
|
||||
c := helperNewContainer(ctx, "block")
|
||||
@@ -577,25 +559,36 @@ func testContainerCancel(
|
||||
}
|
||||
|
||||
ready := make(chan struct{})
|
||||
if r, w, err := os.Pipe(); err != nil {
|
||||
var waitErr error
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
t.Fatalf("cannot pipe: %v", err)
|
||||
} else {
|
||||
}
|
||||
|
||||
c.ExtraFiles = append(c.ExtraFiles, w)
|
||||
go func() {
|
||||
defer close(ready)
|
||||
if _, err = r.Read(make([]byte, 1)); err != nil {
|
||||
panic(err.Error())
|
||||
if _, _err := r.Read(make([]byte, 1)); _err != nil {
|
||||
panic(_err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if err := c.Start(); err != nil {
|
||||
if err = c.Start(); err != nil {
|
||||
if m, ok := container.InternalMessageFromError(err); ok {
|
||||
t.Fatal(m)
|
||||
} else {
|
||||
t.Fatalf("cannot start container: %v", err)
|
||||
}
|
||||
} else if err = c.Serve(); err != nil {
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
waitErr = c.Wait()
|
||||
_ = r.SetReadDeadline(time.Now())
|
||||
}()
|
||||
|
||||
if err = c.Serve(); err != nil {
|
||||
if m, ok := container.InternalMessageFromError(err); ok {
|
||||
t.Error(m)
|
||||
} else {
|
||||
@@ -604,8 +597,67 @@ func testContainerCancel(
|
||||
}
|
||||
<-ready
|
||||
cancel()
|
||||
waitCheck(t, c)
|
||||
<-done
|
||||
waitCheck(c.ProcessState(), waitErr)
|
||||
}
|
||||
|
||||
func TestForward(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
f := func(ps *os.ProcessState, waitErr error) {
|
||||
var exitError *exec.ExitError
|
||||
if !errors.As(waitErr, &exitError) {
|
||||
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
||||
t.Error(m)
|
||||
}
|
||||
t.Errorf("Wait: error = %v", waitErr)
|
||||
}
|
||||
if code := exitError.ExitCode(); code != blockExitCodeInterrupt {
|
||||
t.Errorf("ExitCode: %d, want %d", code, blockExitCodeInterrupt)
|
||||
}
|
||||
}
|
||||
t.Run("direct", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
testContainerCancel(t, func(c *container.Container) {
|
||||
c.ForwardCancel = true
|
||||
}, f)
|
||||
})
|
||||
t.Run("as root", func(t *testing.T) {
|
||||
testContainerCancel(t, func(c *container.Container) {
|
||||
c.ForwardCancel = true
|
||||
c.InitAsRoot = true
|
||||
c.Proc(fhs.AbsProc)
|
||||
}, f)
|
||||
})
|
||||
}
|
||||
|
||||
func TestCancel(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
f := func(ps *os.ProcessState, waitErr error) {
|
||||
wantErr := context.Canceled
|
||||
if !reflect.DeepEqual(waitErr, wantErr) {
|
||||
if m, ok := container.InternalMessageFromError(waitErr); ok {
|
||||
t.Error(m)
|
||||
}
|
||||
t.Errorf("Wait: error = %#v, want %#v", waitErr, wantErr)
|
||||
}
|
||||
if ps == nil {
|
||||
t.Errorf("ProcessState unexpectedly returned nil")
|
||||
} else if code := ps.ExitCode(); code != 0 {
|
||||
t.Errorf("ExitCode: %d, want %d", code, 0)
|
||||
}
|
||||
}
|
||||
t.Run("direct", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
testContainerCancel(t, nil, f)
|
||||
})
|
||||
t.Run("as root", func(t *testing.T) {
|
||||
testContainerCancel(t, func(c *container.Container) {
|
||||
c.InitAsRoot = true
|
||||
c.Proc(fhs.AbsProc)
|
||||
}, f)
|
||||
})
|
||||
}
|
||||
|
||||
func TestContainerString(t *testing.T) {
|
||||
@@ -641,6 +693,8 @@ func init() {
|
||||
})
|
||||
|
||||
c.Command("container", command.UsageInternal, func(args []string) error {
|
||||
asRoot := os.Getenv("HAKUREI_TEST_SUFFIX") == " as root"
|
||||
|
||||
if len(args) != 1 {
|
||||
return syscall.EINVAL
|
||||
}
|
||||
@@ -658,6 +712,66 @@ func init() {
|
||||
return fmt.Errorf("gid: %d, want %d", gid, tc.gid)
|
||||
}
|
||||
|
||||
// no attack surface increase during as root due to no_new_privs
|
||||
var wantBounding uintptr = 1
|
||||
asRootNot := " not"
|
||||
if !asRoot {
|
||||
wantBounding = 0
|
||||
asRootNot = ""
|
||||
}
|
||||
|
||||
const (
|
||||
PR_CAP_AMBIENT = 0x2f
|
||||
PR_CAP_AMBIENT_IS_SET = 0x1
|
||||
)
|
||||
for i := range container.LastCap(nil) + 1 {
|
||||
r, _, errno := syscall.Syscall(
|
||||
syscall.SYS_PRCTL,
|
||||
PR_CAP_AMBIENT,
|
||||
PR_CAP_AMBIENT_IS_SET,
|
||||
i,
|
||||
)
|
||||
if errno != 0 {
|
||||
return os.NewSyscallError("prctl", errno)
|
||||
}
|
||||
if r != 0 {
|
||||
return fmt.Errorf("capability %d in ambient set", i)
|
||||
}
|
||||
|
||||
r, _, errno = syscall.Syscall(
|
||||
syscall.SYS_PRCTL,
|
||||
syscall.PR_CAPBSET_READ,
|
||||
i,
|
||||
0,
|
||||
)
|
||||
if errno != 0 {
|
||||
return os.NewSyscallError("prctl", errno)
|
||||
}
|
||||
if r != wantBounding {
|
||||
return fmt.Errorf("capability %d%s in bounding set", i, asRootNot)
|
||||
}
|
||||
}
|
||||
|
||||
const _LINUX_CAPABILITY_VERSION_3 = 0x20080522
|
||||
var capData struct {
|
||||
effective uint32
|
||||
permitted uint32
|
||||
inheritable uint32
|
||||
}
|
||||
if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&struct {
|
||||
version uint32
|
||||
pid int32
|
||||
}{_LINUX_CAPABILITY_VERSION_3, 0})), uintptr(unsafe.Pointer(&capData)), 0); errno != 0 {
|
||||
return os.NewSyscallError("capget", errno)
|
||||
}
|
||||
|
||||
if max(capData.effective, capData.permitted, capData.inheritable) != 0 {
|
||||
return fmt.Errorf(
|
||||
"effective = %d, permitted = %d, inheritable = %d",
|
||||
capData.effective, capData.permitted, capData.inheritable,
|
||||
)
|
||||
}
|
||||
|
||||
wantHost := hostnameFromTestCase(tc.name)
|
||||
if host, err := os.Hostname(); err != nil {
|
||||
return fmt.Errorf("cannot get hostname: %v", err)
|
||||
@@ -775,7 +889,7 @@ func TestMain(m *testing.M) {
|
||||
}
|
||||
c.MustParse(os.Args[1:], func(err error) {
|
||||
if err != nil {
|
||||
log.Fatal(err.Error())
|
||||
log.Fatal(err)
|
||||
}
|
||||
})
|
||||
return
|
||||
|
||||
@@ -11,11 +11,13 @@ import (
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"hakurei.app/check"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/ext"
|
||||
"hakurei.app/fhs"
|
||||
@@ -182,23 +184,33 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
cancel()
|
||||
}
|
||||
|
||||
uid, gid := param.Uid, param.Gid
|
||||
if param.InitAsRoot {
|
||||
uid, gid = 0, 0
|
||||
}
|
||||
|
||||
// write uid/gid map here so parent does not need to set dumpable
|
||||
if err := k.setDumpable(ext.SUID_DUMP_USER); err != nil {
|
||||
k.fatalf(msg, "cannot set SUID_DUMP_USER: %v", err)
|
||||
}
|
||||
if err := k.writeFile(fhs.Proc+"self/uid_map",
|
||||
append([]byte{}, strconv.Itoa(param.Uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
if err := k.writeFile(
|
||||
fhs.Proc+"self/uid_map",
|
||||
[]byte(strconv.Itoa(uid)+" "+strconv.Itoa(param.HostUid)+" 1\n"),
|
||||
0,
|
||||
); err != nil {
|
||||
k.fatalf(msg, "%v", err)
|
||||
}
|
||||
if err := k.writeFile(fhs.Proc+"self/setgroups",
|
||||
if err := k.writeFile(
|
||||
fhs.Proc+"self/setgroups",
|
||||
[]byte("deny\n"),
|
||||
0); err != nil && !os.IsNotExist(err) {
|
||||
0,
|
||||
); err != nil && !os.IsNotExist(err) {
|
||||
k.fatalf(msg, "%v", err)
|
||||
}
|
||||
if err := k.writeFile(fhs.Proc+"self/gid_map",
|
||||
append([]byte{}, strconv.Itoa(param.Gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"...),
|
||||
0); err != nil {
|
||||
[]byte(strconv.Itoa(gid)+" "+strconv.Itoa(param.HostGid)+" 1\n"),
|
||||
0,
|
||||
); err != nil {
|
||||
k.fatalf(msg, "%v", err)
|
||||
}
|
||||
if err := k.setDumpable(ext.SUID_DUMP_DISABLE); err != nil {
|
||||
@@ -230,6 +242,16 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
k.fatalf(msg, "cannot enter intermediate host path: %v", err)
|
||||
}
|
||||
|
||||
if len(param.Binfmt) > 0 {
|
||||
for i, e := range param.Binfmt {
|
||||
if pathname, err := k.evalSymlinks(e.Interpreter.String()); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if param.Binfmt[i].Interpreter, err = check.NewAbs(pathname); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* early is called right before pivot_root into intermediate root;
|
||||
this step is mostly for gathering information that would otherwise be
|
||||
difficult to obtain via library functions after pivot_root, and
|
||||
@@ -285,6 +307,48 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
}
|
||||
}
|
||||
|
||||
if len(param.Binfmt) > 0 {
|
||||
const interpreter = "/interpreter"
|
||||
|
||||
if param.BinfmtPath == nil {
|
||||
param.BinfmtPath = fhs.AbsProcSys.Append("fs/binfmt_misc")
|
||||
}
|
||||
binfmt := sysrootPath + param.BinfmtPath.String()
|
||||
if err := k.mkdirAll(binfmt, 0); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
if err := k.mount(
|
||||
SourceBinfmtMisc,
|
||||
binfmt,
|
||||
FstypeBinfmtMisc,
|
||||
MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||
zeroString,
|
||||
); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
buf.Grow(1920)
|
||||
|
||||
register := binfmt + "/register"
|
||||
for i, e := range param.Binfmt {
|
||||
if err := k.symlink(hostPath+e.Interpreter.String(), interpreter); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if err = k.writeFile(register, []byte(":"+
|
||||
strconv.Itoa(i)+":"+
|
||||
"M:"+
|
||||
strconv.Itoa(int(e.Offset))+":"+
|
||||
escapeBinfmt(&buf, e.Magic)+":"+
|
||||
escapeBinfmt(&buf, e.Mask)+":"+
|
||||
interpreter+":"+
|
||||
"F"), 0); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if err = k.remove(interpreter); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// setup requiring host root complete at this point
|
||||
if err := k.mount(hostDir, hostDir, zeroString, MS_SILENT|MS_REC|MS_PRIVATE, zeroString); err != nil {
|
||||
k.fatalf(msg, "cannot make host root rprivate: %v", optionalErrorUnwrap(err))
|
||||
@@ -323,11 +387,19 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
}
|
||||
}
|
||||
|
||||
var keepCaps []uintptr
|
||||
if param.Privileged {
|
||||
keepCaps = append(keepCaps, CAP_SYS_ADMIN, CAP_SETPCAP)
|
||||
}
|
||||
if param.InitAsRoot {
|
||||
keepCaps = append(keepCaps, CAP_SETFCAP)
|
||||
}
|
||||
|
||||
if err := k.capAmbientClearAll(); err != nil {
|
||||
k.fatalf(msg, "cannot clear the ambient capability set: %v", err)
|
||||
}
|
||||
for i := uintptr(0); i <= lastcap; i++ {
|
||||
if param.Privileged && i == CAP_SYS_ADMIN {
|
||||
for i := range lastcap + 1 {
|
||||
if slices.Contains(keepCaps, i) {
|
||||
continue
|
||||
}
|
||||
if err := k.capBoundingSetDrop(i); err != nil {
|
||||
@@ -336,20 +408,23 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
}
|
||||
|
||||
var keep [2]uint32
|
||||
if param.Privileged {
|
||||
keep[capToIndex(CAP_SYS_ADMIN)] |= capToMask(CAP_SYS_ADMIN)
|
||||
for _, c := range keepCaps {
|
||||
keep[capToIndex(c)] |= capToMask(c)
|
||||
}
|
||||
|
||||
if err := k.capAmbientRaise(CAP_SYS_ADMIN); err != nil {
|
||||
k.fatalf(msg, "cannot raise CAP_SYS_ADMIN: %v", err)
|
||||
}
|
||||
}
|
||||
if err := k.capset(
|
||||
&capHeader{_LINUX_CAPABILITY_VERSION_3, 0},
|
||||
&[2]capData{{0, keep[0], keep[0]}, {0, keep[1], keep[1]}},
|
||||
&[2]capData{{keep[0], keep[0], keep[0]}, {keep[1], keep[1], keep[1]}},
|
||||
); err != nil {
|
||||
k.fatalf(msg, "cannot capset: %v", err)
|
||||
}
|
||||
|
||||
for _, c := range keepCaps {
|
||||
if err := k.capAmbientRaise(c); err != nil {
|
||||
k.fatalf(msg, "cannot raise %#x: %v", c, err)
|
||||
}
|
||||
}
|
||||
|
||||
if !param.SeccompDisable {
|
||||
rules := param.SeccompRules
|
||||
if len(rules) == 0 { // non-empty rules slice always overrides presets
|
||||
@@ -474,6 +549,14 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
cmd.ExtraFiles = extraFiles
|
||||
cmd.Dir = param.Dir.String()
|
||||
|
||||
if param.InitAsRoot {
|
||||
cmd.SysProcAttr = &SysProcAttr{
|
||||
Cloneflags: CLONE_NEWUSER,
|
||||
UidMappings: []SysProcIDMap{{ContainerID: param.Uid, HostID: 0, Size: 1}},
|
||||
GidMappings: []SysProcIDMap{{ContainerID: param.Gid, HostID: 0, Size: 1}},
|
||||
}
|
||||
}
|
||||
|
||||
msg.Verbosef("starting initial process %s", param.Path)
|
||||
if err := k.start(cmd); err != nil {
|
||||
k.fatalf(msg, "%v", err)
|
||||
|
||||
@@ -1624,7 +1624,6 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
|
||||
@@ -1656,8 +1655,9 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, stub.UniqueError(19)),
|
||||
call("fatalf", stub.ExpectArgs{"cannot raise CAP_SYS_ADMIN: %v", []any{stub.UniqueError(19)}}, nil, nil),
|
||||
call("fatalf", stub.ExpectArgs{"cannot raise %#x: %v", []any{uintptr(0x15), stub.UniqueError(19)}}, nil, nil),
|
||||
},
|
||||
}, nil},
|
||||
|
||||
@@ -1731,7 +1731,6 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
|
||||
@@ -1763,8 +1762,7 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, stub.UniqueError(17)),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, stub.UniqueError(17)),
|
||||
call("fatalf", stub.ExpectArgs{"cannot capset: %v", []any{stub.UniqueError(17)}}, nil, nil),
|
||||
},
|
||||
}, nil},
|
||||
@@ -1839,7 +1837,6 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
|
||||
@@ -1871,8 +1868,9 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil),
|
||||
call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, stub.UniqueError(15)),
|
||||
call("fatalf", stub.ExpectArgs{"cannot load syscall filter: %v", []any{stub.UniqueError(15)}}, nil, nil),
|
||||
@@ -2699,7 +2697,6 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x5)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x6)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x7)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x9)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xa)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0xb)}, nil, nil),
|
||||
@@ -2731,8 +2728,9 @@ func TestInitEntrypoint(t *testing.T) {
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x26)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x27)}, nil, nil),
|
||||
call("capBoundingSetDrop", stub.ExpectArgs{uintptr(0x28)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0x200100, 0x200100, 0x200100}, {0, 0, 0}}}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x15)}, nil, nil),
|
||||
call("capset", stub.ExpectArgs{&capHeader{_LINUX_CAPABILITY_VERSION_3, 0}, &[2]capData{{0, 0x200000, 0x200000}, {0, 0, 0}}}, nil, nil),
|
||||
call("capAmbientRaise", stub.ExpectArgs{uintptr(0x8)}, nil, nil),
|
||||
call("verbosef", stub.ExpectArgs{"resolving presets %#x", []any{std.FilterPreset(0xf)}}, nil, nil),
|
||||
call("seccompLoad", stub.ExpectArgs{seccomp.Preset(0xf, 0), seccomp.ExportFlag(0)}, nil, nil),
|
||||
call("verbosef", stub.ExpectArgs{"%d filter rules loaded", []any{73}}, nil, nil),
|
||||
|
||||
@@ -40,6 +40,9 @@ const (
|
||||
// SourceMqueue is used when mounting mqueue.
|
||||
// Note that any source value is allowed when fstype is [FstypeMqueue].
|
||||
SourceMqueue = "mqueue"
|
||||
// SourceBinfmtMisc is used when mounting binfmt_misc.
|
||||
// Note that any source value is allowed when fstype is [SourceBinfmtMisc].
|
||||
SourceBinfmtMisc = "binfmt_misc"
|
||||
// SourceOverlay is used when mounting overlay.
|
||||
// Note that any source value is allowed when fstype is [FstypeOverlay].
|
||||
SourceOverlay = "overlay"
|
||||
@@ -70,6 +73,9 @@ const (
|
||||
// FstypeMqueue represents the mqueue pseudo-filesystem.
|
||||
// This filesystem type is usually mounted on /dev/mqueue.
|
||||
FstypeMqueue = "mqueue"
|
||||
// FstypeBinfmtMisc represents the binfmt_misc pseudo-filesystem.
|
||||
// This filesystem type is usually mounted on /proc/sys/fs/binfmt_misc.
|
||||
FstypeBinfmtMisc = "binfmt_misc"
|
||||
// FstypeOverlay represents the overlay pseudo-filesystem.
|
||||
// This filesystem type can be mounted anywhere in the container filesystem.
|
||||
FstypeOverlay = "overlay"
|
||||
|
||||
@@ -42,6 +42,8 @@ var (
|
||||
AbsDevShm = unsafeAbs(DevShm)
|
||||
// AbsProc is [Proc] as [check.Absolute].
|
||||
AbsProc = unsafeAbs(Proc)
|
||||
// AbsProcSys is [ProcSys] as [check.Absolute].
|
||||
AbsProcSys = unsafeAbs(ProcSys)
|
||||
// AbsProcSelfExe is [ProcSelfExe] as [check.Absolute].
|
||||
AbsProcSelfExe = unsafeAbs(ProcSelfExe)
|
||||
// AbsSys is [Sys] as [check.Absolute].
|
||||
|
||||
@@ -631,12 +631,6 @@ func (a *execArtifact) cure(f *FContext, hostNet bool) (err error) {
|
||||
_ = stdout.Close()
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err != nil && !errors.As(err, new(*exec.ExitError)) {
|
||||
_ = stdout.Close()
|
||||
_ = stderr.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
brStdout, brStderr := f.cache.getReader(stdout), f.cache.getReader(stderr)
|
||||
stdoutDone, stderrDone := make(chan struct{}), make(chan struct{})
|
||||
@@ -651,6 +645,11 @@ func (a *execArtifact) cure(f *FContext, hostNet bool) (err error) {
|
||||
io.TeeReader(brStderr, status),
|
||||
)
|
||||
defer func() {
|
||||
if err != nil && !errors.As(err, new(*exec.ExitError)) {
|
||||
_ = stdout.Close()
|
||||
_ = stderr.Close()
|
||||
}
|
||||
|
||||
<-stdoutDone
|
||||
<-stderrDone
|
||||
f.cache.putReader(brStdout)
|
||||
|
||||
@@ -423,7 +423,7 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) {
|
||||
msg := message.New(log.New(os.Stderr, "cache: ", 0))
|
||||
msg.SwapVerbose(testing.Verbose())
|
||||
|
||||
flags := tc.flags
|
||||
flags := tc.flags | pkg.CSuppressInit
|
||||
|
||||
if info.CanDegrade {
|
||||
if _, err := landlock.GetABI(); err != nil {
|
||||
@@ -544,7 +544,11 @@ func cureMany(t *testing.T, c *pkg.Cache, steps []cureStep) {
|
||||
t.Fatalf("Cure: pathname = %q, want %q", pathname, step.pathname)
|
||||
} else if step.output == nil || checksum != makeChecksumH(step.output.hash()) {
|
||||
if pathname != nil {
|
||||
t.Fatal(expectsFrom(pathname.String()))
|
||||
if name, _err := filepath.EvalSymlinks(pathname.String()); _err != nil {
|
||||
t.Fatal(_err)
|
||||
} else {
|
||||
t.Fatal(expectsFrom(name))
|
||||
}
|
||||
} else if checksum != (unique.Handle[pkg.Checksum]{}) {
|
||||
t.Fatalf("Cure: unexpected checksum %s", pkg.Encode(checksum.Value()))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user