container: binfmt registration
All checks were successful
Test / Create distribution (push) Successful in 1m4s
Test / Sandbox (push) Successful in 2m43s
Test / Hakurei (push) Successful in 3m49s
Test / ShareFS (push) Successful in 3m55s
Test / Sandbox (race detector) (push) Successful in 5m18s
Test / Hakurei (race detector) (push) Successful in 6m25s
Test / Flake checks (push) Successful in 1m22s
All checks were successful
Test / Create distribution (push) Successful in 1m4s
Test / Sandbox (push) Successful in 2m43s
Test / Hakurei (push) Successful in 3m49s
Test / ShareFS (push) Successful in 3m55s
Test / Sandbox (race detector) (push) Successful in 5m18s
Test / Hakurei (race detector) (push) Successful in 6m25s
Test / Flake checks (push) Successful in 1m22s
This arranges for binfmt entries to be registered for the container. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
46
container/binfmt.go
Normal file
46
container/binfmt.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"hakurei.app/check"
|
||||
)
|
||||
|
||||
// escapeBinfmt escapes magic/mask sequences in a [BinfmtEntry].
|
||||
func escapeBinfmt(buf *strings.Builder, s string) string {
|
||||
const lowerhex = "0123456789abcdef"
|
||||
|
||||
buf.Reset()
|
||||
for _, c := range unsafe.Slice(unsafe.StringData(s), len(s)) {
|
||||
switch c {
|
||||
case 0, '\\', ':':
|
||||
buf.WriteString(`\x`)
|
||||
buf.WriteByte(lowerhex[c>>4])
|
||||
buf.WriteByte(lowerhex[c&0xf])
|
||||
|
||||
default:
|
||||
buf.WriteByte(c)
|
||||
}
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// BinfmtEntry is an entry to be registered by the init process.
|
||||
type BinfmtEntry struct {
|
||||
// The offset of the magic/mask in the file, counted in bytes.
|
||||
Offset byte
|
||||
// The byte sequence binfmt_misc is matching for.
|
||||
Magic string
|
||||
// An (optional, defaults to all 0xff) mask.
|
||||
Mask string
|
||||
// The program that should be invoked with the binary as first argument.
|
||||
Interpreter *check.Absolute
|
||||
}
|
||||
|
||||
// Valid returns whether e can be registered into the kernel.
|
||||
func (e *BinfmtEntry) Valid() bool {
|
||||
return e != nil &&
|
||||
int(e.Offset)+max(len(e.Magic), len(e.Mask)) < 128 &&
|
||||
e.Interpreter != nil && len(e.Interpreter.String()) < 128
|
||||
}
|
||||
62
container/binfmt_test.go
Normal file
62
container/binfmt_test.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package container
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"hakurei.app/fhs"
|
||||
)
|
||||
|
||||
func TestEscapeBinfmt(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
magic string
|
||||
want string
|
||||
}{
|
||||
{"packed DOS applications", "\x0eDEX", "\x0eDEX"},
|
||||
|
||||
{"riscv64 magic",
|
||||
"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00",
|
||||
"\x7fELF\x02\x01\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\x02\\x00\xf3\\x00"},
|
||||
{"riscv64 mask",
|
||||
"\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff",
|
||||
"\xff\xff\xff\xff\xff\xff\xff\\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
got := escapeBinfmt(new(strings.Builder), tc.magic)
|
||||
if got != tc.want {
|
||||
t.Errorf("escapeBinfmt: %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinfmtEntry(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
e BinfmtEntry
|
||||
valid bool
|
||||
}{
|
||||
{"zero", BinfmtEntry{}, false},
|
||||
{"large offset", BinfmtEntry{Offset: 128}, false},
|
||||
{"long magic", BinfmtEntry{Magic: strings.Repeat("\x00", 128)}, false},
|
||||
{"long mask", BinfmtEntry{Mask: strings.Repeat("\x00", 128)}, false},
|
||||
{"valid", BinfmtEntry{Interpreter: fhs.AbsRoot}, true},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if tc.e.Valid() != tc.valid {
|
||||
t.Errorf("Valid: %v", !tc.valid)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -100,6 +100,11 @@ type (
|
||||
Gid int
|
||||
// Hostname value in UTS namespace.
|
||||
Hostname string
|
||||
// Register binfmt_misc entries.
|
||||
Binfmt []BinfmtEntry
|
||||
// Alternative pathname to attach binfmt_misc filesystem. The zero value
|
||||
// requires [FstypeProc] to be made available at [fhs.Proc].
|
||||
BinfmtPath *check.Absolute
|
||||
// Sequential container setup ops.
|
||||
*Ops
|
||||
|
||||
@@ -219,6 +224,9 @@ func (p *Container) Start() error {
|
||||
if p.cmd.Process != nil {
|
||||
return errors.New("container: already started")
|
||||
}
|
||||
if !p.InitAsRoot && len(p.Binfmt) > 0 {
|
||||
return errors.New("container: init as root required, but not enabled")
|
||||
}
|
||||
|
||||
if err := ensureCloseOnExec(); err != nil {
|
||||
return err
|
||||
|
||||
@@ -11,11 +11,13 @@ import (
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
. "syscall"
|
||||
"time"
|
||||
|
||||
"hakurei.app/check"
|
||||
"hakurei.app/container/seccomp"
|
||||
"hakurei.app/ext"
|
||||
"hakurei.app/fhs"
|
||||
@@ -240,6 +242,16 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
k.fatalf(msg, "cannot enter intermediate host path: %v", err)
|
||||
}
|
||||
|
||||
if len(param.Binfmt) > 0 {
|
||||
for i, e := range param.Binfmt {
|
||||
if pathname, err := k.evalSymlinks(e.Interpreter.String()); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if param.Binfmt[i].Interpreter, err = check.NewAbs(pathname); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* early is called right before pivot_root into intermediate root;
|
||||
this step is mostly for gathering information that would otherwise be
|
||||
difficult to obtain via library functions after pivot_root, and
|
||||
@@ -295,6 +307,48 @@ func initEntrypoint(k syscallDispatcher, msg message.Msg) {
|
||||
}
|
||||
}
|
||||
|
||||
if len(param.Binfmt) > 0 {
|
||||
const interpreter = "/interpreter"
|
||||
|
||||
if param.BinfmtPath == nil {
|
||||
param.BinfmtPath = fhs.AbsProcSys.Append("fs/binfmt_misc")
|
||||
}
|
||||
binfmt := sysrootPath + param.BinfmtPath.String()
|
||||
if err := k.mkdirAll(binfmt, 0); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
if err := k.mount(
|
||||
SourceBinfmtMisc,
|
||||
binfmt,
|
||||
FstypeBinfmtMisc,
|
||||
MS_NOSUID|MS_NOEXEC|MS_NODEV,
|
||||
zeroString,
|
||||
); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
buf.Grow(1920)
|
||||
|
||||
register := binfmt + "/register"
|
||||
for i, e := range param.Binfmt {
|
||||
if err := k.symlink(hostPath+e.Interpreter.String(), interpreter); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if err = k.writeFile(register, []byte(":"+
|
||||
strconv.Itoa(i)+":"+
|
||||
"M:"+
|
||||
strconv.Itoa(int(e.Offset))+":"+
|
||||
escapeBinfmt(&buf, e.Magic)+":"+
|
||||
escapeBinfmt(&buf, e.Mask)+":"+
|
||||
interpreter+":"+
|
||||
"F"), 0); err != nil {
|
||||
k.fatal(msg, err)
|
||||
} else if err = k.remove(interpreter); err != nil {
|
||||
k.fatal(msg, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// setup requiring host root complete at this point
|
||||
if err := k.mount(hostDir, hostDir, zeroString, MS_SILENT|MS_REC|MS_PRIVATE, zeroString); err != nil {
|
||||
k.fatalf(msg, "cannot make host root rprivate: %v", optionalErrorUnwrap(err))
|
||||
|
||||
@@ -40,6 +40,9 @@ const (
|
||||
// SourceMqueue is used when mounting mqueue.
|
||||
// Note that any source value is allowed when fstype is [FstypeMqueue].
|
||||
SourceMqueue = "mqueue"
|
||||
// SourceBinfmtMisc is used when mounting binfmt_misc.
|
||||
// Note that any source value is allowed when fstype is [SourceBinfmtMisc].
|
||||
SourceBinfmtMisc = "binfmt_misc"
|
||||
// SourceOverlay is used when mounting overlay.
|
||||
// Note that any source value is allowed when fstype is [FstypeOverlay].
|
||||
SourceOverlay = "overlay"
|
||||
@@ -70,6 +73,9 @@ const (
|
||||
// FstypeMqueue represents the mqueue pseudo-filesystem.
|
||||
// This filesystem type is usually mounted on /dev/mqueue.
|
||||
FstypeMqueue = "mqueue"
|
||||
// FstypeBinfmtMisc represents the binfmt_misc pseudo-filesystem.
|
||||
// This filesystem type is usually mounted on /proc/sys/fs/binfmt_misc.
|
||||
FstypeBinfmtMisc = "binfmt_misc"
|
||||
// FstypeOverlay represents the overlay pseudo-filesystem.
|
||||
// This filesystem type can be mounted anywhere in the container filesystem.
|
||||
FstypeOverlay = "overlay"
|
||||
|
||||
@@ -42,6 +42,8 @@ var (
|
||||
AbsDevShm = unsafeAbs(DevShm)
|
||||
// AbsProc is [Proc] as [check.Absolute].
|
||||
AbsProc = unsafeAbs(Proc)
|
||||
// AbsProcSys is [ProcSys] as [check.Absolute].
|
||||
AbsProcSys = unsafeAbs(ProcSys)
|
||||
// AbsProcSelfExe is [ProcSelfExe] as [check.Absolute].
|
||||
AbsProcSelfExe = unsafeAbs(ProcSelfExe)
|
||||
// AbsSys is [Sys] as [check.Absolute].
|
||||
|
||||
Reference in New Issue
Block a user