cmd/sharefs: containerise filesystem daemon
All checks were successful
Test / Create distribution (push) Successful in 44s
Test / Sandbox (push) Successful in 2m30s
Test / Hakurei (push) Successful in 3m26s
Test / ShareFS (push) Successful in 3m26s
Test / Hpkg (push) Successful in 4m20s
Test / Sandbox (race detector) (push) Successful in 4m41s
Test / Hakurei (race detector) (push) Successful in 5m31s
Test / Flake checks (push) Successful in 1m36s

This replaces libfuse's forking daemonise function (fuse_daemonize), which prevents Go callbacks from calling into the runtime, by running the filesystem daemon in a container instead. This also enforces least privilege on the daemon process.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-12-27 09:17:14 +09:00
parent 3d720ada92
commit 2f8ca83376
3 changed files with 189 additions and 55 deletions

View File

@@ -12,22 +12,31 @@ extern void sharefs_destroy(void *private_data);
typedef void (*closure)();
static inline struct fuse_opt _FUSE_OPT_END() { return (struct fuse_opt)FUSE_OPT_END; };
static inline int _fuse_main(int argc, char *argv[], const struct fuse_operations *op, void *user_data) { return fuse_main(argc, argv, op, user_data); }
*/
import "C"
import (
"context"
"encoding/gob"
"errors"
"fmt"
"io"
"log"
"os"
"path"
"os/exec"
"os/signal"
"runtime"
"runtime/cgo"
"strconv"
"syscall"
"unsafe"
"hakurei.app/container"
"hakurei.app/container/check"
"hakurei.app/container/std"
"hakurei.app/hst"
"hakurei.app/internal/helper/proc"
"hakurei.app/internal/info"
"hakurei.app/message"
)
type (
@@ -40,8 +49,11 @@ type (
// Whether sharefs_init failed.
initFailed bool
// Open file descriptor to backing directory.
Source int
// Open file descriptor to fuse.
Fuse int
// Pathname to open for dirfd.
Source *check.Absolute
// New uid and gid to set by sharefs_init when starting as root.
Setuid, Setgid int
}
@@ -71,22 +83,8 @@ func sharefs_init(_ *C.struct_fuse_conn_info, cfg *C.struct_fuse_config) unsafe.
setup := cgo.Handle(priv.setup).Value().(*setupState)
if os.Geteuid() == 0 {
if setup.Setuid <= 0 || setup.Setgid <= 0 {
log.Println("setuid and setgid must not be 0")
goto fail
}
if err := syscall.Setresgid(setup.Setgid, setup.Setgid, setup.Setgid); err != nil {
log.Printf("cannot set gid: %v", err)
goto fail
}
if err := syscall.Setgroups(nil); err != nil {
log.Printf("cannot set supplementary groups: %v", err)
goto fail
}
if err := syscall.Setresuid(setup.Setuid, setup.Setuid, setup.Setuid); err != nil {
log.Printf("cannot set uid: %v", err)
goto fail
}
log.Println("filesystem daemon must not run as root")
goto fail
}
cfg.use_ino = C.true
@@ -97,7 +95,16 @@ func sharefs_init(_ *C.struct_fuse_conn_info, cfg *C.struct_fuse_config) unsafe.
cfg.negative_timeout = 0
// all future filesystem operations happen through this dirfd
priv.dirfd = C.int(setup.Source)
if fd, err := syscall.Open(setup.Source.String(), syscall.O_DIRECTORY|syscall.O_RDONLY|syscall.O_CLOEXEC, 0); err != nil {
log.Printf("cannot open %q: %v", setup.Source, err)
goto fail
} else if err = syscall.Fchdir(fd); err != nil {
_ = syscall.Close(fd)
log.Printf("cannot enter %q: %s", setup.Source, err)
goto fail
} else {
priv.dirfd = C.int(fd)
}
return ctx.private_data
@@ -177,8 +184,20 @@ func parseOpts(args *C.struct_fuse_args, setup *setupState) (ok bool) {
if v, err := strconv.Atoi(C.GoString(unsafeOpts.setup)); err != nil || v < 3 {
log.Println("invalid value for option setup")
return false
} else if err = gob.NewDecoder(os.NewFile(uintptr(v), "setup")).Decode(setup); err != nil {
log.Println(err)
} else {
r := os.NewFile(uintptr(v), "setup")
defer func() {
if err = r.Close(); err != nil {
log.Println(err)
}
}()
if err = gob.NewDecoder(r).Decode(setup); err != nil {
log.Println(err)
return false
}
}
if setup.Fuse < 3 {
log.Println("invalid file descriptor", setup.Fuse)
return false
}
return true
@@ -187,23 +206,11 @@ func parseOpts(args *C.struct_fuse_args, setup *setupState) (ok bool) {
if unsafeOpts.source == nil {
showHelp(args)
return false
} else if source := C.GoString(unsafeOpts.source); !path.IsAbs(source) {
log.Println("source is not absolute")
return false
} else if fd, err := syscall.Open(source, syscall.O_DIRECTORY|syscall.O_RDONLY, 0); err != nil {
log.Printf("cannot open source: %v", err)
return false
} else if err = syscall.Fchdir(fd); err != nil {
_ = syscall.Close(fd)
log.Printf("cannot enter source: %s", err)
} else if a, err := check.NewAbs(C.GoString(unsafeOpts.source)); err != nil {
log.Println(err)
return false
} else {
setup.Source = fd
defer func() {
if !ok {
_ = syscall.Close(fd)
}
}()
setup.Source = a
}
if unsafeOpts.setuid == nil {
@@ -244,7 +251,9 @@ func unsafeAddArgument(args *C.struct_fuse_args, arg string) {
C.fuse_opt_add_arg(args, (*C.char)(unsafe.Pointer(unsafe.StringData(arg))))
}
func _main(argc int, argv **C.char) int {
func _main(argc int, argv **C.char) (exitCode int) {
msg := message.New(log.Default())
container.TryArgv0(msg)
runtime.LockOSThread()
// don't mask creation mode, kernel already did that
@@ -286,11 +295,24 @@ func _main(argc int, argv **C.char) int {
if opts.show_help != 0 {
showHelp(&args)
return 0
}
if opts.show_help == 0 && opts.mountpoint == nil {
} else if opts.mountpoint == nil {
log.Println("no mountpoint specified")
return 2
} else {
// hack to keep fuse_parse_cmdline happy in the container
mountpoint := C.GoString(opts.mountpoint)
pathnameArg := -1
for i, arg := range os.Args {
if arg == mountpoint {
pathnameArg = i
break
}
}
if pathnameArg < 0 {
log.Println("mountpoint must be absolute")
return 2
}
os.Args[pathnameArg] = container.Nonexistent
}
if !parseOpts(&args, &setup) {
@@ -302,7 +324,7 @@ func _main(argc int, argv **C.char) int {
log.Println("setuid and setgid must not be 0")
return 1
}
} else if setup.Setuid > 0 || setup.Setgid > 0 {
} else if setup.Fuse < 3 && (setup.Setuid > 0 || setup.Setgid > 0) {
log.Println("setuid and setgid has no effect when not starting as root")
return 1
}
@@ -334,18 +356,136 @@ func _main(argc int, argv **C.char) int {
return 3
}
defer C.fuse_destroy(fuse)
se := C.fuse_get_session(fuse)
if setup.Fuse < 3 {
// unconfined, set up mount point and container
if C.fuse_mount(fuse, opts.mountpoint) != 0 {
return 4
}
// unmounted by initial process
defer func() {
if exitCode == 5 {
C.fuse_unmount(fuse)
}
}()
if os.Geteuid() == 0 {
if setup.Setuid <= 0 || setup.Setgid <= 0 {
log.Println("setuid and setgid must not be 0")
return 5
}
if err := syscall.Setresgid(setup.Setgid, setup.Setgid, setup.Setgid); err != nil {
log.Printf("cannot set gid: %v", err)
return 5
}
if err := syscall.Setgroups(nil); err != nil {
log.Printf("cannot set supplementary groups: %v", err)
return 5
}
if err := syscall.Setresuid(setup.Setuid, setup.Setuid, setup.Setuid); err != nil {
log.Printf("cannot set uid: %v", err)
return 5
}
}
msg.SwapVerbose(opts.debug != 0)
ctx := context.Background()
if opts.foreground != 0 {
c, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
defer cancel()
ctx = c
}
z := container.New(ctx, msg)
z.AllowOrphan = opts.foreground == 0
z.Env = os.Environ()
// keep fuse_parse_cmdline happy in the container
z.Tmpfs(check.MustAbs(container.Nonexistent), 1<<10, 0755)
if a, err := check.NewAbs(container.MustExecutable(msg)); err != nil {
log.Println(err)
return 5
} else {
z.Path = a
}
z.Args = os.Args
z.ForwardCancel = true
z.SeccompPresets |= std.PresetStrict
z.ParentPerm = 0700
z.Bind(setup.Source, setup.Source, std.BindWritable)
if !z.AllowOrphan {
z.WaitDelay = hst.WaitDelayMax
z.Stdin, z.Stdout, z.Stderr = os.Stdin, os.Stdout, os.Stderr
}
z.Bind(z.Path, z.Path, 0)
setup.Fuse = int(proc.ExtraFileSlice(&z.ExtraFiles, os.NewFile(uintptr(C.fuse_session_fd(se)), "fuse")))
var setupWriter io.WriteCloser
if fd, w, err := container.Setup(&z.ExtraFiles); err != nil {
log.Println(err)
return 5
} else {
z.Args = append(z.Args, "-osetup="+strconv.Itoa(fd))
setupWriter = w
}
if err := z.Start(); err != nil {
if m, ok := message.GetMessage(err); ok {
log.Println(m)
} else {
log.Println(err)
}
return 5
}
if err := z.Serve(); err != nil {
if m, ok := message.GetMessage(err); ok {
log.Println(m)
} else {
log.Println(err)
}
return 5
}
if err := gob.NewEncoder(setupWriter).Encode(&setup); err != nil {
log.Println(err)
return 5
} else if err = setupWriter.Close(); err != nil {
log.Println(err)
}
if !z.AllowOrphan {
if err := z.Wait(); err != nil {
var exitError *exec.ExitError
if !errors.As(err, &exitError) || exitError == nil {
log.Println(err)
return 5
}
switch code := exitError.ExitCode(); syscall.Signal(code & 0x7f) {
case syscall.SIGINT:
case syscall.SIGTERM:
default:
return code
}
}
}
return 0
} else { // confined
C.free(unsafe.Pointer(opts.mountpoint))
// must be heap allocated
opts.mountpoint = C.CString("/dev/fd/" + strconv.Itoa(setup.Fuse))
if err := os.Chdir("/"); err != nil {
log.Println(err)
}
}
if C.fuse_mount(fuse, opts.mountpoint) != 0 {
return 4
}
defer C.fuse_unmount(fuse)
// TODO(ophestra): spawn container here, set PR_SET_NO_NEW_PRIVS and enforce landlock
if C.fuse_daemonize(opts.foreground) != 0 {
return 5
}
se := C.fuse_get_session(fuse)
if C.fuse_set_signal_handlers(se) != 0 {
return 6
}

View File

@@ -33,11 +33,6 @@ check_bad_opts_output("allow_other", "sharefs: setuid and setgid must not be 0\n
check_bad_opts_output("setuid=1023", "sharefs: setuid and setgid must not be 0\n", privileged=True)
check_bad_opts_output("setgid=1023", "sharefs: setuid and setgid must not be 0\n", privileged=True)
# Bad backing directory:
check_bad_opts_output("clone_fd", "sharefs: cannot open source: no such file or directory\n", source="/proc/nonexistent")
check_bad_opts_output("clone_fd", "sharefs: cannot open source: not a directory\n", source="/proc/self/exe")
check_bad_opts_output("clone_fd", "sharefs: cannot open source: permission denied\n", source="/root")
# Make sure nothing actually got mounted:
machine.fail("umount /mnt")
machine.succeed("rmdir /mnt")