cmd/sharefs: containerise filesystem daemon

This replaces the forking daemonise libfuse function which prevents Go callbacks from calling into the runtime. This also enforces least privilege on the daemon process.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-12-27 09:17:14 +09:00
parent 3d720ada92
commit 2f8ca83376
3 changed files with 189 additions and 55 deletions

View File

@@ -12,22 +12,31 @@ extern void sharefs_destroy(void *private_data);
typedef void (*closure)();
static inline struct fuse_opt _FUSE_OPT_END() { return (struct fuse_opt)FUSE_OPT_END; };
static inline int _fuse_main(int argc, char *argv[], const struct fuse_operations *op, void *user_data) { return fuse_main(argc, argv, op, user_data); }
*/
import "C"
import (
"context"
"encoding/gob"
"errors"
"fmt"
"io"
"log"
"os"
"path"
"os/exec"
"os/signal"
"runtime"
"runtime/cgo"
"strconv"
"syscall"
"unsafe"
"hakurei.app/container"
"hakurei.app/container/check"
"hakurei.app/container/std"
"hakurei.app/hst"
"hakurei.app/internal/helper/proc"
"hakurei.app/internal/info"
"hakurei.app/message"
)
type (
@@ -40,8 +49,11 @@ type (
// Whether sharefs_init failed.
initFailed bool
// Open file descriptor to backing directory.
Source int
// Open file descriptor to fuse.
Fuse int
// Pathname to open for dirfd.
Source *check.Absolute
// New uid and gid to set by sharefs_init when starting as root.
Setuid, Setgid int
}
@@ -71,22 +83,8 @@ func sharefs_init(_ *C.struct_fuse_conn_info, cfg *C.struct_fuse_config) unsafe.
setup := cgo.Handle(priv.setup).Value().(*setupState)
if os.Geteuid() == 0 {
if setup.Setuid <= 0 || setup.Setgid <= 0 {
log.Println("setuid and setgid must not be 0")
goto fail
}
if err := syscall.Setresgid(setup.Setgid, setup.Setgid, setup.Setgid); err != nil {
log.Printf("cannot set gid: %v", err)
goto fail
}
if err := syscall.Setgroups(nil); err != nil {
log.Printf("cannot set supplementary groups: %v", err)
goto fail
}
if err := syscall.Setresuid(setup.Setuid, setup.Setuid, setup.Setuid); err != nil {
log.Printf("cannot set uid: %v", err)
goto fail
}
log.Println("filesystem daemon must not run as root")
goto fail
}
cfg.use_ino = C.true
@@ -97,7 +95,16 @@ func sharefs_init(_ *C.struct_fuse_conn_info, cfg *C.struct_fuse_config) unsafe.
cfg.negative_timeout = 0
// all future filesystem operations happen through this dirfd
priv.dirfd = C.int(setup.Source)
if fd, err := syscall.Open(setup.Source.String(), syscall.O_DIRECTORY|syscall.O_RDONLY|syscall.O_CLOEXEC, 0); err != nil {
log.Printf("cannot open %q: %v", setup.Source, err)
goto fail
} else if err = syscall.Fchdir(fd); err != nil {
_ = syscall.Close(fd)
log.Printf("cannot enter %q: %s", setup.Source, err)
goto fail
} else {
priv.dirfd = C.int(fd)
}
return ctx.private_data
@@ -177,8 +184,20 @@ func parseOpts(args *C.struct_fuse_args, setup *setupState) (ok bool) {
if v, err := strconv.Atoi(C.GoString(unsafeOpts.setup)); err != nil || v < 3 {
log.Println("invalid value for option setup")
return false
} else if err = gob.NewDecoder(os.NewFile(uintptr(v), "setup")).Decode(setup); err != nil {
log.Println(err)
} else {
r := os.NewFile(uintptr(v), "setup")
defer func() {
if err = r.Close(); err != nil {
log.Println(err)
}
}()
if err = gob.NewDecoder(r).Decode(setup); err != nil {
log.Println(err)
return false
}
}
if setup.Fuse < 3 {
log.Println("invalid file descriptor", setup.Fuse)
return false
}
return true
@@ -187,23 +206,11 @@ func parseOpts(args *C.struct_fuse_args, setup *setupState) (ok bool) {
if unsafeOpts.source == nil {
showHelp(args)
return false
} else if source := C.GoString(unsafeOpts.source); !path.IsAbs(source) {
log.Println("source is not absolute")
return false
} else if fd, err := syscall.Open(source, syscall.O_DIRECTORY|syscall.O_RDONLY, 0); err != nil {
log.Printf("cannot open source: %v", err)
return false
} else if err = syscall.Fchdir(fd); err != nil {
_ = syscall.Close(fd)
log.Printf("cannot enter source: %s", err)
} else if a, err := check.NewAbs(C.GoString(unsafeOpts.source)); err != nil {
log.Println(err)
return false
} else {
setup.Source = fd
defer func() {
if !ok {
_ = syscall.Close(fd)
}
}()
setup.Source = a
}
if unsafeOpts.setuid == nil {
@@ -244,7 +251,9 @@ func unsafeAddArgument(args *C.struct_fuse_args, arg string) {
C.fuse_opt_add_arg(args, (*C.char)(unsafe.Pointer(unsafe.StringData(arg))))
}
func _main(argc int, argv **C.char) int {
func _main(argc int, argv **C.char) (exitCode int) {
msg := message.New(log.Default())
container.TryArgv0(msg)
runtime.LockOSThread()
// don't mask creation mode, kernel already did that
@@ -286,11 +295,24 @@ func _main(argc int, argv **C.char) int {
if opts.show_help != 0 {
showHelp(&args)
return 0
}
if opts.show_help == 0 && opts.mountpoint == nil {
} else if opts.mountpoint == nil {
log.Println("no mountpoint specified")
return 2
} else {
// hack to keep fuse_parse_cmdline happy in the container
mountpoint := C.GoString(opts.mountpoint)
pathnameArg := -1
for i, arg := range os.Args {
if arg == mountpoint {
pathnameArg = i
break
}
}
if pathnameArg < 0 {
log.Println("mountpoint must be absolute")
return 2
}
os.Args[pathnameArg] = container.Nonexistent
}
if !parseOpts(&args, &setup) {
@@ -302,7 +324,7 @@ func _main(argc int, argv **C.char) int {
log.Println("setuid and setgid must not be 0")
return 1
}
} else if setup.Setuid > 0 || setup.Setgid > 0 {
} else if setup.Fuse < 3 && (setup.Setuid > 0 || setup.Setgid > 0) {
log.Println("setuid and setgid has no effect when not starting as root")
return 1
}
@@ -334,18 +356,136 @@ func _main(argc int, argv **C.char) int {
return 3
}
defer C.fuse_destroy(fuse)
se := C.fuse_get_session(fuse)
if setup.Fuse < 3 {
// unconfined, set up mount point and container
if C.fuse_mount(fuse, opts.mountpoint) != 0 {
return 4
}
// unmounted by initial process
defer func() {
if exitCode == 5 {
C.fuse_unmount(fuse)
}
}()
if os.Geteuid() == 0 {
if setup.Setuid <= 0 || setup.Setgid <= 0 {
log.Println("setuid and setgid must not be 0")
return 5
}
if err := syscall.Setresgid(setup.Setgid, setup.Setgid, setup.Setgid); err != nil {
log.Printf("cannot set gid: %v", err)
return 5
}
if err := syscall.Setgroups(nil); err != nil {
log.Printf("cannot set supplementary groups: %v", err)
return 5
}
if err := syscall.Setresuid(setup.Setuid, setup.Setuid, setup.Setuid); err != nil {
log.Printf("cannot set uid: %v", err)
return 5
}
}
msg.SwapVerbose(opts.debug != 0)
ctx := context.Background()
if opts.foreground != 0 {
c, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
defer cancel()
ctx = c
}
z := container.New(ctx, msg)
z.AllowOrphan = opts.foreground == 0
z.Env = os.Environ()
// keep fuse_parse_cmdline happy in the container
z.Tmpfs(check.MustAbs(container.Nonexistent), 1<<10, 0755)
if a, err := check.NewAbs(container.MustExecutable(msg)); err != nil {
log.Println(err)
return 5
} else {
z.Path = a
}
z.Args = os.Args
z.ForwardCancel = true
z.SeccompPresets |= std.PresetStrict
z.ParentPerm = 0700
z.Bind(setup.Source, setup.Source, std.BindWritable)
if !z.AllowOrphan {
z.WaitDelay = hst.WaitDelayMax
z.Stdin, z.Stdout, z.Stderr = os.Stdin, os.Stdout, os.Stderr
}
z.Bind(z.Path, z.Path, 0)
setup.Fuse = int(proc.ExtraFileSlice(&z.ExtraFiles, os.NewFile(uintptr(C.fuse_session_fd(se)), "fuse")))
var setupWriter io.WriteCloser
if fd, w, err := container.Setup(&z.ExtraFiles); err != nil {
log.Println(err)
return 5
} else {
z.Args = append(z.Args, "-osetup="+strconv.Itoa(fd))
setupWriter = w
}
if err := z.Start(); err != nil {
if m, ok := message.GetMessage(err); ok {
log.Println(m)
} else {
log.Println(err)
}
return 5
}
if err := z.Serve(); err != nil {
if m, ok := message.GetMessage(err); ok {
log.Println(m)
} else {
log.Println(err)
}
return 5
}
if err := gob.NewEncoder(setupWriter).Encode(&setup); err != nil {
log.Println(err)
return 5
} else if err = setupWriter.Close(); err != nil {
log.Println(err)
}
if !z.AllowOrphan {
if err := z.Wait(); err != nil {
var exitError *exec.ExitError
if !errors.As(err, &exitError) || exitError == nil {
log.Println(err)
return 5
}
switch code := exitError.ExitCode(); syscall.Signal(code & 0x7f) {
case syscall.SIGINT:
case syscall.SIGTERM:
default:
return code
}
}
}
return 0
} else { // confined
C.free(unsafe.Pointer(opts.mountpoint))
// must be heap allocated
opts.mountpoint = C.CString("/dev/fd/" + strconv.Itoa(setup.Fuse))
if err := os.Chdir("/"); err != nil {
log.Println(err)
}
}
if C.fuse_mount(fuse, opts.mountpoint) != 0 {
return 4
}
defer C.fuse_unmount(fuse)
// TODO(ophestra): spawn container here, set PR_SET_NO_NEW_PRIVS and enforce landlock
if C.fuse_daemonize(opts.foreground) != 0 {
return 5
}
se := C.fuse_get_session(fuse)
if C.fuse_set_signal_handlers(se) != 0 {
return 6
}

View File

@@ -33,11 +33,6 @@ check_bad_opts_output("allow_other", "sharefs: setuid and setgid must not be 0\n
check_bad_opts_output("setuid=1023", "sharefs: setuid and setgid must not be 0\n", privileged=True)
check_bad_opts_output("setgid=1023", "sharefs: setuid and setgid must not be 0\n", privileged=True)
# Bad backing directory:
check_bad_opts_output("clone_fd", "sharefs: cannot open source: no such file or directory\n", source="/proc/nonexistent")
check_bad_opts_output("clone_fd", "sharefs: cannot open source: not a directory\n", source="/proc/self/exe")
check_bad_opts_output("clone_fd", "sharefs: cannot open source: permission denied\n", source="/root")
# Make sure nothing actually got mounted:
machine.fail("umount /mnt")
machine.succeed("rmdir /mnt")

View File

@@ -88,7 +88,6 @@ in
"noatime"
"auto_unmount"
"allow_other"
"clone_fd"
"setuid=$(id -u ${cfg.sharefs.user})"
"setgid=$(id -g ${cfg.sharefs.group})"
"source=${cfg.sharefs.source}"