From 024489e800a3324e2305546598b606357606e274 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Mon, 4 May 2026 01:54:35 +0900 Subject: [PATCH] ext: wrap file-descriptor-based mount facilities This only implements what is required by package container for now. Signed-off-by: Ophestra --- ext/fs.go | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 ext/fs.go diff --git a/ext/fs.go b/ext/fs.go new file mode 100644 index 00000000..f2bce87c --- /dev/null +++ b/ext/fs.go @@ -0,0 +1,267 @@ +package ext + +import ( + "os" + "runtime" + "syscall" + "unsafe" +) + +// include/uapi/linux/mount.h + +/* + * move_mount() flags. + */ +const ( + MOVE_MOUNT_F_SYMLINKS = 1 << iota /* Follow symlinks on from path */ + MOVE_MOUNT_F_AUTOMOUNTS /* Follow automounts on from path */ + MOVE_MOUNT_F_EMPTY_PATH /* Empty from path permitted */ + _ + MOVE_MOUNT_T_SYMLINKS /* Follow symlinks on to path */ + MOVE_MOUNT_T_AUTOMOUNTS /* Follow automounts on to path */ + MOVE_MOUNT_T_EMPTY_PATH /* Empty to path permitted */ + _ + MOVE_MOUNT_SET_GROUP /* Set sharing group instead */ + MOVE_MOUNT_BENEATH /* Mount beneath top mount */ +) + +/* + * fsopen() flags. + */ +const ( + FSOPEN_CLOEXEC = 1 << iota +) + +/* + * fspick() flags. + */ +const ( + FSPICK_CLOEXEC = 1 << iota + FSPICK_SYMLINK_NOFOLLOW + FSPICK_NO_AUTOMOUNT + FSPICK_EMPTY_PATH +) + +/* + * The type of fsconfig() call made. + */ +const ( + FSCONFIG_SET_FLAG = iota /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL /* Create new superblock, fail if reusing existing superblock */ +) + +/* + * fsmount() flags. + */ +const ( + FSMOUNT_CLOEXEC = 1 << iota +) + +/* + * Mount attributes. + */ +const ( + MOUNT_ATTR_RDONLY = 0x00000001 /* Mount read-only */ + MOUNT_ATTR_NOSUID = 0x00000002 /* Ignore suid and sgid bits */ + MOUNT_ATTR_NODEV = 0x00000004 /* Disallow access to device special files */ + MOUNT_ATTR_NOEXEC = 0x00000008 /* Disallow program execution */ + MOUNT_ATTR__ATIME = 0x00000070 /* Setting on how atime should be updated */ + MOUNT_ATTR_RELATIME = 0x00000000 /* - Update atime relative to mtime/ctime. */ + MOUNT_ATTR_NOATIME = 0x00000010 /* - Do not update access times. */ + MOUNT_ATTR_STRICTATIME = 0x00000020 /* - Always perform atime updates */ + MOUNT_ATTR_NODIRATIME = 0x00000080 /* Do not update directory access times */ + MOUNT_ATTR_IDMAP = 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ + MOUNT_ATTR_NOSYMFOLLOW = 0x00200000 /* Do not follow symlinks */ +) + +// FS provides low-level wrappers around the suite of file-descriptor-based +// mount facilities in Linux. +type FS struct { + fd uintptr + c runtime.Cleanup +} + +// newFS allocates a new [FS] for the specified fd. +func newFS(fd uintptr) *FS { + fs := FS{fd: fd} + fs.c = runtime.AddCleanup(&fs, func(fd uintptr) { + _ = syscall.Close(int(fd)) + }, fd) + return &fs +} + +// Close closes the underlying filesystem context. +func (fs *FS) Close() error { + if fs == nil { + return syscall.EINVAL + } + err := syscall.Close(int(fs.fd)) + fs.c.Stop() + return err +} + +// OpenFS creates a new filesystem context. +func OpenFS(fsname string, flags int) (fs *FS, err error) { + var s *byte + s, err = syscall.BytePtrFromString(fsname) + if err != nil { + return + } + fd, _, errno := syscall.Syscall( + SYS_FSOPEN, + uintptr(unsafe.Pointer(s)), + uintptr(flags|FSOPEN_CLOEXEC), + 0, + ) + if errno != 0 { + err = os.NewSyscallError("fsopen", errno) + } else { + fs = newFS(fd) + } + return +} + +// PickFS selects filesystem for reconfiguration. +func PickFS(dirfd int, pathname string, flags int) (fs *FS, err error) { + var s *byte + s, err = syscall.BytePtrFromString(pathname) + if err != nil { + return + } + fd, _, errno := syscall.Syscall( + SYS_FSPICK, + uintptr(dirfd), + uintptr(unsafe.Pointer(s)), + uintptr(flags|FSPICK_CLOEXEC), + ) + if errno != 0 { + err = os.NewSyscallError("fspick", errno) + } else { + fs = newFS(fd) + } + return +} + +// config configures new or existing filesystem context. +func (fs *FS) config(cmd uint, key *byte, value unsafe.Pointer, aux int) (err error) { + _, _, errno := syscall.Syscall6( + SYS_FSCONFIG, + fs.fd, + uintptr(cmd), + uintptr(unsafe.Pointer(key)), + uintptr(value), + uintptr(aux), + 0, + ) + if errno != 0 { + err = os.NewSyscallError("fsconfig", errno) + } + return +} + +// SetFlag sets the flag parameter named by key. ([FSCONFIG_SET_FLAG]) +func (fs *FS) SetFlag(key string) (err error) { + var s *byte + s, err = syscall.BytePtrFromString(key) + if err != nil { + return + } + + return fs.config(FSCONFIG_SET_FLAG, s, nil, 0) +} + +// SetString sets the string parameter named by key to the value specified by +// value. ([FSCONFIG_SET_STRING]) +func (fs *FS) SetString(key, value string) (err error) { + var s0 *byte + s0, err = syscall.BytePtrFromString(key) + if err != nil { + return + } + + var s1 *byte + s1, err = syscall.BytePtrFromString(value) + if err != nil { + return + } + + return fs.config(FSCONFIG_SET_STRING, s0, unsafe.Pointer(s1), 0) +} + +// mount instantiates mount object from filesystem context. +func (fs *FS) mount(flags, attrFlags int) (fsfd int, err error) { + r, _, errno := syscall.Syscall( + SYS_FSMOUNT, + fs.fd, + uintptr(flags|FSMOUNT_CLOEXEC), + uintptr(attrFlags), + ) + fsfd = int(r) + if errno != 0 { + err = os.NewSyscallError("fsmount", errno) + } + return +} + +// MoveMount moves or attaches mount object to filesystem. +func MoveMount( + fromDirfd int, + fromPathname string, + toDirfd int, + toPathname string, + flags int, +) (err error) { + var s0 *byte + s0, err = syscall.BytePtrFromString(fromPathname) + if err != nil { + return + } + + var s1 *byte + s1, err = syscall.BytePtrFromString(toPathname) + if err != nil { + return + } + + _, _, errno := syscall.Syscall6( + SYS_MOVE_MOUNT, + uintptr(fromDirfd), + uintptr(unsafe.Pointer(s0)), + uintptr(toDirfd), + uintptr(unsafe.Pointer(s1)), + uintptr(flags), + 0, + ) + if errno != 0 { + err = os.NewSyscallError("move_mount", errno) + } + return +} + +// Mount attaches the underlying filesystem context to the specified pathname. +func (fs *FS) Mount(pathname string, attrFlags int) error { + if err := fs.config(FSCONFIG_CMD_CREATE_EXCL, nil, nil, 0); err != nil { + return err + } + fd, err := fs.mount(0, attrFlags) + if err != nil { + return err + } + err = MoveMount( + fd, "", + -1, pathname, + MOVE_MOUNT_F_EMPTY_PATH, + ) + closeErr := syscall.Close(fd) + if err == nil { + err = closeErr + } + return err +}