Files
hakurei/internal/uevent/uevent.go
Ophestra 91a2d4d6e1
All checks were successful
Test / Create distribution (push) Successful in 1m17s
Test / Sandbox (push) Successful in 3m13s
Test / Hakurei (push) Successful in 4m18s
Test / ShareFS (push) Successful in 4m24s
Test / Sandbox (race detector) (push) Successful in 5m35s
Test / Hakurei (race detector) (push) Successful in 6m42s
Test / Flake checks (push) Successful in 1m25s
internal/uevent: integrate error handling in event loop
There are many subtleties when recovering from errors in the event loop, and coldboot requires internals to drain the receive buffer as synthetic uevents are being arranged.

Signed-off-by: Ophestra <cat@gensokyo.uk>
2026-03-31 00:10:14 +09:00

223 lines
5.8 KiB
Go

// Package uevent provides a userspace client for consuming events from a
// NETLINK_KOBJECT_UEVENT socket, as well as helpers for supplementing
// events received from the kernel.
package uevent
import (
"context"
"errors"
"strconv"
"sync/atomic"
"syscall"
"hakurei.app/internal/netlink"
)
// Recoverable is the marker interface implemented by errors that are safe
// to recover from.
type Recoverable interface {
	recoverable()
}

// Nontrivial is the marker interface implemented by errors that prefer a
// JSON encoding.
type Nontrivial interface {
	nontrivial()
}

// NeedsColdboot is the marker interface implemented by errors signalling that
// local state has diverged from the kernel, usually because uevent data was
// lost. Every NeedsColdboot error is also [Recoverable].
type NeedsColdboot interface {
	Recoverable
	coldboot()
}
// Indices into Conn.excl, one per mutually exclusive operation on a Conn.
const (
	// exclConsume is the index passed to enterExcl and exitExcl by
	// Conn.Consume.
	exclConsume = iota

	// _exclLen is the number of indices declared above and is used as the
	// length of Conn.excl. It must remain the last entry.
	_exclLen
)
// Conn represents a NETLINK_KOBJECT_UEVENT socket.
type Conn struct {
	// conn is the underlying netlink connection, populated by Dial.
	conn *netlink.Conn

	// Whether currently between a call to enterExcl and exitExcl.
	// Each element is indexed by one of the excl* constants.
	excl [_exclLen]atomic.Bool
}
// enterExcl marks the start of the critical section identified by k, and must
// be called before interacting with conn inside such a section.
//
// It returns syscall.EAGAIN if the section identified by k is already
// entered, and nil once the section has been claimed for the caller.
func (c *Conn) enterExcl(k int) error {
	// A single atomic compare-and-swap both tests and claims the flag.
	if c.excl[k].CompareAndSwap(false, true) {
		return nil
	}
	return syscall.EAGAIN
}
// exitExcl marks the end of the critical section identified by k, and must be
// called when leaving a section previously claimed via enterExcl.
func (c *Conn) exitExcl(k int) {
	c.excl[k].Store(false)
}
// Close closes the underlying socket and returns the error, if any, reported
// by the underlying connection.
func (c *Conn) Close() error {
	return c.conn.Close()
}
// Dial opens a NETLINK_KOBJECT_UEVENT socket and returns the address of the
// newly connected [Conn].
//
// The rcvbuf value is forwarded unchanged to netlink.Dial. Any error returned
// by netlink.Dial is returned as is, alongside a nil Conn.
func Dial(rcvbuf int64) (*Conn, error) {
	// kernel group is hard coded in lib/kobject_uevent.c, undocumented
	const kernelGroup = 1

	nl, err := netlink.Dial(syscall.NETLINK_KOBJECT_UEVENT, kernelGroup, rcvbuf)
	if err != nil {
		return nil, err
	}
	return &Conn{conn: nl}, nil
}
// ErrBadSocket is returned by [Conn.Consume] when a reply arrives from a
// syscall.Sockaddr whose concrete type is not the expected one.
var ErrBadSocket = errors.New("unexpected socket address")
// ReceiveBufferError reports that one or more [Message] were lost because the
// socket receive buffer filled up. This is usually caused by epoll waking the
// receiving program up too late.
type ReceiveBufferError struct{ _ [0]*ReceiveBufferError }

// Compile-time check that ReceiveBufferError satisfies NeedsColdboot.
var _ NeedsColdboot = ReceiveBufferError{}

// recoverable marks ReceiveBufferError as satisfying Recoverable.
func (ReceiveBufferError) recoverable() {}

// coldboot marks ReceiveBufferError as satisfying NeedsColdboot.
func (ReceiveBufferError) coldboot() {}

// Unwrap returns syscall.ENOBUFS, so that errors.Is matches it.
func (ReceiveBufferError) Unwrap() error {
	return syscall.ENOBUFS
}

// Error returns the message of the wrapped syscall.ENOBUFS.
func (e ReceiveBufferError) Error() string {
	return e.Unwrap().Error()
}
// BadPortError is returned by [Conn.Consume] upon receiving a message that did
// not come from the kernel. Its Pid field holds the port id of the sender.
type BadPortError syscall.SockaddrNetlink

// Compile-time check that *BadPortError satisfies Recoverable.
var _ Recoverable = new(BadPortError)

// recoverable marks *BadPortError as satisfying Recoverable.
func (*BadPortError) recoverable() {}

// Error returns a message describing the offending port id.
func (e *BadPortError) Error() string {
	// Pid is a uint32: format it as unsigned so that port ids above
	// math.MaxInt32 are not reported as negative numbers on platforms where
	// int is 32 bits wide.
	return "unexpected message from port id " + strconv.FormatUint(uint64(e.Pid), 10) +
		" on NETLINK_KOBJECT_UEVENT"
}
// receiveEvent receives a single datagram from the socket, validates its
// sender and returns the address of the [Message] decoded from it.
//
// A receive error matching syscall.ENOBUFS is reported as
// [ReceiveBufferError]; any other receive error is returned unchanged.
// [ErrBadSocket] is returned if the sender address is not a
// *syscall.SockaddrNetlink, and a [BadPortError] if its port id is nonzero.
// Errors from decoding the payload are returned unchanged.
func (c *Conn) receiveEvent(ctx context.Context) (*Message, error) {
	payload, _, addr, err := c.conn.Recvmsg(ctx, 0)
	switch {
	case err == nil:
		// proceed to validation below
	case errors.Is(err, syscall.ENOBUFS):
		return nil, ReceiveBufferError{}
	default:
		return nil, err
	}

	// lib/kobject_uevent.c:
	// set portid 0 to inform userspace message comes from kernel
	sender, ok := addr.(*syscall.SockaddrNetlink)
	if !ok {
		return nil, ErrBadSocket
	}
	if sender.Pid != 0 {
		return nil, (*BadPortError)(sender)
	}

	msg := new(Message)
	if err = msg.UnmarshalBinary(payload); err != nil {
		return nil, err
	}
	return msg, nil
}
// Consume continuously receives and parses events from the kernel and handles
// [Recoverable] and [NeedsColdboot] errors via caller-supplied functions,
// entering coldboot when required.
//
// Every successfully received [Message] is sent on events; the send blocks
// until the channel accepts the value. If coldboot is true, a [Coldboot] of
// sysfs is arranged before regular event processing begins.
//
// For each uevent file visited by [Coldboot], handleColdbootVisited is called
// with its pathname. This function must never block. handleWalkErr is passed
// through to [Coldboot] unchanged.
//
// When consuming events, a non-nil error not satisfying [Recoverable] is
// returned immediately. Otherwise, handleConsumeErr is called with the error
// value. If the error satisfies [NeedsColdboot], a [Coldboot] is arranged
// before event processing resumes. If handleConsumeErr returns false, the error
// value is immediately returned as is. A non-nil error from [Coldboot] is
// returned as is.
//
// Callers are expected to reject excessively frequent [NeedsColdboot] errors
// in handleConsumeErr to avoid being stuck in a [Coldboot] loop. Event
// processing is allowed to restart without initial coldboot after recovering
// from such a condition, provided the caller adequately reports the degraded,
// diverging state to the user.
//
// Callers must not restart event processing after a non-nil error that does not
// satisfy [Recoverable] is returned.
//
// Only one call to Consume may be in progress on a [Conn] at any time;
// syscall.EAGAIN is returned if another call is already in progress.
func (c *Conn) Consume(
	ctx context.Context,
	sysfs string,
	events chan<- *Message,
	coldboot bool,
	handleColdbootVisited func(string),
	handleConsumeErr func(error) bool,
	handleWalkErr func(error) error,
) error {
	if err := c.enterExcl(exclConsume); err != nil {
		return err
	}
	defer c.exitExcl(exclConsume)

	// filterErr returns err together with whether it must be returned to the
	// caller of Consume. As a side effect it records a pending coldboot for
	// errors satisfying NeedsColdboot.
	filterErr := func(err error) (error, bool) {
		if _, ok := err.(Recoverable); !ok {
			return err, true
		}
		// avoids dropping pending coldboot
		if _, ok := err.(NeedsColdboot); ok {
			coldboot = true
		}
		return err, !handleConsumeErr(err)
	}

retry:
	if coldboot {
		goto coldboot
	}

	// steady state: block on the socket until an event or an error arrives
	for {
		msg, err := c.receiveEvent(ctx)
		if err == nil {
			events <- msg
			continue
		}

		if filteredErr, ok := filterErr(err); ok {
			return filteredErr
		}

		// The error was handled. If it satisfied NeedsColdboot, filterErr
		// has set coldboot and the coldboot must be arranged before event
		// processing resumes, instead of silently continuing to receive.
		if coldboot {
			goto coldboot
		}
	}

coldboot:
	// Cleared before the walk begins so that a NeedsColdboot error handled
	// during this coldboot schedules another one via filterErr.
	coldboot = false

	visited := make(chan string)
	ctxColdboot, cancelColdboot := context.WithCancel(ctx)
	var coldbootErr error
	go func() {
		// The write to coldbootErr precedes close(visited), so it is visible
		// to this function once the range loop below has terminated.
		coldbootErr = Coldboot(ctxColdboot, sysfs, visited, handleWalkErr)
		close(visited)
	}()

	for pathname := range visited {
		handleColdbootVisited(pathname)

		// Drain events already queued on the socket between visited uevent
		// files. NOTE(review): receiveEvent is given a nil context here and
		// EWOULDBLOCK ends the drain, so a nil context presumably requests a
		// non-blocking receive; confirm against netlink.Conn.Recvmsg.
		for {
			msg, err := c.receiveEvent(nil)
			if err == nil {
				events <- msg
				continue
			}
			if errors.Is(err, syscall.EWOULDBLOCK) {
				break
			}

			if filteredErr, ok := filterErr(err); ok {
				cancelColdboot()
				return filteredErr
			}
		}
	}
	cancelColdboot()

	if coldbootErr != nil {
		return coldbootErr
	}
	goto retry
}