internal/pkg: implement caching for files
All checks were successful
Test / Create distribution (push) Successful in 46s
Test / Sandbox (push) Successful in 2m30s
Test / ShareFS (push) Successful in 3m34s
Test / Sandbox (race detector) (push) Successful in 4m42s
Test / Hpkg (push) Successful in 4m22s
Test / Hakurei (race detector) (push) Successful in 3m15s
Test / Hakurei (push) Successful in 2m28s
Test / Flake checks (push) Successful in 1m39s

This change contains primitives for validating and caching single-file artifacts.
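
As a rough sketch of the intended flow (illustrative only: the fetchData producer, the artifact name being hashed, and the loadExample wrapper below are made up for this message and are not part of the change), a single-file artifact derives its ID ahead of time and goes through Cache.LoadOrStoreFile, which only invokes the producer on a cache miss:

    package example

    import (
        "crypto/sha512"
        "encoding/base64"
        "log"

        "hakurei.app/container/check"
        "hakurei.app/internal/pkg"
    )

    // fetchData stands in for whatever produces the artifact contents on a
    // cache miss, e.g. a download; the name and behaviour are illustrative.
    func fetchData() ([]byte, error) { return []byte("example payload"), nil }

    // loadExample derives an identifier ahead of time (here simply by hashing
    // a fixed name) and loads or stores the corresponding file artifact.
    func loadExample(base *check.Absolute) ([]byte, error) {
        c, err := pkg.New(base)
        if err != nil {
            return nil, err
        }
        id := pkg.ID(sha512.Sum384([]byte("example-artifact-v1")))

        var checksum pkg.Checksum
        // validate is false, so on a store the computed checksum is written
        // back into checksum; on a cache hit it is left untouched.
        _, data, stored, err := c.LoadOrStoreFile(id, fetchData, &checksum, false)
        if err != nil {
            return nil, err
        }
        if stored {
            log.Println("stored", base64.URLEncoding.EncodeToString(checksum[:]))
        }
        return data, nil
    }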

Signed-off-by: Ophestra <cat@gensokyo.uk>
2026-01-02 00:59:37 +09:00
parent 993afde840
commit 7ad8f15030
2 changed files with 673 additions and 0 deletions

internal/pkg/pkg.go (Normal file, +307 lines)

@@ -0,0 +1,307 @@
// Package pkg provides utilities for packaging software.
package pkg
import (
"crypto/sha512"
"encoding/base64"
"encoding/gob"
"errors"
"io"
"io/fs"
"os"
"sync"
"hakurei.app/container/check"
)
type (
// A Checksum is a SHA-384 checksum computed for a cured [Artifact].
Checksum = [sha512.Size384]byte
// An ID is a unique identifier returned by [Artifact.ID]. This value must
// be derived deterministically ahead of time.
ID Checksum
)
// MustDecode decodes a string representation of [Checksum] and panics if there
// is a decoding error or the resulting data is too short.
func MustDecode(s string) (checksum Checksum) {
if n, err := base64.URLEncoding.Decode(
checksum[:],
[]byte(s),
); err != nil {
panic(err)
} else if n != len(Checksum{}) {
panic(io.ErrUnexpectedEOF)
}
return
}
// An Artifact is a read-only reference to a piece of data that may be created
// deterministically but might not currently be available in memory or on the
// filesystem.
type Artifact interface {
// ID returns a globally unique identifier referring to the current
// [Artifact]. This value must be known ahead of time and guaranteed to be
// unique without having obtained the full contents of the [Artifact].
ID() ID
// Hash returns the [Checksum] created from the full contents of a cured
// [Artifact]. This can be stored for future lookup in a [Cache].
//
// A call to Hash implicitly cures [Artifact].
Hash() (Checksum, error)
// Pathname returns an absolute pathname to a file or directory populated
// with the full contents of [Artifact]. This is the most expensive operation
// on any [Artifact] and should be avoided where possible.
//
// A call to Pathname implicitly cures [Artifact].
//
// Callers must only open files read-only. If [Artifact] is a directory,
// files must not be created or removed under this directory.
Pathname() (*check.Absolute, error)
}
// A File refers to an [Artifact] backed by a single file.
type File interface {
// Data returns the full contents of [Artifact].
//
// Callers must not modify the returned byte slice.
Data() ([]byte, error)
Artifact
}
// FlatEntry is the representation of a directory entry produced by [Flatten].
type FlatEntry struct {
Name string // base name of the file
Mode fs.FileMode // file mode bits
Data []byte // file content or symlink destination
}
// Flatten writes a deterministic representation of the contents of fsys to w.
// The resulting data can be hashed to produce a deterministic checksum for the
// directory.
func Flatten(fsys fs.FS, root string, w io.Writer) error {
e := gob.NewEncoder(w)
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
var fi fs.FileInfo
fi, err = d.Info()
if err != nil {
return err
}
ent := FlatEntry{
Name: fi.Name(),
Mode: fi.Mode(),
}
if ent.Mode.IsRegular() {
if ent.Data, err = fs.ReadFile(fsys, path); err != nil {
return err
}
} else if ent.Mode&fs.ModeSymlink != 0 {
var newpath string
if newpath, err = fs.ReadLink(fsys, path); err != nil {
return err
}
ent.Data = []byte(newpath)
}
return e.Encode(&ent)
})
}
// HashFS returns a checksum produced by hashing the result of [Flatten].
func HashFS(fsys fs.FS, root string) (Checksum, error) {
h := sha512.New384()
if err := Flatten(fsys, root, h); err != nil {
return Checksum{}, err
}
return (Checksum)(h.Sum(nil)), nil
}
// HashDir returns a checksum produced by hashing the result of [Flatten]
// over the directory at pathname.
func HashDir(pathname *check.Absolute) (Checksum, error) {
return HashFS(os.DirFS(pathname.String()), ".")
}
const (
// dirIdentifier is the directory name appended to Cache.base for storing
// artifacts named after their [ID].
dirIdentifier = "identifier"
// dirChecksum is the directory name appended to Cache.base for storing
// artifacts named after their [Checksum].
dirChecksum = "checksum"
)
// Cache is a support layer that implementations of [Artifact] can use to store
// cured [Artifact] data in a content-addressed fashion.
type Cache struct {
// Directory where all [Cache] related files are placed.
base *check.Absolute
// Synchronises access to public methods.
mu sync.RWMutex
}
// LoadFile loads the contents of a [File] by its identifier.
func (c *Cache) LoadFile(id ID) (
pathname *check.Absolute,
data []byte,
err error,
) {
pathname = c.base.Append(
dirIdentifier,
base64.URLEncoding.EncodeToString(id[:]),
)
c.mu.RLock()
data, err = os.ReadFile(pathname.String())
c.mu.RUnlock()
return
}
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
type ChecksumMismatchError struct {
// Actual and expected checksums.
Got, Want Checksum
}
func (e *ChecksumMismatchError) Error() string {
return "got " + base64.URLEncoding.EncodeToString(e.Got[:]) +
" instead of " + base64.URLEncoding.EncodeToString(e.Want[:])
}
// pathname returns the content-addressed pathname for a [Checksum].
func (c *Cache) pathname(checksum *Checksum) *check.Absolute {
return c.base.Append(
dirChecksum,
base64.URLEncoding.EncodeToString(checksum[:]),
)
}
// pathnameIdent returns the identifier-based pathname for an [ID].
func (c *Cache) pathnameIdent(id *ID) *check.Absolute {
return c.base.Append(
dirIdentifier,
base64.URLEncoding.EncodeToString(id[:]),
)
}
// storeFile stores the contents of a [File]. If validate is true, buf must
// hold the expected checksum, which is checked against the submitted data;
// otherwise the computed checksum is written back into buf.
//
// If lock is false, the caller is responsible for acquiring the write lock
// before calling and releasing it after this method returns. This makes
// LoadOrStoreFile possible while still letting the store-only path compute
// the hash without holding the lock.
func (c *Cache) storeFile(
identifierPathname *check.Absolute,
data []byte,
buf *Checksum,
validate, lock bool,
) error {
h := sha512.New384()
h.Write(data)
if validate {
if got := (Checksum)(h.Sum(nil)); got != *buf {
return &ChecksumMismatchError{got, *buf}
}
} else {
h.Sum(buf[:0])
}
checksumPathname := c.pathname(buf)
if lock {
c.mu.Lock()
defer c.mu.Unlock()
}
if f, err := os.OpenFile(
checksumPathname.String(),
os.O_WRONLY|os.O_CREATE|os.O_EXCL,
0400,
); err != nil {
// two artifacts may be backed by the same file
if !errors.Is(err, os.ErrExist) {
return err
}
} else if _, err = f.Write(data); err != nil {
// do not attempt cleanup: this is content-addressed and a partial
// write is caught during integrity check; still release the descriptor
f.Close()
return err
} else if err = f.Close(); err != nil {
return err
}
return os.Link(
checksumPathname.String(),
identifierPathname.String(),
)
}
// StoreFile stores the contents of a [File]. If validate is true, buf must
// hold the expected checksum, which is used to validate the submitted data;
// otherwise the computed checksum is written back into buf.
func (c *Cache) StoreFile(
id ID,
data []byte,
buf *Checksum,
validate bool,
) (pathname *check.Absolute, err error) {
pathname = c.pathnameIdent(&id)
err = c.storeFile(pathname, data, buf, validate, true)
return
}
// LoadOrStoreFile attempts to load the contents of a [File] by its identifier,
// and if that file is not present, calls makeData and stores its result
// instead. Hash validation behaviour is identical to StoreFile.
func (c *Cache) LoadOrStoreFile(
id ID,
makeData func() ([]byte, error),
buf *Checksum,
validate bool,
) (
pathname *check.Absolute,
data []byte,
store bool,
err error,
) {
pathname = c.pathnameIdent(&id)
c.mu.Lock()
defer c.mu.Unlock()
data, err = os.ReadFile(pathname.String())
if err == nil || !errors.Is(err, os.ErrNotExist) {
return
}
store = true
data, err = makeData()
if err != nil {
return
}
err = c.storeFile(pathname, data, buf, validate, false)
return
}
// New returns the address of a new instance of [Cache], creating its
// directory layout under base if it does not yet exist.
func New(base *check.Absolute) (*Cache, error) {
for _, name := range []string{
dirIdentifier,
dirChecksum,
} {
if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil &&
!errors.Is(err, os.ErrExist) {
return nil, err
}
}
return &Cache{
base: base,
}, nil
}
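
A minimal sketch of how the directory-hashing half could be exercised, assuming an in-memory tree built with testing/fstest; the test name, file names and contents below are arbitrary and not part of this change:

    package pkg_test

    import (
        "testing"
        "testing/fstest"

        "hakurei.app/internal/pkg"
    )

    // TestHashFSDeterministic checks that hashing the same tree twice yields
    // the same checksum, the property that makes the result usable as a
    // content address.
    func TestHashFSDeterministic(t *testing.T) {
        fsys := fstest.MapFS{
            "bin/app":    {Data: []byte("#!/bin/sh\necho hi\n"), Mode: 0o755},
            "share/data": {Data: []byte("payload"), Mode: 0o644},
        }

        a, err := pkg.HashFS(fsys, ".")
        if err != nil {
            t.Fatal(err)
        }
        b, err := pkg.HashFS(fsys, ".")
        if err != nil {
            t.Fatal(err)
        }
        if a != b {
            t.Errorf("HashFS not deterministic: %x != %x", a, b)
        }
    }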