All checks were successful
Test / Create distribution (push) Successful in 46s
Test / Sandbox (push) Successful in 2m30s
Test / ShareFS (push) Successful in 3m34s
Test / Sandbox (race detector) (push) Successful in 4m42s
Test / Hpkg (push) Successful in 4m22s
Test / Hakurei (race detector) (push) Successful in 3m15s
Test / Hakurei (push) Successful in 2m28s
Test / Flake checks (push) Successful in 1m39s
This change contains primitives for validating and caching single-file artifacts. Signed-off-by: Ophestra <cat@gensokyo.uk>
308 lines
7.9 KiB
Go
308 lines
7.9 KiB
Go
// Package pkg provides utilities for packaging software.
|
|
package pkg
|
|
|
|
import (
|
|
"crypto/sha512"
|
|
"encoding/base64"
|
|
"encoding/gob"
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"os"
|
|
"sync"
|
|
|
|
"hakurei.app/container/check"
|
|
)
|
|
|
|
// A Checksum is a SHA-384 checksum computed for a cured [Artifact].
type Checksum = [sha512.Size384]byte

// An ID is a unique identifier returned by [Artifact.ID]. This value must be
// deterministically determined ahead of time.
type ID Checksum
|
|
|
|
// MustDecode decodes a string representation of [Checksum] and panics if there
|
|
// is a decoding error or the resulting data is too short.
|
|
func MustDecode(s string) (checksum Checksum) {
|
|
if n, err := base64.URLEncoding.Decode(
|
|
checksum[:],
|
|
[]byte(s),
|
|
); err != nil {
|
|
panic(err)
|
|
} else if n != len(Checksum{}) {
|
|
panic(io.ErrUnexpectedEOF)
|
|
}
|
|
return
|
|
}
|
|
|
|
// An Artifact is a read-only reference to a piece of data that may be created
|
|
// deterministically but might not currently be available in memory or on the
|
|
// filesystem.
|
|
type Artifact interface {
|
|
// ID returns a globally unique identifier referring to the current
|
|
// [Artifact]. This value must be known ahead of time and guaranteed to be
|
|
// unique without having obtained the full contents of the [Artifact].
|
|
ID() ID
|
|
|
|
// Hash returns the [Checksum] created from the full contents of a cured
|
|
// [Artifact]. This can be stored for future lookup in a [Cache].
|
|
//
|
|
// A call to Hash implicitly cures [Artifact].
|
|
Hash() (Checksum, error)
|
|
|
|
// Pathname returns an absolute pathname to a file or directory populated
|
|
// with the full contents of [Artifact]. This is the most expensive
|
|
// operation possible on any [Artifact] and should be avoided if possible.
|
|
//
|
|
// A call to Pathname implicitly cures [Artifact].
|
|
//
|
|
// Callers must only open files read-only. If [Artifact] is a directory,
|
|
// files must not be created or removed under this directory.
|
|
Pathname() (*check.Absolute, error)
|
|
}
|
|
|
|
// A File refers to an [Artifact] backed by a single file.
|
|
type File interface {
|
|
// Data returns the full contents of [Artifact].
|
|
//
|
|
// Callers must not modify the returned byte slice.
|
|
Data() ([]byte, error)
|
|
|
|
Artifact
|
|
}
|
|
|
|
// FlatEntry is the representation of a single directory entry as written by
// [Flatten].
//
// The field order is part of the encoded representation and must not change.
type FlatEntry struct {
	// Name is the base name of the file.
	Name string
	// Mode holds the file mode bits.
	Mode fs.FileMode
	// Data is the file content, or the destination for a symlink.
	Data []byte
}
|
|
|
|
// Flatten writes a deterministic representation of the contents of fsys to w.
|
|
// The resulting data can be hashed to produce a deterministic checksum for the
|
|
// directory.
|
|
func Flatten(fsys fs.FS, root string, w io.Writer) error {
|
|
e := gob.NewEncoder(w)
|
|
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var fi fs.FileInfo
|
|
fi, err = d.Info()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ent := FlatEntry{
|
|
Name: fi.Name(),
|
|
Mode: fi.Mode(),
|
|
}
|
|
if ent.Mode.IsRegular() {
|
|
if ent.Data, err = fs.ReadFile(fsys, path); err != nil {
|
|
return err
|
|
}
|
|
} else if ent.Mode&fs.ModeSymlink != 0 {
|
|
var newpath string
|
|
if newpath, err = fs.ReadLink(fsys, path); err != nil {
|
|
return err
|
|
}
|
|
ent.Data = []byte(newpath)
|
|
}
|
|
|
|
return e.Encode(&ent)
|
|
})
|
|
}
|
|
|
|
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
|
func HashFS(fsys fs.FS, root string) (Checksum, error) {
|
|
h := sha512.New384()
|
|
if err := Flatten(fsys, root, h); err != nil {
|
|
return Checksum{}, err
|
|
}
|
|
return (Checksum)(h.Sum(nil)), nil
|
|
}
|
|
|
|
// HashDir returns a checksum produced by hashing the result of [Flatten].
|
|
func HashDir(pathname *check.Absolute) (Checksum, error) {
|
|
return HashFS(os.DirFS(pathname.String()), ".")
|
|
}
|
|
|
|
// dirIdentifier is the directory name appended to Cache.base for storing
// artifacts named after their [ID].
const dirIdentifier = "identifier"

// dirChecksum is the directory name appended to Cache.base for storing
// artifacts named after their [Checksum].
const dirChecksum = "checksum"
|
|
|
|
// Cache is a support layer that implementations of [Artifact] can use to store
|
|
// cured [Artifact] data in a content addressed fashion.
|
|
type Cache struct {
|
|
// Directory where all [Cache] related files are placed.
|
|
base *check.Absolute
|
|
|
|
// Synchronises access to public methods.
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
// LoadFile loads the contents of a [File] by its identifier.
|
|
func (c *Cache) LoadFile(id ID) (
|
|
pathname *check.Absolute,
|
|
data []byte,
|
|
err error,
|
|
) {
|
|
pathname = c.base.Append(
|
|
dirIdentifier,
|
|
base64.URLEncoding.EncodeToString(id[:]),
|
|
)
|
|
|
|
c.mu.RLock()
|
|
data, err = os.ReadFile(pathname.String())
|
|
c.mu.RUnlock()
|
|
|
|
return
|
|
}
|
|
|
|
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
|
|
type ChecksumMismatchError struct {
|
|
// Actual and expected checksums.
|
|
Got, Want Checksum
|
|
}
|
|
|
|
func (e *ChecksumMismatchError) Error() string {
|
|
return "got " + base64.URLEncoding.EncodeToString(e.Got[:]) +
|
|
" instead of " + base64.URLEncoding.EncodeToString(e.Want[:])
|
|
}
|
|
|
|
// pathname returns the content-addressed pathname for a [Checksum].
|
|
func (c *Cache) pathname(checksum *Checksum) *check.Absolute {
|
|
return c.base.Append(
|
|
dirChecksum,
|
|
base64.URLEncoding.EncodeToString(checksum[:]),
|
|
)
|
|
}
|
|
|
|
// pathnameIdent returns the identifier-based pathname for an [ID].
|
|
func (c *Cache) pathnameIdent(id *ID) *check.Absolute {
|
|
return c.base.Append(
|
|
dirIdentifier,
|
|
base64.URLEncoding.EncodeToString(id[:]),
|
|
)
|
|
}
|
|
|
|
// storeFile stores the contents of a [File]. An optional checksum can be
|
|
// passed via the result buffer which is used to validate the submitted data.
|
|
//
|
|
// If locking is disabled, the caller is responsible for acquiring a write lock
|
|
// and releasing it after this method returns. This makes LoadOrStoreFile
|
|
// possible without holding the lock while computing hash for store only.
|
|
func (c *Cache) storeFile(
|
|
identifierPathname *check.Absolute,
|
|
data []byte,
|
|
buf *Checksum,
|
|
validate, lock bool,
|
|
) error {
|
|
h := sha512.New384()
|
|
h.Write(data)
|
|
if validate {
|
|
if got := (Checksum)(h.Sum(nil)); got != *buf {
|
|
return &ChecksumMismatchError{got, *buf}
|
|
}
|
|
} else {
|
|
h.Sum(buf[:0])
|
|
}
|
|
|
|
checksumPathname := c.pathname(buf)
|
|
|
|
if lock {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
}
|
|
|
|
if f, err := os.OpenFile(
|
|
checksumPathname.String(),
|
|
os.O_WRONLY|os.O_CREATE|os.O_EXCL,
|
|
0400,
|
|
); err != nil {
|
|
// two artifacts may be backed by the same file
|
|
if !errors.Is(err, os.ErrExist) {
|
|
return err
|
|
}
|
|
} else if _, err = f.Write(data); err != nil {
|
|
// do not attempt cleanup: this is content-addressed and a partial
|
|
// write is caught during integrity check
|
|
return err
|
|
}
|
|
|
|
return os.Link(
|
|
checksumPathname.String(),
|
|
identifierPathname.String(),
|
|
)
|
|
}
|
|
|
|
// StoreFile stores the contents of a [File]. An optional checksum can be
|
|
// passed via the result buffer which is used to validate the submitted data.
|
|
func (c *Cache) StoreFile(
|
|
id ID,
|
|
data []byte,
|
|
buf *Checksum,
|
|
validate bool,
|
|
) (pathname *check.Absolute, err error) {
|
|
pathname = c.pathnameIdent(&id)
|
|
err = c.storeFile(pathname, data, buf, validate, true)
|
|
return
|
|
}
|
|
|
|
// LoadOrStoreFile attempts to load the contents of a [File] by its identifier,
|
|
// and if that file is not present, calls makeData and stores its result
|
|
// instead. Hash validation behaviour is identical to StoreFile.
|
|
func (c *Cache) LoadOrStoreFile(
|
|
id ID,
|
|
makeData func() ([]byte, error),
|
|
buf *Checksum,
|
|
validate bool,
|
|
) (
|
|
pathname *check.Absolute,
|
|
data []byte,
|
|
store bool,
|
|
err error,
|
|
) {
|
|
pathname = c.pathnameIdent(&id)
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
data, err = os.ReadFile(pathname.String())
|
|
if err == nil || !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
store = true
|
|
|
|
data, err = makeData()
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = c.storeFile(pathname, data, buf, validate, false)
|
|
return
|
|
}
|
|
|
|
// New returns the address to a new instance of [Cache].
|
|
func New(base *check.Absolute) (*Cache, error) {
|
|
for _, name := range []string{
|
|
dirIdentifier,
|
|
dirChecksum,
|
|
} {
|
|
if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil &&
|
|
!errors.Is(err, os.ErrExist) {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return &Cache{
|
|
base: base,
|
|
}, nil
|
|
}
|