All checks were successful
Test / Create distribution (push) Successful in 42s
Test / Sandbox (push) Successful in 2m45s
Test / Hakurei (push) Successful in 3m36s
Test / ShareFS (push) Successful in 3m42s
Test / Hpkg (push) Successful in 4m17s
Test / Sandbox (race detector) (push) Successful in 4m49s
Test / Hakurei (race detector) (push) Successful in 5m48s
Test / Flake checks (push) Successful in 1m44s
These are much easier to handle than hard links and should be just as transparent for this use case. Signed-off-by: Ophestra <cat@gensokyo.uk>
413 lines
10 KiB
Go
413 lines
10 KiB
Go
// Package pkg provides utilities for packaging software.
|
|
package pkg
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha512"
|
|
"encoding/base64"
|
|
"encoding/binary"
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"slices"
|
|
"sync"
|
|
|
|
"hakurei.app/container/check"
|
|
)
|
|
|
|
// Checksum is the SHA-384 digest computed over the contents of a cured
// [Artifact].
type Checksum = [sha512.Size384]byte

// ID is the unique identifier returned by [Artifact.ID]. Its value must be
// determined deterministically ahead of time.
type ID Checksum
|
|
|
|
// Encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]).
|
|
func Encode(checksum Checksum) string {
|
|
return base64.URLEncoding.EncodeToString(checksum[:])
|
|
}
|
|
|
|
// encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]).
|
|
func encode(checksum *Checksum) string {
|
|
return base64.URLEncoding.EncodeToString(checksum[:])
|
|
}
|
|
|
|
// MustDecode decodes a string representation of [Checksum] and panics if there
|
|
// is a decoding error or the resulting data is too short.
|
|
func MustDecode(s string) (checksum Checksum) {
|
|
if n, err := base64.URLEncoding.Decode(
|
|
checksum[:],
|
|
[]byte(s),
|
|
); err != nil {
|
|
panic(err)
|
|
} else if n != len(Checksum{}) {
|
|
panic(io.ErrUnexpectedEOF)
|
|
}
|
|
return
|
|
}
|
|
|
|
// An Artifact is a read-only reference to a piece of data that may be created
|
|
// deterministically but might not currently be available in memory or on the
|
|
// filesystem.
|
|
type Artifact interface {
|
|
// Kind returns the [Kind] of artifact. This is usually unique to the
|
|
// concrete type but two functionally identical implementations of
|
|
// [Artifact] is allowed to return the same [Kind] value.
|
|
Kind() Kind
|
|
|
|
// ID returns a globally unique identifier referring to the current
|
|
// [Artifact]. This value must be known ahead of time and guaranteed to be
|
|
// unique without having obtained the full contents of the [Artifact].
|
|
ID() ID
|
|
|
|
// Hash returns the [Checksum] created from the full contents of a cured
|
|
// [Artifact]. This can be stored for future lookup in a [Cache].
|
|
//
|
|
// A call to Hash implicitly cures [Artifact].
|
|
Hash() (Checksum, error)
|
|
|
|
// Pathname returns an absolute pathname to a file or directory populated
|
|
// with the full contents of [Artifact]. This is the most expensive
|
|
// operation possible on any [Artifact] and should be avoided if possible.
|
|
//
|
|
// A call to Pathname implicitly cures [Artifact].
|
|
//
|
|
// Callers must only open files read-only. If [Artifact] is a directory,
|
|
// files must not be created or removed under this directory.
|
|
Pathname() (*check.Absolute, error)
|
|
}
|
|
|
|
// A File refers to an [Artifact] backed by a single file.
|
|
type File interface {
|
|
// Data returns the full contents of [Artifact].
|
|
//
|
|
// Callers must not modify the returned byte slice.
|
|
Data() ([]byte, error)
|
|
|
|
Artifact
|
|
}
|
|
|
|
// Kind identifies the concrete type of an [Artifact]. It is mixed into the
// identifier created for an [Artifact] that has dependencies.
type Kind uint64

const (
	// KindHTTP is the kind of [Artifact] returned by [Cache.NewHTTP].
	KindHTTP Kind = iota
	// KindTar is the kind value following KindHTTP.
	// NOTE(review): its constructor is not visible in this file.
	KindTar
)
|
|
|
|
// Ident returns a deterministic identifier for the supplied params and
|
|
// dependencies. The caller is responsible for ensuring params uniquely and
|
|
// deterministically describes the current [Artifact].
|
|
func (k Kind) Ident(params []byte, deps ...Artifact) ID {
|
|
type extIdent [len(ID{}) + wordSize]byte
|
|
identifiers := make([]extIdent, len(deps))
|
|
for i, a := range deps {
|
|
id := a.ID()
|
|
copy(identifiers[i][wordSize:], id[:])
|
|
binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind()))
|
|
}
|
|
slices.SortFunc(identifiers, func(a, b extIdent) int { return bytes.Compare(a[:], b[:]) })
|
|
slices.Compact(identifiers)
|
|
|
|
h := sha512.New384()
|
|
h.Write(binary.LittleEndian.AppendUint64(nil, uint64(k)))
|
|
h.Write(params)
|
|
for _, e := range identifiers {
|
|
h.Write(e[:])
|
|
}
|
|
return ID(h.Sum(nil))
|
|
}
|
|
|
|
// Names of the directories created directly beneath Cache.base.
const (
	// dirIdentifier holds entries named after the [ID] of an artifact.
	dirIdentifier = "identifier"

	// dirChecksum holds entries named after the [Checksum] of an artifact.
	dirChecksum = "checksum"

	// dirWork holds the working directories created for [Cache.Store].
	dirWork = "work"
)
|
|
|
|
// Cache is a support layer that implementations of [Artifact] can use to store
|
|
// cured [Artifact] data in a content addressed fashion.
|
|
type Cache struct {
|
|
// Directory where all [Cache] related files are placed.
|
|
base *check.Absolute
|
|
|
|
// Protects the Store critical section.
|
|
storeMu sync.Mutex
|
|
|
|
// Synchronises access to most public methods.
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
// LoadFile loads the contents of a [File] by its identifier.
|
|
func (c *Cache) LoadFile(id ID) (
|
|
pathname *check.Absolute,
|
|
data []byte,
|
|
err error,
|
|
) {
|
|
pathname = c.base.Append(
|
|
dirIdentifier,
|
|
Encode(id),
|
|
)
|
|
|
|
c.mu.RLock()
|
|
data, err = os.ReadFile(pathname.String())
|
|
c.mu.RUnlock()
|
|
|
|
return
|
|
}
|
|
|
|
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
|
|
type ChecksumMismatchError struct {
|
|
// Actual and expected checksums.
|
|
Got, Want Checksum
|
|
}
|
|
|
|
func (e *ChecksumMismatchError) Error() string {
|
|
return "got " + Encode(e.Got) +
|
|
" instead of " + Encode(e.Want)
|
|
}
|
|
|
|
// pathname returns the content-addressed pathname for a [Checksum].
|
|
func (c *Cache) pathname(checksum *Checksum) *check.Absolute {
|
|
return c.base.Append(
|
|
dirChecksum,
|
|
encode(checksum),
|
|
)
|
|
}
|
|
|
|
// pathnameIdent returns the identifier-based pathname for an [ID].
|
|
func (c *Cache) pathnameIdent(id *ID) *check.Absolute {
|
|
return c.base.Append(
|
|
dirIdentifier,
|
|
encode((*Checksum)(id)),
|
|
)
|
|
}
|
|
|
|
// Store looks up an identifier, and if it is not present, calls makeArtifact
|
|
// with a private working directory and stores its result instead. An optional
|
|
// checksum can be passed via the result buffer which is used to validate the
|
|
// produced directory.
|
|
func (c *Cache) Store(
|
|
id ID,
|
|
makeArtifact func(work *check.Absolute) error,
|
|
buf *Checksum,
|
|
validate bool,
|
|
) (
|
|
pathname *check.Absolute,
|
|
store bool,
|
|
err error,
|
|
) {
|
|
pathname = c.pathnameIdent(&id)
|
|
c.storeMu.Lock()
|
|
defer c.storeMu.Unlock()
|
|
|
|
_, err = os.Lstat(pathname.String())
|
|
if err == nil || !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
store = true
|
|
|
|
var (
|
|
workPathname *check.Absolute
|
|
workPathnameRaw string
|
|
)
|
|
if workPathnameRaw, err = os.MkdirTemp(
|
|
c.base.Append(dirWork).String(),
|
|
path.Base(pathname.String()+".*"),
|
|
); err != nil {
|
|
return
|
|
} else if workPathname, err = check.NewAbs(workPathnameRaw); err != nil {
|
|
return
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
chmodErr := filepath.WalkDir(workPathname.String(), func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if d.IsDir() {
|
|
return os.Chmod(path, 0700)
|
|
}
|
|
return nil
|
|
})
|
|
removeErr := os.RemoveAll(workPathname.String())
|
|
if chmodErr != nil || removeErr != nil {
|
|
err = errors.Join(err, chmodErr, removeErr)
|
|
} else if errors.Is(err, os.ErrExist) {
|
|
// two artifacts may be backed by the same file
|
|
err = nil
|
|
}
|
|
}
|
|
}()
|
|
if err = os.Chmod(workPathname.String(), 0700); err != nil {
|
|
return
|
|
}
|
|
|
|
if err = makeArtifact(workPathname); err != nil {
|
|
return
|
|
}
|
|
// override this before hashing since it will be made read-only after the
|
|
// rename anyway so do not let perm bits affect the checksum
|
|
if err = os.Chmod(workPathname.String(), 0700); err != nil {
|
|
return
|
|
}
|
|
var checksum Checksum
|
|
if checksum, err = HashDir(workPathname); err != nil {
|
|
return
|
|
}
|
|
if validate {
|
|
if checksum != *buf {
|
|
err = &ChecksumMismatchError{checksum, *buf}
|
|
return
|
|
}
|
|
} else {
|
|
*buf = checksum
|
|
}
|
|
|
|
checksumPathname := c.pathname(&checksum)
|
|
if err = os.Rename(
|
|
workPathname.String(),
|
|
checksumPathname.String(),
|
|
); err != nil {
|
|
if !errors.Is(err, os.ErrExist) {
|
|
return
|
|
}
|
|
} else if err = os.Chmod(checksumPathname.String(), 0500); err != nil {
|
|
return
|
|
}
|
|
|
|
if linkErr := os.Symlink(
|
|
"../"+dirChecksum+"/"+path.Base(checksumPathname.String()),
|
|
pathname.String(),
|
|
); linkErr != nil {
|
|
err = linkErr
|
|
}
|
|
return
|
|
}
|
|
|
|
// storeFile stores the contents of a [File]. An optional checksum can be
|
|
// passed via the result buffer which is used to validate the submitted data.
|
|
//
|
|
// If locking is disabled, the caller is responsible for acquiring a write lock
|
|
// and releasing it after this method returns. This makes LoadOrStoreFile
|
|
// possible without holding the lock while computing hash for store only.
|
|
func (c *Cache) storeFile(
|
|
identifierPathname *check.Absolute,
|
|
data []byte,
|
|
buf *Checksum,
|
|
validate, lock bool,
|
|
) error {
|
|
h := sha512.New384()
|
|
h.Write(data)
|
|
if validate {
|
|
if got := (Checksum)(h.Sum(nil)); got != *buf {
|
|
return &ChecksumMismatchError{got, *buf}
|
|
}
|
|
} else {
|
|
h.Sum(buf[:0])
|
|
}
|
|
|
|
checksumPathname := c.pathname(buf)
|
|
|
|
if lock {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
}
|
|
|
|
if f, err := os.OpenFile(
|
|
checksumPathname.String(),
|
|
os.O_WRONLY|os.O_CREATE|os.O_EXCL,
|
|
0400,
|
|
); err != nil {
|
|
// two artifacts may be backed by the same file
|
|
if !errors.Is(err, os.ErrExist) {
|
|
return err
|
|
}
|
|
} else if _, err = f.Write(data); err != nil {
|
|
// do not attempt cleanup: this is content-addressed and a partial
|
|
// write is caught during integrity check
|
|
return err
|
|
}
|
|
|
|
return os.Symlink(
|
|
"../"+dirChecksum+"/"+path.Base(checksumPathname.String()),
|
|
identifierPathname.String(),
|
|
)
|
|
}
|
|
|
|
// StoreFile stores the contents of a [File]. An optional checksum can be
|
|
// passed via the result buffer which is used to validate the submitted data.
|
|
func (c *Cache) StoreFile(
|
|
id ID,
|
|
data []byte,
|
|
buf *Checksum,
|
|
validate bool,
|
|
) (pathname *check.Absolute, err error) {
|
|
pathname = c.pathnameIdent(&id)
|
|
err = c.storeFile(pathname, data, buf, validate, true)
|
|
return
|
|
}
|
|
|
|
// LoadOrStoreFile attempts to load the contents of a [File] by its identifier,
|
|
// and if that file is not present, calls makeData and stores its result
|
|
// instead. Hash validation behaviour is identical to StoreFile.
|
|
func (c *Cache) LoadOrStoreFile(
|
|
id ID,
|
|
makeData func() ([]byte, error),
|
|
buf *Checksum,
|
|
validate bool,
|
|
) (
|
|
pathname *check.Absolute,
|
|
data []byte,
|
|
store bool,
|
|
err error,
|
|
) {
|
|
pathname = c.pathnameIdent(&id)
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
data, err = os.ReadFile(pathname.String())
|
|
if err == nil || !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
store = true
|
|
|
|
data, err = makeData()
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = c.storeFile(pathname, data, buf, validate, false)
|
|
return
|
|
}
|
|
|
|
// New returns the address to a new instance of [Cache].
|
|
func New(base *check.Absolute) (*Cache, error) {
|
|
for _, name := range []string{
|
|
dirIdentifier,
|
|
dirChecksum,
|
|
dirWork,
|
|
} {
|
|
if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil &&
|
|
!errors.Is(err, os.ErrExist) {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return &Cache{
|
|
base: base,
|
|
}, nil
|
|
}
|