All checks were successful
Test / Create distribution (push) Successful in 44s
Test / Sandbox (push) Successful in 2m30s
Test / ShareFS (push) Successful in 3m40s
Test / Hpkg (push) Successful in 4m24s
Test / Sandbox (race detector) (push) Successful in 4m46s
Test / Hakurei (race detector) (push) Successful in 5m51s
Test / Hakurei (push) Successful in 2m28s
Test / Flake checks (push) Successful in 1m41s
The previous implementation exposes arbitrary user input to the cache as an identifier, which is highly error-prone and can cause the cache to enter an inconsistent state if the user is not careful. This change replaces the implementation so that the identifier is computed late, using the URL string as params. Signed-off-by: Ophestra <cat@gensokyo.uk>
610 lines
15 KiB
Go
610 lines
15 KiB
Go
// Package pkg provides utilities for packaging software.
|
|
package pkg
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha512"
|
|
"encoding/base64"
|
|
"encoding/binary"
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"slices"
|
|
"sync"
|
|
|
|
"hakurei.app/container/check"
|
|
)
|
|
|
|
type (
	// A Checksum is a SHA-384 checksum computed for a cured [Artifact]. It is
	// an alias, so it is identical to the underlying array type.
	Checksum = [sha512.Size384]byte

	// An ID is a unique identifier of an [Artifact], as returned by [Ident]
	// or [KnownIdent.ID]. This value must be deterministically determined
	// ahead of time. It is a distinct type that shares the layout of
	// [Checksum].
	ID Checksum
)
|
|
|
|
// Encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]).
|
|
func Encode(checksum Checksum) string {
|
|
return base64.URLEncoding.EncodeToString(checksum[:])
|
|
}
|
|
|
|
// Decode is abbreviation for base64.URLEncoding.Decode(checksum[:], []byte(s)).
|
|
func Decode(s string) (checksum Checksum, err error) {
|
|
var n int
|
|
n, err = base64.URLEncoding.Decode(checksum[:], []byte(s))
|
|
if err == nil && n != len(Checksum{}) {
|
|
err = io.ErrUnexpectedEOF
|
|
}
|
|
return
|
|
}
|
|
|
|
// MustDecode decodes a string representation of [Checksum] and panics if there
|
|
// is a decoding error or the resulting data is too short.
|
|
func MustDecode(s string) Checksum {
|
|
if checksum, err := Decode(s); err != nil {
|
|
panic(err)
|
|
} else {
|
|
return checksum
|
|
}
|
|
}
|
|
|
|
// CacheDataFunc is the loader handed to [Artifact.Cure] for reading the
// contents of a [File] dependency. It tries to load [File] from [Cache], and
// if that fails, obtains it via [File.Data] instead.
type CacheDataFunc func(f File) (data []byte, err error)
|
|
|
|
// An Artifact is a read-only reference to a piece of data that may be created
// deterministically but might not currently be available in memory or on the
// filesystem.
type Artifact interface {
	// Kind returns the [Kind] of artifact. This is usually unique to the
	// concrete type, but two functionally identical implementations of
	// [Artifact] are allowed to return the same [Kind] value.
	Kind() Kind

	// Params returns opaque bytes that describe [Artifact]. Implementations
	// must guarantee that these values are unique among differing instances
	// of the same implementation with the same dependencies.
	//
	// Callers must not modify the returned byte slice.
	//
	// Result must remain identical across multiple invocations.
	Params() []byte

	// Dependencies returns a slice of [Artifact] that the current instance
	// depends on to produce its contents.
	//
	// Callers must not modify the returned slice.
	//
	// Result must remain identical across multiple invocations.
	Dependencies() []Artifact

	// Cure cures the current [Artifact] to the caller-specified temporary
	// pathname. This is not the final resting place of the [Artifact] and this
	// pathname should not be directly referred to in the final contents.
	//
	// The loadData argument may be used to obtain the contents of a [File],
	// preferring a copy already held by the cache.
	//
	// If the implementation produces a single file, it must implement [File]
	// as well. In that case, Cure must produce a single regular file with
	// contents identical to that returned by [File.Data].
	Cure(work *check.Absolute, loadData CacheDataFunc) (err error)
}
|
|
|
|
// KnownIdent is optionally implemented by [Artifact]. When it is available,
// [Ident] returns its value instead of deriving one via [Kind.Ident].
//
// This is very subtle to use correctly. The implementation must ensure that
// this value is globally unique, otherwise [Cache] can enter an inconsistent
// state. This should not be implemented outside of testing.
type KnownIdent interface {
	// ID returns a globally unique identifier referring to the current
	// [Artifact]. This value must be known ahead of time and guaranteed to be
	// unique without having obtained the full contents of the [Artifact].
	ID() ID
}
|
|
|
|
// KnownChecksum is optionally implemented by [Artifact] for an artifact with
// output known ahead of time.
type KnownChecksum interface {
	// Checksum returns the checksum the cured [Artifact] is expected to have.
	// The value is returned by copy.
	//
	// Result must remain identical across multiple invocations.
	Checksum() Checksum
}
|
|
|
|
// A File refers to an [Artifact] backed by a single file.
type File interface {
	// Data returns the full contents of [Artifact]. If the implementation is
	// also a [KnownChecksum], Data is responsible for validating any data it
	// produces against that checksum and must return [ChecksumMismatchError]
	// if validation fails.
	//
	// Callers must not modify the returned byte slice.
	Data() ([]byte, error)

	Artifact
}
|
|
|
|
// Ident returns the identifier of an [Artifact].
|
|
func Ident(a Artifact) ID {
|
|
if ki, ok := a.(KnownIdent); ok {
|
|
return ki.ID()
|
|
}
|
|
return a.Kind().Ident(a.Params(), a.Dependencies()...)
|
|
}
|
|
|
|
// Kind corresponds to the concrete type of [Artifact] and is used to create
// the identifier for an [Artifact] with dependencies. Its numeric value is fed
// into the hash computed by [Kind.Ident].
type Kind uint64
|
|
|
|
const (
	// KindHTTPGet is the kind of [Artifact] returned by [NewHTTPGet].
	KindHTTPGet Kind = iota
	// KindTar is the kind of [Artifact] returned by [NewTar].
	KindTar
)
|
|
|
|
// Ident returns a deterministic identifier for the supplied params and
|
|
// dependencies. The caller is responsible for ensuring params uniquely and
|
|
// deterministically describes the current [Artifact].
|
|
func (k Kind) Ident(params []byte, deps ...Artifact) ID {
|
|
type extIdent [len(ID{}) + wordSize]byte
|
|
identifiers := make([]extIdent, len(deps))
|
|
for i, a := range deps {
|
|
id := Ident(a)
|
|
copy(identifiers[i][wordSize:], id[:])
|
|
binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind()))
|
|
}
|
|
slices.SortFunc(identifiers, func(a, b extIdent) int {
|
|
return bytes.Compare(a[:], b[:])
|
|
})
|
|
slices.Compact(identifiers)
|
|
|
|
h := sha512.New384()
|
|
h.Write(binary.LittleEndian.AppendUint64(nil, uint64(k)))
|
|
h.Write(params)
|
|
for _, e := range identifiers {
|
|
h.Write(e[:])
|
|
}
|
|
return ID(h.Sum(nil))
|
|
}
|
|
|
|
const (
	// dirIdentifier is the directory name appended to Cache.base for storing
	// artifacts named after their [ID].
	dirIdentifier = "identifier"
	// dirChecksum is the directory name appended to Cache.base for storing
	// artifacts named after their [Checksum].
	dirChecksum = "checksum"

	// dirWork is the directory name appended to Cache.base for working
	// pathnames set up during [Cache.Cure].
	dirWork = "work"

	// checksumLinknamePrefix is prepended to the encoded [Checksum] value
	// of an [Artifact] to form the target of the symbolic link created in
	// dirIdentifier. The target is relative and resolves into dirChecksum.
	checksumLinknamePrefix = "../" + dirChecksum + "/"
)
|
|
|
|
// Cache is a support layer that implementations of [Artifact] can use to store
// cured [Artifact] data in a content addressed fashion.
//
// The zero value is not usable: instances are created via [New], which
// allocates the maps below.
type Cache struct {
	// Directory where all [Cache] related files are placed.
	base *check.Absolute

	// Whether to validate [File.Data] for a [KnownChecksum] file. This
	// significantly reduces performance.
	strict bool

	// Synchronises access to dirChecksum.
	checksumMu sync.RWMutex

	// Identifier to content pair cache.
	ident map[ID]Checksum
	// Identifier to error pair for unrecoverably faulted [Artifact].
	identErr map[ID]error
	// Pending identifiers, accessed through Cure for entries not in ident.
	// The channel is closed once the outcome has been recorded in ident or
	// identErr.
	identPending map[ID]<-chan struct{}
	// Synchronises access to ident, identErr, identPending and corresponding
	// filesystem entries.
	identMu sync.RWMutex
}
|
|
|
|
// IsStrict returns whether the [Cache] strictly verifies checksums.
|
|
func (c *Cache) IsStrict() bool { return c.strict }
|
|
|
|
// SetStrict sets whether the [Cache] strictly verifies checksums, even when
|
|
// the implementation promises to validate them internally. This significantly
|
|
// reduces performance and is not recommended outside of testing.
|
|
//
|
|
// This method is not safe for concurrent use with any other method.
|
|
func (c *Cache) SetStrict(strict bool) { c.strict = strict }
|
|
|
|
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
|
|
type ChecksumMismatchError struct {
|
|
// Actual and expected checksums.
|
|
Got, Want Checksum
|
|
}
|
|
|
|
func (e *ChecksumMismatchError) Error() string {
|
|
return "got " + Encode(e.Got) +
|
|
" instead of " + Encode(e.Want)
|
|
}
|
|
|
|
// loadOrStoreIdent attempts to load a cached [Artifact] by its identifier or
|
|
// wait for a pending [Artifact] to cure. If neither is possible, the current
|
|
// identifier is stored in identPending and a non-nil channel is returned.
|
|
func (c *Cache) loadOrStoreIdent(id *ID) (
|
|
done chan<- struct{},
|
|
checksum Checksum,
|
|
err error,
|
|
) {
|
|
var ok bool
|
|
|
|
c.identMu.Lock()
|
|
if checksum, ok = c.ident[*id]; ok {
|
|
c.identMu.Unlock()
|
|
return
|
|
}
|
|
if err, ok = c.identErr[*id]; ok {
|
|
c.identMu.Unlock()
|
|
return
|
|
}
|
|
|
|
var notify <-chan struct{}
|
|
if notify, ok = c.identPending[*id]; ok {
|
|
c.identMu.Unlock()
|
|
<-notify
|
|
c.identMu.RLock()
|
|
if checksum, ok = c.ident[*id]; !ok {
|
|
err = c.identErr[*id]
|
|
}
|
|
c.identMu.RUnlock()
|
|
return
|
|
}
|
|
|
|
d := make(chan struct{})
|
|
c.identPending[*id] = d
|
|
c.identMu.Unlock()
|
|
done = d
|
|
return
|
|
}
|
|
|
|
// finaliseIdent commits a checksum or error to ident for an identifier
|
|
// previously submitted to identPending.
|
|
func (c *Cache) finaliseIdent(
|
|
done chan<- struct{},
|
|
id *ID,
|
|
checksum *Checksum,
|
|
err error,
|
|
) {
|
|
c.identMu.Lock()
|
|
if err != nil {
|
|
c.identErr[*id] = err
|
|
} else {
|
|
c.ident[*id] = *checksum
|
|
}
|
|
c.identMu.Unlock()
|
|
|
|
close(done)
|
|
}
|
|
|
|
// loadData provides [CacheDataFunc] for [Artifact.Cure].
|
|
func (c *Cache) loadData(f File) (data []byte, err error) {
|
|
var r *os.File
|
|
if kc, ok := f.(KnownChecksum); ok {
|
|
c.checksumMu.RLock()
|
|
r, err = os.Open(c.base.Append(
|
|
dirChecksum,
|
|
Encode(kc.Checksum()),
|
|
).String())
|
|
c.checksumMu.RUnlock()
|
|
} else {
|
|
c.identMu.RLock()
|
|
r, err = os.Open(c.base.Append(
|
|
dirIdentifier,
|
|
Encode(Ident(f)),
|
|
).String())
|
|
c.identMu.RUnlock()
|
|
}
|
|
|
|
if err != nil {
|
|
if !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
return f.Data()
|
|
}
|
|
|
|
data, err = io.ReadAll(r)
|
|
closeErr := r.Close()
|
|
if err == nil {
|
|
err = closeErr
|
|
}
|
|
return
|
|
}
|
|
|
|
// InvalidFileModeError describes an [Artifact.Cure] that did not result in
// a regular file or directory located at the work pathname. Its value is the
// offending file mode.
type InvalidFileModeError fs.FileMode

// Error returns a constant string. The file mode is not part of the message.
func (InvalidFileModeError) Error() string {
	const message = "artifact did not produce a regular file or directory"
	return message
}
|
|
|
|
// NoOutputError describes an [Artifact.Cure] that did not populate its
// work pathname despite completing successfully.
type NoOutputError struct{}

// Unwrap returns [os.ErrNotExist], so the error matches it under errors.Is.
func (NoOutputError) Unwrap() error {
	return os.ErrNotExist
}

// Error returns a constant string.
func (NoOutputError) Error() string {
	const message = "artifact cured successfully but did not produce any output"
	return message
}
|
|
|
|
// Cure cures the [Artifact] and returns its pathname and [Checksum].
|
|
func (c *Cache) Cure(a Artifact) (
|
|
pathname *check.Absolute,
|
|
checksum Checksum,
|
|
err error,
|
|
) {
|
|
id := Ident(a)
|
|
ids := Encode(id)
|
|
pathname = c.base.Append(
|
|
dirIdentifier,
|
|
ids,
|
|
)
|
|
defer func() {
|
|
if err != nil {
|
|
pathname = nil
|
|
checksum = Checksum{}
|
|
}
|
|
}()
|
|
|
|
var done chan<- struct{}
|
|
done, checksum, err = c.loadOrStoreIdent(&id)
|
|
if done == nil {
|
|
return
|
|
} else {
|
|
defer func() { c.finaliseIdent(done, &id, &checksum, err) }()
|
|
}
|
|
|
|
_, err = os.Lstat(pathname.String())
|
|
if err == nil {
|
|
var name string
|
|
if name, err = os.Readlink(pathname.String()); err != nil {
|
|
return
|
|
}
|
|
checksum, err = Decode(path.Base(name))
|
|
return
|
|
}
|
|
if !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
|
|
var checksums string
|
|
defer func() {
|
|
if err == nil && checksums != "" {
|
|
err = os.Symlink(
|
|
checksumLinknamePrefix+checksums,
|
|
pathname.String(),
|
|
)
|
|
}
|
|
}()
|
|
|
|
var checksumPathname *check.Absolute
|
|
var checksumFi os.FileInfo
|
|
if kc, ok := a.(KnownChecksum); ok {
|
|
checksum = kc.Checksum()
|
|
checksums = Encode(checksum)
|
|
checksumPathname = c.base.Append(
|
|
dirChecksum,
|
|
checksums,
|
|
)
|
|
|
|
c.checksumMu.RLock()
|
|
checksumFi, err = os.Stat(checksumPathname.String())
|
|
c.checksumMu.RUnlock()
|
|
|
|
if err != nil {
|
|
if !errors.Is(err, os.ErrNotExist) {
|
|
return
|
|
}
|
|
|
|
checksumFi, err = nil, nil
|
|
}
|
|
}
|
|
|
|
if f, ok := a.(File); ok {
|
|
if checksumFi != nil {
|
|
if !checksumFi.Mode().IsRegular() {
|
|
// unreachable
|
|
err = InvalidFileModeError(checksumFi.Mode())
|
|
}
|
|
return
|
|
}
|
|
|
|
var data []byte
|
|
data, err = f.Data()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
if checksumPathname == nil {
|
|
h := sha512.New384()
|
|
h.Write(data)
|
|
h.Sum(checksum[:0])
|
|
checksums = Encode(checksum)
|
|
checksumPathname = c.base.Append(
|
|
dirChecksum,
|
|
checksums,
|
|
)
|
|
} else if c.IsStrict() {
|
|
h := sha512.New384()
|
|
h.Write(data)
|
|
if got := Checksum(h.Sum(nil)); got != checksum {
|
|
err = &ChecksumMismatchError{
|
|
Got: got,
|
|
Want: checksum,
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
c.checksumMu.Lock()
|
|
var w *os.File
|
|
w, err = os.OpenFile(
|
|
checksumPathname.String(),
|
|
os.O_CREATE|os.O_EXCL|os.O_WRONLY,
|
|
0400,
|
|
)
|
|
if err != nil {
|
|
c.checksumMu.Unlock()
|
|
|
|
if errors.Is(err, os.ErrExist) {
|
|
err = nil
|
|
}
|
|
return
|
|
}
|
|
_, err = w.Write(data)
|
|
closeErr := w.Close()
|
|
if err == nil {
|
|
err = closeErr
|
|
}
|
|
c.checksumMu.Unlock()
|
|
|
|
return
|
|
} else {
|
|
if checksumFi != nil {
|
|
if !checksumFi.Mode().IsDir() {
|
|
// unreachable
|
|
err = InvalidFileModeError(checksumFi.Mode())
|
|
}
|
|
return
|
|
}
|
|
|
|
workPathname := c.base.Append(dirWork, ids)
|
|
defer func() {
|
|
// must not use the value of checksum string as it might be zeroed
|
|
// to cancel the deferred symlink operation
|
|
|
|
if err != nil {
|
|
chmodErr := filepath.WalkDir(workPathname.String(), func(
|
|
path string,
|
|
d fs.DirEntry,
|
|
err error,
|
|
) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if d.IsDir() {
|
|
return os.Chmod(path, 0700)
|
|
}
|
|
return nil
|
|
})
|
|
if errors.Is(chmodErr, os.ErrNotExist) {
|
|
chmodErr = nil
|
|
}
|
|
removeErr := os.RemoveAll(workPathname.String())
|
|
if chmodErr != nil || removeErr != nil {
|
|
err = errors.Join(err, chmodErr, removeErr)
|
|
} else if errors.Is(err, os.ErrExist) {
|
|
// two artifacts may be backed by the same file
|
|
err = nil
|
|
}
|
|
}
|
|
}()
|
|
|
|
if err = a.Cure(workPathname, c.loadData); err != nil {
|
|
return
|
|
}
|
|
|
|
var fi os.FileInfo
|
|
if fi, err = os.Lstat(workPathname.String()); err != nil {
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
err = NoOutputError{}
|
|
}
|
|
return
|
|
}
|
|
|
|
if !fi.IsDir() {
|
|
if !fi.Mode().IsRegular() {
|
|
err = InvalidFileModeError(fi.Mode())
|
|
} else {
|
|
err = errors.New("non-file artifact produced regular file")
|
|
}
|
|
return
|
|
}
|
|
|
|
// override this before hashing since it will be made read-only after
|
|
// the rename anyway so do not let perm bits affect the checksum
|
|
if err = os.Chmod(workPathname.String(), 0700); err != nil {
|
|
return
|
|
}
|
|
var gotChecksum Checksum
|
|
if gotChecksum, err = HashDir(workPathname); err != nil {
|
|
return
|
|
}
|
|
|
|
if checksumPathname == nil {
|
|
checksum = gotChecksum
|
|
checksums = Encode(checksum)
|
|
checksumPathname = c.base.Append(
|
|
dirChecksum,
|
|
checksums,
|
|
)
|
|
} else {
|
|
if gotChecksum != checksum {
|
|
err = &ChecksumMismatchError{
|
|
Got: gotChecksum,
|
|
Want: checksum,
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
c.checksumMu.Lock()
|
|
if err = os.Rename(
|
|
workPathname.String(),
|
|
checksumPathname.String(),
|
|
); err != nil {
|
|
if !errors.Is(err, os.ErrExist) {
|
|
c.checksumMu.Unlock()
|
|
return
|
|
}
|
|
// err is zeroed during deferred cleanup
|
|
} else {
|
|
err = os.Chmod(checksumPathname.String(), 0500)
|
|
}
|
|
c.checksumMu.Unlock()
|
|
|
|
return
|
|
}
|
|
}
|
|
|
|
// New returns the address to a new instance of [Cache].
|
|
func New(base *check.Absolute) (*Cache, error) {
|
|
for _, name := range []string{
|
|
dirIdentifier,
|
|
dirChecksum,
|
|
dirWork,
|
|
} {
|
|
if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil &&
|
|
!errors.Is(err, os.ErrExist) {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return &Cache{
|
|
base: base,
|
|
|
|
ident: make(map[ID]Checksum),
|
|
identErr: make(map[ID]error),
|
|
identPending: make(map[ID]<-chan struct{}),
|
|
}, nil
|
|
}
|