// Package pkg provides utilities for packaging software. package pkg import ( "bytes" "crypto/sha512" "encoding/base64" "encoding/binary" "errors" "io" "io/fs" "os" "path" "path/filepath" "slices" "sync" "hakurei.app/container/check" ) type ( // A Checksum is a SHA-384 checksum computed for a cured [Artifact]. Checksum = [sha512.Size384]byte // An ID is a unique identifier returned by [Artifact.ID]. This value must // be deterministically determined ahead of time. ID Checksum ) // Encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]). func Encode(checksum Checksum) string { return base64.URLEncoding.EncodeToString(checksum[:]) } // encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]). func encode(checksum *Checksum) string { return base64.URLEncoding.EncodeToString(checksum[:]) } // MustDecode decodes a string representation of [Checksum] and panics if there // is a decoding error or the resulting data is too short. func MustDecode(s string) (checksum Checksum) { if n, err := base64.URLEncoding.Decode( checksum[:], []byte(s), ); err != nil { panic(err) } else if n != len(Checksum{}) { panic(io.ErrUnexpectedEOF) } return } // An Artifact is a read-only reference to a piece of data that may be created // deterministically but might not currently be available in memory or on the // filesystem. type Artifact interface { // Kind returns the [Kind] of artifact. This is usually unique to the // concrete type but two functionally identical implementations of // [Artifact] is allowed to return the same [Kind] value. Kind() Kind // ID returns a globally unique identifier referring to the current // [Artifact]. This value must be known ahead of time and guaranteed to be // unique without having obtained the full contents of the [Artifact]. ID() ID // Hash returns the [Checksum] created from the full contents of a cured // [Artifact]. This can be stored for future lookup in a [Cache]. // // A call to Hash implicitly cures [Artifact]. Hash() (Checksum, error) // Pathname returns an absolute pathname to a file or directory populated // with the full contents of [Artifact]. This is the most expensive // operation possible on any [Artifact] and should be avoided if possible. // // A call to Pathname implicitly cures [Artifact]. // // Callers must only open files read-only. If [Artifact] is a directory, // files must not be created or removed under this directory. Pathname() (*check.Absolute, error) } // A File refers to an [Artifact] backed by a single file. type File interface { // Data returns the full contents of [Artifact]. // // Callers must not modify the returned byte slice. Data() ([]byte, error) Artifact } // Kind corresponds to the concrete type of [Artifact] and is used to create // identifier for an [Artifact] with dependencies. type Kind uint64 const ( // KindHTTP is the kind of [Artifact] returned by [Cache.NewHTTP]. KindHTTP Kind = iota KindTar ) // Ident returns a deterministic identifier for the supplied params and // dependencies. The caller is responsible for ensuring params uniquely and // deterministically describes the current [Artifact]. func (k Kind) Ident(params []byte, deps ...Artifact) ID { type extIdent [len(ID{}) + wordSize]byte identifiers := make([]extIdent, len(deps)) for i, a := range deps { id := a.ID() copy(identifiers[i][wordSize:], id[:]) binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind())) } slices.SortFunc(identifiers, func(a, b extIdent) int { return bytes.Compare(a[:], b[:]) }) slices.Compact(identifiers) h := sha512.New384() h.Write(binary.LittleEndian.AppendUint64(nil, uint64(k))) h.Write(params) for _, e := range identifiers { h.Write(e[:]) } return ID(h.Sum(nil)) } const ( // dirIdentifier is the directory name appended to Cache.base for storing // artifacts named after their [ID]. dirIdentifier = "identifier" // dirChecksum is the directory name appended to Cache.base for storing // artifacts named after their [Checksum]. dirChecksum = "checksum" // dirWork is the directory name appended to Cache.base for working // directories created for [Cache.Store]. dirWork = "work" ) // Cache is a support layer that implementations of [Artifact] can use to store // cured [Artifact] data in a content addressed fashion. type Cache struct { // Directory where all [Cache] related files are placed. base *check.Absolute // Synchronises access to public methods. mu sync.RWMutex } // LoadFile loads the contents of a [File] by its identifier. func (c *Cache) LoadFile(id ID) ( pathname *check.Absolute, data []byte, err error, ) { pathname = c.base.Append( dirIdentifier, Encode(id), ) c.mu.RLock() data, err = os.ReadFile(pathname.String()) c.mu.RUnlock() return } // A ChecksumMismatchError describes an [Artifact] with unexpected content. type ChecksumMismatchError struct { // Actual and expected checksums. Got, Want Checksum } func (e *ChecksumMismatchError) Error() string { return "got " + Encode(e.Got) + " instead of " + Encode(e.Want) } // pathname returns the content-addressed pathname for a [Checksum]. func (c *Cache) pathname(checksum *Checksum) *check.Absolute { return c.base.Append( dirChecksum, encode(checksum), ) } // pathnameIdent returns the identifier-based pathname for an [ID]. func (c *Cache) pathnameIdent(id *ID) *check.Absolute { return c.base.Append( dirIdentifier, encode((*Checksum)(id)), ) } // Store looks up an identifier, and if it is not present, calls makeArtifact // with a private working directory and stores its result instead. An optional // checksum can be passed via the result buffer which is used to validate the // produced directory. func (c *Cache) Store( id ID, makeArtifact func(work *check.Absolute) error, buf *Checksum, validate bool, ) ( pathname *check.Absolute, store bool, err error, ) { pathname = c.pathnameIdent(&id) c.mu.Lock() defer c.mu.Unlock() _, err = os.Lstat(pathname.String()) if err == nil || !errors.Is(err, os.ErrNotExist) { return } store = true var ( workPathname *check.Absolute workPathnameRaw string ) if workPathnameRaw, err = os.MkdirTemp( c.base.Append(dirWork).String(), path.Base(pathname.String()+".*"), ); err != nil { return } else if workPathname, err = check.NewAbs(workPathnameRaw); err != nil { return } defer func() { if err != nil { chmodErr := filepath.WalkDir(workPathname.String(), func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return os.Chmod(path, 0700) } return nil }) removeErr := os.RemoveAll(workPathname.String()) if chmodErr != nil || removeErr != nil { err = errors.Join(err, chmodErr, removeErr) } else if errors.Is(err, os.ErrExist) { // two artifacts may be backed by the same file err = nil } } }() if err = os.Chmod(workPathname.String(), 0700); err != nil { return } if err = makeArtifact(workPathname); err != nil { return } var checksum Checksum if checksum, err = HashDir(workPathname); err != nil { return } if validate { if checksum != *buf { err = &ChecksumMismatchError{checksum, *buf} return } } else { *buf = checksum } checksumPathname := c.pathname(&checksum) if err = os.Rename( workPathname.String(), checksumPathname.String(), ); err != nil { if !errors.Is(err, os.ErrExist) { return } } if linkErr := os.Symlink( "../"+dirChecksum+"/"+path.Base(checksumPathname.String()), pathname.String(), ); linkErr != nil { err = linkErr } return } // storeFile stores the contents of a [File]. An optional checksum can be // passed via the result buffer which is used to validate the submitted data. // // If locking is disabled, the caller is responsible for acquiring a write lock // and releasing it after this method returns. This makes LoadOrStoreFile // possible without holding the lock while computing hash for store only. func (c *Cache) storeFile( identifierPathname *check.Absolute, data []byte, buf *Checksum, validate, lock bool, ) error { h := sha512.New384() h.Write(data) if validate { if got := (Checksum)(h.Sum(nil)); got != *buf { return &ChecksumMismatchError{got, *buf} } } else { h.Sum(buf[:0]) } checksumPathname := c.pathname(buf) if lock { c.mu.Lock() defer c.mu.Unlock() } if f, err := os.OpenFile( checksumPathname.String(), os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0400, ); err != nil { // two artifacts may be backed by the same file if !errors.Is(err, os.ErrExist) { return err } } else if _, err = f.Write(data); err != nil { // do not attempt cleanup: this is content-addressed and a partial // write is caught during integrity check return err } return os.Link( checksumPathname.String(), identifierPathname.String(), ) } // StoreFile stores the contents of a [File]. An optional checksum can be // passed via the result buffer which is used to validate the submitted data. func (c *Cache) StoreFile( id ID, data []byte, buf *Checksum, validate bool, ) (pathname *check.Absolute, err error) { pathname = c.pathnameIdent(&id) err = c.storeFile(pathname, data, buf, validate, true) return } // LoadOrStoreFile attempts to load the contents of a [File] by its identifier, // and if that file is not present, calls makeData and stores its result // instead. Hash validation behaviour is identical to StoreFile. func (c *Cache) LoadOrStoreFile( id ID, makeData func() ([]byte, error), buf *Checksum, validate bool, ) ( pathname *check.Absolute, data []byte, store bool, err error, ) { pathname = c.pathnameIdent(&id) c.mu.Lock() defer c.mu.Unlock() data, err = os.ReadFile(pathname.String()) if err == nil || !errors.Is(err, os.ErrNotExist) { return } store = true data, err = makeData() if err != nil { return } err = c.storeFile(pathname, data, buf, validate, false) return } // New returns the address to a new instance of [Cache]. func New(base *check.Absolute) (*Cache, error) { for _, name := range []string{ dirIdentifier, dirChecksum, dirWork, } { if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil && !errors.Is(err, os.ErrExist) { return nil, err } } return &Cache{ base: base, }, nil }