// Package pkg provides utilities for packaging software. package pkg import ( "crypto/sha512" "encoding/base64" "encoding/gob" "errors" "io" "io/fs" "os" "sync" "hakurei.app/container/check" ) type ( // A Checksum is a SHA-384 checksum computed for a cured [Artifact]. Checksum = [sha512.Size384]byte // An ID is a unique identifier returned by [Artifact.ID]. This value must // be deterministically determined ahead of time. ID Checksum ) // MustDecode decodes a string representation of [Checksum] and panics if there // is a decoding error or the resulting data is too short. func MustDecode(s string) (checksum Checksum) { if n, err := base64.URLEncoding.Decode( checksum[:], []byte(s), ); err != nil { panic(err) } else if n != len(Checksum{}) { panic(io.ErrUnexpectedEOF) } return } // An Artifact is a read-only reference to a piece of data that may be created // deterministically but might not currently be available in memory or on the // filesystem. type Artifact interface { // ID returns a globally unique identifier referring to the current // [Artifact]. This value must be known ahead of time and guaranteed to be // unique without having obtained the full contents of the [Artifact]. ID() ID // Hash returns the [Checksum] created from the full contents of a cured // [Artifact]. This can be stored for future lookup in a [Cache]. // // A call to Hash implicitly cures [Artifact]. Hash() (Checksum, error) // Pathname returns an absolute pathname to a file or directory populated // with the full contents of [Artifact]. This is the most expensive // operation possible on any [Artifact] and should be avoided if possible. // // A call to Pathname implicitly cures [Artifact]. // // Callers must only open files read-only. If [Artifact] is a directory, // files must not be created or removed under this directory. Pathname() (*check.Absolute, error) } // A File refers to an [Artifact] backed by a single file. type File interface { // Data returns the full contents of [Artifact]. // // Callers must not modify the returned byte slice. Data() ([]byte, error) Artifact } // FlatEntry is the representation of a directory entry via [Flatten]. type FlatEntry struct { Name string // base name of the file Mode fs.FileMode // file mode bits Data []byte // file content or symlink destination } // Flatten writes a deterministic representation of the contents of fsys to w. // The resulting data can be hashed to produce a deterministic checksum for the // directory. func Flatten(fsys fs.FS, root string, w io.Writer) error { e := gob.NewEncoder(w) return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } var fi fs.FileInfo fi, err = d.Info() if err != nil { return err } ent := FlatEntry{ Name: fi.Name(), Mode: fi.Mode(), } if ent.Mode.IsRegular() { if ent.Data, err = fs.ReadFile(fsys, path); err != nil { return err } } else if ent.Mode&fs.ModeSymlink != 0 { var newpath string if newpath, err = fs.ReadLink(fsys, path); err != nil { return err } ent.Data = []byte(newpath) } return e.Encode(&ent) }) } // HashFS returns a checksum produced by hashing the result of [Flatten]. func HashFS(fsys fs.FS, root string) (Checksum, error) { h := sha512.New384() if err := Flatten(fsys, root, h); err != nil { return Checksum{}, err } return (Checksum)(h.Sum(nil)), nil } // HashDir returns a checksum produced by hashing the result of [Flatten]. func HashDir(pathname *check.Absolute) (Checksum, error) { return HashFS(os.DirFS(pathname.String()), ".") } const ( // dirIdentifier is the directory name appended to Cache.base for storing // artifacts named after their [ID]. dirIdentifier = "identifier" // dirChecksum is the directory name appended to Cache.base for storing // artifacts named after their [Checksum]. dirChecksum = "checksum" ) // Cache is a support layer that implementations of [Artifact] can use to store // cured [Artifact] data in a content addressed fashion. type Cache struct { // Directory where all [Cache] related files are placed. base *check.Absolute // Synchronises access to public methods. mu sync.RWMutex } // LoadFile loads the contents of a [File] by its identifier. func (c *Cache) LoadFile(id ID) ( pathname *check.Absolute, data []byte, err error, ) { pathname = c.base.Append( dirIdentifier, base64.URLEncoding.EncodeToString(id[:]), ) c.mu.RLock() data, err = os.ReadFile(pathname.String()) c.mu.RUnlock() return } // A ChecksumMismatchError describes an [Artifact] with unexpected content. type ChecksumMismatchError struct { // Actual and expected checksums. Got, Want Checksum } func (e *ChecksumMismatchError) Error() string { return "got " + base64.URLEncoding.EncodeToString(e.Got[:]) + " instead of " + base64.URLEncoding.EncodeToString(e.Want[:]) } // pathname returns the content-addressed pathname for a [Checksum]. func (c *Cache) pathname(checksum *Checksum) *check.Absolute { return c.base.Append( dirChecksum, base64.URLEncoding.EncodeToString(checksum[:]), ) } // pathnameIdent returns the identifier-based pathname for an [ID]. func (c *Cache) pathnameIdent(id *ID) *check.Absolute { return c.base.Append( dirIdentifier, base64.URLEncoding.EncodeToString(id[:]), ) } // storeFile stores the contents of a [File]. An optional checksum can be // passed via the result buffer which is used to validate the submitted data. // // If locking is disabled, the caller is responsible for acquiring a write lock // and releasing it after this method returns. This makes LoadOrStoreFile // possible without holding the lock while computing hash for store only. func (c *Cache) storeFile( identifierPathname *check.Absolute, data []byte, buf *Checksum, validate, lock bool, ) error { h := sha512.New384() h.Write(data) if validate { if got := (Checksum)(h.Sum(nil)); got != *buf { return &ChecksumMismatchError{got, *buf} } } else { h.Sum(buf[:0]) } checksumPathname := c.pathname(buf) if lock { c.mu.Lock() defer c.mu.Unlock() } if f, err := os.OpenFile( checksumPathname.String(), os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0400, ); err != nil { // two artifacts may be backed by the same file if !errors.Is(err, os.ErrExist) { return err } } else if _, err = f.Write(data); err != nil { // do not attempt cleanup: this is content-addressed and a partial // write is caught during integrity check return err } return os.Link( checksumPathname.String(), identifierPathname.String(), ) } // StoreFile stores the contents of a [File]. An optional checksum can be // passed via the result buffer which is used to validate the submitted data. func (c *Cache) StoreFile( id ID, data []byte, buf *Checksum, validate bool, ) (pathname *check.Absolute, err error) { pathname = c.pathnameIdent(&id) err = c.storeFile(pathname, data, buf, validate, true) return } // LoadOrStoreFile attempts to load the contents of a [File] by its identifier, // and if that file is not present, calls makeData and stores its result // instead. Hash validation behaviour is identical to StoreFile. func (c *Cache) LoadOrStoreFile( id ID, makeData func() ([]byte, error), buf *Checksum, validate bool, ) ( pathname *check.Absolute, data []byte, store bool, err error, ) { pathname = c.pathnameIdent(&id) c.mu.Lock() defer c.mu.Unlock() data, err = os.ReadFile(pathname.String()) if err == nil || !errors.Is(err, os.ErrNotExist) { return } store = true data, err = makeData() if err != nil { return } err = c.storeFile(pathname, data, buf, validate, false) return } // New returns the address to a new instance of [Cache]. func New(base *check.Absolute) (*Cache, error) { for _, name := range []string{ dirIdentifier, dirChecksum, } { if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil && !errors.Is(err, os.ErrExist) { return nil, err } } return &Cache{ base: base, }, nil }