internal/pkg: cache computed identifiers
All checks were successful
Test / Create distribution (push) Successful in 49s
Test / Sandbox (push) Successful in 3m1s
Test / ShareFS (push) Successful in 4m56s
Test / Sandbox (race detector) (push) Successful in 5m21s
Test / Hpkg (push) Successful in 5m30s
Test / Hakurei (push) Successful in 5m53s
Test / Hakurei (race detector) (push) Successful in 7m56s
Test / Flake checks (push) Successful in 1m57s

This eliminates duplicate identifier computations. The new implementation also significantly reduces allocations when computing identifiers for a large dependency tree.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-01-15 22:05:24 +09:00
parent 088d35e4e6
commit 3499a82785
9 changed files with 276 additions and 199 deletions

View File

@@ -9,6 +9,7 @@ import (
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"io/fs"
"iter"
@@ -20,6 +21,7 @@ import (
"strings"
"sync"
"syscall"
"unique"
"unsafe"
"hakurei.app/container/check"
@@ -60,6 +62,35 @@ func MustDecode(s string) Checksum {
}
}
// IContext is passed to [Artifact.Params] and provides identifier information
// and the target [hash.Hash] for writing params into.
//
// Methods of IContext are safe for concurrent use. IContext is valid
// until [Artifact.Params] returns.
//
// NOTE(review): WriteIdent writes to the shared hash.Hash without any visible
// synchronisation, and GetHash hands the same hash to the caller. The
// concurrency guarantee above presumably covers only the cache lookups, not
// writes to the hash — confirm.
type IContext struct {
// Address of underlying [Cache], should be zeroed or made unusable after
// [Artifact.Params] returns and must not be exposed directly.
// Used to resolve dependency identifiers and to borrow scratch buffers.
cache *Cache
// Made available for writing, should be zeroed after [Artifact.Params]
// returns. Internal state must not be inspected.
// Everything written here contributes to the identifier being computed.
h hash.Hash
}
// Unwrap exposes the [context.Context] held by the underlying [Cache].
func (i *IContext) Unwrap() context.Context {
	return i.cache.ctx
}
// GetHash hands out the [hash.Hash] that params are written into. The hash is
// write-only from the point of view of the caller: its internal state must
// not be inspected.
func (i *IContext) GetHash() hash.Hash {
	return i.h
}
// WriteIdent feeds the identifier of a into the underlying [hash.Hash].
//
// The identifier is staged in a buffer borrowed from the cache so that the
// bytes handed to the hash come from reusable storage; the buffer is given
// back before returning.
func (i *IContext) WriteIdent(a Artifact) {
	scratch := i.cache.getIdentBuf()

	// Only the identifier portion of the buffer is used, the leading
	// wordSize bytes are left untouched and never written to the hash.
	id := scratch[wordSize:]
	*(*ID)(id) = i.cache.Ident(a).Value()
	i.h.Write(id)

	i.cache.putIdentBuf(scratch)
}
// TContext is passed to [TrivialArtifact.Cure] and provides information and
// methods required for curing the [TrivialArtifact].
//
@@ -154,7 +185,7 @@ type FContext struct {
TContext
// Cured top-level dependencies looked up by Pathname.
deps map[ID]*check.Absolute
deps map[Artifact]*check.Absolute
}
// InvalidLookupError is the identifier of non-dependency [Artifact] looked up
@@ -171,11 +202,10 @@ var _ error = InvalidLookupError{}
// with an [Artifact] not part of the slice returned by [Artifact.Dependencies]
// panics.
func (f *FContext) Pathname(a Artifact) *check.Absolute {
id := Ident(a)
if p, ok := f.deps[id]; ok {
if p, ok := f.deps[a]; ok {
return p
} else {
panic(InvalidLookupError(id))
panic(InvalidLookupError(f.cache.Ident(a).Value()))
}
}
@@ -188,14 +218,13 @@ type Artifact interface {
// [Artifact] is allowed to return the same [Kind] value.
Kind() Kind
// Params returns opaque bytes that describes [Artifact]. Implementations
// Params writes opaque bytes that describes [Artifact]. Implementations
// must guarantee that these values are unique among differing instances
// of the same implementation with the same dependencies.
//
// Callers must not modify the retuned byte slice.
// of the same implementation with the same dependencies. Callers must not
// attempt to interpret these params.
//
// Result must remain identical across multiple invocations.
Params() []byte
Params(ctx *IContext)
// Dependencies returns a slice of [Artifact] that the current instance
// depends on to produce its contents.
@@ -290,17 +319,9 @@ type File interface {
Artifact
}
// Ident returns the identifier of an [Artifact].
func Ident(a Artifact) ID {
if ki, ok := a.(KnownIdent); ok {
return ki.ID()
}
return a.Kind().Ident(a.Params(), a.Dependencies()...)
}
// reportNameIdent is like reportName but does not recompute [ID].
func reportNameIdent(a Artifact, id ID) string {
r := Encode(id)
// reportName returns a string describing [Artifact] presented to the user.
func reportName(a Artifact, id unique.Handle[ID]) string {
r := Encode(id.Value())
if s, ok := a.(fmt.Stringer); ok {
if name := s.String(); name != "" {
r += "-" + name
@@ -309,9 +330,6 @@ func reportNameIdent(a Artifact, id ID) string {
return r
}
// reportName returns a string describing [Artifact] presented to the user.
func reportName(a Artifact) string { return reportNameIdent(a, Ident(a)) }
// Kind corresponds to the concrete type of [Artifact] and is used to create
// identifier for an [Artifact] with dependencies.
type Kind uint64
@@ -334,31 +352,6 @@ const (
KindCustomOffset = 1 << 31
)
// Ident returns a deterministic identifier for the supplied params and
// dependencies. The caller is responsible for ensuring params uniquely and
// deterministically describes the current [Artifact].
func (k Kind) Ident(params []byte, deps ...Artifact) ID {
type extIdent [len(ID{}) + wordSize]byte
identifiers := make([]extIdent, len(deps))
for i, a := range deps {
id := Ident(a)
copy(identifiers[i][wordSize:], id[:])
binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind()))
}
slices.SortFunc(identifiers, func(a, b extIdent) int {
return bytes.Compare(a[:], b[:])
})
identifiers = slices.Compact(identifiers)
h := sha512.New384()
h.Write(binary.LittleEndian.AppendUint64(nil, uint64(k)))
h.Write(params)
for _, e := range identifiers {
h.Write(e[:])
}
return ID(h.Sum(nil))
}
const (
// dirIdentifier is the directory name appended to Cache.base for storing
// artifacts named after their [ID].
@@ -429,15 +422,20 @@ type Cache struct {
// Maximum size of a dependency graph.
threshold uintptr
// Artifact to [unique.Handle] of identifier cache.
artifact sync.Map
// Identifier free list, must not be accessed directly.
identPool sync.Pool
// Synchronises access to dirChecksum.
checksumMu sync.RWMutex
// Identifier to content pair cache.
ident map[ID]Checksum
ident map[unique.Handle[ID]]Checksum
// Identifier to error pair for unrecoverably faulted [Artifact].
identErr map[ID]error
identErr map[unique.Handle[ID]]error
// Pending identifiers, accessed through Cure for entries not in ident.
identPending map[ID]<-chan struct{}
identPending map[unique.Handle[ID]]<-chan struct{}
// Synchronises access to ident and corresponding filesystem entries.
identMu sync.RWMutex
}
@@ -458,6 +456,89 @@ func (c *Cache) SetStrict(strict bool) { c.strict = strict }
// This method is not safe for concurrent use with any other method.
func (c *Cache) SetThreshold(threshold uintptr) { c.threshold = threshold }
// extIdent is a [Kind] concatenated with [ID].
//
// The leading wordSize bytes are reserved for the [Kind], which unsafeIdent
// encodes as a little-endian uint64; the remaining len(ID{}) bytes hold the
// [ID]. Values are handed out and recycled through Cache.identPool.
type extIdent [wordSize + len(ID{})]byte
// getIdentBuf takes an extIdent out of identPool and returns its address.
// The buffer is not cleared here, so callers must not rely on its contents.
func (c *Cache) getIdentBuf() *extIdent {
	pooled := c.identPool.Get()
	return pooled.(*extIdent)
}
// putIdentBuf hands buf back to identPool for reuse. The caller must not
// touch buf after this call.
func (c *Cache) putIdentBuf(buf *extIdent) {
	c.identPool.Put(buf)
}
// storeIdent records the identifier held in buf for a in the artifact cache
// and returns its canonical [unique.Handle]. Only the identifier portion of
// buf is read; buf remains owned by the caller.
func (c *Cache) storeIdent(a Artifact, buf *extIdent) unique.Handle[ID] {
	handle := unique.Make(ID(buf[wordSize:]))
	c.artifact.Store(a, handle)
	return handle
}
// Ident returns the identifier of an [Artifact].
//
// An identifier already present in the artifact cache is returned as is.
// Otherwise the freshly computed identifier is stored in the artifact cache
// and its scratch buffer is handed back to identPool.
func (c *Cache) Ident(a Artifact) unique.Handle[ID] {
	buf, idu := c.unsafeIdent(a, false)
	if buf == nil {
		// cache hit, nothing was computed
		return idu
	}

	idu = c.storeIdent(a, buf)
	c.putIdentBuf(buf)
	return idu
}
// unsafeIdent implements Ident but returns the underlying buffer for a newly
// computed identifier. Callers must return this buffer to identPool. encodeKind
// is only a hint, kind may still be encoded in the buffer.
//
// Exactly one of the results is meaningful: a nil buf means the identifier
// was found in the artifact cache and is returned as idu; a non-nil buf holds
// a newly computed identifier in buf[wordSize:] which has NOT been stored in
// the artifact cache yet, and idu is the zero value.
//
// For an [Artifact] that is not a KnownIdent, the identifier is the SHA-384
// digest of, in order: the little-endian kind, whatever [Artifact.Params]
// writes, and the sorted, deduplicated kind-prefixed identifiers of all
// dependencies.
func (c *Cache) unsafeIdent(a Artifact, encodeKind bool) (
	buf *extIdent,
	idu unique.Handle[ID],
) {
	// Fast path: identifier was computed before.
	if id, ok := c.artifact.Load(a); ok {
		idu = id.(unique.Handle[ID])
		return
	}

	// The artifact knows its own identifier, no hashing required.
	if ki, ok := a.(KnownIdent); ok {
		buf = c.getIdentBuf()
		if encodeKind {
			binary.LittleEndian.PutUint64(buf[:], uint64(a.Kind()))
		}
		*(*ID)(buf[wordSize:]) = ki.ID()
		return
	}

	deps := a.Dependencies()
	idents := make([]*extIdent, len(deps))
	// Every dependency buffer stays borrowed until the digest is complete.
	// A single deferred release replaces one defer per loop iteration; nil
	// entries only occur when unwinding from a panic raised mid-loop.
	defer func() {
		for _, dbuf := range idents {
			if dbuf != nil {
				c.putIdentBuf(dbuf)
			}
		}
	}()
	for i, d := range deps {
		dbuf, did := c.unsafeIdent(d, true)
		if dbuf == nil {
			// Cached dependency: materialise its extIdent for sorting.
			dbuf = c.getIdentBuf()
			binary.LittleEndian.PutUint64(dbuf[:], uint64(d.Kind()))
			*(*ID)(dbuf[wordSize:]) = did.Value()
		} else {
			// Newly computed dependency: remember it for later lookups.
			c.storeIdent(d, dbuf)
		}
		idents[i] = dbuf
	}
	// Sorting makes the result independent of dependency order.
	slices.SortFunc(idents, func(x, y *extIdent) int {
		return bytes.Compare(x[:], y[:])
	})

	buf = c.getIdentBuf()
	h := sha512.New384()
	binary.LittleEndian.PutUint64(buf[:], uint64(a.Kind()))
	h.Write(buf[:wordSize])

	i := IContext{c, h}
	a.Params(&i)
	// Invalidate the context so a retained pointer cannot be used later.
	i.cache, i.h = nil, nil

	for n, dn := range idents {
		// Equal entries are adjacent after sorting; hash each value once.
		// Skipping here instead of compacting keeps every buffer in idents
		// for the deferred release above.
		if n > 0 && *dn == *idents[n-1] {
			continue
		}
		h.Write(dn[:])
	}
	// Sum appends into the identifier portion of buf without allocating.
	h.Sum(buf[wordSize:wordSize])
	return
}
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
type ChecksumMismatchError struct {
// Actual and expected checksums.
@@ -535,8 +616,8 @@ func (c *Cache) Scrub() error {
c.checksumMu.Lock()
defer c.checksumMu.Unlock()
c.ident = make(map[ID]Checksum)
c.identErr = make(map[ID]error)
c.ident = make(map[unique.Handle[ID]]Checksum)
c.identErr = make(map[unique.Handle[ID]]error)
var se ScrubError
@@ -687,7 +768,7 @@ func (c *Cache) Scrub() error {
// loadOrStoreIdent attempts to load a cached [Artifact] by its identifier or
// wait for a pending [Artifact] to cure. If neither is possible, the current
// identifier is stored in identPending and a non-nil channel is returned.
func (c *Cache) loadOrStoreIdent(id *ID) (
func (c *Cache) loadOrStoreIdent(id unique.Handle[ID]) (
done chan<- struct{},
checksum Checksum,
err error,
@@ -695,29 +776,29 @@ func (c *Cache) loadOrStoreIdent(id *ID) (
var ok bool
c.identMu.Lock()
if checksum, ok = c.ident[*id]; ok {
if checksum, ok = c.ident[id]; ok {
c.identMu.Unlock()
return
}
if err, ok = c.identErr[*id]; ok {
if err, ok = c.identErr[id]; ok {
c.identMu.Unlock()
return
}
var notify <-chan struct{}
if notify, ok = c.identPending[*id]; ok {
if notify, ok = c.identPending[id]; ok {
c.identMu.Unlock()
<-notify
c.identMu.RLock()
if checksum, ok = c.ident[*id]; !ok {
err = c.identErr[*id]
if checksum, ok = c.ident[id]; !ok {
err = c.identErr[id]
}
c.identMu.RUnlock()
return
}
d := make(chan struct{})
c.identPending[*id] = d
c.identPending[id] = d
c.identMu.Unlock()
done = d
return
@@ -727,17 +808,17 @@ func (c *Cache) loadOrStoreIdent(id *ID) (
// previously submitted to identPending.
func (c *Cache) finaliseIdent(
done chan<- struct{},
id *ID,
id unique.Handle[ID],
checksum *Checksum,
err error,
) {
c.identMu.Lock()
if err != nil {
c.identErr[*id] = err
c.identErr[id] = err
} else {
c.ident[*id] = *checksum
c.ident[id] = *checksum
}
delete(c.identPending, *id)
delete(c.identPending, id)
c.identMu.Unlock()
close(done)
@@ -758,7 +839,7 @@ func (c *Cache) openFile(f File) (r io.ReadCloser, err error) {
c.identMu.RLock()
r, err = os.Open(c.base.Append(
dirIdentifier,
Encode(Ident(f)),
Encode(c.Ident(f).Value()),
).String())
c.identMu.RUnlock()
}
@@ -768,7 +849,7 @@ func (c *Cache) openFile(f File) (r io.ReadCloser, err error) {
return
}
if c.msg.IsVerbose() {
rn := reportName(f)
rn := reportName(f, c.Ident(f))
c.msg.Verbosef("curing %s to memory...", rn)
defer func() {
if err == nil {
@@ -955,8 +1036,8 @@ func (c *Cache) cure(a Artifact) (
checksum Checksum,
err error,
) {
id := Ident(a)
ids := Encode(id)
id := c.Ident(a)
ids := Encode(id.Value())
pathname = c.base.Append(
dirIdentifier,
ids,
@@ -969,11 +1050,11 @@ func (c *Cache) cure(a Artifact) (
}()
var done chan<- struct{}
done, checksum, err = c.loadOrStoreIdent(&id)
done, checksum, err = c.loadOrStoreIdent(id)
if done == nil {
return
} else {
defer func() { c.finaliseIdent(done, &id, &checksum, err) }()
defer func() { c.finaliseIdent(done, id, &checksum, err) }()
}
_, err = os.Lstat(pathname.String())
@@ -1026,7 +1107,7 @@ func (c *Cache) cure(a Artifact) (
}
if c.msg.IsVerbose() {
rn := reportNameIdent(a, id)
rn := reportName(a, id)
c.msg.Verbosef("curing %s...", rn)
defer func() {
if err != nil {
@@ -1126,7 +1207,7 @@ func (c *Cache) cure(a Artifact) (
case FloodArtifact:
deps := a.Dependencies()
f := FContext{t, make(map[ID]*check.Absolute, len(deps))}
f := FContext{t, make(map[Artifact]*check.Absolute, len(deps))}
var wg sync.WaitGroup
wg.Add(len(deps))
@@ -1155,7 +1236,7 @@ func (c *Cache) cure(a Artifact) (
return
}
for i, p := range res {
f.deps[Ident(deps[i])] = p
f.deps[deps[i]] = p
}
defer f.destroy(&err)
@@ -1165,7 +1246,7 @@ func (c *Cache) cure(a Artifact) (
break
default:
err = InvalidArtifactError(id)
err = InvalidArtifactError(id.Value())
return
}
t.cache = nil
@@ -1285,13 +1366,14 @@ func New(
msg: msg,
base: base,
ident: make(map[ID]Checksum),
identErr: make(map[ID]error),
identPending: make(map[ID]<-chan struct{}),
ident: make(map[unique.Handle[ID]]Checksum),
identErr: make(map[unique.Handle[ID]]error),
identPending: make(map[unique.Handle[ID]]<-chan struct{}),
}
c.ctx, c.cancel = context.WithCancel(ctx)
cureDep := make(chan *pendingArtifactDep, cures)
c.cureDep = cureDep
c.identPool.New = func() any { return new(extIdent) }
if cures < 1 {
cures = runtime.NumCPU()