internal/pkg: cache computed identifiers
All checks were successful
Test / Create distribution (push) Successful in 49s
Test / Sandbox (push) Successful in 3m1s
Test / ShareFS (push) Successful in 4m56s
Test / Sandbox (race detector) (push) Successful in 5m21s
Test / Hpkg (push) Successful in 5m30s
Test / Hakurei (push) Successful in 5m53s
Test / Hakurei (race detector) (push) Successful in 7m56s
Test / Flake checks (push) Successful in 1m57s
All checks were successful
Test / Create distribution (push) Successful in 49s
Test / Sandbox (push) Successful in 3m1s
Test / ShareFS (push) Successful in 4m56s
Test / Sandbox (race detector) (push) Successful in 5m21s
Test / Hpkg (push) Successful in 5m30s
Test / Hakurei (push) Successful in 5m53s
Test / Hakurei (race detector) (push) Successful in 7m56s
Test / Flake checks (push) Successful in 1m57s
This eliminates duplicate identifier computations. The new implementation also significantly reduces allocations while computing the identifier of a large dependency tree.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"io/fs"
|
||||
"iter"
|
||||
@@ -20,6 +21,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"unique"
|
||||
"unsafe"
|
||||
|
||||
"hakurei.app/container/check"
|
||||
@@ -60,6 +62,35 @@ func MustDecode(s string) Checksum {
|
||||
}
|
||||
}
|
||||
|
||||
// IContext is passed to [Artifact.Params] and provides identifier information
|
||||
// and the target [hash.Hash] for writing params into.
|
||||
//
|
||||
// Methods of IContext are safe for concurrent use. IContext is valid
|
||||
// until [Artifact.Params] returns.
|
||||
type IContext struct {
|
||||
// Address of underlying [Cache], should be zeroed or made unusable after
|
||||
// [Artifact.Params] returns and must not be exposed directly.
|
||||
cache *Cache
|
||||
// Made available for writing, should be zeroed after [Artifact.Params]
|
||||
// returns. Internal state must not be inspected.
|
||||
h hash.Hash
|
||||
}
|
||||
|
||||
// Unwrap returns the underlying [context.Context].
|
||||
func (i *IContext) Unwrap() context.Context { return i.cache.ctx }
|
||||
|
||||
// GetHash returns the underlying [hash.Hash] for writing. Callers must not
|
||||
// attempt to inspect its internal state.
|
||||
func (i *IContext) GetHash() hash.Hash { return i.h }
|
||||
|
||||
// WriteIdent writes the identifier of [Artifact] to the underlying [hash.Hash].
|
||||
func (i *IContext) WriteIdent(a Artifact) {
|
||||
buf := i.cache.getIdentBuf()
|
||||
*(*ID)(buf[wordSize:]) = i.cache.Ident(a).Value()
|
||||
i.h.Write(buf[wordSize:])
|
||||
i.cache.putIdentBuf(buf)
|
||||
}
|
||||
|
||||
// TContext is passed to [TrivialArtifact.Cure] and provides information and
|
||||
// methods required for curing the [TrivialArtifact].
|
||||
//
|
||||
@@ -154,7 +185,7 @@ type FContext struct {
|
||||
TContext
|
||||
|
||||
// Cured top-level dependencies looked up by Pathname.
|
||||
deps map[ID]*check.Absolute
|
||||
deps map[Artifact]*check.Absolute
|
||||
}
|
||||
|
||||
// InvalidLookupError is the identifier of non-dependency [Artifact] looked up
|
||||
@@ -171,11 +202,10 @@ var _ error = InvalidLookupError{}
|
||||
// with an [Artifact] not part of the slice returned by [Artifact.Dependencies]
|
||||
// panics.
|
||||
func (f *FContext) Pathname(a Artifact) *check.Absolute {
|
||||
id := Ident(a)
|
||||
if p, ok := f.deps[id]; ok {
|
||||
if p, ok := f.deps[a]; ok {
|
||||
return p
|
||||
} else {
|
||||
panic(InvalidLookupError(id))
|
||||
panic(InvalidLookupError(f.cache.Ident(a).Value()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,14 +218,13 @@ type Artifact interface {
|
||||
// [Artifact] is allowed to return the same [Kind] value.
|
||||
Kind() Kind
|
||||
|
||||
// Params returns opaque bytes that describes [Artifact]. Implementations
|
||||
// Params writes opaque bytes that describe [Artifact]. Implementations
|
||||
// must guarantee that these values are unique among differing instances
|
||||
// of the same implementation with the same dependencies.
|
||||
//
|
||||
// Callers must not modify the retuned byte slice.
|
||||
// of the same implementation with the same dependencies. Callers must not
|
||||
// attempt to interpret these params.
|
||||
//
|
||||
// Result must remain identical across multiple invocations.
|
||||
Params() []byte
|
||||
Params(ctx *IContext)
|
||||
|
||||
// Dependencies returns a slice of [Artifact] that the current instance
|
||||
// depends on to produce its contents.
|
||||
@@ -290,17 +319,9 @@ type File interface {
|
||||
Artifact
|
||||
}
|
||||
|
||||
// Ident returns the identifier of an [Artifact].
|
||||
func Ident(a Artifact) ID {
|
||||
if ki, ok := a.(KnownIdent); ok {
|
||||
return ki.ID()
|
||||
}
|
||||
return a.Kind().Ident(a.Params(), a.Dependencies()...)
|
||||
}
|
||||
|
||||
// reportNameIdent is like reportName but does not recompute [ID].
|
||||
func reportNameIdent(a Artifact, id ID) string {
|
||||
r := Encode(id)
|
||||
// reportName returns a string describing [Artifact] presented to the user.
|
||||
func reportName(a Artifact, id unique.Handle[ID]) string {
|
||||
r := Encode(id.Value())
|
||||
if s, ok := a.(fmt.Stringer); ok {
|
||||
if name := s.String(); name != "" {
|
||||
r += "-" + name
|
||||
@@ -309,9 +330,6 @@ func reportNameIdent(a Artifact, id ID) string {
|
||||
return r
|
||||
}
|
||||
|
||||
// reportName returns a string describing [Artifact] presented to the user.
|
||||
func reportName(a Artifact) string { return reportNameIdent(a, Ident(a)) }
|
||||
|
||||
// Kind corresponds to the concrete type of [Artifact] and is used to create
|
||||
// identifier for an [Artifact] with dependencies.
|
||||
type Kind uint64
|
||||
@@ -334,31 +352,6 @@ const (
|
||||
KindCustomOffset = 1 << 31
|
||||
)
|
||||
|
||||
// Ident returns a deterministic identifier for the supplied params and
|
||||
// dependencies. The caller is responsible for ensuring params uniquely and
|
||||
// deterministically describes the current [Artifact].
|
||||
func (k Kind) Ident(params []byte, deps ...Artifact) ID {
|
||||
type extIdent [len(ID{}) + wordSize]byte
|
||||
identifiers := make([]extIdent, len(deps))
|
||||
for i, a := range deps {
|
||||
id := Ident(a)
|
||||
copy(identifiers[i][wordSize:], id[:])
|
||||
binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind()))
|
||||
}
|
||||
slices.SortFunc(identifiers, func(a, b extIdent) int {
|
||||
return bytes.Compare(a[:], b[:])
|
||||
})
|
||||
identifiers = slices.Compact(identifiers)
|
||||
|
||||
h := sha512.New384()
|
||||
h.Write(binary.LittleEndian.AppendUint64(nil, uint64(k)))
|
||||
h.Write(params)
|
||||
for _, e := range identifiers {
|
||||
h.Write(e[:])
|
||||
}
|
||||
return ID(h.Sum(nil))
|
||||
}
|
||||
|
||||
const (
|
||||
// dirIdentifier is the directory name appended to Cache.base for storing
|
||||
// artifacts named after their [ID].
|
||||
@@ -429,15 +422,20 @@ type Cache struct {
|
||||
// Maximum size of a dependency graph.
|
||||
threshold uintptr
|
||||
|
||||
// Artifact to [unique.Handle] of identifier cache.
|
||||
artifact sync.Map
|
||||
// Identifier free list, must not be accessed directly.
|
||||
identPool sync.Pool
|
||||
|
||||
// Synchronises access to dirChecksum.
|
||||
checksumMu sync.RWMutex
|
||||
|
||||
// Identifier to content pair cache.
|
||||
ident map[ID]Checksum
|
||||
ident map[unique.Handle[ID]]Checksum
|
||||
// Identifier to error pair for unrecoverably faulted [Artifact].
|
||||
identErr map[ID]error
|
||||
identErr map[unique.Handle[ID]]error
|
||||
// Pending identifiers, accessed through Cure for entries not in ident.
|
||||
identPending map[ID]<-chan struct{}
|
||||
identPending map[unique.Handle[ID]]<-chan struct{}
|
||||
// Synchronises access to ident and corresponding filesystem entries.
|
||||
identMu sync.RWMutex
|
||||
}
|
||||
@@ -458,6 +456,89 @@ func (c *Cache) SetStrict(strict bool) { c.strict = strict }
|
||||
// This method is not safe for concurrent use with any other method.
|
||||
func (c *Cache) SetThreshold(threshold uintptr) { c.threshold = threshold }
|
||||
|
||||
// extIdent is a [Kind] concatenated with [ID].
|
||||
type extIdent [wordSize + len(ID{})]byte
|
||||
|
||||
// getIdentBuf returns the address of an extIdent for Ident.
|
||||
func (c *Cache) getIdentBuf() *extIdent { return c.identPool.Get().(*extIdent) }
|
||||
|
||||
// putIdentBuf adds buf to identPool.
|
||||
func (c *Cache) putIdentBuf(buf *extIdent) { c.identPool.Put(buf) }
|
||||
|
||||
// storeIdent adds an [Artifact] to the artifact cache.
|
||||
func (c *Cache) storeIdent(a Artifact, buf *extIdent) unique.Handle[ID] {
|
||||
idu := unique.Make(ID(buf[wordSize:]))
|
||||
c.artifact.Store(a, idu)
|
||||
return idu
|
||||
}
|
||||
|
||||
// Ident returns the identifier of an [Artifact].
|
||||
func (c *Cache) Ident(a Artifact) unique.Handle[ID] {
|
||||
buf, idu := c.unsafeIdent(a, false)
|
||||
if buf != nil {
|
||||
idu = c.storeIdent(a, buf)
|
||||
c.putIdentBuf(buf)
|
||||
}
|
||||
return idu
|
||||
}
|
||||
|
||||
// unsafeIdent implements Ident but returns the underlying buffer for a newly
|
||||
// computed identifier. Callers must return this buffer to identPool. encodeKind
|
||||
// is only a hint, kind may still be encoded in the buffer.
|
||||
func (c *Cache) unsafeIdent(a Artifact, encodeKind bool) (
|
||||
buf *extIdent,
|
||||
idu unique.Handle[ID],
|
||||
) {
|
||||
if id, ok := c.artifact.Load(a); ok {
|
||||
idu = id.(unique.Handle[ID])
|
||||
return
|
||||
}
|
||||
|
||||
if ki, ok := a.(KnownIdent); ok {
|
||||
buf = c.getIdentBuf()
|
||||
if encodeKind {
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(a.Kind()))
|
||||
}
|
||||
*(*ID)(buf[wordSize:]) = ki.ID()
|
||||
return
|
||||
}
|
||||
|
||||
deps := a.Dependencies()
|
||||
idents := make([]*extIdent, len(deps))
|
||||
for i, d := range deps {
|
||||
dbuf, did := c.unsafeIdent(d, true)
|
||||
if dbuf == nil {
|
||||
dbuf = c.getIdentBuf()
|
||||
binary.LittleEndian.PutUint64(dbuf[:], uint64(d.Kind()))
|
||||
*(*ID)(dbuf[wordSize:]) = did.Value()
|
||||
} else {
|
||||
c.storeIdent(d, dbuf)
|
||||
}
|
||||
defer c.putIdentBuf(dbuf)
|
||||
idents[i] = dbuf
|
||||
}
|
||||
slices.SortFunc(idents, func(a, b *extIdent) int {
|
||||
return bytes.Compare(a[:], b[:])
|
||||
})
|
||||
idents = slices.CompactFunc(idents, func(a, b *extIdent) bool {
|
||||
return *a == *b
|
||||
})
|
||||
|
||||
buf = c.getIdentBuf()
|
||||
h := sha512.New384()
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(a.Kind()))
|
||||
h.Write(buf[:wordSize])
|
||||
i := IContext{c, h}
|
||||
a.Params(&i)
|
||||
i.cache, i.h = nil, nil
|
||||
for _, dn := range idents {
|
||||
h.Write(dn[:])
|
||||
}
|
||||
|
||||
h.Sum(buf[wordSize:wordSize])
|
||||
return
|
||||
}
|
||||
|
||||
// A ChecksumMismatchError describes an [Artifact] with unexpected content.
|
||||
type ChecksumMismatchError struct {
|
||||
// Actual and expected checksums.
|
||||
@@ -535,8 +616,8 @@ func (c *Cache) Scrub() error {
|
||||
c.checksumMu.Lock()
|
||||
defer c.checksumMu.Unlock()
|
||||
|
||||
c.ident = make(map[ID]Checksum)
|
||||
c.identErr = make(map[ID]error)
|
||||
c.ident = make(map[unique.Handle[ID]]Checksum)
|
||||
c.identErr = make(map[unique.Handle[ID]]error)
|
||||
|
||||
var se ScrubError
|
||||
|
||||
@@ -687,7 +768,7 @@ func (c *Cache) Scrub() error {
|
||||
// loadOrStoreIdent attempts to load a cached [Artifact] by its identifier or
|
||||
// wait for a pending [Artifact] to cure. If neither is possible, the current
|
||||
// identifier is stored in identPending and a non-nil channel is returned.
|
||||
func (c *Cache) loadOrStoreIdent(id *ID) (
|
||||
func (c *Cache) loadOrStoreIdent(id unique.Handle[ID]) (
|
||||
done chan<- struct{},
|
||||
checksum Checksum,
|
||||
err error,
|
||||
@@ -695,29 +776,29 @@ func (c *Cache) loadOrStoreIdent(id *ID) (
|
||||
var ok bool
|
||||
|
||||
c.identMu.Lock()
|
||||
if checksum, ok = c.ident[*id]; ok {
|
||||
if checksum, ok = c.ident[id]; ok {
|
||||
c.identMu.Unlock()
|
||||
return
|
||||
}
|
||||
if err, ok = c.identErr[*id]; ok {
|
||||
if err, ok = c.identErr[id]; ok {
|
||||
c.identMu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
var notify <-chan struct{}
|
||||
if notify, ok = c.identPending[*id]; ok {
|
||||
if notify, ok = c.identPending[id]; ok {
|
||||
c.identMu.Unlock()
|
||||
<-notify
|
||||
c.identMu.RLock()
|
||||
if checksum, ok = c.ident[*id]; !ok {
|
||||
err = c.identErr[*id]
|
||||
if checksum, ok = c.ident[id]; !ok {
|
||||
err = c.identErr[id]
|
||||
}
|
||||
c.identMu.RUnlock()
|
||||
return
|
||||
}
|
||||
|
||||
d := make(chan struct{})
|
||||
c.identPending[*id] = d
|
||||
c.identPending[id] = d
|
||||
c.identMu.Unlock()
|
||||
done = d
|
||||
return
|
||||
@@ -727,17 +808,17 @@ func (c *Cache) loadOrStoreIdent(id *ID) (
|
||||
// previously submitted to identPending.
|
||||
func (c *Cache) finaliseIdent(
|
||||
done chan<- struct{},
|
||||
id *ID,
|
||||
id unique.Handle[ID],
|
||||
checksum *Checksum,
|
||||
err error,
|
||||
) {
|
||||
c.identMu.Lock()
|
||||
if err != nil {
|
||||
c.identErr[*id] = err
|
||||
c.identErr[id] = err
|
||||
} else {
|
||||
c.ident[*id] = *checksum
|
||||
c.ident[id] = *checksum
|
||||
}
|
||||
delete(c.identPending, *id)
|
||||
delete(c.identPending, id)
|
||||
c.identMu.Unlock()
|
||||
|
||||
close(done)
|
||||
@@ -758,7 +839,7 @@ func (c *Cache) openFile(f File) (r io.ReadCloser, err error) {
|
||||
c.identMu.RLock()
|
||||
r, err = os.Open(c.base.Append(
|
||||
dirIdentifier,
|
||||
Encode(Ident(f)),
|
||||
Encode(c.Ident(f).Value()),
|
||||
).String())
|
||||
c.identMu.RUnlock()
|
||||
}
|
||||
@@ -768,7 +849,7 @@ func (c *Cache) openFile(f File) (r io.ReadCloser, err error) {
|
||||
return
|
||||
}
|
||||
if c.msg.IsVerbose() {
|
||||
rn := reportName(f)
|
||||
rn := reportName(f, c.Ident(f))
|
||||
c.msg.Verbosef("curing %s to memory...", rn)
|
||||
defer func() {
|
||||
if err == nil {
|
||||
@@ -955,8 +1036,8 @@ func (c *Cache) cure(a Artifact) (
|
||||
checksum Checksum,
|
||||
err error,
|
||||
) {
|
||||
id := Ident(a)
|
||||
ids := Encode(id)
|
||||
id := c.Ident(a)
|
||||
ids := Encode(id.Value())
|
||||
pathname = c.base.Append(
|
||||
dirIdentifier,
|
||||
ids,
|
||||
@@ -969,11 +1050,11 @@ func (c *Cache) cure(a Artifact) (
|
||||
}()
|
||||
|
||||
var done chan<- struct{}
|
||||
done, checksum, err = c.loadOrStoreIdent(&id)
|
||||
done, checksum, err = c.loadOrStoreIdent(id)
|
||||
if done == nil {
|
||||
return
|
||||
} else {
|
||||
defer func() { c.finaliseIdent(done, &id, &checksum, err) }()
|
||||
defer func() { c.finaliseIdent(done, id, &checksum, err) }()
|
||||
}
|
||||
|
||||
_, err = os.Lstat(pathname.String())
|
||||
@@ -1026,7 +1107,7 @@ func (c *Cache) cure(a Artifact) (
|
||||
}
|
||||
|
||||
if c.msg.IsVerbose() {
|
||||
rn := reportNameIdent(a, id)
|
||||
rn := reportName(a, id)
|
||||
c.msg.Verbosef("curing %s...", rn)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
@@ -1126,7 +1207,7 @@ func (c *Cache) cure(a Artifact) (
|
||||
|
||||
case FloodArtifact:
|
||||
deps := a.Dependencies()
|
||||
f := FContext{t, make(map[ID]*check.Absolute, len(deps))}
|
||||
f := FContext{t, make(map[Artifact]*check.Absolute, len(deps))}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(deps))
|
||||
@@ -1155,7 +1236,7 @@ func (c *Cache) cure(a Artifact) (
|
||||
return
|
||||
}
|
||||
for i, p := range res {
|
||||
f.deps[Ident(deps[i])] = p
|
||||
f.deps[deps[i]] = p
|
||||
}
|
||||
|
||||
defer f.destroy(&err)
|
||||
@@ -1165,7 +1246,7 @@ func (c *Cache) cure(a Artifact) (
|
||||
break
|
||||
|
||||
default:
|
||||
err = InvalidArtifactError(id)
|
||||
err = InvalidArtifactError(id.Value())
|
||||
return
|
||||
}
|
||||
t.cache = nil
|
||||
@@ -1285,13 +1366,14 @@ func New(
|
||||
msg: msg,
|
||||
base: base,
|
||||
|
||||
ident: make(map[ID]Checksum),
|
||||
identErr: make(map[ID]error),
|
||||
identPending: make(map[ID]<-chan struct{}),
|
||||
ident: make(map[unique.Handle[ID]]Checksum),
|
||||
identErr: make(map[unique.Handle[ID]]error),
|
||||
identPending: make(map[unique.Handle[ID]]<-chan struct{}),
|
||||
}
|
||||
c.ctx, c.cancel = context.WithCancel(ctx)
|
||||
cureDep := make(chan *pendingArtifactDep, cures)
|
||||
c.cureDep = cureDep
|
||||
c.identPool.New = func() any { return new(extIdent) }
|
||||
|
||||
if cures < 1 {
|
||||
cures = runtime.NumCPU()
|
||||
|
||||
Reference in New Issue
Block a user