internal/pkg: content-based dependency substitution
All checks were successful
Test / Create distribution (push) Successful in 1m4s
Test / Sandbox (push) Successful in 2m48s
Test / ShareFS (push) Successful in 3m42s
Test / Hakurei (push) Successful in 3m57s
Test / Sandbox (race detector) (push) Successful in 5m23s
Test / Hakurei (race detector) (push) Successful in 6m35s
Test / Flake checks (push) Successful in 1m21s

This change introduces a new fast path for FloodArtifact. It is taken when a curing artifact has identical-by-content controlled relevant inputs and is otherwise identical to an already-cured artifact.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-05-11 19:44:28 +09:00
parent ce9f4b5f71
commit ec94eddc58
8 changed files with 337 additions and 49 deletions

View File

@@ -515,6 +515,9 @@ const (
// identification string set by a prior call to [SetExtension].
fileVariant = "variant"
// dirSubstitute is the directory name appended to Cache.base for linking
// artifacts named after their substitute identifier.
dirSubstitute = "substitute"
// dirIdentifier is the directory name appended to Cache.base for storing
// artifacts named after their [ID].
dirIdentifier = "identifier"
@@ -621,6 +624,9 @@ const (
// CSuppressInit arranges for verbose output of the container init to be
// suppressed regardless of [message.Msg] state.
CSuppressInit
// CIgnoreSubstitutes disables content-based dependency substitution.
CIgnoreSubstitutes
)
// toplevel holds [context.WithCancel] over caller-supplied context, where all
@@ -676,6 +682,11 @@ type Cache struct {
// Synchronises access to dirChecksum.
checksumMu sync.RWMutex
// Presence of an alternative in the cache. Keys are not valid identifiers
// and must not be used as such.
substitute map[unique.Handle[ID]]unique.Handle[Checksum]
// Synchronises access to substitute and corresponding filesystem entries.
substituteMu sync.RWMutex
// Identifier to content pair cache.
ident map[unique.Handle[ID]]unique.Handle[Checksum]
// Identifier to error pair for unrecoverably faulted [Artifact].
@@ -886,11 +897,14 @@ func (c *Cache) Scrub(checks int) error {
checks = runtime.NumCPU()
}
c.substituteMu.Lock()
defer c.substituteMu.Unlock()
c.identMu.Lock()
defer c.identMu.Unlock()
c.checksumMu.Lock()
defer c.checksumMu.Unlock()
c.substitute = make(map[unique.Handle[ID]]unique.Handle[Checksum])
c.ident = make(map[unique.Handle[ID]]unique.Handle[Checksum])
c.identErr = make(map[unique.Handle[ID]]error)
c.artifact.Clear()
@@ -998,47 +1012,52 @@ func (c *Cache) Scrub(checks int) error {
wg.Wait()
}
dir = c.base.Append(dirIdentifier)
if entries, readdirErr := os.ReadDir(dir.String()); readdirErr != nil {
addErr(dir, readdirErr)
} else {
wg.Add(len(entries))
for _, ent := range entries {
w <- checkEntry{ent, func(ent os.DirEntry, want *Checksum) bool {
got := p.Get().(*Checksum)
defer p.Put(got)
for _, suffix := range []string{
dirSubstitute,
dirIdentifier,
} {
dir = c.base.Append(suffix)
if entries, readdirErr := os.ReadDir(dir.String()); readdirErr != nil {
addErr(dir, readdirErr)
} else {
wg.Add(len(entries))
for _, ent := range entries {
w <- checkEntry{ent, func(ent os.DirEntry, want *Checksum) bool {
got := p.Get().(*Checksum)
defer p.Put(got)
pathname := dir.Append(ent.Name())
if linkname, err := os.Readlink(
pathname.String(),
); err != nil {
seMu.Lock()
se.Errs[pathname.Handle()] = append(se.Errs[pathname.Handle()], err)
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
} else if err = Decode(got, filepath.Base(linkname)); err != nil {
seMu.Lock()
lnp := dir.Append(linkname)
se.Errs[lnp.Handle()] = append(se.Errs[lnp.Handle()], err)
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
}
if _, err := os.Stat(pathname.String()); err != nil {
if !errors.Is(err, os.ErrNotExist) {
addErr(pathname, err)
pathname := dir.Append(ent.Name())
if linkname, err := os.Readlink(
pathname.String(),
); err != nil {
seMu.Lock()
se.Errs[pathname.Handle()] = append(se.Errs[pathname.Handle()], err)
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
} else if err = Decode(got, filepath.Base(linkname)); err != nil {
seMu.Lock()
lnp := dir.Append(linkname)
se.Errs[lnp.Handle()] = append(se.Errs[lnp.Handle()], err)
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
}
seMu.Lock()
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
}
return true
}}
if _, err := os.Stat(pathname.String()); err != nil {
if !errors.Is(err, os.ErrNotExist) {
addErr(pathname, err)
}
seMu.Lock()
se.DanglingIdentifiers = append(se.DanglingIdentifiers, *want)
seMu.Unlock()
return false
}
return true
}}
}
wg.Wait()
}
wg.Wait()
}
dir = c.base.Append(dirStatus)
@@ -1186,6 +1205,52 @@ func (c *Cache) finaliseIdent(
close(done)
}
// zeroChecksum is a zero [Checksum] handle, used for comparison only.
//
// It is declared without an initialiser, so it holds the zero value of
// unique.Handle[Checksum] rather than a handle obtained from unique.Make.
// It serves as the "no checksum" sentinel: loadSubstitute returns it, and
// records it in Cache.substitute, when no alternative is available.
var zeroChecksum unique.Handle[Checksum]
// loadSubstitute returns a checksum corresponding to a substitute identifier,
// or zeroChecksum if an alternative is not available.
//
// The in-memory substitute map is consulted first. On a miss, the symlink
// named after the encoded identifier under dirSubstitute is read and the base
// name of its target is decoded as the checksum. Both a successfully decoded
// checksum and the absence of a link are recorded in the map, so later calls
// for the same identifier do not touch the filesystem.
//
// A non-nil error is returned when reading the link fails for a reason other
// than the link not existing, or when the link target does not decode as a
// [Checksum]. Nothing is recorded in the map in either case.
func (c *Cache) loadSubstitute(
	substitute unique.Handle[ID],
) (unique.Handle[Checksum], error) {
	c.substituteMu.RLock()
	if checksum, ok := c.substitute[substitute]; ok {
		c.substituteMu.RUnlock()
		return checksum, nil
	}
	// The link is read while still holding the read lock so that it is not
	// observed concurrently with a writer that holds substituteMu.
	linkname, err := os.Readlink(c.base.Append(
		dirSubstitute,
		Encode(substitute.Value()),
	).String())
	c.substituteMu.RUnlock()

	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			return zeroChecksum, err
		}

		// The read lock was dropped above, so a writer may have created the
		// link and recorded its checksum before the write lock is acquired
		// here. Re-check the map and only record absence when no entry
		// exists; otherwise a valid entry would be replaced by zeroChecksum
		// and the substitute would be reported as unavailable from then on.
		c.substituteMu.Lock()
		checksum, ok := c.substitute[substitute]
		if !ok {
			checksum = zeroChecksum
			c.substitute[substitute] = zeroChecksum
		}
		c.substituteMu.Unlock()
		return checksum, nil
	}

	var checksum unique.Handle[Checksum]
	buf := c.getIdentBuf()
	// Only the final path element of the link target is decoded.
	err = Decode((*Checksum)(buf[:]), filepath.Base(linkname))
	if err == nil {
		checksum = unique.Make(Checksum(buf[:]))
		c.substituteMu.Lock()
		c.substitute[substitute] = checksum
		c.substituteMu.Unlock()
	}
	c.putIdentBuf(buf)
	// On a decode failure checksum is still the zero handle.
	return checksum, err
}
// Done returns a channel that is closed when the ongoing cure of an [Artifact]
// referred to by the specified identifier completes. Done may return nil if
// no ongoing cure of the specified identifier exists.
@@ -1654,16 +1719,44 @@ func (c *Cache) cure(a Artifact, curesExempt bool) (
return
}
var checksums string
var (
checksums string
substitute unique.Handle[ID]
alternative *check.Absolute
)
defer func() {
if err == nil && checksums != "" {
linkname := checksumLinknamePrefix + checksums
err = os.Symlink(
checksumLinknamePrefix+checksums,
linkname,
pathname.String(),
)
if err == nil {
err = zeroTimes(pathname.String())
}
if err == nil && alternative != nil {
c.substituteMu.Lock()
err = os.Symlink(
linkname,
alternative.String(),
)
if errors.Is(err, os.ErrExist) {
c.msg.Verbosef(
"creating alternative over %s for artifact %s",
Encode(substitute.Value()), ids,
)
err = nil
}
if err == nil {
err = zeroTimes(alternative.String())
}
if err == nil && checksum != zeroChecksum {
c.substitute[substitute] = checksum
}
c.substituteMu.Unlock()
}
}
}()
@@ -1860,6 +1953,40 @@ func (c *Cache) cure(a Artifact, curesExempt bool) (
f.deps[deps[i]] = p
}
sh := sha512.New384()
c.identMu.RLock()
err = c.encode(sh, a, c.ident)
c.identMu.RUnlock()
if err != nil {
return
}
buf := c.getIdentBuf()
sh.Sum(buf[wordSize:wordSize])
substitute = unique.Make(ID(buf[wordSize:]))
c.putIdentBuf(buf)
alternative = c.base.Append(
dirSubstitute,
Encode(substitute.Value()),
)
if c.flags&CIgnoreSubstitutes == 0 {
var substituteChecksum unique.Handle[Checksum]
substituteChecksum, err = c.loadSubstitute(substitute)
if err != nil {
return
}
if substituteChecksum != zeroChecksum {
checksum = substituteChecksum
checksums = Encode(checksum.Value())
checksumPathname = c.base.Append(
dirChecksum,
checksums,
)
return
}
}
defer f.destroy(&err)
if err = c.enterCure(a, curesExempt); err != nil {
return
@@ -2072,6 +2199,7 @@ func open(
}
for _, name := range []string{
dirSubstitute,
dirIdentifier,
dirChecksum,
dirStatus,
@@ -2097,6 +2225,7 @@ func open(
irCache: zeroIRCache(),
substitute: make(map[unique.Handle[ID]]unique.Handle[Checksum]),
ident: make(map[unique.Handle[ID]]unique.Handle[Checksum]),
identErr: make(map[unique.Handle[ID]]error),
identPending: make(map[unique.Handle[ID]]*pendingCure),