internal/pkg: garbage collection
All checks were successful
Test / Create distribution (push) Successful in 2m53s
Test / Sandbox (push) Successful in 7m2s
Test / ShareFS (push) Successful in 3m51s
Test / Hakurei (push) Successful in 3m58s
Test / Sandbox (race detector) (push) Successful in 5m32s
Test / Hakurei (race detector) (push) Successful in 6m35s
Test / Flake checks (push) Successful in 2m13s

This destroys cache entries not referred to by user-specified artifacts and optionally their inputs.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-06-01 14:51:40 +09:00
parent f398f71fa9
commit fbd2329d50
4 changed files with 484 additions and 5 deletions

138
internal/pkg/clean.go Normal file
View File

@@ -0,0 +1,138 @@
package pkg
import (
"errors"
"os"
"unique"
)
// Clean destroys checksum backing entries without any identifier or substitute
// entry referring to it. If at least one keep [Artifact] is specified,
// identifier and substitute entries not kept alive by them are destroyed first.
func (c *Cache) Clean(dry, inputs bool, keep ...Artifact) (
[]unique.Handle[ID],
[]unique.Handle[Checksum],
error,
) {
c.identMu.Lock()
defer c.identMu.Unlock()
c.checksumMu.Lock()
defer c.checksumMu.Unlock()
dents, err := os.ReadDir(c.base.Append(dirChecksum).String())
if err != nil {
return nil, nil, err
}
checksums := make(map[unique.Handle[Checksum]]string, len(dents))
var buf Checksum
for _, dent := range dents {
name := dent.Name()
if err = Decode(&buf, name); err != nil {
return nil, nil, err
}
checksums[unique.Make(buf)] = name
}
type identPair struct {
id unique.Handle[ID]
name string
}
dents, err = os.ReadDir(c.base.Append(dirIdentifier).String())
if err != nil {
return nil, nil, err
}
keepIdents := make(map[unique.Handle[ID]]struct{})
if inputs {
for _, id := range Inputs((*Collect)(&keep)) {
keepIdents[id] = struct{}{}
}
} else {
for _, a := range keep {
keepIdents[c.Ident(a)] = struct{}{}
}
}
idents := make([]identPair, 0, len(dents))
for _, dent := range dents {
name := dent.Name()
if err = Decode(&buf, name); err != nil {
return nil, nil, err
}
id := unique.Make(ID(buf))
if _, ok := keepIdents[id]; len(keep) == 0 || ok {
if err = readlinkChecksum(c.base.Append(
dirIdentifier,
name,
), &buf); err != nil {
return nil, nil, err
}
delete(checksums, unique.Make(buf))
continue
}
c.msg.Verbosef("arranging for destruction of %s...", name)
idents = append(idents, identPair{id, name})
}
destroyedIdents := make([]unique.Handle[ID], 0, len(idents))
for _, pair := range idents {
if !dry {
if err = os.Remove(c.base.Append(
dirIdentifier,
pair.name,
).String()); err != nil {
return destroyedIdents, nil, err
}
}
destroyedIdents = append(destroyedIdents, pair.id)
}
destroyedChecksums := make([]unique.Handle[Checksum], 0, len(checksums))
for checksum, name := range checksums {
if err = c.parent.Err(); err != nil {
return destroyedIdents, destroyedChecksums, err
}
c.msg.Verbosef("destroying checksum %s...", name)
if !dry {
if err = errors.Join(removeAll(c.base.Append(
dirChecksum,
name,
))); err != nil {
return destroyedIdents, destroyedChecksums, err
}
}
destroyedChecksums = append(destroyedChecksums, checksum)
}
dents, err = os.ReadDir(c.base.Append(dirSubstitute).String())
if err != nil {
return destroyedIdents, destroyedChecksums, err
}
for _, dent := range dents {
name := dent.Name()
if err = readlinkChecksum(c.base.Append(
dirSubstitute,
name,
), &buf); err != nil {
return destroyedIdents, destroyedChecksums, err
}
if _, ok := checksums[unique.Make(buf)]; !ok {
continue
}
c.msg.Verbosef("destroying substitute %s...", name)
if !dry {
if err = os.Remove(c.base.Append(
dirSubstitute,
name,
).String()); err != nil {
return destroyedIdents, destroyedChecksums, err
}
}
}
return destroyedIdents, destroyedChecksums, nil
}

312
internal/pkg/clean_test.go Normal file
View File

@@ -0,0 +1,312 @@
package pkg_test
import (
"bytes"
"crypto/sha512"
"io/fs"
"log"
"os"
"path/filepath"
"slices"
"strings"
"testing"
"unique"
"hakurei.app/check"
"hakurei.app/internal/pkg"
"hakurei.app/message"
)
// formatHandles returns a user-facing string representing h.
func formatHandles[T pkg.ID | pkg.Checksum](handles ...unique.Handle[T]) string {
var buf strings.Builder
for _, h := range handles {
buf.WriteString(pkg.Encode(pkg.Checksum(h.Value())))
buf.WriteString(", ")
}
return strings.TrimSuffix(buf.String(), ", ")
}
func TestClean(t *testing.T) {
t.Parallel()
ic := pkg.NewIR()
testCases := []struct {
name string
a []pkg.Artifact
keep []pkg.Artifact
inputs bool
want expectsFS
wantIdents []unique.Handle[pkg.ID]
wantChecksums []unique.Handle[pkg.Checksum]
}{
{"simple", []pkg.Artifact{
pkg.NewFile("file", nil),
}, nil, false, expectsFS{
".": {Mode: fs.ModeDir | 0700},
"checksum": {Mode: fs.ModeDir | 0700},
"checksum/OLBgp1GsljhM2TJ-sbHjaiH9txEUvgdDTAzHv2P24donTt6_529l-9Ua0vFImLlb": {Mode: 0400},
"identifier": {Mode: fs.ModeDir | 0700},
"identifier/pPRjw2XYgjB5k8dYedwxTBMgHh4_v2JM_G2Vd-skQbAGOOgPsl3CGSUbEF7om_MO": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/OLBgp1GsljhM2TJ-sbHjaiH9txEUvgdDTAzHv2P24donTt6_529l-9Ua0vFImLlb")},
"lock": {Mode: 0644},
"variant": {Mode: 0400},
"status": {Mode: fs.ModeDir | 0700},
"substitute": {Mode: fs.ModeDir | 0700},
"fault": {Mode: fs.ModeDir | 0700},
"work": {Mode: fs.ModeDir | 0700},
}, nil, nil},
{"keep", []pkg.Artifact{
pkg.NewFile("removed-file", []byte("removed file")),
}, []pkg.Artifact{
pkg.NewFile("file", []byte("\xfd")),
}, false, expectsFS{
".": {Mode: fs.ModeDir | 0700},
"checksum": {Mode: fs.ModeDir | 0700},
"checksum/KgZ-FjbGuU-XP2QEHInpgv-2Zn0cTH5NqFMgTU0XrSdKmSwyC-3baVs1BMCP5spk": {Mode: 0400, Data: []byte("\xfd")},
"identifier": {Mode: fs.ModeDir | 0700},
"identifier/FMwSBYw22KqM8jZryfY2ChHXpLuVDdWYyNOYdHvIVYk8ujY6UnGRm5brr2sTTfpD": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/KgZ-FjbGuU-XP2QEHInpgv-2Zn0cTH5NqFMgTU0XrSdKmSwyC-3baVs1BMCP5spk")},
"lock": {Mode: 0644},
"variant": {Mode: 0400},
"status": {Mode: fs.ModeDir | 0700},
"substitute": {Mode: fs.ModeDir | 0700},
"fault": {Mode: fs.ModeDir | 0700},
"work": {Mode: fs.ModeDir | 0700},
}, []unique.Handle[pkg.ID]{
ic.Ident(pkg.NewFile("removed-file", []byte("removed file"))),
}, []unique.Handle[pkg.Checksum]{
unique.Make(sha512.Sum384([]byte("removed file"))),
}},
{"inputs anchored substitute", []pkg.Artifact{
&stubArtifactF{
kind: pkg.KindExec,
params: []byte("destroyed"),
deps: []pkg.Artifact{
pkg.NewFile("destroyed-input", []byte("destroyed")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), nil, 0444)
},
},
}, []pkg.Artifact{
&stubArtifactF{
kind: pkg.KindExec,
params: []byte("kept"),
deps: []pkg.Artifact{
pkg.NewFile("kept-input", []byte("kept")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), nil, 0444)
},
},
}, true, expectsFS{
".": {Mode: fs.ModeDir | 0700},
"checksum": {Mode: fs.ModeDir | 0700},
"checksum/H-eSiCo227-xdqyNl2R-5G3eqXPtbb8XegAB70I5OQb2majeZXJoCxTq9wJy5qqv": {Mode: 0400, Data: []byte("kept")},
"checksum/UjZSrgz7_B7XMd9fHU7jM33UZhWlFgX0rz7JZbCBYR28bCS7jr_CAJdcDhi52ruE": {Mode: fs.ModeDir | 0500},
"checksum/UjZSrgz7_B7XMd9fHU7jM33UZhWlFgX0rz7JZbCBYR28bCS7jr_CAJdcDhi52ruE/result": {Mode: 0444},
"identifier": {Mode: fs.ModeDir | 0700},
"identifier/Ef8KX6s_rS_nLgze0rj90zKyrGAvOnzyU0DL7nrYQWEG_f4a9pmUKI6HBEMD8AE8": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/H-eSiCo227-xdqyNl2R-5G3eqXPtbb8XegAB70I5OQb2majeZXJoCxTq9wJy5qqv")},
"identifier/xoIGLemzLF227e-w_AJcf_1Sgqh2gs3KFgqvOIWUQE-9P_y2vHBMBytL4GRGQqTb": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/UjZSrgz7_B7XMd9fHU7jM33UZhWlFgX0rz7JZbCBYR28bCS7jr_CAJdcDhi52ruE")},
"lock": {Mode: 0644},
"variant": {Mode: 0400},
"status": {Mode: fs.ModeDir | 0700},
"substitute": {Mode: fs.ModeDir | 0700},
"substitute/4bjS-QjGcSV4nth-W6Vg3-wolKmKgiq4Ld2oRIWcOfy6Wi41XXLAWPoo8FcDx6BH": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/UjZSrgz7_B7XMd9fHU7jM33UZhWlFgX0rz7JZbCBYR28bCS7jr_CAJdcDhi52ruE")},
"substitute/dzO8FEY9lu4hwRT6BfRZOX-uYGsC_5XH4jEJ7sJyThcmG9J_w1ArOAaUCGfL8wAM": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/UjZSrgz7_B7XMd9fHU7jM33UZhWlFgX0rz7JZbCBYR28bCS7jr_CAJdcDhi52ruE")},
"fault": {Mode: fs.ModeDir | 0700},
"work": {Mode: fs.ModeDir | 0700},
}, []unique.Handle[pkg.ID]{
ic.Ident(pkg.NewFile("destroyed-input", []byte("destroyed"))),
ic.Ident(&stubArtifactF{
kind: pkg.KindExec,
params: []byte("destroyed"),
deps: []pkg.Artifact{
pkg.NewFile("destroyed-input", []byte("destroyed")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), nil, 0444)
},
}),
}, []unique.Handle[pkg.Checksum]{
unique.Make(sha512.Sum384([]byte("destroyed"))),
}},
{"inputs", []pkg.Artifact{
&stubArtifactF{
kind: pkg.KindExec,
params: []byte("destroyed"),
deps: []pkg.Artifact{
pkg.NewFile("destroyed-input", []byte("destroyed")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), nil, 0444)
},
},
}, []pkg.Artifact{
&stubArtifactF{
kind: pkg.KindExec,
params: []byte("kept"),
deps: []pkg.Artifact{
pkg.NewFile("kept-input", []byte("kept")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), []byte{0}, 0444)
},
},
}, true, expectsFS{
".": {Mode: fs.ModeDir | 0700},
"checksum": {Mode: fs.ModeDir | 0700},
"checksum/CyDnDvF-LaeGPcSW70tPosNCoclByWkTjznUUF1DcgzlIwkN9yzz1ZFME1TlPj6W": {Mode: fs.ModeDir | 0500},
"checksum/CyDnDvF-LaeGPcSW70tPosNCoclByWkTjznUUF1DcgzlIwkN9yzz1ZFME1TlPj6W/result": {Mode: 0444, Data: []byte("\x00")},
"checksum/H-eSiCo227-xdqyNl2R-5G3eqXPtbb8XegAB70I5OQb2majeZXJoCxTq9wJy5qqv": {Mode: 0400, Data: []byte("kept")},
"identifier": {Mode: fs.ModeDir | 0700},
"identifier/Ef8KX6s_rS_nLgze0rj90zKyrGAvOnzyU0DL7nrYQWEG_f4a9pmUKI6HBEMD8AE8": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/H-eSiCo227-xdqyNl2R-5G3eqXPtbb8XegAB70I5OQb2majeZXJoCxTq9wJy5qqv")},
"identifier/xoIGLemzLF227e-w_AJcf_1Sgqh2gs3KFgqvOIWUQE-9P_y2vHBMBytL4GRGQqTb": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/CyDnDvF-LaeGPcSW70tPosNCoclByWkTjznUUF1DcgzlIwkN9yzz1ZFME1TlPj6W")},
"lock": {Mode: 0644},
"variant": {Mode: 0400},
"status": {Mode: fs.ModeDir | 0700},
"substitute": {Mode: fs.ModeDir | 0700},
"substitute/4bjS-QjGcSV4nth-W6Vg3-wolKmKgiq4Ld2oRIWcOfy6Wi41XXLAWPoo8FcDx6BH": {Mode: fs.ModeSymlink | 0777, Data: []byte("../checksum/CyDnDvF-LaeGPcSW70tPosNCoclByWkTjznUUF1DcgzlIwkN9yzz1ZFME1TlPj6W")},
"fault": {Mode: fs.ModeDir | 0700},
"work": {Mode: fs.ModeDir | 0700},
}, []unique.Handle[pkg.ID]{
ic.Ident(pkg.NewFile("destroyed-input", []byte("destroyed"))),
ic.Ident(&stubArtifactF{
kind: pkg.KindExec,
params: []byte("destroyed"),
deps: []pkg.Artifact{
pkg.NewFile("destroyed-input", []byte("destroyed")),
},
cure: func(f *pkg.FContext) error {
p := f.GetWorkDir()
if err := os.MkdirAll(p.String(), 0755); err != nil {
return err
}
return os.WriteFile(p.Append("result").String(), nil, 0444)
},
}),
}, []unique.Handle[pkg.Checksum]{
unique.Make(expectsFS{
".": {Mode: fs.ModeDir | 0500},
"result": {Mode: 0444},
}.hash()),
unique.Make(sha512.Sum384([]byte("destroyed"))),
}},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
base := check.MustAbs(t.TempDir())
if err := os.Chmod(base.String(), 0700); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := filepath.WalkDir(base.String(), func(path string, d fs.DirEntry, err error) error {
if err != nil {
t.Error(err)
return nil
}
if !d.IsDir() {
return nil
}
return os.Chmod(path, 0700)
}); err != nil {
t.Fatal(err)
}
})
msg := message.New(log.New(os.Stderr, "clean: ", 0))
msg.SwapVerbose(testing.Verbose())
c, err := pkg.Open(t.Context(), msg, 0, 0, 0, base)
if err != nil {
t.Fatal(err)
}
t.Cleanup(c.Close)
all := pkg.Collect(slices.Concat(tc.a, tc.keep))
if _, _, err = c.Cure(&all); !pkg.IsCollected(err) {
t.Fatal(err)
}
var (
idents []unique.Handle[pkg.ID]
checksums []unique.Handle[pkg.Checksum]
)
idents, checksums, err = c.Clean(false, tc.inputs, tc.keep...)
if err != nil {
t.Fatalf("Clean: error = %v", err)
}
var buf [2]pkg.Checksum
slices.SortFunc(idents, func(a, b unique.Handle[pkg.ID]) int {
buf[0], buf[1] = a.Value(), b.Value()
return bytes.Compare(buf[0][:], buf[1][:])
})
slices.SortFunc(checksums, func(a, b unique.Handle[pkg.Checksum]) int {
buf[0], buf[1] = a.Value(), b.Value()
return bytes.Compare(buf[0][:], buf[1][:])
})
if !slices.Equal(idents, tc.wantIdents) {
t.Errorf(
"Clean: idents = %s, want %s",
formatHandles(idents...), formatHandles(tc.wantIdents...),
)
}
if !slices.Equal(checksums, tc.wantChecksums) {
t.Errorf(
"Clean: checksums = %s, want %s",
formatHandles(checksums...), formatHandles(tc.wantChecksums...),
)
}
want := tc.want.hash()
var checksum pkg.Checksum
if err = pkg.HashDir(&checksum, base); err != nil {
t.Fatalf("HashDir: error = %v", err)
} else if checksum != want {
t.Error(expectsFrom(base.String()))
}
})
}
}

View File

@@ -816,6 +816,28 @@ func (e *ChecksumMismatchError) Error() string {
" instead of " + Encode(e.Want) " instead of " + Encode(e.Want)
} }
// LinknamePrefixError describes a malformed linkname to a [Checksum].
type LinknamePrefixError string
func (e LinknamePrefixError) Error() string {
return "linkname " + strconv.Quote(string(e)) + " missing prefix"
}
// readlinkChecksum reads a symbolic link to a dirChecksum entry and saves the
// decoded [Checksum] to the value pointed to by buf. The checksumLinknamePrefix
// is required.
func readlinkChecksum(a *check.Absolute, buf *Checksum) error {
linkname, err := os.Readlink(a.String())
if err != nil {
return nil
}
if !strings.HasPrefix(linkname, checksumLinknamePrefix) {
return LinknamePrefixError(linkname)
}
return Decode(buf, linkname[len(checksumLinknamePrefix):])
}
// ScrubError describes the outcome of a [Cache.Scrub] call where errors were // ScrubError describes the outcome of a [Cache.Scrub] call where errors were
// found and removed from the underlying storage of [Cache]. // found and removed from the underlying storage of [Cache].
type ScrubError struct { type ScrubError struct {

View File

@@ -486,7 +486,17 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) {
tc.early(t, base) tc.early(t, base)
} }
tc.f(t, base, c) tc.f(t, base, c)
scrubFunc = func() error { return c.Scrub(1 << 7) } scrubFunc = func() error {
err = c.Scrub(1 << 7)
idents, checksums, cleanErr := c.Clean(false, false)
if len(idents) > 0 {
t.Errorf("destroyed %d idents", len(idents))
}
if len(checksums) > 0 {
t.Errorf("destroyed %d checksums", len(checksums))
}
return errors.Join(err, cleanErr)
}
} }
var restoreTemp bool var restoreTemp bool
@@ -561,10 +571,7 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) {
if err := pkg.HashDir(&checksum, base); err != nil { if err := pkg.HashDir(&checksum, base); err != nil {
t.Fatalf("HashDir: error = %v", err) t.Fatalf("HashDir: error = %v", err)
} else if checksum != want { } else if checksum != want {
t.Fatalf("(scrubbed) HashDir: %v", &pkg.ChecksumMismatchError{ t.Fatalf("(scrubbed) %s", expectsFrom(base.String()))
Got: checksum,
Want: want,
})
} }
}) })
} }