internal/pkg: record cure faults
All checks were successful
Test / ShareFS (push) Successful in 47s
Test / Sandbox (race detector) (push) Successful in 53s
Test / Sandbox (push) Successful in 54s
Test / Hakurei (race detector) (push) Successful in 59s
Test / Hakurei (push) Successful in 59s
Test / Create distribution (push) Successful in 1m12s
Test / Flake checks (push) Successful in 1m35s

Status files of faulted cures are useful for troubleshooting. This change records them in a separate directory instead of removing them.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-05-13 17:58:18 +09:00
parent f63203cb0a
commit f2f1726190
3 changed files with 91 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
package pkg_test package pkg_test
import ( import (
"bytes"
_ "embed" _ "embed"
"encoding/gob" "encoding/gob"
"errors" "errors"
@@ -112,18 +113,40 @@ func TestExec(t *testing.T) {
}) })
// check init failure passthrough // check init failure passthrough
var exitError *exec.ExitError initFailureArtifact := pkg.NewExec(
if _, _, err := c.Cure(pkg.NewExec(
"", "", nil, 0, false, false, "", "", nil, 0, false, false,
pkg.AbsWork, pkg.AbsWork,
nil, nil,
check.MustAbs("/opt/bin/testtool"), check.MustAbs("/opt/bin/testtool"),
[]string{"testtool"}, []string{"testtool"},
)); !errors.As(err, &exitError) || )
var exitError *exec.ExitError
if _, _, err := c.Cure(initFailureArtifact); !errors.As(err, &exitError) ||
exitError.ExitCode() != hst.ExitFailure { exitError.ExitCode() != hst.ExitFailure {
t.Fatalf("Cure: error = %v, want init exit status 1", err) t.Fatalf("Cure: error = %v, want init exit status 1", err)
} }
var faultStatus []byte
if faults, err := c.ReadFaults(initFailureArtifact); err != nil {
t.Fatal(err)
} else if len(faults) != 1 {
t.Fatalf("ReadFaults: %v", faults)
} else if faultStatus, err = os.ReadFile(faults[0].String()); err != nil {
t.Fatal(err)
} else if err = faults[0].Destroy(); err != nil {
t.Fatal(err)
} else {
t.Logf("destroyed expected fault at %s", faults[0].Time().UTC())
}
if !bytes.HasPrefix(faultStatus, []byte(
"internal/pkg ",
)) || !bytes.Contains(faultStatus, []byte(
"\ninit: fork/exec /opt/bin/testtool: no such file or directory\n",
)) {
t.Errorf("unexpected status:\n%s", string(faultStatus))
}
testtoolDestroy(t, base, c) testtoolDestroy(t, base, c)
}, expectsFS{ }, expectsFS{
".": {Mode: fs.ModeDir | 0700}, ".": {Mode: fs.ModeDir | 0700},

View File

@@ -4,6 +4,7 @@ package pkg
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"cmp"
"context" "context"
"crypto/sha512" "crypto/sha512"
"encoding/base64" "encoding/base64"
@@ -25,6 +26,7 @@ import (
"sync/atomic" "sync/atomic"
"syscall" "syscall"
"testing" "testing"
"time"
"unique" "unique"
"unsafe" "unsafe"
@@ -248,7 +250,14 @@ func (t *TContext) destroy(errP *error) {
*errP = errors.Join(*errP, err) *errP = errors.Join(*errP, err)
} }
if *errP != nil { if *errP != nil {
*errP = errors.Join(*errP, os.Remove(t.statusPath.String())) *errP = errors.Join(*errP, os.Rename(
t.statusPath.String(), t.cache.base.Append(
dirFault,
t.ids+"."+strconv.FormatUint(uint64(
time.Now().UnixNano(),
), 10),
).String(),
))
} }
t.status = nil t.status = nil
} }
@@ -527,6 +536,8 @@ const (
// identifier. For [FloodArtifact], the same file is also available under // identifier. For [FloodArtifact], the same file is also available under
// its substitute identifier. // its substitute identifier.
dirStatus = "status" dirStatus = "status"
// dirFault holds status files of faulted cures.
dirFault = "fault"
// dirWork holds working pathnames set up during [Cache.Cure]. // dirWork holds working pathnames set up during [Cache.Cure].
dirWork = "work" dirWork = "work"
@@ -2148,6 +2159,52 @@ func (c *Cache) OpenStatus(a Artifact) (r io.ReadSeekCloser, err error) {
return return
} }
// Fault holds the pathname and termination time of an [Artifact] fault entry.
//
// Values are produced by [Cache.ReadFaults].
type Fault struct {
// Absolute is the pathname of the fault entry. Open and Destroy operate on it.
*check.Absolute
// t is the fault time in nanoseconds since the Unix epoch, parsed by
// [Cache.ReadFaults] from the decimal suffix of the entry name.
t uint64
}
// Time reports the instant at which the fault was recorded, derived from the
// nanosecond timestamp held by the entry.
func (f Fault) Time() time.Time {
	nsec := int64(f.t)
	return time.Unix(0, nsec)
}
// Open opens the underlying entry for reading.
//
// The caller is responsible for closing the returned [io.ReadCloser]. On
// failure the error from [os.Open] is returned unchanged and the returned
// reader is a nil interface value.
func (f Fault) Open() (io.ReadCloser, error) {
	file, err := os.Open(f.Absolute.String())
	if err != nil {
		// Returning file here would wrap a nil *os.File in a non-nil
		// io.ReadCloser, so hand back a literal nil instead.
		return nil, err
	}
	return file, nil
}
// Destroy deletes the fault entry from the filesystem and reports the result
// of the removal.
func (f Fault) Destroy() error {
	pathname := f.Absolute.String()
	return os.Remove(pathname)
}
// ReadFaults returns fault entries for an [Artifact], ordered from the
// earliest to the most recent fault time.
//
// Entries are found by listing the fault directory of the cache and keeping
// names that begin with the encoded identifier of a followed by a dot. The
// remainder of each matching name is parsed as a decimal timestamp.
//
// An error from listing the directory or from parsing a matching name is
// returned unchanged, and no entries are returned alongside it.
func (c *Cache) ReadFaults(a Artifact) (faults []Fault, err error) {
	prefix := Encode(c.Ident(a).Value()) + "."

	var dents []os.DirEntry
	if dents, err = os.ReadDir(c.base.Append(dirFault).String()); err != nil {
		return nil, err
	}

	for _, dent := range dents {
		name := dent.Name()
		if !strings.HasPrefix(name, prefix) {
			continue
		}

		var t uint64
		if t, err = strconv.ParseUint(name[len(prefix):], 10, 64); err != nil {
			// Do not hand back a partial listing together with the error.
			return nil, err
		}
		faults = append(faults, Fault{c.base.Append(dirFault, name), t})
	}

	// Directory order is lexical by name, which does not match numeric order
	// when timestamps differ in digit count, so sort on the parsed value.
	slices.SortFunc(faults, func(x, y Fault) int {
		return cmp.Compare(x.t, y.t)
	})
	return faults, nil
}
// Abort cancels all pending cures and waits for them to clean up, but does not // Abort cancels all pending cures and waits for them to clean up, but does not
// close the cache. // close the cache.
func (c *Cache) Abort() { func (c *Cache) Abort() {
@@ -2251,6 +2308,7 @@ func open(
dirIdentifier, dirIdentifier,
dirChecksum, dirChecksum,
dirStatus, dirStatus,
dirFault,
dirWork, dirWork,
} { } {
if err := os.MkdirAll( if err := os.MkdirAll(

View File

@@ -492,6 +492,12 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) {
if err := os.RemoveAll(base.Append("status").String()); err != nil { if err := os.RemoveAll(base.Append("status").String()); err != nil {
t.Fatal(err) t.Fatal(err)
} }
// destroy empty fault directory
if err := os.Remove(base.Append("fault").String()); err != nil {
t.Fatal(err)
}
want := tc.want.hash() want := tc.want.hash()
var checksum pkg.Checksum var checksum pkg.Checksum