From f2f17261905e6ea6c686bb490188f86710fd9720 Mon Sep 17 00:00:00 2001 From: Ophestra Date: Wed, 13 May 2026 17:58:18 +0900 Subject: [PATCH] internal/pkg: record cure faults Status files of faulted cures are useful for troubleshooting. Instead of removing them, this change moves them into a separate fault directory. Signed-off-by: Ophestra --- internal/pkg/exec_test.go | 29 +++++++++++++++++-- internal/pkg/pkg.go | 60 ++++++++++++++++++++++++++++++++++++++- internal/pkg/pkg_test.go | 6 ++++ 3 files changed, 91 insertions(+), 4 deletions(-) diff --git a/internal/pkg/exec_test.go b/internal/pkg/exec_test.go index cab7a9ca..e6751f0f 100644 --- a/internal/pkg/exec_test.go +++ b/internal/pkg/exec_test.go @@ -1,6 +1,7 @@ package pkg_test import ( + "bytes" _ "embed" "encoding/gob" "errors" @@ -112,18 +113,40 @@ func TestExec(t *testing.T) { }) // check init failure passthrough - var exitError *exec.ExitError - if _, _, err := c.Cure(pkg.NewExec( + initFailureArtifact := pkg.NewExec( "", "", nil, 0, false, false, pkg.AbsWork, nil, check.MustAbs("/opt/bin/testtool"), []string{"testtool"}, - )); !errors.As(err, &exitError) || + ) + var exitError *exec.ExitError + if _, _, err := c.Cure(initFailureArtifact); !errors.As(err, &exitError) || exitError.ExitCode() != hst.ExitFailure { t.Fatalf("Cure: error = %v, want init exit status 1", err) } + var faultStatus []byte + if faults, err := c.ReadFaults(initFailureArtifact); err != nil { + t.Fatal(err) + } else if len(faults) != 1 { + t.Fatalf("ReadFaults: %v", faults) + } else if faultStatus, err = os.ReadFile(faults[0].String()); err != nil { + t.Fatal(err) + } else if err = faults[0].Destroy(); err != nil { + t.Fatal(err) + } else { + t.Logf("destroyed expected fault at %s", faults[0].Time().UTC()) + } + + if !bytes.HasPrefix(faultStatus, []byte( + "internal/pkg ", + )) || !bytes.Contains(faultStatus, []byte( + "\ninit: fork/exec /opt/bin/testtool: no such file or directory\n", + )) { + t.Errorf("unexpected status:\n%s", string(faultStatus)) + } + testtoolDestroy(t, base, c) }, 
expectsFS{ ".": {Mode: fs.ModeDir | 0700}, diff --git a/internal/pkg/pkg.go b/internal/pkg/pkg.go index b81abe08..e1d354e0 100644 --- a/internal/pkg/pkg.go +++ b/internal/pkg/pkg.go @@ -4,6 +4,7 @@ package pkg import ( "bufio" "bytes" + "cmp" "context" "crypto/sha512" "encoding/base64" @@ -25,6 +26,7 @@ import ( "sync/atomic" "syscall" "testing" + "time" "unique" "unsafe" @@ -248,7 +250,14 @@ func (t *TContext) destroy(errP *error) { *errP = errors.Join(*errP, err) } if *errP != nil { - *errP = errors.Join(*errP, os.Remove(t.statusPath.String())) + *errP = errors.Join(*errP, os.Rename( + t.statusPath.String(), t.cache.base.Append( + dirFault, + t.ids+"."+strconv.FormatUint(uint64( + time.Now().UnixNano(), + ), 10), + ).String(), + )) } t.status = nil } @@ -527,6 +536,8 @@ const ( // identifier. For [FloodArtifact], the same file is also available under // its substitute identifier. dirStatus = "status" + // dirFault holds status files of faulted cures. + dirFault = "fault" // dirWork holds working pathnames set up during [Cache.Cure]. dirWork = "work" @@ -2148,6 +2159,52 @@ func (c *Cache) OpenStatus(a Artifact) (r io.ReadSeekCloser, err error) { return } +// Fault holds the pathname and termination time of an [Artifact] fault entry. +type Fault struct { + *check.Absolute + t uint64 +} + +// Time returns the instant in time where the fault occurred. +func (f Fault) Time() time.Time { return time.Unix(0, int64(f.t)) } + +// Open opens the underlying entry for reading. +func (f Fault) Open() (io.ReadCloser, error) { return os.Open(f.Absolute.String()) } + +// Destroy removes the underlying fault entry. +func (f Fault) Destroy() error { return os.Remove(f.Absolute.String()) } + +// ReadFaults returns fault entries for an [Artifact]. +func (c *Cache) ReadFaults(a Artifact) (faults []Fault, err error) { + prefix := Encode(c.Ident(a).Value()) + "." 
+ var dents []os.DirEntry + if dents, err = os.ReadDir(c.base.Append(dirFault).String()); err != nil { + return + } + + for _, dent := range dents { + name := dent.Name() + if !strings.HasPrefix(name, prefix) { + continue + } + var t uint64 + t, err = strconv.ParseUint(name[len(prefix):], 10, 64) + if err != nil { + return + } + + faults = append(faults, Fault{c.base.Append( + dirFault, + name, + ), t}) + } + + slices.SortFunc(faults, func(a, b Fault) int { + return cmp.Compare(a.t, b.t) + }) + return +} + // Abort cancels all pending cures and waits for them to clean up, but does not // close the cache. func (c *Cache) Abort() { @@ -2251,6 +2308,7 @@ func open( dirIdentifier, dirChecksum, dirStatus, + dirFault, dirWork, } { if err := os.MkdirAll( diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go index 96eda8ad..9588fe77 100644 --- a/internal/pkg/pkg_test.go +++ b/internal/pkg/pkg_test.go @@ -492,6 +492,12 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) { if err := os.RemoveAll(base.Append("status").String()); err != nil { t.Fatal(err) } + + // destroy empty fault directory + if err := os.Remove(base.Append("fault").String()); err != nil { + t.Fatal(err) + } + want := tc.want.hash() var checksum pkg.Checksum