diff --git a/internal/pkg/net.go b/internal/pkg/net.go
index 09ad7fa..fb90221 100644
--- a/internal/pkg/net.go
+++ b/internal/pkg/net.go
@@ -3,11 +3,10 @@ package pkg
 import (
 	"context"
 	"crypto/sha512"
-	"errors"
 	"io"
 	"net/http"
-	"os"
 	"sync"
+	"syscall"
 
 	"hakurei.app/container/check"
 )
@@ -17,38 +16,34 @@ type httpArtifact struct {
 	// Caller-supplied request.
 	req *http.Request
 
-	// Caller-supplied checksum of the response body, also used as the
-	// identifier. This is validated during curing.
-	id ID
+	// Caller-supplied checksum of the response body. This is validated during
+	// curing and the first call to Data.
+	checksum Checksum
 
 	// doFunc is the Do method of [http.Client] supplied by the caller.
 	doFunc func(req *http.Request) (*http.Response, error)
 
-	// Instance of [Cache] to submit the cured artifact to.
-	c *Cache
 
 	// Response body read to EOF.
 	data []byte
-	// Populated when submitting to or loading from [Cache].
-	pathname *check.Absolute
-	// Synchronises access to pathname and data.
+	// Synchronises access to data.
 	mu sync.Mutex
 }
 
 // NewHTTP returns a new [File] backed by the supplied client and request. If
 // c is nil, [http.DefaultClient] is used instead.
-func (c *Cache) NewHTTP(hc *http.Client, req *http.Request, checksum Checksum) File {
-	if hc == nil {
-		hc = http.DefaultClient
+func NewHTTP(c *http.Client, req *http.Request, checksum Checksum) File {
+	if c == nil {
+		c = http.DefaultClient
 	}
-	return &httpArtifact{req: req, id: checksum, doFunc: hc.Do, c: c}
+	return &httpArtifact{req: req, checksum: checksum, doFunc: c.Do}
 }
 
 // NewHTTPGet returns a new [File] backed by the supplied client. A GET request
 // is set up for url. If c is nil, [http.DefaultClient] is used instead.
-func (c *Cache) NewHTTPGet(
+func NewHTTPGet(
 	ctx context.Context,
-	hc *http.Client,
+	c *http.Client,
 	url string,
 	checksum Checksum,
 ) (File, error) {
@@ -56,14 +51,25 @@ func (c *Cache) NewHTTPGet(
 	if err != nil {
 		return nil, err
 	}
-	return c.NewHTTP(hc, req, checksum), nil
+	return NewHTTP(c, req, checksum), nil
 }
 
 // Kind returns the hardcoded [Kind] constant.
 func (a *httpArtifact) Kind() Kind { return KindHTTP }
 
 // ID returns the caller-supplied hash of the response body.
-func (a *httpArtifact) ID() ID { return a.id }
+func (a *httpArtifact) ID() ID { return a.checksum }
+
+// Params is unreachable.
+func (a *httpArtifact) Params() []byte {
+	panic("not implemented")
+}
+
+// Dependencies returns a nil slice.
+func (a *httpArtifact) Dependencies() []Artifact { return nil }
+
+// Checksum returns the caller-supplied checksum.
+func (a *httpArtifact) Checksum() Checksum { return a.checksum }
 
 // ResponseStatusError is returned for a response returned by an [http.Client]
 // with a status code other than [http.StatusOK].
@@ -95,44 +101,13 @@ func (a *httpArtifact) do() (data []byte, err error) {
 	return
 }
 
-// Hash cures the [Artifact] and returns its hash. The return value is always
-// identical to that of the ID method.
-func (a *httpArtifact) Hash() (Checksum, error) { _, err := a.Pathname(); return a.id, err }
-
-// Pathname cures the [Artifact] and returns its pathname in the [Cache].
-func (a *httpArtifact) Pathname() (pathname *check.Absolute, err error) { - a.mu.Lock() - defer a.mu.Unlock() - - if a.pathname != nil { - return a.pathname, nil - } - - if a.data != nil { - pathname, err = a.c.StoreFile( - a.id, a.data, - (*Checksum)(&a.id), - true, - ) - if err == nil { - a.pathname = pathname - } - return - } else { - a.pathname, a.data, _, err = a.c.LoadOrStoreFile( - a.id, a.do, - (*Checksum)(&a.id), - true, - ) - if err != nil { - a.pathname, a.data = nil, nil - } - return a.pathname, err - } +// Cure returns syscall.ENOTSUP. Callers should use Data instead. +func (a *httpArtifact) Cure(*check.Absolute, CacheDataFunc) error { + return syscall.ENOTSUP } // Data completes the http request and returns the resulting response body read -// to EOF. Data does not write to the underlying [Cache]. +// to EOF. Data does not interact with the filesystem. func (a *httpArtifact) Data() (data []byte, err error) { a.mu.Lock() defer a.mu.Unlock() @@ -142,23 +117,14 @@ func (a *httpArtifact) Data() (data []byte, err error) { return a.data, nil } - if a.pathname, a.data, err = a.c.LoadFile(a.id); err == nil { - return a.data, nil - } else { - a.pathname, a.data = nil, nil - if !errors.Is(err, os.ErrNotExist) { - return - } - } - if data, err = a.do(); err != nil { return } h := sha512.New384() h.Write(data) - if got := (Checksum)(h.Sum(nil)); got != a.id { - return nil, &ChecksumMismatchError{got, a.id} + if got := (Checksum)(h.Sum(nil)); got != a.checksum { + return nil, &ChecksumMismatchError{got, a.checksum} } a.data = data return diff --git a/internal/pkg/net_test.go b/internal/pkg/net_test.go index 0466a4f..2e2e2ce 100644 --- a/internal/pkg/net_test.go +++ b/internal/pkg/net_test.go @@ -34,7 +34,7 @@ func TestHTTP(t *testing.T) { checkWithCache(t, []cacheTestCase{ {"direct", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { var got []byte - if f, err := c.NewHTTPGet( + if f, err := pkg.NewHTTPGet( t.Context(), &client, "file:///testdata", @@ -45,15 +45,15 @@ func TestHTTP(t *testing.T) { t.Fatalf("Data: error = %v", err) } else if string(got) != testdata { t.Fatalf("Data: %x, want %x", got, testdata) - } else if gotIdent := f.ID(); gotIdent != testdataChecksum { - t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } else if gotIdent := pkg.Ident(f); gotIdent != testdataChecksum { + t.Fatalf("Ident: %x, want %x", gotIdent, testdataChecksum) } // check direct validation wantErrMismatch := &pkg.ChecksumMismatchError{ Got: testdataChecksum, } - if f, err := c.NewHTTPGet( + if f, err := pkg.NewHTTPGet( t.Context(), &client, "file:///testdata", @@ -62,13 +62,13 @@ func TestHTTP(t *testing.T) { t.Fatalf("NewHTTPGet: error = %v", err) } else if _, err = f.Data(); !reflect.DeepEqual(err, wantErrMismatch) { t.Fatalf("Data: error = %#v, want %#v", err, wantErrMismatch) - } else if gotIdent := f.ID(); gotIdent != (pkg.Checksum{}) { - t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) + } else if gotIdent := pkg.Ident(f); gotIdent != (pkg.Checksum{}) { + t.Fatalf("Ident: %x, want %x", gotIdent, pkg.Checksum{}) } // check direct response error wantErrNotFound := pkg.ResponseStatusError(http.StatusNotFound) - if f, err := c.NewHTTPGet( + if f, err := pkg.NewHTTPGet( t.Context(), &client, "file:///nonexistent", @@ -77,13 +77,13 @@ func TestHTTP(t *testing.T) { t.Fatalf("NewHTTPGet: error = %v", err) } else if _, err = f.Data(); !reflect.DeepEqual(err, wantErrNotFound) { t.Fatalf("Data: error = %#v, want %#v", err, wantErrNotFound) - } else if gotIdent := f.ID(); gotIdent 
!= (pkg.Checksum{}) { - t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) + } else if gotIdent := pkg.Ident(f); gotIdent != (pkg.Checksum{}) { + t.Fatalf("Ident: %x, want %x", gotIdent, pkg.Checksum{}) } }, pkg.MustDecode("E4vEZKhCcL2gPZ2Tt59FS3lDng-d_2SKa2i5G_RbDfwGn6EemptFaGLPUDiOa94C")}, - {"load or store", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { - f, err := c.NewHTTPGet( + {"cure", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + f, err := pkg.NewHTTPGet( t.Context(), &client, "file:///testdata", @@ -97,18 +97,16 @@ func TestHTTP(t *testing.T) { "identifier", testdataChecksumString, ) - var pathname *check.Absolute - if pathname, err = f.Pathname(); err != nil { - t.Fatalf("Pathname: error = %v", err) + var ( + pathname *check.Absolute + checksum pkg.Checksum + ) + if pathname, checksum, err = c.Cure(f); err != nil { + t.Fatalf("Cure: error = %v", err) } else if !pathname.Is(wantPathname) { - t.Fatalf("Pathname: %q, want %q", pathname, wantPathname) - } - - var checksum pkg.Checksum - if checksum, err = f.Hash(); err != nil { - t.Fatalf("Hash: error = %v", err) + t.Fatalf("Cure: %q, want %q", pathname, wantPathname) } else if checksum != testdataChecksum { - t.Fatalf("Hash: %x, want %x", checksum, testdataChecksum) + t.Fatalf("Cure: %x, want %x", checksum, testdataChecksum) } var got []byte @@ -116,12 +114,12 @@ func TestHTTP(t *testing.T) { t.Fatalf("Data: error = %v", err) } else if string(got) != testdata { t.Fatalf("Data: %x, want %x", got, testdata) - } else if gotIdent := f.ID(); gotIdent != testdataChecksum { - t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } else if gotIdent := pkg.Ident(f); gotIdent != testdataChecksum { + t.Fatalf("Ident: %x, want %x", gotIdent, testdataChecksum) } // check load from cache - if f, err = c.NewHTTPGet( + if f, err = pkg.NewHTTPGet( t.Context(), &client, "file:///testdata", @@ -132,57 +130,23 @@ func TestHTTP(t *testing.T) { t.Fatalf("Data: error = %v", err) } else if string(got) != testdata { t.Fatalf("Data: %x, want %x", got, testdata) - } else if gotIdent := f.ID(); gotIdent != testdataChecksum { - t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } else if gotIdent := pkg.Ident(f); gotIdent != testdataChecksum { + t.Fatalf("Ident: %x, want %x", gotIdent, testdataChecksum) } // check error passthrough wantErrNotFound := pkg.ResponseStatusError(http.StatusNotFound) - if f, err = c.NewHTTPGet( + if f, err = pkg.NewHTTPGet( t.Context(), &client, "file:///nonexistent", pkg.Checksum{}, ); err != nil { t.Fatalf("NewHTTPGet: error = %v", err) - } else if _, err = f.Pathname(); !reflect.DeepEqual(err, wantErrNotFound) { + } else if _, _, err = c.Cure(f); !reflect.DeepEqual(err, wantErrNotFound) { t.Fatalf("Pathname: error = %#v, want %#v", err, wantErrNotFound) - } else if gotIdent := f.ID(); gotIdent != (pkg.Checksum{}) { - t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) - } - }, pkg.MustDecode("4WHaMvRRcCac1uAyXnEklEd2YaNQBj6rXlfMntX9GgYLij3By1znv5QYPGJHYQIH")}, - - {"store", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { - var ( - got []byte - pathname *check.Absolute - checksum pkg.Checksum - ) - wantPathname := base.Append( - "identifier", - testdataChecksumString, - ) - if f, err := c.NewHTTPGet( - t.Context(), - &client, - "file:///testdata", - testdataChecksum, - ); err != nil { - t.Fatalf("NewHTTPGet: error = %v", err) - } else if got, err = f.Data(); err != nil { - t.Fatalf("Data: error = %v", err) - } else if string(got) != testdata { - t.Fatalf("Data: %x, 
want %x", got, testdata) - } else if gotIdent := f.ID(); gotIdent != testdataChecksum { - t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) - } else if pathname, err = f.Pathname(); err != nil { - t.Fatalf("Pathname: error = %v", err) - } else if !pathname.Is(wantPathname) { - t.Fatalf("Pathname: %q, want %q", pathname, wantPathname) - } else if checksum, err = f.Hash(); err != nil { - t.Fatalf("Hash: error = %v", err) - } else if checksum != testdataChecksum { - t.Fatalf("Hash: %x, want %x", checksum, testdataChecksum) + } else if gotIdent := pkg.Ident(f); gotIdent != (pkg.Checksum{}) { + t.Fatalf("Ident: %x, want %x", gotIdent, pkg.Checksum{}) } }, pkg.MustDecode("4WHaMvRRcCac1uAyXnEklEd2YaNQBj6rXlfMntX9GgYLij3By1znv5QYPGJHYQIH")}, }) diff --git a/internal/pkg/pkg.go b/internal/pkg/pkg.go index f56d42d..4ab3125 100644 --- a/internal/pkg/pkg.go +++ b/internal/pkg/pkg.go @@ -32,25 +32,30 @@ func Encode(checksum Checksum) string { return base64.URLEncoding.EncodeToString(checksum[:]) } -// encode is abbreviation for base64.URLEncoding.EncodeToString(checksum[:]). -func encode(checksum *Checksum) string { - return base64.URLEncoding.EncodeToString(checksum[:]) +// Decode is abbreviation for base64.URLEncoding.Decode(checksum[:], []byte(s)). +func Decode(s string) (checksum Checksum, err error) { + var n int + n, err = base64.URLEncoding.Decode(checksum[:], []byte(s)) + if err == nil && n != len(Checksum{}) { + err = io.ErrUnexpectedEOF + } + return } // MustDecode decodes a string representation of [Checksum] and panics if there // is a decoding error or the resulting data is too short. -func MustDecode(s string) (checksum Checksum) { - if n, err := base64.URLEncoding.Decode( - checksum[:], - []byte(s), - ); err != nil { +func MustDecode(s string) Checksum { + if checksum, err := Decode(s); err != nil { panic(err) - } else if n != len(Checksum{}) { - panic(io.ErrUnexpectedEOF) + } else { + return checksum } - return } +// CacheDataFunc tries to load [File] from [Cache], and if that fails, obtains +// it via [File.Data] instead. +type CacheDataFunc func(f File) (data []byte, err error) + // An Artifact is a read-only reference to a piece of data that may be created // deterministically but might not currently be available in memory or on the // filesystem. @@ -60,31 +65,62 @@ type Artifact interface { // [Artifact] is allowed to return the same [Kind] value. Kind() Kind + // Params returns opaque bytes that describes [Artifact]. Implementations + // must guarantee that these values are unique among differing instances + // of the same implementation with the same dependencies. + // + // Callers must not modify the retuned byte slice. + // + // Result must remain identical across multiple invocations. + Params() []byte + + // Dependencies returns a slice of [Artifact] that the current instance + // depends on to produce its contents. + // + // Callers must not modify the retuned slice. + // + // Result must remain identical across multiple invocations. + Dependencies() []Artifact + + // Cure cures the current [Artifact] to the caller-specified temporary + // pathname. This is not the final resting place of the [Artifact] and this + // pathname should not be directly referred to in the final contents. + // + // If the implementation produces a single file, it must implement [File] + // as well. In that case, Cure must produce a single regular file with + // contents identical to that returned by [File.Data]. 
+ Cure(work *check.Absolute, loadData CacheDataFunc) (err error) +} + +// KnownIdent is optionally implemented by [Artifact] and is used instead of +// [Kind.Ident] when it is available. +// +// This is very subtle to use correctly. The implementation must ensure that +// this value is globally unique, otherwise [Cache] can enter an inconsistent +// state. This should not be implemented outside of testing. +type KnownIdent interface { // ID returns a globally unique identifier referring to the current // [Artifact]. This value must be known ahead of time and guaranteed to be // unique without having obtained the full contents of the [Artifact]. ID() ID +} - // Hash returns the [Checksum] created from the full contents of a cured - // [Artifact]. This can be stored for future lookup in a [Cache]. +// KnownChecksum is optionally implemented by [Artifact] for an artifact with +// output known ahead of time. +type KnownChecksum interface { + // Checksum returns the address of a known checksum. // - // A call to Hash implicitly cures [Artifact]. - Hash() (Checksum, error) - - // Pathname returns an absolute pathname to a file or directory populated - // with the full contents of [Artifact]. This is the most expensive - // operation possible on any [Artifact] and should be avoided if possible. + // Callers must not modify the [Checksum]. // - // A call to Pathname implicitly cures [Artifact]. - // - // Callers must only open files read-only. If [Artifact] is a directory, - // files must not be created or removed under this directory. - Pathname() (*check.Absolute, error) + // Result must remain identical across multiple invocations. + Checksum() Checksum } // A File refers to an [Artifact] backed by a single file. type File interface { - // Data returns the full contents of [Artifact]. + // Data returns the full contents of [Artifact]. If [Artifact.Checksum] + // returns a non-nil address, Data is responsible for validating any data + // it produces and must return [ChecksumMismatchError] if validation fails. // // Callers must not modify the returned byte slice. Data() ([]byte, error) @@ -92,13 +128,22 @@ type File interface { Artifact } +// Ident returns the identifier of an [Artifact]. +func Ident(a Artifact) ID { + if ki, ok := a.(KnownIdent); ok { + return ki.ID() + } + return a.Kind().Ident(a.Params(), a.Dependencies()...) +} + // Kind corresponds to the concrete type of [Artifact] and is used to create // identifier for an [Artifact] with dependencies. type Kind uint64 const ( - // KindHTTP is the kind of [Artifact] returned by [Cache.NewHTTP]. + // KindHTTP is the kind of [Artifact] returned by [NewHTTP]. KindHTTP Kind = iota + // KindTar is the kind of artifact returned by [NewTar]. KindTar ) @@ -109,11 +154,13 @@ func (k Kind) Ident(params []byte, deps ...Artifact) ID { type extIdent [len(ID{}) + wordSize]byte identifiers := make([]extIdent, len(deps)) for i, a := range deps { - id := a.ID() + id := Ident(a) copy(identifiers[i][wordSize:], id[:]) binary.LittleEndian.PutUint64(identifiers[i][:], uint64(a.Kind())) } - slices.SortFunc(identifiers, func(a, b extIdent) int { return bytes.Compare(a[:], b[:]) }) + slices.SortFunc(identifiers, func(a, b extIdent) int { + return bytes.Compare(a[:], b[:]) + }) slices.Compact(identifiers) h := sha512.New384() @@ -134,8 +181,12 @@ const ( dirChecksum = "checksum" // dirWork is the directory name appended to Cache.base for working - // directories created for [Cache.Store]. + // pathnames set up during [Cache.Cure]. 
dirWork = "work" + + // checksumLinknamePrefix is prepended to the encoded [Checksum] value + // of an [Artifact] when creating a symbolic link to dirChecksum. + checksumLinknamePrefix = "../" + dirChecksum + "/" ) // Cache is a support layer that implementations of [Artifact] can use to store @@ -144,30 +195,32 @@ type Cache struct { // Directory where all [Cache] related files are placed. base *check.Absolute - // Protects the Store critical section. - storeMu sync.Mutex + // Whether to validate [File.Data] for a [KnownChecksum] file. This + // significantly reduces performance. + strict bool - // Synchronises access to most public methods. - mu sync.RWMutex + // Synchronises access to dirChecksum. + checksumMu sync.RWMutex + + // Identifier to content pair cache. + ident map[ID]Checksum + // Identifier to error pair for unrecoverably faulted [Artifact]. + identErr map[ID]error + // Pending identifiers, accessed through Cure for entries not in ident. + identPending map[ID]<-chan struct{} + // Synchronises access to ident and corresponding filesystem entries. + identMu sync.RWMutex } -// LoadFile loads the contents of a [File] by its identifier. -func (c *Cache) LoadFile(id ID) ( - pathname *check.Absolute, - data []byte, - err error, -) { - pathname = c.base.Append( - dirIdentifier, - Encode(id), - ) +// IsStrict returns whether the [Cache] strictly verifies checksums. +func (c *Cache) IsStrict() bool { return c.strict } - c.mu.RLock() - data, err = os.ReadFile(pathname.String()) - c.mu.RUnlock() - - return -} +// SetStrict sets whether the [Cache] strictly verifies checksums, even when +// the implementation promises to validate them internally. This significantly +// reduces performance and is not recommended outside of testing. +// +// This method is not safe for concurrent use with any other method. +func (c *Cache) SetStrict(strict bool) { c.strict = strict } // A ChecksumMismatchError describes an [Artifact] with unexpected content. type ChecksumMismatchError struct { @@ -180,217 +233,357 @@ func (e *ChecksumMismatchError) Error() string { " instead of " + Encode(e.Want) } -// pathname returns the content-addressed pathname for a [Checksum]. -func (c *Cache) pathname(checksum *Checksum) *check.Absolute { - return c.base.Append( - dirChecksum, - encode(checksum), - ) -} - -// pathnameIdent returns the identifier-based pathname for an [ID]. -func (c *Cache) pathnameIdent(id *ID) *check.Absolute { - return c.base.Append( - dirIdentifier, - encode((*Checksum)(id)), - ) -} - -// Store looks up an identifier, and if it is not present, calls makeArtifact -// with a private working directory and stores its result instead. An optional -// checksum can be passed via the result buffer which is used to validate the -// produced directory. -func (c *Cache) Store( - id ID, - makeArtifact func(work *check.Absolute) error, - buf *Checksum, - validate bool, -) ( - pathname *check.Absolute, - store bool, +// loadOrStoreIdent attempts to load a cached [Artifact] by its identifier or +// wait for a pending [Artifact] to cure. If neither is possible, the current +// identifier is stored in identPending and a non-nil channel is returned. 
+func (c *Cache) loadOrStoreIdent(id *ID) ( + done chan<- struct{}, + checksum Checksum, err error, ) { - pathname = c.pathnameIdent(&id) - c.storeMu.Lock() - defer c.storeMu.Unlock() + var ok bool - _, err = os.Lstat(pathname.String()) - if err == nil || !errors.Is(err, os.ErrNotExist) { + c.identMu.Lock() + if checksum, ok = c.ident[*id]; ok { + c.identMu.Unlock() + return + } + if err, ok = c.identErr[*id]; ok { + c.identMu.Unlock() return } - store = true - var ( - workPathname *check.Absolute - workPathnameRaw string + var notify <-chan struct{} + if notify, ok = c.identPending[*id]; ok { + c.identMu.Unlock() + <-notify + c.identMu.RLock() + if checksum, ok = c.ident[*id]; !ok { + err = c.identErr[*id] + } + c.identMu.RUnlock() + return + } + + d := make(chan struct{}) + c.identPending[*id] = d + c.identMu.Unlock() + done = d + return +} + +// finaliseIdent commits a checksum or error to ident for an identifier +// previously submitted to identPending. +func (c *Cache) finaliseIdent( + done chan<- struct{}, + id *ID, + checksum *Checksum, + err error, +) { + c.identMu.Lock() + if err != nil { + c.identErr[*id] = err + } else { + c.ident[*id] = *checksum + } + c.identMu.Unlock() + + close(done) +} + +// loadData provides [CacheDataFunc] for [Artifact.Cure]. +func (c *Cache) loadData(f File) (data []byte, err error) { + var r *os.File + if kc, ok := f.(KnownChecksum); ok { + c.checksumMu.RLock() + r, err = os.Open(c.base.Append( + dirChecksum, + Encode(kc.Checksum()), + ).String()) + c.checksumMu.RUnlock() + } else { + c.identMu.RLock() + r, err = os.Open(c.base.Append( + dirIdentifier, + Encode(Ident(f)), + ).String()) + c.identMu.RUnlock() + } + + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return + } + return f.Data() + } + + data, err = io.ReadAll(r) + closeErr := r.Close() + if err == nil { + err = closeErr + } + return +} + +// InvalidFileModeError describes an [Artifact.Cure] that did not result in +// a regular file or directory located at the work pathname. +type InvalidFileModeError fs.FileMode + +// Error returns a constant string. +func (e InvalidFileModeError) Error() string { + return "artifact did not produce a regular file or directory" +} + +// NoOutputError describes an [Artifact.Cure] that did not populate its +// work pathname despite completing successfully. +type NoOutputError struct{} + +// Unwrap returns [os.ErrNotExist]. +func (NoOutputError) Unwrap() error { return os.ErrNotExist } + +// Error returns a constant string. +func (NoOutputError) Error() string { + return "artifact cured successfully but did not produce any output" +} + +// Cure cures the [Artifact] and returns its pathname and [Checksum]. 
+func (c *Cache) Cure(a Artifact) ( + pathname *check.Absolute, + checksum Checksum, + err error, +) { + id := Ident(a) + ids := Encode(id) + pathname = c.base.Append( + dirIdentifier, + ids, ) - if workPathnameRaw, err = os.MkdirTemp( - c.base.Append(dirWork).String(), - path.Base(pathname.String()+".*"), - ); err != nil { - return - } else if workPathname, err = check.NewAbs(workPathnameRaw); err != nil { - return - } defer func() { if err != nil { - chmodErr := filepath.WalkDir(workPathname.String(), func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return os.Chmod(path, 0700) - } - return nil - }) - removeErr := os.RemoveAll(workPathname.String()) - if chmodErr != nil || removeErr != nil { - err = errors.Join(err, chmodErr, removeErr) - } else if errors.Is(err, os.ErrExist) { - // two artifacts may be backed by the same file - err = nil - } + pathname = nil + checksum = Checksum{} } }() - if err = os.Chmod(workPathname.String(), 0700); err != nil { + + var done chan<- struct{} + done, checksum, err = c.loadOrStoreIdent(&id) + if done == nil { return + } else { + defer func() { c.finaliseIdent(done, &id, &checksum, err) }() } - if err = makeArtifact(workPathname); err != nil { - return - } - // override this before hashing since it will be made read-only after the - // rename anyway so do not let perm bits affect the checksum - if err = os.Chmod(workPathname.String(), 0700); err != nil { - return - } - var checksum Checksum - if checksum, err = HashDir(workPathname); err != nil { - return - } - if validate { - if checksum != *buf { - err = &ChecksumMismatchError{checksum, *buf} + _, err = os.Lstat(pathname.String()) + if err == nil { + var name string + if name, err = os.Readlink(pathname.String()); err != nil { return } - } else { - *buf = checksum + checksum, err = Decode(path.Base(name)) + return + } + if !errors.Is(err, os.ErrNotExist) { + return } - checksumPathname := c.pathname(&checksum) - if err = os.Rename( - workPathname.String(), - checksumPathname.String(), - ); err != nil { - if !errors.Is(err, os.ErrExist) { + var checksums string + defer func() { + if err == nil && checksums != "" { + err = os.Symlink( + checksumLinknamePrefix+checksums, + pathname.String(), + ) + } + }() + + var checksumPathname *check.Absolute + var checksumFi os.FileInfo + if kc, ok := a.(KnownChecksum); ok { + checksum = kc.Checksum() + checksums = Encode(checksum) + checksumPathname = c.base.Append( + dirChecksum, + checksums, + ) + + c.checksumMu.RLock() + checksumFi, err = os.Stat(checksumPathname.String()) + c.checksumMu.RUnlock() + + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return + } + + checksumFi, err = nil, nil + } + } + + if f, ok := a.(File); ok { + if checksumFi != nil { + if !checksumFi.Mode().IsRegular() { + // unreachable + err = InvalidFileModeError(checksumFi.Mode()) + } return } - } else if err = os.Chmod(checksumPathname.String(), 0500); err != nil { - return - } - if linkErr := os.Symlink( - "../"+dirChecksum+"/"+path.Base(checksumPathname.String()), - pathname.String(), - ); linkErr != nil { - err = linkErr - } - return -} - -// storeFile stores the contents of a [File]. An optional checksum can be -// passed via the result buffer which is used to validate the submitted data. -// -// If locking is disabled, the caller is responsible for acquiring a write lock -// and releasing it after this method returns. This makes LoadOrStoreFile -// possible without holding the lock while computing hash for store only. 
-func (c *Cache) storeFile( - identifierPathname *check.Absolute, - data []byte, - buf *Checksum, - validate, lock bool, -) error { - h := sha512.New384() - h.Write(data) - if validate { - if got := (Checksum)(h.Sum(nil)); got != *buf { - return &ChecksumMismatchError{got, *buf} + var data []byte + data, err = f.Data() + if err != nil { + return } + + if checksumPathname == nil { + h := sha512.New384() + h.Write(data) + h.Sum(checksum[:0]) + checksums = Encode(checksum) + checksumPathname = c.base.Append( + dirChecksum, + checksums, + ) + } else if c.IsStrict() { + h := sha512.New384() + h.Write(data) + if got := Checksum(h.Sum(nil)); got != checksum { + err = &ChecksumMismatchError{ + Got: got, + Want: checksum, + } + return + } + } + + c.checksumMu.Lock() + var w *os.File + w, err = os.OpenFile( + checksumPathname.String(), + os.O_CREATE|os.O_EXCL|os.O_WRONLY, + 0400, + ) + if err != nil { + c.checksumMu.Unlock() + + if errors.Is(err, os.ErrExist) { + err = nil + } + return + } + _, err = w.Write(data) + closeErr := w.Close() + if err == nil { + err = closeErr + } + c.checksumMu.Unlock() + + return } else { - h.Sum(buf[:0]) - } - - checksumPathname := c.pathname(buf) - - if lock { - c.mu.Lock() - defer c.mu.Unlock() - } - - if f, err := os.OpenFile( - checksumPathname.String(), - os.O_WRONLY|os.O_CREATE|os.O_EXCL, - 0400, - ); err != nil { - // two artifacts may be backed by the same file - if !errors.Is(err, os.ErrExist) { - return err + if checksumFi != nil { + if !checksumFi.Mode().IsDir() { + // unreachable + err = InvalidFileModeError(checksumFi.Mode()) + } + return } - } else if _, err = f.Write(data); err != nil { - // do not attempt cleanup: this is content-addressed and a partial - // write is caught during integrity check - return err - } - return os.Symlink( - "../"+dirChecksum+"/"+path.Base(checksumPathname.String()), - identifierPathname.String(), - ) -} + workPathname := c.base.Append(dirWork, ids) + defer func() { + // must not use the value of checksum string as it might be zeroed + // to cancel the deferred symlink operation -// StoreFile stores the contents of a [File]. An optional checksum can be -// passed via the result buffer which is used to validate the submitted data. -func (c *Cache) StoreFile( - id ID, - data []byte, - buf *Checksum, - validate bool, -) (pathname *check.Absolute, err error) { - pathname = c.pathnameIdent(&id) - err = c.storeFile(pathname, data, buf, validate, true) - return -} + if err != nil { + chmodErr := filepath.WalkDir(workPathname.String(), func( + path string, + d fs.DirEntry, + err error, + ) error { + if err != nil { + return err + } + if d.IsDir() { + return os.Chmod(path, 0700) + } + return nil + }) + if errors.Is(chmodErr, os.ErrNotExist) { + chmodErr = nil + } + removeErr := os.RemoveAll(workPathname.String()) + if chmodErr != nil || removeErr != nil { + err = errors.Join(err, chmodErr, removeErr) + } else if errors.Is(err, os.ErrExist) { + // two artifacts may be backed by the same file + err = nil + } + } + }() -// LoadOrStoreFile attempts to load the contents of a [File] by its identifier, -// and if that file is not present, calls makeData and stores its result -// instead. Hash validation behaviour is identical to StoreFile. 
-func (c *Cache) LoadOrStoreFile( - id ID, - makeData func() ([]byte, error), - buf *Checksum, - validate bool, -) ( - pathname *check.Absolute, - data []byte, - store bool, - err error, -) { - pathname = c.pathnameIdent(&id) - c.mu.Lock() - defer c.mu.Unlock() + if err = a.Cure(workPathname, c.loadData); err != nil { + return + } + + var fi os.FileInfo + if fi, err = os.Lstat(workPathname.String()); err != nil { + if errors.Is(err, os.ErrNotExist) { + err = NoOutputError{} + } + return + } + + if !fi.IsDir() { + if !fi.Mode().IsRegular() { + err = InvalidFileModeError(fi.Mode()) + } else { + err = errors.New("non-file artifact produced regular file") + } + return + } + + // override this before hashing since it will be made read-only after + // the rename anyway so do not let perm bits affect the checksum + if err = os.Chmod(workPathname.String(), 0700); err != nil { + return + } + var gotChecksum Checksum + if gotChecksum, err = HashDir(workPathname); err != nil { + return + } + + if checksumPathname == nil { + checksum = gotChecksum + checksums = Encode(checksum) + checksumPathname = c.base.Append( + dirChecksum, + checksums, + ) + } else { + if gotChecksum != checksum { + err = &ChecksumMismatchError{ + Got: gotChecksum, + Want: checksum, + } + return + } + } + + c.checksumMu.Lock() + if err = os.Rename( + workPathname.String(), + checksumPathname.String(), + ); err != nil { + if !errors.Is(err, os.ErrExist) { + c.checksumMu.Unlock() + return + } + // err is zeroed during deferred cleanup + } else { + err = os.Chmod(checksumPathname.String(), 0500) + } + c.checksumMu.Unlock() - data, err = os.ReadFile(pathname.String()) - if err == nil || !errors.Is(err, os.ErrNotExist) { return } - store = true - - data, err = makeData() - if err != nil { - return - } - err = c.storeFile(pathname, data, buf, validate, false) - return } // New returns the address to a new instance of [Cache]. @@ -408,5 +601,9 @@ func New(base *check.Absolute) (*Cache, error) { return &Cache{ base: base, + + ident: make(map[ID]Checksum), + identErr: make(map[ID]error), + identPending: make(map[ID]<-chan struct{}), }, nil } diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go index 915c720..1479e2e 100644 --- a/internal/pkg/pkg_test.go +++ b/internal/pkg/pkg_test.go @@ -5,6 +5,8 @@ import ( "bytes" "crypto/sha512" "encoding/binary" + "errors" + "fmt" "io/fs" "net/http" "os" @@ -12,6 +14,7 @@ import ( "reflect" "syscall" "testing" + "unsafe" "hakurei.app/container" "hakurei.app/container/check" @@ -19,41 +22,131 @@ import ( "hakurei.app/internal/pkg" ) -// A stubArtifact implements [Artifact] with hardcoded kind and identifier. -type stubArtifact struct { - kind pkg.Kind - id pkg.ID +// overrideIdent overrides the ID method of [Artifact]. +type overrideIdent struct { + id pkg.ID + pkg.Artifact } -func (a stubArtifact) Kind() pkg.Kind { return a.kind } -func (a stubArtifact) ID() pkg.ID { return a.id } -func (a stubArtifact) Hash() (pkg.Checksum, error) { panic("unreachable") } -func (a stubArtifact) Pathname() (*check.Absolute, error) { panic("unreachable") } +func (a overrideIdent) ID() pkg.ID { return a.id } + +// overrideIdentFile overrides the ID method of [File]. 
+type overrideIdentFile struct { + id pkg.ID + pkg.File +} + +func (a overrideIdentFile) ID() pkg.ID { return a.id } + +// A knownIdentArtifact implements [pkg.KnownIdent] and [Artifact] +type knownIdentArtifact interface { + pkg.KnownIdent + pkg.Artifact +} + +// A knownIdentFile implements [pkg.KnownIdent] and [File] +type knownIdentFile interface { + pkg.KnownIdent + pkg.File +} + +// overrideChecksum overrides the Checksum method of [Artifact]. +type overrideChecksum struct { + checksum pkg.Checksum + knownIdentArtifact +} + +func (a overrideChecksum) Checksum() pkg.Checksum { return a.checksum } + +// overrideChecksumFile overrides the Checksum method of [File]. +type overrideChecksumFile struct { + checksum pkg.Checksum + knownIdentFile +} + +func (a overrideChecksumFile) Checksum() pkg.Checksum { return a.checksum } + +// A stubArtifact implements [Artifact] with hardcoded behaviour. +type stubArtifact struct { + kind pkg.Kind + params []byte + deps []pkg.Artifact + + cure func(work *check.Absolute, loadData pkg.CacheDataFunc) error +} + +func (a stubArtifact) Kind() pkg.Kind { return a.kind } +func (a stubArtifact) Params() []byte { return a.params } +func (a stubArtifact) Dependencies() []pkg.Artifact { return a.deps } + +func (a stubArtifact) Cure( + work *check.Absolute, + loadData pkg.CacheDataFunc, +) error { + return a.cure(work, loadData) +} + +// A stubFile implements [File] with hardcoded behaviour. +type stubFile struct { + data []byte + err error + + stubArtifact +} + +func (a stubFile) Data() ([]byte, error) { return a.data, a.err } + +// newStubFile returns an implementation of [pkg.File] with hardcoded behaviour. +func newStubFile( + kind pkg.Kind, + id pkg.ID, + sum *pkg.Checksum, + data []byte, + err error, +) pkg.File { + f := overrideIdentFile{id, stubFile{data, err, stubArtifact{ + kind, + nil, + nil, + func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + panic("unreachable") + }, + }}} + if sum == nil { + return f + } else { + return overrideChecksumFile{*sum, f} + } +} func TestIdent(t *testing.T) { t.Parallel() testCases := []struct { - name string - kind pkg.Kind - params []byte - deps []pkg.Artifact - want pkg.ID + name string + a pkg.Artifact + want pkg.ID }{ - {"tar", pkg.KindTar, []byte{ - pkg.TarGzip, 0, 0, 0, 0, 0, 0, 0, - }, []pkg.Artifact{ - stubArtifact{pkg.KindHTTP, pkg.ID{}}, - }, pkg.MustDecode("HnySzeLQvSBZuTUcvfmLEX_OmH4yJWWH788NxuLuv7kVn8_uPM6Ks4rqFWM2NZJY")}, + {"tar", stubArtifact{ + pkg.KindTar, + []byte{pkg.TarGzip, 0, 0, 0, 0, 0, 0, 0}, + []pkg.Artifact{ + overrideIdent{pkg.ID{}, stubArtifact{}}, + }, + nil, + }, pkg.MustDecode( + "HnySzeLQvSBZuTUcvfmLEX_OmH4yJWWH788NxuLuv7kVn8_uPM6Ks4rqFWM2NZJY", + )}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { t.Parallel() - if got := tc.kind.Ident(tc.params, tc.deps...); got != tc.want { + if got := pkg.Ident(tc.a); got != tc.want { t.Errorf("Ident: %s, want %s", pkg.Encode(got), - pkg.Encode(tc.want)) + pkg.Encode(tc.want), + ) } }) } @@ -118,6 +211,37 @@ func checkWithCache(t *testing.T, testCases []cacheTestCase) { } } +// A cureStep contains an [Artifact] to be cured, and the expected outcome. +type cureStep struct { + name string + + a pkg.Artifact + + pathname *check.Absolute + checksum pkg.Checksum + err error +} + +// cureMany cures many artifacts against a [Cache] and checks their outcomes. 
+func cureMany(t *testing.T, c *pkg.Cache, steps []cureStep) { + for _, step := range steps { + t.Log("cure step:", step.name) + if pathname, checksum, err := c.Cure(step.a); !reflect.DeepEqual(err, step.err) { + t.Fatalf("Cure: error = %v, want %v", err, step.err) + } else if !pathname.Is(step.pathname) { + t.Fatalf("Cure: pathname = %q, want %q", pathname, step.pathname) + } else if checksum != step.checksum { + t.Fatalf("Cure: checksum = %s, want %s", pkg.Encode(checksum), pkg.Encode(step.checksum)) + } else { + v := any(err) + if err == nil { + v = pathname + } + t.Log(pkg.Encode(checksum)+":", v) + } + } +} + func TestCache(t *testing.T) { t.Parallel() @@ -134,21 +258,9 @@ func TestCache(t *testing.T) { return (pkg.Checksum)(h.Sum(nil)) }() - testdataChecksumString := pkg.Encode(testdataChecksum) - testCases := []cacheTestCase{ {"file", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { - wantErrNonexistent := &os.PathError{ - Op: "open", - Path: base.Append( - "identifier", - testdataChecksumString, - ).String(), - Err: syscall.ENOENT, - } - if _, _, err := c.LoadFile(testdataChecksum); !reflect.DeepEqual(err, wantErrNonexistent) { - t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistent) - } + c.SetStrict(true) identifier := (pkg.ID)(bytes.Repeat([]byte{ 0x75, 0xe6, 0x9d, 0x6d, 0xe7, 0x9f, @@ -165,154 +277,109 @@ func TestCache(t *testing.T) { "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe", ) - // initial store - if pathname, err := c.StoreFile( - identifier, - []byte(testdata), - &testdataChecksum, - true, - ); err != nil { - t.Fatalf("StoreFile: error = %v", err) - } else if !pathname.Is(wantPathname) { - t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathname) - } + cureMany(t, c, []cureStep{ + {"initial file", newStubFile( + pkg.KindHTTP, + identifier, + &testdataChecksum, + []byte(testdata), nil, + ), wantPathname, testdataChecksum, nil}, - // load or store, identical content - if pathname, data, store, err := c.LoadOrStoreFile(identifier0, func() ([]byte, error) { - return []byte(testdata), nil - }, &testdataChecksum, true); err != nil { - t.Fatalf("LoadOrStoreFile: error = %v", err) - } else if !pathname.Is(wantPathname0) { - t.Fatalf("LoadOrStoreFile: pathname = %q, want %q", pathname, wantPathname0) - } else if string(data) != testdata { - t.Fatalf("LoadOrStoreFile: data = %x, want %x", data, testdata) - } else if !store { - t.Fatal("LoadOrStoreFile did not store nonpresent entry") - } + {"identical content", newStubFile( + pkg.KindHTTP, + identifier0, + &testdataChecksum, + []byte(testdata), nil, + ), wantPathname0, testdataChecksum, nil}, - // load or store, existing entry - if pathname, data, store, err := c.LoadOrStoreFile(identifier, func() ([]byte, error) { - return []byte(testdata), nil - }, &testdataChecksum, true); err != nil { - t.Fatalf("LoadOrStoreFile: error = %v", err) - } else if !pathname.Is(wantPathname) { - t.Fatalf("LoadOrStoreFile: pathname = %q, want %q", pathname, wantPathname) - } else if string(data) != testdata { - t.Fatalf("LoadOrStoreFile: data = %x, want %x", data, testdata) - } else if store { - t.Fatal("LoadOrStoreFile stored over present entry") - } + {"existing entry", newStubFile( + pkg.KindHTTP, + identifier, + &testdataChecksum, + []byte(testdata), nil, + ), wantPathname, testdataChecksum, nil}, - // load, existing entry - if pathname, data, err := c.LoadFile(identifier0); err != nil { - t.Fatalf("LoadFile: error = %v", err) - } else if !pathname.Is(wantPathname0) { - 
t.Fatalf("LoadFile: pathname = %q, want %q", pathname, wantPathname0) - } else if string(data) != testdata { - t.Fatalf("LoadFile: data = %x, want %x", data, testdata) - } + {"checksum mismatch", newStubFile( + pkg.KindHTTP, + pkg.ID{0xff, 0}, + new(pkg.Checksum), + []byte(testdata), nil, + ), nil, pkg.Checksum{}, &pkg.ChecksumMismatchError{ + Got: testdataChecksum, + }}, - // checksum mismatch - wantErrChecksum := &pkg.ChecksumMismatchError{ - Got: testdataChecksum, - } - if _, err := c.StoreFile( - testdataChecksum, - []byte(testdata), - new(pkg.Checksum), - true, - ); !reflect.DeepEqual(err, wantErrChecksum) { - t.Fatalf("StoreFile: error = %#v, want %#v", err, wantErrChecksum) - } - - // verify failed store - if _, _, err := c.LoadFile(testdataChecksum); !reflect.DeepEqual(err, wantErrNonexistent) { - t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistent) - } - - // store, same identifier - wantPathnameF := base.Append( - "identifier", - testdataChecksumString, - ) - if pathname, err := c.StoreFile( - testdataChecksum, - []byte(testdata), - &testdataChecksum, - true, - ); err != nil { - t.Fatalf("StoreFile: error = %v", err) - } else if !pathname.Is(wantPathnameF) { - t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathnameF) - } - - // load, same identifier - if pathname, data, err := c.LoadFile(testdataChecksum); err != nil { - t.Fatalf("LoadFile: error = %v", err) - } else if !pathname.Is(wantPathnameF) { - t.Fatalf("LoadFile: pathname = %q, want %q", pathname, wantPathnameF) - } else if string(data) != testdata { - t.Fatalf("LoadFile: data = %x, want %x", data, testdata) - } - - // store without validation - wantChecksum := pkg.Checksum{ - 0xbe, 0xc0, 0x21, 0xb4, 0xf3, 0x68, - 0xe3, 0x06, 0x91, 0x34, 0xe0, 0x12, - 0xc2, 0xb4, 0x30, 0x70, 0x83, 0xd3, - 0xa9, 0xbd, 0xd2, 0x06, 0xe2, 0x4e, - 0x5f, 0x0d, 0x86, 0xe1, 0x3d, 0x66, - 0x36, 0x65, 0x59, 0x33, 0xec, 0x2b, - 0x41, 0x34, 0x65, 0x96, 0x68, 0x17, - 0xa9, 0xc2, 0x08, 0xa1, 0x17, 0x17, - } - var gotChecksum pkg.Checksum - wantPathnameG := base.Append( - "identifier", - pkg.Encode(wantChecksum), - ) - if pathname, err := c.StoreFile( - wantChecksum, - []byte{0}, - &gotChecksum, - false, - ); err != nil { - t.Fatalf("StoreFile: error = %#v", err) - } else if !pathname.Is(wantPathnameG) { - t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathnameG) - } else if gotChecksum != wantChecksum { - t.Fatalf("StoreFile: buf = %x, want %x", gotChecksum, wantChecksum) - } - - // makeData passthrough - var zeroIdent pkg.ID - wantErrPassthrough := stub.UniqueError(0xcafe) - if _, _, _, err := c.LoadOrStoreFile(zeroIdent, func() ([]byte, error) { - return nil, wantErrPassthrough - }, new(pkg.Checksum), true); !reflect.DeepEqual(err, wantErrPassthrough) { - t.Fatalf("LoadOrStoreFile: error = %#v, want %#v", err, wantErrPassthrough) - } - - // verify failed store - wantErrNonexistentZero := &os.PathError{ - Op: "open", - Path: base.Append( + {"store without validation", newStubFile( + pkg.KindHTTP, + pkg.MustDecode("vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX"), + nil, + []byte{0}, nil, + ), base.Append( "identifier", - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", - ).String(), - Err: syscall.ENOENT, - } - if _, _, err := c.LoadFile(zeroIdent); !reflect.DeepEqual(err, wantErrNonexistentZero) { - t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistentZero) + "vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", + ), pkg.Checksum{ + 0xbe, 0xc0, 
0x21, 0xb4, 0xf3, 0x68, + 0xe3, 0x06, 0x91, 0x34, 0xe0, 0x12, + 0xc2, 0xb4, 0x30, 0x70, 0x83, 0xd3, + 0xa9, 0xbd, 0xd2, 0x06, 0xe2, 0x4e, + 0x5f, 0x0d, 0x86, 0xe1, 0x3d, 0x66, + 0x36, 0x65, 0x59, 0x33, 0xec, 0x2b, + 0x41, 0x34, 0x65, 0x96, 0x68, 0x17, + 0xa9, 0xc2, 0x08, 0xa1, 0x17, 0x17, + }, nil}, + + {"error passthrough", newStubFile( + pkg.KindHTTP, + pkg.ID{0xff, 1}, + nil, + nil, stub.UniqueError(0xcafe), + ), nil, pkg.Checksum{}, stub.UniqueError(0xcafe)}, + + {"error caching", newStubFile( + pkg.KindHTTP, + pkg.ID{0xff, 1}, + nil, + nil, nil, + ), nil, pkg.Checksum{}, stub.UniqueError(0xcafe)}, + + {"cache hit bad type", overrideChecksum{testdataChecksum, overrideIdent{pkg.ID{0xff, 2}, stubArtifact{ + kind: pkg.KindTar, + }}}, nil, pkg.Checksum{}, pkg.InvalidFileModeError( + 0400, + )}, + }) + + if c0, err := pkg.New(base); err != nil { + t.Fatalf("New: error = %v", err) + } else { + cureMany(t, c0, []cureStep{ + {"cache hit ident", overrideIdent{ + id: identifier, + }, wantPathname, testdataChecksum, nil}, + + {"cache miss checksum match", newStubFile( + pkg.KindHTTP, + testdataChecksum, + nil, + []byte(testdata), + nil, + ), base.Append( + "identifier", + pkg.Encode(testdataChecksum), + ), testdataChecksum, nil}, + }) } }, pkg.MustDecode("St9rlE-mGZ5gXwiv_hzQ_B8bZP-UUvSNmf4nHUZzCMOumb6hKnheZSe0dmnuc4Q2")}, {"directory", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { id := pkg.KindTar.Ident( binary.LittleEndian.AppendUint64(nil, pkg.TarGzip), - stubArtifact{pkg.KindHTTP, testdataChecksum}, + overrideIdent{testdataChecksum, stubArtifact{}}, ) - makeSample := func(work *check.Absolute) error { + makeSample := func(work *check.Absolute, _ pkg.CacheDataFunc) error { + if err := os.Mkdir(work.String(), 0700); err != nil { + return err + } + if err := os.WriteFile( work.Append("check").String(), []byte{0, 0}, @@ -344,59 +411,22 @@ func TestCache(t *testing.T) { pkg.Encode(id), ) - if pathname, store, err := c.Store( - id, - makeSample, - &wantChecksum, - true, - ); err != nil { - t.Fatalf("Store: error = %v", err) - } else if !store { - t.Fatal("Store did not store nonpresent entry") - } else if !pathname.Is(wantPathname) { - t.Fatalf("Store: pathname = %q, want %q", pathname, wantPathname) - } - - // check lookup - if pathname, store, err := c.Store( - id, - nil, - &wantChecksum, - true, - ); err != nil { - t.Fatalf("Store: error = %v", err) - } else if store { - t.Fatal("Store stored over present entry") - } else if !pathname.Is(wantPathname) { - t.Fatalf("Store: pathname = %q, want %q", pathname, wantPathname) - } - - // check exist id0 := pkg.KindTar.Ident( binary.LittleEndian.AppendUint64(nil, pkg.TarGzip), - stubArtifact{pkg.KindHTTP, pkg.ID{}}, + overrideIdent{pkg.ID{}, stubArtifact{}}, ) wantPathname0 := base.Append( "identifier", pkg.Encode(id0), ) - if pathname, store, err := c.Store( - id0, - makeSample, - &wantChecksum, - true, - ); err != nil { - t.Fatalf("Store: error = %v", err) - } else if !store { - t.Fatal("Store did not store nonpresent entry") - } else if !pathname.Is(wantPathname0) { - t.Fatalf("Store: pathname = %q, want %q", pathname, wantPathname0) - } - var wantErrMakeGarbage error - makeGarbage := func(work *check.Absolute) error { + makeGarbage := func(work *check.Absolute, wantErr error) error { + if err := os.Mkdir(work.String(), 0700); err != nil { + return err + } + mode := fs.FileMode(0) - if wantErrMakeGarbage == nil { + if wantErr == nil { mode = 0500 } @@ -426,38 +456,144 @@ func TestCache(t *testing.T) { return err } - return 
wantErrMakeGarbage + return wantErr } - // check makeArtifact fault - wantErrMakeGarbage = stub.UniqueError(0xcafe) - if _, store, err := c.Store( - pkg.ID{}, - makeGarbage, - nil, - false, - ); !reflect.DeepEqual(err, wantErrMakeGarbage) { - t.Fatalf("Store: error = %#v, want %#v", err, wantErrMakeGarbage) - } else if !store { - t.Fatal("Store did not store nonpresent entry") - } + cureMany(t, c, []cureStep{ + {"initial directory", overrideChecksum{wantChecksum, overrideIdent{id, stubArtifact{ + kind: pkg.KindTar, + cure: makeSample, + }}}, wantPathname, wantChecksum, nil}, - // checksum mismatch - wantErrMakeGarbage = nil - wantErrMismatch := &pkg.ChecksumMismatchError{ - Got: pkg.MustDecode("GbjlYMcHQANdfwL6qNGopBF99IscPTvCy95HSH1_kIF3eKjFDSLP0_iUUT0z8hiw"), - } - if _, store, err := c.Store( - pkg.ID{}, - makeGarbage, - new(pkg.Checksum), - true, - ); !reflect.DeepEqual(err, wantErrMismatch) { - t.Fatalf("Store: error = %v, want %v", err, wantErrMismatch) - } else if !store { - t.Fatal("Store did not store nonpresent entry") - } + {"identical identifier", overrideChecksum{wantChecksum, overrideIdent{id, stubArtifact{ + kind: pkg.KindTar, + }}}, wantPathname, wantChecksum, nil}, + + {"identical checksum", overrideIdent{id0, stubArtifact{ + kind: pkg.KindTar, + cure: makeSample, + }}, wantPathname0, wantChecksum, nil}, + + {"cure fault", overrideIdent{pkg.ID{0xff, 0}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, _ pkg.CacheDataFunc) error { + return makeGarbage(work, stub.UniqueError(0xcafe)) + }, + }}, nil, pkg.Checksum{}, stub.UniqueError(0xcafe)}, + + {"checksum mismatch", overrideChecksum{pkg.Checksum{}, overrideIdent{pkg.ID{0xff, 1}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, _ pkg.CacheDataFunc) error { + return makeGarbage(work, nil) + }, + }}}, nil, pkg.Checksum{}, &pkg.ChecksumMismatchError{ + Got: pkg.MustDecode( + "GbjlYMcHQANdfwL6qNGopBF99IscPTvCy95HSH1_kIF3eKjFDSLP0_iUUT0z8hiw", + ), + }}, + + {"cache hit bad type", newStubFile( + pkg.KindHTTP, + pkg.ID{0xff, 2}, + &wantChecksum, + []byte(testdata), nil, + ), nil, pkg.Checksum{}, pkg.InvalidFileModeError( + fs.ModeDir | 0500, + )}, + + {"loadData directory", overrideIdent{pkg.ID{0xff, 3}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + _, err := loadData(overrideChecksumFile{checksum: wantChecksum}) + return err + }, + }}, nil, pkg.Checksum{}, &os.PathError{ + Op: "read", + Path: base.Append( + "checksum", + pkg.Encode(wantChecksum), + ).String(), + Err: syscall.EISDIR, + }}, + + {"no output", overrideIdent{pkg.ID{0xff, 4}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + return nil + }, + }}, nil, pkg.Checksum{}, pkg.NoOutputError{}}, + + {"file output", overrideIdent{pkg.ID{0xff, 5}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + return os.WriteFile(work.String(), []byte{0}, 0400) + }, + }}, nil, pkg.Checksum{}, errors.New("non-file artifact produced regular file")}, + + {"symlink output", overrideIdent{pkg.ID{0xff, 6}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + return os.Symlink(work.String(), work.String()) + }, + }}, nil, pkg.Checksum{}, pkg.InvalidFileModeError( + fs.ModeSymlink | 0777, + )}, + }) }, pkg.MustDecode("8OP6YxJAdRrhV2WSBt1BPD7oC_n2Qh7JqUMyVMoGvjDX83bDqq2hgVMNcdiBH_64")}, + + {"pending", nil, func(t *testing.T, base 
*check.Absolute, c *pkg.Cache) { + c.SetStrict(true) + + wantErr := stub.UniqueError(0xcafe) + n, ready := make(chan struct{}), make(chan struct{}) + go func() { + if _, _, err := c.Cure(overrideIdent{pkg.ID{0xff}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + close(ready) + <-n + return wantErr + }, + }}); !reflect.DeepEqual(err, wantErr) { + panic(fmt.Sprintf("Cure: error = %v, want %v", err, wantErr)) + } + }() + + <-ready + go func() { + if _, _, err := c.Cure(overrideIdent{pkg.ID{0xff}, stubArtifact{ + kind: pkg.KindTar, + }}); !reflect.DeepEqual(err, wantErr) { + panic(fmt.Sprintf("Cure: error = %v, want %v", err, wantErr)) + } + }() + + // check cache activity while a cure is blocking + cureMany(t, c, []cureStep{ + {"error passthrough", newStubFile( + pkg.KindHTTP, + pkg.ID{0xff, 1}, + nil, + nil, stub.UniqueError(0xbad), + ), nil, pkg.Checksum{}, stub.UniqueError(0xbad)}, + + {"file output", overrideIdent{pkg.ID{0xff, 2}, stubArtifact{ + kind: pkg.KindTar, + cure: func(work *check.Absolute, loadData pkg.CacheDataFunc) error { + return os.WriteFile(work.String(), []byte{0}, 0400) + }, + }}, nil, pkg.Checksum{}, errors.New("non-file artifact produced regular file")}, + }) + + identPendingVal := reflect.ValueOf(c).Elem().FieldByName("identPending") + identPending := reflect.NewAt( + identPendingVal.Type(), + unsafe.Pointer(identPendingVal.UnsafeAddr()), + ).Elem().Interface().(map[pkg.ID]<-chan struct{}) + notify := identPending[pkg.ID{0xff}] + go close(n) + <-notify + }, pkg.MustDecode("E4vEZKhCcL2gPZ2Tt59FS3lDng-d_2SKa2i5G_RbDfwGn6EemptFaGLPUDiOa94C")}, } checkWithCache(t, testCases) } @@ -484,6 +620,14 @@ func TestErrors(t *testing.T) { {"DisallowedTypeflagError", pkg.DisallowedTypeflagError( tar.TypeChar, ), "disallowed typeflag '3'"}, + + {"InvalidFileModeError", pkg.InvalidFileModeError( + fs.ModeSymlink | 0777, + ), "artifact did not produce a regular file or directory"}, + + {"NoOutputError", pkg.NoOutputError{ + // empty struct + }, "artifact cured successfully but did not produce any output"}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/internal/pkg/tar.go b/internal/pkg/tar.go index 1483246..2b9aacd 100644 --- a/internal/pkg/tar.go +++ b/internal/pkg/tar.go @@ -11,7 +11,6 @@ import ( "io/fs" "net/http" "os" - "sync" "hakurei.app/container/check" ) @@ -25,59 +24,44 @@ const ( // A tarArtifact is an [Artifact] unpacking a tarball backed by a [File]. type tarArtifact struct { - // Computed ahead of time from the checksum of the identifier of f appended - // with parameters of tarArtifact. - id ID - // Caller-supplied backing tarball. f File // Compression on top of the tarball. compression uint64 - - // Populated when submitting to or loading from [Cache]. - pathname *check.Absolute - // Checksum of cured directory. Valid if pathname is not nil. - checksum Checksum - - // Instance of [Cache] to submit the cured artifact to. - c *Cache - // Protects the Pathname critical section. - mu sync.Mutex } // NewTar returns a new [Artifact] backed by the supplied [File] and // compression method. -func (c *Cache) NewTar(f File, compression uint64) Artifact { - return &tarArtifact{id: KindTar.Ident( - binary.LittleEndian.AppendUint64(nil, compression), f, - ), f: f, compression: compression, c: c} +func NewTar(f File, compression uint64) Artifact { + return &tarArtifact{f: f, compression: compression} } // NewHTTPGetTar is abbreviation for NewHTTPGet passed to NewTar. 
-func (c *Cache) NewHTTPGetTar( +func NewHTTPGetTar( ctx context.Context, hc *http.Client, url string, checksum Checksum, compression uint64, ) (Artifact, error) { - f, err := c.NewHTTPGet(ctx, hc, url, checksum) + f, err := NewHTTPGet(ctx, hc, url, checksum) if err != nil { return nil, err } - return c.NewTar(f, compression), nil + return NewTar(f, compression), nil } // Kind returns the hardcoded [Kind] constant. func (a *tarArtifact) Kind() Kind { return KindTar } -// ID returns the identifier prepared ahead of time. -func (a *tarArtifact) ID() ID { return a.id } +// Params returns compression encoded in little endian. +func (a *tarArtifact) Params() []byte { + return binary.LittleEndian.AppendUint64(nil, a.compression) +} -// Hash cures the [Artifact] and returns its hash. -func (a *tarArtifact) Hash() (Checksum, error) { - _, err := a.Pathname() - return a.checksum, err +// Dependencies returns a slice containing the backing file. +func (a *tarArtifact) Dependencies() []Artifact { + return []Artifact{a.f} } // A DisallowedTypeflagError describes a disallowed typeflag encountered while @@ -88,146 +72,131 @@ func (e DisallowedTypeflagError) Error() string { return "disallowed typeflag '" + string(e) + "'" } -// Pathname cures the [Artifact] and returns its pathname in the [Cache]. -func (a *tarArtifact) Pathname() (*check.Absolute, error) { - a.mu.Lock() - defer a.mu.Unlock() +// Cure cures the [Artifact], producing a directory located at work. +func (a *tarArtifact) Cure(work *check.Absolute, loadData CacheDataFunc) (err error) { + var tr io.ReadCloser - if a.pathname != nil { - return a.pathname, nil + { + var data []byte + data, err = loadData(a.f) + if err != nil { + return + } + tr = io.NopCloser(bytes.NewReader(data)) } - pathname, _, err := a.c.Store(a.id, func(work *check.Absolute) (err error) { - var tr io.ReadCloser - - { - var data []byte - data, err = a.f.Data() - if err != nil { - return - } - tr = io.NopCloser(bytes.NewReader(data)) - } - - defer func() { - closeErr := tr.Close() - if err == nil { - err = closeErr - } - }() - - switch a.compression { - case TarUncompressed: - break - - case TarGzip: - if tr, err = gzip.NewReader(tr); err != nil { - return - } - break - - default: - return os.ErrInvalid - } - - type dirTargetPerm struct { - path *check.Absolute - mode fs.FileMode - } - var madeDirectories []dirTargetPerm - - var header *tar.Header - r := tar.NewReader(tr) - for header, err = r.Next(); err == nil; header, err = r.Next() { - typeflag := header.Typeflag - for { - switch typeflag { - case 0: - if len(header.Name) > 0 && header.Name[len(header.Name)-1] == '/' { - typeflag = tar.TypeDir - } else { - typeflag = tar.TypeReg - } - continue - - case tar.TypeReg: - var f *os.File - if f, err = os.OpenFile( - work.Append(header.Name).String(), - os.O_CREATE|os.O_EXCL|os.O_WRONLY, - header.FileInfo().Mode()&0400, - ); err != nil { - return - } - if _, err = io.Copy(f, r); err != nil { - _ = f.Close() - return - } else if err = f.Close(); err != nil { - return - } - break - - case tar.TypeLink: - if err = os.Link( - header.Linkname, - work.Append(header.Name).String(), - ); err != nil { - return - } - break - - case tar.TypeSymlink: - if err = os.Symlink( - header.Linkname, - work.Append(header.Name).String(), - ); err != nil { - return - } - break - - case tar.TypeDir: - pathname := work.Append(header.Name) - madeDirectories = append(madeDirectories, dirTargetPerm{ - path: pathname, - mode: header.FileInfo().Mode(), - }) - if err = os.MkdirAll( - pathname.String(), - 
0700, - ); err != nil { - return - } - break - - case tar.TypeXGlobalHeader: - // ignore - break - - default: - return DisallowedTypeflagError(typeflag) - } - - break - } - } - if errors.Is(err, io.EOF) { - err = nil - } - + defer func() { + closeErr := tr.Close() if err == nil { - for _, e := range madeDirectories { - if err = os.Chmod(e.path.String(), e.mode&0500); err != nil { + err = closeErr + } + }() + + switch a.compression { + case TarUncompressed: + break + + case TarGzip: + if tr, err = gzip.NewReader(tr); err != nil { + return + } + break + + default: + return os.ErrInvalid + } + + type dirTargetPerm struct { + path *check.Absolute + mode fs.FileMode + } + var madeDirectories []dirTargetPerm + + var header *tar.Header + r := tar.NewReader(tr) + for header, err = r.Next(); err == nil; header, err = r.Next() { + typeflag := header.Typeflag + for { + switch typeflag { + case 0: + if len(header.Name) > 0 && header.Name[len(header.Name)-1] == '/' { + typeflag = tar.TypeDir + } else { + typeflag = tar.TypeReg + } + continue + + case tar.TypeReg: + var f *os.File + if f, err = os.OpenFile( + work.Append(header.Name).String(), + os.O_CREATE|os.O_EXCL|os.O_WRONLY, + header.FileInfo().Mode()&0400, + ); err != nil { return } + if _, err = io.Copy(f, r); err != nil { + _ = f.Close() + return + } else if err = f.Close(); err != nil { + return + } + break + + case tar.TypeLink: + if err = os.Link( + header.Linkname, + work.Append(header.Name).String(), + ); err != nil { + return + } + break + + case tar.TypeSymlink: + if err = os.Symlink( + header.Linkname, + work.Append(header.Name).String(), + ); err != nil { + return + } + break + + case tar.TypeDir: + pathname := work.Append(header.Name) + madeDirectories = append(madeDirectories, dirTargetPerm{ + path: pathname, + mode: header.FileInfo().Mode(), + }) + if err = os.MkdirAll( + pathname.String(), + 0700, + ); err != nil { + return + } + break + + case tar.TypeXGlobalHeader: + // ignore + break + + default: + return DisallowedTypeflagError(typeflag) } - err = os.Chmod(work.String(), 0500) + + break } - return - }, &a.checksum, false) - if err != nil { - return nil, err + } + if errors.Is(err, io.EOF) { + err = nil } - a.pathname = pathname - return pathname, nil + if err == nil { + for _, e := range madeDirectories { + if err = os.Chmod(e.path.String(), e.mode&0500); err != nil { + return + } + } + err = os.Chmod(work.String(), 0500) + } + return } diff --git a/internal/pkg/tar_test.go b/internal/pkg/tar_test.go index a629c9e..309a1c7 100644 --- a/internal/pkg/tar_test.go +++ b/internal/pkg/tar_test.go @@ -79,7 +79,7 @@ func TestTar(t *testing.T) { return pkg.ID(h.Sum(nil)) }() - a, err := c.NewHTTPGetTar( + a, err := pkg.NewHTTPGetTar( t.Context(), &client, "file:///testdata", @@ -89,27 +89,27 @@ func TestTar(t *testing.T) { if err != nil { t.Fatalf("NewHTTPGetTar: error = %v", err) - } else if id := a.ID(); id != wantIdent { - t.Fatalf("ID: %s, want %s", pkg.Encode(id), pkg.Encode(wantIdent)) + } else if id := pkg.Ident(a); id != wantIdent { + t.Fatalf("Ident: %s, want %s", pkg.Encode(id), pkg.Encode(wantIdent)) } - var pathname *check.Absolute + var ( + pathname *check.Absolute + checksum pkg.Checksum + ) wantPathname := base.Append( "identifier", pkg.Encode(wantIdent), ) - if pathname, err = a.Pathname(); err != nil { - t.Fatalf("Pathname: error = %v", err) + wantChecksum := pkg.MustDecode( + "yJlSb2A3jxaMLuKqwp1GwHOguAHddS9MjygF9ICEeegKfRvgLPdPmNh8mva47f8o", + ) + if pathname, checksum, err = c.Cure(a); err != nil { + t.Fatalf("Cure: 
error = %v", err) } else if !pathname.Is(wantPathname) { - t.Fatalf("Pathname: %q, want %q", pathname, wantPathname) - } - - var checksum pkg.Checksum - wantChecksum := pkg.MustDecode("yJlSb2A3jxaMLuKqwp1GwHOguAHddS9MjygF9ICEeegKfRvgLPdPmNh8mva47f8o") - if checksum, err = a.Hash(); err != nil { - t.Fatalf("Hash: error = %v", err) + t.Fatalf("Cure: %q, want %q", pathname, wantPathname) } else if checksum != wantChecksum { - t.Fatalf("Hash: %v", &pkg.ChecksumMismatchError{ + t.Fatalf("Cure: %v", &pkg.ChecksumMismatchError{ Got: checksum, Want: wantChecksum, })