From 334578fddeda6c952b1c2e35829c91c9da72747e Mon Sep 17 00:00:00 2001 From: Ophestra Date: Sun, 25 Jan 2026 14:14:19 +0900 Subject: [PATCH] internal/pkg: expose underlying reader This will be fully implemented in httpArtifact in a future commit. Signed-off-by: Ophestra --- internal/pkg/exec.go | 2 +- internal/pkg/file.go | 10 ++- internal/pkg/net.go | 15 ++-- internal/pkg/net_test.go | 16 +++- internal/pkg/pkg.go | 185 ++++++++++++++++++++++++--------------- internal/pkg/pkg_test.go | 18 ++-- internal/pkg/tar.go | 2 +- internal/rosa/busybox.go | 2 +- 8 files changed, 157 insertions(+), 93 deletions(-) diff --git a/internal/pkg/exec.go b/internal/pkg/exec.go index a80124a..3d3f0b6 100644 --- a/internal/pkg/exec.go +++ b/internal/pkg/exec.go @@ -32,7 +32,7 @@ type ExecPath struct { P *check.Absolute // Artifacts to mount on the pathname, must contain at least one [Artifact]. // If there are multiple entries or W is true, P is set up as an overlay - // mount, and entries of A must not implement [File]. + // mount, and entries of A must not implement [FileArtifact]. A []Artifact // Whether to make the mount point writable via the temp directory. W bool diff --git a/internal/pkg/file.go b/internal/pkg/file.go index 4558eb4..03197c5 100644 --- a/internal/pkg/file.go +++ b/internal/pkg/file.go @@ -1,9 +1,11 @@ package pkg import ( + "bytes" "context" "crypto/sha512" "fmt" + "io" ) // A fileArtifact is an [Artifact] that cures into data known ahead of time. @@ -24,10 +26,10 @@ var _ KnownChecksum = new(fileArtifactNamed) // String returns the caller-supplied reporting name. func (a *fileArtifactNamed) String() string { return a.name } -// NewFile returns a [File] that cures into a caller-supplied byte slice. +// NewFile returns a [FileArtifact] that cures into a caller-supplied byte slice. // // Caller must not modify data after NewFile returns. -func NewFile(name string, data []byte) File { +func NewFile(name string, data []byte) FileArtifact { f := fileArtifact(data) if name != "" { return &fileArtifactNamed{f, name} @@ -52,4 +54,6 @@ func (a *fileArtifact) Checksum() Checksum { } // Cure returns the caller-supplied data. -func (a *fileArtifact) Cure(context.Context) ([]byte, error) { return *a, nil } +func (a *fileArtifact) Cure(context.Context) (io.ReadCloser, error) { + return io.NopCloser(bytes.NewReader(*a)), nil +} diff --git a/internal/pkg/net.go b/internal/pkg/net.go index c481b52..8726fca 100644 --- a/internal/pkg/net.go +++ b/internal/pkg/net.go @@ -1,6 +1,7 @@ package pkg import ( + "bytes" "context" "crypto/sha512" "fmt" @@ -34,13 +35,13 @@ type httpArtifact struct { var _ KnownChecksum = new(httpArtifact) var _ fmt.Stringer = new(httpArtifact) -// NewHTTPGet returns a new [File] backed by the supplied client. A GET request -// is set up for url. If c is nil, [http.DefaultClient] is used instead. +// NewHTTPGet returns a new [FileArtifact] backed by the supplied client. A GET +// request is set up for url. If c is nil, [http.DefaultClient] is used instead. func NewHTTPGet( c *http.Client, url string, checksum Checksum, -) File { +) FileArtifact { if c == nil { c = http.DefaultClient } @@ -103,15 +104,16 @@ func (a *httpArtifact) do(ctx context.Context) (data []byte, err error) { // Cure completes the http request and returns the resulting response body read // to EOF. Data does not interact with the filesystem. -func (a *httpArtifact) Cure(ctx context.Context) (data []byte, err error) { +func (a *httpArtifact) Cure(ctx context.Context) (r io.ReadCloser, err error) { a.mu.Lock() defer a.mu.Unlock() if a.data != nil { - // validated by cache or a previous call to Data - return a.data, nil + // validated by cache or a previous call to Cure + return io.NopCloser(bytes.NewReader(a.data)), nil } + var data []byte if data, err = a.do(ctx); err != nil { return } @@ -122,5 +124,6 @@ func (a *httpArtifact) Cure(ctx context.Context) (data []byte, err error) { return nil, &ChecksumMismatchError{got, a.checksum} } a.data = data + r = io.NopCloser(bytes.NewReader(data)) return } diff --git a/internal/pkg/net_test.go b/internal/pkg/net_test.go index 9bf58d9..c7c4906 100644 --- a/internal/pkg/net_test.go +++ b/internal/pkg/net_test.go @@ -2,6 +2,7 @@ package pkg_test import ( "crypto/sha512" + "io" "net/http" "reflect" "testing" @@ -31,13 +32,17 @@ func TestHTTPGet(t *testing.T) { checkWithCache(t, []cacheTestCase{ {"direct", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + f := pkg.NewHTTPGet( &client, "file:///testdata", testdataChecksum.Value(), ) - if got, err := f.Cure(t.Context()); err != nil { + var got []byte + if r, err := f.Cure(t.Context()); err != nil { t.Fatalf("Cure: error = %v", err) + } else if got, err = io.ReadAll(r); err != nil { + t.Fatalf("ReadAll: error = %v", err) } else if string(got) != testdata { t.Fatalf("Cure: %x, want %x", got, testdata) } @@ -85,8 +90,11 @@ func TestHTTPGet(t *testing.T) { t.Fatalf("Cure: %x, want %x", checksum.Value(), testdataChecksum.Value()) } - if got, err := f.Cure(t.Context()); err != nil { + var got []byte + if r, err := f.Cure(t.Context()); err != nil { t.Fatalf("Cure: error = %v", err) + } else if got, err = io.ReadAll(r); err != nil { + t.Fatalf("ReadAll: error = %v", err) } else if string(got) != testdata { t.Fatalf("Cure: %x, want %x", got, testdata) } @@ -97,8 +105,10 @@ func TestHTTPGet(t *testing.T) { "file:///testdata", testdataChecksum.Value(), ) - if got, err := f.Cure(t.Context()); err != nil { + if r, err := f.Cure(t.Context()); err != nil { t.Fatalf("Cure: error = %v", err) + } else if got, err = io.ReadAll(r); err != nil { + t.Fatalf("ReadAll: error = %v", err) } else if string(got) != testdata { t.Fatalf("Cure: %x, want %x", got, testdata) } diff --git a/internal/pkg/pkg.go b/internal/pkg/pkg.go index 7f082d5..d029eff 100644 --- a/internal/pkg/pkg.go +++ b/internal/pkg/pkg.go @@ -2,6 +2,7 @@ package pkg import ( + "bufio" "bytes" "context" "crypto/sha512" @@ -147,14 +148,15 @@ func (t *TContext) GetWorkDir() *check.Absolute { return t.work } // create it if they wish to use it, using [os.MkdirAll]. func (t *TContext) GetTempDir() *check.Absolute { return t.temp } -// Open tries to open [Artifact] for reading. If a implements [File], its data -// might be used directly, eliminating the roundtrip to vfs. Otherwise, it must -// cure into a directory containing a single regular file. +// Open tries to open [Artifact] for reading. If a implements [FileArtifact], +// its reader might be used directly, eliminating the roundtrip to vfs. +// Otherwise, it must cure into a directory containing a single regular file. // -// If err is nil, the caller is responsible for closing the resulting -// [io.ReadCloser]. +// If err is nil, the caller must close the resulting [io.ReadCloser] and return +// its error, if any. Failure to read r to EOF may result in a spurious +// [ChecksumMismatchError], or the underlying implementation may block on Close. func (t *TContext) Open(a Artifact) (r io.ReadCloser, err error) { - if f, ok := a.(File); ok { + if f, ok := a.(FileArtifact); ok { return t.cache.openFile(f) } @@ -274,7 +276,7 @@ func Flood(a Artifact) iter.Seq[Artifact] { // // TrivialArtifact is unable to cure any other [Artifact] and it cannot access // pathnames. This type of [Artifact] is primarily intended for dependency-less -// artifacts or direct dependencies that only consists of [File]. +// artifacts or direct dependencies that only consists of [FileArtifact]. type TrivialArtifact interface { // Cure cures the current [Artifact] to the working directory obtained via // [TContext.GetWorkDir]. @@ -309,16 +311,19 @@ type KnownChecksum interface { Checksum() Checksum } -// A File refers to an [Artifact] backed by a single file. -type File interface { - // Cure returns the full contents of [File]. If [File] implements - // [KnownChecksum], Cure is responsible for validating any data it produces - // and must return [ChecksumMismatchError] if validation fails. +// FileArtifact refers to an [Artifact] backed by a single file. +type FileArtifact interface { + // Cure returns [io.ReadCloser] of the full contents of [FileArtifact]. If + // [FileArtifact] implements [KnownChecksum], Cure is responsible for + // validating any data it produces and must return [ChecksumMismatchError] + // if validation fails. This error is conventionally returned during the + // first call to Close, but may be returned during any call to Read before + // EOF, or by Cure itself. // - // Callers must not modify the returned byte slice. + // Callers are responsible for closing the resulting [io.ReadCloser]. // // Result must remain identical across multiple invocations. - Cure(ctx context.Context) ([]byte, error) + Cure(ctx context.Context) (io.ReadCloser, error) Artifact } @@ -430,7 +435,7 @@ type Cache struct { // Directory where all [Cache] related files are placed. base *check.Absolute - // Whether to validate [File.Cure] for a [KnownChecksum] file. This + // Whether to validate [FileArtifact.Cure] for a [KnownChecksum] file. This // significantly reduces performance. strict bool // Maximum size of a dependency graph. @@ -453,6 +458,9 @@ type Cache struct { // Synchronises access to ident and corresponding filesystem entries. identMu sync.RWMutex + // Buffered I/O free list, must not be accessed directly. + bufioPool sync.Pool + // Unlocks the on-filesystem cache. Must only be called from Close. unlock func() // Synchronises calls to Close. @@ -573,8 +581,8 @@ func (e *ChecksumMismatchError) Error() string { // found and removed from the underlying storage of [Cache]. type ScrubError struct { // Content-addressed entries not matching their checksum. This can happen - // if an incorrect [File] implementation was cured against a non-strict - // [Cache]. + // if an incorrect [FileArtifact] implementation was cured against + // a non-strict [Cache]. ChecksumMismatches []ChecksumMismatchError // Dangling identifier symlinks. This can happen if the content-addressed // entry was removed while scrubbing due to a checksum mismatch. @@ -910,10 +918,11 @@ func (c *Cache) finaliseIdent( close(done) } -// openFile tries to load [File] from [Cache], and if that fails, obtains it via -// [File.Cure] instead. Notably, it does not cure [File]. If err is nil, the -// caller is responsible for closing the resulting [io.ReadCloser]. -func (c *Cache) openFile(f File) (r io.ReadCloser, err error) { +// openFile tries to load [FileArtifact] from [Cache], and if that fails, +// obtains it via [FileArtifact.Cure] instead. Notably, it does not cure +// [FileArtifact] to the filesystem. If err is nil, the caller is responsible +// for closing the resulting [io.ReadCloser]. +func (c *Cache) openFile(f FileArtifact) (r io.ReadCloser, err error) { if kc, ok := f.(KnownChecksum); ok { c.checksumMu.RLock() r, err = os.Open(c.base.Append( @@ -943,11 +952,7 @@ func (c *Cache) openFile(f File) (r io.ReadCloser, err error) { } }() } - var data []byte - if data, err = f.Cure(c.ctx); err != nil { - return - } - r = io.NopCloser(bytes.NewReader(data)) + return f.Cure(c.ctx) } return } @@ -1217,6 +1222,16 @@ func (c *Cache) exitCure(curesExempt bool) { } } +// getWriter is like [bufio.NewWriter] but for bufioPool. +func (c *Cache) getWriter(w io.Writer) *bufio.Writer { + bw := c.bufioPool.Get().(*bufio.Writer) + bw.Reset(w) + return bw +} + +// putWriter adds bw to bufioPool. +func (c *Cache) putWriter(bw *bufio.Writer) { c.bufioPool.Put(bw) } + // cure implements Cure without checking the full dependency graph. func (c *Cache) cure(a Artifact, curesExempt bool) ( pathname *check.Absolute, @@ -1313,8 +1328,8 @@ func (c *Cache) cure(a Artifact, curesExempt bool) ( }() } - // cure File outside type switch to skip TContext initialisation - if f, ok := a.(File); ok { + // cure FileArtifact outside type switch to skip TContext initialisation + if f, ok := a.(FileArtifact); ok { if checksumFi != nil { if !checksumFi.Mode().IsRegular() { // unreachable @@ -1323,67 +1338,96 @@ func (c *Cache) cure(a Artifact, curesExempt bool) ( return } - var data []byte + work := c.base.Append(dirWork, ids) + var w *os.File + if w, err = os.OpenFile( + work.String(), + os.O_CREATE|os.O_EXCL|os.O_WRONLY, + 0400, + ); err != nil { + return + } + defer func() { + closeErr := w.Close() + if err == nil { + err = closeErr + } + + removeErr := os.Remove(work.String()) + if err == nil && !errors.Is(removeErr, os.ErrNotExist) { + err = removeErr + } + }() + + var r io.ReadCloser if err = c.enterCure(curesExempt); err != nil { return } - data, err = f.Cure(c.ctx) + r, err = f.Cure(c.ctx) + if err == nil { + if checksumPathname == nil || c.IsStrict() { + h := sha512.New384() + hbw := c.getWriter(h) + _, err = io.Copy(w, io.TeeReader(r, hbw)) + flushErr := hbw.Flush() + c.putWriter(hbw) + if err == nil { + err = flushErr + } + + if err == nil { + buf := c.getIdentBuf() + h.Sum(buf[:0]) + + if checksumPathname == nil { + checksum = unique.Make(Checksum(buf[:])) + checksums = Encode(Checksum(buf[:])) + } else if c.IsStrict() { + if got := Checksum(buf[:]); got != checksum.Value() { + err = &ChecksumMismatchError{ + Got: got, + Want: checksum.Value(), + } + } + } + + c.putIdentBuf(buf) + + if checksumPathname == nil { + checksumPathname = c.base.Append( + dirChecksum, + checksums, + ) + } + } + } else { + _, err = io.Copy(w, r) + } + + closeErr := r.Close() + if err == nil { + err = closeErr + } + } c.exitCure(curesExempt) if err != nil { return } - if checksumPathname == nil { - h := sha512.New384() - h.Write(data) - buf := c.getIdentBuf() - h.Sum(buf[:0]) - checksum = unique.Make(Checksum(buf[:])) - checksums = Encode(Checksum(buf[:])) - c.putIdentBuf(buf) - checksumPathname = c.base.Append( - dirChecksum, - checksums, - ) - } else if c.IsStrict() { - h := sha512.New384() - h.Write(data) - if got := Checksum(h.Sum(nil)); got != checksum.Value() { - err = &ChecksumMismatchError{ - Got: got, - Want: checksum.Value(), - } - return - } - } - c.checksumMu.Lock() - var w *os.File - w, err = os.OpenFile( + if err = os.Rename( + work.String(), checksumPathname.String(), - os.O_CREATE|os.O_EXCL|os.O_WRONLY, - 0400, - ) - if err != nil { + ); err != nil { c.checksumMu.Unlock() - - if errors.Is(err, os.ErrExist) { - err = nil - } return } - _, err = w.Write(data) - closeErr := w.Close() timeErr := zeroTimes(checksumPathname.String()) c.checksumMu.Unlock() if err == nil { err = timeErr } - if err == nil { - err = closeErr - } - return } @@ -1601,6 +1645,7 @@ func open( } c.ctx, c.cancel = context.WithCancel(ctx) c.identPool.New = func() any { return new(extIdent) } + c.bufioPool.New = func() any { return new(bufio.Writer) } if lock || !testing.Testing() { if unlock, err := lockedfile.MutexAt( diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go index 2581cce..903db56 100644 --- a/internal/pkg/pkg_test.go +++ b/internal/pkg/pkg_test.go @@ -47,10 +47,10 @@ type overrideIdent struct { func (a overrideIdent) ID() pkg.ID { return a.id } -// overrideIdentFile overrides the ID method of [File]. +// overrideIdentFile overrides the ID method of [FileArtifact]. type overrideIdentFile struct { id pkg.ID - pkg.File + pkg.FileArtifact } func (a overrideIdentFile) ID() pkg.ID { return a.id } @@ -61,10 +61,10 @@ type knownIdentArtifact interface { pkg.TrivialArtifact } -// A knownIdentFile implements [pkg.KnownIdent] and [File] +// A knownIdentFile implements [pkg.KnownIdent] and [FileArtifact] type knownIdentFile interface { pkg.KnownIdent - pkg.File + pkg.FileArtifact } // overrideChecksum overrides the Checksum method of [Artifact]. @@ -75,7 +75,7 @@ type overrideChecksum struct { func (a overrideChecksum) Checksum() pkg.Checksum { return a.checksum } -// overrideChecksumFile overrides the Checksum method of [File]. +// overrideChecksumFile overrides the Checksum method of [FileArtifact]. type overrideChecksumFile struct { checksum pkg.Checksum knownIdentFile @@ -111,7 +111,7 @@ func (a *stubArtifactF) Params(ctx *pkg.IContext) { ctx.GetHash().Write(a.pa func (a *stubArtifactF) Dependencies() []pkg.Artifact { return a.deps } func (a *stubArtifactF) Cure(f *pkg.FContext) error { return a.cure(f) } -// A stubFile implements [File] with hardcoded behaviour. +// A stubFile implements [FileArtifact] with hardcoded behaviour. type stubFile struct { data []byte err error @@ -119,7 +119,9 @@ type stubFile struct { stubArtifact } -func (a *stubFile) Cure(context.Context) ([]byte, error) { return a.data, a.err } +func (a *stubFile) Cure(context.Context) (io.ReadCloser, error) { + return io.NopCloser(bytes.NewReader(a.data)), a.err +} // newStubFile returns an implementation of [pkg.File] with hardcoded behaviour. func newStubFile( @@ -128,7 +130,7 @@ func newStubFile( sum *pkg.Checksum, data []byte, err error, -) pkg.File { +) pkg.FileArtifact { f := overrideIdentFile{id, &stubFile{data, err, stubArtifact{ kind, nil, diff --git a/internal/pkg/tar.go b/internal/pkg/tar.go index faad485..ba68fa6 100644 --- a/internal/pkg/tar.go +++ b/internal/pkg/tar.go @@ -24,7 +24,7 @@ const ( TarBzip2 ) -// A tarArtifact is an [Artifact] unpacking a tarball backed by a [File]. +// A tarArtifact is an [Artifact] unpacking a tarball backed by a [FileArtifact]. type tarArtifact struct { // Caller-supplied backing tarball. f Artifact diff --git a/internal/rosa/busybox.go b/internal/rosa/busybox.go index 5a1fa92..ba5e2d9 100644 --- a/internal/rosa/busybox.go +++ b/internal/rosa/busybox.go @@ -16,7 +16,7 @@ import ( // busyboxBin is a busybox binary distribution installed under bin/busybox. type busyboxBin struct { // Underlying busybox binary. - bin pkg.File + bin pkg.FileArtifact } // Kind returns the hardcoded [pkg.Kind] value.