From ce249d23f1e3d3d90330fd8d02699b4b7c8ccb9b Mon Sep 17 00:00:00 2001 From: Ophestra Date: Sat, 3 Jan 2026 15:26:59 +0900 Subject: [PATCH] internal/pkg: implement http artifact This is useful for downloading source tarballs from the internet. Signed-off-by: Ophestra --- internal/pkg/dir_test.go | 28 +++++++ internal/pkg/net.go | 156 +++++++++++++++++++++++++++++++++++++++ internal/pkg/net_test.go | 154 ++++++++++++++++++++++++++++++++++++++ internal/pkg/pkg_test.go | 5 ++ 4 files changed, 343 insertions(+) create mode 100644 internal/pkg/net.go create mode 100644 internal/pkg/net_test.go diff --git a/internal/pkg/dir_test.go b/internal/pkg/dir_test.go index 7bc877d..35fbbb3 100644 --- a/internal/pkg/dir_test.go +++ b/internal/pkg/dir_test.go @@ -19,6 +19,16 @@ func TestFlatten(t *testing.T) { entries []pkg.FlatEntry sum pkg.Checksum }{ + {"empty", fstest.MapFS{ + ".": {Mode: 020000000700}, + "checksum": {Mode: 020000000700}, + "identifier": {Mode: 020000000700}, + }, []pkg.FlatEntry{ + {Mode: 020000000700, Path: "."}, + {Mode: 020000000700, Path: "checksum"}, + {Mode: 020000000700, Path: "identifier"}, + }, pkg.MustDecode("ANVz3GwS4oTcFTOjbc-n_N6MtycCtkELMBJB0ohuRz02PtmWZEJF8v3I51DtM0CY")}, + {"sample cache file", fstest.MapFS{ ".": {Mode: 020000000700}, @@ -44,6 +54,24 @@ func TestFlatten(t *testing.T) { {Mode: 0400, Path: "identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}}, {Mode: 0400, Path: "identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", Data: []byte{0}}, }, pkg.MustDecode("ZNSQH-mjhtIbFvi51lQ0UjatjoS8_5ILrBPNWlO2LWTq9P6MJEnekYzP0esUJnVr")}, + + {"sample load or store", fstest.MapFS{ + ".": {Mode: 020000000700}, + + "checksum": {Mode: 020000000700}, + "checksum/fLYGIMHgN1louE-JzITJZJo2SDniPu-IHBXubtvQWFO-hXnDVKNuscV7-zlyr5fU": {Mode: 0400, Data: 
[]byte("\x7f\xe1\x69\xa2\xdd\x63\x96\x26\x83\x79\x61\x8b\xf0\x3f\xd5\x16\x9a\x39\x3a\xdb\xcf\xb1\xbc\x8d\x33\xff\x75\xee\x62\x56\xa9\xf0\x27\xac\x13\x94\x69")}, + + "identifier": {Mode: 020000000700}, + "identifier/fLYGIMHgN1louE-JzITJZJo2SDniPu-IHBXubtvQWFO-hXnDVKNuscV7-zlyr5fU": {Mode: 0400, Data: []byte("\x7f\xe1\x69\xa2\xdd\x63\x96\x26\x83\x79\x61\x8b\xf0\x3f\xd5\x16\x9a\x39\x3a\xdb\xcf\xb1\xbc\x8d\x33\xff\x75\xee\x62\x56\xa9\xf0\x27\xac\x13\x94\x69")}, + }, []pkg.FlatEntry{ + {Mode: 020000000700, Path: "."}, + + {Mode: 020000000700, Path: "checksum"}, + {Mode: 0400, Path: "checksum/fLYGIMHgN1louE-JzITJZJo2SDniPu-IHBXubtvQWFO-hXnDVKNuscV7-zlyr5fU", Data: []byte("\x7f\xe1\x69\xa2\xdd\x63\x96\x26\x83\x79\x61\x8b\xf0\x3f\xd5\x16\x9a\x39\x3a\xdb\xcf\xb1\xbc\x8d\x33\xff\x75\xee\x62\x56\xa9\xf0\x27\xac\x13\x94\x69")}, + + {Mode: 020000000700, Path: "identifier"}, + {Mode: 0400, Path: "identifier/fLYGIMHgN1louE-JzITJZJo2SDniPu-IHBXubtvQWFO-hXnDVKNuscV7-zlyr5fU", Data: []byte("\x7f\xe1\x69\xa2\xdd\x63\x96\x26\x83\x79\x61\x8b\xf0\x3f\xd5\x16\x9a\x39\x3a\xdb\xcf\xb1\xbc\x8d\x33\xff\x75\xee\x62\x56\xa9\xf0\x27\xac\x13\x94\x69")}, + }, pkg.MustDecode("5ns3Ky8-n_pETpwO3UYA88FKKLins6kxtgRQBEfSiGIpZXu6QCBOW2ukm-nWnUwC")}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/internal/pkg/net.go b/internal/pkg/net.go new file mode 100644 index 0000000..16229b2 --- /dev/null +++ b/internal/pkg/net.go @@ -0,0 +1,156 @@ +package pkg + +import ( + "crypto/sha512" + "errors" + "io" + "net/http" + "os" + "sync" + + "hakurei.app/container/check" +) + +// An httpArtifact is an [Artifact] backed by an [http] request. +type httpArtifact struct { + // Caller-supplied request. + req *http.Request + + // Caller-supplied checksum of the response body, also used as the + // identifier. This is validated during curing. + id ID + + // doFunc is the Do method of [http.Client] supplied by the caller. 
+ doFunc func(req *http.Request) (*http.Response, error) + + // Instance of [Cache] to submit the cured artifact to. + c *Cache + // Response body read to EOF. + data []byte + // Populated when submitting to or loading from [Cache]. + pathname *check.Absolute + + // Synchronises access to pathname and data. + mu sync.Mutex +} + +// NewHTTP returns a new [File] backed by the supplied client and request. If +// hc is nil, [http.DefaultClient] is used instead. +func (c *Cache) NewHTTP(hc *http.Client, req *http.Request, checksum Checksum) File { + if hc == nil { + hc = http.DefaultClient + } + return &httpArtifact{req: req, id: checksum, doFunc: hc.Do, c: c} +} + +// NewHTTPGet returns a new [File] backed by the supplied client. A GET request +// is set up for url. If hc is nil, [http.DefaultClient] is used instead. +func (c *Cache) NewHTTPGet(hc *http.Client, url string, checksum Checksum) (File, error) { + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + return c.NewHTTP(hc, req, checksum), nil +} + +// ID returns the caller-supplied hash of the response body. +func (a *httpArtifact) ID() ID { return a.id } + +// ResponseStatusError is returned for a response returned by an [http.Client] +// with a status code other than [http.StatusOK]. +type ResponseStatusError int + +func (e ResponseStatusError) Error() string { + return "the requested URL returned non-OK status: " + http.StatusText(int(e)) +} + +// do sends the caller-supplied request on the caller-supplied [http.Client] +// and reads its response body to EOF and returns the resulting bytes. 
+func (a *httpArtifact) do() (data []byte, err error) { + var resp *http.Response + if resp, err = a.doFunc(a.req); err != nil { + return + } + + if resp.StatusCode != http.StatusOK { + _ = resp.Body.Close() + return nil, ResponseStatusError(resp.StatusCode) + } + + if data, err = io.ReadAll(resp.Body); err != nil { + _ = resp.Body.Close() + return + } + + err = resp.Body.Close() + return +} + +// Hash cures the [Artifact] and returns its hash. The return value is always +// identical to that of the ID method. +func (a *httpArtifact) Hash() (Checksum, error) { _, err := a.Pathname(); return a.id, err } + +// Pathname cures the [Artifact] and returns its pathname in the [Cache]. +func (a *httpArtifact) Pathname() (pathname *check.Absolute, err error) { + a.mu.Lock() + defer a.mu.Unlock() + + if a.pathname != nil { + return a.pathname, nil + } + + if a.data != nil { + pathname, err = a.c.StoreFile( + a.id, a.data, + (*Checksum)(&a.id), + true, + ) + if err == nil { + a.pathname = pathname + } + return + } else { + a.pathname, a.data, _, err = a.c.LoadOrStoreFile( + a.id, a.do, + (*Checksum)(&a.id), + true, + ) + if err != nil { + a.pathname, a.data = nil, nil + } + return a.pathname, err + } +} + +// Data completes the http request and returns the resulting response body read +// to EOF. Data does not write to the underlying [Cache]. 
+func (a *httpArtifact) Data() (data []byte, err error) { + a.mu.Lock() + defer a.mu.Unlock() + + if a.data != nil { + // validated by cache or a previous call to Data + return a.data, nil + } + + if a.pathname, a.data, err = a.c.LoadFile(a.id); err == nil { + return a.data, nil + } else { + a.pathname, a.data = nil, nil + if !errors.Is(err, os.ErrNotExist) { + return + } + } + + if data, err = a.do(); err != nil { + return + } + + h := sha512.New384() + h.Write(data) + if got := (Checksum)(h.Sum(nil)); got != a.id { + return nil, &ChecksumMismatchError{got, a.id} + } + a.data = data + return +} diff --git a/internal/pkg/net_test.go b/internal/pkg/net_test.go new file mode 100644 index 0000000..87bda6d --- /dev/null +++ b/internal/pkg/net_test.go @@ -0,0 +1,154 @@ +package pkg_test + +import ( + "crypto/sha512" + "encoding/base64" + "net/http" + "reflect" + "testing" + "testing/fstest" + + "hakurei.app/container/check" + "hakurei.app/internal/pkg" +) + +func TestHTTP(t *testing.T) { + t.Parallel() + + const testdata = "\x7f\xe1\x69\xa2\xdd\x63\x96\x26\x83\x79\x61\x8b\xf0\x3f\xd5\x16\x9a\x39\x3a\xdb\xcf\xb1\xbc\x8d\x33\xff\x75\xee\x62\x56\xa9\xf0\x27\xac\x13\x94\x69" + + testdataChecksum := func() pkg.Checksum { + h := sha512.New384() + h.Write([]byte(testdata)) + return (pkg.Checksum)(h.Sum(nil)) + }() + + testdataChecksumString := base64.URLEncoding.EncodeToString(testdataChecksum[:]) + + var transport http.Transport + client := http.Client{Transport: &transport} + transport.RegisterProtocol("file", http.NewFileTransportFS(fstest.MapFS{ + "testdata": {Data: []byte(testdata), Mode: 0400}, + })) + + checkWithCache(t, []cacheTestCase{ + {"direct", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + var got []byte + if f, err := c.NewHTTPGet(&client, "file:///testdata", testdataChecksum); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if got, err = f.Data(); err != nil { + t.Fatalf("Data: error = %v", err) + } else if string(got) != 
testdata { + t.Fatalf("Data: %x, want %x", got, testdata) + } else if gotIdent := f.ID(); gotIdent != testdataChecksum { + t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } + + // check direct validation + wantErrMismatch := &pkg.ChecksumMismatchError{ + Got: testdataChecksum, + } + if f, err := c.NewHTTPGet(&client, "file:///testdata", pkg.Checksum{}); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if _, err = f.Data(); !reflect.DeepEqual(err, wantErrMismatch) { + t.Fatalf("Data: error = %#v, want %#v", err, wantErrMismatch) + } else if gotIdent := f.ID(); gotIdent != (pkg.Checksum{}) { + t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) + } + + // check direct response error + wantErrNotFound := pkg.ResponseStatusError(http.StatusNotFound) + if f, err := c.NewHTTPGet(&client, "file:///nonexistent", pkg.Checksum{}); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if _, err = f.Data(); !reflect.DeepEqual(err, wantErrNotFound) { + t.Fatalf("Data: error = %#v, want %#v", err, wantErrNotFound) + } else if gotIdent := f.ID(); gotIdent != (pkg.Checksum{}) { + t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) + } + }, pkg.MustDecode("ANVz3GwS4oTcFTOjbc-n_N6MtycCtkELMBJB0ohuRz02PtmWZEJF8v3I51DtM0CY")}, + + {"load or store", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + f, err := c.NewHTTPGet(&client, "file:///testdata", testdataChecksum) + if err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } + + wantPathname := base.Append( + "identifier", + testdataChecksumString, + ) + var pathname *check.Absolute + if pathname, err = f.Pathname(); err != nil { + t.Fatalf("Pathname: error = %v", err) + } else if !pathname.Is(wantPathname) { + t.Fatalf("Pathname: %q, want %q", pathname, wantPathname) + } + + var checksum pkg.Checksum + if checksum, err = f.Hash(); err != nil { + t.Fatalf("Hash: error = %v", err) + } else if checksum != testdataChecksum { + t.Fatalf("Hash: %x, want %x", checksum, 
testdataChecksum) + } + + var got []byte + if got, err = f.Data(); err != nil { + t.Fatalf("Data: error = %v", err) + } else if string(got) != testdata { + t.Fatalf("Data: %x, want %x", got, testdata) + } else if gotIdent := f.ID(); gotIdent != testdataChecksum { + t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } + + // check load from cache + if f, err = c.NewHTTPGet(&client, "file:///testdata", testdataChecksum); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if got, err = f.Data(); err != nil { + t.Fatalf("Data: error = %v", err) + } else if string(got) != testdata { + t.Fatalf("Data: %x, want %x", got, testdata) + } else if gotIdent := f.ID(); gotIdent != testdataChecksum { + t.Fatalf("ID: %x, want %x", gotIdent, testdataChecksum) + } + + // check error passthrough + wantErrNotFound := pkg.ResponseStatusError(http.StatusNotFound) + if f, err = c.NewHTTPGet(&client, "file:///nonexistent", pkg.Checksum{}); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if _, err = f.Pathname(); !reflect.DeepEqual(err, wantErrNotFound) { + t.Fatalf("Pathname: error = %#v, want %#v", err, wantErrNotFound) + } else if gotIdent := f.ID(); gotIdent != (pkg.Checksum{}) { + t.Fatalf("ID: %x, want %x", gotIdent, pkg.Checksum{}) + } + }, pkg.MustDecode("5ns3Ky8-n_pETpwO3UYA88FKKLins6kxtgRQBEfSiGIpZXu6QCBOW2ukm-nWnUwC")}, + + {"store", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + var ( + got []byte + pathname *check.Absolute + checksum pkg.Checksum + ) + wantPathname := base.Append( + "identifier", + testdataChecksumString, + ) + if f, err := c.NewHTTPGet(&client, "file:///testdata", testdataChecksum); err != nil { + t.Fatalf("NewHTTPGet: error = %v", err) + } else if got, err = f.Data(); err != nil { + t.Fatalf("Data: error = %v", err) + } else if string(got) != testdata { + t.Fatalf("Data: %x, want %x", got, testdata) + } else if gotIdent := f.ID(); gotIdent != testdataChecksum { + t.Fatalf("ID: %x, want %x", gotIdent, 
testdataChecksum) + } else if pathname, err = f.Pathname(); err != nil { + t.Fatalf("Pathname: error = %v", err) + } else if !pathname.Is(wantPathname) { + t.Fatalf("Pathname: %q, want %q", pathname, wantPathname) + } else if checksum, err = f.Hash(); err != nil { + t.Fatalf("Hash: error = %v", err) + } else if checksum != testdataChecksum { + t.Fatalf("Hash: %x, want %x", checksum, testdataChecksum) + } + }, pkg.MustDecode("5ns3Ky8-n_pETpwO3UYA88FKKLins6kxtgRQBEfSiGIpZXu6QCBOW2ukm-nWnUwC")}, + }) +} diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go index f564572..c5b5cdc 100644 --- a/internal/pkg/pkg_test.go +++ b/internal/pkg/pkg_test.go @@ -5,6 +5,7 @@ import ( "crypto/sha512" "encoding/base64" "io/fs" + "net/http" "os" "path/filepath" "reflect" @@ -281,6 +282,10 @@ func TestErrors(t *testing.T) { }, 8)), }, "got AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + " instead of deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"}, + + {"ResponseStatusError", pkg.ResponseStatusError( + http.StatusNotAcceptable, + ), "the requested URL returned non-OK status: Not Acceptable"}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) {