diff --git a/internal/pkg/pkg.go b/internal/pkg/pkg.go new file mode 100644 index 0000000..3849613 --- /dev/null +++ b/internal/pkg/pkg.go @@ -0,0 +1,307 @@ +// Package pkg provides utilities for packaging software. +package pkg + +import ( + "crypto/sha512" + "encoding/base64" + "encoding/gob" + "errors" + "io" + "io/fs" + "os" + "sync" + + "hakurei.app/container/check" +) + +type ( + // A Checksum is a SHA-384 checksum computed for a cured [Artifact]. + Checksum = [sha512.Size384]byte + + // An ID is a unique identifier returned by [Artifact.ID]. This value must + // be deterministically determined ahead of time. + ID Checksum +) + +// MustDecode decodes a string representation of [Checksum] and panics if there +// is a decoding error or the resulting data is too short. +func MustDecode(s string) (checksum Checksum) { + if n, err := base64.URLEncoding.Decode( + checksum[:], + []byte(s), + ); err != nil { + panic(err) + } else if n != len(Checksum{}) { + panic(io.ErrUnexpectedEOF) + } + return +} + +// An Artifact is a read-only reference to a piece of data that may be created +// deterministically but might not currently be available in memory or on the +// filesystem. +type Artifact interface { + // ID returns a globally unique identifier referring to the current + // [Artifact]. This value must be known ahead of time and guaranteed to be + // unique without having obtained the full contents of the [Artifact]. + ID() ID + + // Hash returns the [Checksum] created from the full contents of a cured + // [Artifact]. This can be stored for future lookup in a [Cache]. + // + // A call to Hash implicitly cures [Artifact]. + Hash() (Checksum, error) + + // Pathname returns an absolute pathname to a file or directory populated + // with the full contents of [Artifact]. This is the most expensive + // operation possible on any [Artifact] and should be avoided if possible. + // + // A call to Pathname implicitly cures [Artifact]. + // + // Callers must only open files read-only. If [Artifact] is a directory, + // files must not be created or removed under this directory. + Pathname() (*check.Absolute, error) +} + +// A File refers to an [Artifact] backed by a single file. +type File interface { + // Data returns the full contents of [Artifact]. + // + // Callers must not modify the returned byte slice. + Data() ([]byte, error) + + Artifact +} + +// FlatEntry is the representation of a directory entry via [Flatten]. +type FlatEntry struct { + Name string // base name of the file + Mode fs.FileMode // file mode bits + Data []byte // file content or symlink destination +} + +// Flatten writes a deterministic representation of the contents of fsys to w. +// The resulting data can be hashed to produce a deterministic checksum for the +// directory. +func Flatten(fsys fs.FS, root string, w io.Writer) error { + e := gob.NewEncoder(w) + return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + var fi fs.FileInfo + fi, err = d.Info() + if err != nil { + return err + } + + ent := FlatEntry{ + Name: fi.Name(), + Mode: fi.Mode(), + } + if ent.Mode.IsRegular() { + if ent.Data, err = fs.ReadFile(fsys, path); err != nil { + return err + } + } else if ent.Mode&fs.ModeSymlink != 0 { + var newpath string + if newpath, err = fs.ReadLink(fsys, path); err != nil { + return err + } + ent.Data = []byte(newpath) + } + + return e.Encode(&ent) + }) +} + +// HashFS returns a checksum produced by hashing the result of [Flatten]. +func HashFS(fsys fs.FS, root string) (Checksum, error) { + h := sha512.New384() + if err := Flatten(fsys, root, h); err != nil { + return Checksum{}, err + } + return (Checksum)(h.Sum(nil)), nil +} + +// HashDir returns a checksum produced by hashing the result of [Flatten]. +func HashDir(pathname *check.Absolute) (Checksum, error) { + return HashFS(os.DirFS(pathname.String()), ".") +} + +const ( + // dirIdentifier is the directory name appended to Cache.base for storing + // artifacts named after their [ID]. + dirIdentifier = "identifier" + // dirChecksum is the directory name appended to Cache.base for storing + // artifacts named after their [Checksum]. + dirChecksum = "checksum" +) + +// Cache is a support layer that implementations of [Artifact] can use to store +// cured [Artifact] data in a content addressed fashion. +type Cache struct { + // Directory where all [Cache] related files are placed. + base *check.Absolute + + // Synchronises access to public methods. + mu sync.RWMutex +} + +// LoadFile loads the contents of a [File] by its identifier. +func (c *Cache) LoadFile(id ID) ( + pathname *check.Absolute, + data []byte, + err error, +) { + pathname = c.base.Append( + dirIdentifier, + base64.URLEncoding.EncodeToString(id[:]), + ) + + c.mu.RLock() + data, err = os.ReadFile(pathname.String()) + c.mu.RUnlock() + + return +} + +// A ChecksumMismatchError describes an [Artifact] with unexpected content. +type ChecksumMismatchError struct { + // Actual and expected checksums. + Got, Want Checksum +} + +func (e *ChecksumMismatchError) Error() string { + return "got " + base64.URLEncoding.EncodeToString(e.Got[:]) + + " instead of " + base64.URLEncoding.EncodeToString(e.Want[:]) +} + +// pathname returns the content-addressed pathname for a [Checksum]. +func (c *Cache) pathname(checksum *Checksum) *check.Absolute { + return c.base.Append( + dirChecksum, + base64.URLEncoding.EncodeToString(checksum[:]), + ) +} + +// pathnameIdent returns the identifier-based pathname for an [ID]. +func (c *Cache) pathnameIdent(id *ID) *check.Absolute { + return c.base.Append( + dirIdentifier, + base64.URLEncoding.EncodeToString(id[:]), + ) +} + +// storeFile stores the contents of a [File]. An optional checksum can be +// passed via the result buffer which is used to validate the submitted data. +// +// If locking is disabled, the caller is responsible for acquiring a write lock +// and releasing it after this method returns. This makes LoadOrStoreFile +// possible without holding the lock while computing hash for store only. +func (c *Cache) storeFile( + identifierPathname *check.Absolute, + data []byte, + buf *Checksum, + validate, lock bool, +) error { + h := sha512.New384() + h.Write(data) + if validate { + if got := (Checksum)(h.Sum(nil)); got != *buf { + return &ChecksumMismatchError{got, *buf} + } + } else { + h.Sum(buf[:0]) + } + + checksumPathname := c.pathname(buf) + + if lock { + c.mu.Lock() + defer c.mu.Unlock() + } + + if f, err := os.OpenFile( + checksumPathname.String(), + os.O_WRONLY|os.O_CREATE|os.O_EXCL, + 0400, + ); err != nil { + // two artifacts may be backed by the same file + if !errors.Is(err, os.ErrExist) { + return err + } + } else if _, err = f.Write(data); err != nil { + // do not attempt cleanup: this is content-addressed and a partial + // write is caught during integrity check + return err + } + + return os.Link( + checksumPathname.String(), + identifierPathname.String(), + ) +} + +// StoreFile stores the contents of a [File]. An optional checksum can be +// passed via the result buffer which is used to validate the submitted data. +func (c *Cache) StoreFile( + id ID, + data []byte, + buf *Checksum, + validate bool, +) (pathname *check.Absolute, err error) { + pathname = c.pathnameIdent(&id) + err = c.storeFile(pathname, data, buf, validate, true) + return +} + +// LoadOrStoreFile attempts to load the contents of a [File] by its identifier, +// and if that file is not present, calls makeData and stores its result +// instead. Hash validation behaviour is identical to StoreFile. +func (c *Cache) LoadOrStoreFile( + id ID, + makeData func() ([]byte, error), + buf *Checksum, + validate bool, +) ( + pathname *check.Absolute, + data []byte, + store bool, + err error, +) { + pathname = c.pathnameIdent(&id) + c.mu.Lock() + defer c.mu.Unlock() + + data, err = os.ReadFile(pathname.String()) + if err == nil || !errors.Is(err, os.ErrNotExist) { + return + } + store = true + + data, err = makeData() + if err != nil { + return + } + err = c.storeFile(pathname, data, buf, validate, false) + return +} + +// New returns the address to a new instance of [Cache]. +func New(base *check.Absolute) (*Cache, error) { + for _, name := range []string{ + dirIdentifier, + dirChecksum, + } { + if err := os.MkdirAll(base.Append(name).String(), 0700); err != nil && + !errors.Is(err, os.ErrExist) { + return nil, err + } + } + + return &Cache{ + base: base, + }, nil +} diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go new file mode 100644 index 0000000..e6ff41e --- /dev/null +++ b/internal/pkg/pkg_test.go @@ -0,0 +1,366 @@ +package pkg_test + +import ( + "bytes" + "crypto/sha512" + "encoding/base64" + "io/fs" + "os" + "path/filepath" + "reflect" + "syscall" + "testing" + "testing/fstest" + + "hakurei.app/container" + "hakurei.app/container/check" + "hakurei.app/container/stub" + "hakurei.app/internal/pkg" +) + +func TestCache(t *testing.T) { + t.Parallel() + + const testdata = "" + + "\x00\x00\x00\x00" + + "\xad\x0b\x00" + + "\x04" + + "\xfe\xfe\x00\x00" + + "\xfe\xca\x00\x00" + + testdataChecksum := func() pkg.Checksum { + h := sha512.New384() + h.Write([]byte(testdata)) + return (pkg.Checksum)(h.Sum(nil)) + }() + + testdataChecksumString := base64.URLEncoding.EncodeToString(testdataChecksum[:]) + + testCases := []struct { + name string + early func(t *testing.T, base *check.Absolute) + f func(t *testing.T, base *check.Absolute, c *pkg.Cache) + check func(t *testing.T, base *check.Absolute) + }{ + {"file", nil, func(t *testing.T, base *check.Absolute, c *pkg.Cache) { + wantErrNonexistent := &os.PathError{ + Op: "open", + Path: base.Append( + "identifier", + testdataChecksumString, + ).String(), + Err: syscall.ENOENT, + } + if _, _, err := c.LoadFile(testdataChecksum); !reflect.DeepEqual(err, wantErrNonexistent) { + t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistent) + } + + identifier := (pkg.ID)(bytes.Repeat([]byte{ + 0x75, 0xe6, 0x9d, 0x6d, 0xe7, 0x9f, + }, 8)) + wantPathname := base.Append( + "identifier", + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", + ) + identifier0 := (pkg.ID)(bytes.Repeat([]byte{ + 0x71, 0xa7, 0xde, 0x6d, 0xa6, 0xde, + }, 8)) + wantPathname0 := base.Append( + "identifier", + "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe", + ) + + // initial store + if pathname, err := c.StoreFile( + identifier, + []byte(testdata), + &testdataChecksum, + true, + ); err != nil { + t.Fatalf("StoreFile: error = %v", err) + } else if !pathname.Is(wantPathname) { + t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathname) + } + + // load or store, identical content + if pathname, data, store, err := c.LoadOrStoreFile(identifier0, func() ([]byte, error) { + return []byte(testdata), nil + }, &testdataChecksum, true); err != nil { + t.Fatalf("LoadOrStoreFile: error = %v", err) + } else if !pathname.Is(wantPathname0) { + t.Fatalf("LoadOrStoreFile: pathname = %q, want %q", pathname, wantPathname0) + } else if string(data) != testdata { + t.Fatalf("LoadOrStoreFile: data = %x, want %x", data, testdata) + } else if !store { + t.Fatal("LoadOrStoreFile did not store nonpresent entry") + } + + // load or store, existing entry + if pathname, data, store, err := c.LoadOrStoreFile(identifier, func() ([]byte, error) { + return []byte(testdata), nil + }, &testdataChecksum, true); err != nil { + t.Fatalf("LoadOrStoreFile: error = %v", err) + } else if !pathname.Is(wantPathname) { + t.Fatalf("LoadOrStoreFile: pathname = %q, want %q", pathname, wantPathname) + } else if string(data) != testdata { + t.Fatalf("LoadOrStoreFile: data = %x, want %x", data, testdata) + } else if store { + t.Fatal("LoadOrStoreFile stored over present entry") + } + + // load, existing entry + if pathname, data, err := c.LoadFile(identifier0); err != nil { + t.Fatalf("LoadFile: error = %v", err) + } else if !pathname.Is(wantPathname0) { + t.Fatalf("LoadFile: pathname = %q, want %q", pathname, wantPathname0) + } else if string(data) != testdata { + t.Fatalf("LoadFile: data = %x, want %x", data, testdata) + } + + // checksum mismatch + wantErrChecksum := &pkg.ChecksumMismatchError{ + Got: testdataChecksum, + } + if _, err := c.StoreFile( + testdataChecksum, + []byte(testdata), + new(pkg.Checksum), + true, + ); !reflect.DeepEqual(err, wantErrChecksum) { + t.Fatalf("StoreFile: error = %#v, want %#v", err, wantErrChecksum) + } + + // verify failed store + if _, _, err := c.LoadFile(testdataChecksum); !reflect.DeepEqual(err, wantErrNonexistent) { + t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistent) + } + + // store, same identifier + wantPathnameF := base.Append( + "identifier", + testdataChecksumString, + ) + if pathname, err := c.StoreFile( + testdataChecksum, + []byte(testdata), + &testdataChecksum, + true, + ); err != nil { + t.Fatalf("StoreFile: error = %v", err) + } else if !pathname.Is(wantPathnameF) { + t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathnameF) + } + + // load, same identifier + if pathname, data, err := c.LoadFile(testdataChecksum); err != nil { + t.Fatalf("LoadFile: error = %v", err) + } else if !pathname.Is(wantPathnameF) { + t.Fatalf("LoadFile: pathname = %q, want %q", pathname, wantPathnameF) + } else if string(data) != testdata { + t.Fatalf("LoadFile: data = %x, want %x", data, testdata) + } + + // store without validation + wantChecksum := pkg.Checksum{ + 0xbe, 0xc0, 0x21, 0xb4, 0xf3, 0x68, + 0xe3, 0x06, 0x91, 0x34, 0xe0, 0x12, + 0xc2, 0xb4, 0x30, 0x70, 0x83, 0xd3, + 0xa9, 0xbd, 0xd2, 0x06, 0xe2, 0x4e, + 0x5f, 0x0d, 0x86, 0xe1, 0x3d, 0x66, + 0x36, 0x65, 0x59, 0x33, 0xec, 0x2b, + 0x41, 0x34, 0x65, 0x96, 0x68, 0x17, + 0xa9, 0xc2, 0x08, 0xa1, 0x17, 0x17, + } + var gotChecksum pkg.Checksum + wantPathnameG := base.Append( + "identifier", + base64.URLEncoding.EncodeToString(wantChecksum[:]), + ) + if pathname, err := c.StoreFile( + wantChecksum, + []byte{0}, + &gotChecksum, + false, + ); err != nil { + t.Fatalf("StoreFile: error = %#v", err) + } else if !pathname.Is(wantPathnameG) { + t.Fatalf("StoreFile: pathname = %q, want %q", pathname, wantPathnameG) + } else if gotChecksum != wantChecksum { + t.Fatalf("StoreFile: buf = %x, want %x", gotChecksum, wantChecksum) + } + + // makeData passthrough + var zeroIdent pkg.ID + wantErrPassthrough := stub.UniqueError(0xcafe) + if _, _, _, err := c.LoadOrStoreFile(zeroIdent, func() ([]byte, error) { + return nil, wantErrPassthrough + }, new(pkg.Checksum), true); !reflect.DeepEqual(err, wantErrPassthrough) { + t.Fatalf("LoadOrStoreFile: error = %#v, want %#v", err, wantErrPassthrough) + } + + // verify failed store + wantErrNonexistentZero := &os.PathError{ + Op: "open", + Path: base.Append( + "identifier", + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + ).String(), + Err: syscall.ENOENT, + } + if _, _, err := c.LoadFile(zeroIdent); !reflect.DeepEqual(err, wantErrNonexistentZero) { + t.Fatalf("LoadFile: error = %#v, want %#v", err, wantErrNonexistentZero) + } + }, func(t *testing.T, base *check.Absolute) { + wantChecksum := pkg.MustDecode( + "lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx", + ) + if checksum, err := pkg.HashDir(base); err != nil { + t.Fatalf("HashDir: error = %v", err) + } else if checksum != wantChecksum { + t.Fatalf("HashDir: %v", &pkg.ChecksumMismatchError{ + Got: checksum, + Want: wantChecksum, + }) + } + }}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + base := check.MustAbs(t.TempDir()) + if err := os.Chmod(base.String(), 0700); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if err := filepath.WalkDir(base.String(), func(path string, d fs.DirEntry, err error) error { + if err != nil { + t.Error(err) + return nil + } + if !d.IsDir() { + return nil + } + return os.Chmod(path, 0700) + }); err != nil { + t.Fatal(err) + } + }) + + if c, err := pkg.New(base); err != nil { + t.Fatalf("New: error = %v", err) + } else { + if tc.early != nil { + tc.early(t, base) + } + tc.f(t, base, c) + tc.check(t, base) + } + }) + } +} + +func TestFlatten(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + fsys fs.FS + want pkg.Checksum + }{ + {"sample cache file", fstest.MapFS{ + ".": {Mode: 020000000700}, + + "checksum": {Mode: 020000000700}, + "checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}}, + "checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}}, + + "identifier": {Mode: 020000000700}, + "identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}}, + "identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}}, + "identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}}, + "identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}}, + }, pkg.MustDecode("lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx")}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + if got, err := pkg.HashFS(tc.fsys, "."); err != nil { + t.Fatalf("HashFS: error = %v", err) + } else if got != tc.want { + t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{ + Got: got, + Want: tc.want, + }) + } + }) + } +} + +func TestErrors(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + err error + want string + }{ + {"ChecksumMismatchError", &pkg.ChecksumMismatchError{ + Want: (pkg.Checksum)(bytes.Repeat([]byte{ + 0x75, 0xe6, 0x9d, 0x6d, 0xe7, 0x9f, + }, 8)), + }, "got AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + + " instead of deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + if got := tc.err.Error(); got != tc.want { + t.Errorf("Error: %q, want %q", got, tc.want) + } + }) + } +} + +func TestNew(t *testing.T) { + t.Parallel() + + t.Run("nonexistent", func(t *testing.T) { + t.Parallel() + + wantErr := &os.PathError{ + Op: "mkdir", + Path: container.Nonexistent, + Err: syscall.ENOENT, + } + if _, err := pkg.New(check.MustAbs(container.Nonexistent)); !reflect.DeepEqual(err, wantErr) { + t.Errorf("New: error = %#v, want %#v", err, wantErr) + } + }) + + t.Run("permission", func(t *testing.T) { + t.Parallel() + + tempDir := check.MustAbs(t.TempDir()) + if err := os.Chmod(tempDir.String(), 0); err != nil { + t.Fatal(err) + } else { + t.Cleanup(func() { + if err = os.Chmod(tempDir.String(), 0700); err != nil { + t.Fatal(err) + } + }) + } + + wantErr := &os.PathError{ + Op: "mkdir", + Path: tempDir.Append("cache").String(), + Err: syscall.EACCES, + } + if _, err := pkg.New(tempDir.Append("cache")); !reflect.DeepEqual(err, wantErr) { + t.Errorf("New: error = %#v, want %#v", err, wantErr) + } + }) +}