internal/pkg: encode entry in custom format
All checks were successful
Test / Create distribution (push) Successful in 43s
Test / Sandbox (push) Successful in 2m27s
Test / ShareFS (push) Successful in 3m29s
Test / Hpkg (push) Successful in 4m14s
Test / Sandbox (race detector) (push) Successful in 4m40s
Test / Hakurei (race detector) (push) Successful in 5m36s
Test / Hakurei (push) Successful in 2m26s
Test / Flake checks (push) Successful in 1m42s
All checks were successful
Test / Create distribution (push) Successful in 43s
Test / Sandbox (push) Successful in 2m27s
Test / ShareFS (push) Successful in 3m29s
Test / Hpkg (push) Successful in 4m14s
Test / Sandbox (race detector) (push) Successful in 4m40s
Test / Hakurei (race detector) (push) Successful in 5m36s
Test / Hakurei (push) Successful in 2m26s
Test / Flake checks (push) Successful in 1m42s
The fact that Gob serialisation is deterministic is an implementation detail. This change replaces Gob with a simple custom format. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -2,27 +2,149 @@ package pkg
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/sha512"
|
"crypto/sha512"
|
||||||
"encoding/gob"
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
"hakurei.app/container/check"
|
"hakurei.app/container/check"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FlatEntry is a directory entry to be encoded for [Flatten].
type FlatEntry struct {
	// Mode holds the file mode bits.
	Mode fs.FileMode
	// Path is the pathname of the file.
	Path string
	// Data holds the file content, or the destination for a symlink.
	Data []byte
}
||||||
|
|
||||||
|
/*
	| mode uint32 | path_sz uint32 |
	| data_sz uint64 |
	| path string |
	| data []byte |
*/

// wordSize is the boundary which binary segments are always aligned to.
const wordSize = 8

// alignSize returns sz rounded up to the nearest multiple of wordSize,
// i.e. the padded size of a segment holding sz bytes.
func alignSize(sz int) int {
	rem := sz % wordSize
	if rem == 0 {
		// Already on a word boundary; no padding required.
		return sz
	}
	return sz + wordSize - rem
}
|
||||||
|
|
||||||
|
// Encode encodes the entry for transmission or hashing.
|
||||||
|
func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
|
||||||
|
pPathSize := alignSize(len(ent.Path))
|
||||||
|
if pPathSize > math.MaxUint32 {
|
||||||
|
return 0, syscall.E2BIG
|
||||||
|
}
|
||||||
|
pDataSize := alignSize(len(ent.Data))
|
||||||
|
|
||||||
|
payload := make([]byte, wordSize*2+pPathSize+pDataSize)
|
||||||
|
binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
|
||||||
|
binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
|
||||||
|
binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
|
||||||
|
copy(payload[wordSize*2:], ent.Path)
|
||||||
|
copy(payload[wordSize*2+pPathSize:], ent.Data)
|
||||||
|
return w.Write(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode decodes the entry from its representation produced by Encode.
|
||||||
|
func (ent *FlatEntry) Decode(r io.Reader) (n int, err error) {
|
||||||
|
var nr int
|
||||||
|
|
||||||
|
header := make([]byte, wordSize*2)
|
||||||
|
nr, err = r.Read(header)
|
||||||
|
n += nr
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) && n != 0 {
|
||||||
|
err = io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
|
||||||
|
pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
|
||||||
|
pPathSize := alignSize(pathSize)
|
||||||
|
dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
|
||||||
|
pDataSize := alignSize(dataSize)
|
||||||
|
|
||||||
|
buf := make([]byte, pPathSize+pDataSize)
|
||||||
|
nr, err = r.Read(buf)
|
||||||
|
n += nr
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
if nr != len(buf) {
|
||||||
|
err = io.ErrUnexpectedEOF
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ent.Path = string(buf[:pathSize])
|
||||||
|
if ent.Mode.IsDir() {
|
||||||
|
ent.Data = nil
|
||||||
|
} else {
|
||||||
|
ent.Data = buf[pPathSize : pPathSize+dataSize]
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// DirScanner provides an efficient interface for reading a stream of encoded
|
||||||
|
// [FlatEntry]. Successive calls to the Scan method will step through the
|
||||||
|
// entries in the stream.
|
||||||
|
type DirScanner struct {
|
||||||
|
// Underlying reader to scan [FlatEntry] representations from.
|
||||||
|
r io.Reader
|
||||||
|
|
||||||
|
// First non-EOF I/O error, returned by the Err method.
|
||||||
|
err error
|
||||||
|
|
||||||
|
// Entry to store results in. Its address is returned by the Entry method
|
||||||
|
// and is updated on every call to Scan.
|
||||||
|
ent FlatEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDirScanner returns the address of a new instance of [DirScanner] reading
|
||||||
|
// from r. The caller must no longer read from r after this function returns.
|
||||||
|
func NewDirScanner(r io.Reader) *DirScanner { return &DirScanner{r: r} }
|
||||||
|
|
||||||
|
// Err returns the first non-EOF I/O error.
|
||||||
|
func (s *DirScanner) Err() error {
|
||||||
|
if errors.Is(s.err, io.EOF) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry returns the address to the [FlatEntry] value storing the last result.
|
||||||
|
func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
|
||||||
|
|
||||||
|
// Scan advances to the next [FlatEntry].
|
||||||
|
func (s *DirScanner) Scan() bool {
|
||||||
|
if s.err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
var n int
|
||||||
|
n, s.err = s.ent.Decode(s.r)
|
||||||
|
if errors.Is(s.err, io.EOF) {
|
||||||
|
return n != 0
|
||||||
|
}
|
||||||
|
return s.err == nil
|
||||||
|
}
|
||||||
|
|
||||||
// Flatten writes a deterministic representation of the contents of fsys to w.
|
// Flatten writes a deterministic representation of the contents of fsys to w.
|
||||||
// The resulting data can be hashed to produce a deterministic checksum for the
|
// The resulting data can be hashed to produce a deterministic checksum for the
|
||||||
// directory.
|
// directory.
|
||||||
func Flatten(fsys fs.FS, root string, w io.Writer) error {
|
func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
|
||||||
e := gob.NewEncoder(w)
|
var nr int
|
||||||
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -34,7 +156,7 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ent := FlatEntry{
|
ent := FlatEntry{
|
||||||
Name: fi.Name(),
|
Path: path,
|
||||||
Mode: fi.Mode(),
|
Mode: fi.Mode(),
|
||||||
}
|
}
|
||||||
if ent.Mode.IsRegular() {
|
if ent.Mode.IsRegular() {
|
||||||
@@ -49,14 +171,17 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
|
|||||||
ent.Data = []byte(newpath)
|
ent.Data = []byte(newpath)
|
||||||
}
|
}
|
||||||
|
|
||||||
return e.Encode(&ent)
|
nr, err = ent.Encode(w)
|
||||||
|
n += nr
|
||||||
|
return err
|
||||||
})
|
})
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
||||||
func HashFS(fsys fs.FS, root string) (Checksum, error) {
|
func HashFS(fsys fs.FS, root string) (Checksum, error) {
|
||||||
h := sha512.New384()
|
h := sha512.New384()
|
||||||
if err := Flatten(fsys, root, h); err != nil {
|
if _, err := Flatten(fsys, root, h); err != nil {
|
||||||
return Checksum{}, err
|
return Checksum{}, err
|
||||||
}
|
}
|
||||||
return (Checksum)(h.Sum(nil)), nil
|
return (Checksum)(h.Sum(nil)), nil
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
package pkg_test
|
package pkg_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
"testing/fstest"
|
"testing/fstest"
|
||||||
|
|
||||||
@@ -12,36 +14,75 @@ func TestFlatten(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
name string
|
name string
|
||||||
fsys fs.FS
|
fsys fs.FS
|
||||||
want pkg.Checksum
|
entries []pkg.FlatEntry
|
||||||
|
sum pkg.Checksum
|
||||||
}{
|
}{
|
||||||
{"sample cache file", fstest.MapFS{
|
{"sample cache file", fstest.MapFS{
|
||||||
".": {Mode: 020000000700},
|
".": {Mode: 020000000700},
|
||||||
|
|
||||||
"checksum": {Mode: 020000000700},
|
"checksum": {Mode: 020000000700},
|
||||||
"checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}},
|
"checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0}},
|
||||||
"checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
|
"checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
|
|
||||||
"identifier": {Mode: 020000000700},
|
"identifier": {Mode: 020000000700},
|
||||||
"identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}},
|
"identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0}},
|
||||||
"identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
|
"identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
"identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
|
"identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
"identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
|
"identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
}, pkg.MustDecode("lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx")},
|
}, []pkg.FlatEntry{
|
||||||
|
{Mode: 020000000700, Path: "."},
|
||||||
|
|
||||||
|
{Mode: 020000000700, Path: "checksum"},
|
||||||
|
{Mode: 0400, Path: "checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
|
{Mode: 0400, Path: "checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", Data: []byte{0}},
|
||||||
|
|
||||||
|
{Mode: 020000000700, Path: "identifier"},
|
||||||
|
{Mode: 0400, Path: "identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
|
{Mode: 0400, Path: "identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
|
{Mode: 0400, Path: "identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
|
||||||
|
{Mode: 0400, Path: "identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", Data: []byte{0}},
|
||||||
|
}, pkg.MustDecode("ZNSQH-mjhtIbFvi51lQ0UjatjoS8_5ILrBPNWlO2LWTq9P6MJEnekYzP0esUJnVr")},
|
||||||
}
|
}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
if got, err := pkg.HashFS(tc.fsys, "."); err != nil {
|
t.Run("roundtrip", func(t *testing.T) {
|
||||||
t.Fatalf("HashFS: error = %v", err)
|
t.Parallel()
|
||||||
} else if got != tc.want {
|
|
||||||
t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
|
var buf bytes.Buffer
|
||||||
Got: got,
|
if _, err := pkg.Flatten(tc.fsys, ".", &buf); err != nil {
|
||||||
Want: tc.want,
|
t.Fatalf("Flatten: error = %v", err)
|
||||||
})
|
}
|
||||||
}
|
|
||||||
|
s := pkg.NewDirScanner(bytes.NewReader(buf.Bytes()))
|
||||||
|
var got []pkg.FlatEntry
|
||||||
|
for s.Scan() {
|
||||||
|
got = append(got, *s.Entry())
|
||||||
|
}
|
||||||
|
if err := s.Err(); err != nil {
|
||||||
|
t.Fatalf("Err: error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(got, tc.entries) {
|
||||||
|
t.Fatalf("Scan: %#v, want %#v", got, tc.entries)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("hash", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
if got, err := pkg.HashFS(tc.fsys, "."); err != nil {
|
||||||
|
t.Fatalf("HashFS: error = %v", err)
|
||||||
|
} else if got != tc.sum {
|
||||||
|
t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
|
||||||
|
Got: got,
|
||||||
|
Want: tc.sum,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ func TestCache(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}, func(t *testing.T, base *check.Absolute) {
|
}, func(t *testing.T, base *check.Absolute) {
|
||||||
wantChecksum := pkg.MustDecode(
|
wantChecksum := pkg.MustDecode(
|
||||||
"lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx",
|
"ZNSQH-mjhtIbFvi51lQ0UjatjoS8_5ILrBPNWlO2LWTq9P6MJEnekYzP0esUJnVr",
|
||||||
)
|
)
|
||||||
if checksum, err := pkg.HashDir(base); err != nil {
|
if checksum, err := pkg.HashDir(base); err != nil {
|
||||||
t.Fatalf("HashDir: error = %v", err)
|
t.Fatalf("HashDir: error = %v", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user