diff --git a/internal/pkg/dir.go b/internal/pkg/dir.go
index 794fdf5..780b54e 100644
--- a/internal/pkg/dir.go
+++ b/internal/pkg/dir.go
@@ -2,27 +2,149 @@ package pkg
 
 import (
 	"crypto/sha512"
-	"encoding/gob"
+	"encoding/binary"
+	"errors"
 	"io"
 	"io/fs"
+	"math"
 	"os"
+	"syscall"
 
 	"hakurei.app/container/check"
 )
 
-// FlatEntry is the representation of a directory entry via [Flatten].
+// FlatEntry is a directory entry encoded by [Flatten].
 type FlatEntry struct {
-	Name string      // base name of the file
 	Mode fs.FileMode // file mode bits
+	Path string      // pathname of the file
 	Data []byte      // file content or symlink destination
}
 
+/*
+| mode uint32 | path_sz uint32 |
+| data_sz uint64 |
+| path string |
+| data []byte |
+*/
+
+// wordSize is the boundary to which binary segments are always aligned.
+const wordSize = 8
+
+// alignSize returns sz padded to a multiple of wordSize.
+func alignSize(sz int) int {
+	return sz + (wordSize-(sz)%wordSize)%wordSize
+}
+
+// Encode encodes the entry for transmission or hashing.
+func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
+	pPathSize := alignSize(len(ent.Path))
+	if pPathSize > math.MaxUint32 {
+		return 0, syscall.E2BIG
+	}
+	pDataSize := alignSize(len(ent.Data))
+
+	payload := make([]byte, wordSize*2+pPathSize+pDataSize)
+	binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
+	binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
+	binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
+	copy(payload[wordSize*2:], ent.Path)
+	copy(payload[wordSize*2+pPathSize:], ent.Data)
+	return w.Write(payload)
+}
+
+// Decode decodes the entry from its representation produced by Encode.
+func (ent *FlatEntry) Decode(r io.Reader) (n int, err error) {
+	var nr int
+
+	header := make([]byte, wordSize*2)
+	nr, err = r.Read(header)
+	n += nr
+	if err != nil {
+		if errors.Is(err, io.EOF) && n != 0 {
+			err = io.ErrUnexpectedEOF
+		}
+		return
+	}
+
+	ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
+	pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
+	pPathSize := alignSize(pathSize)
+	dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
+	pDataSize := alignSize(dataSize)
+
+	buf := make([]byte, pPathSize+pDataSize)
+	nr, err = r.Read(buf)
+	n += nr
+	if err != nil {
+		if errors.Is(err, io.EOF) {
+			if nr != len(buf) {
+				err = io.ErrUnexpectedEOF
+				return
+			}
+		} else {
+			return
+		}
+	}
+
+	ent.Path = string(buf[:pathSize])
+	if ent.Mode.IsDir() {
+		ent.Data = nil
+	} else {
+		ent.Data = buf[pPathSize : pPathSize+dataSize]
+	}
+	return
+}
+
+// DirScanner provides an efficient interface for reading a stream of encoded
+// [FlatEntry]. Successive calls to the Scan method will step through the
+// entries in the stream.
+type DirScanner struct {
+	// Underlying reader to scan [FlatEntry] representations from.
+	r io.Reader
+
+	// First non-EOF I/O error, returned by the Err method.
+	err error
+
+	// Entry to store results in. Its address is returned by the Entry method
+	// and is updated on every call to Scan.
+	ent FlatEntry
+}
+
+// NewDirScanner returns the address of a new instance of [DirScanner] reading
+// from r. The caller must no longer read from r after this function returns.
+func NewDirScanner(r io.Reader) *DirScanner { return &DirScanner{r: r} }
+
+// Err returns the first non-EOF I/O error.
+func (s *DirScanner) Err() error {
+	if errors.Is(s.err, io.EOF) {
+		return nil
+	}
+	return s.err
+}
+
+// Entry returns the address of the [FlatEntry] value storing the last result.
+func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
+
+// Scan advances to the next [FlatEntry].
+func (s *DirScanner) Scan() bool {
+	if s.err != nil {
+		return false
+	}
+
+	var n int
+	n, s.err = s.ent.Decode(s.r)
+	if errors.Is(s.err, io.EOF) {
+		return n != 0
+	}
+	return s.err == nil
+}
+
 // Flatten writes a deterministic representation of the contents of fsys to w.
 // The resulting data can be hashed to produce a deterministic checksum for the
 // directory.
-func Flatten(fsys fs.FS, root string, w io.Writer) error {
-	e := gob.NewEncoder(w)
-	return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
+func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
+	var nr int
+	err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
 			return err
 		}
@@ -34,7 +156,7 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
 		}
 
 		ent := FlatEntry{
-			Name: fi.Name(),
+			Path: path,
 			Mode: fi.Mode(),
 		}
 		if ent.Mode.IsRegular() {
@@ -49,14 +171,17 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
 			ent.Data = []byte(newpath)
 		}
 
-		return e.Encode(&ent)
+		nr, err = ent.Encode(w)
+		n += nr
+		return err
 	})
+	return
 }
 
 // HashFS returns a checksum produced by hashing the result of [Flatten].
 func HashFS(fsys fs.FS, root string) (Checksum, error) {
 	h := sha512.New384()
-	if err := Flatten(fsys, root, h); err != nil {
+	if _, err := Flatten(fsys, root, h); err != nil {
 		return Checksum{}, err
 	}
 	return (Checksum)(h.Sum(nil)), nil
diff --git a/internal/pkg/dir_test.go b/internal/pkg/dir_test.go
index 2e3b907..7bc877d 100644
--- a/internal/pkg/dir_test.go
+++ b/internal/pkg/dir_test.go
@@ -1,7 +1,9 @@
 package pkg_test
 
 import (
+	"bytes"
 	"io/fs"
+	"reflect"
 	"testing"
 	"testing/fstest"
 
@@ -12,36 +14,75 @@ func TestFlatten(t *testing.T) {
 	t.Parallel()
 
 	testCases := []struct {
-		name string
-		fsys fs.FS
-		want pkg.Checksum
+		name    string
+		fsys    fs.FS
+		entries []pkg.FlatEntry
+		sum     pkg.Checksum
 	}{
 		{"sample cache file", fstest.MapFS{
 			".":        {Mode: 020000000700},
 			"checksum": {Mode: 020000000700},
-			"checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}},
-			"checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
+			"checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0}},
+			"checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
 
 			"identifier": {Mode: 020000000700},
-			"identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0x0}},
-			"identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
-			"identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
-			"identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef": {Mode: 0400, Data: []byte{0x0, 0x0, 0x0, 0x0, 0xad, 0xb, 0x0, 0x4, 0xfe, 0xfe, 0x0, 0x0, 0xfe, 0xca, 0x0, 0x0}},
-		}, pkg.MustDecode("lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx")},
+			"identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX": {Mode: 0400, Data: []byte{0}},
+			"identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			"identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			"identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef": {Mode: 0400, Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+		}, []pkg.FlatEntry{
+			{Mode: 020000000700, Path: "."},
+
+			{Mode: 020000000700, Path: "checksum"},
+			{Mode: 0400, Path: "checksum/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			{Mode: 0400, Path: "checksum/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", Data: []byte{0}},
+
+			{Mode: 020000000700, Path: "identifier"},
+			{Mode: 0400, Path: "identifier/0bSFPu5Tnd-2Jj0Mv6co23PW2t3BmHc7eLFj9TgY3eIBg8zislo7xZYNBqovVLcq", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			{Mode: 0400, Path: "identifier/cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			{Mode: 0400, Path: "identifier/deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef", Data: []byte{0, 0, 0, 0, 0xad, 0xb, 0, 4, 0xfe, 0xfe, 0, 0, 0xfe, 0xca, 0, 0}},
+			{Mode: 0400, Path: "identifier/vsAhtPNo4waRNOASwrQwcIPTqb3SBuJOXw2G4T1mNmVZM-wrQTRllmgXqcIIoRcX", Data: []byte{0}},
+		}, pkg.MustDecode("ZNSQH-mjhtIbFvi51lQ0UjatjoS8_5ILrBPNWlO2LWTq9P6MJEnekYzP0esUJnVr")},
 	}
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			t.Parallel()
 
-			if got, err := pkg.HashFS(tc.fsys, "."); err != nil {
-				t.Fatalf("HashFS: error = %v", err)
-			} else if got != tc.want {
-				t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
-					Got:  got,
-					Want: tc.want,
-				})
-			}
+			t.Run("roundtrip", func(t *testing.T) {
+				t.Parallel()
+
+				var buf bytes.Buffer
+				if _, err := pkg.Flatten(tc.fsys, ".", &buf); err != nil {
+					t.Fatalf("Flatten: error = %v", err)
+				}
+
+				s := pkg.NewDirScanner(bytes.NewReader(buf.Bytes()))
+				var got []pkg.FlatEntry
+				for s.Scan() {
+					got = append(got, *s.Entry())
+				}
+				if err := s.Err(); err != nil {
+					t.Fatalf("Err: error = %v", err)
+				}
+
+				if !reflect.DeepEqual(got, tc.entries) {
+					t.Fatalf("Scan: %#v, want %#v", got, tc.entries)
+				}
+			})
+
+			t.Run("hash", func(t *testing.T) {
+				t.Parallel()
+
+				if got, err := pkg.HashFS(tc.fsys, "."); err != nil {
+					t.Fatalf("HashFS: error = %v", err)
+				} else if got != tc.sum {
+					t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
+						Got:  got,
+						Want: tc.sum,
+					})
+				}
+			})
 		})
 	}
 }
diff --git a/internal/pkg/pkg_test.go b/internal/pkg/pkg_test.go
index c73049e..bd2096d 100644
--- a/internal/pkg/pkg_test.go
+++ b/internal/pkg/pkg_test.go
@@ -211,7 +211,7 @@ func TestCache(t *testing.T) {
 			}
 		}, func(t *testing.T, base *check.Absolute) {
 			wantChecksum := pkg.MustDecode(
-				"lvK4lY9bQUFscHpxqHmiPvptjUwOgn3BFhzCXZMeupkY1n22WUPSuh7pswEvVZrx",
+				"ZNSQH-mjhtIbFvi51lQ0UjatjoS8_5ILrBPNWlO2LWTq9P6MJEnekYzP0esUJnVr",
 			)
 			if checksum, err := pkg.HashDir(base); err != nil {
 				t.Fatalf("HashDir: error = %v", err)
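The expected checksums in the tests change because the encoding changes: entries are no longer gob-encoded with only the base name, but written in the fixed little-endian layout shown in dir.go and keyed by the full path, so sums produced by the old format (the lvK4… value) are not comparable to the new ones.

For trying out the new API, below is a minimal usage sketch of the Flatten/DirScanner round trip; it is not part of the patch. The import path hakurei.app/internal/pkg is an assumption inferred from the hakurei.app/container/check import (being internal, it only compiles from within the module), and exampleFS is made-up sample data in the style of TestFlatten.

```go
package pkg_test

import (
	"bytes"
	"fmt"
	"testing/fstest"

	"hakurei.app/internal/pkg" // assumed import path, module-internal
)

// ExampleDirScanner flattens a small in-memory tree, then scans the encoded
// stream back into FlatEntry values, mirroring the "roundtrip" subtest.
func ExampleDirScanner() {
	exampleFS := fstest.MapFS{ // hypothetical sample data
		".":        {Mode: 020000000700},
		"dir":      {Mode: 020000000700},
		"dir/file": {Mode: 0400, Data: []byte("hello")},
	}

	// Flatten walks the tree in lexical order and writes each entry as an
	// 8-byte-aligned binary record: mode and path size, data size, then the
	// padded path and data.
	var buf bytes.Buffer
	if _, err := pkg.Flatten(exampleFS, ".", &buf); err != nil {
		fmt.Println("Flatten:", err)
		return
	}

	// DirScanner steps through the encoded entries one at a time; Entry
	// returns the address of a value that is overwritten on every Scan.
	s := pkg.NewDirScanner(bytes.NewReader(buf.Bytes()))
	for s.Scan() {
		ent := s.Entry()
		fmt.Printf("%v %s (%d bytes)\n", ent.Mode, ent.Path, len(ent.Data))
	}
	if err := s.Err(); err != nil {
		fmt.Println("Err:", err)
	}
}
```

Hashing the same stream with HashFS (or HashDir for an on-disk tree) yields the checksum the tests compare against, since both feed the Flatten output into SHA-384.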