internal/pkg: encode entry in custom format
All checks were successful
Test / Create distribution (push) Successful in 43s
Test / Sandbox (push) Successful in 2m27s
Test / ShareFS (push) Successful in 3m29s
Test / Hpkg (push) Successful in 4m14s
Test / Sandbox (race detector) (push) Successful in 4m40s
Test / Hakurei (race detector) (push) Successful in 5m36s
Test / Hakurei (push) Successful in 2m26s
Test / Flake checks (push) Successful in 1m42s

The fact that Gob serialisation is deterministic is an implementation detail. This change replaces Gob with a simple custom format.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2026-01-02 15:39:42 +09:00
parent e9de5d3aca
commit 3078c41ce7
3 changed files with 194 additions and 28 deletions

View File

@@ -2,27 +2,149 @@ package pkg
import (
"crypto/sha512"
"encoding/gob"
"encoding/binary"
"errors"
"io"
"io/fs"
"math"
"os"
"syscall"
"hakurei.app/container/check"
)
// FlatEntry is the representation of a directory entry via [Flatten].
// FlatEntry is a directory entry to be encoded for [Flatten].
type FlatEntry struct {
Name string // base name of the file
Mode fs.FileMode // file mode bits
Path string // pathname of the file
Data []byte // file content or symlink destination
}
/*
| mode uint32 | path_sz uint32 |
| data_sz uint64 |
| path string |
| data []byte |
*/
// wordSize is the boundary which binary segments are always aligned to.
const wordSize = 8
// alignSize returns the padded size for aligning sz.
func alignSize(sz int) int {
return sz + (wordSize-(sz)%wordSize)%wordSize
}
// Encode encodes the entry for transmission or hashing.
func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
pPathSize := alignSize(len(ent.Path))
if pPathSize > math.MaxUint32 {
return 0, syscall.E2BIG
}
pDataSize := alignSize(len(ent.Data))
payload := make([]byte, wordSize*2+pPathSize+pDataSize)
binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
copy(payload[wordSize*2:], ent.Path)
copy(payload[wordSize*2+pPathSize:], ent.Data)
return w.Write(payload)
}
// Decode decodes the entry from its representation produced by Encode.
func (ent *FlatEntry) Decode(r io.Reader) (n int, err error) {
var nr int
header := make([]byte, wordSize*2)
nr, err = r.Read(header)
n += nr
if err != nil {
if errors.Is(err, io.EOF) && n != 0 {
err = io.ErrUnexpectedEOF
}
return
}
ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
pPathSize := alignSize(pathSize)
dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
pDataSize := alignSize(dataSize)
buf := make([]byte, pPathSize+pDataSize)
nr, err = r.Read(buf)
n += nr
if err != nil {
if errors.Is(err, io.EOF) {
if nr != len(buf) {
err = io.ErrUnexpectedEOF
return
}
} else {
return
}
}
ent.Path = string(buf[:pathSize])
if ent.Mode.IsDir() {
ent.Data = nil
} else {
ent.Data = buf[pPathSize : pPathSize+dataSize]
}
return
}
// DirScanner provides an efficient interface for reading a stream of encoded
// [FlatEntry]. Successive calls to the Scan method will step through the
// entries in the stream.
type DirScanner struct {
// Underlying reader to scan [FlatEntry] representations from.
r io.Reader
// First non-EOF I/O error, returned by the Err method.
err error
// Entry to store results in. Its address is returned by the Entry method
// and is updated on every call to Scan.
ent FlatEntry
}
// NewDirScanner returns the address of a new instance of [DirScanner] reading
// from r. The caller must no longer read from r after this function returns.
func NewDirScanner(r io.Reader) *DirScanner { return &DirScanner{r: r} }
// Err returns the first non-EOF I/O error.
func (s *DirScanner) Err() error {
if errors.Is(s.err, io.EOF) {
return nil
}
return s.err
}
// Entry returns the address to the [FlatEntry] value storing the last result.
func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
// Scan advances to the next [FlatEntry].
func (s *DirScanner) Scan() bool {
if s.err != nil {
return false
}
var n int
n, s.err = s.ent.Decode(s.r)
if errors.Is(s.err, io.EOF) {
return n != 0
}
return s.err == nil
}
// Flatten writes a deterministic representation of the contents of fsys to w.
// The resulting data can be hashed to produce a deterministic checksum for the
// directory.
func Flatten(fsys fs.FS, root string, w io.Writer) error {
e := gob.NewEncoder(w)
return fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
var nr int
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
@@ -34,7 +156,7 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
}
ent := FlatEntry{
Name: fi.Name(),
Path: path,
Mode: fi.Mode(),
}
if ent.Mode.IsRegular() {
@@ -49,14 +171,17 @@ func Flatten(fsys fs.FS, root string, w io.Writer) error {
ent.Data = []byte(newpath)
}
return e.Encode(&ent)
nr, err = ent.Encode(w)
n += nr
return err
})
return
}
// HashFS returns a checksum produced by hashing the result of [Flatten].
func HashFS(fsys fs.FS, root string) (Checksum, error) {
h := sha512.New384()
if err := Flatten(fsys, root, h); err != nil {
if _, err := Flatten(fsys, root, h); err != nil {
return Checksum{}, err
}
return (Checksum)(h.Sum(nil)), nil