package pkg import ( "crypto/sha512" "encoding/binary" "errors" "io" "io/fs" "math" "os" "syscall" "hakurei.app/container/check" ) // FlatEntry is a directory entry to be encoded for [Flatten]. type FlatEntry struct { Mode fs.FileMode // file mode bits Path string // pathname of the file Data []byte // file content or symlink destination } /* | mode uint32 | path_sz uint32 | | data_sz uint64 | | path string | | data []byte | */ // wordSize is the boundary which binary segments are always aligned to. const wordSize = 8 // alignSize returns the padded size for aligning sz. func alignSize(sz int) int { return sz + (wordSize-(sz)%wordSize)%wordSize } // Encode encodes the entry for transmission or hashing. func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) { pPathSize := alignSize(len(ent.Path)) if pPathSize > math.MaxUint32 { return 0, syscall.E2BIG } pDataSize := alignSize(len(ent.Data)) payload := make([]byte, wordSize*2+pPathSize+pDataSize) binary.LittleEndian.PutUint32(payload, uint32(ent.Mode)) binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path))) binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data))) copy(payload[wordSize*2:], ent.Path) copy(payload[wordSize*2+pPathSize:], ent.Data) return w.Write(payload) } // Decode decodes the entry from its representation produced by Encode. func (ent *FlatEntry) Decode(r io.Reader) (n int, err error) { var nr int header := make([]byte, wordSize*2) nr, err = r.Read(header) n += nr if err != nil { if errors.Is(err, io.EOF) && n != 0 { err = io.ErrUnexpectedEOF } return } ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header)) pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:])) pPathSize := alignSize(pathSize) dataSize := int(binary.LittleEndian.Uint64(header[wordSize:])) pDataSize := alignSize(dataSize) buf := make([]byte, pPathSize+pDataSize) nr, err = r.Read(buf) n += nr if err != nil { if errors.Is(err, io.EOF) { if nr != len(buf) { err = io.ErrUnexpectedEOF return } } else { return } } ent.Path = string(buf[:pathSize]) if ent.Mode.IsDir() { ent.Data = nil } else { ent.Data = buf[pPathSize : pPathSize+dataSize] } return } // DirScanner provides an efficient interface for reading a stream of encoded // [FlatEntry]. Successive calls to the Scan method will step through the // entries in the stream. type DirScanner struct { // Underlying reader to scan [FlatEntry] representations from. r io.Reader // First non-EOF I/O error, returned by the Err method. err error // Entry to store results in. Its address is returned by the Entry method // and is updated on every call to Scan. ent FlatEntry } // NewDirScanner returns the address of a new instance of [DirScanner] reading // from r. The caller must no longer read from r after this function returns. func NewDirScanner(r io.Reader) *DirScanner { return &DirScanner{r: r} } // Err returns the first non-EOF I/O error. func (s *DirScanner) Err() error { if errors.Is(s.err, io.EOF) { return nil } return s.err } // Entry returns the address to the [FlatEntry] value storing the last result. func (s *DirScanner) Entry() *FlatEntry { return &s.ent } // Scan advances to the next [FlatEntry]. func (s *DirScanner) Scan() bool { if s.err != nil { return false } var n int n, s.err = s.ent.Decode(s.r) if errors.Is(s.err, io.EOF) { return n != 0 } return s.err == nil } // Flatten writes a deterministic representation of the contents of fsys to w. // The resulting data can be hashed to produce a deterministic checksum for the // directory. func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) { var nr int err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } var fi fs.FileInfo fi, err = d.Info() if err != nil { return err } ent := FlatEntry{ Path: path, Mode: fi.Mode(), } if ent.Mode.IsRegular() { if ent.Data, err = fs.ReadFile(fsys, path); err != nil { return err } } else if ent.Mode&fs.ModeSymlink != 0 { var newpath string if newpath, err = fs.ReadLink(fsys, path); err != nil { return err } ent.Data = []byte(newpath) } nr, err = ent.Encode(w) n += nr return err }) return } // HashFS returns a checksum produced by hashing the result of [Flatten]. func HashFS(fsys fs.FS, root string) (Checksum, error) { h := sha512.New384() if _, err := Flatten(fsys, root, h); err != nil { return Checksum{}, err } return (Checksum)(h.Sum(nil)), nil } // HashDir returns a checksum produced by hashing the result of [Flatten]. func HashDir(pathname *check.Absolute) (Checksum, error) { return HashFS(os.DirFS(pathname.String()), ".") }