Files
hakurei/internal/pkg/dir.go
Ophestra e0c720681b
All checks were successful
Test / Create distribution (push) Successful in 1m0s
Test / Sandbox (push) Successful in 2m41s
Test / Hakurei (push) Successful in 4m1s
Test / ShareFS (push) Successful in 4m1s
Test / Hpkg (push) Successful in 4m35s
Test / Sandbox (race detector) (push) Successful in 5m4s
Test / Hakurei (race detector) (push) Successful in 6m0s
Test / Flake checks (push) Successful in 1m46s
internal/pkg: standardise artifact IR
This should hopefully provide good separation between the artifact curing backend implementation and the (still work in progress) language. Making the IR parseable also guarantees uniqueness of the representation.

Signed-off-by: Ophestra <cat@gensokyo.uk>
2026-02-05 08:24:09 +09:00

204 lines
5.0 KiB
Go

package pkg
import (
"crypto/sha512"
"encoding/binary"
"errors"
"io"
"io/fs"
"math"
"os"
"path/filepath"
"syscall"
"hakurei.app/container/check"
)
// FlatEntry is a directory entry to be encoded for [Flatten].
type FlatEntry struct {
Mode fs.FileMode // file mode bits
Path string // pathname of the file
Data []byte // file content or symlink destination
}
/*
| mode uint32 | path_sz uint32 |
| data_sz uint64 |
| path string |
| data []byte |
*/
// Encode encodes the entry for transmission or hashing.
func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
pPathSize := alignSize(len(ent.Path))
if pPathSize > math.MaxUint32 {
return 0, syscall.E2BIG
}
pDataSize := alignSize(len(ent.Data))
payload := make([]byte, wordSize*2+pPathSize+pDataSize)
binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
copy(payload[wordSize*2:], ent.Path)
copy(payload[wordSize*2+pPathSize:], ent.Data)
return w.Write(payload)
}
// ErrInsecurePath is returned by [FlatEntry.Decode] if validation is requested
// and a nonlocal path is encountered in the stream.
var ErrInsecurePath = errors.New("insecure file path")
// Decode decodes the entry from its representation produced by Encode.
func (ent *FlatEntry) Decode(r io.Reader, validate bool) (n int, err error) {
var nr int
header := make([]byte, wordSize*2)
nr, err = r.Read(header)
n += nr
if err != nil {
if errors.Is(err, io.EOF) && n != 0 {
err = io.ErrUnexpectedEOF
}
return
}
ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
pPathSize := alignSize(pathSize)
dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
pDataSize := alignSize(dataSize)
buf := make([]byte, pPathSize+pDataSize)
nr, err = r.Read(buf)
n += nr
if err != nil {
if errors.Is(err, io.EOF) {
if nr != len(buf) {
err = io.ErrUnexpectedEOF
return
}
} else {
return
}
}
ent.Path = string(buf[:pathSize])
if ent.Mode.IsDir() {
ent.Data = nil
} else {
ent.Data = buf[pPathSize : pPathSize+dataSize]
}
if validate && !filepath.IsLocal(ent.Path) {
err = ErrInsecurePath
}
return
}
// DirScanner provides an efficient interface for reading a stream of encoded
// [FlatEntry]. Successive calls to the Scan method will step through the
// entries in the stream.
type DirScanner struct {
// Underlying reader to scan [FlatEntry] representations from.
r io.Reader
// First non-EOF I/O error, returned by the Err method.
err error
// Entry to store results in. Its address is returned by the Entry method
// and is updated on every call to Scan.
ent FlatEntry
// Validate pathnames during decoding.
validate bool
}
// NewDirScanner returns the address of a new instance of [DirScanner] reading
// from r. The caller must no longer read from r after this function returns.
func NewDirScanner(r io.Reader, validate bool) *DirScanner {
return &DirScanner{r: r, validate: validate}
}
// Err returns the first non-EOF I/O error.
func (s *DirScanner) Err() error {
if errors.Is(s.err, io.EOF) {
return nil
}
return s.err
}
// Entry returns the address to the [FlatEntry] value storing the last result.
func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
// Scan advances to the next [FlatEntry].
func (s *DirScanner) Scan() bool {
if s.err != nil {
return false
}
var n int
n, s.err = s.ent.Decode(s.r, s.validate)
if errors.Is(s.err, io.EOF) {
return n != 0
}
return s.err == nil
}
// Flatten writes a deterministic representation of the contents of fsys to w.
// The resulting data can be hashed to produce a deterministic checksum for the
// directory.
func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
var nr int
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
var fi fs.FileInfo
fi, err = d.Info()
if err != nil {
return err
}
ent := FlatEntry{
Path: path,
Mode: fi.Mode(),
}
if ent.Mode.IsRegular() {
if ent.Data, err = fs.ReadFile(fsys, path); err != nil {
return err
}
} else if ent.Mode&fs.ModeSymlink != 0 {
var newpath string
if newpath, err = fs.ReadLink(fsys, path); err != nil {
return err
}
ent.Data = []byte(newpath)
} else if !ent.Mode.IsDir() {
return InvalidFileModeError(ent.Mode)
}
nr, err = ent.Encode(w)
n += nr
return err
})
return
}
// HashFS returns a checksum produced by hashing the result of [Flatten].
func HashFS(buf *Checksum, fsys fs.FS, root string) error {
h := sha512.New384()
if _, err := Flatten(fsys, root, h); err != nil {
return err
}
h.Sum(buf[:0])
return nil
}
// HashDir returns a checksum produced by hashing the result of [Flatten].
func HashDir(buf *Checksum, pathname *check.Absolute) error {
return HashFS(buf, os.DirFS(pathname.String()), ".")
}