internal/pkg: streaming archive reader/writer
All checks were successful
Test / Create distribution (push) Successful in 1m5s
Test / Sandbox (push) Successful in 2m49s
Test / Hakurei (push) Successful in 3m53s
Test / ShareFS (push) Successful in 3m51s
Test / Sandbox (race detector) (push) Successful in 5m35s
Test / Hakurei (race detector) (push) Successful in 6m32s
Test / Flake checks (push) Successful in 1m15s
All checks were successful
Test / Create distribution (push) Successful in 1m5s
Test / Sandbox (push) Successful in 2m49s
Test / Hakurei (push) Successful in 3m53s
Test / ShareFS (push) Successful in 3m51s
Test / Sandbox (race detector) (push) Successful in 5m35s
Test / Hakurei (race detector) (push) Successful in 6m32s
Test / Flake checks (push) Successful in 1m15s
This is much more robust and efficient than the simple buffering implementation for larger files. Allocations happen almost exclusively in WalkDir. Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
@@ -595,7 +595,7 @@ func main() {
|
|||||||
0400,
|
0400,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
return err
|
return err
|
||||||
} else if _, err = pkg.Flatten(
|
} else if err = pkg.Write(
|
||||||
os.DirFS(pathname.String()),
|
os.DirFS(pathname.String()),
|
||||||
".",
|
".",
|
||||||
f,
|
f,
|
||||||
|
|||||||
278
internal/pkg/archive.go
Normal file
278
internal/pkg/archive.go
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
package pkg
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha512"
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"hakurei.app/check"
|
||||||
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
| mode uint32 | path_sz uint32 |
|
||||||
|
| data_sz uint64 |
|
||||||
|
| path string |
|
||||||
|
| data []byte |
|
||||||
|
*/
|
||||||
|
|
||||||
|
// An ArchiveHeader describes one entry of an archive: the fixed-size fields
// and the pathname that precede the entry's data segment in the stream.
type ArchiveHeader struct {
	// Mode holds the file mode bits; it is stored as a uint32 in the stream.
	Mode fs.FileMode
	// Path is the pathname of the file.
	Path string
	// Size is the length in bytes of the data segment, excluding padding.
	Size uint64
}
|
||||||
|
|
||||||
|
// Writer implements sequential writing of an archive. [Writer.WriteHeader]
|
||||||
|
// begins a new file with the provided [ArchiveHeader], and then Writer can be
|
||||||
|
// treated as an [io.Writer] to supply that file's data.
|
||||||
|
//
|
||||||
|
// It is the caller's responsibility to write entries in lexical order.
|
||||||
|
type Writer struct {
|
||||||
|
// Underlying writer.
|
||||||
|
w io.Writer
|
||||||
|
// Current header.
|
||||||
|
h ArchiveHeader
|
||||||
|
// Fixed-size header segment.
|
||||||
|
buf [wordSize * 2]byte
|
||||||
|
// Current position in data segment.
|
||||||
|
n uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWriter returns the address of a new [Writer] writing to w.
|
||||||
|
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
|
||||||
|
|
||||||
|
// zero supplies the padding bytes written after pathnames and data segments.
// It is only ever read.
var zero = [wordSize]byte{}
|
||||||
|
|
||||||
|
// padSize returns the padding size for aligning sz.
|
||||||
|
func padSize[T int | uint64](sz T) T {
|
||||||
|
return (wordSize - (sz)%wordSize) % wordSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush concludes writing to the current file and writes padding.
|
||||||
|
func (aw *Writer) flush() error {
|
||||||
|
if aw.h.Size > aw.n {
|
||||||
|
return fmt.Errorf("missed writing %d bytes", aw.h.Size-aw.n)
|
||||||
|
} else if aw.h.Size < aw.n {
|
||||||
|
return fmt.Errorf("wrote %d bytes beyond end of file", aw.n-aw.h.Size)
|
||||||
|
}
|
||||||
|
|
||||||
|
if psz := padSize(aw.h.Size); psz != 0 {
|
||||||
|
if _, err := aw.w.Write(zero[:psz]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
aw.n = 0
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteHeader writes h and begins accepting its corresponding file.
|
||||||
|
func (aw *Writer) WriteHeader(h *ArchiveHeader) error {
|
||||||
|
if err := aw.flush(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
aw.h = *h
|
||||||
|
binary.LittleEndian.PutUint32(aw.buf[:], uint32(aw.h.Mode))
|
||||||
|
binary.LittleEndian.PutUint32(aw.buf[wordSize/2:], uint32(len(aw.h.Path)))
|
||||||
|
binary.LittleEndian.PutUint64(aw.buf[wordSize:], aw.h.Size)
|
||||||
|
if _, err := aw.w.Write(aw.buf[:]); err != nil {
|
||||||
|
return err
|
||||||
|
} else if _, err = aw.w.Write(
|
||||||
|
unsafe.Slice(unsafe.StringData(aw.h.Path), len(aw.h.Path)),
|
||||||
|
); err != nil {
|
||||||
|
return err
|
||||||
|
} else if psz := padSize(len(aw.h.Path)); psz != 0 {
|
||||||
|
if _, err = aw.w.Write(zero[:psz]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write writes p to the underlying writer and records the new position. Invalid
|
||||||
|
// positions are reported by WriteHeader and Close.
|
||||||
|
func (aw *Writer) Write(p []byte) (n int, err error) {
|
||||||
|
n, err = aw.w.Write(p)
|
||||||
|
aw.n += uint64(n)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close concludes writing to the archive stream.
|
||||||
|
func (aw *Writer) Close() (err error) {
|
||||||
|
err = aw.flush()
|
||||||
|
aw.w = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrInsecurePath is returned by [Reader.Next] if a nonlocal path is
|
||||||
|
// encountered in the stream.
|
||||||
|
var ErrInsecurePath = errors.New("insecure file path")
|
||||||
|
|
||||||
|
// Reader implements sequential reading of an archive. [Reader.Next] advances to
|
||||||
|
// the next file in the archive (including the first), and then Reader can be
|
||||||
|
// treated as an [io.Reader] to access the file's data.
|
||||||
|
type Reader struct {
|
||||||
|
// Underlying reader.
|
||||||
|
r io.Reader
|
||||||
|
// Fixed-size header segment.
|
||||||
|
buf [wordSize * 2]byte
|
||||||
|
// Remaining bytes in current data segment.
|
||||||
|
n, pad uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewReader returns the address of a new [Reader] reading from r.
|
||||||
|
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
||||||
|
|
||||||
|
// Next advances ar to the next entry. Remaining bytes of the current data
|
||||||
|
// segment are discarded. Advancing beyond the final entry returns [io.EOF].
|
||||||
|
func (ar *Reader) Next() (*ArchiveHeader, error) {
|
||||||
|
if dsz := int64(ar.n + ar.pad); dsz > 0 {
|
||||||
|
if n, err := io.CopyN(io.Discard, ar.r, dsz); err != nil {
|
||||||
|
if errors.Is(err, io.EOF) && n != dsz {
|
||||||
|
err = io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if n, err := ar.r.Read(ar.buf[:]); err != nil {
|
||||||
|
if errors.Is(err, io.EOF) && n != 0 {
|
||||||
|
err = io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
h := ArchiveHeader{
|
||||||
|
Mode: fs.FileMode(binary.LittleEndian.Uint32(ar.buf[:])),
|
||||||
|
Size: binary.LittleEndian.Uint64(ar.buf[wordSize:]),
|
||||||
|
}
|
||||||
|
pathSize := int(binary.LittleEndian.Uint32(ar.buf[wordSize/2:]))
|
||||||
|
pPathSize := alignSize(pathSize)
|
||||||
|
|
||||||
|
buf := make([]byte, pPathSize)
|
||||||
|
if n, err := ar.r.Read(buf); err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
if n != len(buf) {
|
||||||
|
return nil, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
h.Path = unsafe.String(unsafe.SliceData(buf), pathSize)
|
||||||
|
return &h, err
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
h.Path = unsafe.String(unsafe.SliceData(buf), pathSize)
|
||||||
|
if !filepath.IsLocal(h.Path) {
|
||||||
|
return &h, ErrInsecurePath
|
||||||
|
}
|
||||||
|
|
||||||
|
ar.n = h.Size
|
||||||
|
ar.pad = padSize(h.Size)
|
||||||
|
return &h, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read implements [io.Reader] for the data segment of the current entry.
|
||||||
|
func (ar *Reader) Read(p []byte) (n int, err error) {
|
||||||
|
if uint64(len(p)) > ar.n {
|
||||||
|
p = p[:ar.n]
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(p) > 0 {
|
||||||
|
n, err = ar.r.Read(p)
|
||||||
|
ar.n -= uint64(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch err {
|
||||||
|
case io.EOF:
|
||||||
|
if ar.n > 0 {
|
||||||
|
return n, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
|
||||||
|
case nil:
|
||||||
|
if ar.n == 0 {
|
||||||
|
return n, io.EOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write writes a deterministic representation of the contents of fsys to w.
|
||||||
|
// The resulting data can be hashed to produce a deterministic checksum for the
|
||||||
|
// directory.
|
||||||
|
func Write(fsys fs.FS, root string, w io.Writer) error {
|
||||||
|
aw := NewWriter(w)
|
||||||
|
if err := fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var fi fs.FileInfo
|
||||||
|
fi, err = d.Info()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
h := ArchiveHeader{
|
||||||
|
Path: path,
|
||||||
|
Mode: fi.Mode(),
|
||||||
|
}
|
||||||
|
if h.Mode.IsRegular() {
|
||||||
|
h.Size = uint64(fi.Size())
|
||||||
|
if err = aw.WriteHeader(&h); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var r fs.File
|
||||||
|
r, err = fsys.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = io.Copy(aw, r)
|
||||||
|
if _err := r.Close(); err == nil {
|
||||||
|
err = _err
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
} else if h.Mode&fs.ModeSymlink != 0 {
|
||||||
|
var newpath string
|
||||||
|
if newpath, err = fs.ReadLink(fsys, path); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
h.Size = uint64(len(newpath))
|
||||||
|
if err = aw.WriteHeader(&h); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = aw.Write(unsafe.Slice(unsafe.StringData(newpath), len(newpath)))
|
||||||
|
return err
|
||||||
|
} else if !h.Mode.IsDir() {
|
||||||
|
return InvalidFileModeError(h.Mode)
|
||||||
|
}
|
||||||
|
return aw.WriteHeader(&h)
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return aw.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
||||||
|
func HashFS(buf *Checksum, fsys fs.FS, root string) error {
|
||||||
|
h := sha512.New384()
|
||||||
|
if err := Write(fsys, root, h); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
h.Sum(buf[:0])
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HashDir returns a checksum produced by hashing the result of [Flatten].
|
||||||
|
func HashDir(buf *Checksum, pathname *check.Absolute) error {
|
||||||
|
return HashFS(buf, os.DirFS(pathname.String()), ".")
|
||||||
|
}
|
||||||
198
internal/pkg/archive_test.go
Normal file
198
internal/pkg/archive_test.go
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
package pkg_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
"testing/fstest"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"hakurei.app/internal/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestArchive(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
type entry struct {
|
||||||
|
path string
|
||||||
|
mode fs.FileMode
|
||||||
|
data string
|
||||||
|
}
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
fsys fs.FS
|
||||||
|
entries []entry
|
||||||
|
sum pkg.Checksum
|
||||||
|
err error
|
||||||
|
}{
|
||||||
|
{"bad type", fstest.MapFS{
|
||||||
|
".": {Mode: fs.ModeDir | 0700},
|
||||||
|
"invalid": {Mode: fs.ModeCharDevice | 0400},
|
||||||
|
}, nil, pkg.Checksum{}, pkg.InvalidFileModeError(
|
||||||
|
fs.ModeCharDevice | 0400,
|
||||||
|
)},
|
||||||
|
|
||||||
|
{"coldboot", fstest.MapFS{
|
||||||
|
".": {Mode: fs.ModeDir | 0700},
|
||||||
|
|
||||||
|
"devices": {Mode: fs.ModeDir | 0700},
|
||||||
|
"devices/uevent": {Mode: 0600, Data: []byte("add")},
|
||||||
|
"devices/empty": {Mode: fs.ModeDir | 0700},
|
||||||
|
|
||||||
|
"devices/sub": {Mode: fs.ModeDir | 0700},
|
||||||
|
"devices/sub/uevent": {Mode: 0600, Data: []byte("add")},
|
||||||
|
|
||||||
|
"block": {Mode: fs.ModeDir | 0700},
|
||||||
|
"block/uevent": {Mode: 0600},
|
||||||
|
}, []entry{
|
||||||
|
{".", fs.ModeDir | 0700, ""},
|
||||||
|
|
||||||
|
{"block", fs.ModeDir | 0700, ""},
|
||||||
|
{"block/uevent", 0600, ""},
|
||||||
|
|
||||||
|
{"devices", fs.ModeDir | 0700, ""},
|
||||||
|
{"devices/empty", fs.ModeDir | 0700, ""},
|
||||||
|
{"devices/sub", fs.ModeDir | 0700, ""},
|
||||||
|
{"devices/sub/uevent", 0600, "add"},
|
||||||
|
{"devices/uevent", 0600, "add"},
|
||||||
|
}, pkg.MustDecode("mEy_Lf5KotThm7OwMx7yTKZh5HCCyaB41pVAvI9uDMgVQFM91iosBLYsRm8bDsX8"), nil},
|
||||||
|
|
||||||
|
{"empty", fstest.MapFS{
|
||||||
|
".": {Mode: fs.ModeDir | 0700},
|
||||||
|
"checksum": {Mode: fs.ModeDir | 0700},
|
||||||
|
"identifier": {Mode: fs.ModeDir | 0700},
|
||||||
|
"work": {Mode: fs.ModeDir | 0700},
|
||||||
|
}, []entry{
|
||||||
|
{".", fs.ModeDir | 0700, ""},
|
||||||
|
{"checksum", fs.ModeDir | 0700, ""},
|
||||||
|
{"identifier", fs.ModeDir | 0700, ""},
|
||||||
|
{"work", fs.ModeDir | 0700, ""},
|
||||||
|
}, pkg.MustDecode("E4vEZKhCcL2gPZ2Tt59FS3lDng-d_2SKa2i5G_RbDfwGn6EemptFaGLPUDiOa94C"), nil},
|
||||||
|
|
||||||
|
{"sample directory step garbage", fstest.MapFS{
|
||||||
|
".": {Mode: fs.ModeDir | 0500},
|
||||||
|
|
||||||
|
"lib": {Mode: fs.ModeDir | 0500},
|
||||||
|
"lib/check": {Mode: 0400},
|
||||||
|
|
||||||
|
"lib/pkgconfig": {Mode: fs.ModeDir | 0500},
|
||||||
|
}, []entry{
|
||||||
|
{".", fs.ModeDir | 0500, ""},
|
||||||
|
|
||||||
|
{"lib", fs.ModeDir | 0500, ""},
|
||||||
|
{"lib/check", 0400, ""},
|
||||||
|
|
||||||
|
{"lib/pkgconfig", fs.ModeDir | 0500, ""},
|
||||||
|
}, pkg.MustDecode("CUx-3hSbTWPsbMfDhgalG4Ni_GmR9TnVX8F99tY_P5GtkYvczg9RrF5zO0jX9XYT"), nil},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
t.Run("roundtrip", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := pkg.Write(
|
||||||
|
tc.fsys,
|
||||||
|
".",
|
||||||
|
&buf,
|
||||||
|
); !reflect.DeepEqual(err, tc.err) {
|
||||||
|
t.Fatalf("Flatten: error = %v, want %v", err, tc.err)
|
||||||
|
} else if tc.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
r := pkg.NewReader(bytes.NewReader(buf.Bytes()))
|
||||||
|
var got []entry
|
||||||
|
for {
|
||||||
|
h, err := r.Next()
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
t.Fatalf("Next: error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var data []byte
|
||||||
|
if data, err = io.ReadAll(r); err != nil {
|
||||||
|
t.Fatalf("Read: error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got = append(got, entry{
|
||||||
|
path: h.Path,
|
||||||
|
mode: h.Mode,
|
||||||
|
data: unsafe.String(unsafe.SliceData(data), len(data)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(got, tc.entries) {
|
||||||
|
t.Fatalf("Reader: %#v, want %#v", got, tc.entries)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if tc.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("hash", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var got pkg.Checksum
|
||||||
|
if err := pkg.HashFS(&got, tc.fsys, "."); err != nil {
|
||||||
|
t.Fatalf("HashFS: error = %v", err)
|
||||||
|
} else if got != tc.sum {
|
||||||
|
t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
|
||||||
|
Got: got,
|
||||||
|
Want: tc.sum,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// archiveTestdata is the sample filesystem shared by the archive benchmarks.
var archiveTestdata = fstest.MapFS{
	".": {Mode: fs.ModeDir | 0700},

	"block":        {Mode: fs.ModeDir | 0700},
	"block/uevent": {Mode: 0600},

	"devices":            {Mode: fs.ModeDir | 0700},
	"devices/empty":      {Mode: fs.ModeDir | 0700},
	"devices/sub":        {Mode: fs.ModeDir | 0700},
	"devices/sub/uevent": {Mode: 0600, Data: []byte("add")},
	"devices/uevent":     {Mode: 0600, Data: []byte("add")},
}
|
||||||
|
|
||||||
|
func BenchmarkArchiveRead(b *testing.B) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := pkg.Write(archiveTestdata, ".", &buf); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
testdata := buf.Bytes()
|
||||||
|
|
||||||
|
for b.Loop() {
|
||||||
|
r := pkg.NewReader(bytes.NewReader(testdata))
|
||||||
|
for {
|
||||||
|
_, err := r.Next()
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkArchiveWrite(b *testing.B) {
|
||||||
|
for b.Loop() {
|
||||||
|
if err := pkg.Write(archiveTestdata, ".", io.Discard); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,203 +0,0 @@
|
|||||||
package pkg
|
|
||||||
|
|
||||||
import (
|
|
||||||
"crypto/sha512"
|
|
||||||
"encoding/binary"
|
|
||||||
"errors"
|
|
||||||
"io"
|
|
||||||
"io/fs"
|
|
||||||
"math"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"hakurei.app/check"
|
|
||||||
)
|
|
||||||
|
|
||||||
// FlatEntry is a directory entry to be encoded for [Flatten].
|
|
||||||
type FlatEntry struct {
|
|
||||||
Mode fs.FileMode // file mode bits
|
|
||||||
Path string // pathname of the file
|
|
||||||
Data []byte // file content or symlink destination
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
| mode uint32 | path_sz uint32 |
|
|
||||||
| data_sz uint64 |
|
|
||||||
| path string |
|
|
||||||
| data []byte |
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Encode encodes the entry for transmission or hashing.
|
|
||||||
func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
|
|
||||||
pPathSize := alignSize(len(ent.Path))
|
|
||||||
if pPathSize > math.MaxUint32 {
|
|
||||||
return 0, syscall.E2BIG
|
|
||||||
}
|
|
||||||
pDataSize := alignSize(len(ent.Data))
|
|
||||||
|
|
||||||
payload := make([]byte, wordSize*2+pPathSize+pDataSize)
|
|
||||||
binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
|
|
||||||
binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
|
|
||||||
binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
|
|
||||||
copy(payload[wordSize*2:], ent.Path)
|
|
||||||
copy(payload[wordSize*2+pPathSize:], ent.Data)
|
|
||||||
return w.Write(payload)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ErrInsecurePath is returned by [FlatEntry.Decode] if validation is requested
|
|
||||||
// and a nonlocal path is encountered in the stream.
|
|
||||||
var ErrInsecurePath = errors.New("insecure file path")
|
|
||||||
|
|
||||||
// Decode decodes the entry from its representation produced by Encode.
|
|
||||||
func (ent *FlatEntry) Decode(r io.Reader, validate bool) (n int, err error) {
|
|
||||||
var nr int
|
|
||||||
|
|
||||||
header := make([]byte, wordSize*2)
|
|
||||||
nr, err = r.Read(header)
|
|
||||||
n += nr
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, io.EOF) && n != 0 {
|
|
||||||
err = io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
|
|
||||||
pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
|
|
||||||
pPathSize := alignSize(pathSize)
|
|
||||||
dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
|
|
||||||
pDataSize := alignSize(dataSize)
|
|
||||||
|
|
||||||
buf := make([]byte, pPathSize+pDataSize)
|
|
||||||
nr, err = r.Read(buf)
|
|
||||||
n += nr
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, io.EOF) {
|
|
||||||
if nr != len(buf) {
|
|
||||||
err = io.ErrUnexpectedEOF
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ent.Path = string(buf[:pathSize])
|
|
||||||
if ent.Mode.IsDir() {
|
|
||||||
ent.Data = nil
|
|
||||||
} else {
|
|
||||||
ent.Data = buf[pPathSize : pPathSize+dataSize]
|
|
||||||
}
|
|
||||||
|
|
||||||
if validate && !filepath.IsLocal(ent.Path) {
|
|
||||||
err = ErrInsecurePath
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// DirScanner provides an efficient interface for reading a stream of encoded
|
|
||||||
// [FlatEntry]. Successive calls to the Scan method will step through the
|
|
||||||
// entries in the stream.
|
|
||||||
type DirScanner struct {
|
|
||||||
// Underlying reader to scan [FlatEntry] representations from.
|
|
||||||
r io.Reader
|
|
||||||
|
|
||||||
// First non-EOF I/O error, returned by the Err method.
|
|
||||||
err error
|
|
||||||
|
|
||||||
// Entry to store results in. Its address is returned by the Entry method
|
|
||||||
// and is updated on every call to Scan.
|
|
||||||
ent FlatEntry
|
|
||||||
|
|
||||||
// Validate pathnames during decoding.
|
|
||||||
validate bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewDirScanner returns the address of a new instance of [DirScanner] reading
|
|
||||||
// from r. The caller must no longer read from r after this function returns.
|
|
||||||
func NewDirScanner(r io.Reader, validate bool) *DirScanner {
|
|
||||||
return &DirScanner{r: r, validate: validate}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Err returns the first non-EOF I/O error.
|
|
||||||
func (s *DirScanner) Err() error {
|
|
||||||
if errors.Is(s.err, io.EOF) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return s.err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Entry returns the address to the [FlatEntry] value storing the last result.
|
|
||||||
func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
|
|
||||||
|
|
||||||
// Scan advances to the next [FlatEntry].
|
|
||||||
func (s *DirScanner) Scan() bool {
|
|
||||||
if s.err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
var n int
|
|
||||||
n, s.err = s.ent.Decode(s.r, s.validate)
|
|
||||||
if errors.Is(s.err, io.EOF) {
|
|
||||||
return n != 0
|
|
||||||
}
|
|
||||||
return s.err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flatten writes a deterministic representation of the contents of fsys to w.
|
|
||||||
// The resulting data can be hashed to produce a deterministic checksum for the
|
|
||||||
// directory.
|
|
||||||
func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
|
|
||||||
var nr int
|
|
||||||
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
var fi fs.FileInfo
|
|
||||||
fi, err = d.Info()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
ent := FlatEntry{
|
|
||||||
Path: path,
|
|
||||||
Mode: fi.Mode(),
|
|
||||||
}
|
|
||||||
if ent.Mode.IsRegular() {
|
|
||||||
if ent.Data, err = fs.ReadFile(fsys, path); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else if ent.Mode&fs.ModeSymlink != 0 {
|
|
||||||
var newpath string
|
|
||||||
if newpath, err = fs.ReadLink(fsys, path); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
ent.Data = []byte(newpath)
|
|
||||||
} else if !ent.Mode.IsDir() {
|
|
||||||
return InvalidFileModeError(ent.Mode)
|
|
||||||
}
|
|
||||||
|
|
||||||
nr, err = ent.Encode(w)
|
|
||||||
n += nr
|
|
||||||
return err
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
|
||||||
func HashFS(buf *Checksum, fsys fs.FS, root string) error {
|
|
||||||
h := sha512.New384()
|
|
||||||
if _, err := Flatten(fsys, root, h); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
h.Sum(buf[:0])
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// HashDir returns a checksum produced by hashing the result of [Flatten].
|
|
||||||
func HashDir(buf *Checksum, pathname *check.Absolute) error {
|
|
||||||
return HashFS(buf, os.DirFS(pathname.String()), ".")
|
|
||||||
}
|
|
||||||
@@ -1,134 +0,0 @@
|
|||||||
package pkg_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"io/fs"
|
|
||||||
"reflect"
|
|
||||||
"testing"
|
|
||||||
"testing/fstest"
|
|
||||||
|
|
||||||
"hakurei.app/internal/pkg"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestFlatten(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
testCases := []struct {
|
|
||||||
name string
|
|
||||||
fsys fs.FS
|
|
||||||
entries []pkg.FlatEntry
|
|
||||||
sum pkg.Checksum
|
|
||||||
err error
|
|
||||||
}{
|
|
||||||
{"bad type", fstest.MapFS{
|
|
||||||
".": {Mode: fs.ModeDir | 0700},
|
|
||||||
"invalid": {Mode: fs.ModeCharDevice | 0400},
|
|
||||||
}, nil, pkg.Checksum{}, pkg.InvalidFileModeError(
|
|
||||||
fs.ModeCharDevice | 0400,
|
|
||||||
)},
|
|
||||||
|
|
||||||
{"coldboot", fstest.MapFS{
|
|
||||||
".": {Mode: fs.ModeDir | 0700},
|
|
||||||
|
|
||||||
"devices": {Mode: fs.ModeDir | 0700},
|
|
||||||
"devices/uevent": {Mode: 0600, Data: []byte("add")},
|
|
||||||
"devices/empty": {Mode: fs.ModeDir | 0700},
|
|
||||||
|
|
||||||
"devices/sub": {Mode: fs.ModeDir | 0700},
|
|
||||||
"devices/sub/uevent": {Mode: 0600, Data: []byte("add")},
|
|
||||||
|
|
||||||
"block": {Mode: fs.ModeDir | 0700},
|
|
||||||
"block/uevent": {Mode: 0600, Data: []byte{}},
|
|
||||||
}, []pkg.FlatEntry{
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "."},
|
|
||||||
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "block"},
|
|
||||||
{Mode: 0600, Path: "block/uevent", Data: []byte{}},
|
|
||||||
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "devices"},
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "devices/empty"},
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "devices/sub"},
|
|
||||||
{Mode: 0600, Path: "devices/sub/uevent", Data: []byte("add")},
|
|
||||||
{Mode: 0600, Path: "devices/uevent", Data: []byte("add")},
|
|
||||||
}, pkg.MustDecode("mEy_Lf5KotThm7OwMx7yTKZh5HCCyaB41pVAvI9uDMgVQFM91iosBLYsRm8bDsX8"), nil},
|
|
||||||
|
|
||||||
{"empty", fstest.MapFS{
|
|
||||||
".": {Mode: fs.ModeDir | 0700},
|
|
||||||
"checksum": {Mode: fs.ModeDir | 0700},
|
|
||||||
"identifier": {Mode: fs.ModeDir | 0700},
|
|
||||||
"work": {Mode: fs.ModeDir | 0700},
|
|
||||||
}, []pkg.FlatEntry{
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "."},
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "checksum"},
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "identifier"},
|
|
||||||
{Mode: fs.ModeDir | 0700, Path: "work"},
|
|
||||||
}, pkg.MustDecode("E4vEZKhCcL2gPZ2Tt59FS3lDng-d_2SKa2i5G_RbDfwGn6EemptFaGLPUDiOa94C"), nil},
|
|
||||||
|
|
||||||
{"sample directory step garbage", fstest.MapFS{
|
|
||||||
".": {Mode: fs.ModeDir | 0500},
|
|
||||||
|
|
||||||
"lib": {Mode: fs.ModeDir | 0500},
|
|
||||||
"lib/check": {Mode: 0400, Data: []byte{}},
|
|
||||||
|
|
||||||
"lib/pkgconfig": {Mode: fs.ModeDir | 0500},
|
|
||||||
}, []pkg.FlatEntry{
|
|
||||||
{Mode: fs.ModeDir | 0500, Path: "."},
|
|
||||||
|
|
||||||
{Mode: fs.ModeDir | 0500, Path: "lib"},
|
|
||||||
{Mode: 0400, Path: "lib/check", Data: []byte{}},
|
|
||||||
|
|
||||||
{Mode: fs.ModeDir | 0500, Path: "lib/pkgconfig"},
|
|
||||||
}, pkg.MustDecode("CUx-3hSbTWPsbMfDhgalG4Ni_GmR9TnVX8F99tY_P5GtkYvczg9RrF5zO0jX9XYT"), nil},
|
|
||||||
}
|
|
||||||
for _, tc := range testCases {
|
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
t.Run("roundtrip", func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
var buf bytes.Buffer
|
|
||||||
if _, err := pkg.Flatten(
|
|
||||||
tc.fsys,
|
|
||||||
".",
|
|
||||||
&buf,
|
|
||||||
); !reflect.DeepEqual(err, tc.err) {
|
|
||||||
t.Fatalf("Flatten: error = %v, want %v", err, tc.err)
|
|
||||||
} else if tc.err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
s := pkg.NewDirScanner(bytes.NewReader(buf.Bytes()), true)
|
|
||||||
var got []pkg.FlatEntry
|
|
||||||
for s.Scan() {
|
|
||||||
got = append(got, *s.Entry())
|
|
||||||
}
|
|
||||||
if err := s.Err(); err != nil {
|
|
||||||
t.Fatalf("Err: error = %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !reflect.DeepEqual(got, tc.entries) {
|
|
||||||
t.Fatalf("Scan: %#v, want %#v", got, tc.entries)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
if tc.err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Run("hash", func(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
var got pkg.Checksum
|
|
||||||
if err := pkg.HashFS(&got, tc.fsys, "."); err != nil {
|
|
||||||
t.Fatalf("HashFS: error = %v", err)
|
|
||||||
} else if got != tc.sum {
|
|
||||||
t.Fatalf("HashFS: %v", &pkg.ChecksumMismatchError{
|
|
||||||
Got: got,
|
|
||||||
Want: tc.sum,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -21,7 +21,7 @@ import (
|
|||||||
const wordSize = 8
|
const wordSize = 8
|
||||||
|
|
||||||
// alignSize returns the padded size for aligning sz.
|
// alignSize returns the padded size for aligning sz.
|
||||||
func alignSize(sz int) int {
|
func alignSize[T int | uint64](sz T) T {
|
||||||
return sz + (wordSize-(sz)%wordSize)%wordSize
|
return sz + (wordSize-(sz)%wordSize)%wordSize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,17 @@ func padSize[T int | int64](sz T) T {
|
|||||||
return (wordSize - (sz)%wordSize) % wordSize
|
return (wordSize - (sz)%wordSize) % wordSize
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteReport writes a report of all available [PArtifact] to w.
|
// countWriter is a writer that keeps no data and only accumulates the total
// number of bytes passed to it.
type countWriter uint64

// Write adds the length of p to the running total. It always reports p as
// written in full and never returns an error.
func (w *countWriter) Write(p []byte) (n int, err error) {
	*w += countWriter(len(p))
	return len(p), nil
}
|
||||||
|
|
||||||
|
// WriteReport writes a report of all available [Artifact] to w.
|
||||||
func WriteReport(msg message.Msg, w io.Writer, c *pkg.Cache) error {
|
func WriteReport(msg message.Msg, w io.Writer, c *pkg.Cache) error {
|
||||||
var (
|
var (
|
||||||
zero [wordSize]byte
|
zero [wordSize]byte
|
||||||
@@ -90,12 +100,12 @@ func WriteReport(msg message.Msg, w io.Writer, c *pkg.Cache) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// existence of status implies cured artifact
|
// existence of status implies cured artifact
|
||||||
var n int
|
var n countWriter
|
||||||
if pathname, _, err := c.Cure(a); err != nil {
|
if pathname, _, err := c.Cure(a); err != nil {
|
||||||
return err
|
return err
|
||||||
} else if n, err = pkg.Flatten(
|
} else if err = pkg.Write(
|
||||||
os.DirFS(pathname.String()), ".",
|
os.DirFS(pathname.String()), ".",
|
||||||
io.Discard,
|
&n,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user