forked from security/hakurei
This should hopefully provide good separation between the artifact curing backend implementation and the (still work in progress) language. Making the IR parseable also guarantees uniqueness of the representation. Signed-off-by: Ophestra <cat@gensokyo.uk>
204 lines
5.0 KiB
Go
204 lines
5.0 KiB
Go
package pkg
|
|
|
|
import (
|
|
"crypto/sha512"
|
|
"encoding/binary"
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"syscall"
|
|
|
|
"hakurei.app/container/check"
|
|
)
|
|
|
|
// FlatEntry describes one directory entry in the form that is encoded by
// [FlatEntry.Encode] and emitted by [Flatten].
type FlatEntry struct {
	// Mode holds the file mode bits of the entry.
	Mode fs.FileMode
	// Path is the pathname of the file.
	Path string
	// Data holds the file content, or the link destination if the entry is
	// a symlink.
	Data []byte
}
|
|
|
|
/*
|
|
| mode uint32 | path_sz uint32 |
|
|
| data_sz uint64 |
|
|
| path string |
|
|
| data []byte |
|
|
*/
|
|
|
|
// Encode encodes the entry for transmission or hashing.
|
|
func (ent *FlatEntry) Encode(w io.Writer) (n int, err error) {
|
|
pPathSize := alignSize(len(ent.Path))
|
|
if pPathSize > math.MaxUint32 {
|
|
return 0, syscall.E2BIG
|
|
}
|
|
pDataSize := alignSize(len(ent.Data))
|
|
|
|
payload := make([]byte, wordSize*2+pPathSize+pDataSize)
|
|
binary.LittleEndian.PutUint32(payload, uint32(ent.Mode))
|
|
binary.LittleEndian.PutUint32(payload[wordSize/2:], uint32(len(ent.Path)))
|
|
binary.LittleEndian.PutUint64(payload[wordSize:], uint64(len(ent.Data)))
|
|
copy(payload[wordSize*2:], ent.Path)
|
|
copy(payload[wordSize*2+pPathSize:], ent.Data)
|
|
return w.Write(payload)
|
|
}
|
|
|
|
// ErrInsecurePath is the error reported by [FlatEntry.Decode] when the caller
// asked for validation and the decoded pathname is not a local path.
var ErrInsecurePath = errors.New("insecure file path")
|
|
|
|
// Decode decodes the entry from its representation produced by Encode.
|
|
func (ent *FlatEntry) Decode(r io.Reader, validate bool) (n int, err error) {
|
|
var nr int
|
|
|
|
header := make([]byte, wordSize*2)
|
|
nr, err = r.Read(header)
|
|
n += nr
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) && n != 0 {
|
|
err = io.ErrUnexpectedEOF
|
|
}
|
|
return
|
|
}
|
|
|
|
ent.Mode = fs.FileMode(binary.LittleEndian.Uint32(header))
|
|
pathSize := int(binary.LittleEndian.Uint32(header[wordSize/2:]))
|
|
pPathSize := alignSize(pathSize)
|
|
dataSize := int(binary.LittleEndian.Uint64(header[wordSize:]))
|
|
pDataSize := alignSize(dataSize)
|
|
|
|
buf := make([]byte, pPathSize+pDataSize)
|
|
nr, err = r.Read(buf)
|
|
n += nr
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
if nr != len(buf) {
|
|
err = io.ErrUnexpectedEOF
|
|
return
|
|
}
|
|
} else {
|
|
return
|
|
}
|
|
}
|
|
|
|
ent.Path = string(buf[:pathSize])
|
|
if ent.Mode.IsDir() {
|
|
ent.Data = nil
|
|
} else {
|
|
ent.Data = buf[pPathSize : pPathSize+dataSize]
|
|
}
|
|
|
|
if validate && !filepath.IsLocal(ent.Path) {
|
|
err = ErrInsecurePath
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// DirScanner provides an efficient interface for reading a stream of encoded
|
|
// [FlatEntry]. Successive calls to the Scan method will step through the
|
|
// entries in the stream.
|
|
type DirScanner struct {
|
|
// Underlying reader to scan [FlatEntry] representations from.
|
|
r io.Reader
|
|
|
|
// First non-EOF I/O error, returned by the Err method.
|
|
err error
|
|
|
|
// Entry to store results in. Its address is returned by the Entry method
|
|
// and is updated on every call to Scan.
|
|
ent FlatEntry
|
|
|
|
// Validate pathnames during decoding.
|
|
validate bool
|
|
}
|
|
|
|
// NewDirScanner returns the address of a new instance of [DirScanner] reading
|
|
// from r. The caller must no longer read from r after this function returns.
|
|
func NewDirScanner(r io.Reader, validate bool) *DirScanner {
|
|
return &DirScanner{r: r, validate: validate}
|
|
}
|
|
|
|
// Err returns the first non-EOF I/O error.
|
|
func (s *DirScanner) Err() error {
|
|
if errors.Is(s.err, io.EOF) {
|
|
return nil
|
|
}
|
|
return s.err
|
|
}
|
|
|
|
// Entry returns the address to the [FlatEntry] value storing the last result.
|
|
func (s *DirScanner) Entry() *FlatEntry { return &s.ent }
|
|
|
|
// Scan advances to the next [FlatEntry].
|
|
func (s *DirScanner) Scan() bool {
|
|
if s.err != nil {
|
|
return false
|
|
}
|
|
|
|
var n int
|
|
n, s.err = s.ent.Decode(s.r, s.validate)
|
|
if errors.Is(s.err, io.EOF) {
|
|
return n != 0
|
|
}
|
|
return s.err == nil
|
|
}
|
|
|
|
// Flatten writes a deterministic representation of the contents of fsys to w.
|
|
// The resulting data can be hashed to produce a deterministic checksum for the
|
|
// directory.
|
|
func Flatten(fsys fs.FS, root string, w io.Writer) (n int, err error) {
|
|
var nr int
|
|
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var fi fs.FileInfo
|
|
fi, err = d.Info()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ent := FlatEntry{
|
|
Path: path,
|
|
Mode: fi.Mode(),
|
|
}
|
|
if ent.Mode.IsRegular() {
|
|
if ent.Data, err = fs.ReadFile(fsys, path); err != nil {
|
|
return err
|
|
}
|
|
} else if ent.Mode&fs.ModeSymlink != 0 {
|
|
var newpath string
|
|
if newpath, err = fs.ReadLink(fsys, path); err != nil {
|
|
return err
|
|
}
|
|
ent.Data = []byte(newpath)
|
|
} else if !ent.Mode.IsDir() {
|
|
return InvalidFileModeError(ent.Mode)
|
|
}
|
|
|
|
nr, err = ent.Encode(w)
|
|
n += nr
|
|
return err
|
|
})
|
|
return
|
|
}
|
|
|
|
// HashFS returns a checksum produced by hashing the result of [Flatten].
|
|
func HashFS(buf *Checksum, fsys fs.FS, root string) error {
|
|
h := sha512.New384()
|
|
if _, err := Flatten(fsys, root, h); err != nil {
|
|
return err
|
|
}
|
|
h.Sum(buf[:0])
|
|
return nil
|
|
}
|
|
|
|
// HashDir returns a checksum produced by hashing the result of [Flatten].
|
|
func HashDir(buf *Checksum, pathname *check.Absolute) error {
|
|
return HashFS(buf, os.DirFS(pathname.String()), ".")
|
|
}
|