ldd: decode from reader
All checks were successful
Test / Create distribution (push) Successful in 34s
Test / Sandbox (push) Successful in 2m27s
Test / Hakurei (push) Successful in 3m20s
Test / Hpkg (push) Successful in 4m10s
Test / Sandbox (race detector) (push) Successful in 4m18s
Test / Hakurei (race detector) (push) Successful in 5m5s
Test / Flake checks (push) Successful in 1m31s

This should reduce memory footprint of the parsing process and allow decoding part of the stream.

Signed-off-by: Ophestra <cat@gensokyo.uk>
This commit is contained in:
2025-11-14 08:14:46 +09:00
parent a9d72a5eb1
commit 690a0ed0d6
5 changed files with 273 additions and 139 deletions

View File

@@ -2,65 +2,203 @@
package ldd
import (
"math"
"path"
"bufio"
"bytes"
"errors"
"fmt"
"io"
"strconv"
"strings"
"hakurei.app/container/check"
)
var (
// ErrUnexpectedNewline is returned when encountering an unexpected empty line.
ErrUnexpectedNewline = errors.New("unexpected newline")
// ErrUnexpectedSeparator is returned when encountering an unexpected separator segment.
ErrUnexpectedSeparator = errors.New("unexpected separator")
// ErrBadLocationFormat is returned for an incorrectly formatted [Entry.Location] segment.
ErrBadLocationFormat = errors.New("bad location format")
)
// EntryUnexpectedSegmentsError is returned when encountering
// a line containing unexpected number of segments.
type EntryUnexpectedSegmentsError string
func (e EntryUnexpectedSegmentsError) Error() string {
return fmt.Sprintf("unexpected segments in entry %q", string(e))
}
// An Entry represents one line of ldd(1) output.
type Entry struct {
Name string `json:"name,omitempty"`
Path string `json:"path,omitempty"`
// File name of required object.
Name string `json:"name"`
// Absolute pathname of matched object. Only populated for the long variant.
Path *check.Absolute `json:"path,omitempty"`
// Address at which the object is loaded.
Location uint64 `json:"location"`
}
func Parse(p []byte) ([]*Entry, error) {
payload := strings.Split(strings.TrimSpace(string(p)), "\n")
result := make([]*Entry, len(payload))
for i, ent := range payload {
if len(ent) == 0 {
return nil, ErrUnexpectedNewline
// Path returns a deduplicated slice of absolute directory paths in entries.
func Path(entries []*Entry) []*check.Absolute {
p := make([]*check.Absolute, 0, len(entries)*2)
for _, entry := range entries {
if entry.Path != nil {
p = append(p, entry.Path.Dir())
}
segment := strings.SplitN(ent, " ", 5)
// location index
var iL int
switch len(segment) {
case 2: // /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
iL = 1
result[i] = &Entry{Name: strings.TrimSpace(segment[0])}
case 4: // libc.musl-x86_64.so.1 => /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
iL = 3
if segment[1] != "=>" {
return nil, ErrUnexpectedSeparator
}
if !path.IsAbs(segment[2]) {
return nil, ErrPathNotAbsolute
}
result[i] = &Entry{
Name: strings.TrimSpace(segment[0]),
Path: segment[2],
}
default:
return nil, EntryUnexpectedSegmentsError(ent)
if a, err := check.NewAbs(entry.Name); err == nil {
p = append(p, a.Dir())
}
}
check.SortAbs(p)
return check.CompactAbs(p)
}
if loc, err := parseLocation(segment[iL]); err != nil {
return nil, err
const (
// entrySegmentIndexName is the index of the segment holding [Entry.Name].
entrySegmentIndexName = 0
// entrySegmentIndexPath is the index of the segment holding [Entry.Path],
// present only for a line describing a fully populated [Entry].
entrySegmentIndexPath = 2
// entrySegmentIndexSeparator is the index of the segment containing the magic bytes entrySegmentFullSeparator,
// present only for a line describing a fully populated [Entry].
entrySegmentIndexSeparator = 1
// entrySegmentIndexLocation is the index of the segment holding [Entry.Location]
// for a line describing a fully populated [Entry].
entrySegmentIndexLocation = 3
// entrySegmentIndexLocationShort is the index of the segment holding [Entry.Location]
// for a line describing only [Entry.Name].
entrySegmentIndexLocationShort = 1
// entrySegmentSep is the byte separating segments in an [Entry] line.
entrySegmentSep = ' '
// entrySegmentFullSeparator is the exact contents of the segment at index entrySegmentIndexSeparator.
entrySegmentFullSeparator = "=>"
// entrySegmentLocationLengthMin is the minimum possible length of a segment corresponding to [Entry.Location].
entrySegmentLocationLengthMin = 4
// entrySegmentLocationPrefix are magic bytes prefixing a segment corresponding to [Entry.Location].
entrySegmentLocationPrefix = "(0x"
// entrySegmentLocationSuffix is the magic byte suffixing a segment corresponding to [Entry.Location].
entrySegmentLocationSuffix = ')'
)
// decodeLocationSegment decodes and saves the segment corresponding to [Entry.Location].
func (e *Entry) decodeLocationSegment(segment []byte) (err error) {
if len(segment) < entrySegmentLocationLengthMin ||
segment[len(segment)-1] != entrySegmentLocationSuffix ||
string(segment[:len(entrySegmentLocationPrefix)]) != entrySegmentLocationPrefix {
return ErrBadLocationFormat
}
e.Location, err = strconv.ParseUint(string(segment[3:len(segment)-1]), 16, 64)
return
}
// UnmarshalText parses a line of ldd(1) output and saves it to [Entry].
func (e *Entry) UnmarshalText(data []byte) error {
var (
segments = bytes.SplitN(data, []byte{entrySegmentSep}, 5)
// segment to pass to decodeLocationSegment
iL int
)
switch len(segments) {
case 2: // /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
iL = entrySegmentIndexLocationShort
e.Name = string(bytes.TrimSpace(segments[entrySegmentIndexName]))
case 4: // libc.musl-x86_64.so.1 => /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
iL = entrySegmentIndexLocation
if string(segments[entrySegmentIndexSeparator]) != entrySegmentFullSeparator {
return ErrUnexpectedSeparator
}
if a, err := check.NewAbs(string(segments[entrySegmentIndexPath])); err != nil {
return err
} else {
result[i].Location = loc
e.Path = a
}
e.Name = string(bytes.TrimSpace(segments[entrySegmentIndexName]))
default:
return EntryUnexpectedSegmentsError(data)
}
return result, nil
return e.decodeLocationSegment(segments[iL])
}
func parseLocation(s string) (uint64, error) {
if len(s) < 4 || s[len(s)-1] != ')' || s[:3] != "(0x" {
return math.MaxUint64, ErrBadLocationFormat
}
return strconv.ParseUint(s[3:len(s)-1], 16, 64)
// A Decoder reads and decodes [Entry] values from an input stream.
//
// The zero value is not safe for use.
type Decoder struct {
s *bufio.Scanner
// Whether the current line is not the first line.
notFirst bool
// Whether s has no more tokens.
depleted bool
// Holds onto the first error encountered while parsing.
err error
}
// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may read
// data from r beyond the [Entry] values requested.
func NewDecoder(r io.Reader) *Decoder { return &Decoder{s: bufio.NewScanner(r)} }
// Scan advances the [Decoder] to the next [Entry] and
// stores the result in the value pointed to by v.
func (d *Decoder) Scan(v *Entry) bool {
if d.s == nil || d.err != nil || d.depleted {
return false
}
if !d.s.Scan() {
d.depleted = true
return false
}
data := d.s.Bytes()
if len(data) == 0 {
if d.notFirst {
if d.s.Scan() && d.err == nil {
d.err = ErrUnexpectedNewline
}
// trailing newline is allowed (glibc)
return false
}
// leading newline is allowed (musl)
d.notFirst = true
return d.Scan(v)
}
d.notFirst = true
d.err = v.UnmarshalText(data)
return d.err == nil
}
// Err returns the first non-EOF error that was encountered
// by the underlying [bufio.Scanner] or [Entry].
func (d *Decoder) Err() error {
if d.err != nil || d.s == nil {
return d.err
}
return d.s.Err()
}
// Decode reads from the input stream until there are no more entries
// and returns the results in a slice.
func (d *Decoder) Decode() ([]*Entry, error) {
var entries []*Entry
e := new(Entry)
for d.Scan(e) {
entries = append(entries, e)
e = new(Entry)
}
return entries, d.Err()
}
// Parse returns a slice of addresses to [Entry] decoded from p.
func Parse(p []byte) ([]*Entry, error) { return NewDecoder(bytes.NewReader(p)).Decode() }