ldd: decode from reader

This should reduce the memory footprint of the parsing process and allow decoding only part of the stream.

Signed-off-by: Ophestra <cat@gensokyo.uk>
2025-11-14 08:14:46 +09:00
parent a9d72a5eb1
commit 690a0ed0d6
5 changed files with 273 additions and 139 deletions
--- a/ldd/ldd.go
+++ b/ldd/ldd.go
@@ -2,65 +2,203 @@
 package ldd

 import (
-	"math"
-	"path"
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
 	"strconv"
-	"strings"
+
+	"hakurei.app/container/check"
 )

+var (
+	// ErrUnexpectedNewline is returned when encountering an unexpected empty line.
+	ErrUnexpectedNewline = errors.New("unexpected newline")
+	// ErrUnexpectedSeparator is returned when encountering an unexpected separator segment.
+	ErrUnexpectedSeparator = errors.New("unexpected separator")
+	// ErrBadLocationFormat is returned for an incorrectly formatted [Entry.Location] segment.
+	ErrBadLocationFormat = errors.New("bad location format")
+)
+
+// EntryUnexpectedSegmentsError is returned when encountering
+// a line containing an unexpected number of segments.
+type EntryUnexpectedSegmentsError string
+
+func (e EntryUnexpectedSegmentsError) Error() string {
+	return fmt.Sprintf("unexpected segments in entry %q", string(e))
+}
+
+// An Entry represents one line of ldd(1) output.
 type Entry struct {
-	Name     string `json:"name,omitempty"`
-	Path     string `json:"path,omitempty"`
+	// File name of required object.
+	Name string `json:"name"`
+	// Absolute pathname of matched object. Only populated for the long variant.
+	Path *check.Absolute `json:"path,omitempty"`
+	// Address at which the object is loaded.
 	Location uint64 `json:"location"`
 }

-func Parse(p []byte) ([]*Entry, error) {
-	payload := strings.Split(strings.TrimSpace(string(p)), "\n")
-	result := make([]*Entry, len(payload))
-
-	for i, ent := range payload {
-		if len(ent) == 0 {
-			return nil, ErrUnexpectedNewline
+// Path returns a deduplicated slice of absolute directory paths in entries.
+func Path(entries []*Entry) []*check.Absolute {
+	p := make([]*check.Absolute, 0, len(entries)*2)
+	for _, entry := range entries {
+		if entry.Path != nil {
+			p = append(p, entry.Path.Dir())
 		}
-
-		segment := strings.SplitN(ent, " ", 5)
-
-		// location index
-		var iL int
-
-		switch len(segment) {
-		case 2: // /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
-			iL = 1
-			result[i] = &Entry{Name: strings.TrimSpace(segment[0])}
-		case 4: // libc.musl-x86_64.so.1 => /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
-			iL = 3
-			if segment[1] != "=>" {
-				return nil, ErrUnexpectedSeparator
-			}
-			if !path.IsAbs(segment[2]) {
-				return nil, ErrPathNotAbsolute
-			}
-			result[i] = &Entry{
-				Name: strings.TrimSpace(segment[0]),
-				Path: segment[2],
-			}
-		default:
-			return nil, EntryUnexpectedSegmentsError(ent)
+		if a, err := check.NewAbs(entry.Name); err == nil {
+			p = append(p, a.Dir())
 		}
+	}
+	check.SortAbs(p)
+	return check.CompactAbs(p)
+}

-		if loc, err := parseLocation(segment[iL]); err != nil {
-			return nil, err
+const (
+	// entrySegmentIndexName is the index of the segment holding [Entry.Name].
+	entrySegmentIndexName = 0
+	// entrySegmentIndexPath is the index of the segment holding [Entry.Path],
+	// present only for a line describing a fully populated [Entry].
+	entrySegmentIndexPath = 2
+	// entrySegmentIndexSeparator is the index of the segment containing the magic bytes entrySegmentFullSeparator,
+	// present only for a line describing a fully populated [Entry].
+	entrySegmentIndexSeparator = 1
+	// entrySegmentIndexLocation is the index of the segment holding [Entry.Location]
+	// for a line describing a fully populated [Entry].
+	entrySegmentIndexLocation = 3
+	// entrySegmentIndexLocationShort is the index of the segment holding [Entry.Location]
+	// for a line describing only [Entry.Name].
+	entrySegmentIndexLocationShort = 1
+
+	// entrySegmentSep is the byte separating segments in an [Entry] line.
+	entrySegmentSep = ' '
+	// entrySegmentFullSeparator is the exact contents of the segment at index entrySegmentIndexSeparator.
+	entrySegmentFullSeparator = "=>"
+
+	// entrySegmentLocationLengthMin is the minimum possible length of a segment corresponding to [Entry.Location].
+	entrySegmentLocationLengthMin = 4
+	// entrySegmentLocationPrefix are magic bytes prefixing a segment corresponding to [Entry.Location].
+	entrySegmentLocationPrefix = "(0x"
+	// entrySegmentLocationSuffix is the magic byte suffixing a segment corresponding to [Entry.Location].
+	entrySegmentLocationSuffix = ')'
+)
+
+// decodeLocationSegment decodes and saves the segment corresponding to [Entry.Location].
+func (e *Entry) decodeLocationSegment(segment []byte) (err error) {
+	if len(segment) < entrySegmentLocationLengthMin ||
+		segment[len(segment)-1] != entrySegmentLocationSuffix ||
+		string(segment[:len(entrySegmentLocationPrefix)]) != entrySegmentLocationPrefix {
+		return ErrBadLocationFormat
+	}
+
+	e.Location, err = strconv.ParseUint(string(segment[3:len(segment)-1]), 16, 64)
+	return
+}
+
+// UnmarshalText parses a line of ldd(1) output and saves it to [Entry].
+func (e *Entry) UnmarshalText(data []byte) error {
+	var (
+		segments = bytes.SplitN(data, []byte{entrySegmentSep}, 5)
+		// index of the segment to pass to decodeLocationSegment
+		iL int
+	)
+
+	switch len(segments) {
+	case 2: // /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
+		iL = entrySegmentIndexLocationShort
+		e.Name = string(bytes.TrimSpace(segments[entrySegmentIndexName]))
+
+	case 4: // libc.musl-x86_64.so.1 => /lib/ld-musl-x86_64.so.1 (0x7f04d14ef000)
+		iL = entrySegmentIndexLocation
+		if string(segments[entrySegmentIndexSeparator]) != entrySegmentFullSeparator {
+			return ErrUnexpectedSeparator
+		}
+		if a, err := check.NewAbs(string(segments[entrySegmentIndexPath])); err != nil {
+			return err
 		} else {
-			result[i].Location = loc
+			e.Path = a
 		}
+		e.Name = string(bytes.TrimSpace(segments[entrySegmentIndexName]))
+
+	default:
+		return EntryUnexpectedSegmentsError(data)
 	}

-	return result, nil
+	return e.decodeLocationSegment(segments[iL])
 }

-func parseLocation(s string) (uint64, error) {
-	if len(s) < 4 || s[len(s)-1] != ')' || s[:3] != "(0x" {
-		return math.MaxUint64, ErrBadLocationFormat
-	}
-	return strconv.ParseUint(s[3:len(s)-1], 16, 64)
+// A Decoder reads and decodes [Entry] values from an input stream.
+//
+// The zero value is not safe for use.
+type Decoder struct {
+	s *bufio.Scanner
+
+	// Whether the current line is not the first line.
+	notFirst bool
+	// Whether s has no more tokens.
+	depleted bool
+	// Holds onto the first error encountered while parsing.
+	err error
 }
+
+// NewDecoder returns a new decoder that reads from r.
+//
+// The decoder introduces its own buffering and may read
+// data from r beyond the [Entry] values requested.
+func NewDecoder(r io.Reader) *Decoder { return &Decoder{s: bufio.NewScanner(r)} }
+
+// Scan advances the [Decoder] to the next [Entry] and
+// stores the result in the value pointed to by v.
+func (d *Decoder) Scan(v *Entry) bool {
+	if d.s == nil || d.err != nil || d.depleted {
+		return false
+	}
+	if !d.s.Scan() {
+		d.depleted = true
+		return false
+	}
+
+	data := d.s.Bytes()
+	if len(data) == 0 {
+		if d.notFirst {
+			if d.s.Scan() && d.err == nil {
+				d.err = ErrUnexpectedNewline
+			}
+			// trailing newline is allowed (glibc)
+			return false
+		}
+
+		// leading newline is allowed (musl)
+		d.notFirst = true
+		return d.Scan(v)
+	}
+
+	d.notFirst = true
+	d.err = v.UnmarshalText(data)
+	return d.err == nil
+}
+
+// Err returns the first non-EOF error that was encountered
+// by the underlying [bufio.Scanner] or [Entry].
+func (d *Decoder) Err() error {
+	if d.err != nil || d.s == nil {
+		return d.err
+	}
+	return d.s.Err()
+}
+
+// Decode reads from the input stream until there are no more entries
+// and returns the results in a slice.
+func (d *Decoder) Decode() ([]*Entry, error) {
+	var entries []*Entry
+
+	e := new(Entry)
+	for d.Scan(e) {
+		entries = append(entries, e)
+		e = new(Entry)
+	}
+	return entries, d.Err()
+}
+
+// Parse returns a slice of pointers to [Entry] values decoded from p.
+func Parse(p []byte) ([]*Entry, error) { return NewDecoder(bytes.NewReader(p)).Decode() }