Yonah 0999ccd211
cmd/msrfetch: preserve metadata order
The composite maps are no longer needed, as the new Album variant supersedes
them. They are also unordered, while the base variants of both endpoints are
ordered. Composite is therefore only used for validation in the current
implementation.

Signed-off-by: Yonah <contrib@gensokyo.uk>
2025-09-18 21:56:45 +09:00

153 lines
3.0 KiB
Go

package main
import (
"bytes"
"context"
"crypto/sha512"
"encoding/hex"
"flag"
"io"
"log"
"os"
"path"
"slices"
"sync"
"git.gensokyo.uk/yonah/monstersirenfetch"
)
var (
	// flagFetchDirPath is the destination directory for content-addressed
	// media files (-d flag; see init).
	flagFetchDirPath string
	// flagMaxConnections caps the number of concurrent download workers
	// started by mustFetch (-j flag; must be >= 1).
	flagMaxConnections int
)
func init() {
flag.StringVar(&flagFetchDirPath, "d", "data", "Path to write content-addressed media files to")
flag.IntVar(&flagMaxConnections, "j", 1, "Maximum number of simultaneous connections allowed")
}
// mustFetch reads the enriched metadata written to flagOutputPath, collects
// every media URL it references (album covers, song sources, lyrics, MVs and
// MV covers), downloads each unique URL with up to flagMaxConnections
// concurrent workers, stores each payload in flagFetchDirPath under the
// hex-encoded SHA-512 of its content, and finally writes a "map" file
// associating content hashes back to their source URLs. Any failure is fatal.
func mustFetch(ctx context.Context) {
	const (
		invalidContainsNil = "invalid metadata"
		invalidNotEnriched = "this metadata is not enriched"
	)

	var metadata *monstersirenfetch.Metadata
	mustReadJSON(flagOutputPath, &metadata)
	if metadata == nil {
		log.Fatal(invalidContainsNil)
	}

	// Collect every referenced URL, validating enrichment as we go.
	var urls []string
	for i := range metadata.Albums {
		a := &metadata.Albums[i]
		if !a.IsFull() {
			log.Fatal(invalidNotEnriched)
		}
		if a.CoverURL == "" {
			log.Fatalf("album %s missing coverUrl", a.CID.String())
		}
		urls = append(urls, a.CoverURL)
		if a.CoverDeURL != "" {
			urls = append(urls, a.CoverDeURL)
		}
	}
	for i := range metadata.Songs {
		s := &metadata.Songs[i]
		if !s.IsFull() {
			log.Fatal(invalidNotEnriched)
		}
		urls = append(urls, s.SourceURL)
		if s.LyricURL != "" {
			urls = append(urls, s.LyricURL)
		}
		if s.MvURL != "" {
			urls = append(urls, s.MvURL)
		}
		if s.MvCoverURL != "" {
			urls = append(urls, s.MvCoverURL)
		}
	}
	// Albums and songs may reference the same URL; fetch each one once.
	slices.Sort(urls)
	urls = slices.Compact(urls)

	// Validate configuration before producing any filesystem side effects.
	if flagMaxConnections < 1 {
		log.Fatalf("%d out of range", flagMaxConnections)
	}
	if err := os.MkdirAll(flagFetchDirPath, 0755); err != nil {
		log.Fatal(err)
	}

	n := new(netDirect)
	var (
		wg sync.WaitGroup
		// mu guards urlMap and urlMapHs. A plain Mutex suffices: the
		// duplicate check and the insert must happen atomically (see below),
		// so there is no read-only fast path left for an RWMutex.
		mu       sync.Mutex
		uc       = make(chan string)
		urlMap   = make(map[[sha512.Size]byte]string, len(urls))
		urlMapHs = make(map[string]string, len(urls))
	)
	log.Printf("fetching %d files across %d connections", len(urls), flagMaxConnections)
	for i := 0; i < flagMaxConnections; i++ {
		wg.Add(1)
		go func(t int) {
			defer wg.Done()
			for u := range uc {
				buf := new(bytes.Buffer)
				if r, l, err := n.Get(ctx, u); err != nil {
					log.Fatal(err)
				} else {
					// Pre-size the buffer when the content length is known.
					if v := int(l); v > 0 {
						buf.Grow(v)
					}
					if _, err = io.Copy(buf, r); err != nil {
						if closeErr := r.Close(); closeErr != nil {
							log.Print(closeErr)
						}
						log.Fatal(err)
					}
					if err = r.Close(); err != nil {
						log.Fatal(err)
					}
				}
				s := sha512.Sum512(buf.Bytes())
				hs := hex.EncodeToString(s[:])

				// Check and insert under a single critical section. The
				// previous RLock-check / Lock-insert split was a
				// check-then-act race: two workers whose payloads hash
				// identically could both pass the duplicate check and then
				// silently overwrite each other's map entries.
				mu.Lock()
				if v, ok := urlMap[s]; ok {
					mu.Unlock()
					log.Fatalf("file %s and %s has identical content", u, v)
				}
				urlMap[s] = u
				urlMapHs[hs] = u
				mu.Unlock()

				// Write the payload outside the lock so slow disk I/O does
				// not serialize the other workers.
				if err := os.WriteFile(
					path.Join(flagFetchDirPath, hs),
					buf.Bytes(), 0644); err != nil {
					log.Fatal(err)
				}
				log.Printf("%s created from %s (%d)", hs, u, t)
			}
		}(i)
	}
	for _, u := range urls {
		uc <- u
	}
	close(uc)
	wg.Wait()

	{
		pathname := path.Join(flagFetchDirPath, "map")
		mustWriteJSON(pathname, urlMapHs)
		log.Println("map written to", pathname)
	}
}