Yonah da4b1d86d9
cmd/msrfetch: fetch from enriched composite data
This creates a content-addressed store for all media files. It is not
yet usable, but an API and CDN backend will be implemented from it in a
later commit, as well as tooling for export and tagging.

Signed-off-by: Yonah <contrib@gensokyo.uk>
2025-09-18 07:20:36 +09:00

148 lines
2.9 KiB
Go

package main
import (
"bytes"
"context"
"crypto/sha512"
"encoding/hex"
"flag"
"io"
"log"
"os"
"path"
"slices"
"sync"
"git.gensokyo.uk/yonah/monstersirenfetch"
)
// flagFetchDirPath is the directory that fetched media files are written to.
// It is bound to the -d command line flag.
var flagFetchDirPath string

// flagMaxConnections is the number of concurrent download workers.
// It is bound to the -j command line flag.
var flagMaxConnections int
// init registers the fetch-related flags on the default command line flag set.
func init() {
	fs := flag.CommandLine

	// -d: destination directory of the content-addressed store.
	fs.StringVar(&flagFetchDirPath, "d", "data", "Path to write content-addressed media files to")
	// -j: upper bound on concurrent connections.
	fs.IntVar(&flagMaxConnections, "j", 1, "Maximum number of simultaneous connections allowed")
}
// mustFetch downloads every media file referenced by the composite data read
// from flagOutputPath and stores each one under flagFetchDirPath, named after
// the hex-encoded SHA-512 digest of its content. Once every download has
// finished, a digest-to-URL map is handed to mustWriteJSON for the "map" file
// in the same directory.
//
// URLs are collected from each album's cover and from each song's source,
// lyric, MV and MV cover fields (the latter three only when non-empty), then
// sorted and deduplicated. Downloads are spread over flagMaxConnections worker
// goroutines.
//
// Failures detected here terminate the process through log.Fatal: nil or
// non-enriched composite entries, a missing cover URL, flagMaxConnections
// below 1, a fetch or filesystem error, or two distinct URLs yielding
// identical content.
func mustFetch(ctx context.Context) {
	var c monstersirenfetch.CompositeAlbumsMap
	mustReadJSON(flagOutputPath, &c)

	const invalidContainsNil = "invalid composite data"

	var urls []string
	for _, ca := range c {
		if ca.Album == nil {
			log.Fatal(invalidContainsNil)
		}
		if ca.CoverURL == "" {
			log.Fatalf("album %s missing coverUrl", ca.CID.String())
		}
		urls = append(urls, ca.CoverURL)

		for _, cs := range ca.Songs {
			if cs == nil {
				log.Fatal(invalidContainsNil)
			}
			if !cs.IsFull() {
				log.Fatal("this composite is not enriched")
			}
			urls = append(urls, cs.SourceURL)

			// These fields are optional; empty ones are skipped.
			if cs.LyricURL != "" {
				urls = append(urls, cs.LyricURL)
			}
			if cs.MvURL != "" {
				urls = append(urls, cs.MvURL)
			}
			if cs.MvCoverURL != "" {
				urls = append(urls, cs.MvCoverURL)
			}
		}
	}

	// Several albums or songs may reference the same URL; fetch each once.
	slices.Sort(urls)
	urls = slices.Compact(urls)

	// Reject an unusable worker count before touching the filesystem.
	if flagMaxConnections < 1 {
		log.Fatalf("%d out of range", flagMaxConnections)
	}
	if err := os.MkdirAll(flagFetchDirPath, 0755); err != nil {
		log.Fatal(err)
	}

	n := new(netDirect)
	var (
		wg sync.WaitGroup
		// mu guards urlMap and urlMapHs.
		mu sync.Mutex
		uc = make(chan string)

		// urlMap records which URL first produced a given digest.
		urlMap = make(map[[sha512.Size]byte]string, len(urls))
		// urlMapHs holds the same association keyed by the hex digest.
		urlMapHs = make(map[string]string, len(urls))
	)

	log.Printf("fetching %d files across %d connections", len(urls), flagMaxConnections)
	for i := 0; i < flagMaxConnections; i++ {
		wg.Add(1)
		go func(t int) {
			defer wg.Done()

			for u := range uc {
				r, l, err := n.Get(ctx, u)
				if err != nil {
					log.Fatal(err)
				}

				buf := new(bytes.Buffer)
				// l is used only as a size hint (presumably the advertised
				// content length); non-positive values are ignored.
				if v := int(l); v > 0 {
					buf.Grow(v)
				}
				if _, err = io.Copy(buf, r); err != nil {
					if closeErr := r.Close(); closeErr != nil {
						log.Print(closeErr)
					}
					log.Fatal(err)
				}
				if err = r.Close(); err != nil {
					log.Fatal(err)
				}

				s := sha512.Sum512(buf.Bytes())
				hs := hex.EncodeToString(s[:])

				// The duplicate check and the insertion must share a single
				// critical section: releasing the lock in between would let
				// two workers holding identical content both pass the check.
				mu.Lock()
				prev, dup := urlMap[s]
				if !dup {
					urlMap[s] = u
					urlMapHs[hs] = u
				}
				mu.Unlock()
				if dup {
					log.Fatalf("file %s and %s has identical content", u, prev)
				}

				// The digest is now claimed by this worker alone, so the write
				// happens outside the lock and does not stall other workers.
				// NOTE(review): path.Join always uses forward slashes;
				// filepath.Join would be the OS-correct choice.
				if err := os.WriteFile(
					path.Join(flagFetchDirPath, hs),
					buf.Bytes(), 0644); err != nil {
					log.Fatal(err)
				}
				log.Printf("%s created from %s (%d)", hs, u, t)
			}
		}(i)
	}

	// uc is unbuffered, so each send blocks until a worker is free.
	for _, u := range urls {
		uc <- u
	}
	close(uc)
	wg.Wait()

	pathname := path.Join(flagFetchDirPath, "map")
	mustWriteJSON(pathname, urlMapHs)
	log.Println("map written to", pathname)
}