cmd/msrfetch: fetch from enriched composite data
This creates a content-addressed store for all media files. It is not yet usable, but an API and CDN backend will be built on top of it in a later commit, along with tooling for export and tagging. Signed-off-by: Yonah <contrib@gensokyo.uk>
This commit is contained in:
parent
21871a387c
commit
da4b1d86d9
3
.gitignore
vendored
3
.gitignore
vendored
@ -7,6 +7,9 @@
|
||||
*.pkg
|
||||
/msrfetch
|
||||
|
||||
# Content-addressed media files
|
||||
/data
|
||||
|
||||
# Test binary, built with `go test -c`
|
||||
*.test
|
||||
|
||||
|
147
cmd/msrfetch/fetch.go
Normal file
147
cmd/msrfetch/fetch.go
Normal file
@ -0,0 +1,147 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha512"
|
||||
"encoding/hex"
|
||||
"flag"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"git.gensokyo.uk/yonah/monstersirenfetch"
|
||||
)
|
||||
|
||||
// flagFetchDirPath holds the value of the -d flag: the directory that
// content-addressed media files are written to.
var flagFetchDirPath string

// flagMaxConnections holds the value of the -j flag: the upper bound on
// simultaneous connections.
var flagMaxConnections int

// init registers the fetch-related command line flags on the default flag set.
func init() {
	const (
		defaultFetchDir       = "data"
		defaultMaxConnections = 1
	)

	flag.IntVar(&flagMaxConnections, "j", defaultMaxConnections, "Maximum number of simultaneous connections allowed")
	flag.StringVar(&flagFetchDirPath, "d", defaultFetchDir, "Path to write content-addressed media files to")
}
|
||||
|
||||
func mustFetch(ctx context.Context) {
|
||||
var c monstersirenfetch.CompositeAlbumsMap
|
||||
mustReadJSON(flagOutputPath, &c)
|
||||
|
||||
const (
|
||||
invalidContainsNil = "invalid composite data"
|
||||
)
|
||||
|
||||
var urls []string
|
||||
for _, ca := range c {
|
||||
if ca.Album == nil {
|
||||
log.Fatal(invalidContainsNil)
|
||||
}
|
||||
if ca.CoverURL == "" {
|
||||
log.Fatalf("album %s missing coverUrl", ca.CID.String())
|
||||
}
|
||||
urls = append(urls, ca.CoverURL)
|
||||
|
||||
for _, cs := range ca.Songs {
|
||||
if cs == nil {
|
||||
log.Fatal(invalidContainsNil)
|
||||
}
|
||||
if !cs.IsFull() {
|
||||
log.Fatal("this composite is not enriched")
|
||||
}
|
||||
|
||||
urls = append(urls, cs.SourceURL)
|
||||
if cs.LyricURL != "" {
|
||||
urls = append(urls, cs.LyricURL)
|
||||
}
|
||||
if cs.MvURL != "" {
|
||||
urls = append(urls, cs.MvURL)
|
||||
}
|
||||
if cs.MvCoverURL != "" {
|
||||
urls = append(urls, cs.MvCoverURL)
|
||||
}
|
||||
}
|
||||
}
|
||||
slices.Sort(urls)
|
||||
urls = slices.Compact(urls)
|
||||
|
||||
if err := os.MkdirAll(flagFetchDirPath, 0755); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
n := new(netDirect)
|
||||
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
mu sync.RWMutex
|
||||
uc = make(chan string)
|
||||
|
||||
urlMap = make(map[[sha512.Size]byte]string, len(urls))
|
||||
urlMapHs = make(map[string]string, len(urls))
|
||||
)
|
||||
|
||||
if flagMaxConnections < 1 {
|
||||
log.Fatalf("%d out of range", flagMaxConnections)
|
||||
}
|
||||
log.Printf("fetching %d files across %d connections", len(urls), flagMaxConnections)
|
||||
for i := 0; i < flagMaxConnections; i++ {
|
||||
wg.Add(1)
|
||||
go func(t int) {
|
||||
defer wg.Done()
|
||||
for u := range uc {
|
||||
buf := new(bytes.Buffer)
|
||||
if r, l, err := n.Get(ctx, u); err != nil {
|
||||
log.Fatal(err)
|
||||
} else {
|
||||
if v := int(l); v > 0 {
|
||||
buf.Grow(v)
|
||||
}
|
||||
if _, err = io.Copy(buf, r); err != nil {
|
||||
if closeErr := r.Close(); closeErr != nil {
|
||||
log.Print(closeErr)
|
||||
}
|
||||
log.Fatal(err)
|
||||
}
|
||||
if err = r.Close(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
s := sha512.Sum512(buf.Bytes())
|
||||
hs := hex.EncodeToString(s[:])
|
||||
|
||||
mu.RLock()
|
||||
if v, ok := urlMap[s]; ok {
|
||||
log.Fatalf("file %s and %s has identical content", u, v)
|
||||
}
|
||||
mu.RUnlock()
|
||||
|
||||
mu.Lock()
|
||||
urlMap[s] = u
|
||||
urlMapHs[hs] = u
|
||||
if err := os.WriteFile(
|
||||
path.Join(flagFetchDirPath, hs),
|
||||
buf.Bytes(), 0644); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
mu.Unlock()
|
||||
|
||||
log.Printf("%s created from %s (%d)", hs, u, t)
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
for _, u := range urls {
|
||||
uc <- u
|
||||
}
|
||||
close(uc)
|
||||
wg.Wait()
|
||||
|
||||
{
|
||||
pathname := path.Join(flagFetchDirPath, "map")
|
||||
mustWriteJSON(pathname, urlMapHs)
|
||||
log.Println("map written to", pathname)
|
||||
}
|
||||
}
|
@ -28,6 +28,9 @@ func main() {
|
||||
case "enrich":
|
||||
mustEnrich(ctx)
|
||||
|
||||
case "fetch":
|
||||
mustFetch(ctx)
|
||||
|
||||
default:
|
||||
log.Fatalf("%q is not a valid command", flag.Args()[0])
|
||||
}
|
||||
|
1155
testdata/output/fetch.log
vendored
Normal file
1155
testdata/output/fetch.log
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1
testdata/output/map.json
vendored
Normal file
1
testdata/output/map.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
testdata/output/urls
vendored
1
testdata/output/urls
vendored
@ -889,7 +889,6 @@ https://web.hycdn.cn/siren/lyric/20250205/9a5eddbaabd7721be7ebbf6a0e272183.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250307/44662fd516a0839394abb5616f44b584.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250407/30c96fe96915b0f15552926166610319.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250415/d38345245de559efccfee217dec2890f.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250415/d38345245de559efccfee217dec2890f.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250430/19f12a6e5b193f4010f91e1bd82518ba.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250430/4d584eea0711d292b5d170d47cad7b55.lrc
|
||||
https://web.hycdn.cn/siren/lyric/20250430/cfd84e70b19dc9a47fdf3d4c00341a63.lrc
|
||||
|
Loading…
x
Reference in New Issue
Block a user