cmd/msrfetch: fetch from enriched composite data
This creates a content-addressed store for all media files. It is not yet usable, but an API and CDN backend will be built on top of it in a later commit, along with tooling for export and tagging.

Signed-off-by: Yonah <contrib@gensokyo.uk>
This commit is contained in:
parent
21871a387c
commit
da4b1d86d9
3
.gitignore
vendored
3
.gitignore
vendored
@ -7,6 +7,9 @@
|
|||||||
*.pkg
|
*.pkg
|
||||||
/msrfetch
|
/msrfetch
|
||||||
|
|
||||||
|
# Content-addressed media files
|
||||||
|
/data
|
||||||
|
|
||||||
# Test binary, built with `go test -c`
|
# Test binary, built with `go test -c`
|
||||||
*.test
|
*.test
|
||||||
|
|
||||||
|
147
cmd/msrfetch/fetch.go
Normal file
147
cmd/msrfetch/fetch.go
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"crypto/sha512"
|
||||||
|
"encoding/hex"
|
||||||
|
"flag"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"slices"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"git.gensokyo.uk/yonah/monstersirenfetch"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Command-line options read by the fetch subcommand.
var (
	// flagFetchDirPath holds the value of -d: the directory that receives
	// the content-addressed media files.
	flagFetchDirPath string
	// flagMaxConnections holds the value of -j: the number of downloads
	// allowed to run at the same time.
	flagMaxConnections int
)

// init binds the fetch options to the process-wide flag set so they are
// populated when the command line is parsed.
func init() {
	fs := flag.CommandLine
	fs.IntVar(&flagMaxConnections, "j", 1, "Maximum number of simultaneous connections allowed")
	fs.StringVar(&flagFetchDirPath, "d", "data", "Path to write content-addressed media files to")
}
|
||||||
|
|
||||||
|
func mustFetch(ctx context.Context) {
|
||||||
|
var c monstersirenfetch.CompositeAlbumsMap
|
||||||
|
mustReadJSON(flagOutputPath, &c)
|
||||||
|
|
||||||
|
const (
|
||||||
|
invalidContainsNil = "invalid composite data"
|
||||||
|
)
|
||||||
|
|
||||||
|
var urls []string
|
||||||
|
for _, ca := range c {
|
||||||
|
if ca.Album == nil {
|
||||||
|
log.Fatal(invalidContainsNil)
|
||||||
|
}
|
||||||
|
if ca.CoverURL == "" {
|
||||||
|
log.Fatalf("album %s missing coverUrl", ca.CID.String())
|
||||||
|
}
|
||||||
|
urls = append(urls, ca.CoverURL)
|
||||||
|
|
||||||
|
for _, cs := range ca.Songs {
|
||||||
|
if cs == nil {
|
||||||
|
log.Fatal(invalidContainsNil)
|
||||||
|
}
|
||||||
|
if !cs.IsFull() {
|
||||||
|
log.Fatal("this composite is not enriched")
|
||||||
|
}
|
||||||
|
|
||||||
|
urls = append(urls, cs.SourceURL)
|
||||||
|
if cs.LyricURL != "" {
|
||||||
|
urls = append(urls, cs.LyricURL)
|
||||||
|
}
|
||||||
|
if cs.MvURL != "" {
|
||||||
|
urls = append(urls, cs.MvURL)
|
||||||
|
}
|
||||||
|
if cs.MvCoverURL != "" {
|
||||||
|
urls = append(urls, cs.MvCoverURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.Sort(urls)
|
||||||
|
urls = slices.Compact(urls)
|
||||||
|
|
||||||
|
if err := os.MkdirAll(flagFetchDirPath, 0755); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
n := new(netDirect)
|
||||||
|
|
||||||
|
var (
|
||||||
|
wg sync.WaitGroup
|
||||||
|
mu sync.RWMutex
|
||||||
|
uc = make(chan string)
|
||||||
|
|
||||||
|
urlMap = make(map[[sha512.Size]byte]string, len(urls))
|
||||||
|
urlMapHs = make(map[string]string, len(urls))
|
||||||
|
)
|
||||||
|
|
||||||
|
if flagMaxConnections < 1 {
|
||||||
|
log.Fatalf("%d out of range", flagMaxConnections)
|
||||||
|
}
|
||||||
|
log.Printf("fetching %d files across %d connections", len(urls), flagMaxConnections)
|
||||||
|
for i := 0; i < flagMaxConnections; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(t int) {
|
||||||
|
defer wg.Done()
|
||||||
|
for u := range uc {
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
if r, l, err := n.Get(ctx, u); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
} else {
|
||||||
|
if v := int(l); v > 0 {
|
||||||
|
buf.Grow(v)
|
||||||
|
}
|
||||||
|
if _, err = io.Copy(buf, r); err != nil {
|
||||||
|
if closeErr := r.Close(); closeErr != nil {
|
||||||
|
log.Print(closeErr)
|
||||||
|
}
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err = r.Close(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s := sha512.Sum512(buf.Bytes())
|
||||||
|
hs := hex.EncodeToString(s[:])
|
||||||
|
|
||||||
|
mu.RLock()
|
||||||
|
if v, ok := urlMap[s]; ok {
|
||||||
|
log.Fatalf("file %s and %s has identical content", u, v)
|
||||||
|
}
|
||||||
|
mu.RUnlock()
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
urlMap[s] = u
|
||||||
|
urlMapHs[hs] = u
|
||||||
|
if err := os.WriteFile(
|
||||||
|
path.Join(flagFetchDirPath, hs),
|
||||||
|
buf.Bytes(), 0644); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
log.Printf("%s created from %s (%d)", hs, u, t)
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, u := range urls {
|
||||||
|
uc <- u
|
||||||
|
}
|
||||||
|
close(uc)
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
{
|
||||||
|
pathname := path.Join(flagFetchDirPath, "map")
|
||||||
|
mustWriteJSON(pathname, urlMapHs)
|
||||||
|
log.Println("map written to", pathname)
|
||||||
|
}
|
||||||
|
}
|
@ -28,6 +28,9 @@ func main() {
|
|||||||
case "enrich":
|
case "enrich":
|
||||||
mustEnrich(ctx)
|
mustEnrich(ctx)
|
||||||
|
|
||||||
|
case "fetch":
|
||||||
|
mustFetch(ctx)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
log.Fatalf("%q is not a valid command", flag.Args()[0])
|
log.Fatalf("%q is not a valid command", flag.Args()[0])
|
||||||
}
|
}
|
||||||
|
1155
testdata/output/fetch.log
vendored
Normal file
1155
testdata/output/fetch.log
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1
testdata/output/map.json
vendored
Normal file
1
testdata/output/map.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
testdata/output/urls
vendored
1
testdata/output/urls
vendored
@ -889,7 +889,6 @@ https://web.hycdn.cn/siren/lyric/20250205/9a5eddbaabd7721be7ebbf6a0e272183.lrc
|
|||||||
https://web.hycdn.cn/siren/lyric/20250307/44662fd516a0839394abb5616f44b584.lrc
|
https://web.hycdn.cn/siren/lyric/20250307/44662fd516a0839394abb5616f44b584.lrc
|
||||||
https://web.hycdn.cn/siren/lyric/20250407/30c96fe96915b0f15552926166610319.lrc
|
https://web.hycdn.cn/siren/lyric/20250407/30c96fe96915b0f15552926166610319.lrc
|
||||||
https://web.hycdn.cn/siren/lyric/20250415/d38345245de559efccfee217dec2890f.lrc
|
https://web.hycdn.cn/siren/lyric/20250415/d38345245de559efccfee217dec2890f.lrc
|
||||||
https://web.hycdn.cn/siren/lyric/20250415/d38345245de559efccfee217dec2890f.lrc
|
|
||||||
https://web.hycdn.cn/siren/lyric/20250430/19f12a6e5b193f4010f91e1bd82518ba.lrc
|
https://web.hycdn.cn/siren/lyric/20250430/19f12a6e5b193f4010f91e1bd82518ba.lrc
|
||||||
https://web.hycdn.cn/siren/lyric/20250430/4d584eea0711d292b5d170d47cad7b55.lrc
|
https://web.hycdn.cn/siren/lyric/20250430/4d584eea0711d292b5d170d47cad7b55.lrc
|
||||||
https://web.hycdn.cn/siren/lyric/20250430/cfd84e70b19dc9a47fdf3d4c00341a63.lrc
|
https://web.hycdn.cn/siren/lyric/20250430/cfd84e70b19dc9a47fdf3d4c00341a63.lrc
|
||||||
|
Loading…
x
Reference in New Issue
Block a user