// Command entry support: fetches all media referenced by enriched metadata
// into a content-addressed directory (files named by hex SHA-512 of content).
package main

import (
	"bytes"
	"context"
	"crypto/sha512"
	"encoding/hex"
	"flag"
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"slices"
	"sync"

	"git.gensokyo.uk/yonah/monstersirenfetch"
)

var (
	// flagFetchDirPath is the directory fetched files are written to.
	flagFetchDirPath string
	// flagMaxConnections bounds the number of concurrent fetch workers.
	flagMaxConnections int
)

func init() {
	flag.StringVar(&flagFetchDirPath, "d", "data", "Path to write content-addressed media files to")
	flag.IntVar(&flagMaxConnections, "j", 1, "Maximum number of simultaneous connections allowed")
}

// mustFetch loads enriched metadata from flagOutputPath, collects every media
// URL it references (album covers, song sources, lyrics, MVs), downloads each
// unique URL across flagMaxConnections workers, writes each payload to
// flagFetchDirPath under its hex SHA-512 digest, and finally writes a
// digest→URL map as JSON. Any error terminates the process via log.Fatal.
func mustFetch(ctx context.Context) {
	const (
		invalidContainsNil = "invalid metadata"
		invalidNotEnriched = "this metadata is not enriched"
	)

	var metadata *monstersirenfetch.Metadata
	mustReadJSON(flagOutputPath, &metadata)
	if metadata == nil {
		log.Fatal(invalidContainsNil)
	}

	// Collect every referenced URL; optional fields are skipped when empty,
	// but a missing album cover is treated as corrupt metadata.
	var urls []string
	for i := range metadata.Albums {
		a := &metadata.Albums[i]
		if !a.IsFull() {
			log.Fatal(invalidNotEnriched)
		}
		if a.CoverURL == "" {
			log.Fatalf("album %s missing coverUrl", a.CID.String())
		}
		urls = append(urls, a.CoverURL)
		if a.CoverDeURL != "" {
			urls = append(urls, a.CoverDeURL)
		}
	}
	for i := range metadata.Songs {
		s := &metadata.Songs[i]
		if !s.IsFull() {
			log.Fatal(invalidNotEnriched)
		}
		urls = append(urls, s.SourceURL)
		if s.LyricURL != "" {
			urls = append(urls, s.LyricURL)
		}
		if s.MvURL != "" {
			urls = append(urls, s.MvURL)
		}
		if s.MvCoverURL != "" {
			urls = append(urls, s.MvCoverURL)
		}
	}
	// Deduplicate so each URL is fetched exactly once.
	slices.Sort(urls)
	urls = slices.Compact(urls)

	if err := os.MkdirAll(flagFetchDirPath, 0755); err != nil {
		log.Fatal(err)
	}

	n := new(netDirect)
	var (
		wg sync.WaitGroup
		// mu guards urlMap and urlMapHs. A plain Mutex: the duplicate check
		// and the insert must run in ONE critical section — the previous
		// RLock-check / Unlock / Lock-insert sequence allowed two workers to
		// pass the duplicate check concurrently and miss a collision.
		mu       sync.Mutex
		uc       = make(chan string)
		urlMap   = make(map[[sha512.Size]byte]string, len(urls))
		urlMapHs = make(map[string]string, len(urls))
	)

	if flagMaxConnections < 1 {
		log.Fatalf("%d out of range", flagMaxConnections)
	}
	log.Printf("fetching %d files across %d connections", len(urls), flagMaxConnections)

	for i := 0; i < flagMaxConnections; i++ {
		wg.Add(1)
		go func(t int) {
			defer wg.Done()

			for u := range uc {
				buf := new(bytes.Buffer)
				if resp, err := n.Get(ctx, u); err != nil {
					log.Fatal(err)
				} else if resp.StatusCode != http.StatusOK {
					log.Fatal(&monstersirenfetch.ResponseStatusError{URL: u, StatusCode: resp.StatusCode, CloseErr: resp.Body.Close()})
				} else {
					// Pre-size the buffer when the server declares a length.
					if v := int(resp.ContentLength); v > 0 {
						buf.Grow(v)
					}
					if _, err = io.Copy(buf, resp.Body); err != nil {
						if closeErr := resp.Body.Close(); closeErr != nil {
							log.Print(closeErr)
						}
						log.Fatal(err)
					}
					if err = resp.Body.Close(); err != nil {
						log.Fatal(err)
					}
				}

				s := sha512.Sum512(buf.Bytes())
				hs := hex.EncodeToString(s[:])

				// Atomic check-and-insert: detect two distinct URLs yielding
				// identical content, then record both map entries.
				mu.Lock()
				if v, ok := urlMap[s]; ok {
					mu.Unlock()
					log.Fatalf("file %s and %s has identical content", u, v)
				}
				urlMap[s] = u
				urlMapHs[hs] = u
				mu.Unlock()

				// Write outside the lock: the filename is unique per content
				// hash, so concurrent workers never collide on a path, and
				// disk I/O no longer serializes the whole pool.
				if err := os.WriteFile(
					filepath.Join(flagFetchDirPath, hs),
					buf.Bytes(), 0644); err != nil {
					log.Fatal(err)
				}
				log.Printf("%s created from %s (%d)", hs, u, t)
			}
		}(i)
	}

	for _, u := range urls {
		uc <- u
	}
	close(uc)
	wg.Wait()

	{
		pathname := filepath.Join(flagFetchDirPath, "map")
		mustWriteJSON(pathname, urlMapHs)
		log.Println("map written to", pathname)
	}
}