Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,13 @@
"""Catalog sync package for playlist harvesting and deduplicated downloads."""
from .db import REQUIRED_TABLES, initialize_database
from .models import CatalogSong, PlaylistCandidate, extract_artist_names, normalize_source_name
# Public API of the package: the names re-exported from .db and .models above.
__all__ = [
    "CatalogSong",
    "PlaylistCandidate",
    "REQUIRED_TABLES",
    "extract_artist_names",
    "initialize_database",
    "normalize_source_name",
]
@@ -0,0 +1,66 @@
from __future__ import annotations
from dataclasses import dataclass
import os
import subprocess
from typing import Any, Mapping
@dataclass(frozen=True)
class CatalogExportResult:
    """Immutable outcome of one attempt to run the configured catalog export command."""

    # One of "skipped", "succeeded" or "failed" (the values run_catalog_export_command emits).
    status: str
    # The stripped command line that was run; None when no command was configured.
    command: str | None = None
    # Working directory the command was (or would have been) started in, if configured.
    workdir: str | None = None
    # Exit code of the process; stays None when the process was skipped or never started.
    returncode: int | None = None
    # Captured standard output of the process (text mode).
    stdout: str = ""
    # Captured standard error, or the exception text when launching the process failed.
    stderr: str = ""
def _read_config_value(config_snapshot: Mapping[str, Any] | None, key: str) -> str | None:
    """Look up *key* in the snapshot first and in the process environment second.

    A key that is present in the snapshot always wins, even when its value is
    None (the environment is not consulted in that case).

    Returns:
        The value converted with ``str()``, or None when the chosen source
        holds None / does not define the key.
    """
    source = config_snapshot if config_snapshot else {}
    raw = source.get(key) if key in source else os.environ.get(key)
    if raw is None:
        return None
    return str(raw)
def run_catalog_export_command(config_snapshot: Mapping[str, Any] | None) -> CatalogExportResult:
    """Run the command configured under CATALOG_EXPORT_COMMAND and report the outcome.

    The command and the optional CATALOG_EXPORT_WORKDIR are read through
    ``_read_config_value`` (snapshot first, environment second).

    Returns:
        A ``CatalogExportResult`` whose status is
        * "skipped"   - no command configured (blank after stripping),
        * "succeeded" - the process exited with code 0,
        * "failed"    - the process exited non-zero or could not be started.
    """
    configured_command = _read_config_value(config_snapshot, "CATALOG_EXPORT_COMMAND")
    workdir = _read_config_value(config_snapshot, "CATALOG_EXPORT_WORKDIR")

    normalized_command = configured_command.strip() if configured_command else ""
    if normalized_command == "":
        return CatalogExportResult(status="skipped", command=None, workdir=workdir)

    # The configured string is handed to the shell as-is (shell=True), so it may
    # contain pipes, redirects and the like.
    try:
        completed = subprocess.run(
            normalized_command,
            shell=True,
            cwd=workdir,
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception as exc:
        # Launch failures are reported in the result rather than raised.
        message = str(exc)
        if not message:
            message = exc.__class__.__name__
        return CatalogExportResult(
            status="failed",
            command=normalized_command,
            workdir=workdir,
            stderr=message,
        )

    outcome = "failed" if completed.returncode != 0 else "succeeded"
    return CatalogExportResult(
        status=outcome,
        command=normalized_command,
        workdir=workdir,
        returncode=completed.returncode,
        stdout=completed.stdout if completed.stdout else "",
        stderr=completed.stderr if completed.stderr else "",
    )
+418
View File
@@ -0,0 +1,418 @@
from __future__ import annotations
import click
# uvicorn is only used by the "serve" command. When importing it fails, a stub with
# the same run() entry point is installed so the rest of the CLI keeps working and
# "serve" fails with a readable click error instead of an import traceback.
try:
    import uvicorn
except Exception:  # pragma: no cover - exercised only when uvicorn is missing
    class _MissingUvicorn:
        # Accepts any arguments so it can stand in for uvicorn.run(...).
        def run(self, *args, **kwargs):
            raise click.ClickException("serve command requires uvicorn. Install runtime dependencies first.")

    uvicorn = _MissingUvicorn()
from .db import initialize_database
from .downloader import CatalogDownloader, DEFAULT_DOWNLOAD_WORKERS
from .manual_playlists import parse_playlist_file
from .repository import CatalogRepository
from .resolver import DEFAULT_DOWNLOAD_SOURCES
from .resolver_stats import default_resolver_stats_db_path, initialize_resolver_stats_database
from .services import CatalogSyncService
from .uploader import CatalogUploader
def parse_sources(value: str) -> list[str]:
    """Split a comma-separated string into its trimmed, non-empty parts."""
    names: list[str] = []
    for piece in value.split(","):
        trimmed = piece.strip()
        if trimmed:
            names.append(trimmed)
    return names
def parse_int_list(value: str | None) -> list[int] | None:
    """Parse a comma-separated list of integers.

    Returns:
        None when *value* is None or empty; otherwise the integers in input
        order, with blank segments ignored.

    Raises:
        ValueError: if a non-blank segment is not a valid integer.
    """
    if not value:
        return None
    numbers: list[int] = []
    for piece in value.split(","):
        trimmed = piece.strip()
        if trimmed:
            numbers.append(int(trimmed))
    return numbers
def format_lyrics_progress(state: dict[str, object]) -> str:
    """Render a one-line progress summary from a lyrics-sync state mapping.

    Missing or falsy counters are shown as 0; every counter is coerced with
    ``int()``.
    """

    def counter(key: str) -> int:
        return int(state.get(key) or 0)

    total, processed, progress_percent, saved, skipped, failed = (
        counter(key)
        for key in ("total", "processed", "progress_percent", "saved", "skipped", "failed")
    )
    return (
        f"Lyrics progress: {processed}/{total} ({progress_percent}%) "
        f"saved={saved} skipped={skipped} failed={failed}"
    )
# Click parameter type limiting values to 1-65535; used for the serve command's --port option.
PORT_RANGE = click.IntRange(1, 65535)
def create_ops_web_app(*, db_path: str, env_path: str):
    """Build the ops web application for the given database and env file.

    The app is always created with ``start_runner=True``.
    """
    # Imported lazily, inside the function - presumably so the web stack is only
    # loaded for the serve command; TODO confirm.
    from .ops.web import create_app

    web_app = create_app(db_path=db_path, env_path=env_path, start_runner=True)
    return web_app
class CatalogSyncApplication:
    """Facade used by the CLI commands.

    Keeps the catalog database path and optional library root, and offers one
    method per CLI operation by delegating to the repository, the sync
    service, the downloader and the uploader.
    """

    def __init__(self, db_path: str, library_root: str | None = None):
        """Store the paths, initialize both databases and build the collaborators."""
        self.db_path = db_path
        self.library_root = library_root
        self._initialize_databases()
        self.repository = CatalogRepository(db_path)
        self.service = CatalogSyncService(self.repository)
        self.downloader = CatalogDownloader(self.repository)

    def _initialize_databases(self) -> None:
        """Run the catalog and resolver-stats initializers, closing what each returns."""
        catalog_conn = initialize_database(self.db_path, default_library_root=self.library_root)
        catalog_conn.close()
        stats_db_path = default_resolver_stats_db_path(self.db_path)
        stats_conn = initialize_resolver_stats_database(stats_db_path)
        stats_conn.close()

    def init_db(self):
        """Initialize (again) the catalog and resolver-stats databases."""
        self._initialize_databases()

    def collect_playlists(self, sources: list[str], include_playlist_square: bool = True, include_toplist: bool = True):
        """Delegate playlist collection for *sources* to the sync service."""
        return self.service.collect_playlists(sources, include_playlist_square, include_toplist)

    def sync_playlist_catalog(self, sources: list[str] | None = None, limit: int | None = None):
        """Delegate playlist-to-catalog synchronization to the sync service."""
        return self.service.sync_playlist_catalog(sources=sources, limit=limit)

    def download_pending(
        self,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        download_sources: list[str] | None = None,
        lyrics_enabled: bool = True,
        overwrite_lyrics: bool = False,
    ):
        """Download pending songs into the library root with a fresh downloader.

        Raises:
            click.ClickException: if the application was built without a library root.
        """
        if not self.library_root:
            raise click.ClickException("download command requires --library-root")

        # A dedicated downloader is built per call so the requested worker count applies.
        worker_pool = CatalogDownloader(self.repository, worker_count=workers)
        options = {
            "sources": sources,
            "limit": limit,
            "playlist_ids": playlist_ids,
            "download_sources": download_sources,
            "lyrics_enabled": lyrics_enabled,
            "overwrite_lyrics": overwrite_lyrics,
        }
        return worker_pool.download_pending(self.library_root, **options)

    def run_playlist_file(
        self,
        playlist_file: str,
        limit: int | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        download_sources: list[str] | None = None,
        lyrics_enabled: bool = True,
        overwrite_lyrics: bool = False,
    ) -> dict[str, int]:
        """Import the playlists listed in a file, sync them and download their songs.

        *limit*, when given, caps the number of imported playlists that are
        synchronized and downloaded.

        Raises:
            click.ClickException: if the file yields no valid playlist entries.
        """
        parsed_file = parse_playlist_file(playlist_file)
        if not parsed_file.entries:
            raise click.ClickException("playlist file does not contain any valid playlist URLs")

        imported_ids = self.service.import_manual_playlists(playlist_file, parsed_file.entries)
        selected_ids = imported_ids if limit is None else imported_ids[:limit]

        synced_count = self.service.sync_specific_playlists(selected_ids)
        downloaded_count = self.download_pending(
            playlist_ids=selected_ids,
            workers=workers,
            download_sources=download_sources,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )

        report = {
            "total_lines": parsed_file.total_lines,
            "valid_playlists": len(parsed_file.entries),
            "skipped_lines": parsed_file.skipped_lines,
            "synchronized_songs": synced_count,
            "downloaded_songs": downloaded_count,
        }
        return report

    def register_object_backend(
        self,
        backend_name: str,
        container_name: str,
        endpoint: str,
        region: str | None,
        base_prefix: str | None,
        credential_env_prefix: str,
        addressing_style: str | None = None,
        public_base_url: str | None = None,
    ) -> int:
        """Create or update an object-storage backend record via the repository."""
        backend_id = self.repository.upsert_object_storage_backend(
            name=backend_name,
            container_name=container_name,
            endpoint=endpoint,
            region=region,
            base_prefix=base_prefix,
            credential_env_prefix=credential_env_prefix,
            addressing_style=addressing_style,
            public_base_url=public_base_url,
        )
        return backend_id

    def upload_files(
        self,
        backend_name: str,
        sources: list[str] | None = None,
        playlist_ids: list[int] | None = None,
        limit: int | None = None,
        workers: int = 4,
    ) -> dict[str, int]:
        """Queue missing uploads for *backend_name*, run the uploader and return its summary.

        The uploader's summary is extended with a "queued" entry holding the
        value returned by the enqueue step.
        """
        uploader = CatalogUploader(self.repository, worker_count=workers)
        queued_count = uploader.enqueue_missing_uploads(
            backend_name=backend_name,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        result = uploader.run(backend_name=backend_name)
        result["queued"] = queued_count
        return result

    def sync_local_lyrics(
        self,
        sources: list[str] | None = None,
        playlist_ids: list[int] | None = None,
        limit: int | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        progress_callback=None,
        overwrite_lyrics: bool = False,
    ) -> dict[str, int]:
        """Run the downloader's local lyrics synchronization with *workers* workers."""
        lyrics_worker_pool = CatalogDownloader(self.repository, worker_count=workers)
        return lyrics_worker_pool.sync_local_lyrics(
            sources=sources,
            playlist_ids=playlist_ids,
            limit=limit,
            progress_callback=progress_callback,
            overwrite_lyrics=overwrite_lyrics,
        )
# Root command group; the sub-commands in this module attach to it with @cli.command(...).
# The docstring below is what click shows as the group's help text.
@click.group()
def cli():
    """Catalog sync CLI for harvesting playlists and downloading songs."""
@cli.command("init-db")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
def init_db_command(db_path: str, library_root: str | None):
    # Building the application already initializes both databases; init_db() is
    # still called explicitly so the command performs its named action.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    application.init_db()
    confirmation = f"Initialized catalog database at {db_path}"
    click.echo(confirmation)
@cli.command("collect")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
@click.option("--playlist-square/--no-playlist-square", default=True, show_default=True)
@click.option("--toplist/--no-toplist", default=True, show_default=True)
def collect_command(db_path: str, sources: str, library_root: str | None, playlist_square: bool, toplist: bool):
    # Collect playlist candidates for the chosen platforms and print whatever
    # the service reports back.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    selected_sources = parse_sources(sources)
    result = application.collect_playlists(selected_sources, playlist_square, toplist)
    click.echo(f"Collected playlists: {result}")
@cli.command("sync")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
@click.option("--limit", type=int, default=None)
def sync_command(db_path: str, sources: str, library_root: str | None, limit: int | None):
    # Synchronize collected playlists into the song catalog and print the count.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    selected_sources = parse_sources(sources)
    count = application.sync_playlist_catalog(selected_sources, limit=limit)
    click.echo(f"Synchronized songs: {count}")
@cli.command("download")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--download-sources", default=",".join(DEFAULT_DOWNLOAD_SOURCES), show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=True)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--lyrics/--no-lyrics", "lyrics_enabled", default=True, show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def download_command(
    db_path: str,
    sources: str,
    download_sources: str,
    library_root: str,
    limit: int | None,
    workers: int,
    lyrics_enabled: bool,
    overwrite_lyrics: bool,
):
    # Download every pending song of the selected platforms into the library
    # root; both source lists arrive as comma-separated strings.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    catalog_sources = parse_sources(sources)
    resolver_sources = parse_sources(download_sources)
    count = application.download_pending(
        sources=catalog_sources,
        limit=limit,
        workers=workers,
        download_sources=resolver_sources,
        lyrics_enabled=lyrics_enabled,
        overwrite_lyrics=overwrite_lyrics,
    )
    click.echo(f"Downloaded songs: {count}")
@cli.command("run")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--download-sources", default=",".join(DEFAULT_DOWNLOAD_SOURCES), show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=True)
@click.option("--playlist-file", type=click.Path(dir_okay=False, exists=True), required=False)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--lyrics/--no-lyrics", "lyrics_enabled", default=True, show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def run_command(
    db_path: str,
    sources: str,
    download_sources: str,
    library_root: str,
    playlist_file: str | None,
    limit: int | None,
    workers: int,
    lyrics_enabled: bool,
    overwrite_lyrics: bool,
):
    # Full pipeline. With --playlist-file only the playlists listed in that file
    # are imported, synced and downloaded; otherwise the command collects, syncs
    # and downloads for the selected platforms.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    download_options = {
        "workers": workers,
        "download_sources": parse_sources(download_sources),
        "lyrics_enabled": lyrics_enabled,
        "overwrite_lyrics": overwrite_lyrics,
    }

    if playlist_file:
        application.run_playlist_file(playlist_file=playlist_file, limit=limit, **download_options)
    else:
        selected_sources = parse_sources(sources)
        application.collect_playlists(selected_sources)
        application.sync_playlist_catalog(selected_sources, limit=limit)
        application.download_pending(selected_sources, limit=limit, **download_options)

    click.echo("Catalog sync pipeline completed")
@cli.command("register-object-backend")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--backend", "backend_name", required=True)
@click.option("--bucket", "container_name", required=True)
@click.option("--endpoint", required=True)
@click.option("--region", default=None)
@click.option("--base-prefix", default=None)
@click.option("--credential-env-prefix", required=True)
@click.option("--addressing-style", default=None)
@click.option("--public-base-url", default=None)
def register_object_backend_command(
    db_path: str,
    backend_name: str,
    container_name: str,
    endpoint: str,
    region: str | None,
    base_prefix: str | None,
    credential_env_prefix: str,
    addressing_style: str | None,
    public_base_url: str | None,
):
    # Store (insert or update) the object-storage backend definition and print
    # the identifier returned by the application.
    backend_settings = {
        "backend_name": backend_name,
        "container_name": container_name,
        "endpoint": endpoint,
        "region": region,
        "base_prefix": base_prefix,
        "credential_env_prefix": credential_env_prefix,
        "addressing_style": addressing_style,
        "public_base_url": public_base_url,
    }
    application = CatalogSyncApplication(db_path=db_path)
    backend_id = application.register_object_backend(**backend_settings)
    click.echo(f"Registered object backend: {backend_id}")
@cli.command("upload")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--backend", "backend_name", required=True)
@click.option("--sources", default=None)
@click.option("--playlist-ids", default=None)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=4, show_default=True)
def upload_command(
    db_path: str,
    backend_name: str,
    sources: str | None,
    playlist_ids: str | None,
    limit: int | None,
    workers: int,
):
    # Queue and run uploads to the named backend. Omitted --sources and
    # --playlist-ids are passed on as None rather than as empty filters.
    application = CatalogSyncApplication(db_path=db_path)
    source_filter = parse_sources(sources) if sources else None
    playlist_filter = parse_int_list(playlist_ids)
    summary = application.upload_files(
        backend_name=backend_name,
        sources=source_filter,
        playlist_ids=playlist_filter,
        limit=limit,
        workers=workers,
    )
    click.echo(f"Upload summary: {summary}")
@cli.command("lyrics")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default=None)
@click.option("--playlist-ids", default=None)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def lyrics_command(
    db_path: str,
    sources: str | None,
    playlist_ids: str | None,
    limit: int | None,
    workers: int,
    overwrite_lyrics: bool,
):
    # Synchronize lyrics for already-known songs, printing one progress line per
    # callback invocation and a final summary.
    application = CatalogSyncApplication(db_path=db_path)

    def report_progress(**state):
        # The downloader passes its counters as keyword arguments.
        click.echo(format_lyrics_progress(state))

    source_filter = parse_sources(sources) if sources else None
    playlist_filter = parse_int_list(playlist_ids)
    summary = application.sync_local_lyrics(
        sources=source_filter,
        playlist_ids=playlist_filter,
        limit=limit,
        workers=workers,
        progress_callback=report_progress,
        overwrite_lyrics=overwrite_lyrics,
    )
    click.echo(f"Lyrics summary: {summary}")
@cli.command("serve")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--env-file", required=True, type=click.Path(dir_okay=False))
@click.option("--host", default="127.0.0.1", show_default=True)
@click.option("--port", type=PORT_RANGE, default=18080, show_default=True)
def serve_command(db_path: str, env_file: str, host: str, port: int):
    # Build the ops web app and hand it to uvicorn. If uvicorn could not be
    # imported, the module-level stub raises a ClickException here instead.
    web_app = create_ops_web_app(db_path=db_path, env_path=env_file)
    uvicorn.run(web_app, host=host, port=port)
def main():
    """Entry point: invoke the click command group."""
    cli()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,15 @@
from .kuwo import KuwoCollector, parse_playlist_square_html as parse_kuwo_playlist_square_html, parse_toplist_html as parse_kuwo_toplist_html
from .netease import NeteaseCollector, parse_playlist_square_html as parse_netease_playlist_square_html, parse_toplist_payload as parse_netease_toplist_payload
from .qq import QQCollector, parse_playlist_square_payload as parse_qq_playlist_square_payload, parse_toplist_payload as parse_qq_toplist_payload
# Public API of the collectors package: the three collector classes plus the
# platform-prefixed aliases of each module's parse helpers imported above.
__all__ = [
    "KuwoCollector",
    "NeteaseCollector",
    "QQCollector",
    "parse_kuwo_playlist_square_html",
    "parse_kuwo_toplist_html",
    "parse_netease_playlist_square_html",
    "parse_netease_toplist_payload",
    "parse_qq_playlist_square_payload",
    "parse_qq_toplist_payload",
]
@@ -0,0 +1,16 @@
from __future__ import annotations
from dataclasses import dataclass, field
import requests
@dataclass
class BaseCollector:
    """Shared HTTP plumbing for the platform collectors.

    Attributes are documented inline; subclasses call :meth:`get` to fetch pages
    or API payloads.
    """

    # Headers sent with every request; each instance gets its own dict.
    headers: dict[str, str] = field(default_factory=lambda: {"User-Agent": "Mozilla/5.0"})
    # Session used for all requests made through this collector.
    session: requests.Session = field(default_factory=requests.Session)

    def get(self, url: str, **kwargs):
        """GET *url* through the session and return the response.

        Extra keyword arguments are forwarded to ``session.get``. A ``timeout``
        of 15 is applied unless the caller supplies one, and a caller-supplied
        ``headers`` mapping is merged over the collector's own headers.

        Raises:
            requests.HTTPError: via ``raise_for_status()`` on an error status.
        """
        # Passing timeout=/headers= used to collide with the fixed keyword
        # arguments below and raise TypeError; treat them as overrides instead.
        kwargs.setdefault("timeout", 15)
        extra_headers = kwargs.pop("headers", None)
        headers = {**self.headers, **extra_headers} if extra_headers else self.headers
        response = self.session.get(url, headers=headers, **kwargs)
        response.raise_for_status()
        return response
@@ -0,0 +1,260 @@
from __future__ import annotations
import json
import re
import subprocess
from bs4 import BeautifulSoup
from ..models import PlaylistCandidate
from .base import BaseCollector
# Kuwo pages fetched by KuwoCollector.
PLAYLIST_SQUARE_URL = "https://www.kuwo.cn/playlist"
TOPLIST_URL = "https://www.kuwo.cn/rankList"

# Captures the expression assigned to window.__NUXT__ inside an inline <script> tag.
NUXT_SCRIPT_RE = re.compile(r"<script>\s*window\.__NUXT__=(.*?)</script>", re.DOTALL)
# Splits an immediately-invoked `(function(params){return body})(args)` expression
# into its parameter list, returned body and call arguments.
NUXT_FUNCTION_RE = re.compile(
    r"^\(function\((?P<params>.*?)\)\s*\{\s*return\s+(?P<body>.*)\}\)\((?P<args>.*)\)\s*;?\s*$",
    re.DOTALL,
)
# Multipliers for the magnitude suffixes that _parse_play_count's regex captures
# ([万亿]): 万 = ten thousand, 亿 = one hundred million. The first key must be the
# character itself - an empty-string key can never match the captured unit.
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}
def _parse_play_count(value: object) -> int | None:
    """Convert a scraped play-count value to an integer.

    Accepts numbers (truncated with ``int()``), plain digit strings (whitespace
    and thousands commas are ignored) and strings carrying a 万/亿 suffix, whose
    multiplier is looked up in ``_COUNT_UNIT_MULTIPLIERS``.

    Returns:
        The parsed count, or None for None, empty strings, booleans, text in no
        recognized format, or a unit without a configured multiplier.
    """
    if value in (None, ""):
        return None
    # bool is a subclass of int; True/False are not meaningful counts.
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return int(value)
    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() matches exactly what int() can parse; isdigit() also accepts
    # characters such as superscripts that make int() raise ValueError.
    if text.isdecimal():
        return int(text)
    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    return int(float(match.group(1)) * multiplier)
def split_js_arguments(text: str) -> list[str]:
    """Split a JavaScript argument list on its top-level commas.

    Commas inside quoted strings or inside ``()``, ``[]``, ``{}`` nesting do not
    split. Each piece is stripped and empty pieces are dropped. A falsy *text*
    yields an empty list.
    """
    pieces: list[str] = []
    buffer: list[str] = []
    open_quote = ""   # the quote character of the string being read, if any
    escaped = False   # True right after a backslash inside a string
    nesting = 0       # current bracket depth outside strings

    def flush() -> None:
        piece = "".join(buffer).strip()
        if piece:
            pieces.append(piece)
        buffer.clear()

    for ch in str(text or ""):
        if escaped:
            # Character following a backslash: taken literally.
            escaped = False
            buffer.append(ch)
        elif open_quote:
            if ch == "\\":
                escaped = True
            elif ch == open_quote:
                open_quote = ""
            buffer.append(ch)
        elif ch == "," and nesting == 0:
            flush()
        else:
            if ch in ("'", '"'):
                open_quote = ch
            elif ch in ("(", "[", "{"):
                nesting += 1
            elif ch in (")", "]", "}"):
                # Unbalanced closers never drive the depth below zero.
                nesting = nesting - 1 if nesting > 0 else 0
            buffer.append(ch)

    flush()
    return pieces
def resolve_js_value(token: str, variables: dict[str, object] | None = None):
    """Turn one JavaScript token into a Python value.

    Resolution order: empty token -> None; a name found in *variables* -> its
    value; ``true``/``false``/``null``; a quoted string (decoded with
    ``json.loads``; single-quoted strings are re-quoted first with their
    backslashes kept literal); a float (token contains ".") or int.

    Returns:
        The resolved value. Anything that cannot be decoded - an unparsable
        number or a malformed quoted string - is returned as the stripped
        token text instead of raising.
    """
    token = str(token or "").strip()
    variables = variables or {}
    if not token:
        return None
    if token in variables:
        return variables[token]

    literals = {"true": True, "false": False, "null": None}
    if token in literals:
        return literals[token]

    if token.startswith(("'", '"')) and token.endswith(("'", '"')):
        normalized = token
        if token.startswith("'") and token.endswith("'"):
            normalized = '"' + token[1:-1].replace("\\", "\\\\").replace('"', '\\"') + '"'
        try:
            return json.loads(normalized)
        except json.JSONDecodeError:
            # Mismatched quotes, a lone quote character or escapes JSON does not
            # know: fall back to the raw token like the numeric branch below.
            return token

    try:
        if "." in token:
            return float(token)
        return int(token)
    except ValueError:
        return token
def extract_kuwo_bang_menu_items(script_body: str) -> list[dict]:
    """Pull toplist entries out of a ``window.__NUXT__`` function expression by regex.

    The script is expected to match ``NUXT_FUNCTION_RE``; its parameter names
    are bound to the resolved call arguments so that identifiers used inside the
    body can be substituted. Returns an empty list when the script does not
    match, does not mention ``bangMenu``, or holds no entry with a truthy id.
    """
    wrapper = NUXT_FUNCTION_RE.match(str(script_body or "").strip())
    if wrapper is None:
        return []

    param_names = [
        name.strip()
        for name in str(wrapper.group("params") or "").split(",")
        if name.strip()
    ]
    arg_values = [resolve_js_value(raw) for raw in split_js_arguments(wrapper.group("args") or "")]
    bindings = dict(zip(param_names, arg_values))

    body = str(wrapper.group("body") or "")
    if "bangMenu" not in body:
        return []

    item_pattern = re.compile(
        r"\{sourceid:(?P<sourceid>[^,]+),.*?name:(?P<name>[^,]+),\s*id:(?P<id>[^,]+),\s*source:(?P<source>[^,]+),\s*pic:(?P<pic>[^,]+),\s*pub:(?P<pub>[^,}\]]+)(?:,\s*(?:listencnt|playCount|listenCount):(?P<play_count>[^,}\]]+))?",
        re.DOTALL,
    )
    field_names = ("sourceid", "name", "id", "source", "pic", "pub", "play_count")

    entries: list[dict] = []
    for found in item_pattern.finditer(body):
        # play_count is optional in the pattern; an unmatched group resolves to None.
        record = {name: resolve_js_value(found.group(name), bindings) for name in field_names}
        if record.get("id"):
            entries.append(record)
    return entries
def extract_nuxt_state(html: str) -> dict | None:
    """Evaluate the page's ``window.__NUXT__`` assignment with Node.js and return it.

    Returns:
        The decoded JSON value printed by Node, or None when the page has no
        matching script, Node cannot be run / fails / exceeds the 10 second
        timeout, or its output is empty or not valid JSON.
    """
    found = NUXT_SCRIPT_RE.search(html)
    if found is None:
        return None
    script_body = found.group(1)

    # NOTE(review): the script text is taken from *html* and executed verbatim by
    # a local `node` process. When html comes from the network this runs remote
    # JavaScript with the privileges of this process - consider sandboxing.
    node_script = (
        "const window = {}; "
        f"window.__NUXT__={script_body}; "
        "process.stdout.write(JSON.stringify(window.__NUXT__));"
    )
    try:
        raw_output = subprocess.run(
            ["node", "-e", node_script],
            check=True,
            capture_output=True,
            timeout=10,
        ).stdout
    except Exception:
        # Any failure here means "no state available"; the caller decides what to do.
        return None

    serialized = raw_output.decode("utf-8", errors="ignore").strip()
    if not serialized:
        return None
    try:
        return json.loads(serialized)
    except json.JSONDecodeError:
        return None
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Extract playlist candidates from the Kuwo playlist-square page.

    Every anchor whose href contains ``playlist_detail/`` yields one candidate;
    the last path segment of the href is the remote id and repeated ids are
    reported only once (first occurrence wins).
    """
    document = BeautifulSoup(html, "lxml")
    candidates: list[PlaylistCandidate] = []
    known_ids: set[str] = set()

    for link in document.select("a[href*='playlist_detail/']"):
        href = link.get("href", "").strip()
        remote_id = href.rstrip("/").split("/")[-1]
        if not remote_id or remote_id in known_ids:
            continue
        known_ids.add(remote_id)

        # Relative links are resolved against the Kuwo site root.
        if href.startswith("http"):
            absolute_url = href
        else:
            absolute_url = f"https://www.kuwo.cn{href}"

        title = link.get("title") or link.get_text(strip=True) or remote_id
        image = link.find("img")
        count_node = link.select_one(".num")
        count_text = count_node.get_text(" ", strip=True) if count_node else None

        candidates.append(
            PlaylistCandidate(
                platform="kuwo",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=title,
                url=absolute_url,
                cover_url=(image or {}).get("src"),
                play_count=_parse_play_count(count_text),
            )
        )
    return candidates
def _extract_toplist_play_count(entry: dict) -> int | None:
    """Return the first parsable play count among the known toplist count keys.

    Keys are tried in the order listencnt, play_count, playCount, listenCount;
    None is returned when none of them parses.
    """
    parsed_counts = (
        _parse_play_count(entry.get(key))
        for key in ("listencnt", "play_count", "playCount", "listenCount")
    )
    # The generator is lazy, so later keys are not parsed once one succeeds.
    return next((count for count in parsed_counts if count is not None), None)
def _kuwo_toplist_candidate(entry: dict) -> PlaylistCandidate | None:
    """Build the toplist candidate for one entry, or None when it has no id."""
    remote_id = str(entry.get("id", "")).strip()
    if not remote_id:
        return None
    return PlaylistCandidate(
        platform="kuwo",
        pool_kind="toplist",
        remote_id=remote_id,
        name=entry.get("name") or remote_id,
        url=f"https://www.kuwo.cn/rankList?bangId={remote_id}",
        cover_url=entry.get("pic"),
        parse_strategy="kuwo_toplist",
        play_count=_extract_toplist_play_count(entry),
        metadata={"sourceid": str(entry.get("sourceid", "")), "pub": entry.get("pub")},
    )


def _iter_nuxt_toplist_entries(state: dict):
    """Yield every entry of every ``bangMenu`` list found under ``state["data"]``."""
    for group in state.get("data", []) or []:
        for menu in group.get("bangMenu", []) or []:
            yield from menu.get("list", []) or []


def parse_toplist_html(html: str) -> list[PlaylistCandidate]:
    """Extract toplist candidates from the Kuwo rank-list page.

    The page state is first evaluated through ``extract_nuxt_state``; when that
    yields nothing, the entries are recovered from the raw script text with
    ``extract_kuwo_bang_menu_items``. Entries without an id are skipped.
    """
    state = extract_nuxt_state(html)
    if state:
        entries = _iter_nuxt_toplist_entries(state)
    else:
        # Regex fallback; the script is searched once and reused.
        script = NUXT_SCRIPT_RE.search(html)
        entries = extract_kuwo_bang_menu_items(script.group(1) if script else "")

    items: list[PlaylistCandidate] = []
    for entry in entries:
        candidate = _kuwo_toplist_candidate(entry)
        if candidate is not None:
            items.append(candidate)
    return items
class KuwoCollector(BaseCollector):
    """Collector for Kuwo: fetches the pages and hands their HTML to the parsers."""

    def collect_playlist_square(self, page: int = 1, page_size: int = 30) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page; *page* and *page_size* are clamped to at least 1."""
        query = {"pn": str(max(page, 1)), "rn": str(max(page_size, 1))}
        page_html = self.get(PLAYLIST_SQUARE_URL, params=query).text
        return parse_playlist_square_html(page_html)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the rank-list page and parse its toplists."""
        page_html = self.get(TOPLIST_URL).text
        return parse_toplist_html(page_html)
@@ -0,0 +1,113 @@
from __future__ import annotations
import re
from bs4 import BeautifulSoup
from ..models import PlaylistCandidate
from .base import BaseCollector
# NetEase endpoints used by NeteaseCollector: the playlist-square page (parsed as
# HTML) and the toplist detail API (parsed as JSON).
PLAYLIST_SQUARE_URL = "https://music.163.com/discover/playlist"
TOPLIST_API_URL = "https://music.163.com/api/toplist/detail"
# Multipliers for the magnitude suffixes that _parse_play_count's regex captures
# ([万亿]): 万 = ten thousand, 亿 = one hundred million. The first key must be the
# character itself - an empty-string key can never match the captured unit.
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}
def _parse_play_count(value: object) -> int | None:
    """Convert a scraped play-count value to an integer.

    Accepts numbers (truncated with ``int()``), plain digit strings (whitespace
    and thousands commas are ignored) and strings carrying a 万/亿 suffix, whose
    multiplier is looked up in ``_COUNT_UNIT_MULTIPLIERS``.

    Returns:
        The parsed count, or None for None, empty strings, booleans, text in no
        recognized format, or a unit without a configured multiplier.
    """
    if value in (None, ""):
        return None
    # bool is a subclass of int; True/False are not meaningful counts.
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return int(value)
    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() matches exactly what int() can parse; isdigit() also accepts
    # characters such as superscripts that make int() raise ValueError.
    if text.isdecimal():
        return int(text)
    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    return int(float(match.group(1)) * multiplier)
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Extract playlist candidates from the NetEase playlist-square page.

    Each ``a.msk`` anchor linking to ``/playlist?id=...`` yields one candidate;
    the text after the last ``id=`` is the remote id and repeated ids are
    reported only once (first occurrence wins).
    """
    document = BeautifulSoup(html, "lxml")
    candidates: list[PlaylistCandidate] = []
    known_ids: set[str] = set()

    for mask_link in document.select("a.msk[href*='/playlist?id=']"):
        remote_id = mask_link.get("href", "").split("id=")[-1].strip()
        if not remote_id or remote_id in known_ids:
            continue
        known_ids.add(remote_id)

        # The play counter (.nb) is looked up under the anchor's parent element.
        container = mask_link.parent if mask_link.parent else mask_link
        count_node = container.select_one(".nb")
        count_text = count_node.get_text(" ", strip=True) if count_node else None
        # The cover is the closest <img> that precedes the anchor in the document.
        previous_image = mask_link.find_previous("img")

        candidates.append(
            PlaylistCandidate(
                platform="netease",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=mask_link.get("title") or remote_id,
                url=f"https://music.163.com/#/playlist?id={remote_id}",
                cover_url=(previous_image or {}).get("src"),
                play_count=_parse_play_count(count_text),
            )
        )
    return candidates
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert the NetEase toplist API payload into playlist candidates.

    Entries of ``payload["list"]`` whose id is blank after stripping are skipped.
    The play count comes from ``playCount`` and falls back to
    ``subscribedCount`` when that is missing or falsy.
    """
    candidates: list[PlaylistCandidate] = []
    for chart in payload.get("list", []) or []:
        remote_id = str(chart.get("id", "")).strip()
        if remote_id:
            raw_count = chart.get("playCount") or chart.get("subscribedCount")
            candidates.append(
                PlaylistCandidate(
                    platform="netease",
                    pool_kind="toplist",
                    remote_id=remote_id,
                    name=chart.get("name") or remote_id,
                    url=f"https://music.163.com/#/playlist?id={remote_id}",
                    cover_url=chart.get("coverImgUrl"),
                    parse_strategy="netease_toplist",
                    play_count=_parse_play_count(raw_count),
                    metadata={"update_frequency": chart.get("updateFrequency")},
                )
            )
    return candidates
class NeteaseCollector(BaseCollector):
    """Collector for NetEase: fetches the playlist square page and the toplist API."""

    def collect_playlist_square(
        self,
        category: str = "全部",
        order: str = "hot",
        page: int = 1,
        page_size: int = 35,
        offset: int | None = None,
    ) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page.

        An explicit *offset* is used as given; otherwise it is derived from
        *page* (1-based, clamped to at least 1) and *page_size* (at least 1).
        """
        if offset is not None:
            start = offset
        else:
            start = max(page - 1, 0) * max(page_size, 1)
        query = {"cat": category, "order": order, "offset": start}
        page_html = self.get(PLAYLIST_SQUARE_URL, params=query).text
        return parse_playlist_square_html(page_html)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist detail payload and convert it to candidates."""
        payload = self.get(TOPLIST_API_URL).json()
        return parse_toplist_payload(payload)
@@ -0,0 +1,104 @@
from __future__ import annotations
import requests
from ..models import PlaylistCandidate
from .base import BaseCollector
# QQ Music endpoints queried by QQCollector; both responses are read as JSON.
PLAYLIST_SQUARE_URL = "https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg"
TOPLIST_URL = "https://c.y.qq.com/v8/fcg-bin/fcg_myqq_toplist.fcg"
def _extract_collected_song_count(entry: dict) -> int | None:
    """Return the first usable song count found under the known count keys.

    Keys are tried in order; a value is usable when it is a non-boolean number
    (truncated with ``int()``) or a string of decimal digits (surrounding
    whitespace ignored). Anything else is skipped.

    Returns:
        The count, or None when no key holds a usable value.
    """
    for key in ("songnum", "song_num", "songCount", "song_count", "trackCount", "track_count"):
        value = entry.get(key)
        if isinstance(value, str):
            digits = value.strip()
            # isdecimal() rather than isdigit(): the latter accepts characters
            # (e.g. superscript digits) that int() rejects with ValueError.
            if digits.isdecimal():
                return int(digits)
        elif isinstance(value, (int, float)) and not isinstance(value, bool):
            return int(value)
    return None
def parse_playlist_square_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert the QQ playlist-square payload into playlist candidates.

    Reads ``payload["data"]["list"]``; a missing or null ``data`` / ``list``
    yields an empty result. Entries whose ``dissid`` is blank are skipped.
    """
    # `or {}` also covers an explicit null, which .get(key, {}) would pass
    # through and then fail on with AttributeError.
    data = payload.get("data") or {}
    items: list[PlaylistCandidate] = []
    for entry in data.get("list") or []:
        remote_id = str(entry.get("dissid", "")).strip()
        if not remote_id:
            continue
        creator = entry.get("creator") or {}
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=entry.get("dissname") or remote_id,
                url=f"https://y.qq.com/n/ryqq/playlist/{remote_id}",
                cover_url=entry.get("imgurl"),
                creator_name=creator.get("name"),
                play_count=entry.get("listennum"),
                collected_song_count=_extract_collected_song_count(entry),
            )
        )
    return items
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert the QQ toplist payload into playlist candidates.

    Reads ``payload["data"]["topList"]``; a missing or null ``data`` /
    ``topList`` yields an empty result. Entries whose ``id`` is blank are skipped.
    """
    # `or {}` also covers an explicit null, which .get(key, {}) would pass
    # through and then fail on with AttributeError.
    data = payload.get("data") or {}
    items: list[PlaylistCandidate] = []
    for entry in data.get("topList") or []:
        remote_id = str(entry.get("id", "")).strip()
        if not remote_id:
            continue
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="toplist",
                remote_id=remote_id,
                name=entry.get("topTitle") or remote_id,
                url=f"https://y.qq.com/n/ryqq/toplist/{remote_id}",
                cover_url=entry.get("picUrl"),
                play_count=entry.get("listenCount"),
                collected_song_count=_extract_collected_song_count(entry),
                parse_strategy="qq_toplist",
            )
        )
    return items
class QQCollector(BaseCollector):
    """Collector for QQ Music; adds the Referer/Origin headers to every request."""

    def __init__(self, headers: dict[str, str] | None = None, session: requests.Session | None = None):
        """Create the collector with optional custom headers and session.

        The supplied *headers* mapping is copied, so adding Referer/Origin
        below never modifies the caller's dict.
        """
        base_headers = dict(headers) if headers else {"User-Agent": "Mozilla/5.0"}
        super().__init__(headers=base_headers, session=session or requests.Session())
        self.headers.update({"Referer": "https://y.qq.com/", "Origin": "https://y.qq.com/"})

    def collect_playlist_square(
        self,
        category_id: int = 10000000,
        sort_id: int = 5,
        page: int = 1,
        page_size: int = 30,
    ) -> list[PlaylistCandidate]:
        """Fetch one page of the playlist square for a category.

        *page* is 1-based; values below 1 are treated as page 1. *page_size* is
        clamped to at least 1, matching the other collectors, so the requested
        index window (``sin``..``ein``, inclusive) is never empty or negative.
        """
        size = max(page_size, 1)
        params = {
            "picmid": "1",
            "rnd": "0.1",
            "g_tk": "732560869",
            "loginUin": "0",
            "hostUin": "0",
            "format": "json",
            "inCharset": "utf8",
            "outCharset": "utf-8",
            "notice": "0",
            "platform": "yqq.json",
            "needNewCode": "0",
            "categoryId": str(category_id),
            "sortId": str(sort_id),
            "sin": str(max(page - 1, 0) * size),
            "ein": str(max(page, 1) * size - 1),
        }
        response = self.get(PLAYLIST_SQUARE_URL, params=params)
        return parse_playlist_square_payload(response.json())

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist payload and convert it to candidates."""
        response = self.get(TOPLIST_URL, params={"format": "json"})
        return parse_toplist_payload(response.json())
+492
View File
@@ -0,0 +1,492 @@
from __future__ import annotations
import sqlite3
from contextlib import suppress
from pathlib import Path
# Lock-wait budget in milliseconds; connect_database applies it both as the
# sqlite3 connect timeout (converted to seconds) and as PRAGMA busy_timeout.
SQLITE_BUSY_TIMEOUT_MS = 30000
# Names of the tables created by the CREATE TABLE statements in
# SCHEMA_STATEMENTS below.
REQUIRED_TABLES = {
    "playlist_pools",
    "playlists",
    "playlist_download_preferences",
    "pool_playlists",
    "artist_pools",
    "artists",
    "pool_artists",
    "songs",
    "playlist_songs",
    "artist_songs",
    "storage_backends",
    "file_assets",
    "file_locations",
    "download_tasks",
    "song_backend_presence",
    "upload_tasks",
    "job_runs",
    "job_stages",
    "job_items",
    "job_workers",
    "job_commands",
    "job_events",
    "job_logs",
    "config_revisions",
}
# DDL executed in list order by initialize_database. Every statement uses
# IF NOT EXISTS, so re-running it against an existing database is harmless.
SCHEMA_STATEMENTS = [
"""
CREATE TABLE IF NOT EXISTS playlist_pools (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
pool_kind TEXT NOT NULL,
external_id TEXT NOT NULL,
name TEXT NOT NULL,
url TEXT,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(platform, pool_kind, external_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS playlists (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
remote_playlist_id TEXT NOT NULL,
name TEXT NOT NULL,
url TEXT NOT NULL,
parse_strategy TEXT NOT NULL DEFAULT 'playlist_url',
cover_url TEXT,
creator_name TEXT,
play_count INTEGER,
collected_song_count INTEGER,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(platform, remote_playlist_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS playlist_download_preferences (
playlist_id INTEGER PRIMARY KEY,
is_wanted INTEGER NOT NULL DEFAULT 1,
marked_by TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS pool_playlists (
pool_id INTEGER NOT NULL,
playlist_id INTEGER NOT NULL,
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(pool_id, playlist_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS artist_pools (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
pool_kind TEXT NOT NULL,
external_id TEXT NOT NULL,
name TEXT NOT NULL,
source_playlist_pool_id INTEGER,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(platform, pool_kind, external_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS artists (
id INTEGER PRIMARY KEY AUTOINCREMENT,
artist_key TEXT NOT NULL UNIQUE,
platform TEXT NOT NULL,
remote_artist_id TEXT,
name TEXT NOT NULL,
normalized_name TEXT NOT NULL,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS pool_artists (
pool_id INTEGER NOT NULL,
artist_id INTEGER NOT NULL,
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(pool_id, artist_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS songs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
remote_song_id TEXT NOT NULL,
name TEXT NOT NULL,
singers TEXT,
album TEXT,
duration_seconds INTEGER,
ext TEXT,
file_size_bytes INTEGER,
quality_label TEXT,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(platform, remote_song_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS playlist_songs (
playlist_id INTEGER NOT NULL,
song_id INTEGER NOT NULL,
position INTEGER,
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(playlist_id, song_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS artist_songs (
artist_id INTEGER NOT NULL,
song_id INTEGER NOT NULL,
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(artist_id, song_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS storage_backends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
backend_type TEXT NOT NULL,
base_path TEXT,
container_name TEXT,
config_json TEXT,
is_default INTEGER NOT NULL DEFAULT 0,
is_active INTEGER NOT NULL DEFAULT 1,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS file_assets (
id INTEGER PRIMARY KEY AUTOINCREMENT,
song_id INTEGER NOT NULL,
quality_label TEXT,
ext TEXT,
file_size_bytes INTEGER,
checksum_sha256 TEXT,
metadata_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS file_locations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_asset_id INTEGER NOT NULL,
backend_id INTEGER NOT NULL,
container_name TEXT,
locator TEXT NOT NULL,
absolute_path TEXT,
remote_file_id TEXT,
public_url TEXT,
download_url TEXT,
status TEXT NOT NULL DEFAULT 'active',
is_primary INTEGER NOT NULL DEFAULT 1,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(file_asset_id, backend_id, locator)
)
""",
"""
CREATE TABLE IF NOT EXISTS download_tasks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
song_id INTEGER NOT NULL,
target_backend_id INTEGER,
status TEXT NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS song_backend_presence (
song_id INTEGER NOT NULL,
backend_id INTEGER NOT NULL,
has_active_file INTEGER NOT NULL DEFAULT 0,
active_file_count INTEGER NOT NULL DEFAULT 0,
primary_file_location_id INTEGER,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(song_id, backend_id)
)
""",
"""
CREATE TABLE IF NOT EXISTS upload_tasks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_asset_id INTEGER NOT NULL,
source_location_id INTEGER NOT NULL,
target_backend_id INTEGER NOT NULL,
target_container_name TEXT,
target_locator TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
attempts INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
queued_at TEXT DEFAULT CURRENT_TIMESTAMP,
started_at TEXT,
finished_at TEXT,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
UNIQUE(file_asset_id, target_backend_id, target_locator)
)
""",
"""
CREATE TABLE IF NOT EXISTS job_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'queued',
priority INTEGER NOT NULL DEFAULT 100,
requested_by TEXT,
config_snapshot_json TEXT NOT NULL,
sources TEXT,
download_sources TEXT,
playlist_scope_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
started_at TEXT,
ended_at TEXT,
last_error TEXT,
resume_token TEXT
)
""",
"""
CREATE TABLE IF NOT EXISTS job_stages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_run_id INTEGER NOT NULL,
stage_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
seq_no INTEGER NOT NULL DEFAULT 0,
total_items INTEGER NOT NULL DEFAULT 0,
pending_items INTEGER NOT NULL DEFAULT 0,
running_items INTEGER NOT NULL DEFAULT 0,
success_items INTEGER NOT NULL DEFAULT 0,
failed_items INTEGER NOT NULL DEFAULT 0,
skipped_items INTEGER NOT NULL DEFAULT 0,
started_at TEXT,
ended_at TEXT,
last_error TEXT,
UNIQUE(job_run_id, stage_type)
)
""",
"""
CREATE TABLE IF NOT EXISTS job_items (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_stage_id INTEGER NOT NULL,
item_type TEXT NOT NULL,
item_key TEXT NOT NULL,
playlist_pool_id INTEGER,
playlist_id INTEGER,
song_id INTEGER,
file_location_id INTEGER,
status TEXT NOT NULL DEFAULT 'pending',
attempt_count INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
worker_id INTEGER,
started_at TEXT,
ended_at TEXT,
last_error TEXT,
last_error_code TEXT,
payload_json TEXT,
UNIQUE(job_stage_id, item_key)
)
""",
"""
CREATE TABLE IF NOT EXISTS job_workers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_run_id INTEGER,
job_stage_id INTEGER,
worker_name TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'idle',
current_job_item_id INTEGER,
current_song_id INTEGER,
current_playlist_id INTEGER,
current_display_text TEXT,
heartbeat_at TEXT,
last_progress_text TEXT,
processed_count INTEGER NOT NULL DEFAULT 0,
error_count INTEGER NOT NULL DEFAULT 0,
downloaded_bytes INTEGER,
total_bytes INTEGER,
speed_bytes_per_sec INTEGER,
progress_percent REAL
)
""",
"""
CREATE TABLE IF NOT EXISTS job_commands (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_run_id INTEGER NOT NULL,
command_type TEXT NOT NULL,
target_item_id INTEGER,
status TEXT NOT NULL DEFAULT 'pending',
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
applied_at TEXT,
payload_json TEXT
)
""",
"""
CREATE TABLE IF NOT EXISTS job_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_run_id INTEGER NOT NULL,
job_stage_id INTEGER,
job_item_id INTEGER,
worker_id INTEGER,
event_type TEXT NOT NULL,
message TEXT,
details_json TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS job_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_run_id INTEGER NOT NULL,
job_stage_id INTEGER,
worker_id INTEGER,
level TEXT NOT NULL DEFAULT 'info',
message TEXT NOT NULL,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""",
"""
CREATE TABLE IF NOT EXISTS config_revisions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_type TEXT NOT NULL DEFAULT 'env_file',
file_path TEXT NOT NULL,
content_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
applied_at TEXT,
note TEXT,
UNIQUE(source_type, file_path, content_hash)
)
""",
"""
CREATE INDEX IF NOT EXISTS idx_playlist_download_preferences_is_wanted
ON playlist_download_preferences (is_wanted, updated_at DESC)
""",
"""
CREATE INDEX IF NOT EXISTS idx_pool_playlists_playlist_id
ON pool_playlists (playlist_id, pool_id)
""",
"""
CREATE INDEX IF NOT EXISTS idx_playlist_songs_song_id
ON playlist_songs (song_id, playlist_id)
""",
"""
CREATE INDEX IF NOT EXISTS idx_file_assets_song_id
ON file_assets (song_id)
""",
"""
CREATE INDEX IF NOT EXISTS idx_job_items_running_song_id
ON job_items (song_id, status)
""",
]
def connect_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* and apply connection pragmas.

    The parent directory is created when missing. Rows are returned as
    ``sqlite3.Row``. The WAL and synchronous pragmas are best effort: an
    ``OperationalError`` from either one is ignored.
    """
    database_file = Path(db_path)
    database_file.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(database_file, timeout=SQLITE_BUSY_TIMEOUT_MS / 1000)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA foreign_keys = ON")
    connection.execute(f"PRAGMA busy_timeout = {SQLITE_BUSY_TIMEOUT_MS}")

    for optional_pragma in ("PRAGMA journal_mode = WAL", "PRAGMA synchronous = NORMAL"):
        with suppress(sqlite3.OperationalError):
            connection.execute(optional_pragma)
    return connection
def ensure_default_local_backend(conn: sqlite3.Connection, library_root: str | Path) -> None:
    """Upsert the ``default-local`` storage backend pointing at *library_root*.

    The root is stored as a resolved absolute path. An existing row with the
    same name has its type, base path and default flag overwritten. The
    caller is responsible for committing.
    """
    upsert_sql = """
        INSERT INTO storage_backends (name, backend_type, base_path, is_default)
        VALUES (?, ?, ?, 1)
        ON CONFLICT(name) DO UPDATE SET
        backend_type = excluded.backend_type,
        base_path = excluded.base_path,
        is_default = excluded.is_default,
        updated_at = CURRENT_TIMESTAMP
        """
    absolute_root = str(Path(library_root).resolve())
    bindings = ("default-local", "local_fs", absolute_root)
    conn.execute(upsert_sql, bindings)
# Column name -> SQL type for job_workers columns that
# _ensure_job_worker_throughput_columns adds via ALTER TABLE when an existing
# table lacks them. The same columns appear in the CREATE TABLE statement.
_JOB_WORKER_THROUGHPUT_COLUMNS: dict[str, str] = {
    "downloaded_bytes": "INTEGER",
    "total_bytes": "INTEGER",
    "speed_bytes_per_sec": "INTEGER",
    "progress_percent": "REAL",
}
# Column name -> SQL type for playlists columns that _ensure_playlist_columns
# adds via ALTER TABLE when an existing table lacks them.
_PLAYLIST_COLUMNS: dict[str, str] = {
    "play_count": "INTEGER",
    "collected_song_count": "INTEGER",
}
def _ensure_table_columns(
conn: sqlite3.Connection,
*,
table_name: str,
required_columns: dict[str, str],
) -> None:
table_exists = conn.execute(
"SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
(table_name,),
).fetchone()
if table_exists is None:
return
existing_columns = {
str(row["name"])
for row in conn.execute(f"PRAGMA table_info({table_name})").fetchall()
}
for column_name, column_type in required_columns.items():
if column_name in existing_columns:
continue
conn.execute(
f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
)
def _ensure_job_worker_throughput_columns(conn: sqlite3.Connection) -> None:
    """Add the throughput columns to ``job_workers`` when they are missing."""
    target = {"table_name": "job_workers", "required_columns": _JOB_WORKER_THROUGHPUT_COLUMNS}
    _ensure_table_columns(conn, **target)
def _ensure_playlist_columns(conn: sqlite3.Connection) -> None:
    """Add the counter columns to ``playlists`` when they are missing."""
    target = {"table_name": "playlists", "required_columns": _PLAYLIST_COLUMNS}
    _ensure_table_columns(conn, **target)
def initialize_database(
    db_path: str | Path,
    default_library_root: str | Path | None = None,
) -> sqlite3.Connection:
    """Open the catalog database and bring its schema up to date.

    Runs every statement in ``SCHEMA_STATEMENTS``, adds columns missing from
    existing ``job_workers``/``playlists`` tables and, when
    *default_library_root* is given, creates that directory and registers it
    as the default local storage backend. All changes are committed.

    Args:
        db_path: Location of the SQLite file.
        default_library_root: Optional library directory to register.

    Returns:
        The open, initialized connection. The caller owns it and must close it.

    Raises:
        Whatever the underlying SQLite or filesystem call raised. The
        connection is closed before the error propagates.
    """
    conn = connect_database(db_path)
    try:
        for statement in SCHEMA_STATEMENTS:
            conn.execute(statement)
        _ensure_job_worker_throughput_columns(conn)
        _ensure_playlist_columns(conn)
        if default_library_root is not None:
            Path(default_library_root).mkdir(parents=True, exist_ok=True)
            ensure_default_local_backend(conn, default_library_root)
        conn.commit()
    except BaseException:
        # The caller never receives the handle on failure, so release it here
        # rather than leaking an open connection.
        conn.close()
        raise
    return conn
@@ -0,0 +1,332 @@
from __future__ import annotations
import copy
import json
from urllib.parse import parse_qs, urlparse
from musicdl.modules.utils import SongInfo, safeextractfromdict, seconds2hms
def _parse_duration_seconds(value) -> int:
try:
return max(int(float(value or 0)), 0)
except Exception:
return 0
def _has_positive_value(value) -> bool:
try:
return float(value or 0) > 0
except Exception:
return False
def _normalize_text(value, default: str = "NULL") -> str:
text = str(value or "").strip()
if not text:
return default
return text
def _join_artist_names(value) -> str:
if isinstance(value, (list, tuple)):
names = []
for item in value:
if isinstance(item, dict):
name = str(item.get("name", "")).strip()
else:
name = str(item or "").strip()
if name and name not in names:
names.append(name)
return ", ".join(names) if names else "NULL"
text = str(value or "").replace("/", ", ").strip()
return text or "NULL"
def _normalize_audio_ext(value: str | None) -> str:
return str(value or "").strip().lower().lstrip(".")
def _remove_suffix(value: str, suffix: str) -> str:
text = str(value or "")
token = str(suffix or "")
if token and text.endswith(token):
return text[: -len(token)]
return text
def _remove_prefix(value: str, prefix: str) -> str:
text = str(value or "")
token = str(prefix or "")
if token and text.startswith(token):
return text[len(token) :]
return text
def guess_rough_audio_format(source: str, search_result: dict) -> str:
    """Guess ``"flac"`` or ``"mp3"`` from the size/format hints in a search hit.

    Only the QQ, Kuwo and NetEase client names are recognised. Any other
    source, or a hit without usable hints, yields ``""``. Lossless hints are
    checked before lossy ones at each level.
    """

    def _any_positive(container: dict, keys: tuple[str, ...]) -> bool:
        return any(_has_positive_value(container.get(key)) for key in keys)

    def _quality_has_size(quality_key: str) -> bool:
        return _has_positive_value(safeextractfromdict(search_result, [quality_key, "size"], 0))

    source_name = str(source or "")

    if source_name == "QQMusicClient":
        file_meta = safeextractfromdict(search_result, ["file"], {}) or {}
        # Probed in order: the nested "file" sizes first, then the flat keys.
        probes = (
            (file_meta, ("size_hires", "size_try", "size_flac", "size_ape"), "flac"),
            (file_meta, ("size_320mp3", "size_mp3", "size_128mp3"), "mp3"),
            (search_result, ("sizeflac", "sizeape"), "flac"),
            (search_result, ("size320", "size128", "sizeogg"), "mp3"),
        )
        for container, size_keys, audio_format in probes:
            if _any_positive(container, size_keys):
                return audio_format
        return ""

    if source_name == "KuwoMusicClient":
        meta_text = str(search_result.get("MINFO") or search_result.get("formats") or "").lower()
        token_groups = (
            (("flac", "ape", "wav", "lossless", "hires"), "flac"),
            (("mp3", "320kmp3", "192kmp3", "128kmp3"), "mp3"),
        )
        for tokens, audio_format in token_groups:
            if any(token in meta_text for token in tokens):
                return audio_format
        return ""

    if source_name == "NeteaseMusicClient":
        if _quality_has_size("hr") or _quality_has_size("sq"):
            return "flac"
        if any(_quality_has_size(quality_key) for quality_key in ("h", "m", "l")):
            return "mp3"
        return ""

    return ""
def build_deferred_song_info(
    source: str,
    raw_search_result: dict,
    identifier,
    song_name,
    singers,
    album: str | None = None,
    duration_s: int | float = 0,
    cover_url: str | None = None,
    ext: str | None = None,
) -> SongInfo:
    """Build a ``SongInfo`` placeholder that carries no download details yet.

    The raw search hit is deep-copied into ``raw_data["search"]`` and flagged
    with ``deferred_search``. File size and download URL are left unset and
    the lyric is the ``"NULL"`` placeholder.
    """
    duration_seconds = _parse_duration_seconds(duration_s)
    duration_text = seconds2hms(duration_seconds) if duration_seconds > 0 else "-:-:-"
    raw_payload = {"search": copy.deepcopy(raw_search_result or {}), "deferred_search": True}
    cleaned_cover_url = str(cover_url or "").strip() or None
    cleaned_identifier = str(identifier or "").strip()

    return SongInfo(
        raw_data=raw_payload,
        source=str(source),
        song_name=_normalize_text(song_name),
        singers=_normalize_text(singers),
        album=_normalize_text(album),
        ext=_normalize_audio_ext(ext),
        file_size_bytes=None,
        file_size=None,
        identifier=cleaned_identifier,
        duration_s=duration_seconds,
        duration=duration_text,
        lyric="NULL",
        cover_url=cleaned_cover_url,
        download_url=None,
        download_url_status={},
    )
def _apply_work_dir(client, playlist_name: str, song_infos: list[SongInfo]) -> list[SongInfo]:
if not song_infos:
return []
if hasattr(client, "_constructuniqueworkdir") and callable(client._constructuniqueworkdir):
work_dir = client._constructuniqueworkdir(keyword=playlist_name)
for song_info in song_infos:
song_info.work_dir = work_dir
if hasattr(client, "_removeduplicates") and callable(client._removeduplicates):
return client._removeduplicates(song_infos=song_infos)
return song_infos
def _extract_playlist_id_from_url(playlist_url: str, query_keys: tuple[str, ...] = ("id", "pid", "bangId")) -> str:
parsed = urlparse(str(playlist_url or "").strip())
query_candidates = [parsed.query]
fragment = str(parsed.fragment or "").strip()
if fragment:
fragment_url = fragment if "://" in fragment else f"https://placeholder{fragment if fragment.startswith('/') else '/' + fragment}"
query_candidates.append(urlparse(fragment_url).query)
for query_text in query_candidates:
parsed_query = parse_qs(query_text, keep_blank_values=True)
for query_key in query_keys:
candidate = str((parsed_query.get(query_key) or [""])[0]).strip()
if candidate:
return candidate
for path_part in reversed([part for part in parsed.path.split("/") if part]):
candidate = _remove_suffix(_remove_suffix(str(path_part), ".html"), ".htm").strip()
if candidate:
return candidate
return ""
def build_netease_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Fetch a NetEase playlist and return deferred ``SongInfo`` entries.

    The playlist detail response supplies the full track id list plus details
    for some tracks. Details for the rest are fetched from the song-detail
    endpoint in batches of 200 numeric ids. Output follows playlist order and
    leaves out tracks without details. Returns ``[]`` when the URL has no id
    or the playlist has no tracks. HTTP errors propagate via
    ``raise_for_status``.
    """
    overrides = copy.deepcopy(request_overrides or {})
    overrides.setdefault("timeout", (10, 30))

    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id",))
    if not playlist_id:
        return []

    details_by_id: dict[str, dict] = {}

    def _track_key(entry: dict) -> str:
        return str(entry.get("id") or "").strip()

    def _index_details(entries) -> None:
        for entry in entries:
            key = _track_key(entry)
            if key:
                details_by_id[key] = entry

    def _duration_seconds(detail: dict):
        # "dt" is divided by 1000 before being used as the duration in seconds.
        if not str(detail.get("dt", "")).strip():
            return 0
        try:
            return float(detail.get("dt", 0) or 0) / 1000
        except Exception:
            return 0

    response = client.post("https://music.163.com/api/v6/playlist/detail", data={"id": playlist_id}, **overrides)
    response.raise_for_status()
    playlist_info = safeextractfromdict(response.json(), ["playlist"], {}) or {}
    track_refs = safeextractfromdict(playlist_info, ["trackIds"], []) or []
    playlist_name = _normalize_text(playlist_info.get("name"), f"playlist-{playlist_id}")
    if not track_refs:
        return []

    _index_details(safeextractfromdict(playlist_info, ["tracks"], []) or [])

    ordered_ids = [_track_key(track_ref) for track_ref in track_refs]
    missing_ids = [track_id for track_id in ordered_ids if track_id and track_id not in details_by_id]
    for start in range(0, len(missing_ids), 200):
        numeric_ids = [track_id for track_id in missing_ids[start : start + 200] if track_id.isdigit()]
        if not numeric_ids:
            continue
        payload = json.dumps(
            [{"id": int(track_id), "v": 0} for track_id in numeric_ids],
            ensure_ascii=False,
            separators=(",", ":"),
        )
        detail_response = client.post(
            "https://interface3.music.163.com/api/v3/song/detail",
            data={"c": payload},
            **overrides,
        )
        detail_response.raise_for_status()
        _index_details(detail_response.json().get("songs", []) or [])

    song_infos: list[SongInfo] = []
    for track_id in ordered_ids:
        detail = details_by_id.get(track_id)
        if not track_id or not isinstance(detail, dict):
            continue
        song_infos.append(
            build_deferred_song_info(
                source=client.source,
                raw_search_result=detail,
                identifier=track_id,
                song_name=detail.get("name"),
                singers=_join_artist_names(detail.get("ar") or []),
                album=safeextractfromdict(detail, ["al", "name"], None),
                duration_s=_duration_seconds(detail),
                cover_url=safeextractfromdict(detail, ["al", "picUrl"], None),
                ext=guess_rough_audio_format(client.source, detail),
            )
        )
    return _apply_work_dir(client, playlist_name, song_infos)
def build_qq_raw_track_song_infos(client, raw_tracks: list[dict], playlist_name: str | None = None) -> list[SongInfo]:
    """Turn raw QQ track dicts into deferred ``SongInfo`` entries.

    Both the nested (``album.mid``, ``title``) and flat (``albummid``,
    ``songname``) key layouts are read. Tracks without any id are skipped.
    """
    id_keys = ("mid", "songmid", "songid", "id")
    song_infos: list[SongInfo] = []
    for track in raw_tracks or []:
        # First truthy id wins, in the same priority order as id_keys.
        track_id = next((track.get(key) for key in id_keys if track.get(key)), None)
        if not track_id:
            continue

        cover_mid = safeextractfromdict(track, ["album", "mid"], "") or track.get("albummid")
        cover_url = None
        if cover_mid:
            cover_url = f"https://y.gtimg.cn/music/photo_new/T002R800x800M000{cover_mid}.jpg"
        title = track.get("title") or track.get("songname") or track.get("name")
        album_name = safeextractfromdict(track, ["album", "title"], None) or track.get("albumname")

        song_infos.append(
            build_deferred_song_info(
                source=client.source,
                raw_search_result=track,
                identifier=track_id,
                song_name=title,
                singers=_join_artist_names(track.get("singer") or []),
                album=album_name,
                duration_s=track.get("interval", 0),
                cover_url=cover_url,
                ext=guess_rough_audio_format(client.source, track),
            )
        )
    return _apply_work_dir(client, _normalize_text(playlist_name, "playlist"), song_infos)
def build_qq_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Fetch a QQ playlist by URL and return deferred ``SongInfo`` entries.

    Returns ``[]`` when no playlist id can be extracted from the URL. HTTP
    errors propagate via ``raise_for_status``.
    """
    overrides = copy.deepcopy(request_overrides or {})
    overrides.setdefault("timeout", (10, 30))

    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id", "disstid"))
    if not playlist_id:
        return []

    query = {"disstid": str(playlist_id), "type": "1", "json": "1", "utf8": "1", "onlysong": "0", "format": "json"}
    response = client.get(
        "https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg",
        headers={"Referer": f"https://y.qq.com/n/ryqq/playlist/{playlist_id}"},
        params=query,
        **overrides,
    )
    response.raise_for_status()
    playlist_result = response.json()

    # The track list may sit under any of these paths; take the first
    # non-empty one.
    for track_path in (["cdlist", 0, "songlist"], ["cdlist", 0, "list"], ["songlist"]):
        raw_tracks = safeextractfromdict(playlist_result, track_path, [])
        if raw_tracks:
            break
    else:
        raw_tracks = []

    playlist_name = safeextractfromdict(playlist_result, ["cdlist", 0, "dissname"], None) or f"playlist-{playlist_id}"
    return build_qq_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_name)
def build_kuwo_raw_track_song_infos(client, raw_tracks: list[dict], playlist_name: str | None = None) -> list[SongInfo]:
    """Turn raw Kuwo track dicts into deferred ``SongInfo`` entries.

    Both upper-case (``MUSICRID``, ``SONGNAME``) and lower-case key layouts
    are read. The ``MUSIC_`` prefix is removed from the id, and tracks
    without an id are skipped.
    """
    song_infos: list[SongInfo] = []
    for track in raw_tracks or []:
        prefixed_id = str(track.get("MUSICRID") or track.get("musicrid") or track.get("rid") or "")
        track_id = _remove_prefix(prefixed_id, "MUSIC_")
        if not track_id:
            continue

        title = track.get("SONGNAME") or track.get("name")
        artist_text = track.get("ARTIST") or track.get("artist")
        album_name = track.get("ALBUM") or track.get("album")
        duration_value = track.get("DURATION") or track.get("duration", 0)
        cover_url = track.get("hts_MVPIC") or track.get("albumpic") or track.get("pic")

        song_infos.append(
            build_deferred_song_info(
                source=client.source,
                raw_search_result=track,
                identifier=track_id,
                song_name=title,
                singers=artist_text,
                album=album_name,
                duration_s=duration_value,
                cover_url=cover_url,
                ext=guess_rough_audio_format(client.source, track),
            )
        )
    return _apply_work_dir(client, _normalize_text(playlist_name, "playlist"), song_infos)
def build_kuwo_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Fetch every page of a Kuwo playlist and return deferred ``SongInfo`` entries.

    Pages of 100 tracks are requested until a page comes back empty or the
    reported total has been collected. Tracks are de-duplicated on
    ``musicrid``/``rid``, keeping the last occurrence. Returns ``[]`` when no
    playlist id can be extracted from the URL.
    """
    overrides = copy.deepcopy(request_overrides or {})
    overrides.setdefault("timeout", (10, 30))

    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id", "pid"))
    if not playlist_id:
        return []

    collected: list[dict] = []
    first_page_result = {}
    page_number = 1
    while True:
        response = client.get(
            f"https://m.kuwo.cn/newh5app/wapi/api/www/playlist/playListInfo?pid={playlist_id}&pn={page_number}&rn=100",
            **overrides,
        )
        response.raise_for_status()
        page_result = response.json()
        page_tracks = safeextractfromdict(page_result, ["data", "musicList"], []) or []
        if not page_tracks:
            break
        collected.extend(page_tracks)
        page_number += 1
        if not first_page_result:
            # The playlist name is taken from the first non-empty page.
            first_page_result = copy.deepcopy(page_result)
        reported_total = float(safeextractfromdict(page_result, ["data", "total"], 0) or 0)
        if reported_total <= len(collected):
            break

    tracks_by_key: dict[str, dict] = {}
    for track in collected:
        tracks_by_key[str(track.get("musicrid") or track.get("rid") or "")] = track
    deduped_tracks = list(tracks_by_key.values())

    playlist_name = safeextractfromdict(first_page_result, ["data", "name"], None) or f"playlist-{playlist_id}"
    return build_kuwo_raw_track_song_infos(client, deduped_tracks, playlist_name=playlist_name)
@@ -0,0 +1,684 @@
from __future__ import annotations
import json
import shutil
import signal
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from .models import deserialize_song_info, normalize_source_name
from .repository import CatalogRepository
from .resolver import DEFAULT_DOWNLOAD_SOURCES, MultiSourceSongResolver, SOURCE_CLIENT_NAMES, normalize_audio_ext
from .resolver_stats import ResolverStatsRepository, default_resolver_stats_db_path
from .runtime import build_download_relative_dir
from musicdl.modules.utils.lyric import LyricSearchClient
from musicdl.modules.utils.misc import shortenpathsinsonginfos
from musicdl.modules.utils.songinfoutils import SongInfoUtils
# Extensions that CatalogDownloader._detect_quality_label reports as "lossless".
LOSSLESS_EXTENSIONS = {"flac", "wav", "alac", "ape", "wv", "tta", "dsf", "dff"}
# Default value of CatalogDownloader's worker_count argument.
DEFAULT_DOWNLOAD_WORKERS = 10
# Default lyric-search time limit in seconds, enforced by
# CatalogDownloader._lyric_search_timeout_guard.
DEFAULT_LYRIC_SEARCH_TIMEOUT_SECONDS = 20
class _LyricSearchTimeout(BaseException):
pass
@dataclass
class ResolvedDownloadPayload:
    """Mutable record bundling a catalog row with its resolved download data.

    NOTE(review): the producers and consumers of this record are outside the
    visible chunk; the field notes below are inferred from names and types
    and should be confirmed against the call sites.
    """

    # Catalog row for the song, as a plain mapping.
    row: dict[str, object]
    # Human-readable label; presumably the "name / singers" text.
    display_text: str
    # Presumably the configured library root -- TODO confirm.
    default_root: Path
    # Presumably the directory actually chosen for this download -- TODO confirm.
    target_root: Path
    # Presumably a storage_backends.id value -- TODO confirm.
    backend_id: int
    # Expected file size in bytes, or None when unknown.
    expected_bytes: int | None
    # Resolved song info object; typed loosely as ``object``.
    resolved_song_info: object
def _progress_percent(completed: int | None, total: int | None) -> int:
normalized_total = max(int(total or 0), 0)
normalized_completed = max(int(completed or 0), 0)
if normalized_total <= 0:
return 0
if normalized_completed >= normalized_total:
return 100
return int((normalized_completed * 100) / normalized_total)
def _format_progress_text(downloaded_bytes: int | None, total_bytes: int | None) -> str:
downloaded_value = max(int(downloaded_bytes or 0), 0)
total_value = max(int(total_bytes or 0), downloaded_value)
return f"{downloaded_value / 1024 / 1024:.2f}MB/{total_value / 1024 / 1024:.2f}MB"
class DownloadPlanner:
    """Builds the list of songs that still need to be downloaded."""

    def __init__(self, repository: CatalogRepository):
        self.repository = repository

    def build_download_queue(
        self,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
    ) -> list[dict]:
        """Return pending songs that have no active local file yet.

        Each item is a dict copy of the repository row with an added integer
        ``song_id`` key taken from the row's ``id``.
        """
        pending_rows = self.repository.list_pending_download_songs(
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        already_local = self.repository.song_has_active_local_file
        queue: list[dict] = []
        for pending in pending_rows:
            song_id = int(pending["id"])
            if not already_local(song_id):
                queue.append({**dict(pending), "song_id": song_id})
        return queue
class CatalogDownloader:
def __init__(
    self,
    repository: CatalogRepository,
    work_dir: str = "musicdl_outputs/catalogsync",
    worker_count: int = DEFAULT_DOWNLOAD_WORKERS,
):
    """Store configuration and wire up the multi-source resolver.

    ``worker_count`` is clamped to at least 1. Resolver statistics are kept
    in a database whose path is derived from the repository's ``db_path``.
    """
    self.repository = repository
    self.work_dir = work_dir
    self.worker_count = max(1, worker_count)

    # Mutable state shared between threads and the locks guarding it.
    self._clients: dict[str, object] = {}
    self._client_lock = threading.Lock()
    self._space_lock = threading.Lock()
    self._current_library_root: Path | None = None
    self._lyric_search_timeout_seconds = DEFAULT_LYRIC_SEARCH_TIMEOUT_SECONDS

    stats_db_path = default_resolver_stats_db_path(self.repository.db_path)
    stats_repository = ResolverStatsRepository(stats_db_path)
    # Lambdas look the methods up on each call rather than at construction.
    self._resolver = MultiSourceSongResolver(
        client_factory=lambda platform: self.get_client(platform),
        request_overrides_factory=lambda timeout: self._request_overrides(timeout),
        resolver_stats_repo=stats_repository,
    )
@contextmanager
def _lyric_search_timeout_guard(self):
    """Context manager that bounds the wrapped block with a SIGALRM timer.

    When the timer fires, the installed handler raises
    ``_LyricSearchTimeout`` inside the guarded block. The guard is a no-op
    (it just yields) when the configured timeout is not positive, when called
    off the main thread, or when the platform lacks
    ``SIGALRM``/``setitimer``.
    """
    timeout_seconds = float(self._lyric_search_timeout_seconds or 0)
    if timeout_seconds <= 0:
        yield
        return
    # NOTE(review): presumably skipped off the main thread because Python
    # only lets the main thread install signal handlers -- confirm.
    if threading.current_thread() is not threading.main_thread():
        yield
        return
    if not hasattr(signal, "SIGALRM") or not hasattr(signal, "setitimer"):
        yield
        return

    def _handle_timeout(_signum, _frame):
        raise _LyricSearchTimeout()

    # Remember the current handler so it can be restored afterwards.
    previous_handler = signal.getsignal(signal.SIGALRM)
    signal.signal(signal.SIGALRM, _handle_timeout)
    signal.setitimer(signal.ITIMER_REAL, timeout_seconds)
    try:
        yield
    finally:
        # Cancel the timer first, then put the previous handler back.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, previous_handler)
@staticmethod
def _request_overrides(timeout: tuple[int, int]) -> dict:
return {"timeout": timeout}
def get_client(self, platform: str):
    """Return the music client for *platform*, cached per calling thread.

    The cache key combines the thread identifier with the normalized
    platform name. A client is built on first use while holding
    ``_client_lock``.
    """
    platform = normalize_source_name(platform)
    client_key = f"{threading.get_ident()}:{platform}"
    if client_key in self._clients:
        return self._clients[client_key]

    with self._client_lock:
        if client_key not in self._clients:
            from musicdl.modules import BuildMusicClient

            client_config = {
                "type": SOURCE_CLIENT_NAMES[platform],
                "disable_print": True,
                "maintain_session": False,
                "work_dir": self.work_dir,
                "search_size_per_source": 5,
                "search_size_per_page": 5,
                "strict_limit_search_size_per_page": True,
            }
            self._clients[client_key] = BuildMusicClient(client_config)
    return self._clients[client_key]
def ensure_space(self, root_path: str | Path, required_bytes: int | None) -> Path:
with self._space_lock:
root = self._current_library_root or Path(root_path).resolve()
root.mkdir(parents=True, exist_ok=True)
if required_bytes is None or required_bytes <= 0:
self._current_library_root = root
return root
while shutil.disk_usage(root).free < required_bytes:
new_root = input("磁盘空间不足,请输入新的下载目录继续: ").strip()
if not new_root:
raise RuntimeError("Disk space is insufficient and no new directory was provided")
root = Path(new_root).resolve()
root.mkdir(parents=True, exist_ok=True)
self._current_library_root = root
return root
def _initialize_library_root(self, root_path: str | Path) -> Path:
normalized_root = Path(root_path).resolve()
with self._space_lock:
if self._current_library_root is None:
self._current_library_root = normalized_root
return self._current_library_root
@staticmethod
def _normalize_singers(value: object) -> str | None:
if not isinstance(value, str):
return None
text = value.strip()
if not text or text.upper() == "NULL":
return None
return text
@staticmethod
def _detect_download_platform(song_info: object, fallback_platform: str) -> str:
    """Return the song's normalized source, or the normalized fallback when it is "unknown"."""
    detected = normalize_source_name(getattr(song_info, "source", None))
    return normalize_source_name(fallback_platform) if detected == "unknown" else detected
def resolve_song_info_for_download(
self,
row: dict,
song_info: object,
download_sources: list[str] | None = None,
progress_callback=None,
) -> object:
return self._resolver.resolve_song_info(
row=row,
snapshot_song_info=song_info,
download_sources=download_sources or DEFAULT_DOWNLOAD_SOURCES,
progress_callback=progress_callback,
)
@staticmethod
def _detect_quality_label(song_info: object, actual_ext: str | None, fallback: str | None = None) -> str | None:
raw_data = getattr(song_info, "raw_data", None)
if isinstance(raw_data, dict):
quality = raw_data.get("quality")
if quality:
return str(quality)
normalized_ext = normalize_audio_ext(actual_ext or getattr(song_info, "ext", None))
if normalized_ext in LOSSLESS_EXTENSIONS:
return "lossless"
if normalized_ext:
return "standard"
return fallback
@staticmethod
def _build_display_text(row: dict) -> str:
display_name = str(row.get("name") or row.get("id") or "")
singers = str(row.get("singers") or "").strip()
return f"{display_name} / {singers}".strip(" /")
@staticmethod
def _normalize_lyric_text(value: object) -> str | None:
if not isinstance(value, str):
return None
text = value.replace("\r\n", "\n").strip()
if not text or text.upper() == "NULL":
return None
return text
def _resolve_lyrics_text(self, *, song_info: object | None, row: dict[str, object] | None = None) -> str | None:
lyric_text = self._normalize_lyric_text(getattr(song_info, "lyric", None))
if lyric_text:
return lyric_text
row = row or {}
title = self._normalize_lyric_text(getattr(song_info, "song_name", None)) or self._normalize_lyric_text(row.get("name"))
singers = self._normalize_singers(getattr(song_info, "singers", None)) or self._normalize_singers(row.get("singers"))
if not title or not singers:
return None
try:
with self._lyric_search_timeout_guard():
_lyric_result, lyric = LyricSearchClient.search(track_name=title, artist_name=singers)
except _LyricSearchTimeout:
return None
return self._normalize_lyric_text(lyric)
def _sync_lyrics_for_saved_song(
    self,
    *,
    row: dict[str, object],
    song_info: object | None,
    saved_path: Path,
    overwrite_lyrics: bool,
    worker_callback=None,
    display_text: str | None = None,
) -> str:
    """Write a ``.lrc`` sidecar next to ``saved_path`` and report what happened.

    Returns ``"saved"``, ``"skipped"`` or ``"failed"``. Every outcome is also
    reported through ``_emit_worker_progress``. Any exception raised while
    resolving or saving is converted into a ``"failed"`` result.
    """

    def report(message: str) -> None:
        self._emit_worker_progress(
            row,
            worker_callback,
            display_text=display_text,
            last_progress_text=message,
        )

    try:
        lyric_text = self._resolve_lyrics_text(song_info=song_info, row=row)
        if not lyric_text:
            report("lyrics unavailable")
            return "skipped"

        # Keep the in-memory song object in sync with the text about to be written.
        if hasattr(song_info, "lyric"):
            song_info.lyric = lyric_text

        sidecar_blocked = saved_path.with_suffix(".lrc").exists() and not overwrite_lyrics
        if sidecar_blocked:
            report("lyrics exists, skipped")
            return "skipped"

        if SongInfoUtils.savelrctofile(saved_path, lyric_text, overwrite=overwrite_lyrics):
            report("lyrics saved")
            return "saved"

        report("lyrics skipped")
        return "skipped"
    except Exception as exc:
        report(f"lyrics failed: {type(exc).__name__}: {exc}")
        return "failed"
@staticmethod
def _emit_worker_progress(
    row: dict,
    progress_callback,
    *,
    display_text: str | None,
    downloaded_bytes: int | None = None,
    total_bytes: int | None = None,
    speed_bytes_per_sec: int | None = None,
    progress_percent: int | None = None,
    last_progress_text: str | None = None,
) -> None:
    """Forward a progress snapshot for ``row`` to ``progress_callback``.

    Does nothing when no callback is supplied. The song id, playlist id and
    display text are always sent; the numeric fields and the progress text are
    only included when they are not ``None``.
    """
    if progress_callback is None:
        return

    song_id = row.get("id")
    state: dict[str, object] = {
        "current_song_id": None if song_id is None else int(song_id),
        "current_playlist_id": row.get("playlist_id"),
        "current_display_text": display_text,
    }
    numeric_fields = (
        ("downloaded_bytes", downloaded_bytes),
        ("total_bytes", total_bytes),
        ("speed_bytes_per_sec", speed_bytes_per_sec),
        ("progress_percent", progress_percent),
    )
    for key, value in numeric_fields:
        if value is not None:
            state[key] = int(value)
    if last_progress_text is not None:
        state["last_progress_text"] = str(last_progress_text)
    progress_callback(**state)
def _monitor_save_path(
    self,
    *,
    save_path: Path,
    expected_bytes: int | None,
    progress_callback,
    stop_event: threading.Event,
    row: dict,
    display_text: str | None,
    interval_seconds: float = 0.02,
) -> None:
    """Poll ``save_path`` until ``stop_event`` is set, reporting size growth.

    Each time the file is observed to have grown, a progress update is emitted
    with the current size, the total (``expected_bytes`` or, failing that, the
    current size) and a speed derived from the time since the previous growth.
    """
    previous_size = 0
    previous_tick = time.monotonic()
    while True:
        # wait() doubles as the polling delay and returns True once stopped.
        if stop_event.wait(interval_seconds):
            break
        if not save_path.exists():
            continue
        try:
            size_now = int(save_path.stat().st_size)
        except OSError:
            continue
        grown_by = size_now - previous_size
        if grown_by <= 0:
            continue
        tick = time.monotonic()
        # Floor the elapsed time so the speed division can never hit zero.
        elapsed = max(tick - previous_tick, 1e-6)
        total = int(expected_bytes or size_now)
        self._emit_worker_progress(
            row,
            progress_callback,
            display_text=display_text,
            downloaded_bytes=size_now,
            total_bytes=total,
            speed_bytes_per_sec=int(grown_by / elapsed),
            progress_percent=_progress_percent(size_now, total),
            last_progress_text=_format_progress_text(size_now, total),
        )
        previous_size, previous_tick = size_now, tick
def resolve_song_row(
    self,
    row,
    library_root: str | Path,
    download_sources: list[str] | None = None,
    worker_callback=None,
) -> ResolvedDownloadPayload | None:
    """Turn a catalog row into a ready-to-download payload.

    The stored snapshot is deserialized and resolved into downloadable song
    info, then a target root with enough space is chosen and registered as a
    local backend. Returns ``None`` when the row has no usable snapshot or the
    resolver yields nothing.
    """
    row_dict = dict(row)
    default_root = self._initialize_library_root(library_root)
    self.repository.ensure_local_backend(default_root, name="default-local", is_default=True)
    display_text = self._build_display_text(row_dict)
    self._emit_worker_progress(row_dict, worker_callback, display_text=display_text)

    raw_metadata = row_dict.get("metadata_json")
    metadata = json.loads(row_dict["metadata_json"]) if raw_metadata else {}
    song_info = deserialize_song_info(metadata.get("snapshot"))
    if song_info is None:
        return None

    def forward_resolve_message(message):
        return self._emit_worker_progress(
            row_dict,
            worker_callback,
            display_text=display_text,
            last_progress_text=message,
        )

    resolved_song_info = self.resolve_song_info_for_download(
        row=row_dict,
        song_info=song_info,
        download_sources=download_sources,
        progress_callback=forward_resolve_message if worker_callback is not None else None,
    )
    if resolved_song_info is None:
        return None

    # Prefer the size reported by the resolved song; fall back to the catalog row.
    target_root = self.ensure_space(
        default_root,
        getattr(resolved_song_info, "file_size_bytes", None) or row_dict.get("file_size_bytes"),
    )
    uses_default_root = target_root.resolve() == default_root
    backend_id = self.repository.ensure_local_backend(
        target_root,
        name="default-local" if uses_default_root else None,
        is_default=uses_default_root,
    )
    size_hint = getattr(resolved_song_info, "file_size_bytes", None) or row_dict.get("file_size_bytes")
    # A missing or zero size is reported as None.
    expected_bytes = int(size_hint or 0) or None
    return ResolvedDownloadPayload(
        row=row_dict,
        display_text=display_text,
        default_root=default_root,
        target_root=target_root,
        backend_id=backend_id,
        expected_bytes=expected_bytes,
        resolved_song_info=resolved_song_info,
    )
def download_resolved_song(
    self,
    resolved_payload: ResolvedDownloadPayload,
    worker_callback=None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> bool:
    """Download one resolved song, record the local file and optionally sync lyrics.

    While the client downloads, a daemon thread watches the destination file
    and reports progress (only when a ``worker_callback`` is given and the
    song exposes ``save_path``). Returns ``False`` when the client reports
    nothing downloaded, ``True`` after the file has been recorded.
    """
    row = resolved_payload.row
    song_info = resolved_payload.resolved_song_info
    label = resolved_payload.display_text
    download_platform = self._detect_download_platform(song_info, str(row["platform"]))
    client = self.get_client(download_platform)

    singers = self._normalize_singers(getattr(song_info, "singers", None))
    if not singers:
        singers = self._normalize_singers(row.get("singers"))
    target_dir = resolved_payload.target_root / build_download_relative_dir(
        platform=download_platform,
        singers=singers,
    )
    target_dir.mkdir(parents=True, exist_ok=True)
    song_info.work_dir = str(target_dir)
    # Reset the cached save path when the song object has one.
    if hasattr(song_info, "_save_path"):
        song_info._save_path = None

    watched_path: Path | None = None
    stop_signal: threading.Event | None = None
    watcher: threading.Thread | None = None
    self._emit_worker_progress(
        row,
        worker_callback,
        display_text=label,
        last_progress_text=f"starting download via {download_platform}",
    )

    if worker_callback is not None and hasattr(song_info, "save_path"):
        shortenpathsinsonginfos([song_info])
        watched_path = Path(song_info.save_path)
        stop_signal = threading.Event()
        watcher_kwargs = {
            "save_path": watched_path,
            "expected_bytes": resolved_payload.expected_bytes,
            "progress_callback": worker_callback,
            "stop_event": stop_signal,
            "row": row,
            "display_text": label,
        }
        watcher = threading.Thread(
            target=self._monitor_save_path,
            kwargs=watcher_kwargs,
            daemon=True,
            name=f"download-monitor-{row.get('id')}",
        )
        watcher.start()

    try:
        downloaded = client.download(
            [song_info],
            num_threadings=1,
            request_overrides=self._request_overrides((10, 60)),
            auto_supplement_song=False,
        )
    except TypeError:
        # Retry without request_overrides when the first call raised TypeError.
        downloaded = client.download(
            [song_info],
            num_threadings=1,
            auto_supplement_song=False,
        )
    finally:
        if stop_signal is not None:
            stop_signal.set()
        if watcher is not None:
            watcher.join(timeout=1.0)
        # Emit one final update from the file's size on disk after the monitor stops.
        if watched_path is not None and watched_path.exists():
            try:
                final_size = int(watched_path.stat().st_size)
            except OSError:
                final_size = 0
            if final_size > 0:
                total_bytes = int(resolved_payload.expected_bytes or final_size)
                self._emit_worker_progress(
                    row,
                    worker_callback,
                    display_text=label,
                    downloaded_bytes=final_size,
                    total_bytes=total_bytes,
                    progress_percent=_progress_percent(final_size, total_bytes),
                    last_progress_text=_format_progress_text(final_size, total_bytes),
                )

    if not downloaded:
        return False

    saved_song = downloaded[0]
    saved_path = Path(saved_song.save_path)
    relative_path = saved_path.relative_to(resolved_payload.target_root).as_posix()
    if saved_path.exists():
        actual_size = saved_path.stat().st_size
    else:
        actual_size = row.get("file_size_bytes")
    actual_ext = saved_path.suffix.lstrip(".") or row.get("ext")
    self.repository.record_local_file(
        song_id=int(row["id"]),
        backend_id=resolved_payload.backend_id,
        relative_path=relative_path,
        file_size_bytes=actual_size,
        ext=actual_ext,
        quality_label=self._detect_quality_label(song_info, actual_ext, fallback=row.get("quality_label")),
    )

    if lyrics_enabled:
        # Prefer the lyric carried by the saved song object, if it has one.
        has_saved_lyric = self._normalize_lyric_text(getattr(saved_song, "lyric", None))
        self._sync_lyrics_for_saved_song(
            row=row,
            song_info=saved_song if has_saved_lyric else song_info,
            saved_path=saved_path,
            overwrite_lyrics=overwrite_lyrics,
            worker_callback=worker_callback,
            display_text=label,
        )
    return True
def download_song_row(
    self,
    row,
    library_root: str | Path,
    download_sources: list[str] | None = None,
    worker_callback=None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> bool:
    """Resolve ``row`` and download it in one step.

    Returns ``False`` when the row cannot be resolved; otherwise returns the
    result of ``download_resolved_song``.
    """
    payload = self.resolve_song_row(
        row=row,
        library_root=library_root,
        download_sources=download_sources,
        worker_callback=worker_callback,
    )
    if payload is not None:
        return self.download_resolved_song(
            resolved_payload=payload,
            worker_callback=worker_callback,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )
    return False
def download_pending(
    self,
    library_root: str | Path,
    sources: list[str] | None = None,
    limit: int | None = None,
    playlist_ids: list[int] | None = None,
    download_sources: list[str] | None = None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> int:
    """Download every queued song on a thread pool and return the success count.

    The queue comes from ``DownloadPlanner``; each row is processed by
    ``download_song_row``. An exception raised by any worker propagates to the
    caller when its result is collected.
    """
    queue = DownloadPlanner(self.repository).build_download_queue(
        sources=sources,
        limit=limit,
        playlist_ids=playlist_ids,
    )
    default_root = self._initialize_library_root(library_root)
    self.repository.ensure_local_backend(default_root, name="default-local", is_default=True)

    shared_kwargs = {
        "library_root": default_root,
        "download_sources": download_sources,
        "lyrics_enabled": lyrics_enabled,
        "overwrite_lyrics": overwrite_lyrics,
    }
    with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
        futures = [executor.submit(self.download_song_row, row=row, **shared_kwargs) for row in queue]
        downloaded_count = sum(1 for future in as_completed(futures) if future.result())
    return downloaded_count
def sync_local_lyrics(
    self,
    sources: list[str] | None = None,
    playlist_ids: list[int] | None = None,
    limit: int | None = None,
    overwrite_lyrics: bool = False,
    progress_callback=None,
) -> dict[str, int]:
    """Create lyric sidecars for already-downloaded songs and return a tally.

    Rows come from ``repository.list_local_songs_for_lyrics``. Each row is
    processed on a thread pool; the returned summary counts ``total``,
    ``processed``, ``saved``, ``skipped`` and ``failed``. When a
    ``progress_callback`` is supplied it receives the running summary once
    before processing starts and again after every processed row.
    """
    rows = self.repository.list_local_songs_for_lyrics(
        sources=sources,
        playlist_ids=playlist_ids,
        limit=limit,
    )
    summary = {"total": len(rows), "processed": 0, "saved": 0, "skipped": 0, "failed": 0}
    status_messages = {
        "saved": "lyrics saved",
        "skipped": "lyrics skipped",
        "failed": "lyrics failed",
    }

    def emit_progress(
        *,
        row_dict: dict[str, object] | None = None,
        display_text: str | None = None,
        last_status: str | None = None,
        last_progress_text: str | None = None,
    ) -> None:
        if progress_callback is None:
            return
        state: dict[str, object] = dict(summary)
        state["progress_percent"] = _progress_percent(summary["processed"], summary["total"])
        if row_dict is not None:
            song_id = row_dict.get("id")
            state["current_song_id"] = None if song_id is None else int(song_id)
            state["current_playlist_id"] = row_dict.get("playlist_id")
        optional_fields = (
            ("current_display_text", display_text),
            ("last_status", last_status),
            ("last_progress_text", last_progress_text),
        )
        for key, value in optional_fields:
            if value is not None:
                state[key] = value
        progress_callback(**state)

    def process_row(row) -> tuple[dict[str, object], str, str, str]:
        row_dict = dict(row)
        display_text = self._build_display_text(row_dict)
        try:
            local_file_path = row_dict.get("local_file_path")
            if not local_file_path:
                return row_dict, display_text, "failed", "missing local file path"
            saved_path = Path(str(local_file_path))
            if not saved_path.exists():
                return row_dict, display_text, "failed", "local file missing"

            raw_metadata = row_dict.get("metadata_json")
            metadata = json.loads(row_dict["metadata_json"]) if raw_metadata else {}
            song_info = None
            if isinstance(metadata, dict):
                song_info = deserialize_song_info(metadata.get("snapshot"))

            outcome = self._sync_lyrics_for_saved_song(
                row=row_dict,
                song_info=song_info,
                saved_path=saved_path,
                overwrite_lyrics=overwrite_lyrics,
                display_text=display_text,
            )
            # Anything outside the known statuses is counted as a failure.
            if outcome not in status_messages:
                outcome = "failed"
            return row_dict, display_text, outcome, status_messages[outcome]
        except Exception as exc:
            return row_dict, display_text, "failed", f"lyrics failed: {type(exc).__name__}: {exc}"

    emit_progress()
    with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
        pending = [executor.submit(process_row, row) for row in rows]
        for finished in as_completed(pending):
            row_dict, display_text, status, status_text = finished.result()
            summary["processed"] += 1
            summary[status] += 1
            emit_progress(
                row_dict=row_dict,
                display_text=display_text,
                last_status=status,
                last_progress_text=status_text,
            )
    return summary
@@ -0,0 +1,118 @@
from __future__ import annotations
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Iterable
from uuid import uuid4
from .runtime import sanitize_path_component
# Separator placed between the unique storage prefix and the friendly filename of a bundle.
INTERNAL_BUNDLE_NAME_SEPARATOR = "--"
def default_bundle_root() -> Path:
    """Return ``<tempdir>/musicdl-catalogsync/bundles``, creating it if needed."""
    bundle_dir = Path(tempfile.gettempdir()).joinpath("musicdl-catalogsync", "bundles")
    bundle_dir.mkdir(parents=True, exist_ok=True)
    return bundle_dir
def build_single_playlist_bundle_filename(
    *,
    platform: str,
    playlist_id: int,
    playlist_name: str,
) -> str:
    """Build ``playlist-<platform>-<id>-<name>.zip`` from sanitized components.

    An empty platform is sanitized with the fallback ``"unknown"``; an empty
    playlist name with the fallback ``playlist-<id>``.
    """
    platform_part = sanitize_path_component(str(platform or ""), "unknown")
    name_part = sanitize_path_component(str(playlist_name or ""), f"playlist-{int(playlist_id)}")
    return "playlist-{}-{}-{}.zip".format(platform_part, int(playlist_id), name_part)
def build_multi_playlist_bundle_filename(*, created_at: datetime | None = None) -> str:
    """Return ``playlists-export-<YYYYmmdd-HHMMSS>.zip`` for ``created_at`` (default: now)."""
    moment = created_at if created_at else datetime.now()
    stamp = moment.strftime("%Y%m%d-%H%M%S")
    return f"playlists-export-{stamp}.zip"
def bundle_download_filename(bundle_path_or_name: str | Path) -> str:
    """Return the user-facing filename for a stored bundle.

    Everything up to and including the first ``INTERNAL_BUNDLE_NAME_SEPARATOR``
    is dropped. Names without the separator are returned unchanged.
    """
    filename = Path(bundle_path_or_name).name
    _prefix, separator, friendly = filename.partition(INTERNAL_BUNDLE_NAME_SEPARATOR)
    return friendly if separator else filename
def resolve_bundle_download_path(bundle_root: Path, bundle_name: str) -> Path | None:
    """Map a requested bundle name to ``<bundle_root>/<name>.zip``.

    Returns ``None`` for a blank name, or when sanitizing the name would
    change it (or leave it empty).
    """
    requested = str(bundle_name or "").strip()
    if not requested:
        return None
    sanitized = sanitize_path_component(requested, "")
    # Only names that pass through the sanitizer unchanged are accepted.
    if sanitized and sanitized == requested:
        return Path(bundle_root) / f"{requested}.zip"
    return None
def create_single_playlist_bundle(
    *,
    playlist_dir: Path,
    bundle_root: Path,
    platform: str,
    playlist_id: int,
    playlist_name: str,
) -> Path:
    """Zip one playlist directory into ``bundle_root`` and return the archive path.

    Archive entries are stored under the playlist directory's own name.

    Raises:
        FileNotFoundError: if ``playlist_dir`` is missing or not a directory.
    """
    source_dir = Path(playlist_dir)
    if not (source_dir.exists() and source_dir.is_dir()):
        raise FileNotFoundError(f"playlist directory not found: {source_dir}")

    destination_root = Path(bundle_root)
    destination_root.mkdir(parents=True, exist_ok=True)
    filename = build_single_playlist_bundle_filename(
        platform=platform,
        playlist_id=playlist_id,
        playlist_name=playlist_name,
    )
    bundle_path = destination_root / filename
    _write_zip_from_directories(bundle_path, [(source_dir, source_dir.name)])
    return bundle_path
def create_multi_playlist_bundle(
    *,
    playlist_dirs: Iterable[Path],
    bundle_root: Path,
    created_at: datetime | None = None,
) -> Path:
    """Zip several playlist directories into one uniquely named archive.

    The stored filename is ``<timestamp>-<random>`` + the internal separator +
    the friendly export name. Each directory is archived under
    ``playlists/<directory name>``.

    Raises:
        FileNotFoundError: if any entry is missing or not a directory.
        ValueError: if ``playlist_dirs`` is empty.
    """
    resolved_dirs: list[Path] = []
    for entry in playlist_dirs:
        candidate = Path(entry)
        if not (candidate.exists() and candidate.is_dir()):
            raise FileNotFoundError(f"playlist directory not found: {candidate}")
        resolved_dirs.append(candidate)
    if not resolved_dirs:
        raise ValueError("playlist_dirs is required")

    destination_root = Path(bundle_root)
    destination_root.mkdir(parents=True, exist_ok=True)

    friendly_name = build_multi_playlist_bundle_filename(created_at=created_at)
    # Timestamp plus random suffix gives each stored archive a distinct filename.
    unique_prefix = datetime.now().strftime("%Y%m%d%H%M%S%f") + "-" + uuid4().hex[:8]
    bundle_path = destination_root / (unique_prefix + INTERNAL_BUNDLE_NAME_SEPARATOR + friendly_name)

    archive_layout = [(directory, f"playlists/{directory.name}") for directory in resolved_dirs]
    _write_zip_from_directories(bundle_path, archive_layout)
    return bundle_path
def _write_zip_from_directories(bundle_path: Path, directories: list[tuple[Path, str]]) -> None:
    """Write a deflated zip at ``bundle_path`` containing every file under each directory.

    ``directories`` pairs a source directory with the archive folder its files
    are stored under. An existing file at ``bundle_path`` is removed first.
    Files are added in sorted path order; directories themselves are not added.
    """
    if bundle_path.exists():
        bundle_path.unlink()
    with zipfile.ZipFile(bundle_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for source_dir, zip_root in directories:
            regular_files = (entry for entry in sorted(source_dir.rglob("*")) if entry.is_file())
            for entry in regular_files:
                inner_path = entry.relative_to(source_dir).as_posix()
                archive.write(entry, arcname=f"{zip_root}/{inner_path}")
@@ -0,0 +1,179 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import parse_qs, urlparse
from .models import PlaylistCandidate
# Platform identifiers accepted by build_playlist_candidate; any other value yields None.
SUPPORTED_PLATFORMS = {"netease", "qq", "kuwo"}
@dataclass
class ParsedPlaylistFile:
    """Result of parsing a manual playlist file."""

    # Unique playlist candidates, in first-seen order.
    entries: list[PlaylistCandidate]
    # Number of lines counted in the file.
    total_lines: int
    # Lines that could not be turned into a playlist candidate.
    skipped_lines: int
def infer_platform_from_url(url: str) -> str | None:
    """Guess the music platform from a URL's host.

    Returns ``"netease"``, ``"qq"`` or ``"kuwo"``, or ``None`` when the host is
    not recognized (including URLs without a scheme, which have no host part).
    """
    host = urlparse(url).netloc.lower()
    if host in {"music.163.com", "163.com"}:
        return "netease"
    if host.endswith("y.qq.com") or host == "qq.com":
        return "qq"
    # Require a label boundary: a bare suffix test would also accept unrelated
    # registrations such as "notkuwo.cn".
    if host == "kuwo.cn" or host.endswith(".kuwo.cn"):
        return "kuwo"
    return None
def build_playlist_candidate(platform: str, url: str) -> PlaylistCandidate | None:
    """Build a candidate for ``url`` using the builder for ``platform``.

    Platform matching ignores case and surrounding whitespace. Returns ``None``
    for an unsupported platform, an empty URL, or a URL the platform builder
    rejects.
    """
    platform_key = platform.strip().lower()
    cleaned_url = url.strip()
    if not cleaned_url or platform_key not in SUPPORTED_PLATFORMS:
        return None
    builders = {
        "netease": _build_netease_candidate,
        "qq": _build_qq_candidate,
        "kuwo": _build_kuwo_candidate,
    }
    builder = builders.get(platform_key)
    return builder(cleaned_url) if builder is not None else None
def parse_playlist_file(path: str | Path) -> ParsedPlaylistFile:
    """Parse a UTF-8 playlist list file into unique playlist candidates.

    Each non-blank, non-``#`` line is either ``platform,url`` or a bare URL
    whose platform is inferred from its host. Lines that do not produce a
    candidate are counted as skipped; repeated playlists are dropped silently.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    lines = raw_text.splitlines()
    # A trailing line break adds one final empty line to the line count.
    if raw_text.endswith(("\n", "\r")):
        lines.append("")

    entries: list[PlaylistCandidate] = []
    seen_keys: set[str] = set()
    skipped_lines = 0
    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        platform_text, comma, url_text = line.partition(",")
        if comma:
            platform = platform_text.strip().lower()
            url = url_text.strip()
        else:
            url = line
            platform = infer_platform_from_url(url)

        candidate = build_playlist_candidate(platform or "", url)
        if candidate is None:
            skipped_lines += 1
            continue
        key = candidate.playlist_key
        if key in seen_keys:
            continue
        seen_keys.add(key)
        entries.append(candidate)

    return ParsedPlaylistFile(entries=entries, total_lines=len(lines), skipped_lines=skipped_lines)
def _build_netease_candidate(url: str) -> PlaylistCandidate | None:
    """Build a NetEase playlist candidate, or ``None`` if ``url`` is not one.

    The host must be ``music.163.com`` or ``163.com``, the path or fragment
    path must end in ``/playlist``, and an ``id`` query value must be present.
    """
    parsed = urlparse(url)
    if parsed.netloc.lower() not in {"music.163.com", "163.com"}:
        return None
    is_playlist_url = _path_matches(parsed.path, "/playlist") or _fragment_path_matches(
        parsed.fragment, "/playlist"
    )
    if not is_playlist_url:
        return None
    remote_id = _extract_query_value(parsed, "id")
    if not remote_id:
        return None
    return PlaylistCandidate(
        platform="netease",
        pool_kind="manual_file",
        remote_id=remote_id,
        name=remote_id,
        url=f"https://music.163.com/#/playlist?id={remote_id}",
    )
def _build_qq_candidate(url: str) -> PlaylistCandidate | None:
    """Build a QQ Music playlist or toplist candidate, or ``None`` if ``url`` is not one.

    The host must be ``qq.com`` or end with ``y.qq.com``. The path needs at
    least two segments, one of them ``playlist`` or ``toplist``, and the last
    segment is taken as the remote id. Toplists get the ``qq_toplist`` parse
    strategy.
    """
    parsed = urlparse(url)
    host = parsed.netloc.lower()
    if host != "qq.com" and not host.endswith("y.qq.com"):
        return None
    path_parts = [part for part in parsed.path.split("/") if part]
    if len(path_parts) < 2:
        return None
    remote_id = path_parts[-1].strip()
    if not remote_id:
        return None

    if "playlist" in path_parts:
        # A URL that stops at the marker segment carries no id; do not treat
        # the word "playlist" itself as one.
        if path_parts[-1] == "playlist":
            return None
        return PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://y.qq.com/n/ryqq/playlist/{remote_id}",
        )
    if "toplist" in path_parts:
        # Same guard for a toplist URL with no id segment.
        if path_parts[-1] == "toplist":
            return None
        return PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://y.qq.com/n/ryqq/toplist/{remote_id}",
            parse_strategy="qq_toplist",
        )
    return None
def _build_kuwo_candidate(url: str) -> PlaylistCandidate | None:
    """Build a Kuwo playlist or rank-list candidate, or ``None`` if ``url`` is not one.

    The host must be ``kuwo.cn`` or one of its subdomains. ``playlist_detail``
    URLs take the last path segment as the remote id; ``rankList`` URLs take
    the ``bangId`` query value and get the ``kuwo_toplist`` parse strategy.
    """
    parsed = urlparse(url)
    host = parsed.netloc.lower()
    # Match on a label boundary so look-alike hosts ("notkuwo.cn") are rejected.
    if host != "kuwo.cn" and not host.endswith(".kuwo.cn"):
        return None
    path_parts = [part for part in parsed.path.split("/") if part]

    if "playlist_detail" in path_parts:
        # A URL that stops at the marker segment carries no id.
        if path_parts[-1] == "playlist_detail":
            return None
        remote_id = path_parts[-1].strip()
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="kuwo",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://www.kuwo.cn/playlist_detail/{remote_id}",
        )
    if "rankList" in path_parts:
        remote_id = _extract_query_value(parsed, "bangId")
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="kuwo",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://www.kuwo.cn/rankList?bangId={remote_id}",
            parse_strategy="kuwo_toplist",
            metadata={"bang_id": remote_id},
        )
    return None
def _extract_query_value(parsed, key: str) -> str | None:
    """Return the first non-blank value of ``key`` from the URL's query or fragment query.

    The regular query string is checked first, then the query part of the
    fragment. The returned value is stripped of surrounding whitespace.
    """
    query_sources = (parsed.query, urlparse(parsed.fragment).query)
    for query_text in query_sources:
        values = parse_qs(query_text).get(key)
        first = values[0].strip() if values else ""
        if first:
            return first
    return None
def _path_matches(path: str, expected_suffix: str) -> bool:
    """Return True when ``path``, ignoring trailing slashes, ends with ``expected_suffix``."""
    trimmed_path = path.rstrip("/")
    return trimmed_path.endswith(expected_suffix)
def _fragment_path_matches(fragment: str, expected_suffix: str) -> bool:
    """Return True when the path part of a URL fragment ends with ``expected_suffix``.

    The fragment is parsed as a URL of its own, so a query inside it is ignored.
    An empty fragment never matches.
    """
    if fragment:
        fragment_path = urlparse(fragment).path
        return fragment_path.rstrip("/").endswith(expected_suffix)
    return False
+172
View File
@@ -0,0 +1,172 @@
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import Any
# Maps client class names and short platform names to the short platform name
# that normalize_source_name returns for them.
SOURCE_NAME_MAP = {
    "NeteaseMusicClient": "netease",
    "QQMusicClient": "qq",
    "KuwoMusicClient": "kuwo",
    "netease": "netease",
    "qq": "qq",
    "kuwo": "kuwo",
}
# Separators between artist names: "/", ASCII comma, full-width comma (U+FF0C),
# ideographic comma, "&" and "|", each with optional surrounding whitespace.
# The full-width comma is written as an escape so the alternation can never
# degrade into an empty branch, which would match between every character and
# make split() break names into single letters.
ARTIST_SPLIT_RE = re.compile(r"\s*(?:/|,|\uFF0C|、|&|\|)\s*")
def remove_suffix(value: str, suffix: str) -> str:
    """Return ``value`` without a trailing ``suffix``; unchanged if it does not end with it."""
    if not suffix or not value.endswith(suffix):
        return value
    return value[: len(value) - len(suffix)]
def normalize_source_name(source: str | None) -> str:
    """Translate a client or platform name into its short platform name.

    Empty input gives ``"unknown"``. Names in ``SOURCE_NAME_MAP`` use the
    mapped value; anything else has a trailing ``MusicClient`` removed and is
    lower-cased.
    """
    if not source:
        return "unknown"
    derived_name = remove_suffix(str(source), "MusicClient").lower()
    return SOURCE_NAME_MAP.get(source, derived_name)
def get_field(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a dict by item lookup, or from any other object by attribute."""
    if not isinstance(obj, dict):
        return getattr(obj, key, default)
    return obj.get(key, default)
def serialize_song_info(song_info: Any) -> dict[str, Any]:
    """Convert a song-info value into a plain dict snapshot.

    ``None`` gives an empty dict, a dict is shallow-copied, an object with a
    callable ``todict`` uses that, and any other object with ``__dict__``
    contributes its attributes whose names do not start with an underscore.
    Anything else gives an empty dict.
    """
    if song_info is None:
        return {}
    if isinstance(song_info, dict):
        return dict(song_info)
    exporter = getattr(song_info, "todict", None)
    if callable(exporter):
        return exporter()
    if not hasattr(song_info, "__dict__"):
        return {}
    snapshot: dict[str, Any] = {}
    for attr_name, attr_value in vars(song_info).items():
        if attr_name.startswith("_"):
            continue
        snapshot[attr_name] = attr_value
    return snapshot
def deserialize_song_info(snapshot: dict[str, Any] | None):
    """Rebuild a ``SongInfo`` from a snapshot dict; ``None`` for an empty or missing snapshot."""
    if snapshot:
        # Imported here rather than at module import time.
        from musicdl.modules.utils.data import SongInfo

        return SongInfo.fromdict(snapshot)
    return None
def parse_size_to_bytes(file_size: Any) -> int | None:
    """Convert a size value such as ``"3.5 MB"`` or ``2048`` into whole bytes.

    Numbers are truncated to ``int``. Strings must be a decimal number followed
    by ``B``, ``KB``, ``MB``, ``GB``, ``TB`` or ``PB`` (case-insensitive;
    ``KiB``-style spellings are accepted) and use 1024-based multipliers.
    Returns ``None`` for ``None``, blank text, ``"NULL"``, non-finite floats
    and anything else that cannot be parsed.
    """
    if file_size is None:
        return None
    if isinstance(file_size, (int, float)):
        try:
            return int(file_size)
        except (ValueError, OverflowError):
            # NaN and infinity have no integer value.
            return None

    # Work on the string form so unhashable inputs (lists, dicts) fall through
    # to "no match" instead of raising on a set-membership test.
    text = str(file_size).strip().upper().replace("IB", "B")
    match = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([KMGTP]?B)$", text)
    if not match:
        return None
    multiplier = {
        "B": 1,
        "KB": 1024,
        "MB": 1024**2,
        "GB": 1024**3,
        "TB": 1024**4,
        "PB": 1024**5,
    }[match.group(2)]
    return int(float(match.group(1)) * multiplier)
def dedupe_preserve_order(values: list[str]) -> list[str]:
    """Strip each value, drop blanks and repeats, and keep first-seen order."""
    stripped_values = (item.strip() for item in values)
    # dict keys keep insertion order, which gives order-preserving uniqueness.
    return list(dict.fromkeys(text for text in stripped_values if text))
def extract_artist_names(raw_data: dict | None, singers_text: str | None = None) -> list[str]:
    """Collect unique artist names from raw search data and a singers string.

    Names are read from the ``"search"`` section of ``raw_data``: list-valued
    ``ar`` / ``artists`` / ``singer`` entries contribute each item's ``name``,
    and string-valued ``artist`` / ``ARTIST`` / ``author`` / ``singerName`` /
    ``singers`` entries are split on ``ARTIST_SPLIT_RE``. ``singers_text`` is
    split the same way. The result is stripped and de-duplicated in order.
    """
    search_data = raw_data.get("search") if isinstance(raw_data, dict) else None
    # A missing or non-dict "search" section contributes nothing; without this
    # guard a raw_data dict lacking "search" raised AttributeError on None.
    if not isinstance(search_data, dict):
        search_data = {}

    candidates: list[str] = []
    for key in ("ar", "artists", "singer"):
        value = search_data.get(key)
        if isinstance(value, list):
            candidates.extend(
                str(item["name"])
                for item in value
                if isinstance(item, dict) and item.get("name")
            )
    for key in ("artist", "ARTIST", "author", "singerName", "singers"):
        value = search_data.get(key)
        if isinstance(value, str):
            candidates.extend(ARTIST_SPLIT_RE.split(value))
    if singers_text:
        candidates.extend(ARTIST_SPLIT_RE.split(str(singers_text)))
    return dedupe_preserve_order(candidates)
@dataclass
class PlaylistCandidate:
    """A playlist discovered on a platform and queued for collection."""

    platform: str
    pool_kind: str
    remote_id: str
    name: str
    url: str
    parse_strategy: str = "playlist_url"
    cover_url: str | None = None
    creator_name: str | None = None
    play_count: int | None = None
    collected_song_count: int | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def playlist_key(self) -> str:
        """Identity string in the form ``<platform>:<remote_id>``."""
        return "{}:{}".format(self.platform, self.remote_id)
@dataclass
class CatalogSong:
    """A song as stored in the catalog, keyed by platform and remote id."""

    platform: str
    remote_song_id: str
    name: str | None = None
    singers: str | None = None
    album: str | None = None
    ext: str | None = None
    file_size_bytes: int | None = None
    file_size_label: str | None = None
    quality_label: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def song_key(self) -> str:
        """Identity string in the form ``<platform>:<remote_song_id>``."""
        return "{}:{}".format(self.platform, self.remote_song_id)

    @classmethod
    def from_song_info(cls, song_info: Any) -> "CatalogSong":
        """Build a catalog song from a song-info dict or object.

        The byte size comes from ``file_size_bytes`` when present, otherwise it
        is parsed from the ``file_size`` label. The raw data and a serialized
        snapshot of ``song_info`` are kept in ``metadata``.
        """
        raw_data = get_field(song_info, "raw_data", {}) or {}
        size_label = get_field(song_info, "file_size")
        size_bytes = get_field(song_info, "file_size_bytes")
        if size_bytes is None:
            size_bytes = parse_size_to_bytes(size_label)
        return cls(
            platform=normalize_source_name(get_field(song_info, "source")),
            remote_song_id=str(get_field(song_info, "identifier")),
            name=get_field(song_info, "song_name"),
            singers=get_field(song_info, "singers"),
            album=get_field(song_info, "album"),
            ext=get_field(song_info, "ext"),
            file_size_bytes=size_bytes,
            file_size_label=size_label,
            quality_label=raw_data.get("quality"),
            metadata={"raw_data": raw_data, "snapshot": serialize_song_info(song_info)},
        )
@@ -0,0 +1,12 @@
from .models import ItemStatus, JobItem, JobRun, JobStatus, JobStage, StageStatus
from .repository import OpsRepository
# Names this package exposes, taken from the models and repository submodules.
__all__ = [
    "ItemStatus",
    "JobItem",
    "JobRun",
    "JobStatus",
    "JobStage",
    "OpsRepository",
    "StageStatus",
]
@@ -0,0 +1,91 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from typing import Any
from .repository import OpsRepository
def _parse_sources(value: str | None) -> list[str]:
    """Split a comma-separated source list into trimmed, non-empty names."""
    if not value:
        return []
    trimmed_pieces = (piece.strip() for piece in value.split(","))
    return [piece for piece in trimmed_pieces if piece]
def _normalize_env_value(raw_value: str) -> str:
    """Remove one pair of matching quotes around an env value.

    A value that, once trimmed, starts and ends with the same quote character
    is returned without those quotes. Any other value is returned exactly as
    given.
    """
    trimmed = raw_value.strip()
    is_quoted = len(trimmed) >= 2 and trimmed[0] in {"'", '"'} and trimmed[-1] == trimmed[0]
    # NOTE(review): unquoted values keep their surrounding whitespace - confirm this is intended.
    return trimmed[1:-1] if is_quoted else raw_value
def _parse_env(content: str) -> dict[str, str]:
    """Parse env-file text into a key/value mapping.

    Blank lines, ``#`` comments, lines without ``=`` and lines with an empty
    key are ignored. A leading ``export `` is removed. When a key repeats, the
    last occurrence wins.
    """
    mapping: dict[str, str] = {}
    for raw_line in content.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        assignment = raw_line.lstrip()
        if assignment.startswith("export "):
            assignment = assignment[len("export ") :]
        key_text, equals, value_text = assignment.partition("=")
        key = key_text.strip()
        if not equals or not key:
            continue
        mapping[key] = _normalize_env_value(value_text)
    return mapping
class CatalogsyncEnvManager:
    """Reads the catalogsync env file and stores revisions of it in the ops repository."""

    def __init__(
        self,
        *,
        db_path: str | Path,
        env_file_path: str | Path,
        repository: OpsRepository | None = None,
    ):
        """Remember the env file location; build an ``OpsRepository`` unless one is given."""
        self.env_file_path = Path(env_file_path)
        self.repository = repository or OpsRepository(db_path)

    def load_current(self) -> dict[str, str]:
        """Return the parsed env file, or an empty dict when the file does not exist."""
        if self.env_file_path.exists():
            return _parse_env(self.env_file_path.read_text(encoding="utf-8"))
        return {}

    def build_job_snapshot(self) -> dict[str, Any]:
        """Return the current env values plus a parsed ``download_sources`` list."""
        current = self.load_current()
        snapshot: dict[str, Any] = {**current}
        snapshot["download_sources"] = _parse_sources(current.get("DOWNLOAD_SOURCES"))
        return snapshot

    def save_revision(self, note: str | None = None, source_type: str = "env_file") -> int:
        """Store the env file's current text as a new config revision and return its result.

        A missing env file is stored as empty text. The SHA-256 of the text is
        stored alongside it.
        """
        env_path = self.env_file_path
        content = env_path.read_text(encoding="utf-8") if env_path.exists() else ""
        digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
        return self.repository.create_config_revision(
            source_type=source_type,
            file_path=str(env_path.resolve()),
            content_text=content,
            content_hash=digest,
            note=note,
        )

    def list_revisions(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return stored config revisions from the repository, limited to ``limit``."""
        return self.repository.list_config_revisions(limit=limit)

    def apply_revision(self, revision_id: int) -> None:
        """Write a stored revision's text to the env file and mark it applied.

        Raises:
            ValueError: if no revision with ``revision_id`` exists.
        """
        revision = self.repository.get_config_revision(revision_id)
        if revision is None:
            raise ValueError(f"config revision not found: {revision_id}")
        env_path = self.env_file_path
        env_path.parent.mkdir(parents=True, exist_ok=True)
        env_path.write_text(revision["content_text"], encoding="utf-8")
        self.repository.mark_config_revision_applied(revision_id)
@@ -0,0 +1,466 @@
from __future__ import annotations
import json
import threading
from dataclasses import dataclass
from pathlib import Path
from typing import Callable
from musicdl.catalogsync.downloader import CatalogDownloader
from musicdl.catalogsync.repository import CatalogRepository
from musicdl.catalogsync.services import CatalogSyncService
from musicdl.catalogsync.uploader import CatalogUploader
from .repository import OpsRepository
# Reason message and reason code passed to mark_item_skipped for non-music resources.
NON_MUSIC_RESOURCE_REASON = "非音乐资源(有声榜条目)"
NON_MUSIC_RESOURCE_CODE = "NON_MUSIC_RESOURCE"
@dataclass
class ResolvedStageDownloadTask:
    """A download item bundled with its row and resolved payload."""

    # Job item id.
    item_id: int
    # Playlist id for the item, if any.
    playlist_id: int | None
    # Download row for the item.
    row: dict[str, object]
    # Resolved payload for the item; the concrete type is not fixed here.
    resolved_payload: object
def _format_error(exc: Exception) -> str:
    """Render an exception as ``<ClassName>: <message>``."""
    return "{}: {}".format(type(exc).__name__, exc)
class _TransitionUpdateError(RuntimeError):
    """Raised when a repository state transition reports that it was not applied."""
def _ensure_transition_applied(applied: bool, *, item_id: int, action: str) -> None:
    """Raise ``_TransitionUpdateError`` unless ``applied`` is truthy.

    ``action`` names the repository call whose result is being checked and is
    included in the error message.
    """
    if not applied:
        raise _TransitionUpdateError(
            f"CAS transition failed for item {item_id}: {action} returned False"
        )
def _mark_failed_or_raise(ops_repo: OpsRepository, *, item_id: int, error_message: str, cause: Exception) -> None:
    """Mark an item failed; raise ``RuntimeError`` chained to ``cause`` if that is not applied."""
    marked = ops_repo.mark_item_failed(item_id=item_id, error_message=error_message)
    if not marked:
        raise RuntimeError(
            f"CAS transition failed for item {item_id}: mark_item_failed returned False while handling error: {error_message}"
        ) from cause
def _mark_non_music_resource_skipped_or_raise(ops_repo: OpsRepository, *, item_id: int) -> None:
    """Mark an item skipped as a non-music resource; raise if the update is not applied."""
    was_applied = ops_repo.mark_item_skipped(
        item_id=item_id,
        reason_message=NON_MUSIC_RESOURCE_REASON,
        reason_code=NON_MUSIC_RESOURCE_CODE,
    )
    _ensure_transition_applied(was_applied, item_id=item_id, action="mark_item_skipped")
def _is_non_music_resource_download_row(row: dict[str, object] | None) -> bool:
    """Tell whether a download row is flagged as a non-music resource.

    True when ``remote_song_id`` starts with ``qqtop_`` (case-insensitive), or
    when ``metadata_json`` holds a truthy
    ``snapshot.raw_data.search.qq_toplist_fallback`` flag. Malformed or missing
    metadata counts as a normal music row.
    """
    row = row or {}
    remote_song_id = str(row.get("remote_song_id") or "").strip().lower()
    if remote_song_id.startswith("qqtop_"):
        return True

    metadata_json = row.get("metadata_json")
    if not metadata_json:
        return False
    try:
        node = json.loads(str(metadata_json))
    except Exception:
        return False
    if not isinstance(node, dict):
        return False

    # Walk snapshot -> raw_data -> search, bailing out on any non-dict level.
    for key in ("snapshot", "raw_data", "search"):
        node = node.get(key)
        if not isinstance(node, dict):
            return False
    return bool(node.get("qq_toplist_fallback"))
class CollectStageExecutor:
    """Runs playlist-collection job items and records their outcome in the ops repository."""

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Set up repositories for ``db_path``; a default service is built unless one is given."""
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.service = service or CatalogSyncService(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Collect playlists for one job item and mark it succeeded or failed.

        The item is claimed first unless ``already_claimed`` is set. Failures
        are recorded on the item and swallowed, except a
        ``_TransitionUpdateError``, which is re-raised after the item has been
        marked failed.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            item = self.ops_repo.get_item(item_id)
            if item is None:
                raise RuntimeError(f"Unknown item: {item_id}")
            source = str(item.payload.get("source") or "").strip()
            if not source:
                raise RuntimeError(f"Collect item {item_id} is missing source")
            display_text = f"collect:{source}"

            def publish(progress_text: str):
                return self.ops_repo.update_worker_state(
                    worker_name=worker_name,
                    current_job_item_id=item_id,
                    status="running",
                    current_display_text=display_text,
                    last_progress_text=progress_text,
                )

            def on_collect_event(event_type, payload):
                return publish(self._format_progress_text(event_type, payload))

            publish("starting playlist collection")
            counts = self.service.collect_playlists(
                sources=[source],
                include_playlist_square=bool(item.payload.get("include_playlist_square", True)),
                include_toplist=bool(item.payload.get("include_toplist", True)),
                progress_callback=on_collect_event,
            )
            succeeded = self.ops_repo.mark_item_succeeded(item_id=item_id, result_payload={"counts": counts})
            _ensure_transition_applied(succeeded, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            # A failed state transition is re-raised after the item is marked failed.
            if isinstance(exc, _TransitionUpdateError):
                raise

    @staticmethod
    def _format_progress_text(event_type: str, payload: dict[str, object]) -> str:
        """Turn a collection progress event into a short status line.

        Event types other than the three handled here are shown as the event
        name with underscores replaced by spaces.
        """
        if event_type == "playlist_square_page":
            page = int(payload.get("page") or 0)
            total = int(payload.get("total") or 0)
            new_count = int(payload.get("new_count") or 0)
            if payload.get("duplicate_page"):
                return f"page {page}: duplicate page detected, stopping at {total}"
            return f"page {page}: +{new_count}, total {total}"

        if event_type == "toplist_collected":
            return f"toplist: {int(payload.get('count') or 0)}"

        if event_type == "source_finished":
            counts = payload.get("counts")
            if not isinstance(counts, dict):
                counts = {}
            playlist_square = int(counts.get("playlist_square") or 0)
            toplist = int(counts.get("toplist") or 0)
            return f"done: square {playlist_square}, toplist {toplist}"

        return str(event_type).replace("_", " ")
class DownloadStageExecutor:
    """Execute "download" job items.

    Two ways of processing an item are offered:

    * ``process_item`` resolves and downloads in a single call
      (``downloader.download_song_row``).
    * ``process_resolve_item`` only resolves the song and puts a
      ``ResolvedStageDownloadTask`` on ``ready_queue``;
      ``process_download_task`` later downloads that task.

    All three entry points record failures the same way: non-music rows are
    marked skipped, everything else is marked failed, and only a
    ``_TransitionUpdateError`` is re-raised to the caller.
    """

    def __init__(
        self,
        db_path: str | Path,
        library_root: str | Path,
        download_sources: list[str] | None = None,
        downloader: CatalogDownloader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Store paths/sources and build any collaborator not supplied."""
        self.db_path = Path(db_path)
        self.library_root = Path(library_root)
        self.download_sources = list(download_sources or [])
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.downloader = downloader or CatalogDownloader(repository=self.catalog_repo)

    def _worker_callback(self, worker_name: str, item_id: int):
        """Return a callback that forwards downloader state to the worker row."""

        def report(**state):
            return self.ops_repo.update_worker_state(
                worker_name=worker_name,
                current_job_item_id=item_id,
                status="running",
                **state,
            )

        return report

    def _finish_if_already_downloaded(self, item_id: int, worker_name: str, row) -> bool:
        """Mark the item succeeded when its song already has an active local file.

        Returns True when the item was completed here and needs no further work.
        """
        song_id = int(row.get("id") or row.get("song_id") or 0)
        if song_id <= 0 or not self.catalog_repo.song_has_active_local_file(song_id):
            return False
        self.ops_repo.update_worker_state(
            worker_name=worker_name,
            current_job_item_id=item_id,
            status="running",
            current_song_id=song_id,
            current_playlist_id=row.get("playlist_id"),
            current_display_text=str(row.get("name") or row.get("id") or song_id),
            last_progress_text="already downloaded",
        )
        _ensure_transition_applied(
            self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"already_downloaded": True},
            ),
            item_id=item_id,
            action="mark_item_succeeded",
        )
        return True

    def _record_no_result(self, item_id: int, row, error_message: str) -> None:
        """Record an attempt that produced nothing: skipped for non-music rows, failed otherwise."""
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        _ensure_transition_applied(
            self.ops_repo.mark_item_failed(
                item_id=item_id,
                error_message=error_message,
            ),
            item_id=item_id,
            action="mark_item_failed",
        )

    def _record_download_result(self, item_id: int, row, succeeded) -> None:
        """Record the boolean-ish outcome of a download attempt."""
        if succeeded:
            _ensure_transition_applied(
                self.ops_repo.mark_item_succeeded(item_id=item_id),
                item_id=item_id,
                action="mark_item_succeeded",
            )
            return
        self._record_no_result(item_id, row, "download returned no file")

    def _record_exception(self, item_id: int, row, exc: Exception) -> None:
        """Record an exception raised while processing the item.

        ``row`` may be None when the failure happened before the row was built.
        The caller remains responsible for re-raising ``_TransitionUpdateError``.
        """
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        _mark_failed_or_raise(
            self.ops_repo,
            item_id=item_id,
            error_message=_format_error(exc),
            cause=exc,
        )

    def process_resolve_item(
        self,
        item_id: int,
        worker_name: str,
        *,
        ready_queue,
        already_claimed: bool = False,
    ) -> None:
        """Resolve one item and hand the result to ``ready_queue``.

        Items whose song already has an active local file are completed
        immediately. A resolve that returns None is recorded as skipped
        (non-music row) or failed. Exceptions are recorded exactly like in
        ``process_item`` instead of escaping with the item left unrecorded.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        row: dict[str, object] | None = None
        try:
            row = self.ops_repo.build_download_row(item_id=item_id)
            if self._finish_if_already_downloaded(item_id, worker_name, row):
                return
            resolved_payload = self.downloader.resolve_song_row(
                row=row,
                library_root=self.library_root,
                download_sources=self.download_sources,
                worker_callback=self._worker_callback(worker_name, item_id),
            )
            if resolved_payload is None:
                self._record_no_result(item_id, row, "resolve returned no downloadable song")
                return
            ready_queue.put(
                ResolvedStageDownloadTask(
                    item_id=item_id,
                    playlist_id=row.get("playlist_id"),
                    row=row,
                    resolved_payload=resolved_payload,
                )
            )
        except Exception as exc:
            self._record_exception(item_id, row, exc)
            if isinstance(exc, _TransitionUpdateError):
                raise

    def process_download_task(self, task: ResolvedStageDownloadTask, worker_name: str) -> None:
        """Download a previously resolved task and record the outcome."""
        try:
            succeeded = self.downloader.download_resolved_song(
                resolved_payload=task.resolved_payload,
                worker_callback=self._worker_callback(worker_name, task.item_id),
            )
            self._record_download_result(task.item_id, task.row, succeeded)
        except Exception as exc:
            self._record_exception(task.item_id, task.row, exc)
            if isinstance(exc, _TransitionUpdateError):
                raise

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Resolve and download one item in a single step and record the outcome."""
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        row: dict[str, object] | None = None
        try:
            row = self.ops_repo.build_download_row(item_id=item_id)
            if self._finish_if_already_downloaded(item_id, worker_name, row):
                return
            succeeded = self.downloader.download_song_row(
                row=row,
                library_root=self.library_root,
                download_sources=self.download_sources,
                worker_callback=self._worker_callback(worker_name, item_id),
            )
            self._record_download_result(item_id, row, succeeded)
        except Exception as exc:
            self._record_exception(item_id, row, exc)
            if isinstance(exc, _TransitionUpdateError):
                raise
class SyncStageExecutor:
    """Run "sync" job items by syncing the playlist row behind each item.

    The sync service is obtained through a factory and cached per thread in a
    ``threading.local``. When a concrete ``service`` is passed instead of a
    factory, that same instance is handed to every thread.
    """

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        service_factory: Callable[[], CatalogSyncService] | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Store the database path and decide how services are created.

        ``service_factory`` wins over ``service``; with neither, a new
        ``CatalogSyncService`` bound to this executor's catalog repository is
        built on demand.
        """
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)

        factory = service_factory
        if factory is None:
            if service is None:
                def factory():
                    return CatalogSyncService(repository=self.catalog_repo)
            else:
                def factory():
                    return service
        self._service_factory = factory
        self._service_local = threading.local()

    def _get_service(self) -> CatalogSyncService:
        """Return the calling thread's service, creating it on first use."""
        slot = self._service_local
        cached = getattr(slot, "service", None)
        if cached is not None:
            return cached
        created = self._service_factory()
        slot.service = created
        return created

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Sync the playlist for one item and store the linked-song count.

        Exceptions are recorded through ``_mark_failed_or_raise``; only a
        ``_TransitionUpdateError`` is re-raised.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            playlist_row = self.ops_repo.get_playlist_row_for_item(item_id=item_id)
            service = self._get_service()
            linked_count = int(service.sync_playlist_row(playlist_row))
            applied = self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"linked_count": linked_count},
            )
            _ensure_transition_applied(
                applied,
                item_id=item_id,
                action="mark_item_succeeded",
            )
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            if isinstance(exc, _TransitionUpdateError):
                raise
class UploadStageExecutor:
    """Run "upload" job items against one named storage backend."""

    def __init__(
        self,
        db_path: str | Path,
        backend_name: str,
        uploader: CatalogUploader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Store the target backend and build any collaborator not supplied."""
        self.db_path = Path(db_path)
        self.backend_name = str(backend_name)
        self.ops_repo = ops_repo if ops_repo else OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.uploader = uploader if uploader else CatalogUploader(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Upload the file behind one item and record the outcome.

        The uploader's result is stringified; exactly ``"succeeded"`` marks the
        item succeeded, any other value marks it failed with that value in the
        error message. Exceptions are recorded through
        ``_mark_failed_or_raise``; only ``_TransitionUpdateError`` is re-raised.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            upload_row = self.ops_repo.get_upload_row_for_item(item_id=item_id)
            outcome = str(
                self.uploader.process_upload_task_row(
                    task_row=upload_row,
                    backend_name=self.backend_name,
                )
            )
            if outcome == "succeeded":
                action = "mark_item_succeeded"
                applied = self.ops_repo.mark_item_succeeded(item_id=item_id)
            else:
                action = "mark_item_failed"
                applied = self.ops_repo.mark_item_failed(
                    item_id=item_id,
                    error_message=f"upload result: {outcome}",
                )
            _ensure_transition_applied(applied, item_id=item_id, action=action)
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            if isinstance(exc, _TransitionUpdateError):
                raise
@@ -0,0 +1,48 @@
from __future__ import annotations
from typing import Any
# Lane identifiers; job_lane_type() returns one of these for a job type.
DOWNLOAD_LANE = "download"
GENERAL_LANE = "general"
# Job type -> ordered tuple of the stage types that make up that kind of job.
# Job types missing from this table are treated as having no stages.
JOB_STAGE_SEQUENCES: dict[str, tuple[str, ...]] = {
    "catalog_sync": ("collect", "sync", "download"),
    "collect_only": ("collect",),
    "sync_only": ("sync",),
    "sync_download": ("sync", "download"),
    "download_only": ("download",),
    "upload_only": ("upload",),
    "download_upload": ("download", "upload"),
}
def job_has_stage(job_type: str, stage_type: str) -> bool:
    """Return True when ``stage_type`` is part of the stage sequence of ``job_type``.

    Both arguments are stringified first; unknown job types have no stages.
    """
    stages = JOB_STAGE_SEQUENCES.get(str(job_type), ())
    wanted = str(stage_type)
    return wanted in stages
def job_lane_type(job_type: str) -> str:
    """Return the lane for a job type: the download lane if it has a download stage, else general."""
    return DOWNLOAD_LANE if job_has_stage(job_type, "download") else GENERAL_LANE
def primary_stage_type(job_type: str) -> str | None:
    """Return the first stage, in the order download, upload, sync, collect, that the job has.

    Returns None when the job type has none of those stages.
    """
    priority = ("download", "upload", "sync", "collect")
    return next(
        (candidate for candidate in priority if job_has_stage(job_type, candidate)),
        None,
    )
def display_name(job_type: str, playlist_scope: dict[str, Any] | None = None) -> str:
    """Return the human-readable label for a job type.

    A job counts as "scoped" when ``playlist_scope["playlist_ids"]`` is a
    non-empty list; some job types get a different label in that case.
    Unknown job types are returned as their own string form.
    """
    scope = playlist_scope or {}
    playlist_ids = scope.get("playlist_ids")
    scoped = isinstance(playlist_ids, list) and bool(playlist_ids)

    # job type -> (label when scoped, label when unscoped)
    labels = {
        "catalog_sync": ("Full Pipeline", "Full Pipeline"),
        "collect_only": ("Collect", "Collect"),
        "sync_only": ("Sync Selected Playlists", "Sync"),
        "sync_download": ("Sync Then Download", "Sync Then Download All"),
        "download_only": ("Download Selected Playlists", "Download"),
        "upload_only": ("Upload", "Upload"),
        "download_upload": ("Download Then Upload", "Download Then Upload"),
    }
    key = str(job_type)
    pair = labels.get(key)
    if pair is None:
        return key
    scoped_label, unscoped_label = pair
    return scoped_label if scoped else unscoped_label
@@ -0,0 +1,402 @@
from __future__ import annotations
import re
import sqlite3
from contextlib import contextmanager, suppress
from pathlib import Path, PurePath
from typing import Any
from musicdl.catalogsync.db import connect_database
# Matches a " (N)" copy marker that sits at the very end of a file name or
# directly before its final extension, e.g. "song (1).mp3" or "song (2)".
_COPY_SUFFIX_RE = re.compile(r" \(\d+\)(?=(\.[^.]+)?$)")
class LocalDedupeBlockedError(RuntimeError):
    """Raised when local dedupe is refused because jobs or items are still running."""
def _coerce_int(value: Any) -> int | None:
try:
return int(value)
except (TypeError, ValueError):
return None
def _row_value(row: sqlite3.Row | dict[str, Any], key: str) -> Any:
if isinstance(row, sqlite3.Row):
try:
return row[key]
except IndexError:
return None
return row.get(key)
def _path_for_location(row: sqlite3.Row | dict[str, Any]) -> Path | None:
    """Return the filesystem path of a location row, or None if it cannot be derived.

    A non-blank ``absolute_path`` wins; otherwise the path is
    ``base_path / locator`` when both are non-blank.
    """

    def text(key: str) -> str:
        return str(_row_value(row, key) or "").strip()

    absolute = text("absolute_path")
    if absolute:
        return Path(absolute)
    base = text("base_path")
    locator = text("locator")
    if base and locator:
        return Path(base) / locator
    return None
def _resolved_path(path: Path | None) -> Path | None:
if path is None:
return None
with suppress(OSError, RuntimeError):
return path.resolve(strict=False)
return path
def _paths_match(left: Path | None, right: Path | None) -> bool:
    """Return True when both paths are given and resolve to the same location."""
    both_present = left is not None and right is not None
    if not both_present:
        return False
    resolved_left = _resolved_path(left)
    resolved_right = _resolved_path(right)
    return resolved_left == resolved_right
def _has_copy_suffix(locator: str | None) -> bool:
    """Return True when the locator's file name carries a " (N)" copy marker."""
    file_name = PurePath(str(locator or "")).name
    return _COPY_SUFFIX_RE.search(file_name) is not None
def _location_payload(row: sqlite3.Row | dict[str, Any]) -> dict[str, Any]:
    """Build the in-memory description of one file-location row.

    Besides the row's own columns, the result records whether the file
    currently exists on disk, its on-disk size when it can be read, and the
    derived ``Path`` under the private ``"_path"`` key.
    """
    path = _path_for_location(row)
    file_exists = path is not None and path.exists()
    actual_size = None
    if file_exists:
        try:
            actual_size = int(path.stat().st_size)
        except OSError:
            # The file may vanish or become unreadable between the two calls.
            pass

    def as_int(column: str) -> int:
        return int(row[column])

    def as_text(column: str) -> str:
        return str(row[column] or "")

    return {
        "id": as_int("location_id"),
        "file_asset_id": as_int("file_asset_id"),
        "song_id": as_int("song_id"),
        "backend_id": as_int("backend_id"),
        "backend_name": as_text("backend_name"),
        "locator": as_text("locator"),
        "absolute_path": as_text("absolute_path"),
        "file_exists": file_exists,
        "file_size_bytes": _coerce_int(row["file_size_bytes"]),
        "actual_file_size_bytes": actual_size,
        "song_name": as_text("song_name"),
        "singers": as_text("singers"),
        "_path": path,
    }
def _location_sort_key(location: dict[str, Any]) -> tuple[int, int, int, int]:
    """Sort key that ranks the best location to keep first.

    Preference order: file exists on disk, no " (N)" copy marker, shorter
    locator, lower id.
    """
    locator = location["locator"]
    missing_rank = 0 if location["file_exists"] else 1
    copy_rank = 1 if _has_copy_suffix(locator) else 0
    return (missing_rank, copy_rank, len(locator), int(location["id"]))
def _duplicate_size_bytes(location: dict[str, Any]) -> int:
size_value = location.get("actual_file_size_bytes")
if size_value is None:
size_value = location.get("file_size_bytes")
return max(int(size_value or 0), 0)
class LocalMaintenanceService:
    """Find and collapse duplicate active file locations on ``local_fs`` backends.

    A duplicate group is a ``(file_asset_id, backend_id)`` pair that has more
    than one active row in ``file_locations``. One location per group is kept
    (see ``_location_sort_key``); the others are the duplicates.
    """

    def __init__(self, db_path: str | Path):
        self.db_path = Path(db_path)

    def _connect(self) -> sqlite3.Connection:
        """Open a new database connection for this service's database."""
        return connect_database(self.db_path)

    @contextmanager
    def _connection(self):
        """Yield a connection; commit if the body finishes, always close.

        When the body raises, no commit is issued before the connection is
        closed, so pending changes are discarded.
        """
        conn = self._connect()
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    def scan_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Return a summary of duplicate groups plus up to ``sample_limit`` sample groups."""
        with self._connection() as conn:
            groups = self._load_duplicate_groups(conn)
            scanned_row = conn.execute(
                """
                SELECT COUNT(*) AS count_value
                FROM file_locations AS fl
                JOIN storage_backends AS sb ON sb.id = fl.backend_id
                WHERE fl.status = 'active'
                AND sb.backend_type = 'local_fs'
                """
            ).fetchone()
        return self._build_scan_payload(
            groups,
            scanned_active_local_location_count=int(scanned_row["count_value"]) if scanned_row else 0,
            sample_limit=sample_limit,
        )

    def dedupe_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Deactivate duplicate locations, delete their files, and return a fresh scan.

        Inside one transaction, for every duplicate group: the kept location
        becomes the primary one, upload tasks and job items that referenced a
        duplicate are re-pointed to the kept location, and the duplicate rows
        are set to ``inactive``. Duplicate files are removed from disk only
        after that transaction has been committed, so a database failure can
        never leave active rows pointing at files that were already deleted.

        Returns the payload of ``scan_local_duplicates`` with an extra
        ``"execution"`` dict of counters.

        Raises:
            LocalDedupeBlockedError: when any job run or job item is running.
        """
        execution = {
            "deduped_group_count": 0,
            "inactive_location_count": 0,
            "deleted_file_count": 0,
            "released_bytes": 0,
            "repointed_upload_task_count": 0,
            "repointed_job_item_count": 0,
        }
        # (path, size) pairs to unlink once the database changes are committed.
        pending_deletions: list[tuple[Path, int]] = []

        with self._connection() as conn:
            self._raise_if_running_work(conn)
            groups = self._load_duplicate_groups(conn)
            affected_pairs: set[tuple[int, int]] = set()
            for group in groups:
                keep = group["keep"]
                duplicates = list(group["duplicates"])
                if not duplicates:
                    continue
                keep_id = int(keep["id"])
                execution["deduped_group_count"] += 1
                conn.execute(
                    """
                    UPDATE file_locations
                    SET
                    is_primary = CASE WHEN id = ? THEN 1 ELSE 0 END,
                    updated_at = CURRENT_TIMESTAMP
                    WHERE file_asset_id = ? AND backend_id = ?
                    """,
                    (
                        keep_id,
                        int(group["file_asset_id"]),
                        int(group["backend_id"]),
                    ),
                )
                for duplicate in duplicates:
                    duplicate_id = int(duplicate["id"])
                    upload_cursor = conn.execute(
                        """
                        UPDATE upload_tasks
                        SET
                        source_location_id = ?,
                        updated_at = CURRENT_TIMESTAMP
                        WHERE source_location_id = ?
                        """,
                        (keep_id, duplicate_id),
                    )
                    execution["repointed_upload_task_count"] += max(upload_cursor.rowcount, 0)
                    item_cursor = conn.execute(
                        """
                        UPDATE job_items
                        SET file_location_id = ?
                        WHERE file_location_id = ?
                        """,
                        (keep_id, duplicate_id),
                    )
                    execution["repointed_job_item_count"] += max(item_cursor.rowcount, 0)
                    inactive_cursor = conn.execute(
                        """
                        UPDATE file_locations
                        SET
                        status = 'inactive',
                        is_primary = 0,
                        updated_at = CURRENT_TIMESTAMP
                        WHERE id = ? AND status = 'active'
                        """,
                        (duplicate_id,),
                    )
                    execution["inactive_location_count"] += max(inactive_cursor.rowcount, 0)
                    duplicate_path = duplicate["_path"]
                    # Never schedule the kept file itself: two rows may point at one file.
                    if (
                        duplicate_path is not None
                        and duplicate_path.exists()
                        and not _paths_match(duplicate_path, keep["_path"])
                    ):
                        pending_deletions.append(
                            (duplicate_path, _duplicate_size_bytes(duplicate))
                        )
                affected_pairs.add((int(group["song_id"]), int(group["backend_id"])))
            for song_id, backend_id in affected_pairs:
                self._refresh_song_backend_presence_with_connection(
                    conn,
                    song_id=song_id,
                    backend_id=backend_id,
                )

        # The transaction is committed at this point; file removal is best-effort.
        for duplicate_path, size_bytes in pending_deletions:
            with suppress(OSError):
                duplicate_path.unlink()
                # Reached only when unlink() succeeded.
                execution["deleted_file_count"] += 1
                execution["released_bytes"] += size_bytes

        payload = self.scan_local_duplicates(sample_limit=sample_limit)
        payload["execution"] = execution
        return payload

    def _raise_if_running_work(self, conn: sqlite3.Connection) -> None:
        """Raise ``LocalDedupeBlockedError`` if any job run or job item has status 'running'."""
        running_jobs_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_runs WHERE status = 'running'"
        ).fetchone()
        running_items_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_items WHERE status = 'running'"
        ).fetchone()
        running_jobs = int(running_jobs_row["count_value"]) if running_jobs_row else 0
        running_items = int(running_items_row["count_value"]) if running_items_row else 0
        if running_jobs > 0 or running_items > 0:
            raise LocalDedupeBlockedError(
                f"cannot dedupe while jobs or items are running (jobs={running_jobs}, items={running_items})"
            )

    def _load_duplicate_groups(self, conn: sqlite3.Connection) -> list[dict[str, Any]]:
        """Load every duplicate group, each split into one ``keep`` and its ``duplicates``.

        Groups are returned sorted by song id, file asset id and backend id.
        """
        rows = conn.execute(
            """
            WITH duplicate_pairs AS (
            SELECT fl.file_asset_id, fl.backend_id
            FROM file_locations AS fl
            JOIN storage_backends AS sb ON sb.id = fl.backend_id
            WHERE fl.status = 'active'
            AND sb.backend_type = 'local_fs'
            GROUP BY fl.file_asset_id, fl.backend_id
            HAVING COUNT(*) > 1
            )
            SELECT
            fl.id AS location_id,
            fl.file_asset_id,
            fa.song_id,
            fl.backend_id,
            sb.name AS backend_name,
            sb.base_path,
            fl.locator,
            fl.absolute_path,
            COALESCE(fa.file_size_bytes, s.file_size_bytes) AS file_size_bytes,
            s.name AS song_name,
            s.singers
            FROM file_locations AS fl
            JOIN duplicate_pairs AS dp
            ON dp.file_asset_id = fl.file_asset_id
            AND dp.backend_id = fl.backend_id
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            JOIN songs AS s ON s.id = fa.song_id
            JOIN storage_backends AS sb ON sb.id = fl.backend_id
            WHERE fl.status = 'active'
            ORDER BY fl.file_asset_id ASC, fl.backend_id ASC, fl.id ASC
            """
        ).fetchall()
        grouped: dict[tuple[int, int], list[dict[str, Any]]] = {}
        for row in rows:
            location = _location_payload(row)
            key = (int(location["file_asset_id"]), int(location["backend_id"]))
            grouped.setdefault(key, []).append(location)
        groups: list[dict[str, Any]] = []
        for (file_asset_id, backend_id), locations in grouped.items():
            # Best candidate first; everything after it is a duplicate.
            ordered_locations = sorted(locations, key=_location_sort_key)
            keep = ordered_locations[0]
            groups.append(
                {
                    "file_asset_id": int(file_asset_id),
                    "backend_id": int(backend_id),
                    "backend_name": keep["backend_name"],
                    "song_id": int(keep["song_id"]),
                    "song_name": keep["song_name"],
                    "singers": keep["singers"],
                    "keep": keep,
                    "duplicates": ordered_locations[1:],
                }
            )
        groups.sort(
            key=lambda group: (
                int(group["song_id"]),
                int(group["file_asset_id"]),
                int(group["backend_id"]),
            )
        )
        return groups

    def _build_scan_payload(
        self,
        groups: list[dict[str, Any]],
        *,
        scanned_active_local_location_count: int,
        sample_limit: int,
    ) -> dict[str, Any]:
        """Assemble the scan result: totals over all groups plus a limited sample.

        A falsy ``sample_limit`` falls back to 20; the effective limit is at
        least 1.
        """
        normalized_sample_limit = max(int(sample_limit or 20), 1)
        return {
            "summary": {
                "duplicate_group_count": len(groups),
                "duplicate_location_count": sum(len(group["duplicates"]) for group in groups),
                "duplicate_file_size_bytes": sum(
                    _duplicate_size_bytes(location)
                    for group in groups
                    for location in group["duplicates"]
                ),
                "scanned_active_local_location_count": int(scanned_active_local_location_count),
            },
            "groups": [self._serialize_group(group) for group in groups[:normalized_sample_limit]],
        }

    @staticmethod
    def _serialize_group(group: dict[str, Any]) -> dict[str, Any]:
        """Return the public form of a duplicate group (no private keys)."""
        return {
            "file_asset_id": int(group["file_asset_id"]),
            "backend_id": int(group["backend_id"]),
            "backend_name": str(group["backend_name"]),
            "song_id": int(group["song_id"]),
            "song_name": str(group["song_name"]),
            "singers": str(group["singers"]),
            "keep": LocalMaintenanceService._serialize_location(group["keep"]),
            "duplicates": [
                LocalMaintenanceService._serialize_location(location)
                for location in group["duplicates"]
            ],
        }

    @staticmethod
    def _serialize_location(location: dict[str, Any]) -> dict[str, Any]:
        """Return the public form of one location (drops ``_path`` and song fields)."""
        return {
            "id": int(location["id"]),
            "locator": str(location["locator"]),
            "absolute_path": str(location["absolute_path"]),
            "file_exists": bool(location["file_exists"]),
            "file_size_bytes": _coerce_int(location["file_size_bytes"]),
            "actual_file_size_bytes": _coerce_int(location["actual_file_size_bytes"]),
        }

    @staticmethod
    def _refresh_song_backend_presence_with_connection(
        conn: sqlite3.Connection,
        *,
        song_id: int,
        backend_id: int,
    ) -> None:
        """Recompute and upsert the ``song_backend_presence`` row for one song/backend.

        The row stores the number of active locations and the lowest active
        location id as ``primary_file_location_id``.
        """
        summary = conn.execute(
            """
            SELECT
            COUNT(*) AS active_file_count,
            MIN(fl.id) AS primary_file_location_id
            FROM file_locations AS fl
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            WHERE fa.song_id = ?
            AND fl.backend_id = ?
            AND fl.status = 'active'
            """,
            (int(song_id), int(backend_id)),
        ).fetchone()
        active_file_count = int(summary["active_file_count"]) if summary else 0
        has_active_file = 1 if active_file_count > 0 else 0
        primary_file_location_id = summary["primary_file_location_id"] if summary else None
        conn.execute(
            """
            INSERT INTO song_backend_presence (
            song_id,
            backend_id,
            has_active_file,
            active_file_count,
            primary_file_location_id
            )
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(song_id, backend_id) DO UPDATE SET
            has_active_file = excluded.has_active_file,
            active_file_count = excluded.active_file_count,
            primary_file_location_id = excluded.primary_file_location_id,
            updated_at = CURRENT_TIMESTAMP
            """,
            (
                int(song_id),
                int(backend_id),
                has_active_file,
                active_file_count,
                primary_file_location_id,
            ),
        )
@@ -0,0 +1,93 @@
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from typing import Any
class JobStatus(str, Enum):
    """String-valued states of a job run; members compare equal to their plain string values."""

    QUEUED = "queued"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"
    COMPLETED = "completed"
    COMPLETED_WITH_ERRORS = "completed_with_errors"
    FAILED = "failed"
    CANCELED = "canceled"
class StageStatus(str, Enum):
    """String-valued states of a single stage inside a job run."""

    PENDING = "pending"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"
class ItemStatus(str, Enum):
    """String-valued states of a single work item inside a stage."""

    PENDING = "pending"
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    FAILED = "failed"
    INTERRUPTED = "interrupted"
    SKIPPED = "skipped"
    CANCELED = "canceled"
@dataclass(frozen=True)
class JobRun:
    """Immutable record describing one job run."""

    id: int
    # Job type name; presumably one of the keys used for stage sequences — confirm against the job definitions.
    job_type: str
    status: JobStatus
    priority: int
    requested_by: str | None
    # Configuration mapping captured for this run.
    config_snapshot: dict[str, Any]
    sources: list[str]
    download_sources: list[str]
    # Scope mapping; may carry a "playlist_ids" list.
    playlist_scope: dict[str, Any]
    # Timestamps are kept as strings (or None when unset).
    created_at: str | None
    started_at: str | None
    ended_at: str | None
    last_error: str | None
    resume_token: str | None
@dataclass(frozen=True)
class JobStage:
    """Immutable record describing one stage of a job run, including its item counters."""

    id: int
    job_run_id: int
    stage_type: str
    # Position of the stage within its job run.
    seq_no: int
    status: StageStatus
    # Per-status item counters for this stage.
    total_items: int
    pending_items: int
    running_items: int
    success_items: int
    failed_items: int
    skipped_items: int
    # Timestamps are kept as strings (or None when unset).
    started_at: str | None
    ended_at: str | None
    last_error: str | None
@dataclass(frozen=True)
class JobItem:
    """Immutable record describing one work item of a job stage."""

    id: int
    job_stage_id: int
    item_type: str
    item_key: str
    # Optional references to the entities this item works on.
    playlist_pool_id: int | None
    playlist_id: int | None
    song_id: int | None
    file_location_id: int | None
    status: ItemStatus
    attempt_count: int
    max_attempts: int
    worker_id: int | None
    # Timestamps are kept as strings (or None when unset).
    started_at: str | None
    ended_at: str | None
    last_error: str | None
    last_error_code: str | None
    # Free-form item parameters (for example, collect items read "source" from here).
    payload: dict[str, Any]
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,896 @@
from __future__ import annotations
import logging
import threading
import time
from collections import Counter
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Any
from musicdl.catalogsync.catalog_export import run_catalog_export_command
from musicdl.catalogsync.downloader import DownloadPlanner
from musicdl.catalogsync.repository import CatalogRepository
from musicdl.catalogsync.services import CatalogSyncService
from musicdl.catalogsync.uploader import CatalogUploader
from .jobdefs import DOWNLOAD_LANE, JOB_STAGE_SEQUENCES, job_lane_type
from .executors import (
CollectStageExecutor,
DownloadStageExecutor,
SyncStageExecutor,
UploadStageExecutor,
)
from .models import JobStatus, StageStatus
from .repository import OpsRepository
# Default worker counts. Their consumers are outside this part of the file;
# presumably the download and sync stage worker pools — TODO confirm.
DEFAULT_DOWNLOAD_WORKERS = 10
DEFAULT_SYNC_WORKERS = 4
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _unique_preserve_order(values: list[str]) -> list[str]:
normalized: list[str] = []
seen: set[str] = set()
for value in values:
item = str(value).strip()
if not item or item in seen:
continue
normalized.append(item)
seen.add(item)
return normalized
def _split_csv(value: Any) -> list[str]:
if isinstance(value, list):
return [str(item).strip() for item in value if str(item).strip()]
if not value:
return []
return [part.strip() for part in str(value).split(",") if part.strip()]
def _int_value(value: Any, default: int) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed > 0 else default
class OpsRunner:
def __init__(
    self,
    repository: OpsRepository,
    sleep_seconds: float = 1.0,
    *,
    download_lane_concurrency: int = 1,
    general_lane_concurrency: int = 3,
):
    """Set up lane limits, the job thread pool and bookkeeping state.

    ``sleep_seconds`` is clamped to at least 0.1 and
    ``general_lane_concurrency`` to at least 1.
    """
    self.repository = repository
    self.sleep_seconds = max(float(sleep_seconds), 0.1)
    # NOTE(review): the download_lane_concurrency argument is accepted but not
    # used; the download lane is pinned to 1 — confirm this is intentional.
    self.download_lane_concurrency = 1
    self.general_lane_concurrency = max(int(general_lane_concurrency), 1)
    # One pool thread per job that may run at the same time across both lanes.
    pool_size = self.download_lane_concurrency + self.general_lane_concurrency
    self._job_pool = ThreadPoolExecutor(max_workers=pool_size)
    # job id -> future of the pool task running that job; guarded by the lock below.
    self._futures: dict[int, Future[None]] = {}
    self._futures_lock = threading.Lock()
    self._playlist_export_lock = threading.Lock()
    self._catalog_export_lock = threading.Lock()
    self._exported_stage_playlists: set[tuple[int, int]] = set()
    self.db_path = Path(self.repository.db_path)
    self.catalog_repo = CatalogRepository(self.db_path)
def recover_incomplete_jobs(self) -> None:
for job in self.repository.list_recoverable_jobs():
self.repository.pause_job_for_recovery(job.id)
for item in self.repository.list_running_items(job.id):
self.repository.mark_item_interrupted(
item.id,
last_error="Recovery interrupted running item after runner restart.",
)
self.repository.add_job_event(
job.id,
"recovery_requeued",
"Recovered incomplete job and re-queued resumable work.",
)
self.repository.resume_job(job.id)
def apply_pending_commands(self) -> None:
for command in self.repository.list_pending_commands():
command_type = str(command["command_type"])
job_id = int(command["job_run_id"])
command_id = int(command["id"])
target_item_id = command["target_item_id"]
if command_type == "pause":
self.repository.request_job_pause(job_id)
elif command_type == "resume":
self.repository.resume_job(job_id)
elif command_type == "cancel":
self.repository.cancel_job(job_id)
elif command_type == "retry_item":
if target_item_id is None:
self.repository.add_job_event(
job_id,
"ignored_command",
"retry_item command missing target_item_id.",
details={"command_type": command_type, "command_id": command_id},
)
elif not self.repository.requeue_item(
int(target_item_id), force=False, job_id=job_id
):
self.repository.add_job_event(
job_id,
"retry_rejected",
"retry_item command rejected.",
item_id=int(target_item_id),
details={"command_type": command_type, "command_id": command_id},
)
elif command_type == "force_retry_item":
if target_item_id is None:
self.repository.add_job_event(
job_id,
"ignored_command",
"force_retry_item command missing target_item_id.",
details={"command_type": command_type, "command_id": command_id},
)
elif not self.repository.requeue_item(
int(target_item_id), force=True, job_id=job_id
):
self.repository.add_job_event(
job_id,
"retry_rejected",
"force_retry_item command rejected.",
item_id=int(target_item_id),
details={"command_type": command_type, "command_id": command_id},
)
else:
self.repository.add_job_event(
job_id,
"ignored_command",
"Unsupported command type.",
details={"command_type": command_type, "command_id": command_id},
)
self.repository.mark_command_applied(command_id)
def reconcile_pause_state(self, job_id: int) -> None:
if self.repository.job_has_running_items(job_id):
return
self.repository.finalize_pause(job_id)
def run_forever(self, stop_event=None) -> None:
    """Recover incomplete jobs, then loop until ``stop_event`` is set.

    After an iteration that did no work the loop waits ``sleep_seconds``
    (on the event when one is given, so a stop request ends the wait early).
    Without a ``stop_event`` the loop never ends on its own.
    """
    self.recover_incomplete_jobs()
    while True:
        if stop_event is not None and stop_event.is_set():
            break
        if self.loop_once():
            # Something happened; look for more work right away.
            continue
        if stop_event is None:
            time.sleep(self.sleep_seconds)
        else:
            stop_event.wait(self.sleep_seconds)
def loop_once(self) -> bool:
had_commands = bool(self.repository.list_pending_commands())
self.apply_pending_commands()
finished = self._reap_finished_jobs()
started = self._start_eligible_jobs()
return bool(had_commands or finished or started)
def _reap_finished_jobs(self) -> int:
    """Drop completed job futures from tracking and return how many were reaped.

    If a future ended with an exception, a ``job_future_error`` event is
    logged and the job is marked failed unless it already has one of the
    statuses listed below.
    """
    with self._futures_lock:
        done_ids = [job_id for job_id, future in self._futures.items() if future.done()]
        reaped = [(job_id, self._futures.pop(job_id)) for job_id in done_ids]

    for job_id, future in reaped:
        try:
            future.result()
        except Exception as exc:
            error_text = str(exc)
            self.repository.add_job_event(job_id, "job_future_error", error_text)
            job = self.repository.get_job(job_id)
            if job is None:
                continue
            # Statuses that must not be overwritten with FAILED.
            settled = {
                JobStatus.COMPLETED,
                JobStatus.COMPLETED_WITH_ERRORS,
                JobStatus.FAILED,
                JobStatus.CANCELED,
                JobStatus.PAUSED,
            }
            if job.status not in settled:
                self.repository.mark_job_finished(
                    job_id,
                    status=JobStatus.FAILED,
                    last_error=error_text,
                )
    return len(reaped)
def _submit_job(self, job_id: int) -> bool:
with self._futures_lock:
if job_id in self._futures:
return False
self._futures[job_id] = self._job_pool.submit(self._run_job, job_id)
return True
def _start_eligible_jobs(self) -> int:
    """Submit active jobs and claim queued jobs while their lane has capacity.

    Active jobs in ``PAUSE_REQUESTED`` are reconciled instead of submitted.
    Every active job (including those) counts against its lane's limit.
    Returns the number of jobs newly submitted to the pool.
    """
    submitted = 0
    active_jobs = self.repository.list_active_jobs()
    lane_usage = Counter(job_lane_type(job.job_type) for job in active_jobs)

    for job in active_jobs:
        if job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job.id)
        elif self._submit_job(job.id):
            submitted += 1

    for candidate in self.repository.list_queued_jobs():
        lane = job_lane_type(candidate.job_type)
        if lane == DOWNLOAD_LANE:
            limit = self.download_lane_concurrency
        else:
            limit = self.general_lane_concurrency
        if lane_usage[lane] >= limit:
            continue
        claimed = self.repository.claim_job_if_queued(candidate.id)
        if claimed is None:
            # The job was no longer claimable (claim returned None).
            continue
        lane_usage[lane] += 1
        if self._submit_job(claimed.id):
            submitted += 1
    return submitted
def _run_job(self, job_id: int) -> None:
    """Drive one job through its stages until it finishes, pauses or is canceled.

    Runs inside the job thread pool. The job row is re-read before every
    decision so that cancel / pause requests made meanwhile are honoured.
    Any exception is logged as a ``job_execution_error`` event and the job
    is marked failed unless it already has one of the statuses listed in
    the handler.
    """
    try:
        current_job = self.repository.get_job(job_id)
        if current_job is None:
            return
        if current_job.status == JobStatus.CANCELED:
            self.repository.finalize_canceled_job(job_id)
            return
        if current_job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job_id)
            return
        if current_job.status == JobStatus.PAUSED:
            return
        if not self.repository.mark_job_running(job_id):
            # The transition to running was refused; re-read the job to see
            # whether a cancel or pause request needs finishing, then stop.
            current_job = self.repository.get_job(job_id)
            if current_job is not None:
                if current_job.status == JobStatus.CANCELED:
                    self.repository.finalize_canceled_job(job_id)
                elif current_job.status == JobStatus.PAUSE_REQUESTED:
                    self.reconcile_pause_state(job_id)
            return
        current_job = self.repository.get_job(job_id)
        if current_job is None:
            return
        self._ensure_job_stages(current_job)
        while True:
            # Check for cancel / pause before picking the next stage.
            current_job = self.repository.get_job(job_id)
            if current_job is None:
                return
            if current_job.status == JobStatus.CANCELED:
                self.repository.finalize_canceled_job(job_id)
                return
            if current_job.status == JobStatus.PAUSE_REQUESTED:
                self.reconcile_pause_state(job_id)
                return
            stage = self._next_runnable_stage(job_id)
            if stage is None:
                if self._job_is_finished(job_id):
                    self._finalize_job(job_id)
                    return
                stages = self.repository.list_job_stages(job_id)
                if any(
                    stage_row.status in {StageStatus.PAUSED, StageStatus.PAUSE_REQUESTED}
                    for stage_row in stages
                ):
                    # A stage is paused while the job is not: pause the job as well.
                    self.repository.pause_job_for_recovery(job_id)
                    return
                raise RuntimeError("Job has no runnable stages but is not finished.")
            self._run_stage(current_job, stage)
            # Check again after the stage ran.
            refreshed_job = self.repository.get_job(job_id)
            if refreshed_job is None:
                return
            if refreshed_job.status == JobStatus.CANCELED:
                self.repository.finalize_canceled_job(job_id)
                return
            if refreshed_job.status == JobStatus.PAUSE_REQUESTED:
                self.reconcile_pause_state(job_id)
                return
            if self._job_is_finished(job_id):
                self._finalize_job(job_id)
                return
    except Exception as exc:
        self.repository.add_job_event(
            job_id,
            "job_execution_error",
            str(exc),
        )
        job = self.repository.get_job(job_id)
        # Do not overwrite a status the job has already settled into.
        if job is not None and job.status not in {
            JobStatus.COMPLETED,
            JobStatus.COMPLETED_WITH_ERRORS,
            JobStatus.FAILED,
            JobStatus.CANCELED,
            JobStatus.PAUSED,
        }:
            self.repository.mark_job_finished(
                job_id,
                status=JobStatus.FAILED,
                last_error=str(exc),
            )
def _ensure_job_stages(self, job) -> None:
existing = self.repository.list_job_stages(job.id)
if existing:
return
for seq_no, stage_type in enumerate(
JOB_STAGE_SEQUENCES.get(str(job.job_type), []), start=1
):
self.repository.create_stage(job_run_id=job.id, stage_type=stage_type, seq_no=seq_no)
def _next_runnable_stage(self, job_id: int):
for stage in self.repository.list_job_stages(job_id):
if stage.status in {StageStatus.PENDING, StageStatus.RUNNING}:
return stage
return None
def _job_sources(self, job) -> list[str]:
    """Return the job's catalog sources, de-duplicated in first-seen order.

    ``job.sources`` is used when it is non-empty; otherwise the ``SOURCES``
    entry of the config snapshot is split with ``_split_csv``. A missing
    (``None``) snapshot is treated as empty, the same way the other helpers
    of this class read ``job.config_snapshot``.
    """
    snapshot = job.config_snapshot or {}
    return _unique_preserve_order(
        list(job.sources or _split_csv(snapshot.get("SOURCES")))
    )
def _job_download_sources(self, job) -> list[str]:
    """Return the sources to download from, de-duplicated in first-seen order.

    Precedence: ``job.download_sources``, then the snapshot key
    ``download_sources``, then ``DOWNLOAD_SOURCES``. A missing (``None``)
    snapshot is treated as empty instead of raising ``AttributeError``.
    """
    snapshot = job.config_snapshot or {}
    return _unique_preserve_order(
        list(
            job.download_sources
            or _split_csv(snapshot.get("download_sources"))
            or _split_csv(snapshot.get("DOWNLOAD_SOURCES"))
        )
    )
def _job_playlist_ids(self, job) -> list[int] | None:
raw_value = job.playlist_scope.get("playlist_ids")
if not isinstance(raw_value, list):
return None
playlist_ids = []
for item in raw_value:
try:
playlist_ids.append(int(item))
except (TypeError, ValueError):
continue
return playlist_ids or None
def _resolve_library_root(self, job) -> Path:
mapping = dict(job.config_snapshot or {})
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
if library_dir:
return Path(str(library_dir)).resolve()
try:
backend = self.catalog_repo.get_backend(self.catalog_repo.get_default_backend_id())
except Exception:
backend = None
if backend and backend["base_path"]:
return Path(str(backend["base_path"])).resolve()
raise RuntimeError("No library root configured for download stage")
def _resolve_playlists_root(self, job) -> Path | None:
mapping = dict(job.config_snapshot or {})
root_dir = mapping.get("ROOT_DIR") or mapping.get("root_dir")
if root_dir:
path = Path(str(root_dir)).resolve() / "playlists"
path.mkdir(parents=True, exist_ok=True)
return path
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
if library_dir:
path = Path(str(library_dir)).resolve().parent / "playlists"
path.mkdir(parents=True, exist_ok=True)
return path
library_root = self.catalog_repo.get_default_local_library_root()
if library_root is None:
return None
path = library_root.parent / "playlists"
path.mkdir(parents=True, exist_ok=True)
return path
def _mark_playlist_exported(self, stage_id: int, playlist_id: int) -> bool:
key = (int(stage_id), int(playlist_id))
with self._playlist_export_lock:
if key in self._exported_stage_playlists:
return False
self._exported_stage_playlists.add(key)
return True
def _forget_playlist_exported(self, stage_id: int, playlist_id: int) -> None:
key = (int(stage_id), int(playlist_id))
with self._playlist_export_lock:
self._exported_stage_playlists.discard(key)
def _export_playlist_artifacts_for_playlist_if_ready(self, job, stage, playlist_id: int | None) -> bool:
if str(stage.stage_type) != "download" or playlist_id is None:
return False
scoped_playlist_ids = self._job_playlist_ids(job)
normalized_playlist_id = int(playlist_id)
if not scoped_playlist_ids or normalized_playlist_id not in scoped_playlist_ids:
return False
if self.repository.playlist_has_open_items(stage.id, normalized_playlist_id):
return False
if not self._mark_playlist_exported(stage.id, normalized_playlist_id):
return False
playlists_root = self._resolve_playlists_root(job)
if playlists_root is None:
self.repository.add_job_event(
job.id,
"playlist_export_skipped",
"Playlists root is not configured for scoped download export.",
stage_id=stage.id,
details={"playlist_id": normalized_playlist_id},
)
return False
service = CatalogSyncService(
repository=self.catalog_repo,
playlists_root=playlists_root,
)
try:
folder_path = service.ensure_playlist_artifacts_for_playlist(normalized_playlist_id)
except Exception as exc:
self._forget_playlist_exported(stage.id, normalized_playlist_id)
self.repository.add_job_event(
job.id,
"playlist_export_error",
str(exc),
stage_id=stage.id,
details={"playlist_id": normalized_playlist_id},
)
return False
if folder_path is None:
self.repository.add_job_event(
job.id,
"playlist_export_skipped",
"Playlist export row is unavailable.",
stage_id=stage.id,
details={"playlist_id": normalized_playlist_id},
)
return False
self.repository.add_job_event(
job.id,
"playlist_export_ready",
f"Exported playlist artifacts for playlist {normalized_playlist_id}.",
stage_id=stage.id,
details={"playlist_id": normalized_playlist_id, "playlist_dir": str(folder_path)},
)
return True
def _refresh_ready_playlist_artifacts(self, job, stage) -> list[int]:
if str(stage.stage_type) != "download":
return []
playlist_ids = self._job_playlist_ids(job)
if not playlist_ids:
return []
exported_ids: list[int] = []
for playlist_id in playlist_ids:
if self._export_playlist_artifacts_for_playlist_if_ready(job, stage, int(playlist_id)):
exported_ids.append(int(playlist_id))
return exported_ids
def _resolve_backend_name(self, job) -> str:
value = (
job.config_snapshot.get("OBJECT_BACKEND_NAME")
or job.config_snapshot.get("object_backend_name")
or ""
)
return str(value).strip()
def _worker_count(self, job, stage_type: str) -> int:
mapping = dict(job.config_snapshot or {})
if stage_type == "download":
return _int_value(mapping.get("DOWNLOAD_WORKERS"), DEFAULT_DOWNLOAD_WORKERS)
if stage_type == "sync":
return _int_value(mapping.get("SYNC_WORKERS"), DEFAULT_SYNC_WORKERS)
if stage_type == "upload":
return _int_value(mapping.get("UPLOAD_WORKERS"), 4)
return 1
def _download_stage_worker_split(self, total_workers: int) -> tuple[int, int]:
normalized_total = max(int(total_workers or 0), 1)
if normalized_total == 1:
return 1, 0
if normalized_total == 2:
return 1, 1
if normalized_total <= 5:
download_workers = 1
else:
download_workers = 2
resolver_workers = max(1, normalized_total - download_workers)
return resolver_workers, download_workers
def _materialize_stage_items(self, job, stage) -> None:
refreshed_stage = self.repository.get_stage(stage.id)
if refreshed_stage is None or refreshed_stage.total_items > 0:
return
playlist_ids = self._job_playlist_ids(job)
if stage.stage_type == "collect":
for source in self._job_sources(job):
self.repository.create_item(
job_stage_id=stage.id,
item_type="collect_source",
item_key=f"collect:{source}",
payload={
"source": source,
"include_playlist_square": True,
"include_toplist": True,
},
)
return
if stage.stage_type == "sync":
if playlist_ids:
playlist_rows = self.catalog_repo.list_playlists_by_ids(playlist_ids)
else:
playlist_rows = self.catalog_repo.list_playlists(sources=self._job_sources(job))
for row in playlist_rows:
playlist_id = int(row["id"])
self.repository.create_item(
job_stage_id=stage.id,
item_type="playlist_sync",
item_key=f"playlist:{playlist_id}",
playlist_id=playlist_id,
payload={"playlist_row": dict(row)},
)
return
if stage.stage_type == "download":
planner = DownloadPlanner(self.catalog_repo)
for row in planner.build_download_queue(
sources=self._job_sources(job),
playlist_ids=playlist_ids,
):
song_id = int(row.get("song_id") or row["id"])
self.repository.create_item(
job_stage_id=stage.id,
item_type="song_download",
item_key=f"song:{song_id}",
song_id=song_id,
playlist_id=row.get("playlist_id"),
payload={"row": dict(row)},
)
return
if stage.stage_type == "upload":
backend_name = self._resolve_backend_name(job)
if not backend_name:
return
uploader = CatalogUploader(self.catalog_repo)
uploader.enqueue_missing_uploads(
backend_name=backend_name,
sources=self._job_sources(job) or None,
playlist_ids=playlist_ids,
)
backend = self.catalog_repo.get_backend_by_name(backend_name)
if backend is None:
return
rows = self.catalog_repo.list_pending_upload_tasks(target_backend_id=int(backend["id"]))
for row in rows:
upload_task_id = int(row["id"])
self.repository.create_item(
job_stage_id=stage.id,
item_type="file_upload",
item_key=f"upload:{upload_task_id}",
file_location_id=row["source_location_id"],
payload={
"upload_task_id": upload_task_id,
"upload_row": dict(row),
},
)
def _build_executor(self, job, stage):
if stage.stage_type == "collect":
return CollectStageExecutor(self.db_path, ops_repo=self.repository)
if stage.stage_type == "sync":
return SyncStageExecutor(self.db_path, ops_repo=self.repository)
if stage.stage_type == "download":
return DownloadStageExecutor(
self.db_path,
library_root=self._resolve_library_root(job),
download_sources=self._job_download_sources(job),
ops_repo=self.repository,
)
if stage.stage_type == "upload":
backend_name = self._resolve_backend_name(job)
if not backend_name:
raise RuntimeError("No object backend configured for upload stage")
return UploadStageExecutor(
self.db_path,
backend_name=backend_name,
ops_repo=self.repository,
)
raise RuntimeError(f"Unsupported stage type: {stage.stage_type}")
def _export_playlist_artifacts_for_job(self, job, stage) -> None:
exported_ids = self._refresh_ready_playlist_artifacts(job, stage)
playlist_ids = self._job_playlist_ids(job) or []
if str(stage.stage_type) != "download" or not playlist_ids:
return
try:
self.repository.add_job_event(
job.id,
"playlist_exported",
f"Refreshed playlist export folders for {len(exported_ids)} playlists.",
stage_id=stage.id,
details={"playlist_ids": exported_ids, "scoped_playlist_ids": playlist_ids},
)
except Exception:
logger.warning(
"Failed to persist playlist_exported event for job %s stage %s.",
job.id,
stage.id,
exc_info=True,
)
def _run_catalog_export_for_stage(self, job, stage) -> None:
if str(stage.stage_type) != "download":
return
with self._catalog_export_lock:
refreshed_job = self.repository.get_job(job.id) or job
if refreshed_job.status in {
JobStatus.CANCELED,
JobStatus.PAUSE_REQUESTED,
JobStatus.PAUSED,
}:
return
self.repository.add_job_event(
job.id,
"catalog_export_started",
"Started post-download catalog export command.",
stage_id=stage.id,
)
try:
result = run_catalog_export_command(refreshed_job.config_snapshot)
except Exception as exc:
self.repository.add_job_event(
job.id,
"catalog_export_failed",
f"Catalog export command raised an error: {exc}",
stage_id=stage.id,
details={"error": str(exc) or exc.__class__.__name__},
)
return
details: dict[str, Any] = {}
if result.command:
details["command"] = result.command
if result.workdir:
details["workdir"] = result.workdir
if result.returncode is not None:
details["returncode"] = result.returncode
if result.stdout:
details["stdout"] = result.stdout
if result.stderr:
details["stderr"] = result.stderr
normalized_status = str(result.status).strip().lower()
if normalized_status == "succeeded":
event_type = "catalog_export_succeeded"
message = "Catalog export command completed successfully."
elif normalized_status == "skipped":
event_type = "catalog_export_skipped"
message = "Catalog export command was skipped."
else:
event_type = "catalog_export_failed"
message = "Catalog export command failed."
self.repository.add_job_event(
job.id,
event_type,
message,
stage_id=stage.id,
details=details or None,
)
def _run_stage_with_single_pool(self, job, stage, executor, worker_count: int) -> None:
    """Process a stage's items with ``worker_count`` identical worker threads.

    Each worker repeatedly claims the next item of the stage and passes it
    to ``executor.process_item``. A worker stops when nothing can be claimed,
    when the job row no longer exists, or when the job status is
    PAUSE_REQUESTED or CANCELED. An exception raised while handling an item
    is recorded as an ``item_execution_error`` job event and the worker moves
    on to the next item.
    """
    def worker_loop(worker_index: int) -> None:
        worker_name = f"{stage.stage_type}-{worker_index + 1}"
        while True:
            # Re-read the job before every claim so a pause/cancel request
            # is honoured between items.
            active_job = self.repository.get_job(job.id)
            if active_job is None or active_job.status in {
                JobStatus.PAUSE_REQUESTED,
                JobStatus.CANCELED,
            }:
                return
            item = self.repository.claim_next_stage_item(stage.id, worker_name)
            if item is None:
                return
            try:
                executor.process_item(item.id, worker_name, already_claimed=True)
                # After each item, export the item's playlist if it is now complete.
                self._export_playlist_artifacts_for_playlist_if_ready(
                    job,
                    stage,
                    item.playlist_id,
                )
            except Exception as exc:
                self.repository.add_job_event(
                    job.id,
                    "item_execution_error",
                    str(exc),
                    stage_id=stage.id,
                    item_id=item.id,
                )
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        futures = [pool.submit(worker_loop, index) for index in range(worker_count)]
        # result() re-raises anything that escaped a worker loop.
        for future in futures:
            future.result()
def _run_download_stage_pipeline(self, job, stage, executor, worker_count: int) -> None:
resolver_workers, download_workers = self._download_stage_worker_split(worker_count)
if download_workers == 0:
self._run_stage_with_single_pool(job, stage, executor, worker_count)
return
ready_queue: Queue = Queue(maxsize=max(1, download_workers * 2))
stop_event = threading.Event()
sentinel = object()
def resolver_loop(worker_index: int) -> None:
worker_name = f"resolve-{worker_index + 1}"
while not stop_event.is_set():
active_job = self.repository.get_job(job.id)
if active_job is None or active_job.status in {
JobStatus.PAUSE_REQUESTED,
JobStatus.CANCELED,
}:
stop_event.set()
return
item = self.repository.claim_next_stage_item(stage.id, worker_name)
if item is None:
return
try:
executor.process_resolve_item(
item.id,
worker_name,
ready_queue=ready_queue,
already_claimed=True,
)
self._export_playlist_artifacts_for_playlist_if_ready(
job,
stage,
item.playlist_id,
)
except Exception as exc:
self.repository.add_job_event(
job.id,
"item_execution_error",
str(exc),
stage_id=stage.id,
item_id=item.id,
)
def download_loop(worker_index: int) -> None:
worker_name = f"download-{worker_index + 1}"
while True:
task = ready_queue.get()
if task is sentinel:
return
try:
executor.process_download_task(task, worker_name)
self._export_playlist_artifacts_for_playlist_if_ready(
job,
stage,
getattr(task, "playlist_id", None),
)
except Exception as exc:
self.repository.add_job_event(
job.id,
"item_execution_error",
str(exc),
stage_id=stage.id,
item_id=getattr(task, "item_id", None),
)
with ThreadPoolExecutor(max_workers=resolver_workers + download_workers) as pool:
resolver_futures = [pool.submit(resolver_loop, index) for index in range(resolver_workers)]
download_futures = [pool.submit(download_loop, index) for index in range(download_workers)]
for future in resolver_futures:
future.result()
for _ in range(download_workers):
ready_queue.put(sentinel)
for future in download_futures:
future.result()
def _run_stage(self, job, stage) -> None:
    """Run one stage of a job: start it, create items, process, then close it.

    A PENDING stage is marked running and a ``stage_started`` event is
    written. Items are materialized; a stage with no items is completed
    immediately. Otherwise the items are processed by the download pipeline
    (download stages) or a single worker pool (all other stages).

    After processing, a canceled or pause-requested job is handed to the
    corresponding repository/reconcile call and the stage is left as is.
    A stage that still has open items is also left unfinished. Otherwise the
    stage is marked FAILED when it reports failed items, else COMPLETED.
    Whenever the stage is finished here, the playlist export and catalog
    export hooks are invoked with the re-read stage row.
    """
    if stage.status == StageStatus.PENDING:
        self.repository.mark_stage_running(stage.id)
        self.repository.add_job_event(
            job.id,
            "stage_started",
            f"Started stage {stage.stage_type}.",
            stage_id=stage.id,
        )
    self._materialize_stage_items(job, stage)
    refreshed_stage = self.repository.get_stage(stage.id)
    if refreshed_stage is None:
        return
    if refreshed_stage.total_items == 0:
        # Nothing to process: complete the stage right away and still run
        # the post-stage hooks.
        self.repository.mark_stage_finished(stage.id, status=StageStatus.COMPLETED)
        final_stage = self.repository.get_stage(stage.id)
        if final_stage is not None:
            self._export_playlist_artifacts_for_job(job, final_stage)
            self._run_catalog_export_for_stage(job, final_stage)
        return
    executor = self._build_executor(job, refreshed_stage)
    worker_count = self._worker_count(job, refreshed_stage.stage_type)
    if refreshed_stage.stage_type == "download":
        self._run_download_stage_pipeline(job, refreshed_stage, executor, worker_count)
    else:
        self._run_stage_with_single_pool(job, refreshed_stage, executor, worker_count)
    # Workers stop early on cancel/pause; handle those before closing the stage.
    current_job = self.repository.get_job(job.id)
    if current_job is not None:
        if current_job.status == JobStatus.CANCELED:
            self.repository.finalize_canceled_job(job.id)
            return
        if current_job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job.id)
            return
    current_stage = self.repository.get_stage(stage.id)
    if current_stage is None:
        return
    # Open items remain: leave the stage unfinished.
    if self.repository.stage_has_open_items(stage.id):
        return
    if current_stage.failed_items > 0:
        self.repository.mark_stage_finished(
            stage.id,
            status=StageStatus.FAILED,
            last_error="One or more stage items failed.",
        )
    else:
        self.repository.mark_stage_finished(stage.id, status=StageStatus.COMPLETED)
    final_stage = self.repository.get_stage(stage.id)
    if final_stage is not None:
        self._export_playlist_artifacts_for_job(job, final_stage)
        self._run_catalog_export_for_stage(job, final_stage)
def _job_is_finished(self, job_id: int) -> bool:
stages = self.repository.list_job_stages(job_id)
if not stages:
return True
return all(
stage.status in {StageStatus.COMPLETED, StageStatus.FAILED, StageStatus.SKIPPED}
for stage in stages
)
def _finalize_job(self, job_id: int) -> None:
    """Mark the job finished, as COMPLETED or COMPLETED_WITH_ERRORS.

    The job completes with errors when any stage is FAILED or reports failed
    items. A job without stages completes cleanly.
    """
    stage_rows = self.repository.list_job_stages(job_id)
    if not stage_rows:
        self.repository.mark_job_finished(job_id, status=JobStatus.COMPLETED)
        return
    failed_somewhere = False
    for row in stage_rows:
        if row.status == StageStatus.FAILED or row.failed_items > 0:
            failed_somewhere = True
            break
    if failed_somewhere:
        final_status = JobStatus.COMPLETED_WITH_ERRORS
        error_text = "One or more stage items failed."
    else:
        final_status = JobStatus.COMPLETED
        error_text = None
    self.repository.mark_job_finished(
        job_id,
        status=final_status,
        last_error=error_text,
    )
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,265 @@
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
import requests
from .runtime import sanitize_path_component
# Module logger; used below to report cover download problems.
LOGGER = logging.getLogger(__name__)
# Names of the files/directory written inside each playlist folder.
PLAYLIST_META_FILENAME = ".playlist_meta.json"
PLAYLIST_YAML_FILENAME = "playlist.yaml"
PLAYLIST_COVERS_DIRNAME = "covers"
# Largest cover image that is saved (10 MiB); bigger downloads are discarded.
MAX_COVER_BYTES = 10 * 1024 * 1024
# Text consisting only of these characters is a candidate for an unquoted YAML scalar.
_YAML_SAFE_TEXT_RE = re.compile(r"^[A-Za-z0-9_./%+\- :]+$")
# Substrings that force quoting even when the safe-character check passes.
_SPECIAL_YAML_TOKENS = (": ", "#", "[", "]", "{", "}", ",", "&", "*", "!", "|", ">", "'", '"', "@", "`")
# Image extension at the end of a cover URL path (case-insensitive).
_COVER_EXT_RE = re.compile(r"\.(jpg|jpeg|png|webp|gif|bmp)$", re.IGNORECASE)
# Fallback from a response Content-Type to a file extension when the URL has none.
_CONTENT_TYPE_TO_EXT = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "image/gif": ".gif",
    "image/bmp": ".bmp",
}
# Plain words that YAML parsers commonly resolve to null/boolean values.
_YAML_RESERVED_WORDS = frozenset({"null", "~", "true", "false", "yes", "no", "on", "off"})
# Leading characters after which unquoted text may be read as a number, date,
# sequence entry, directive or mapping indicator instead of a string.
_YAML_AMBIGUOUS_LEADING_CHARS = frozenset("0123456789+-.%:")


def yaml_scalar(value: Any) -> str:
    """Render ``value`` as a single-line YAML scalar.

    ``None`` and ``""`` become ``null``, booleans ``true``/``false``, and
    numbers their decimal form (integral floats without a fraction,
    non-finite floats as ``.nan`` / ``.inf`` / ``-.inf``).

    Any other value is converted with ``str()``. It is emitted unquoted only
    when it is unambiguous plain text; otherwise it is written as a
    JSON-style double-quoted string. Text is quoted when it:

    * has leading/trailing whitespace (including a trailing newline),
    * equals a reserved word such as ``true`` or ``null`` (any case),
    * starts with a digit, sign, dot, ``%`` or ``:`` (so ``"1989"`` stays a
      string and ``"- x"`` is not read as a list entry),
    * ends with ``:``, or
    * contains characters outside the safe set or a special token.
    """
    if value in (None, ""):
        return "null"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, int):
        # Handled without a float round-trip, which overflows for huge ints.
        return str(value)
    if isinstance(value, float):
        if value != value:
            return ".nan"
        if value in (float("inf"), float("-inf")):
            return ".inf" if value > 0 else "-.inf"
        return str(int(value)) if value.is_integer() else str(value)
    text = str(value)
    ambiguous = (
        not text
        or text != text.strip()
        or text.lower() in _YAML_RESERVED_WORDS
        or text[0] in _YAML_AMBIGUOUS_LEADING_CHARS
        or text.endswith(":")
    )
    if ambiguous:
        return json.dumps(text, ensure_ascii=False)
    # fullmatch: "$" alone would also accept text ending in a newline.
    if _YAML_SAFE_TEXT_RE.fullmatch(text) and not any(
        token in text for token in _SPECIAL_YAML_TOKENS
    ):
        return text
    return json.dumps(text, ensure_ascii=False)
def build_playlist_dir_name(playlist_name: str | None, playlist_id: int) -> str:
    """Return the folder name ``<sanitized name>_<playlist id>``.

    ``playlist-<id>`` is passed to ``sanitize_path_component`` as its second
    argument (presumably the fallback for an unusable name — confirm against
    that helper).
    """
    numeric_id = int(playlist_id)
    fallback_name = f"playlist-{numeric_id}"
    safe_name = sanitize_path_component(str(playlist_name or ""), fallback_name)
    return f"{safe_name}_{numeric_id}"
def build_playlist_meta_payload(playlist: dict[str, Any]) -> dict[str, Any]:
    """Build the identity record stored in a playlist folder's meta file.

    Missing or falsy fields become ``0`` for the id and ``""`` for text.
    """
    def text_field(key: str) -> str:
        return str(playlist.get(key) or "")

    return {
        "playlist_id": int(playlist.get("id") or 0),
        "platform": text_field("platform"),
        "remote_playlist_id": text_field("remote_playlist_id"),
        "name": text_field("name"),
    }
def read_playlist_meta(playlist_dir: Path) -> dict[str, Any] | None:
    """Load the playlist meta file of ``playlist_dir``.

    Returns ``None`` when the file is missing, cannot be read or parsed as
    JSON, or does not contain a JSON object.
    """
    meta_file = playlist_dir / PLAYLIST_META_FILENAME
    if not meta_file.exists():
        return None
    try:
        parsed = json.loads(meta_file.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable or corrupt meta files are treated as absent.
        return None
    if isinstance(parsed, dict):
        return parsed
    return None
def locate_playlist_dir(playlists_root: Path, playlist: dict[str, Any]) -> Path | None:
if not playlists_root.exists():
return None
playlist_id = int(playlist.get("id") or 0)
if playlist_id <= 0:
return None
preferred = playlists_root / build_playlist_dir_name(str(playlist.get("name") or ""), playlist_id)
if preferred.exists():
return preferred
suffix = f"_{playlist_id}"
candidates: list[Path] = []
for child in playlists_root.iterdir():
if not child.is_dir():
continue
if child.name.endswith(suffix):
candidates.append(child)
continue
meta_payload = read_playlist_meta(child)
if int(meta_payload.get("playlist_id") or 0) == playlist_id if meta_payload else False:
candidates.append(child)
if not candidates:
return None
candidates.sort(key=lambda path: path.stat().st_mtime, reverse=True)
return candidates[0]
def ensure_playlist_dir(playlists_root: Path, playlist: dict[str, Any]) -> Path:
    """Return the playlist's export folder, creating it when none exists yet."""
    folder = locate_playlist_dir(playlists_root, playlist)
    if folder is None:
        numeric_id = int(playlist.get("id") or 0)
        folder_name = build_playlist_dir_name(str(playlist.get("name") or ""), numeric_id)
        folder = playlists_root / folder_name
        folder.mkdir(parents=True, exist_ok=True)
    return folder
def _guess_cover_extension(url: str | None, content_type: str | None) -> str:
    """Pick a file extension for a cover image.

    A known image extension at the end of the URL path wins (lower-cased).
    Otherwise the Content-Type, without parameters, is looked up, with
    ``.jpg`` as the default.
    """
    url_path = str(urlparse(str(url or "")).path or "")
    found = _COVER_EXT_RE.search(url_path)
    if found is not None:
        return "." + str(found.group(1)).lower()
    mime_type = str(content_type or "").partition(";")[0].strip().lower()
    return _CONTENT_TYPE_TO_EXT.get(mime_type, ".jpg")
def download_cover_file(
*,
cover_url: str,
covers_dir: Path,
file_stem: str,
timeout: tuple[int, int] = (10, 20),
) -> str | None:
normalized_url = str(cover_url or "").strip()
if not normalized_url:
return None
try:
response = requests.get(normalized_url, timeout=timeout)
response.raise_for_status()
content = bytes(response.content or b"")
except Exception:
LOGGER.warning("Failed to download cover image: %s", normalized_url, exc_info=True)
return None
if not content:
return None
if len(content) > MAX_COVER_BYTES:
LOGGER.warning(
"Skipped oversized cover image (> %d bytes): %s",
MAX_COVER_BYTES,
normalized_url,
)
return None
extension = _guess_cover_extension(normalized_url, response.headers.get("Content-Type"))
normalized_stem = sanitize_path_component(file_stem, "cover")
filename = f"{normalized_stem}{extension}"
destination_path = covers_dir / filename
destination_path.write_bytes(content)
return f"{PLAYLIST_COVERS_DIRNAME}/{filename}"
def serialize_playlist_yaml(playlist: dict[str, Any], items: list[dict[str, Any]]) -> str:
    """Render a playlist and its songs as a YAML document.

    Layout::

        playlist_id: ...
        ...
        songs:
          - local_song_id: ...
            platform_song_id: ...
            ...
            uploaded_locations:
              - backend_name: ...
                backend_type: ...

    Keys that continue a list entry are indented to line up with the first
    key after ``- ``, which YAML requires for them to belong to the same
    mapping. Empty collections are written as ``[]``. All values go through
    ``yaml_scalar``. The document always ends with a newline.
    """
    playlist_fields = (
        ("playlist_id", "id"),
        ("playlist_name", "name"),
        ("platform", "platform"),
        ("play_count", "play_count"),
        ("playlist_cover_url", "cover_url"),
        ("playlist_cover_file", "cover_file"),
    )
    song_fields = (
        ("local_song_id", "song_id"),
        ("platform_song_id", "remote_song_id"),
        ("platform", "platform"),
        ("name", "name"),
        ("singers", "singers"),
        ("album", "album"),
        ("ext", "ext"),
        ("file_size_bytes", "file_size_bytes"),
        ("cover_url", "cover_url"),
        ("cover_file", "cover_file"),
        ("local_file_path", "local_file_path"),
    )
    location_fields = (
        ("backend_name", "backend_name"),
        ("backend_type", "backend_type"),
        ("uploaded_url", "url"),
        ("container_name", "container_name"),
        ("locator", "locator"),
    )

    lines = [f"{label}: {yaml_scalar(playlist.get(key))}" for label, key in playlist_fields]
    if not items:
        lines.append("songs: []")
        return "\n".join(lines) + "\n"
    lines.append("songs:")
    for song in items:
        for position, (label, key) in enumerate(song_fields):
            lead = "  - " if position == 0 else "    "
            lines.append(f"{lead}{label}: {yaml_scalar(song.get(key))}")
        uploaded_locations = song.get("uploaded_locations")
        # Anything that is not a list is treated as "no uploads".
        if not isinstance(uploaded_locations, list) or not uploaded_locations:
            lines.append("    uploaded_locations: []")
            continue
        lines.append("    uploaded_locations:")
        for location in uploaded_locations:
            payload = dict(location or {})
            for position, (label, key) in enumerate(location_fields):
                lead = "      - " if position == 0 else "        "
                lines.append(f"{lead}{label}: {yaml_scalar(payload.get(key))}")
    return "\n".join(lines) + "\n"
def write_playlist_artifacts(
    *,
    playlist: dict[str, Any],
    songs: list[dict[str, Any]],
    playlists_root: Path,
) -> Path:
    """Write the export folder of a playlist and return its path.

    The folder receives a ``covers`` directory with the downloaded playlist
    and song covers, the playlist YAML file and the playlist meta JSON file.
    Each distinct cover URL that downloads successfully is fetched once and
    reused for later songs; failed downloads leave ``cover_file`` as
    ``None``.
    """
    playlists_root.mkdir(parents=True, exist_ok=True)
    playlist_dir = ensure_playlist_dir(playlists_root, playlist)
    covers_dir = playlist_dir / PLAYLIST_COVERS_DIRNAME
    covers_dir.mkdir(parents=True, exist_ok=True)

    saved_by_url: dict[str, str] = {}

    def fetch_cover(url, stem):
        # Only successful downloads are remembered, so failures are retried
        # when the same URL shows up again.
        saved = download_cover_file(
            cover_url=url,
            covers_dir=covers_dir,
            file_stem=stem,
        )
        if saved:
            saved_by_url[url] = saved
        return saved

    playlist_cover_url = str(playlist.get("cover_url") or "").strip()
    playlist_cover_file = None
    if playlist_cover_url:
        playlist_cover_file = fetch_cover(playlist_cover_url, "playlist-cover")

    enriched_songs: list[dict[str, Any]] = []
    for position, song in enumerate(songs, start=1):
        entry = dict(song)
        song_cover_url = str(entry.get("cover_url") or "").strip()
        song_cover_file = None
        if song_cover_url:
            song_cover_file = saved_by_url.get(song_cover_url)
            if not song_cover_file:
                id_component = sanitize_path_component(
                    str(entry.get("remote_song_id") or entry.get("song_id") or position),
                    str(position),
                )
                song_cover_file = fetch_cover(
                    song_cover_url,
                    f"song-{position}-{id_component}",
                )
        entry["cover_url"] = song_cover_url or None
        entry["cover_file"] = song_cover_file
        enriched_songs.append(entry)

    yaml_header = {
        "id": int(playlist.get("id") or 0),
        "name": str(playlist.get("name") or ""),
        "platform": str(playlist.get("platform") or ""),
        "play_count": playlist.get("play_count"),
        "cover_url": playlist_cover_url or None,
        "cover_file": playlist_cover_file,
    }
    yaml_text = serialize_playlist_yaml(yaml_header, enriched_songs)
    (playlist_dir / PLAYLIST_YAML_FILENAME).write_text(yaml_text, encoding="utf-8")
    meta_text = json.dumps(build_playlist_meta_payload(playlist), ensure_ascii=False, indent=2)
    (playlist_dir / PLAYLIST_META_FILENAME).write_text(meta_text, encoding="utf-8")
    return playlist_dir
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,378 @@
from __future__ import annotations
import copy
import re
from typing import Any, Callable
from .models import normalize_source_name, parse_size_to_bytes
# Source key -> client class name.
# NOTE(review): the consumer of this mapping is not visible in this part of the file.
SOURCE_CLIENT_NAMES = {
    "netease": "NeteaseMusicClient",
    "qq": "QQMusicClient",
    "kuwo": "KuwoMusicClient",
    "migu": "MiguMusicClient",
    "qianqian": "QianqianMusicClient",
    "kugou": "KugouMusicClient",
}
# Default list of download sources — presumably in preference order; confirm with the caller.
DEFAULT_DOWNLOAD_SOURCES = ["qq", "kuwo", "migu", "qianqian", "kugou", "netease"]
# Default for the ``warmup_attempts`` argument of MultiSourceSongResolver.
DEFAULT_FALLBACK_RANK_WARMUP_ATTEMPTS = 1000
# Extensions that search_result_quality_group() places in the best group (0).
LOSSLESS_EXTENSIONS = {"flac", "wav", "alac", "ape", "wv", "tta", "dsf", "dff"}
# Matches "/", ",", "&" or "|" together with any surrounding whitespace.
ARTIST_SEPARATOR_RE = re.compile(r"\s*(?:/|,|&|\|)\s*")
def normalize_audio_ext(value: str | None) -> str:
return str(value or "").strip().lower().lstrip(".")
def normalize_keyword(value: str | None) -> str:
return " ".join(str(value or "").strip().lower().split())
def normalize_artist_keyword(value: str | None) -> str:
    """Normalize an artist string for comparison.

    Applies ``normalize_keyword`` and then treats ``& / \\ , | ;`` as word
    separators, so differently punctuated artist lists compare equal.
    """
    separators_to_spaces = str.maketrans({token: " " for token in "&/\\,|;"})
    spaced = normalize_keyword(value).translate(separators_to_spaces)
    return " ".join(spaced.split())
def dedupe_preserve_order(values: list[str]) -> list[str]:
    """Normalize source names and drop repeats, keeping first-seen order."""
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(normalize_source_name(entry) for entry in values))
def candidate_file_size_bytes(song_info: Any) -> int:
    """Return the candidate's file size in bytes, or 0 when unknown.

    A positive numeric ``file_size_bytes`` attribute is used directly;
    otherwise the ``file_size`` attribute is parsed with
    ``parse_size_to_bytes``.
    """
    declared = getattr(song_info, "file_size_bytes", None)
    has_numeric_size = isinstance(declared, (int, float)) and declared > 0
    if not has_numeric_size:
        parsed = parse_size_to_bytes(getattr(song_info, "file_size", None))
        return int(parsed or 0)
    return int(declared)
def search_result_quality_group(song_info: Any) -> int:
    """Rank a search result by audio format: 0 lossless, 1 mp3, 2 other.

    The ``ext`` and ``codec`` attributes are inspected and, when
    ``download_url_status`` is a dict, its ``probe_status["ext"]`` too. They
    are checked in that order and the first lossless or mp3 value decides.
    """
    raw_values = [
        getattr(song_info, "ext", None),
        getattr(song_info, "codec", None),
    ]
    url_status = getattr(song_info, "download_url_status", None)
    if isinstance(url_status, dict):
        probe = url_status.get("probe_status") or {}
        raw_values.append(probe.get("ext"))
    for normalized in [normalize_audio_ext(raw) for raw in raw_values]:
        if not normalized:
            continue
        if normalized in LOSSLESS_EXTENSIONS:
            return 0
        if normalized == "mp3":
            return 1
    return 2
def song_info_match_priority(candidate_song_info: Any, target_song_info: Any) -> int:
    """Score how well a candidate matches the target song (lower is better).

    Returns:
        0 - same source and same non-empty identifier,
        1 - same non-empty song name and same non-empty singers,
        2 - same non-empty song name only,
        99 - no match.
    """
    candidate_source = normalize_source_name(getattr(candidate_song_info, "source", None))
    target_source = normalize_source_name(getattr(target_song_info, "source", None))
    candidate_id = str(getattr(candidate_song_info, "identifier", "") or "").strip()
    target_id = str(getattr(target_song_info, "identifier", "") or "").strip()
    candidate_name = normalize_keyword(getattr(candidate_song_info, "song_name", None))
    target_name = normalize_keyword(getattr(target_song_info, "song_name", None))
    candidate_artists = normalize_artist_keyword(getattr(candidate_song_info, "singers", None))
    target_artists = normalize_artist_keyword(getattr(target_song_info, "singers", None))

    # Equality with a non-empty value implies the other side is non-empty too.
    same_track = (
        candidate_source == target_source
        and bool(candidate_id)
        and candidate_id == target_id
    )
    if same_track:
        return 0
    same_name = bool(candidate_name) and candidate_name == target_name
    if not same_name:
        return 99
    same_artists = bool(candidate_artists) and candidate_artists == target_artists
    return 1 if same_artists else 2
def match_priority_group(match_priority: int) -> int:
    """Bucket a match priority: 0 for priorities <= 1, 99 for >= 99, else 1."""
    if match_priority >= 99:
        return 99
    return 0 if match_priority <= 1 else 1
def is_high_confidence_match(match_priority: int) -> bool:
    """Return ``True`` when the priority falls into match group 0."""
    group = match_priority_group(match_priority)
    return group == 0
def build_resolve_keyword(song_info: Any, row: dict[str, Any]) -> str:
    """Build the search keyword used to look a song up on another source.

    Song name and singers are taken from ``song_info`` and ``row``, trimmed,
    and joined with spaces; blank values, the literal ``NULL`` (any case) and
    exact repeats are dropped. When nothing remains, the identifier (or the
    row's ``remote_song_id``) is returned instead.
    """
    raw_parts = (
        getattr(song_info, "song_name", None),
        row.get("name"),
        getattr(song_info, "singers", None),
        row.get("singers"),
    )
    parts: list[str] = []
    for raw in raw_parts:
        cleaned = str(raw or "").strip()
        if not cleaned or cleaned.upper() == "NULL" or cleaned in parts:
            continue
        parts.append(cleaned)
    if not parts:
        fallback = getattr(song_info, "identifier", None) or row.get("remote_song_id") or ""
        return str(fallback).strip()
    return " ".join(parts)
def merge_resolved_song_info(base_song_info: Any, resolved_song_info: Any) -> Any:
    """Combine a resolved song (with a download URL) with its original info.

    Without a resolved song that has ``with_valid_download_url`` set, a deep
    copy of ``base_song_info`` is returned. Otherwise a deep copy of the
    resolved song is returned with these adjustments:

    * ``work_dir`` comes from the base song when it has one,
    * ``raw_data`` is forced to a dict, inherits the base song's ``search``
      entry if it has none, and gets ``deferred_search = False``,
    * empty ``source`` / ``root_source`` / ``ext`` / size fields and
      placeholder descriptive fields are filled from the base song.

    Placeholder checks use equality against tuples, so attribute values that
    are not hashable (for example a list of singers) are handled too.
    """
    if not resolved_song_info or not getattr(resolved_song_info, "with_valid_download_url", False):
        return copy.deepcopy(base_song_info)
    merged_song_info = copy.deepcopy(resolved_song_info)
    merged_song_info.work_dir = getattr(base_song_info, "work_dir", getattr(merged_song_info, "work_dir", None))
    if not isinstance(getattr(merged_song_info, "raw_data", None), dict):
        merged_song_info.raw_data = {}
    base_raw_data = getattr(base_song_info, "raw_data", None)
    if isinstance(base_raw_data, dict) and "search" in base_raw_data and "search" not in merged_song_info.raw_data:
        merged_song_info.raw_data["search"] = copy.deepcopy(base_raw_data["search"])
    merged_song_info.raw_data["deferred_search"] = False
    if not getattr(merged_song_info, "source", None):
        merged_song_info.source = getattr(base_song_info, "source", None)
    if not getattr(merged_song_info, "root_source", None):
        merged_song_info.root_source = getattr(base_song_info, "root_source", None)
    # Tuples instead of sets: "in" on a set hashes the value and raises
    # TypeError for unhashable attribute values.
    placeholder_values = (None, "", "NULL", "-:-:-")
    unusable_fallbacks = (None, "", "NULL")
    for attr in ("song_name", "singers", "album", "duration_s", "duration", "cover_url"):
        current_value = getattr(merged_song_info, attr, None)
        fallback_value = getattr(base_song_info, attr, None)
        if current_value in placeholder_values and fallback_value not in unusable_fallbacks:
            setattr(merged_song_info, attr, fallback_value)
    if not getattr(merged_song_info, "ext", None):
        merged_song_info.ext = getattr(base_song_info, "ext", None)
    if not getattr(merged_song_info, "file_size_bytes", None):
        merged_song_info.file_size_bytes = getattr(base_song_info, "file_size_bytes", None)
    if not getattr(merged_song_info, "file_size", None):
        merged_song_info.file_size = getattr(base_song_info, "file_size", None)
    return merged_song_info
class MultiSourceSongResolver:
def __init__(
    self,
    client_factory: Callable[[str], object],
    request_overrides_factory: Callable[[tuple[int, int]], dict] | None = None,
    resolver_stats_repo: Any | None = None,
    warmup_attempts: int = DEFAULT_FALLBACK_RANK_WARMUP_ATTEMPTS,
):
    """Store the resolver's collaborators.

    Args:
        client_factory: Called with a source name to obtain a client.
        request_overrides_factory: Called with a timeout tuple to build
            request overrides; defaults to ``{"timeout": timeout}``.
        resolver_stats_repo: Optional repository, stored as is.
        warmup_attempts: Converted to ``int`` and clamped to at least 0.
    """
    def default_overrides(timeout):
        return {"timeout": timeout}

    self.client_factory = client_factory
    self.request_overrides_factory = request_overrides_factory or default_overrides
    self.resolver_stats_repo = resolver_stats_repo
    self.warmup_attempts = max(0, int(warmup_attempts))
@staticmethod
def _has_valid_download_url(song_info: Any) -> bool:
return bool(getattr(song_info, "with_valid_download_url", False))
def _request_overrides(self, timeout: tuple[int, int]) -> dict:
return dict(self.request_overrides_factory(timeout))
@staticmethod
def _emit_progress(progress_callback: Callable[[str], None] | None, message: str) -> None:
if progress_callback is None:
return
progress_callback(str(message))
def _refresh_song_info(self, client: object, song_info: Any) -> Any:
if self._has_valid_download_url(song_info):
return copy.deepcopy(song_info)
raw_data = getattr(song_info, "raw_data", None)
search_result = raw_data.get("search") if isinstance(raw_data, dict) else None
if not isinstance(search_result, dict):
return copy.deepcopy(song_info)
request_overrides = self._request_overrides((10, 30))
third_party_song = None
if hasattr(client, "_parsewiththirdpartapis"):
try:
third_party_song = client._parsewiththirdpartapis(
search_result=search_result,
request_overrides=request_overrides,
)
except Exception:
third_party_song = None
refreshed_song = None
if hasattr(client, "_parsewithofficialapiv1"):
try:
kwargs = {
"search_result": search_result,
"request_overrides": request_overrides,
}
if third_party_song is not None:
kwargs["song_info_flac"] = third_party_song
refreshed_song = client._parsewithofficialapiv1(**kwargs)
except TypeError:
try:
refreshed_song = client._parsewithofficialapiv1(
search_result=search_result,
request_overrides=request_overrides,
)
except Exception:
refreshed_song = None
except Exception:
refreshed_song = None
for candidate in (refreshed_song, third_party_song):
if not self._has_valid_download_url(candidate):
continue
return merge_resolved_song_info(song_info, candidate)
return copy.deepcopy(song_info)
def _search_source_candidates(self, source: str, keyword: str) -> list[Any]:
if not keyword:
return []
try:
client = self.client_factory(source)
results = client.search(
keyword=keyword,
num_threadings=1,
request_overrides=self._request_overrides((10, 30)),
rule={},
)
except Exception:
return []
return list(results or [])
def _pick_best_candidate(self, candidates: list[Any], target_song_info: Any, source_rank: int) -> Any:
matched_candidates: list[tuple[Any, int, int]] = []
for candidate in candidates:
if not self._has_valid_download_url(candidate):
continue
match_priority = song_info_match_priority(candidate, target_song_info)
if match_priority >= 99:
continue
matched_candidates.append((candidate, match_priority, source_rank))
if not matched_candidates:
return None
matched_candidates.sort(
key=lambda item: (
match_priority_group(item[1]),
search_result_quality_group(item[0]),
-candidate_file_size_bytes(item[0]),
item[2],
item[1],
)
)
return matched_candidates[0][0]
def _build_target_song_info(self, row: dict[str, Any], snapshot_song_info: Any):
if snapshot_song_info is not None:
return copy.deepcopy(snapshot_song_info)
from musicdl.modules.utils.data import SongInfo
return SongInfo(
source=SOURCE_CLIENT_NAMES.get(normalize_source_name(row.get("platform"))),
identifier=str(row.get("remote_song_id") or row.get("id") or ""),
song_name=row.get("name"),
singers=row.get("singers"),
album=row.get("album"),
ext=row.get("ext"),
file_size_bytes=row.get("file_size_bytes"),
raw_data={},
)
def _rank_fallback_sources(self, origin_source: str, fallback_sources: list[str]) -> list[str]:
ordered_sources = dedupe_preserve_order(list(fallback_sources))
if len(ordered_sources) <= 1 or self.resolver_stats_repo is None:
return ordered_sources
try:
ranked_sources = self.resolver_stats_repo.rank_fallback_sources(
origin_source,
ordered_sources,
warmup_attempts=self.warmup_attempts,
)
except Exception:
return ordered_sources
ranked_ordered_sources = dedupe_preserve_order(list(ranked_sources or []))
filtered_ranked_sources = [source for source in ranked_ordered_sources if source in ordered_sources]
for source in ordered_sources:
if source not in filtered_ranked_sources:
filtered_ranked_sources.append(source)
return filtered_ranked_sources
def _record_fallback_result(self, origin_source: str, candidate_source: str, *, succeeded: bool) -> None:
if self.resolver_stats_repo is None:
return
try:
self.resolver_stats_repo.record_fallback_result(
origin_source,
candidate_source,
succeeded=succeeded,
)
except Exception:
return
def resolve_song_info(
self,
row: dict[str, Any],
snapshot_song_info: Any,
download_sources: list[str] | None = None,
progress_callback: Callable[[str], None] | None = None,
) -> Any:
target_song_info = self._build_target_song_info(row=row, snapshot_song_info=snapshot_song_info)
preferred_source = normalize_source_name(getattr(target_song_info, "source", None) or row.get("platform"))
ordered_sources = dedupe_preserve_order(list(download_sources or DEFAULT_DOWNLOAD_SOURCES))
keyword = build_resolve_keyword(target_song_info, row)
candidate_rows: list[tuple[Any, int, int]] = []
fallback_sources = [source for source in ordered_sources if source != preferred_source]
ranked_fallback_sources = self._rank_fallback_sources(preferred_source, fallback_sources)
should_attempt_preferred = preferred_source not in {"", "unknown", None}
total_attempts = len(ranked_fallback_sources) + (1 if should_attempt_preferred else 0)
if should_attempt_preferred:
source_rank = 0
self._emit_progress(
progress_callback,
f"resolving source {preferred_source} ({source_rank + 1}/{total_attempts})",
)
try:
client = self.client_factory(preferred_source)
refreshed_song = self._refresh_song_info(client, target_song_info)
if self._has_valid_download_url(refreshed_song):
merged_refreshed = merge_resolved_song_info(target_song_info, refreshed_song)
refreshed_match_priority = song_info_match_priority(merged_refreshed, target_song_info)
candidate_rows.append((merged_refreshed, refreshed_match_priority, source_rank))
if is_high_confidence_match(refreshed_match_priority):
return merged_refreshed
search_candidates = self._search_source_candidates(preferred_source, keyword)
best_candidate = self._pick_best_candidate(search_candidates, target_song_info, source_rank)
if best_candidate is not None:
merged_candidate = merge_resolved_song_info(target_song_info, best_candidate)
match_priority = song_info_match_priority(merged_candidate, target_song_info)
candidate_rows.append((merged_candidate, match_priority, source_rank))
if is_high_confidence_match(match_priority):
return merged_candidate
except Exception:
pass
fallback_start_rank = 2 if should_attempt_preferred else 1
for source_rank, source in enumerate(ranked_fallback_sources, start=fallback_start_rank):
self._emit_progress(
progress_callback,
f"resolving source {source} ({source_rank}/{total_attempts})",
)
search_candidates = self._search_source_candidates(source, keyword)
best_candidate = self._pick_best_candidate(search_candidates, target_song_info, source_rank - 1)
if best_candidate is None:
self._record_fallback_result(preferred_source, source, succeeded=False)
continue
self._record_fallback_result(preferred_source, source, succeeded=True)
return merge_resolved_song_info(target_song_info, best_candidate)
if not candidate_rows:
return target_song_info
candidate_rows.sort(
key=lambda item: (
match_priority_group(item[1]),
search_result_quality_group(item[0]),
-candidate_file_size_bytes(item[0]),
item[2],
item[1],
)
)
return candidate_rows[0][0]
@@ -0,0 +1,166 @@
from __future__ import annotations
import sqlite3
from contextlib import suppress
from pathlib import Path
from typing import Iterable
# Lock-wait budget in milliseconds; used for both the sqlite3 connect timeout
# (converted to seconds) and the busy_timeout pragma.
SQLITE_BUSY_TIMEOUT_MS = 30000
# File name of the resolver statistics database.
RESOLVER_STATS_DB_FILENAME = "resolver_stats.db"
# DDL statements executed in order when the statistics database is initialized:
# one row per (origin_source, candidate_source) pair plus an index on origin_source.
SCHEMA_STATEMENTS = [
"""
CREATE TABLE IF NOT EXISTS resolver_source_stats (
origin_source TEXT NOT NULL,
candidate_source TEXT NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
resolve_success_count INTEGER NOT NULL DEFAULT 0,
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
last_attempt_at TEXT,
last_success_at TEXT,
PRIMARY KEY(origin_source, candidate_source)
)
""",
"""
CREATE INDEX IF NOT EXISTS idx_resolver_source_stats_origin_source
ON resolver_source_stats (origin_source)
""",
]
def default_resolver_stats_db_path(db_path: str | Path) -> Path:
    """Return the resolver-stats database path located beside ``db_path``.

    The result lives in the parent directory of ``db_path`` and is named
    ``RESOLVER_STATS_DB_FILENAME``.
    """
    sibling_dir = Path(db_path).parent
    return sibling_dir / RESOLVER_STATS_DB_FILENAME
def connect_resolver_stats_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the statistics database at ``db_path``, creating parent directories.

    The connection uses ``sqlite3.Row`` rows and the shared busy timeout.
    WAL journaling and NORMAL synchronous mode are requested, but an
    ``OperationalError`` from either pragma is ignored.

    Returns:
        The open connection; the caller is responsible for closing it.
    """
    database_file = Path(db_path)
    database_file.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(database_file, timeout=SQLITE_BUSY_TIMEOUT_MS / 1000)
    connection.row_factory = sqlite3.Row
    connection.execute(f"PRAGMA busy_timeout = {SQLITE_BUSY_TIMEOUT_MS}")
    for optional_pragma in ("PRAGMA journal_mode = WAL", "PRAGMA synchronous = NORMAL"):
        with suppress(sqlite3.OperationalError):
            connection.execute(optional_pragma)
    return connection
def initialize_resolver_stats_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the statistics database and make sure its schema exists.

    Every statement in ``SCHEMA_STATEMENTS`` is executed and committed.

    Returns:
        The open connection; the caller is responsible for closing it.

    Raises:
        Whatever the connection, schema statements, or commit raise. The
        connection is closed before the error propagates so a failed
        initialization does not leak a handle.
    """
    conn = connect_resolver_stats_database(db_path)
    try:
        for statement in SCHEMA_STATEMENTS:
            conn.execute(statement)
        conn.commit()
    except Exception:
        # Nobody receives the connection on failure, so release it here.
        conn.close()
        raise
    return conn
class ResolverStatsRepository:
def __init__(self, db_path: str | Path):
self.db_path = Path(db_path)
conn = initialize_resolver_stats_database(self.db_path)
conn.close()
def record_fallback_result(
self,
origin_source: str,
candidate_source: str,
*,
succeeded: bool,
) -> None:
conn = connect_resolver_stats_database(self.db_path)
try:
conn.execute(
"""
INSERT INTO resolver_source_stats (
origin_source,
candidate_source,
attempt_count,
resolve_success_count,
created_at,
updated_at,
last_attempt_at,
last_success_at
)
VALUES (
?, ?, 1, ?,
CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP,
CASE WHEN ? THEN CURRENT_TIMESTAMP ELSE NULL END
)
ON CONFLICT(origin_source, candidate_source) DO UPDATE SET
attempt_count = attempt_count + 1,
resolve_success_count = (
resolve_success_count + excluded.resolve_success_count
),
updated_at = CURRENT_TIMESTAMP,
last_attempt_at = CURRENT_TIMESTAMP,
last_success_at = CASE
WHEN excluded.resolve_success_count > 0
THEN CURRENT_TIMESTAMP
ELSE last_success_at
END
""",
(
origin_source,
candidate_source,
int(succeeded),
int(succeeded),
),
)
conn.commit()
finally:
conn.close()
def rank_fallback_sources(
self,
origin_source: str,
fallback_sources: Iterable[str],
*,
warmup_attempts: int = 1000,
) -> list[str]:
sources = list(fallback_sources)
if len(sources) <= 1:
return sources
conn = connect_resolver_stats_database(self.db_path)
try:
if warmup_attempts > 0:
row = conn.execute(
"""
SELECT COALESCE(SUM(attempt_count), 0) AS total_attempt_count
FROM resolver_source_stats
WHERE origin_source = ?
""",
(origin_source,),
).fetchone()
total_attempt_count = int(row["total_attempt_count"] if row else 0)
if total_attempt_count < warmup_attempts:
return sources
placeholders = ", ".join("?" for _ in sources)
rows = conn.execute(
f"""
SELECT candidate_source, attempt_count, resolve_success_count
FROM resolver_source_stats
WHERE origin_source = ? AND candidate_source IN ({placeholders})
""",
(origin_source, *sources),
).fetchall()
finally:
conn.close()
stats_by_source = {
str(row["candidate_source"]): (
int(row["attempt_count"]),
int(row["resolve_success_count"]),
)
for row in rows
}
order_index = {source: idx for idx, source in enumerate(sources)}
def _sort_key(source: str) -> tuple[float, int]:
attempts, successes = stats_by_source.get(source, (0, 0))
smoothed_success_rate = (successes + 1) / (attempts + 2)
return (-smoothed_success_rate, order_index[source])
return sorted(sources, key=_sort_key)
@@ -0,0 +1,88 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
# Matches one character that is replaced when sanitizing a path component:
# < > : " / \ | ? * and the control characters 0x00-0x1f.
INVALID_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
# Web port used when no valid port is configured.
DEFAULT_WEB_PORT = 18080
def sanitize_path_component(value: str, fallback: str) -> str:
    """Return ``value`` made safe for use as a single path component.

    Surrounding whitespace is stripped, every character matched by
    ``INVALID_PATH_CHARS_RE`` becomes ``_``, and trailing dots and spaces
    are removed. ``fallback`` is returned when nothing is left (including
    when ``value`` is empty or None).
    """
    trimmed = (value or "").strip()
    replaced = INVALID_PATH_CHARS_RE.sub("_", trimmed)
    safe_name = replaced.rstrip(". ")
    if safe_name:
        return safe_name
    return fallback
def pick_first_artist_name(singers: str | None) -> str:
for candidate in re.split(r"\s*(?:/|,|&|\|)\s*", singers or ""):
if candidate.strip():
return sanitize_path_component(candidate, "Unknown Artist")
return "Unknown Artist"
def build_download_relative_dir(platform: str, singers: str | None) -> Path:
    """Return the relative download directory ``<platform>/<first artist>``.

    The platform part falls back to ``"unknown"`` when it sanitizes to
    nothing.
    """
    platform_dir = Path(sanitize_path_component(platform, "unknown"))
    artist_dir = pick_first_artist_name(singers)
    return platform_dir / artist_dir
def parse_web_port(value: str | int | None, fallback: int = DEFAULT_WEB_PORT) -> int:
    """Parse ``value`` as a TCP port number.

    Returns:
        The integer port when ``value`` converts with ``int()`` and lies in
        1-65535; otherwise ``fallback``.
    """
    try:
        port = int(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return fallback
    return port if 1 <= port <= 65535 else fallback
@dataclass
class CatalogSyncRuntimeConfig:
    """Filesystem locations and web settings for a catalog-sync deployment."""

    root_dir: Path
    app_home: Path
    library_dir: Path
    db_path: Path
    env_file: Path
    input_dir: Path
    log_dir: Path
    python_bin: str
    venv_dir: Path
    web_host: str
    web_port: int
    download_layout: str

    @classmethod
    def from_mapping(cls, mapping: dict[str, str]) -> "CatalogSyncRuntimeConfig":
        """Build a config from an environment-style mapping.

        ``ROOT_DIR`` is required. ``APP_HOME`` defaults to
        ``ROOT_DIR/catalogsync`` and the remaining paths default to locations
        under ``ROOT_DIR`` or ``APP_HOME``. An invalid ``WEB_PORT`` falls back
        to ``DEFAULT_WEB_PORT``.

        Raises:
            KeyError: If ``ROOT_DIR`` is missing from ``mapping``.
        """
        root_dir = Path(mapping["ROOT_DIR"])
        app_home = Path(mapping.get("APP_HOME", root_dir / "catalogsync"))
        library_dir = Path(mapping.get("LIBRARY_DIR", root_dir / "library"))
        web_port = parse_web_port(mapping.get("WEB_PORT"), fallback=DEFAULT_WEB_PORT)

        def path_setting(key: str, default: Path) -> Path:
            # An explicit mapping entry wins over the derived default.
            return Path(mapping.get(key, default))

        return cls(
            root_dir=root_dir,
            app_home=app_home,
            library_dir=library_dir,
            db_path=path_setting("DB_PATH", app_home / "data" / "catalogsync.db"),
            env_file=path_setting("ENV_FILE", app_home / "config" / "catalogsync.env"),
            input_dir=path_setting("INPUT_DIR", app_home / "inputs"),
            log_dir=path_setting("LOG_DIR", app_home / "logs"),
            python_bin=mapping.get("PYTHON_BIN", "python3"),
            venv_dir=path_setting("VENV_DIR", app_home / "app" / ".venv"),
            web_host=mapping.get("WEB_HOST", "127.0.0.1"),
            web_port=web_port,
            download_layout=mapping.get("DOWNLOAD_LAYOUT", "platform_first_artist"),
        )

    def ensure_directories(self) -> None:
        """Create every directory the deployment needs; existing ones are kept.

        The virtualenv directory itself is not created here.
        """
        required_dirs = (
            self.root_dir,
            self.library_dir,
            self.app_home / "app",
            self.app_home / "bin",
            self.app_home / "config",
            self.db_path.parent,
            self.env_file.parent,
            self.input_dir,
            self.log_dir,
        )
        for directory in required_dirs:
            directory.mkdir(parents=True, exist_ok=True)
@@ -0,0 +1,643 @@
from __future__ import annotations
import hashlib
import inspect
import logging
import warnings
from pathlib import Path
from typing import Any, Callable, Dict
import requests
from urllib3.exceptions import InsecureRequestWarning
from .collectors import KuwoCollector, NeteaseCollector, QQCollector, parse_kuwo_toplist_html
from .deferred import (
build_kuwo_playlist_song_infos,
build_kuwo_raw_track_song_infos,
build_netease_playlist_song_infos,
build_qq_playlist_song_infos,
build_qq_raw_track_song_infos,
)
from .models import CatalogSong, extract_artist_names
from .playlist_artifacts import write_playlist_artifacts
from .repository import CatalogRepository
# Platform key -> client type name passed to BuildMusicClient.
SOURCE_CLIENT_NAMES = {
"netease": "NeteaseMusicClient",
"qq": "QQMusicClient",
"kuwo": "KuwoMusicClient",
}
# Platform key -> display name stored for each playlist pool kind.
# The values are runtime data and are kept in their original language.
SOURCE_POOL_NAMES = {
"netease": {"playlist_square": "网易云歌单广场", "toplist": "网易云排行榜"},
"qq": {"playlist_square": "QQ 音乐歌单广场", "toplist": "QQ 音乐排行榜"},
"kuwo": {"playlist_square": "酷我歌单广场", "toplist": "酷我排行榜"},
}
# Module-level logger.
LOGGER = logging.getLogger(__name__)
# Page size requested per platform when paging through the playlist square;
# platforms not listed here use 30.
PLAYLIST_SQUARE_PAGE_SIZES = {
"netease": 35,
"qq": 30,
"kuwo": 30,
}
# Progress callbacks receive an event type and a payload dict.
PlaylistProgressCallback = Callable[[str, Dict[str, Any]], None]
class CatalogSyncService:
    """Collect playlists from music platforms and sync their songs into the catalog.

    The service talks to a ``CatalogRepository`` for persistence, to
    per-platform collectors for playlist discovery, and to lazily built
    musicdl clients for resolving playlist contents.
    """

    def __init__(
        self,
        repository: CatalogRepository,
        collectors: dict[str, object] | None = None,
        work_dir: str = "musicdl_outputs/catalogsync",
        playlists_root: str | Path | None = None,
    ):
        """Store collaborators.

        Args:
            repository: Persistence layer for pools, playlists, songs and artists.
            collectors: Platform key -> collector; defaults to the Netease, QQ
                and Kuwo collectors.
            work_dir: Working directory handed to the music clients.
            playlists_root: Optional directory for playlist artifacts; resolved
                to an absolute path when given.
        """
        self.repository = repository
        self.collectors = collectors or {
            "netease": NeteaseCollector(),
            "qq": QQCollector(),
            "kuwo": KuwoCollector(),
        }
        self.work_dir = work_dir
        self.playlists_root = Path(playlists_root).resolve() if playlists_root else None
        self._clients: dict[str, object] = {}

    def get_client(self, platform: str):
        """Return the cached music client for ``platform``, building it on first use.

        Raises:
            KeyError: If ``platform`` has no entry in ``SOURCE_CLIENT_NAMES``.
        """
        if platform not in self._clients:
            from musicdl.modules import BuildMusicClient

            self._clients[platform] = BuildMusicClient(
                {
                    "type": SOURCE_CLIENT_NAMES[platform],
                    "disable_print": True,
                    "maintain_session": False,
                    "work_dir": self.work_dir,
                    "search_size_per_source": 1,
                    "search_size_per_page": 1,
                    "strict_limit_search_size_per_page": True,
                }
            )
        return self._clients[platform]

    def store_playlist_candidates(
        self,
        platform: str,
        pool_kind: str,
        pool_name: str,
        candidates: list,
        pool_external_id: str | None = None,
    ) -> int:
        """Upsert a playlist pool and link every candidate playlist to it.

        Args:
            platform: Platform key owning the pool.
            pool_kind: Pool kind; also used as the external id when
                ``pool_external_id`` is not given.
            pool_name: Display name of the pool.
            candidates: Playlist candidates to upsert and link.
            pool_external_id: Optional explicit external id.

        Returns:
            The id of the pool.
        """
        pool_id = self.repository.upsert_playlist_pool(
            platform=platform,
            pool_kind=pool_kind,
            external_id=pool_external_id or pool_kind,
            name=pool_name,
        )
        for candidate in candidates:
            playlist_id = self.repository.upsert_playlist(candidate)
            self.repository.link_pool_playlist(pool_id, playlist_id)
        return pool_id

    def collect_playlists(
        self,
        sources: list[str],
        include_playlist_square: bool = True,
        include_toplist: bool = True,
        progress_callback: PlaylistProgressCallback | None = None,
    ) -> dict[str, int]:
        """Collect and store playlists for each source.

        A failure while collecting a source's playlist square is logged and
        skipped; a failure while collecting its toplist propagates.

        Returns:
            Totals keyed by ``"playlist_square"`` and ``"toplist"``.
        """
        counts = {"playlist_square": 0, "toplist": 0}
        for source in sources:
            collector = self.collectors[source]
            self._emit_progress(
                progress_callback,
                "source_started",
                source=source,
                include_playlist_square=include_playlist_square,
                include_toplist=include_toplist,
            )
            if include_playlist_square:
                try:
                    counts["playlist_square"] += self._collect_playlist_square(
                        source,
                        collector,
                        progress_callback=progress_callback,
                    )
                except Exception:
                    # exc_info=True attaches the active exception to the log record.
                    LOGGER.warning(
                        "playlist_square collection failed for source=%s",
                        source,
                        exc_info=True,
                    )
            if include_toplist:
                toplist_candidates = collector.collect_toplist()
                self.store_playlist_candidates(
                    platform=source,
                    pool_kind="toplist",
                    pool_name=SOURCE_POOL_NAMES[source]["toplist"],
                    candidates=toplist_candidates,
                )
                counts["toplist"] += len(toplist_candidates)
                self._emit_progress(
                    progress_callback,
                    "toplist_collected",
                    source=source,
                    count=len(toplist_candidates),
                )
            self._emit_progress(progress_callback, "source_finished", source=source, counts=dict(counts))
        return counts

    def _collect_playlist_square(
        self,
        source: str,
        collector: object,
        *,
        progress_callback: PlaylistProgressCallback | None = None,
    ) -> int:
        """Page through a source's playlist square, storing unseen playlists.

        Paging stops when a non-empty page contains only already-seen remote
        ids, or when the page fetch reports that there is nothing more.

        Returns:
            The number of playlists stored.
        """
        total = 0
        page = 1
        seen_remote_ids: set[str] = set()
        while True:
            candidates, should_continue = self._collect_playlist_square_page(source, collector, page)
            unique_candidates = []
            for candidate in candidates:
                remote_id = str(getattr(candidate, "remote_id", "") or "").strip()
                dedupe_key = f"{source}:{remote_id}"
                if remote_id and dedupe_key in seen_remote_ids:
                    continue
                # Candidates without a remote id cannot be de-duplicated and are kept.
                if remote_id:
                    seen_remote_ids.add(dedupe_key)
                unique_candidates.append(candidate)
            if unique_candidates:
                self.store_playlist_candidates(
                    platform=source,
                    pool_kind="playlist_square",
                    pool_name=SOURCE_POOL_NAMES[source]["playlist_square"],
                    candidates=unique_candidates,
                )
                total += len(unique_candidates)
            self._emit_progress(
                progress_callback,
                "playlist_square_page",
                source=source,
                page=page,
                page_count=len(candidates),
                new_count=len(unique_candidates),
                total=total,
                duplicate_page=bool(candidates) and not bool(unique_candidates),
            )
            if (candidates and not unique_candidates) or not should_continue:
                break
            page += 1
        return total

    def _collect_playlist_square_page(self, source: str, collector: object, page: int) -> tuple[list, bool]:
        """Fetch one playlist-square page.

        Returns:
            ``(candidates, should_continue)``. Collectors that accept paging
            arguments continue while pages are non-empty unless they report
            ``has_more`` as false; collectors without paging arguments
            continue only when they report ``has_more`` as true (with a
            special case for mock methods driven by a side-effect sequence).
        """
        method = collector.collect_playlist_square
        kwargs = self._build_playlist_square_kwargs(method, source, page)
        if kwargs is None:
            candidates, has_more = self._normalize_playlist_square_result(method())
            if self._is_mock_side_effect_iterator(method):
                return candidates, (bool(candidates) and (has_more is not False))
            return candidates, bool(has_more)
        candidates, has_more = self._normalize_playlist_square_result(method(**kwargs))
        if has_more is False:
            return candidates, False
        return candidates, bool(candidates)

    @staticmethod
    def _normalize_playlist_square_result(result) -> tuple[list, bool | None]:
        """Normalize a collector result to ``(candidates, has_more)``.

        Accepts a ``(candidates, has_more)`` 2-tuple, a dict with
        ``"candidates"`` or ``"items"`` and optional ``"has_more"``, or a
        plain iterable. ``has_more`` is None when the collector did not say.
        """
        if isinstance(result, tuple) and len(result) == 2:
            candidates = list(result[0] or [])
            has_more = result[1]
            return candidates, None if has_more is None else bool(has_more)
        if isinstance(result, dict):
            raw_candidates = result.get("candidates")
            if raw_candidates is None:
                raw_candidates = result.get("items", [])
            candidates = list(raw_candidates or [])
            has_more = result.get("has_more")
            return candidates, None if has_more is None else bool(has_more)
        return list(result or []), None

    @staticmethod
    def _is_mock_side_effect_iterator(method) -> bool:
        """Return True when ``method`` has a non-callable, non-exception ``side_effect``."""
        side_effect = getattr(method, "side_effect", None)
        if side_effect is None:
            return False
        if isinstance(side_effect, BaseException):
            return False
        return not callable(side_effect)

    @staticmethod
    def _build_playlist_square_kwargs(method, source: str, page: int) -> dict[str, int] | None:
        """Build paging kwargs matching the parameters ``method`` accepts.

        Supports ``page``, ``page_size`` and (when ``page`` is absent)
        ``offset``. Returns None when the signature cannot be inspected or
        accepts none of them.
        """
        try:
            signature = inspect.signature(method)
        except (TypeError, ValueError):
            return None
        parameters = signature.parameters
        kwargs: dict[str, int] = {}
        page_size = PLAYLIST_SQUARE_PAGE_SIZES.get(source, 30)
        if "page" in parameters:
            kwargs["page"] = max(page, 1)
        if "page_size" in parameters:
            kwargs["page_size"] = page_size
        if "offset" in parameters and "page" not in parameters:
            kwargs["offset"] = max(page - 1, 0) * page_size
        return kwargs or None

    @staticmethod
    def _emit_progress(
        callback: PlaylistProgressCallback | None,
        event_type: str,
        **payload: Any,
    ) -> None:
        """Call ``callback(event_type, payload)`` when a callback is given."""
        if callback is None:
            return
        callback(event_type, payload)

    def import_manual_playlists(self, playlist_file: str | Path, candidates: list) -> list[int]:
        """Store manually supplied playlists under a per-platform manual-file pool.

        Returns:
            The playlist ids, in candidate order.
        """
        playlist_ids: list[int] = []
        pool_ids_by_platform: dict[str, int] = {}
        for candidate in candidates:
            pool_id = pool_ids_by_platform.get(candidate.platform)
            if pool_id is None:
                pool_id = self.repository.get_or_create_manual_file_pool(playlist_file, candidate.platform)
                pool_ids_by_platform[candidate.platform] = pool_id
            playlist_id = self.repository.upsert_playlist(candidate)
            self.repository.link_pool_playlist(pool_id, playlist_id)
            playlist_ids.append(playlist_id)
        return playlist_ids

    def store_playlist_songs(self, playlist_id: int, source_pool_id: int, song_infos: list[object]) -> int:
        """Store a playlist's songs and link their artists to a derived artist pool.

        Returns:
            The id of the derived artist pool.

        Raises:
            RuntimeError: If ``source_pool_id`` does not exist.
        """
        pool_row = self.repository.get_playlist_pool(source_pool_id)
        if not pool_row:
            raise RuntimeError(f"Unknown playlist pool: {source_pool_id}")
        artist_pool_id = self.repository.ensure_derived_artist_pool(
            platform=pool_row["platform"],
            source_pool_id=source_pool_id,
            source_pool_name=pool_row["name"],
        )
        for position, song_info in enumerate(song_infos, start=1):
            song = CatalogSong.from_song_info(song_info)
            song_id = self.repository.upsert_song(song)
            self.repository.link_playlist_song(playlist_id, song_id, position)
            for artist_name in extract_artist_names(song.metadata.get("raw_data"), song.singers):
                artist_id = self.repository.upsert_artist(song.platform, artist_name)
                self.repository.link_pool_artist(artist_pool_id, artist_id)
                self.repository.link_artist_song(artist_id, song_id)
        return artist_pool_id

    def sync_playlist_catalog(self, sources: list[str] | None = None, limit: int | None = None) -> int:
        """Sync every playlist the repository lists; return the number of songs processed."""
        processed = 0
        for playlist_row in self.repository.list_playlists(sources=sources, limit=limit):
            processed += self.sync_playlist_row(playlist_row)
        return processed

    def sync_specific_playlists(self, playlist_ids: list[int]) -> int:
        """Sync the playlists with the given ids; return the number of songs processed."""
        processed = 0
        for playlist_row in self.repository.list_playlists_by_ids(playlist_ids):
            processed += self.sync_playlist_row(playlist_row)
        return processed

    def sync_playlist_row(self, playlist_row) -> int:
        """Resolve one playlist's songs, store them per pool, and refresh its play count.

        Returns:
            The number of songs resolved for the playlist.
        """
        song_infos = self.resolve_playlist_song_infos(playlist_row)
        pool_ids = self.repository.get_pool_ids_for_playlist(int(playlist_row["id"]))
        for pool_id in pool_ids:
            self.store_playlist_songs(int(playlist_row["id"]), pool_id, song_infos)
        self._backfill_playlist_play_count(playlist_row)
        return len(song_infos)

    def _backfill_playlist_play_count(self, playlist_row) -> None:
        """Update the stored play count when one can be resolved."""
        playlist_id = int(playlist_row["id"])
        play_count = self.resolve_playlist_play_count(playlist_row)
        if play_count is None:
            return
        self.repository.update_playlist_play_count(playlist_id, play_count)

    def _resolve_playlists_root(self) -> Path | None:
        """Return (and create) the playlist artifact directory.

        Uses the configured ``playlists_root`` when set, otherwise a
        ``playlists`` directory beside the repository's default local library
        root. Returns None when neither is available.
        """
        if self.playlists_root is not None:
            self.playlists_root.mkdir(parents=True, exist_ok=True)
            return self.playlists_root
        library_root = self.repository.get_default_local_library_root()
        if library_root is None:
            return None
        playlists_root = library_root.parent / "playlists"
        playlists_root.mkdir(parents=True, exist_ok=True)
        return playlists_root

    def _playlist_export_payload(self, playlist_id: int) -> tuple[dict[str, Any], list[dict[str, Any]]] | None:
        """Return ``(playlist payload, song details)`` for export, or None if unknown.

        At most 5000 song details are requested from the repository.
        """
        playlist_rows = self.repository.list_playlists_by_ids([int(playlist_id)])
        if not playlist_rows:
            return None
        playlist_row = dict(playlist_rows[0])
        payload = {
            "id": int(playlist_row["id"]),
            "platform": str(playlist_row["platform"] or ""),
            "remote_playlist_id": str(playlist_row["remote_playlist_id"] or ""),
            "name": str(playlist_row["name"] or ""),
            "play_count": self._coerce_int(playlist_row["play_count"]),
            "cover_url": str(playlist_row.get("cover_url") or "").strip() or None,
        }
        songs = self.repository.list_playlist_song_details(int(playlist_id), limit=5000)
        return payload, songs

    def _write_playlist_artifacts(self, playlist_row) -> Path | None:
        """Write the playlist's artifacts; return their path or None.

        Returns None when no artifact directory is available, the playlist
        is unknown, or writing fails (the failure is logged).
        """
        playlists_root = self._resolve_playlists_root()
        if playlists_root is None:
            return None
        playlist_id = int(playlist_row["id"])
        export_payload = self._playlist_export_payload(playlist_id)
        if export_payload is None:
            return None
        playlist_payload, song_items = export_payload
        try:
            return write_playlist_artifacts(
                playlist=playlist_payload,
                songs=song_items,
                playlists_root=playlists_root,
            )
        except Exception:
            LOGGER.warning(
                "Failed to write playlist artifacts: playlist_id=%s",
                playlist_id,
                exc_info=True,
            )
            return None

    def ensure_playlist_artifacts_for_playlist(self, playlist_id: int) -> Path | None:
        """Write artifacts for ``playlist_id``; return None when it does not exist."""
        playlist_rows = self.repository.list_playlists_by_ids([int(playlist_id)])
        if not playlist_rows:
            return None
        return self._write_playlist_artifacts(playlist_rows[0])

    def resolve_playlist_play_count(self, playlist_row) -> int | None:
        """Fetch the playlist's current play count from its platform.

        Only ``playlist_url`` playlists on netease, qq or kuwo with a remote
        id are queried. The stored play count is returned in every other
        case, on any request or parsing error (which is logged), and when
        the platform reports no count or a count of zero.
        """
        platform = str(playlist_row["platform"] or "").strip()
        parse_strategy = str(playlist_row["parse_strategy"] or "").strip()
        remote_id = str(playlist_row["remote_playlist_id"] or "").strip()
        fallback_value = self._coerce_int(playlist_row["play_count"])
        if parse_strategy != "playlist_url" or platform not in {"netease", "qq", "kuwo"} or not remote_id:
            return fallback_value
        try:
            if platform == "netease":
                client = self.get_client("netease")
                response = client.post(
                    "https://music.163.com/api/v6/playlist/detail",
                    data={"id": remote_id},
                    timeout=(10, 30),
                )
                response.raise_for_status()
                payload = response.json() or {}
                playlist_payload = payload.get("playlist") or {}
                return self._coerce_int(playlist_payload.get("playCount")) or fallback_value
            if platform == "qq":
                client = self.get_client("qq")
                response = client.get(
                    "https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg",
                    headers={"Referer": f"https://y.qq.com/n/ryqq/playlist/{remote_id}"},
                    params={
                        "disstid": str(remote_id),
                        "type": "1",
                        "json": "1",
                        "utf8": "1",
                        "onlysong": "0",
                        "format": "json",
                    },
                    timeout=(10, 30),
                )
                response.raise_for_status()
                payload = response.json() or {}
                playlist_payload = ((payload.get("cdlist") or [{}])[0] or {}) if isinstance(payload, dict) else {}
                return self._coerce_int(playlist_payload.get("visitnum")) or fallback_value
            # Remaining platform after the guard above is kuwo.
            client = self.get_client("kuwo")
            response = client.get(
                f"https://m.kuwo.cn/newh5app/wapi/api/www/playlist/playListInfo?pid={remote_id}&pn=1&rn=100",
                timeout=(10, 30),
            )
            response.raise_for_status()
            payload = response.json() or {}
            data_payload = payload.get("data") or {}
            return self._coerce_int(data_payload.get("listencnt")) or fallback_value
        except Exception:
            LOGGER.warning(
                "Failed to resolve playlist play_count during sync: platform=%s remote_id=%s",
                platform,
                remote_id,
                exc_info=True,
            )
            return fallback_value

    @staticmethod
    def _coerce_int(value: object) -> int | None:
        """Convert ``value`` to an int, or return None when it has no integer meaning.

        None, booleans, blank text, unparseable text and non-finite numbers
        (NaN, infinity) all yield None. Text may contain thousands
        separators (``,``). Integer text is parsed exactly; other numeric
        text goes through ``float`` and is truncated toward zero.
        """
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, int):
            return int(value)
        if isinstance(value, float):
            try:
                return int(value)
            except (ValueError, OverflowError):
                # int(nan) raises ValueError, int(inf) raises OverflowError.
                return None
        text = str(value).strip().replace(",", "")
        if not text:
            return None
        try:
            # Exact path first: float() would round integers above 2**53.
            return int(text)
        except ValueError:
            pass
        try:
            return int(float(text))
        except (ValueError, OverflowError):
            return None

    def resolve_playlist_song_infos(self, playlist_row) -> list[object]:
        """Resolve a playlist row to song infos according to its parse strategy.

        Raises:
            ValueError: If the row's ``parse_strategy`` is not supported.
        """
        strategy = playlist_row["parse_strategy"]
        if strategy == "playlist_url":
            if playlist_row["platform"] == "netease":
                return build_netease_playlist_song_infos(self.get_client("netease"), playlist_row["url"])
            if playlist_row["platform"] == "qq":
                return build_qq_playlist_song_infos(self.get_client("qq"), playlist_row["url"])
            if playlist_row["platform"] == "kuwo":
                return build_kuwo_playlist_song_infos(self.get_client("kuwo"), playlist_row["url"])
            client = self.get_client(playlist_row["platform"])
            return client.parseplaylist(playlist_row["url"])
        if strategy == "netease_toplist":
            return build_netease_playlist_song_infos(self.get_client("netease"), playlist_row["url"])
        if strategy == "qq_toplist":
            return self._resolve_qq_toplist(playlist_row)
        if strategy == "kuwo_toplist":
            return self._resolve_kuwo_toplist(playlist_row)
        raise ValueError(f"Unsupported parse strategy: {strategy}")

    def _resolve_qq_toplist(self, playlist_row) -> list[object]:
        """Resolve a QQ toplist to song infos.

        Uses the legacy toplist endpoint first and the ``musicu.fcg``
        fallback when it yields no tracks. An unexpected (non-dict) JSON
        payload is treated as "no tracks".
        """
        remote_id = str(playlist_row["remote_playlist_id"] or "").strip()
        # NOTE(review): TLS verification is disabled (verify=False) and the
        # resulting warning is silenced; confirm this is still required.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", InsecureRequestWarning)
            response = requests.get(
                "https://c.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg",
                params={
                    "topid": remote_id,
                    "tpl": "3",
                    "page": "detail",
                    "type": "top",
                    "format": "json",
                },
                headers={
                    "User-Agent": "Mozilla/5.0",
                    "Referer": "https://y.qq.com/",
                    "Origin": "https://y.qq.com/",
                },
                timeout=15,
                verify=False,
            )
        response.raise_for_status()
        payload = response.json()
        songlist = payload.get("songlist") if isinstance(payload, dict) else None
        if not isinstance(songlist, list):
            songlist = []
        raw_tracks = []
        for item in songlist:
            if not isinstance(item, dict):
                continue
            track_data = item.get("data")
            if isinstance(track_data, dict) and track_data:
                raw_tracks.append(track_data)
        if not raw_tracks:
            raw_tracks = self._resolve_qq_toplist_fallback_tracks(remote_id)
        client = self.get_client("qq")
        return build_qq_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_row["name"])

    def _resolve_qq_toplist_fallback_tracks(self, remote_id: str) -> list[dict]:
        """Fetch QQ toplist tracks through ``musicu.fcg`` and shape them as raw tracks.

        Items without a title are skipped. Items without a usable song id
        get a synthetic id derived from the toplist id, title, singers and
        album mid.
        """
        if not remote_id:
            return []
        request_payload = {
            "comm": {"ct": 24, "cv": 0},
            "toplist": {
                "module": "musicToplist.ToplistInfoServer",
                "method": "GetDetail",
                "param": {
                    "topid": int(remote_id) if remote_id.isdigit() else remote_id,
                    "offset": 0,
                    "num": 100,
                    "period": "",
                },
            },
        }
        # NOTE(review): TLS verification is disabled here as well.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", InsecureRequestWarning)
            response = requests.post(
                "https://u.y.qq.com/cgi-bin/musicu.fcg",
                json=request_payload,
                headers={
                    "User-Agent": "Mozilla/5.0",
                    "Referer": "https://y.qq.com/",
                    "Origin": "https://y.qq.com/",
                    "Content-Type": "application/json",
                },
                timeout=15,
                verify=False,
            )
        response.raise_for_status()
        payload_raw = response.json()
        payload = payload_raw if isinstance(payload_raw, dict) else {}
        toplist_data = payload.get("toplist") or {}
        toplist_inner = toplist_data.get("data") or {}
        toplist_detail = toplist_inner.get("data") or {}
        raw_items = toplist_detail.get("song") or toplist_detail.get("songlist") or []
        if not isinstance(raw_items, list):
            return []
        fallback_tracks: list[dict] = []
        for item in raw_items:
            if not isinstance(item, dict):
                continue
            title = str(item.get("title") or item.get("name") or "").strip()
            singer_text = str(item.get("singerName") or item.get("singers") or "").strip()
            album_mid = str(item.get("albumMid") or item.get("albummid") or "").strip()
            if not title:
                continue
            track_id = str(
                item.get("songMid")
                or item.get("songmid")
                or item.get("mid")
                or item.get("songId")
                or item.get("songid")
                or ""
            ).strip()
            if not track_id or track_id == "0":
                # The hash only builds a stable identifier; it is not a security use of MD5.
                hash_input = f"{remote_id}|{title}|{singer_text}|{album_mid}"
                track_id = f"qqtop_{remote_id}_{hashlib.md5(hash_input.encode('utf-8')).hexdigest()[:16]}"
            singer_items = [{"name": part.strip()} for part in singer_text.split("/") if part.strip()]
            fallback_tracks.append(
                {
                    "songmid": track_id,
                    "title": title,
                    "singer": singer_items,
                    "album": {"mid": album_mid, "title": str(item.get("albumName") or "").strip()},
                    "albummid": album_mid,
                    "interval": item.get("interval", 0),
                    "qq_toplist_fallback": True,
                    "qq_toplist_remote_id": remote_id,
                }
            )
        return fallback_tracks

    def _resolve_kuwo_toplist(self, playlist_row) -> list[object]:
        """Resolve a Kuwo toplist to song infos via the rank API, looked up by name.

        When the stored name is empty or equal to the remote id, the name is
        looked up from the rank-list page first. Entries without a ``rid``
        and malformed payload sections are skipped.
        """
        playlist_name = str(playlist_row["name"] or "").strip()
        remote_id = str(playlist_row["remote_playlist_id"] or "").strip()
        if not playlist_name or playlist_name == remote_id:
            playlist_name = self._resolve_kuwo_toplist_name(remote_id) or remote_id
        # NOTE(review): third-party endpoint queried with TLS verification
        # disabled; confirm this is still required.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", InsecureRequestWarning)
            response = requests.get(
                "https://kw-api.cenguigui.cn",
                params={"name": playlist_name, "type": "rank", "page": "1", "limit": "100"},
                timeout=15,
                verify=False,
            )
        response.raise_for_status()
        payload = response.json()
        data_payload = payload.get("data") if isinstance(payload, dict) else None
        music_list = data_payload.get("musicList") if isinstance(data_payload, dict) else None
        if not isinstance(music_list, list):
            music_list = []
        raw_tracks = []
        for item in music_list:
            if not isinstance(item, dict):
                continue
            rid_value = item.get("rid")
            # A JSON null must not become the literal string "None".
            rid = "" if rid_value is None else str(rid_value).strip()
            if not rid:
                continue
            raw_tracks.append(
                {
                    "musicrid": f"MUSIC_{rid}",
                    "rid": rid,
                    "name": item.get("name"),
                    "artist": item.get("artist"),
                    "album": item.get("album"),
                    "albumpic": item.get("pic"),
                }
            )
        client = self.get_client("kuwo")
        return build_kuwo_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_name)

    def _resolve_kuwo_toplist_name(self, remote_id: str) -> str | None:
        """Look up a Kuwo toplist's name from the rank-list page; None if not found."""
        if not remote_id:
            return None
        # NOTE(review): TLS verification is disabled here as well.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", InsecureRequestWarning)
            response = requests.get(
                "https://www.kuwo.cn/rankList",
                params={"bangId": remote_id},
                timeout=15,
                verify=False,
            )
        response.raise_for_status()
        for candidate in parse_kuwo_toplist_html(response.text):
            if candidate.remote_id == remote_id:
                return candidate.name
        return None

    @staticmethod
    def _resolve_raw_tracks(client, raw_tracks: list[dict]) -> list[object]:
        """Resolve raw tracks to song infos that have a valid download URL.

        The official parser result is preferred; the third-party result is
        used when the official parser raises or yields no valid URL. Tracks
        for which neither result is valid (including a missing result) are
        dropped.
        """
        song_infos = []
        for track in raw_tracks:
            song_info_flac = client._parsewiththirdpartapis(track, {})
            try:
                song_info = client._parsewithofficialapiv1(
                    track,
                    song_info_flac=song_info_flac,
                    lossless_quality_is_sufficient=not bool(client.default_cookies),
                    request_overrides={},
                )
            except Exception:
                song_info = song_info_flac
            # getattr tolerates a parser that returned None instead of a song info.
            if not getattr(song_info, "with_valid_download_url", False):
                song_info = song_info_flac
            if getattr(song_info, "with_valid_download_url", False):
                song_infos.append(song_info)
        return song_infos
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,262 @@
from __future__ import annotations
import argparse
import csv
import json
import re
import sqlite3
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple
# Substrings searched for in the lower-cased song title; any hit yields the
# "name_keyword" reason code. The Chinese entries presumably mean
# "live (on site)" and "concert" - confirm wording with the catalog owners.
LIVE_NAME_KEYWORDS = ("live", "现场", "演唱会")
# Substrings searched for in the lower-cased album title; any hit yields the
# "album_show_keyword" reason code. Apart from "演唱会" these look like titles
# of televised music shows - TODO confirm the list is complete.
LIVE_ALBUM_KEYWORDS = (
    "演唱会",
    "我是歌手",
    "我们的歌",
    "声生不息",
    "时光音乐会",
    "天赐的声音",
    "披荆斩棘",
    "乘风",
)
@dataclass(frozen=True)
class SuspectedLiveSong:
    """Immutable record of one song flagged as a suspected live/stage version."""

    # ``id`` column of the matched ``songs`` row.
    song_id: int
    # Source platform of the song, stripped of surrounding whitespace.
    platform: str
    # Identifier of the song on its source platform.
    remote_song_id: str
    # Song title.
    name: str
    # Singer text exactly as stored in the ``singers`` column (stripped).
    singers: str
    # Album title; an empty string when the column is empty/NULL.
    album: str
    # Reason codes emitted by ``detect_suspected_live_reason_codes``.
    reason_codes: Tuple[str, ...]
def _normalize_text(value: Any) -> str:
return str(value or "").strip()
def _normalize_compact_text(value: Any) -> str:
    """Return a lower-cased form of ``value`` with all separators removed.

    Whitespace, underscores and every other non-word character are dropped so
    that titles differing only in punctuation or spacing compare equal.
    """
    lowered = _normalize_text(value).lower()
    separators = re.compile(r"[\s\W_]+", flags=re.UNICODE)
    return separators.sub("", lowered)
def detect_suspected_live_reason_codes(name: Any, album: Any) -> List[str]:
    """Return the reason codes that mark a song as a suspected live version.

    Args:
        name: Song title (any value; normalized to a stripped string).
        album: Album title (any value; normalized to a stripped string).

    Returns:
        A list holding ``"name_keyword"`` when the title contains a live
        keyword and/or ``"album_show_keyword"`` when the album title contains
        a show keyword. The album check is skipped when the album is empty,
        is the literal text ``NULL`` (any case), or its compact form starts
        with the compact song title.
    """
    name_text = _normalize_text(name)
    album_text = _normalize_text(album)
    reasons: List[str] = []

    lowered_name = name_text.lower()
    for keyword in LIVE_NAME_KEYWORDS:
        if keyword in lowered_name:
            reasons.append("name_keyword")
            break

    album_is_missing = album_text == "" or album_text.upper() == "NULL"
    if album_is_missing:
        return reasons

    compact_name = _normalize_compact_text(name_text)
    compact_album = _normalize_compact_text(album_text)
    # An album titled after the song itself says nothing about a show, even
    # if the shared title happens to contain a keyword. ``startswith`` also
    # covers the exact-match case.
    album_named_after_song = (
        bool(compact_name)
        and bool(compact_album)
        and compact_album.startswith(compact_name)
    )
    if album_named_after_song:
        return reasons

    lowered_album = album_text.lower()
    for keyword in LIVE_ALBUM_KEYWORDS:
        if keyword in lowered_album:
            reasons.append("album_show_keyword")
            break
    return reasons
def _connect_readonly_database(db_path: str | Path) -> sqlite3.Connection:
path = Path(db_path).resolve()
if not path.exists():
raise FileNotFoundError(f"Database not found: {path}")
conn = sqlite3.connect(f"{path.as_uri()}?mode=ro", uri=True)
conn.row_factory = sqlite3.Row
return conn
def _song_scan_query(downloaded_only: bool) -> str:
where_clause = "WHERE d.song_id IS NOT NULL" if downloaded_only else ""
return f"""
WITH downloaded_song_ids AS (
SELECT DISTINCT fa.song_id
FROM file_locations AS fl
JOIN file_assets AS fa ON fa.id = fl.file_asset_id
JOIN storage_backends AS sb ON sb.id = fl.backend_id
WHERE fl.status = 'active'
AND sb.backend_type = 'local_fs'
)
SELECT
s.id,
s.platform,
s.remote_song_id,
s.name,
s.singers,
s.album
FROM songs AS s
LEFT JOIN downloaded_song_ids AS d ON d.song_id = s.id
{where_clause}
ORDER BY s.id DESC
"""
def scan_suspected_live_songs(
    db_path: str | Path,
    *,
    downloaded_only: bool = True,
    limit: Optional[int] = None,
) -> List[SuspectedLiveSong]:
    """Scan the catalog database for songs that look like live/stage versions.

    Args:
        db_path: Path to the SQLite catalog database (opened read-only).
        downloaded_only: When true, only songs with an active local file are
            scanned; otherwise every song is scanned.
        limit: Maximum number of matches to return. ``None`` means no limit;
            zero or a negative value returns an empty list without touching
            the database.

    Returns:
        Matching songs in descending ``songs.id`` order.

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    normalized_limit = None if limit is None else max(int(limit), 0)
    if normalized_limit == 0:
        return []
    matches: List[SuspectedLiveSong] = []
    conn = _connect_readonly_database(db_path)
    try:
        # Stream rows from the cursor instead of fetchall(): only matches are
        # kept in memory and reading stops as soon as the limit is reached.
        for row in conn.execute(_song_scan_query(downloaded_only)):
            reason_codes = detect_suspected_live_reason_codes(
                name=row["name"],
                album=row["album"],
            )
            if not reason_codes:
                continue
            matches.append(
                SuspectedLiveSong(
                    song_id=int(row["id"]),
                    platform=_normalize_text(row["platform"]),
                    remote_song_id=_normalize_text(row["remote_song_id"]),
                    name=_normalize_text(row["name"]),
                    singers=_normalize_text(row["singers"]),
                    album=_normalize_text(row["album"]),
                    reason_codes=tuple(reason_codes),
                )
            )
            if normalized_limit is not None and len(matches) >= normalized_limit:
                break
    finally:
        conn.close()
    return matches
def _song_to_row(song: SuspectedLiveSong) -> Dict[str, Any]:
return {
"song_id": song.song_id,
"platform": song.platform,
"remote_song_id": song.remote_song_id,
"name": song.name,
"singers": song.singers,
"album": song.album,
"reason_codes": ",".join(song.reason_codes),
}
def _write_csv(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
fieldnames = [
"song_id",
"platform",
"remote_song_id",
"name",
"singers",
"album",
"reason_codes",
]
writer = csv.DictWriter(stream, fieldnames=fieldnames)
writer.writeheader()
for row in rows:
writer.writerow(row)
def _write_jsonl(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
for row in rows:
stream.write(json.dumps(row, ensure_ascii=False) + "\n")
def _write_table(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
headers = [
"song_id",
"platform",
"remote_song_id",
"name",
"singers",
"album",
"reason_codes",
]
stream.write("\t".join(headers) + "\n")
for row in rows:
stream.write("\t".join(str(row[header]) for header in headers) + "\n")
def _write_report(
    songs: List[SuspectedLiveSong],
    *,
    output_format: str,
    stream: TextIO,
) -> None:
    """Serialize ``songs`` to ``stream`` in the requested format.

    ``"csv"`` and ``"jsonl"`` select their dedicated writers; every other
    value falls back to the tab-separated table.
    """
    writers = {"csv": _write_csv, "jsonl": _write_jsonl}
    write_rows = writers.get(output_format, _write_table)
    write_rows([_song_to_row(song) for song in songs], stream)
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the scanner's command-line options.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Namespace with ``db``, ``limit``, ``include_undownloaded``,
        ``format`` and ``output``.
    """
    option_specs = [
        ("--db", {"required": True, "help": "Path to catalogsync.db"}),
        (
            "--limit",
            {
                "type": int,
                "default": None,
                "help": "Maximum number of matched songs to return.",
            },
        ),
        (
            "--include-undownloaded",
            {
                "action": "store_true",
                "help": "Scan all songs instead of only songs with active local files.",
            },
        ),
        (
            "--format",
            {
                "choices": ("table", "csv", "jsonl"),
                "default": "table",
                "help": "Output format for stdout and optional file output.",
            },
        ),
        ("--output", {"help": "Optional path to write the report file."}),
    ]
    cli = argparse.ArgumentParser(
        description="List suspected live/stage versions without modifying catalog-sync data.",
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args(argv)
def main(argv: Optional[List[str]] = None) -> int:
    """Run the scan and emit the report; returns the process exit code.

    The report always goes to stdout. When ``--output`` is given, the same
    report is additionally written to that file (parent directories are
    created). Progress lines go to stderr.
    """
    options = parse_args(argv)
    matched = scan_suspected_live_songs(
        options.db,
        downloaded_only=not options.include_undownloaded,
        limit=options.limit,
    )
    print(f"matched_song_count={len(matched)}", file=sys.stderr)
    _write_report(matched, output_format=options.format, stream=sys.stdout)
    if not options.output:
        return 0
    report_path = Path(options.output).resolve()
    report_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line endings to the writers (the csv module needs it).
    with report_path.open("w", encoding="utf-8", newline="") as report_file:
        _write_report(matched, output_format=options.format, stream=report_file)
    print(f"wrote_report={report_path}", file=sys.stderr)
    return 0
if __name__ == "__main__":
raise SystemExit(main())
@@ -0,0 +1,450 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{{ title or "Catalogsync Ops" }}</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
font-size: 14px;
line-height: 1.35;
margin: 0;
background: #f5f7fb;
color: #1b2533;
}
[hidden] {
display: none !important;
}
nav {
background: #0f172a;
padding: 0.65rem 0.85rem;
}
nav a {
color: #dbeafe;
text-decoration: none;
margin-right: 0.85rem;
font-size: 0.9rem;
}
main {
padding: 0.85rem;
}
table {
border-collapse: collapse;
width: 100%;
background: #fff;
}
th, td {
border: 1px solid #dbe2ea;
padding: 0.32rem 0.42rem;
text-align: left;
vertical-align: top;
font-size: 0.86rem;
}
.playlist-sort-th {
padding: 0;
}
.playlist-sort-link {
display: flex;
align-items: center;
justify-content: space-between;
gap: 0.35rem;
width: 100%;
box-sizing: border-box;
color: inherit;
text-decoration: none;
padding: 0.32rem 0.42rem;
}
.playlist-sort-link:hover {
text-decoration: underline;
background: #f8fafc;
}
.playlist-sort-indicator {
color: #475569;
font-size: 0.75rem;
line-height: 1;
}
h1 {
margin-top: 0;
margin-bottom: 0.7rem;
font-size: 1.35rem;
}
h2, h3 {
margin-top: 0;
margin-bottom: 0.55rem;
}
.card {
background: #fff;
border: 1px solid #dbe2ea;
border-radius: 6px;
padding: 0.7rem;
margin-bottom: 0.8rem;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
gap: 0.8rem;
}
form {
display: grid;
gap: 0.55rem;
}
input, select, button, textarea {
font: inherit;
}
input, select, textarea {
width: 100%;
box-sizing: border-box;
padding: 0.38rem 0.48rem;
border: 1px solid #cbd5e1;
border-radius: 6px;
background: #fff;
}
button {
width: fit-content;
padding: 0.38rem 0.6rem;
border: 0;
border-radius: 6px;
background: #0f172a;
color: #fff;
cursor: pointer;
font-size: 0.85rem;
line-height: 1.2;
}
button.secondary {
background: #475569;
}
.button-grid {
display: flex;
flex-wrap: wrap;
gap: 0.45rem;
}
.muted {
color: #64748b;
}
.progress-cell {
min-width: 180px;
}
.progress-meta {
display: flex;
justify-content: space-between;
gap: 0.5rem;
margin-bottom: 0.2rem;
font-size: 0.78rem;
}
.progress-bar {
width: 100%;
height: 0.5rem;
background: #e2e8f0;
border-radius: 999px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #0f766e, #14b8a6);
}
.progress-note {
margin-top: 0.25rem;
font-size: 0.85rem;
}
.task-playlist-tree {
margin-top: 0.9rem;
}
.task-tree-columns {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
gap: 0.85rem;
align-items: start;
}
.task-tree-panel {
display: grid;
gap: 0.55rem;
min-width: 0;
align-content: start;
align-self: start;
}
.task-tree-panel-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 0.5rem;
flex-wrap: wrap;
}
.task-tree-panel-head h3 {
margin: 0;
font-size: 1rem;
}
.task-tree {
display: grid;
gap: 0.45rem;
}
.task-tree-node {
border: 1px solid #dbe2ea;
border-radius: 6px;
background: #f8fafc;
}
.task-tree-node-playlist,
.task-tree-song {
border-color: #e2e8f0;
background: #fff;
}
.task-tree-row {
display: grid;
grid-template-columns: auto minmax(0, 1fr) minmax(180px, 250px) auto;
gap: 0.5rem;
align-items: center;
padding: 0.5rem 0.6rem;
}
.task-tree-row-child {
padding-left: 1.1rem;
}
.task-tree-main {
min-width: 0;
}
.task-tree-title-line {
display: flex;
flex-wrap: wrap;
gap: 0.35rem;
align-items: center;
}
.task-tree-title-line strong {
font-size: 0.88rem;
line-height: 1.2;
}
.task-tree-meta-inline {
flex: 1 1 180px;
min-width: 0;
font-size: 0.72rem;
line-height: 1.15;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.task-tree-progress {
min-width: 0;
}
.task-tree-state {
max-width: 280px;
font-size: 0.74rem;
line-height: 1.2;
}
.task-tree-actions {
display: flex;
justify-content: flex-end;
}
.task-tree-children {
display: grid;
gap: 0.4rem;
padding: 0 0.6rem 0.55rem 0.6rem;
}
.task-tree-children-songs {
padding-left: 2rem;
}
.task-tree-song {
display: grid;
grid-template-columns: 2rem minmax(0, 1fr) auto minmax(100px, 210px);
gap: 0.45rem;
align-items: center;
padding: 0.45rem 0.55rem;
}
.task-tree-song-index {
color: #64748b;
font-size: 0.75rem;
}
.task-tree-song-note {
color: #334155;
font-size: 0.74rem;
line-height: 1.2;
}
.tree-toggle {
min-width: 1.55rem;
padding: 0.16rem 0.32rem;
font-size: 0.78rem;
line-height: 1.05;
border-radius: 4px;
}
.tree-spacer {
display: block;
width: 1.55rem;
height: 1.45rem;
}
.inline-tree .tree-toggle {
min-width: 2rem;
padding: 0.25rem 0.5rem;
}
.tree-row-detail > td {
background: #f8fafc;
}
.song-progress-table {
margin-top: 0.35rem;
}
.song-note {
color: #334155;
}
.mono {
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
.playlist-name-button {
border: 0;
padding: 0;
margin: 0;
background: transparent;
color: #0f4c81;
text-decoration: underline;
cursor: pointer;
font: inherit;
line-height: inherit;
}
.playlist-name-button:hover {
color: #0b3a62;
}
.playlist-modal {
position: fixed;
inset: 0;
z-index: 2000;
}
.playlist-modal-backdrop {
position: absolute;
inset: 0;
background: rgba(15, 23, 42, 0.45);
}
.playlist-modal-panel {
position: relative;
z-index: 1;
width: min(96vw, 1440px);
max-height: 88vh;
margin: 2.2vh auto;
background: #fff;
border: 1px solid #dbe2ea;
border-radius: 10px;
box-shadow: 0 22px 70px rgba(15, 23, 42, 0.28);
display: grid;
grid-template-rows: auto 1fr;
overflow: hidden;
}
.playlist-modal-header {
display: flex;
align-items: flex-start;
justify-content: space-between;
gap: 0.8rem;
padding: 0.8rem 0.9rem 0.65rem 0.9rem;
border-bottom: 1px solid #e2e8f0;
}
.playlist-modal-header h2 {
margin-bottom: 0.2rem;
}
.playlist-modal-meta {
margin: 0;
font-size: 0.78rem;
}
.playlist-modal-body {
padding: 0.75rem 0.9rem 0.9rem 0.9rem;
overflow: auto;
}
.playlist-modal-table-wrap {
overflow: auto;
}
.playlist-song-locations {
min-width: 160px;
font-size: 0.74rem;
line-height: 1.25;
color: #334155;
word-break: break-all;
}
.playlist-song-locations .muted {
display: block;
font-size: 0.72rem;
}
.playlist-modal-close {
min-width: 2rem;
}
.status-tag {
display: inline-flex;
align-items: center;
padding: 0.08rem 0.34rem;
border-radius: 999px;
border: 1px solid #cbd5e1;
font-size: 0.68rem;
line-height: 1.05;
margin-right: 0;
margin-bottom: 0;
background: #f8fafc;
color: #334155;
}
.status-downloaded {
background: #dcfce7;
border-color: #86efac;
color: #166534;
}
.status-running {
background: #dbeafe;
border-color: #93c5fd;
color: #1d4ed8;
}
.status-pending {
background: #f1f5f9;
border-color: #cbd5e1;
color: #334155;
}
.status-failed {
background: #fee2e2;
border-color: #fca5a5;
color: #991b1b;
}
.status-skipped {
background: #fef3c7;
border-color: #fcd34d;
color: #92400e;
}
.status-tag.non-music {
background: #fff7ed;
border-color: #fdba74;
color: #9a3412;
}
pre {
background: #0f172a;
color: #e2e8f0;
padding: 0.8rem;
overflow: auto;
}
code {
background: #eef2f7;
padding: 0.1rem 0.3rem;
}
@media (max-width: 900px) {
.task-tree-columns {
grid-template-columns: 1fr;
}
.task-tree-row,
.task-tree-song {
grid-template-columns: auto minmax(0, 1fr);
align-items: start;
}
.task-tree-actions {
justify-content: flex-start;
}
.task-tree-children-songs {
padding-left: 1rem;
}
.task-tree-meta-inline {
flex-basis: 100%;
white-space: normal;
}
}
</style>
<script src="/static/ops/app.js?v=20260418_playlist_playcount_v1" defer></script>
</head>
<body{% if sse_url %} data-sse-url="{{ sse_url }}"{% endif %}{% if dashboard_api_url %} data-dashboard-api="{{ dashboard_api_url }}"{% endif %}>
<nav>
<a href="/dashboard">Dashboard</a>
<a href="/jobs">Jobs</a>
<a href="/playlists">Playlists</a>
<a href="/songs">Songs</a>
<a href="/logs">Logs</a>
<a href="/config">Config</a>
</nav>
<main>
{% block content %}{% endblock %}
</main>
</body>
</html>
@@ -0,0 +1,57 @@
{% extends "ops/base.html" %}
{#
  Config page: read-only view of the current env text, its parsed values and
  the list of config revisions.
  Context variables read below:
    env_content - printed verbatim inside <pre>
    env_values  - mapping iterated with .items()
    revisions   - iterable of objects exposing id, created_at, applied_at, note
  NOTE(review): env_content and env_values are shown as provided; confirm the
  view masks secrets before they reach this template.
#}
{% block content %}
<h1>Config</h1>
<div class="card">
<h2>Current Env</h2>
<pre>{{ env_content }}</pre>
</div>
<div class="card">
<h2>Parsed Values</h2>
<table>
<thead>
<tr>
<th>Key</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{% for key, value in env_values.items() %}
<tr>
<td><code>{{ key }}</code></td>
<td>{{ value }}</td>
</tr>
{% else %}
<tr><td colspan="2">No parsed values.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Revisions</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Created</th>
<th>Applied</th>
<th>Note</th>
</tr>
</thead>
<tbody>
{% for revision in revisions %}
<tr>
<td>{{ revision.id }}</td>
<td>{{ revision.created_at }}</td>
<td>{{ revision.applied_at or "-" }}</td>
<td>{{ revision.note or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="4">No revisions.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}
@@ -0,0 +1,273 @@
{% extends "ops/base.html" %}
{% block content %}
{% set done_statuses = ("completed", "completed_with_errors", "failed", "canceled") %}
{% macro render_task_tree_node(row) -%}
  {#
    Render one collapsible task node of the Task Center tree.
    Reads from `row`: id, status, display_name, job_type, scope_summary,
    queue_label, lane_type, active_worker_count, primary_progress_text and
    primary_progress_percent.
    The children container starts hidden with a placeholder; presumably the
    page script fills it when the toggle is used - confirm against app.js.
  #}
  {# Normalise a missing status once so it never renders as "None". #}
  {% set row_status = row.status or "" %}
  {# paused / pause_requested -> offer resume; queued / running -> offer pause. #}
  {% set toggle_command = "resume" if row_status in ("paused", "pause_requested") else "pause" if row_status in ("queued", "running") else "" %}
  {% set can_cancel = row_status in ("queued", "running", "paused", "pause_requested") %}
  <section class="task-tree-node task-tree-node-task" data-task-node="{{ row.id }}">
    <div class="task-tree-row">
      <button type="button" class="tree-toggle" data-task-toggle="{{ row.id }}" aria-expanded="false" aria-label="Expand task {{ row.id }}">+</button>
      <div class="task-tree-main">
        <div class="task-tree-title-line">
          <strong data-task-name>{{ row.display_name }}</strong>
          <span class="muted task-tree-meta-inline" data-task-meta-inline>#{{ row.id }} / {{ row.job_type }} / {{ row.scope_summary }} / {{ row.queue_label or row.lane_type or "-" }} / workers {{ row.active_worker_count }}</span>
          <span class="status-tag status-{{ row_status }}" data-task-status>{{ row_status or "-" }}</span>
        </div>
      </div>
      <div class="task-tree-progress" data-task-progress>
        <div class="progress-meta">
          <span>{{ row.primary_progress_text or "-" }}</span>
          <strong>{{ row.primary_progress_percent or 0 }}%</strong>
        </div>
        <div class="progress-bar">
          <div class="progress-fill" style="width: {{ row.primary_progress_percent or 0 }}%;"></div>
        </div>
      </div>
      <div class="task-tree-actions">
        <div class="button-grid">
          {% if toggle_command %}
          <button
            type="button"
            data-task-command-toggle="{{ row.id }}"
            data-task-command-type="{{ toggle_command }}"
          >
            {% if toggle_command == "resume" %}&gt;{% else %}||{% endif %}
          </button>
          {% else %}
          <span class="muted">-</span>
          {% endif %}
          {% if can_cancel %}
          <button type="button" class="secondary" data-task-command-cancel="{{ row.id }}">x</button>
          {% endif %}
        </div>
      </div>
    </div>
    <div class="task-tree-children" data-task-children="{{ row.id }}" hidden>
      <p class="muted">Expand to load playlists...</p>
    </div>
  </section>
{%- endmacro %}
<h1>Task Center</h1>
<div class="card">
<div data-live-status>Live snapshot: waiting...</div>
</div>
<div class="grid">
<div class="card">
<h2>Summary</h2>
<table>
<tr><th>Total Jobs</th><td data-summary-field="total_jobs">{{ summary.total_jobs }}</td></tr>
<tr><th>Queued</th><td data-summary-field="queued_jobs">{{ summary.queued_jobs }}</td></tr>
<tr><th>Queued Download Jobs</th><td data-summary-field="queued_download_jobs">{{ summary.queued_download_jobs }}</td></tr>
<tr><th>Running</th><td data-summary-field="running_jobs">{{ summary.running_jobs }}</td></tr>
<tr><th>Paused</th><td data-summary-field="paused_jobs">{{ summary.paused_jobs }}</td></tr>
<tr><th>Failed / Errors</th><td data-summary-field="failed_jobs">{{ summary.failed_jobs }}</td></tr>
<tr><th>Downloaded Songs</th><td data-download-field="downloaded_songs">{{ download_stats.downloaded_songs }}</td></tr>
<tr><th>Running Songs</th><td data-download-field="running_song_items">{{ download_stats.running_song_items }}</td></tr>
</table>
</div>
<div class="card">
<h2>Quick Actions</h2>
<div class="button-grid">
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="catalog_sync" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="sources" value="{{ default_sources }}" />
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
<button type="submit">Full Pipeline</button>
</form>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="collect_only" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="sources" value="{{ default_sources }}" />
<button type="submit">Collect</button>
</form>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="sync_only" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="sources" value="{{ default_sources }}" />
<button type="submit">Sync</button>
</form>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="download_only" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
<button type="submit">Download</button>
</form>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="upload_only" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
<button type="submit">Upload</button>
</form>
</div>
</div>
<div class="card">
<h2>Create Job</h2>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<label>
Job Type
<select name="job_type">
{% for value, label in job_type_options %}
<option value="{{ value }}">{{ label }}</option>
{% endfor %}
</select>
</label>
<label>
Requested By
<input type="text" name="requested_by" value="ops-console" />
</label>
<label>
Collect Sources
<input type="text" name="sources" value="{{ default_sources }}" />
</label>
<label>
Download Sources
<input type="text" name="download_sources" value="{{ default_download_sources }}" />
</label>
<button type="submit">Create Job</button>
</form>
</div>
<div class="card">
<h2>Playlist Coverage</h2>
<table>
<thead>
<tr>
<th>Platform</th>
<th>Pool Kind</th>
<th>Pool Name</th>
<th>Playlists</th>
</tr>
</thead>
<tbody data-playlist-sources-body>
{% for row in playlist_sources %}
<tr>
<td>{{ row.platform }}</td>
<td>{{ row.pool_kind }}</td>
<td>{{ row.pool_name }}</td>
<td>{{ row.playlist_count }}</td>
</tr>
{% else %}
<tr><td colspan="4">No playlist sources collected yet.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div
class="card"
data-maintenance-panel="local-duplicates"
data-scan-api="{{ maintenance_local_duplicates_scan_api }}"
data-dedupe-api="{{ maintenance_local_duplicates_dedupe_api }}"
>
<h2>Maintenance</h2>
<div class="button-grid">
<button type="button" data-maintenance-action="scan">Scan Duplicate Local Copies</button>
<button type="button" class="secondary" data-maintenance-action="dedupe">Run Local Dedupe</button>
</div>
<p class="muted" data-maintenance-status>No local duplicate scan has been run yet.</p>
<div data-maintenance-result>
<p class="muted">Scan first to inspect duplicate local file copies before dedupe.</p>
</div>
</div>
</div>
<div class="card">
<div class="task-tree-panel-head">
<h2>Task Center</h2>
<span class="muted" data-task-center-transfer>Down {{ transfer_stats.download_speed_text }} | Up {{ transfer_stats.upload_speed_text }}</span>
</div>
<div class="task-tree-columns">
<section class="task-tree-panel">
<div class="task-tree-panel-head">
<h3>Doing</h3>
<span class="muted">Task -> Playlist -> Song</span>
</div>
<div class="task-tree" data-task-tree-root="doing">
{% for row in doing_task_rows %}
{{ render_task_tree_node(row) }}
{% else %}
<p class="muted" data-task-tree-empty>No active tasks.</p>
{% endfor %}
</div>
</section>
<section class="task-tree-panel">
<div class="task-tree-panel-head">
<h3>Recent Done</h3>
<span class="muted">Task -> Playlist</span>
</div>
<div class="task-tree" data-task-tree-root="done">
{% for row in done_task_rows %}
{{ render_task_tree_node(row) }}
{% else %}
<p class="muted" data-task-tree-empty>No recently finished tasks.</p>
{% endfor %}
</div>
</section>
</div>
</div>
<div class="grid">
<div class="card">
<h2>Active Workers</h2>
<table>
<thead>
<tr>
<th>Worker</th>
<th>Status</th>
<th>Stage</th>
<th>Current Item</th>
<th>Progress</th>
</tr>
</thead>
<tbody data-workers-body>
{% for worker in workers %}
<tr>
<td>{{ worker.worker_name }}</td>
<td>{{ worker.status }}</td>
<td>{{ worker.stage_type or "-" }}</td>
<td>{{ worker.display_text or "-" }}</td>
<td>{{ worker.last_progress_text or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="5">No active workers.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Running Items</h2>
<table>
<thead>
<tr>
<th>Job</th>
<th>Worker</th>
<th>Stage</th>
<th>Item</th>
<th>Started</th>
</tr>
</thead>
<tbody data-running-items-body>
{% for item in running_items %}
<tr>
<td><a href="/jobs/{{ item.job_run_id }}">{{ item.job_run_id }}</a></td>
<td>{{ item.worker_name or "-" }}</td>
<td>{{ item.stage_type }}</td>
<td>{{ item.display_name }}</td>
<td>{{ item.started_at or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="5">No running items.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endblock %}
@@ -0,0 +1,223 @@
{% extends "ops/base.html" %}
{% block content %}
<p><a href="/dashboard">Back to Dashboard</a></p>
<h1>Job {{ job.id }}</h1>
<div class="grid">
<div class="card">
<table>
<tr><th>Type</th><td>{{ job.job_type }}</td></tr>
<tr><th>Status</th><td>{{ job.status }}</td></tr>
<tr><th>Requested By</th><td>{{ job.requested_by or "-" }}</td></tr>
<tr><th>Created</th><td>{{ job.created_at or "-" }}</td></tr>
<tr><th>Started</th><td>{{ job.started_at or "-" }}</td></tr>
<tr><th>Ended</th><td>{{ job.ended_at or "-" }}</td></tr>
</table>
</div>
<div class="card">
<h2>Job Commands</h2>
<div class="button-grid">
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
<input type="hidden" name="command_type" value="pause" />
<button type="submit">暂停任务</button>
</form>
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
<input type="hidden" name="command_type" value="resume" />
<button type="submit">继续任务</button>
</form>
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
<input type="hidden" name="command_type" value="cancel" />
<button type="submit" class="secondary">取消任务</button>
</form>
</div>
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
<input type="hidden" name="command_type" value="retry_item" />
<label>
Retry Item Id
<input type="number" name="target_item_id" min="1" />
</label>
<button type="submit">Retry Item</button>
</form>
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
<input type="hidden" name="command_type" value="force_retry_item" />
<label>
Force Retry Item Id
<input type="number" name="target_item_id" min="1" />
</label>
<button type="submit">Force Retry Item</button>
<p class="muted">Use this when a single item needs to be replayed from scratch.</p>
</form>
</div>
<div class="card">
<h2>Download Stats</h2>
<table>
<tr><th>Total Songs</th><td>{{ download_stats.total_songs }}</td></tr>
<tr><th>Downloaded Songs</th><td>{{ download_stats.downloaded_songs }}</td></tr>
<tr><th>Local Files</th><td>{{ download_stats.local_file_locations }}</td></tr>
<tr><th>Running Songs</th><td>{{ download_stats.running_song_items }}</td></tr>
</table>
</div>
</div>
<div class="card">
<h2>Stages</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Stage</th>
<th>Status</th>
<th>Total</th>
<th>Pending</th>
<th>Running</th>
<th>Succeeded</th>
<th>Failed</th>
</tr>
</thead>
<tbody>
{% for stage in stages %}
<tr>
<td>{{ stage.id }}</td>
<td>{{ stage.stage_type }}</td>
<td>{{ stage.status }}</td>
<td>{{ stage.total_items }}</td>
<td>{{ stage.pending_items }}</td>
<td>{{ stage.running_items }}</td>
<td>{{ stage.success_items }}</td>
<td>{{ stage.failed_items }}</td>
</tr>
{% else %}
<tr><td colspan="8">No stages.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Playlist Progress</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Playlist</th>
<th>Progress</th>
<th>Total Songs</th>
<th>Downloaded</th>
<th>Running</th>
<th>Pending</th>
<th>Failed</th>
<th>Skipped</th>
</tr>
</thead>
<tbody>
{% for playlist in playlist_progress %}
<tr>
<td>{{ playlist.playlist_id }}</td>
<td>{{ playlist.playlist_name }}</td>
<td class="progress-cell">
<div class="progress-meta">
<span>{{ playlist.downloaded_songs or 0 }} / {{ playlist.total_songs or 0 }}</span>
<strong>{{ playlist.progress_percent or 0 }}%</strong>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: {{ playlist.progress_percent or 0 }}%;"></div>
</div>
</td>
<td>{{ playlist.total_songs or 0 }}</td>
<td>{{ playlist.downloaded_songs or 0 }}</td>
<td>{{ playlist.running_songs or 0 }}</td>
<td>{{ playlist.pending_songs or 0 }}</td>
<td>{{ playlist.failed_songs or 0 }}</td>
<td>{{ playlist.skipped_songs or 0 }}</td>
</tr>
{% else %}
<tr><td colspan="9">No playlist-scoped progress for this job.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Workers</h2>
<table>
<thead>
<tr>
<th>Worker</th>
<th>Status</th>
<th>Stage</th>
<th>Current Song / Playlist</th>
<th>Progress</th>
</tr>
</thead>
<tbody>
{% for worker in workers %}
<tr>
<td>{{ worker.worker_name }}</td>
<td>{{ worker.status }}</td>
<td>{{ worker.stage_type or "-" }}</td>
<td>{{ worker.display_text or "-" }}</td>
<td>{{ worker.last_progress_text or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="5">No workers recorded yet.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Running Items</h2>
<table>
<thead>
<tr>
<th>Worker</th>
<th>Stage</th>
<th>Item</th>
<th>Started</th>
</tr>
</thead>
<tbody>
{% for item in running_items %}
<tr>
<td>{{ item.worker_name or "-" }}</td>
<td>{{ item.stage_type }}</td>
<td>{{ item.display_name }}</td>
<td>{{ item.started_at or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="4">No running items.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Commands</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Type</th>
<th>Status</th>
<th>Created</th>
<th>Applied</th>
</tr>
</thead>
<tbody>
{% for command in commands %}
<tr>
<td>{{ command.id }}</td>
<td>{{ command.command_type }}</td>
<td>{{ command.status }}</td>
<td>{{ command.created_at }}</td>
<td>{{ command.applied_at or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="5">No commands.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}
@@ -0,0 +1,30 @@
{% extends "ops/base.html" %}
{#
  Jobs archive page: flat table of jobs, each id linking to /jobs/<id>.
  Context variable read below:
    jobs - iterable of objects exposing id, job_type, status, requested_by,
           created_at
#}
{% block content %}
<h1>Jobs Archive</h1>
<p class="muted">Use <a href="/dashboard">Dashboard</a> for the main task center. This page stays available for fallback browsing.</p>
<table>
<thead>
<tr>
<th>ID</th>
<th>Type</th>
<th>Status</th>
<th>Requested By</th>
<th>Created</th>
</tr>
</thead>
<tbody>
{% for job in jobs %}
<tr>
<td><a href="/jobs/{{ job.id }}">{{ job.id }}</a></td>
<td>{{ job.job_type }}</td>
<td>{{ job.status }}</td>
<td>{{ job.requested_by or "-" }}</td>
<td>{{ job.created_at or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="5">No jobs found.</td></tr>
{% endfor %}
</tbody>
</table>
{% endblock %}
@@ -0,0 +1,29 @@
{% extends "ops/base.html" %}
{#
  Events page: flat table of job events.
  Context variable read below:
    events - iterable of objects exposing id, job_run_id, event_type,
             message, created_at
#}
{% block content %}
<h1>Events</h1>
<table>
<thead>
<tr>
<th>ID</th>
<th>Job ID</th>
<th>Event Type</th>
<th>Message</th>
<th>Created</th>
</tr>
</thead>
<tbody>
{% for event in events %}
<tr>
<td>{{ event.id }}</td>
<td>{{ event.job_run_id }}</td>
<td>{{ event.event_type }}</td>
<td>{{ event.message or "-" }}</td>
<td>{{ event.created_at }}</td>
</tr>
{% else %}
<tr><td colspan="5">No events.</td></tr>
{% endfor %}
</tbody>
</table>
{% endblock %}
@@ -0,0 +1,276 @@
{% extends "ops/base.html" %}
{% block content %}
<section data-playlists-page>
<h1>Playlists</h1>
<div class="card">
<h2>Playlist Coverage</h2>
<table>
<thead>
<tr>
<th>Platform</th>
<th>Pool Kind</th>
<th>Pool Name</th>
<th>Playlists</th>
</tr>
</thead>
<tbody>
{% for row in playlist_sources %}
<tr>
<td>{{ row.platform }}</td>
<td>{{ row.pool_kind }}</td>
<td>{{ row.pool_name }}</td>
<td>{{ row.playlist_count }}</td>
</tr>
{% else %}
<tr><td colspan="4">No playlist sources collected yet.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Filters</h2>
<form method="get" action="/playlists">
<input type="hidden" name="sort_by" value="{{ filters.sort_by }}" />
<input type="hidden" name="sort_dir" value="{{ filters.sort_dir }}" />
<div class="grid">
<label>
Keyword
<input type="text" name="keyword" value="{{ filters.keyword }}" placeholder="Name / remote id" />
</label>
<label>
Platform
<select name="platform">
<option value="">All</option>
{% for option in filter_options.platforms %}
<option value="{{ option }}" {% if filters.platform == option %}selected{% endif %}>{{ option }}</option>
{% endfor %}
</select>
</label>
<label>
Pool Kind
<select name="pool_kind">
<option value="">All</option>
{% for option in filter_options.pool_kinds %}
<option value="{{ option }}" {% if filters.pool_kind == option %}selected{% endif %}>{{ option }}</option>
{% endfor %}
</select>
</label>
<label>
Status
<select name="status">
<option value="" {% if not filters.status %}selected{% endif %}>All</option>
<option value="unsynced" {% if filters.status == "unsynced" %}selected{% endif %}>Unsynced</option>
<option value="not_downloaded" {% if filters.status == "not_downloaded" %}selected{% endif %}>Not Downloaded</option>
<option value="downloading" {% if filters.status == "downloading" %}selected{% endif %}>Downloading</option>
<option value="partial" {% if filters.status == "partial" %}selected{% endif %}>Partial</option>
<option value="downloaded" {% if filters.status == "downloaded" %}selected{% endif %}>Downloaded</option>
</select>
</label>
<label>
Wanted
<select name="wanted_only">
<option value="" {% if not filters.wanted_only %}selected{% endif %}>All</option>
<option value="1" {% if filters.wanted_only %}selected{% endif %}>Wanted only</option>
</select>
</label>
<label>
Page Size
<select name="page_size">
<option value="20" {% if filters.page_size == 20 %}selected{% endif %}>20</option>
<option value="50" {% if filters.page_size == 50 %}selected{% endif %}>50</option>
<option value="100" {% if filters.page_size == 100 %}selected{% endif %}>100</option>
</select>
</label>
</div>
<button type="submit">Apply Filters</button>
</form>
</div>
<div class="card">
<div class="button-grid">
<button type="button" data-playlist-select-all>Select All On Page</button>
<button type="button" class="secondary" data-playlist-clear-selection>Clear Selection</button>
<span>Selected: <strong data-playlist-selection-count>0</strong></span>
<form action="/api/jobs" method="post" data-json-form data-success="reload">
<input type="hidden" name="job_type" value="collect_only" />
<input type="hidden" name="requested_by" value="ops-console" />
<input type="hidden" name="sources" value="{{ default_sources }}" />
<button type="submit" class="secondary">Collect Playlist Sources</button>
</form>
</div>
<div class="button-grid" style="margin-top: 0.8rem;">
<button type="button" data-playlist-action="sync">Sync Selected Playlists</button>
<button type="button" data-playlist-action="download">Download Selected Playlists</button>
<button type="button" class="secondary" data-playlist-action="export-selected">Export Selected</button>
<button type="button" class="secondary" data-playlist-action="mark-wanted">Mark Wanted</button>
<button type="button" class="secondary" data-playlist-action="unmark-wanted">Unmark Wanted</button>
</div>
</div>
<table>
<thead>
<tr>
<th>Select</th>
<th class="playlist-sort-th">
<a class="playlist-sort-link" data-playlist-sort-link="id" href="{{ sort_links.id.href }}">
<span>ID</span>
{% if sort_links.id.indicator %}
<span class="playlist-sort-indicator" data-playlist-sort-indicator="id">{{ sort_links.id.indicator }}</span>
{% endif %}
</a>
</th>
<th class="playlist-sort-th">
<a class="playlist-sort-link" data-playlist-sort-link="platform" href="{{ sort_links.platform.href }}">
<span>Platform</span>
{% if sort_links.platform.indicator %}
<span class="playlist-sort-indicator" data-playlist-sort-indicator="platform">{{ sort_links.platform.indicator }}</span>
{% endif %}
</a>
</th>
<th>Remote ID</th>
<th class="playlist-sort-th">
<a class="playlist-sort-link" data-playlist-sort-link="name" href="{{ sort_links.name.href }}">
<span>Name</span>
{% if sort_links.name.indicator %}
<span class="playlist-sort-indicator" data-playlist-sort-indicator="name">{{ sort_links.name.indicator }}</span>
{% endif %}
</a>
</th>
<th class="playlist-sort-th">
<a class="playlist-sort-link" data-playlist-sort-link="play_count" href="{{ sort_links.play_count.href }}">
<span>&#28909;&#24230;/&#25773;&#25918;&#37327;</span>
{% if sort_links.play_count.indicator %}
<span class="playlist-sort-indicator" data-playlist-sort-indicator="play_count">{{ sort_links.play_count.indicator }}</span>
{% endif %}
</a>
</th>
<th>Pools</th>
<th>Songs</th>
<th>Downloaded</th>
<th>Progress</th>
<th>Status</th>
<th>Wanted</th>
<th>Updated</th>
</tr>
</thead>
<tbody>
{% for playlist in playlists %}
<tr>
<td>
<input type="checkbox" data-playlist-checkbox value="{{ playlist.id }}" />
</td>
<td>{{ playlist.id }}</td>
<td>{{ playlist.platform }}</td>
<td>{{ playlist.remote_playlist_id }}</td>
<td>
{% if (playlist.song_count or 0) > 0 %}
<button
type="button"
class="playlist-name-button"
data-playlist-open-songs="{{ playlist.id }}"
data-playlist-name="{{ playlist.name }}"
data-playlist-platform="{{ playlist.platform }}"
data-playlist-remote-id="{{ playlist.remote_playlist_id }}"
>{{ playlist.name }}</button>
{% else %}
{{ playlist.name }}
{% endif %}
</td>
<td>{{ playlist.play_count if playlist.play_count is not none else "-" }}</td>
<td>{{ playlist.pool_names or "-" }}</td>
<td>
<div>{{ playlist.display_song_count or 0 }}</div>
{% if playlist.is_song_count_estimated %}
<div class="muted">Collected {{ playlist.collected_song_count }}</div>
{% endif %}
</td>
<td>{{ playlist.downloaded_song_count or 0 }}</td>
<td class="progress-cell">
<div class="progress-meta">
<span>{{ playlist.downloaded_song_count or 0 }} / {{ playlist.song_count or 0 }}</span>
<strong>{{ playlist.progress_percent or 0 }}%</strong>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: {{ playlist.progress_percent or 0 }}%;"></div>
</div>
{% if (playlist.song_count or 0) == 0 or playlist.running_download_song_count %}
<div class="progress-note muted">
{% if (playlist.song_count or 0) == 0 and playlist.collected_song_count is not none %}
Collected {{ playlist.collected_song_count }}, sync recommended
{% elif (playlist.song_count or 0) == 0 %}
0 songs, sync recommended
{% elif playlist.running_download_song_count %}
Running {{ playlist.running_download_song_count }}
{% endif %}
</div>
{% endif %}
</td>
<td>{{ playlist.state_label or playlist.state_code or "-" }}</td>
<td>{% if playlist.is_wanted %}Yes{% else %}No{% endif %}</td>
<td>{{ playlist.updated_at }}</td>
</tr>
{% else %}
<tr><td colspan="13">No playlists.</td></tr>
{% endfor %}
</tbody>
</table>
<div class="card" data-playlist-pagination>
<p>
Page {{ playlist_page.page }} / {{ playlist_page.total_pages if playlist_page.total_pages > 0 else 1 }}
- Total {{ playlist_page.total_count }} playlists
</p>
<div class="button-grid">
{% if previous_page_url %}
<a href="{{ previous_page_url }}">Previous</a>
{% else %}
<span class="muted">Previous</span>
{% endif %}
{% if next_page_url %}
<a href="{{ next_page_url }}">Next</a>
{% else %}
<span class="muted">Next</span>
{% endif %}
</div>
</div>
<div class="playlist-modal" data-playlist-songs-modal hidden>
<div class="playlist-modal-backdrop" data-playlist-modal-close></div>
<div class="playlist-modal-panel" role="dialog" aria-modal="true" aria-labelledby="playlist-modal-title">
<div class="playlist-modal-header">
<div>
<h2 id="playlist-modal-title" data-playlist-modal-title>Playlist Songs</h2>
<p class="playlist-modal-meta muted" data-playlist-modal-meta>-</p>
</div>
<div class="button-grid">
<button type="button" class="secondary" data-playlist-export disabled>Export</button>
<button type="button" class="secondary playlist-modal-close" data-playlist-modal-close aria-label="Close">x</button>
</div>
</div>
<div class="playlist-modal-body">
<p class="muted" data-playlist-modal-state>Select a playlist to preview songs.</p>
<div class="playlist-modal-table-wrap" data-playlist-modal-table-wrap hidden>
<table>
<thead>
<tr>
<th>Song ID</th>
<th>Name</th>
<th>Singers</th>
<th>Size</th>
<th>Format</th>
<th>Local</th>
<th>Uploaded</th>
</tr>
</thead>
<tbody data-playlist-songs-body>
<tr><td colspan="7">No songs.</td></tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
</section>
{% endblock %}
@@ -0,0 +1,97 @@
{% extends "ops/base.html" %}
{% block content %}
<h1>Songs</h1>
<div class="grid">
<div class="card">
<h2>Download Stats</h2>
<table>
<tr><th>Total Songs</th><td>{{ download_stats.total_songs }}</td></tr>
<tr><th>Downloaded Songs</th><td>{{ download_stats.downloaded_songs }}</td></tr>
<tr><th>Local Files</th><td>{{ download_stats.local_file_locations }}</td></tr>
</table>
</div>
<div class="card">
<h2>Active Workers</h2>
<table>
<thead>
<tr>
<th>Worker</th>
<th>Status</th>
<th>Stage</th>
<th>Current Song / Playlist</th>
</tr>
</thead>
<tbody>
{% for worker in workers %}
<tr>
<td>{{ worker.worker_name }}</td>
<td>{{ worker.status }}</td>
<td>{{ worker.stage_type or "-" }}</td>
<td>{{ worker.display_text or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="4">No active workers.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="card">
<h2>Running Items</h2>
<table>
<thead>
<tr>
<th>Job</th>
<th>Worker</th>
<th>Stage</th>
<th>Item</th>
</tr>
</thead>
<tbody>
{% for item in running_items %}
<tr>
<td><a href="/jobs/{{ item.job_run_id }}">{{ item.job_run_id }}</a></td>
<td>{{ item.worker_name or "-" }}</td>
<td>{{ item.stage_type }}</td>
<td>{{ item.display_name }}</td>
</tr>
{% else %}
<tr><td colspan="4">No running items.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card">
<h2>Song Catalog</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Platform</th>
<th>Remote ID</th>
<th>Name</th>
<th>Singers</th>
<th>Updated</th>
</tr>
</thead>
<tbody>
{% for song in songs %}
<tr>
<td>{{ song.id }}</td>
<td>{{ song.platform }}</td>
<td>{{ song.remote_song_id }}</td>
<td>{{ song.name }}</td>
<td>{{ song.singers or "-" }}</td>
<td>{{ song.updated_at }}</td>
</tr>
{% else %}
<tr><td colspan="6">No songs.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}
@@ -0,0 +1,221 @@
from __future__ import annotations

import json
import os
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from .repository import CatalogRepository
def load_backend_config(backend_row) -> dict:
    """Decode the ``config_json`` column of a backend row.

    A missing or empty column value is decoded as an empty JSON object.
    """
    raw_config = backend_row["config_json"]
    if not raw_config:
        raw_config = "{}"
    return json.loads(raw_config)
def normalize_prefix(value: str | None) -> str:
    """Return *value* without surrounding whitespace or leading/trailing slashes.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    text = str(value) if value else ""
    # Whitespace is removed first, then the slashes at both ends.
    trimmed = text.strip()
    return trimmed.strip("/")
def build_target_locator(base_prefix: str | None, relative_locator: str) -> str:
    """Join the normalized backend prefix and a relative locator with ``/``.

    The relative locator is stripped of surrounding whitespace and leading
    slashes. When the prefix normalizes to an empty string the relative
    locator is returned on its own.
    """
    relative_part = str(relative_locator).strip().lstrip("/")
    prefix_part = normalize_prefix(base_prefix)
    return f"{prefix_part}/{relative_part}" if prefix_part else relative_part
def derive_public_url(public_base_url: str | None, locator: str, base_prefix: str | None) -> str | None:
    """Build the public URL for *locator* under *public_base_url*.

    Returns ``None`` when no base URL is configured. If the base URL already
    ends with ``/<base_prefix>`` and the locator starts with ``<base_prefix>/``,
    that leading prefix is dropped from the locator so it does not appear
    twice. An empty remaining locator yields the base URL itself.
    """
    root = str(public_base_url or "").strip().rstrip("/")
    if not root:
        return None

    prefix = normalize_prefix(base_prefix)
    path = str(locator).strip().lstrip("/")

    # Drop the prefix from the locator only when the base URL already carries it.
    duplicated_head = f"{prefix}/"
    if prefix and root.endswith(f"/{prefix}") and path.startswith(duplicated_head):
        path = path[len(duplicated_head) :]

    path = path.lstrip("/")
    # NOTE(review): the locator is not percent-encoded here — confirm consumers
    # encode characters such as "#", "?" or spaces themselves.
    return f"{root}/{path}" if path else root
def build_s3_client(backend_row):
    """Create a boto3 S3 client for an object-storage backend row.

    The backend's JSON config supplies ``credential_env_prefix``, and the
    credentials are read from the environment variables
    ``<prefix>_ACCESS_KEY_ID``, ``<prefix>_SECRET_ACCESS_KEY`` and (optionally)
    ``<prefix>_SESSION_TOKEN``. Optional config keys: ``endpoint``, ``region``
    and ``addressing_style`` (``"path"`` or ``"virtual"``).

    Raises:
        RuntimeError: if ``credential_env_prefix`` is missing, if the access
            key or secret key is not set in the environment, or if boto3
            cannot be imported.
    """
    config = load_backend_config(backend_row)
    credential_env_prefix = str(config.get("credential_env_prefix") or "").strip()
    if not credential_env_prefix:
        raise RuntimeError("Object storage backend is missing credential_env_prefix")

    access_key_id = os.getenv(f"{credential_env_prefix}_ACCESS_KEY_ID")
    secret_access_key = os.getenv(f"{credential_env_prefix}_SECRET_ACCESS_KEY")
    session_token = os.getenv(f"{credential_env_prefix}_SESSION_TOKEN")
    if not access_key_id or not secret_access_key:
        raise RuntimeError(f"Missing credentials for backend {backend_row['name']}")

    # Imported lazily so the module can be loaded without boto3 installed.
    try:
        import boto3
        from botocore.config import Config
    except ImportError as exc:
        raise RuntimeError("boto3 is required for object storage uploads") from exc

    addressing_style = str(config.get("addressing_style") or "").strip().lower()
    client_config = None
    if addressing_style in {"path", "virtual"}:
        client_config = Config(s3={"addressing_style": addressing_style})

    # "auto" is not forwarded as a region name; it is treated like "unset".
    region = str(config.get("region") or "").strip()
    if region.lower() == "auto":
        region = ""

    # A blank or whitespace-only endpoint is normalized to None, the same way
    # region and session token are, instead of being forwarded as "".
    endpoint = str(config.get("endpoint") or "").strip()

    return boto3.client(
        "s3",
        endpoint_url=endpoint or None,
        region_name=region or None,
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        aws_session_token=session_token or None,
        config=client_config,
    )
class S3CompatibleUploader:
    """Upload local files through an S3-style client and report their public URL."""

    def __init__(self, backend_row, client=None):
        """Keep the backend row, decode its config and pick the client to use.

        A client is built with ``build_s3_client`` only when the caller did
        not inject a truthy one.
        """
        self.backend = backend_row
        self.config = load_backend_config(backend_row)
        self.client = client if client else build_s3_client(backend_row)

    def upload_file(self, local_path: Path, container_name: str, locator: str) -> dict[str, str | None]:
        """Upload *local_path* to *container_name* under the key *locator*.

        Returns a dict with ``public_url`` (derived from the backend's
        ``public_base_url`` / ``base_prefix`` config, possibly ``None``) and
        ``download_url`` (always ``None``).
        """
        self.client.upload_file(str(local_path), container_name, locator, ExtraArgs=None)
        settings = self.config
        public_url = derive_public_url(
            settings.get("public_base_url"),
            locator,
            settings.get("base_prefix"),
        )
        return {"public_url": public_url, "download_url": None}
class CatalogUploader:
    """Queue and execute uploads of catalog files to an object-storage backend.

    All persistence goes through the injected repository; the S3 client is
    produced by ``client_factory`` so callers can substitute their own.
    """

    def __init__(
        self,
        repository: CatalogRepository,
        worker_count: int = 4,
        client_factory=None,
    ):
        """Store collaborators; ``worker_count`` is clamped to at least 1."""
        self.repository = repository
        self.worker_count = max(1, worker_count)
        # The lambda looks up build_s3_client at call time rather than binding
        # the function object here.
        self.client_factory = client_factory or (lambda backend_row: build_s3_client(backend_row))

    def get_backend(self, backend_name: str):
        """Return the backend row named *backend_name*.

        Raises:
            RuntimeError: if no such backend exists or its ``backend_type``
                is not ``"object_storage"``.
        """
        backend = self.repository.get_backend_by_name(backend_name)
        if backend is None:
            raise RuntimeError(f"Unknown backend: {backend_name}")
        if backend["backend_type"] != "object_storage":
            raise RuntimeError(f"Backend {backend_name} is not object storage")
        return backend

    def enqueue_missing_uploads(
        self,
        backend_name: str,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
    ) -> int:
        """Enqueue an upload task for every candidate the repository reports.

        Returns the number of distinct task ids returned by
        ``enqueue_upload_task``, so candidates that map to the same task are
        counted once.
        """
        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        candidates = self.repository.list_missing_object_upload_candidates(
            target_backend_id=backend_id,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        seen_task_ids: set[int] = set()
        for candidate in candidates:
            task_id = self.repository.enqueue_upload_task(
                file_asset_id=int(candidate["file_asset_id"]),
                source_location_id=int(candidate["source_location_id"]),
                target_backend_id=backend_id,
                target_container_name=candidate["target_container_name"],
                target_locator=candidate["target_locator"],
            )
            seen_task_ids.add(task_id)
        return len(seen_task_ids)

    def process_upload_task_row(self, task_row, backend_name: str) -> str:
        """Process a single, already-fetched upload task row.

        Returns ``"succeeded"`` or ``"failed"`` from the upload attempt, or
        ``"skipped"`` when *task_row* is ``None`` (there is nothing to upload
        and no task id to update).

        Raises:
            RuntimeError: if the backend is unknown or not object storage.
        """
        backend = self.get_backend(backend_name)
        if task_row is None:
            # Previously a None row passed the guard below and then crashed
            # with TypeError when _process_task subscripted it.
            return "skipped"
        source_path_text = task_row["absolute_path"]
        uploader = None
        # Build a client only when the source file exists; a missing file is
        # marked failed without ever touching the client.
        if source_path_text and Path(source_path_text).exists():
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))
        return self._process_task(task_row, backend, uploader)

    def run(self, backend_name: str, limit: int | None = None) -> dict[str, int]:
        """Claim and process pending upload tasks with a pool of worker threads.

        Args:
            backend_name: name of the object-storage backend to upload to.
            limit: when positive, the maximum number of tasks claimed during
                this run (shared across all workers). ``None`` or a
                non-positive value leaves the number of claims unbounded.

        Returns:
            A summary dict with ``queued`` (rows returned by
            ``list_pending_upload_tasks``), ``succeeded``, ``failed``,
            ``skipped`` and ``workers``.
        """
        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        pending_tasks = self.repository.list_pending_upload_tasks(target_backend_id=backend_id, limit=limit)
        uploader = None
        if any(row["absolute_path"] and Path(row["absolute_path"]).exists() for row in pending_tasks):
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))

        # The limit used to bound only the listing above, while workers kept
        # claiming until the queue was empty. A shared, lock-protected budget
        # now caps the number of claims for positive limits.
        # NOTE(review): non-positive limits stay unbounded because the
        # repository's handling of them is not visible here — confirm.
        remaining_claims = limit if limit is not None and limit > 0 else None
        budget_lock = threading.Lock()

        def reserve_claim() -> bool:
            nonlocal remaining_claims
            if remaining_claims is None:
                return True
            with budget_lock:
                if remaining_claims <= 0:
                    return False
                remaining_claims -= 1
                return True

        def worker():
            local_summary = {"succeeded": 0, "failed": 0, "skipped": 0}
            while reserve_claim():
                task = self.repository.claim_next_upload_task(target_backend_id=backend_id)
                if task is None:
                    break
                result = self._process_task(task, backend, uploader)
                local_summary[result] += 1
            return local_summary

        summary = {
            "queued": len(pending_tasks),
            "succeeded": 0,
            "failed": 0,
            "skipped": 0,
            "workers": self.worker_count,
        }
        with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
            futures = [executor.submit(worker) for _ in range(self.worker_count)]
            for future in as_completed(futures):
                worker_summary = future.result()
                for key in ("succeeded", "failed", "skipped"):
                    summary[key] += int(worker_summary[key])
        return summary

    def _process_task(self, task, backend, uploader: S3CompatibleUploader | None) -> str:
        """Upload one task's source file and record the outcome.

        Marks the task ``failed`` when the source file is missing or when any
        step of the upload/recording raises; otherwise records the remote file
        and marks the task ``succeeded``. Returns the status string.
        """
        task_id = int(task["id"])
        source_path_text = task["absolute_path"]
        source_path = Path(source_path_text) if source_path_text else None
        if source_path is None or not source_path.exists():
            missing_path = str(source_path) if source_path is not None else "<missing>"
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"Source file does not exist: {missing_path}",
            )
            return "failed"
        try:
            # Fall back to a per-task uploader when no shared one was supplied.
            active_uploader = uploader or S3CompatibleUploader(backend, client=self.client_factory(backend))
            # The task's own container wins; the backend's is the fallback.
            container_name = task["target_container_name"] or backend["container_name"]
            result = active_uploader.upload_file(
                local_path=source_path,
                container_name=container_name,
                locator=task["target_locator"],
            )
            self.repository.record_remote_file(
                file_asset_id=int(task["file_asset_id"]),
                backend_id=int(task["target_backend_id"]),
                container_name=container_name,
                locator=task["target_locator"],
                public_url=result["public_url"],
                download_url=result["download_url"],
            )
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="succeeded",
                last_error=None,
            )
            return "succeeded"
        except Exception as exc:
            # Task-level boundary: any failure is persisted on the task row
            # instead of aborting the worker loop.
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"{type(exc).__name__}: {exc}",
            )
            return "failed"