Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
"""Catalog sync package for playlist harvesting and deduplicated downloads."""
|
||||
|
||||
from .db import REQUIRED_TABLES, initialize_database
|
||||
from .models import CatalogSong, PlaylistCandidate, extract_artist_names, normalize_source_name
|
||||
|
||||
__all__ = [
|
||||
"CatalogSong",
|
||||
"PlaylistCandidate",
|
||||
"REQUIRED_TABLES",
|
||||
"extract_artist_names",
|
||||
"initialize_database",
|
||||
"normalize_source_name",
|
||||
]
|
||||
@@ -0,0 +1,66 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Any, Mapping
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CatalogExportResult:
    """Immutable record describing one attempt to run the catalog export command."""

    # Outcome label; this module emits "skipped", "succeeded" or "failed".
    status: str
    # Command string associated with the attempt, if any.
    command: str | None = None
    # Working directory associated with the attempt, if any.
    workdir: str | None = None
    # Exit code of the finished process; None when no process completed.
    returncode: int | None = None
    # Captured standard output of the process.
    stdout: str = ""
    # Captured standard error, or the exception text when the launch failed.
    stderr: str = ""
|
||||
|
||||
|
||||
def _read_config_value(config_snapshot: Mapping[str, Any] | None, key: str) -> str | None:
    """Look up ``key`` in the snapshot, falling back to the process environment.

    A key that is present in the snapshot always wins, even when its value is
    ``None`` (in which case ``None`` is returned and the environment is not
    consulted). Non-``None`` values are returned as strings.
    """
    if config_snapshot and key in config_snapshot:
        raw = config_snapshot.get(key)
    else:
        raw = os.environ.get(key)
    if raw is None:
        return None
    return str(raw)
|
||||
|
||||
|
||||
def run_catalog_export_command(config_snapshot: Mapping[str, Any] | None) -> CatalogExportResult:
    """Run the configured catalog export command and report its outcome.

    The command is read from ``CATALOG_EXPORT_COMMAND`` and the working
    directory from ``CATALOG_EXPORT_WORKDIR`` (snapshot first, then the
    environment). Both values are trimmed; a blank value counts as unset.

    Returns:
        A ``CatalogExportResult`` whose status is ``"skipped"`` when no command
        is configured, ``"succeeded"`` for exit code 0, and ``"failed"`` for a
        non-zero exit code or when the process could not be run at all.
    """
    command = _read_config_value(config_snapshot, "CATALOG_EXPORT_COMMAND")
    workdir = _read_config_value(config_snapshot, "CATALOG_EXPORT_WORKDIR")
    normalized_command = (command or "").strip()
    # A blank working directory (e.g. an empty env var) must not reach
    # subprocess as cwd="": changing into "" fails and the export could never run.
    normalized_workdir = (workdir or "").strip() or None

    if not normalized_command:
        return CatalogExportResult(
            status="skipped",
            command=None,
            workdir=normalized_workdir,
        )

    try:
        # NOTE(review): the command is executed through the shell on purpose so a
        # full command line can be configured; it must only come from trusted config.
        completed = subprocess.run(
            normalized_command,
            shell=True,
            cwd=normalized_workdir,
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception as exc:
        # Launch problems are reported as a failed result instead of propagating.
        return CatalogExportResult(
            status="failed",
            command=normalized_command,
            workdir=normalized_workdir,
            stderr=str(exc) or exc.__class__.__name__,
        )

    return CatalogExportResult(
        status="succeeded" if completed.returncode == 0 else "failed",
        command=normalized_command,
        workdir=normalized_workdir,
        returncode=completed.returncode,
        stdout=completed.stdout or "",
        stderr=completed.stderr or "",
    )
|
||||
@@ -0,0 +1,418 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import click
|
||||
|
||||
try:
    import uvicorn
except Exception:  # pragma: no cover - exercised only when uvicorn is missing

    class _MissingUvicorn:
        # Placeholder bound to the name ``uvicorn`` when the import fails, so the
        # module still imports and the failure only surfaces when run() is called.
        def run(self, *args, **kwargs):
            raise click.ClickException("serve command requires uvicorn. Install runtime dependencies first.")

    uvicorn = _MissingUvicorn()
|
||||
|
||||
from .db import initialize_database
|
||||
from .downloader import CatalogDownloader, DEFAULT_DOWNLOAD_WORKERS
|
||||
from .manual_playlists import parse_playlist_file
|
||||
from .repository import CatalogRepository
|
||||
from .resolver import DEFAULT_DOWNLOAD_SOURCES
|
||||
from .resolver_stats import default_resolver_stats_db_path, initialize_resolver_stats_database
|
||||
from .services import CatalogSyncService
|
||||
from .uploader import CatalogUploader
|
||||
|
||||
|
||||
def parse_sources(value: str) -> list[str]:
    """Split a comma-separated string into trimmed, non-empty entries."""
    trimmed = (piece.strip() for piece in value.split(","))
    return [piece for piece in trimmed if piece]
|
||||
|
||||
|
||||
def parse_int_list(value: str | None) -> list[int] | None:
    """Parse a comma-separated string of integers.

    Returns ``None`` for a missing or empty value; blank entries between
    commas are skipped. A non-integer entry raises ``ValueError`` from ``int``.
    """
    if not value:
        return None
    trimmed = (piece.strip() for piece in value.split(","))
    return [int(piece) for piece in trimmed if piece]
|
||||
|
||||
|
||||
def format_lyrics_progress(state: dict[str, object]) -> str:
    """Render a one-line lyrics progress summary from a progress state mapping.

    Missing or falsy counters are shown as 0; every counter is coerced with ``int``.
    """
    counter_names = ("total", "processed", "progress_percent", "saved", "skipped", "failed")
    counts = {name: int(state.get(name) or 0) for name in counter_names}
    head = f"Lyrics progress: {counts['processed']}/{counts['total']} ({counts['progress_percent']}%) "
    tail = f"saved={counts['saved']} skipped={counts['skipped']} failed={counts['failed']}"
    return head + tail
|
||||
|
||||
|
||||
# Click parameter type that accepts integers from 1 to 65535; used for --port.
PORT_RANGE = click.IntRange(1, 65535)
|
||||
|
||||
|
||||
def create_ops_web_app(*, db_path: str, env_path: str):
    """Build the ops web application for the given database and env file.

    The web module is imported inside the function, so it is only loaded when
    this factory is actually called. The app is created with ``start_runner=True``.
    """
    from .ops.web import create_app as build_ops_app

    return build_ops_app(db_path=db_path, env_path=env_path, start_runner=True)
|
||||
|
||||
|
||||
class CatalogSyncApplication:
    """Facade that wires the database, repository, service and workers for the CLI.

    Constructing an instance initializes the catalog database and the resolver
    stats database, then builds the repository, the sync service and a default
    downloader on top of them.
    """

    def __init__(self, db_path: str, library_root: str | None = None):
        self.db_path = db_path
        self.library_root = library_root
        # Initialize both databases before the repository is constructed; the
        # sequence lives in init_db() so it exists in exactly one place.
        self.init_db()
        self.repository = CatalogRepository(db_path)
        self.service = CatalogSyncService(self.repository)
        self.downloader = CatalogDownloader(self.repository)

    def init_db(self):
        """Initialize the catalog and resolver-stats databases, closing each connection."""
        catalog_conn = initialize_database(self.db_path, default_library_root=self.library_root)
        catalog_conn.close()
        stats_conn = initialize_resolver_stats_database(default_resolver_stats_db_path(self.db_path))
        stats_conn.close()

    def collect_playlists(self, sources: list[str], include_playlist_square: bool = True, include_toplist: bool = True):
        """Delegate playlist collection for ``sources`` to the sync service."""
        return self.service.collect_playlists(sources, include_playlist_square, include_toplist)

    def sync_playlist_catalog(self, sources: list[str] | None = None, limit: int | None = None):
        """Delegate playlist catalog synchronization to the sync service."""
        return self.service.sync_playlist_catalog(sources=sources, limit=limit)

    def download_pending(
        self,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        download_sources: list[str] | None = None,
        lyrics_enabled: bool = True,
        overwrite_lyrics: bool = False,
    ):
        """Download pending songs into the library root.

        A dedicated downloader is created per call so ``workers`` takes effect.

        Raises:
            click.ClickException: if the application has no library root.
        """
        if not self.library_root:
            raise click.ClickException("download command requires --library-root")
        downloader = CatalogDownloader(self.repository, worker_count=workers)
        return downloader.download_pending(
            self.library_root,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
            download_sources=download_sources,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )

    def run_playlist_file(
        self,
        playlist_file: str,
        limit: int | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        download_sources: list[str] | None = None,
        lyrics_enabled: bool = True,
        overwrite_lyrics: bool = False,
    ) -> dict[str, int]:
        """Import playlists from a file, sync them, then download their songs.

        ``limit`` caps the number of imported playlists that are processed.

        Raises:
            click.ClickException: if the file yields no valid playlist entries.
        """
        parsed = parse_playlist_file(playlist_file)
        if not parsed.entries:
            raise click.ClickException("playlist file does not contain any valid playlist URLs")
        playlist_ids = self.service.import_manual_playlists(playlist_file, parsed.entries)
        if limit is not None:
            playlist_ids = playlist_ids[:limit]
        synchronized_songs = self.service.sync_specific_playlists(playlist_ids)
        downloaded_songs = self.download_pending(
            playlist_ids=playlist_ids,
            workers=workers,
            download_sources=download_sources,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )
        return {
            "total_lines": parsed.total_lines,
            "valid_playlists": len(parsed.entries),
            "skipped_lines": parsed.skipped_lines,
            "synchronized_songs": synchronized_songs,
            "downloaded_songs": downloaded_songs,
        }

    def register_object_backend(
        self,
        backend_name: str,
        container_name: str,
        endpoint: str,
        region: str | None,
        base_prefix: str | None,
        credential_env_prefix: str,
        addressing_style: str | None = None,
        public_base_url: str | None = None,
    ) -> int:
        """Upsert an object storage backend in the repository and return its result."""
        return self.repository.upsert_object_storage_backend(
            name=backend_name,
            container_name=container_name,
            endpoint=endpoint,
            region=region,
            base_prefix=base_prefix,
            credential_env_prefix=credential_env_prefix,
            addressing_style=addressing_style,
            public_base_url=public_base_url,
        )

    def upload_files(
        self,
        backend_name: str,
        sources: list[str] | None = None,
        playlist_ids: list[int] | None = None,
        limit: int | None = None,
        workers: int = 4,
    ) -> dict[str, int]:
        """Queue missing uploads for a backend, run the uploader, and return its summary.

        The uploader's summary is extended with a ``"queued"`` entry holding the
        value returned by the enqueue step.
        """
        uploader = CatalogUploader(self.repository, worker_count=workers)
        queued = uploader.enqueue_missing_uploads(
            backend_name=backend_name,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        summary = uploader.run(backend_name=backend_name)
        summary["queued"] = queued
        return summary

    def sync_local_lyrics(
        self,
        sources: list[str] | None = None,
        playlist_ids: list[int] | None = None,
        limit: int | None = None,
        workers: int = DEFAULT_DOWNLOAD_WORKERS,
        progress_callback=None,
        overwrite_lyrics: bool = False,
    ) -> dict[str, int]:
        """Run the downloader's local lyrics sync with a per-call worker count."""
        downloader = CatalogDownloader(self.repository, worker_count=workers)
        return downloader.sync_local_lyrics(
            sources=sources,
            playlist_ids=playlist_ids,
            limit=limit,
            progress_callback=progress_callback,
            overwrite_lyrics=overwrite_lyrics,
        )
|
||||
|
||||
|
||||
# Root command group; the subcommands below attach themselves via @cli.command.
@click.group()
def cli():
    """Catalog sync CLI for harvesting playlists and downloading songs."""
|
||||
|
||||
|
||||
@cli.command("init-db")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
def init_db_command(db_path: str, library_root: str | None):
    # Build the application for this database and explicitly run its
    # database initialization, then report the database path.
    CatalogSyncApplication(db_path=db_path, library_root=library_root).init_db()
    click.echo(f"Initialized catalog database at {db_path}")
|
||||
|
||||
|
||||
@cli.command("collect")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
@click.option("--playlist-square/--no-playlist-square", default=True, show_default=True)
@click.option("--toplist/--no-toplist", default=True, show_default=True)
def collect_command(db_path: str, sources: str, library_root: str | None, playlist_square: bool, toplist: bool):
    # Collect playlists from the requested sources and print what the service returned.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    source_names = parse_sources(sources)
    collected = application.collect_playlists(source_names, playlist_square, toplist)
    click.echo(f"Collected playlists: {collected}")
|
||||
|
||||
|
||||
@cli.command("sync")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=False)
@click.option("--limit", type=int, default=None)
def sync_command(db_path: str, sources: str, library_root: str | None, limit: int | None):
    # Synchronize the playlist catalog for the requested sources and print the result.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    source_names = parse_sources(sources)
    synchronized = application.sync_playlist_catalog(source_names, limit=limit)
    click.echo(f"Synchronized songs: {synchronized}")
|
||||
|
||||
|
||||
@cli.command("download")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--download-sources", default=",".join(DEFAULT_DOWNLOAD_SOURCES), show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=True)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--lyrics/--no-lyrics", "lyrics_enabled", default=True, show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def download_command(
    db_path: str,
    sources: str,
    download_sources: str,
    library_root: str,
    limit: int | None,
    workers: int,
    lyrics_enabled: bool,
    overwrite_lyrics: bool,
):
    # Download pending songs for the requested catalog sources and print the result.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    catalog_sources = parse_sources(sources)
    resolver_sources = parse_sources(download_sources)
    downloaded = application.download_pending(
        catalog_sources,
        limit=limit,
        workers=workers,
        download_sources=resolver_sources,
        lyrics_enabled=lyrics_enabled,
        overwrite_lyrics=overwrite_lyrics,
    )
    click.echo(f"Downloaded songs: {downloaded}")
|
||||
|
||||
|
||||
@cli.command("run")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default="netease,qq,kuwo", show_default=True)
@click.option("--download-sources", default=",".join(DEFAULT_DOWNLOAD_SOURCES), show_default=True)
@click.option("--library-root", type=click.Path(file_okay=False), required=True)
@click.option("--playlist-file", type=click.Path(dir_okay=False, exists=True), required=False)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--lyrics/--no-lyrics", "lyrics_enabled", default=True, show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def run_command(
    db_path: str,
    sources: str,
    download_sources: str,
    library_root: str,
    playlist_file: str | None,
    limit: int | None,
    workers: int,
    lyrics_enabled: bool,
    overwrite_lyrics: bool,
):
    # Full pipeline. With --playlist-file only the playlists from that file are
    # imported, synced and downloaded; otherwise playlists are collected from
    # the configured sources, synced, and pending songs are downloaded.
    application = CatalogSyncApplication(db_path=db_path, library_root=library_root)
    resolver_sources = parse_sources(download_sources)
    if playlist_file:
        application.run_playlist_file(
            playlist_file=playlist_file,
            limit=limit,
            workers=workers,
            download_sources=resolver_sources,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )
    else:
        catalog_sources = parse_sources(sources)
        application.collect_playlists(catalog_sources)
        application.sync_playlist_catalog(catalog_sources, limit=limit)
        application.download_pending(
            catalog_sources,
            limit=limit,
            workers=workers,
            download_sources=resolver_sources,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )
    click.echo("Catalog sync pipeline completed")
|
||||
|
||||
|
||||
@cli.command("register-object-backend")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--backend", "backend_name", required=True)
@click.option("--bucket", "container_name", required=True)
@click.option("--endpoint", required=True)
@click.option("--region", default=None)
@click.option("--base-prefix", default=None)
@click.option("--credential-env-prefix", required=True)
@click.option("--addressing-style", default=None)
@click.option("--public-base-url", default=None)
def register_object_backend_command(
    db_path: str,
    backend_name: str,
    container_name: str,
    endpoint: str,
    region: str | None,
    base_prefix: str | None,
    credential_env_prefix: str,
    addressing_style: str | None,
    public_base_url: str | None,
):
    # Register (upsert) an object storage backend and print the value returned for it.
    application = CatalogSyncApplication(db_path=db_path)
    backend_settings = {
        "backend_name": backend_name,
        "container_name": container_name,
        "endpoint": endpoint,
        "region": region,
        "base_prefix": base_prefix,
        "credential_env_prefix": credential_env_prefix,
        "addressing_style": addressing_style,
        "public_base_url": public_base_url,
    }
    registered_id = application.register_object_backend(**backend_settings)
    click.echo(f"Registered object backend: {registered_id}")
|
||||
|
||||
|
||||
@cli.command("upload")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--backend", "backend_name", required=True)
@click.option("--sources", default=None)
@click.option("--playlist-ids", default=None)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=4, show_default=True)
def upload_command(
    db_path: str,
    backend_name: str,
    sources: str | None,
    playlist_ids: str | None,
    limit: int | None,
    workers: int,
):
    # Queue and run uploads for a backend; omitted --sources / --playlist-ids
    # are passed on as None.
    application = CatalogSyncApplication(db_path=db_path)
    source_filter = parse_sources(sources) if sources else None
    playlist_filter = parse_int_list(playlist_ids)
    upload_summary = application.upload_files(
        backend_name=backend_name,
        sources=source_filter,
        playlist_ids=playlist_filter,
        limit=limit,
        workers=workers,
    )
    click.echo(f"Upload summary: {upload_summary}")
|
||||
|
||||
|
||||
@cli.command("lyrics")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--sources", default=None)
@click.option("--playlist-ids", default=None)
@click.option("--limit", type=int, default=None)
@click.option("--workers", type=int, default=DEFAULT_DOWNLOAD_WORKERS, envvar="DOWNLOAD_WORKERS", show_default=True)
@click.option("--overwrite-lyrics", is_flag=True, default=False)
def lyrics_command(
    db_path: str,
    sources: str | None,
    playlist_ids: str | None,
    limit: int | None,
    workers: int,
    overwrite_lyrics: bool,
):
    # Sync lyrics for local songs, echoing a progress line for every callback
    # invocation and the final summary at the end.
    application = CatalogSyncApplication(db_path=db_path)

    def report_progress(**state):
        # The progress state arrives as keyword arguments.
        click.echo(format_lyrics_progress(state))

    source_filter = parse_sources(sources) if sources else None
    playlist_filter = parse_int_list(playlist_ids)
    lyrics_summary = application.sync_local_lyrics(
        sources=source_filter,
        playlist_ids=playlist_filter,
        limit=limit,
        workers=workers,
        progress_callback=report_progress,
        overwrite_lyrics=overwrite_lyrics,
    )
    click.echo(f"Lyrics summary: {lyrics_summary}")
|
||||
|
||||
|
||||
@cli.command("serve")
@click.option("--db", "db_path", required=True, type=click.Path(dir_okay=False))
@click.option("--env-file", required=True, type=click.Path(dir_okay=False))
@click.option("--host", default="127.0.0.1", show_default=True)
@click.option("--port", type=PORT_RANGE, default=18080, show_default=True)
def serve_command(db_path: str, env_file: str, host: str, port: int):
    # Build the ops web app for this database / env file and serve it with uvicorn.
    web_app = create_ops_web_app(db_path=db_path, env_path=env_file)
    uvicorn.run(web_app, host=host, port=port)
|
||||
|
||||
|
||||
def main():
    """Entry point: invoke the click command group."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,15 @@
|
||||
from .kuwo import KuwoCollector, parse_playlist_square_html as parse_kuwo_playlist_square_html, parse_toplist_html as parse_kuwo_toplist_html
|
||||
from .netease import NeteaseCollector, parse_playlist_square_html as parse_netease_playlist_square_html, parse_toplist_payload as parse_netease_toplist_payload
|
||||
from .qq import QQCollector, parse_playlist_square_payload as parse_qq_playlist_square_payload, parse_toplist_payload as parse_qq_toplist_payload
|
||||
|
||||
__all__ = [
|
||||
"KuwoCollector",
|
||||
"NeteaseCollector",
|
||||
"QQCollector",
|
||||
"parse_kuwo_playlist_square_html",
|
||||
"parse_kuwo_toplist_html",
|
||||
"parse_netease_playlist_square_html",
|
||||
"parse_netease_toplist_payload",
|
||||
"parse_qq_playlist_square_payload",
|
||||
"parse_qq_toplist_payload",
|
||||
]
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
@dataclass
class BaseCollector:
    """Shared HTTP plumbing for the platform collectors.

    Each instance carries default request headers and its own
    ``requests.Session``.
    """

    # Default headers sent with every request made through get().
    headers: dict[str, str] = field(default_factory=lambda: {"User-Agent": "Mozilla/5.0"})
    # Session used for all requests of this collector.
    session: requests.Session = field(default_factory=requests.Session)

    def get(self, url: str, **kwargs):
        """GET ``url`` through the session and return the response.

        Extra keyword arguments are forwarded to ``Session.get``. Per-call
        ``headers`` are merged over the collector defaults and ``timeout``
        defaults to 15 seconds but may be overridden; previously passing
        either of them raised ``TypeError`` (duplicate keyword argument).

        Raises:
            requests.HTTPError: via ``raise_for_status`` for error responses.
        """
        extra_headers = kwargs.pop("headers", None) or {}
        kwargs.setdefault("timeout", 15)
        response = self.session.get(url, headers={**self.headers, **extra_headers}, **kwargs)
        response.raise_for_status()
        return response
|
||||
@@ -0,0 +1,260 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
# Kuwo pages fetched by the collector in this module.
PLAYLIST_SQUARE_URL = "https://www.kuwo.cn/playlist"
TOPLIST_URL = "https://www.kuwo.cn/rankList"

# Captures the expression assigned to window.__NUXT__ inside an inline <script>.
NUXT_SCRIPT_RE = re.compile(r"<script>\s*window\.__NUXT__=(.*?)</script>", flags=re.DOTALL)

# Splits an expression of the form `(function(params){return body})(args);`
# into its `params`, `body` and `args` groups.
NUXT_FUNCTION_RE = re.compile(
    r"^\(function\((?P<params>.*?)\)\s*\{\s*return\s+(?P<body>.*)\}\)\((?P<args>.*)\)\s*;?\s*$",
    flags=re.DOTALL,
)
|
||||
# Chinese magnitude suffixes that may follow a play count.
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}


def _parse_play_count(value: object) -> int | None:
    """Convert a raw play-count value into an integer.

    Accepts ints, finite floats (truncated), plain digit strings (whitespace
    and thousands separators are ignored) and strings with a 万 / 亿 suffix
    such as ``"1.5万"``.

    Returns:
        The parsed count, or ``None`` when the value is missing, a bool, a
        non-finite float, or text that does not contain a recognizable count.
    """
    # Local imports keep this block self-contained.
    import math
    from decimal import Decimal

    if value in (None, "") or isinstance(value, bool):
        return None
    if isinstance(value, float):
        # int() raises on NaN / infinity; treat those as "no count".
        return int(value) if math.isfinite(value) else None
    if isinstance(value, int):
        return int(value)

    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so inputs like "²" no longer raise ValueError.
    if text.isdecimal():
        return int(text)

    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    # Exact decimal arithmetic: with binary floats "1.15亿" came out as
    # 114999999 because 1.15 * 1e8 lands just below the integer.
    return int(Decimal(match.group(1)) * multiplier)
|
||||
|
||||
|
||||
def split_js_arguments(text: str) -> list[str]:
    """Split a JavaScript argument list on its top-level commas.

    Commas inside single- or double-quoted strings (backslash escapes are
    honored) and inside ``()``, ``[]`` or ``{}`` do not split. Each piece is
    stripped and empty pieces are dropped.
    """
    pieces: list[str] = []
    buffer: list[str] = []
    open_quote = ""
    escaped = False
    nesting = 0

    def flush() -> None:
        # Emit the buffered characters as one argument when non-blank.
        piece = "".join(buffer).strip()
        if piece:
            pieces.append(piece)

    for ch in str(text or ""):
        if escaped:
            # Character following a backslash inside a string: taken literally.
            escaped = False
        elif open_quote:
            if ch == "\\":
                escaped = True
            elif ch == open_quote:
                open_quote = ""
        elif ch in ("'", '"'):
            open_quote = ch
        elif ch in ("(", "[", "{"):
            nesting += 1
        elif ch in (")", "]", "}"):
            # Unbalanced closers never push the depth below zero.
            nesting = nesting - 1 if nesting > 0 else 0
        elif ch == "," and nesting == 0:
            flush()
            buffer = []
            continue
        buffer.append(ch)

    flush()
    return pieces
|
||||
|
||||
|
||||
def resolve_js_value(token: str, variables: dict[str, object] | None = None):
    """Resolve a simple JavaScript token to a Python value.

    Resolution order: empty token -> ``None``; a name found in ``variables``
    -> its bound value; ``true`` / ``false`` / ``null`` / ``undefined`` /
    ``void 0`` -> ``True`` / ``False`` / ``None``; quoted strings -> decoded
    text; numeric text -> ``float`` (when it contains a dot) or ``int``.
    Anything else is returned unchanged as a string.

    A quoted token that is not valid JSON (mismatched quote characters,
    JS-only escapes such as ``\\x2F``) falls back to the raw text between the
    outer quotes instead of raising.
    """
    token = str(token or "").strip()
    variables = variables or {}
    if not token:
        return None
    if token in variables:
        return variables[token]

    literals = {"true": True, "false": False, "null": None, "undefined": None, "void 0": None}
    if token in literals:
        return literals[token]

    if token.startswith(("'", '"')) and token.endswith(("'", '"')):
        inner = token[1:-1]
        if token.startswith("'") and token.endswith("'"):
            # Re-quote single-quoted strings so the JSON decoder accepts them.
            candidate = '"' + inner.replace("\\", "\\\\").replace('"', '\\"') + '"'
        else:
            candidate = token
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            return inner

    try:
        if "." in token:
            return float(token)
        return int(token)
    except ValueError:
        return token
|
||||
|
||||
|
||||
def extract_kuwo_bang_menu_items(script_body: str) -> list[dict]:
    """Extract toplist entries from a ``window.__NUXT__`` function expression.

    The script is matched against ``NUXT_FUNCTION_RE``; its parameter names are
    bound positionally to the resolved call arguments, and every
    ``{sourceid:..., name:..., id:..., ...}`` object found in the function body
    is resolved against those bindings. Returns an empty list when the script
    does not match, when the body does not mention ``bangMenu``, or when no
    entry has a truthy ``id``.
    """
    parsed = NUXT_FUNCTION_RE.match(str(script_body or "").strip())
    if not parsed:
        return []

    param_names = [
        name.strip()
        for name in str(parsed.group("params") or "").split(",")
        if name.strip()
    ]
    arg_values = [resolve_js_value(raw) for raw in split_js_arguments(parsed.group("args") or "")]
    bindings = dict(zip(param_names, arg_values))

    function_body = str(parsed.group("body") or "")
    if "bangMenu" not in function_body:
        return []

    entry_re = re.compile(
        r"\{sourceid:(?P<sourceid>[^,]+),.*?name:(?P<name>[^,]+),\s*id:(?P<id>[^,]+),\s*source:(?P<source>[^,]+),\s*pic:(?P<pic>[^,]+),\s*pub:(?P<pub>[^,}\]]+)(?:,\s*(?:listencnt|playCount|listenCount):(?P<play_count>[^,}\]]+))?",
        re.DOTALL,
    )
    field_names = ("sourceid", "name", "id", "source", "pic", "pub", "play_count")

    entries: list[dict] = []
    for hit in entry_re.finditer(function_body):
        record = {key: resolve_js_value(hit.group(key), bindings) for key in field_names}
        # Entries whose id resolves to nothing usable are ignored.
        if record.get("id"):
            entries.append(record)
    return entries
|
||||
|
||||
|
||||
def extract_nuxt_state(html: str) -> dict | None:
    """Evaluate the page's ``window.__NUXT__`` assignment with Node.js.

    The expression captured by ``NUXT_SCRIPT_RE`` is run through ``node -e``
    and the resulting state is read back as JSON.

    Returns:
        The decoded state when it is a JSON object; ``None`` when the page has
        no matching script, Node cannot be run / fails / times out (10 s), the
        output is empty or not valid JSON, or the decoded value is not a dict.
    """
    match = NUXT_SCRIPT_RE.search(html)
    if not match:
        return None
    script_body = match.group(1)
    # SECURITY NOTE(review): script_body comes straight from the fetched page
    # and is executed by Node with this process's privileges. Only acceptable
    # while the page source is trusted; consider sandboxing or relying on the
    # regex-based extractor instead.
    node_script = (
        "const window = {}; "
        f"window.__NUXT__={script_body}; "
        "process.stdout.write(JSON.stringify(window.__NUXT__));"
    )
    try:
        completed = subprocess.run(
            ["node", "-e", node_script],
            check=True,
            capture_output=True,
            timeout=10,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # Missing node binary, non-zero exit, timeout, or an argument that
        # cannot be passed to the process: treat as "state unavailable".
        return None
    output = completed.stdout.decode("utf-8", errors="ignore").strip()
    if not output:
        return None
    try:
        state = json.loads(output)
    except json.JSONDecodeError:
        return None
    # Callers use dict methods on the result; any other JSON value means
    # "no usable state".
    return state if isinstance(state, dict) else None
|
||||
|
||||
|
||||
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Build playlist candidates from the anchors of a Kuwo playlist-square page.

    Every ``<a>`` whose href contains ``playlist_detail/`` yields one candidate;
    the remote id is the last path segment of the href and repeated ids are
    skipped.
    """
    document = BeautifulSoup(html, "lxml")
    candidates: list[PlaylistCandidate] = []
    known_ids: set[str] = set()

    for link in document.select("a[href*='playlist_detail/']"):
        href = link.get("href", "").strip()
        remote_id = href.rstrip("/").rpartition("/")[2]
        if not remote_id or remote_id in known_ids:
            continue
        known_ids.add(remote_id)

        # Relative links are resolved against the Kuwo site root.
        if href.startswith("http"):
            playlist_url = href
        else:
            playlist_url = f"https://www.kuwo.cn{href}"
        title = link.get("title") or link.get_text(strip=True) or remote_id
        cover_url = (link.find("img") or {}).get("src")
        count_node = link.select_one(".num")
        raw_count = count_node.get_text(" ", strip=True) if count_node else None

        candidate = PlaylistCandidate(
            platform="kuwo",
            pool_kind="playlist_square",
            remote_id=remote_id,
            name=title,
            url=playlist_url,
            cover_url=cover_url,
            play_count=_parse_play_count(raw_count),
        )
        candidates.append(candidate)

    return candidates
|
||||
|
||||
|
||||
def _extract_toplist_play_count(entry: dict) -> int | None:
    """Return the first parseable play count among the known count keys, else None."""
    count_keys = ("listencnt", "play_count", "playCount", "listenCount")
    # Lazy generator: later keys are only parsed when earlier ones gave None.
    parsed_counts = (_parse_play_count(entry.get(key)) for key in count_keys)
    return next((count for count in parsed_counts if count is not None), None)
|
||||
|
||||
|
||||
def _build_kuwo_toplist_candidate(entry: dict) -> PlaylistCandidate | None:
    """Turn one toplist entry into a candidate; None when the entry has no id."""
    remote_id = str(entry.get("id", "")).strip()
    if not remote_id:
        return None
    return PlaylistCandidate(
        platform="kuwo",
        pool_kind="toplist",
        remote_id=remote_id,
        name=entry.get("name") or remote_id,
        url=f"https://www.kuwo.cn/rankList?bangId={remote_id}",
        cover_url=entry.get("pic"),
        parse_strategy="kuwo_toplist",
        play_count=_extract_toplist_play_count(entry),
        metadata={"sourceid": str(entry.get("sourceid", "")), "pub": entry.get("pub")},
    )


def parse_toplist_html(html: str) -> list[PlaylistCandidate]:
    """Parse the Kuwo rank-list page into toplist candidates.

    The Nuxt state is evaluated first; its entries are read from
    ``data[*].bangMenu[*].list``. When no state is available, entries are
    extracted from the raw ``window.__NUXT__`` script text with the
    regex-based extractor. Entries without an id are skipped.
    """
    state = extract_nuxt_state(html)
    if state:
        entries = []
        for group in state.get("data", []) or []:
            for menu in group.get("bangMenu", []) or []:
                entries.extend(menu.get("list", []) or [])
    else:
        # Search the page once and reuse the match for the fallback extractor.
        script_match = NUXT_SCRIPT_RE.search(html)
        entries = extract_kuwo_bang_menu_items(script_match.group(1) if script_match else "")

    candidates = (_build_kuwo_toplist_candidate(entry) for entry in entries)
    return [candidate for candidate in candidates if candidate is not None]
|
||||
|
||||
|
||||
class KuwoCollector(BaseCollector):
    """Collector that fetches and parses Kuwo playlist-square and toplist pages."""

    def collect_playlist_square(self, page: int = 1, page_size: int = 30) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page and parse its playlist candidates.

        ``page`` and ``page_size`` are clamped to at least 1 and sent as the
        string query parameters ``pn`` and ``rn``.
        """
        query = {
            "pn": str(max(page, 1)),
            "rn": str(max(page_size, 1)),
        }
        page_response = self.get(PLAYLIST_SQUARE_URL, params=query)
        return parse_playlist_square_html(page_response.text)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the rank-list page and parse its toplist candidates."""
        page_response = self.get(TOPLIST_URL)
        return parse_toplist_html(page_response.text)
|
||||
@@ -0,0 +1,113 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
# NetEase endpoints used by the collector in this module.
PLAYLIST_SQUARE_URL = "https://music.163.com/discover/playlist"
TOPLIST_API_URL = "https://music.163.com/api/toplist/detail"
|
||||
|
||||
# Chinese magnitude suffixes that may follow a play count.
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}


def _parse_play_count(value: object) -> int | None:
    """Convert a raw play-count value into an integer.

    Accepts ints, finite floats (truncated), plain digit strings (whitespace
    and thousands separators are ignored) and strings with a 万 / 亿 suffix
    such as ``"1.5万"``.

    Returns:
        The parsed count, or ``None`` when the value is missing, a bool, a
        non-finite float, or text that does not contain a recognizable count.
    """
    # Local imports keep this block self-contained.
    import math
    from decimal import Decimal

    if value in (None, "") or isinstance(value, bool):
        return None
    if isinstance(value, float):
        # int() raises on NaN / infinity; treat those as "no count".
        return int(value) if math.isfinite(value) else None
    if isinstance(value, int):
        return int(value)

    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so inputs like "²" no longer raise ValueError.
    if text.isdecimal():
        return int(text)

    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    # Exact decimal arithmetic: with binary floats "1.15亿" came out as
    # 114999999 because 1.15 * 1e8 lands just below the integer.
    return int(Decimal(match.group(1)) * multiplier)
|
||||
|
||||
|
||||
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Build playlist candidates from a NetEase playlist-square page.

    Every ``a.msk`` anchor whose href contains ``/playlist?id=`` yields one
    candidate; the remote id is the text after the last ``id=`` in the href and
    repeated ids are skipped. The play count is looked up as a ``.nb`` element
    under the anchor's parent.
    """
    document = BeautifulSoup(html, "lxml")
    candidates: list[PlaylistCandidate] = []
    known_ids: set[str] = set()

    for link in document.select("a.msk[href*='/playlist?id=']"):
        remote_id = link.get("href", "").rpartition("id=")[2].strip()
        if not remote_id or remote_id in known_ids:
            continue
        known_ids.add(remote_id)

        container = link.parent or link
        count_node = container.select_one(".nb")
        raw_count = count_node.get_text(" ", strip=True) if count_node else None

        candidate = PlaylistCandidate(
            platform="netease",
            pool_kind="playlist_square",
            remote_id=remote_id,
            name=link.get("title") or remote_id,
            url=f"https://music.163.com/#/playlist?id={remote_id}",
            # The cover is taken from the closest <img> that precedes the anchor.
            cover_url=(link.find_previous("img") or {}).get("src"),
            play_count=_parse_play_count(raw_count),
        )
        candidates.append(candidate)

    return candidates
|
||||
|
||||
|
||||
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Turn a toplist API payload into playlist candidates.

    Args:
        payload: Decoded JSON object; entries are read from its ``"list"`` key.

    Returns:
        One candidate per entry that has a non-empty id. Entries whose id is
        missing, null or blank are skipped.
    """
    items: list[PlaylistCandidate] = []
    for entry in payload.get("list") or []:
        raw_id = entry.get("id")
        # A null id must not be stringified into the literal id "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        items.append(
            PlaylistCandidate(
                platform="netease",
                pool_kind="toplist",
                remote_id=remote_id,
                name=entry.get("name") or remote_id,
                url=f"https://music.163.com/#/playlist?id={remote_id}",
                cover_url=entry.get("coverImgUrl"),
                parse_strategy="netease_toplist",
                # Fall back to the subscriber count when no play count is given.
                play_count=_parse_play_count(
                    entry.get("playCount") or entry.get("subscribedCount")
                ),
                metadata={"update_frequency": entry.get("updateFrequency")},
            )
        )
    return items
|
||||
|
||||
|
||||
class NeteaseCollector(BaseCollector):
    """Collector that fetches playlist candidates from NetEase endpoints."""

    def collect_playlist_square(
        self,
        category: str = "全部",
        order: str = "hot",
        page: int = 1,
        page_size: int = 35,
        offset: int | None = None,
    ) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page and parse it into candidates.

        When ``offset`` is not given it is derived from ``page`` and
        ``page_size``; both are clamped so the offset is never negative.
        """
        start = offset
        if start is None:
            pages_to_skip = max(page - 1, 0)
            start = pages_to_skip * max(page_size, 1)
        query = {"cat": category, "order": order, "offset": start}
        page_html = self.get(PLAYLIST_SQUARE_URL, params=query).text
        return parse_playlist_square_html(page_html)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist payload and parse it into candidates."""
        toplist_json = self.get(TOPLIST_API_URL).json()
        return parse_toplist_payload(toplist_json)
|
||||
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import requests
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
PLAYLIST_SQUARE_URL = "https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg"
|
||||
TOPLIST_URL = "https://c.y.qq.com/v8/fcg-bin/fcg_myqq_toplist.fcg"
|
||||
|
||||
|
||||
def _extract_collected_song_count(entry: dict) -> int | None:
|
||||
for key in ("songnum", "song_num", "songCount", "song_count", "trackCount", "track_count"):
|
||||
value = entry.get(key)
|
||||
if isinstance(value, bool):
|
||||
continue
|
||||
if isinstance(value, (int, float)):
|
||||
return int(value)
|
||||
if isinstance(value, str) and value.strip().isdigit():
|
||||
return int(value.strip())
|
||||
return None
|
||||
|
||||
|
||||
def parse_playlist_square_payload(payload: dict) -> list[PlaylistCandidate]:
    """Turn a playlist-square payload into playlist candidates.

    Args:
        payload: Decoded JSON object; entries are read from
            ``payload["data"]["list"]``.

    Returns:
        One candidate per entry that has a non-empty ``dissid``. A missing or
        null ``data`` / ``list`` yields an empty list.
    """
    items: list[PlaylistCandidate] = []
    # "data" may be present but null; `.get("data", {})` would return None then.
    data = payload.get("data") or {}
    for entry in data.get("list") or []:
        raw_id = entry.get("dissid")
        # A null id must not be stringified into the literal id "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        creator = entry.get("creator") or {}
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=entry.get("dissname") or remote_id,
                url=f"https://y.qq.com/n/ryqq/playlist/{remote_id}",
                cover_url=entry.get("imgurl"),
                creator_name=creator.get("name"),
                play_count=entry.get("listennum"),
                collected_song_count=_extract_collected_song_count(entry),
            )
        )
    return items
|
||||
|
||||
|
||||
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Turn a toplist payload into playlist candidates.

    Args:
        payload: Decoded JSON object; entries are read from
            ``payload["data"]["topList"]``.

    Returns:
        One candidate per entry that has a non-empty ``id``. A missing or
        null ``data`` / ``topList`` yields an empty list.
    """
    items: list[PlaylistCandidate] = []
    # "data" may be present but null; `.get("data", {})` would return None then.
    data = payload.get("data") or {}
    for entry in data.get("topList") or []:
        raw_id = entry.get("id")
        # A null id must not be stringified into the literal id "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="toplist",
                remote_id=remote_id,
                name=entry.get("topTitle") or remote_id,
                url=f"https://y.qq.com/n/ryqq/toplist/{remote_id}",
                cover_url=entry.get("picUrl"),
                play_count=entry.get("listenCount"),
                collected_song_count=_extract_collected_song_count(entry),
                parse_strategy="qq_toplist",
            )
        )
    return items
|
||||
|
||||
|
||||
class QQCollector(BaseCollector):
    """Collector that fetches playlist candidates from QQ Music endpoints."""

    def __init__(self, headers: dict[str, str] | None = None, session: requests.Session | None = None):
        """Set up the collector with y.qq.com Referer/Origin headers.

        Args:
            headers: Optional base headers; a default User-Agent is used when
                none are supplied.
            session: Optional session to reuse; a new one is created otherwise.
        """
        # Pass a copy so the update below cannot write into the caller's dict.
        # NOTE(review): only matters if BaseCollector keeps the mapping by
        # reference - confirm against BaseCollector.
        super().__init__(
            headers=dict(headers) if headers else {"User-Agent": "Mozilla/5.0"},
            session=session or requests.Session(),
        )
        self.headers.update({"Referer": "https://y.qq.com/", "Origin": "https://y.qq.com/"})

    def collect_playlist_square(
        self,
        category_id: int = 10000000,
        sort_id: int = 5,
        page: int = 1,
        page_size: int = 30,
    ) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page and parse it into candidates.

        ``page`` is 1-based. The request carries an inclusive index window
        (``sin``..``ein``) derived from ``page`` and ``page_size``.
        """
        # Clamp so a zero or negative page_size cannot produce ein < sin.
        size = max(page_size, 1)
        params = {
            "picmid": "1",
            "rnd": "0.1",
            "g_tk": "732560869",
            "loginUin": "0",
            "hostUin": "0",
            "format": "json",
            "inCharset": "utf8",
            "outCharset": "utf-8",
            "notice": "0",
            "platform": "yqq.json",
            "needNewCode": "0",
            "categoryId": str(category_id),
            "sortId": str(sort_id),
            "sin": str(max(page - 1, 0) * size),
            "ein": str(max(page, 1) * size - 1),
        }
        response = self.get(PLAYLIST_SQUARE_URL, params=params)
        return parse_playlist_square_payload(response.json())

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist payload and parse it into candidates."""
        response = self.get(TOPLIST_URL, params={"format": "json"})
        return parse_toplist_payload(response.json())
|
||||
@@ -0,0 +1,492 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
|
||||
SQLITE_BUSY_TIMEOUT_MS = 30000
|
||||
|
||||
|
||||
REQUIRED_TABLES = {
|
||||
"playlist_pools",
|
||||
"playlists",
|
||||
"playlist_download_preferences",
|
||||
"pool_playlists",
|
||||
"artist_pools",
|
||||
"artists",
|
||||
"pool_artists",
|
||||
"songs",
|
||||
"playlist_songs",
|
||||
"artist_songs",
|
||||
"storage_backends",
|
||||
"file_assets",
|
||||
"file_locations",
|
||||
"download_tasks",
|
||||
"song_backend_presence",
|
||||
"upload_tasks",
|
||||
"job_runs",
|
||||
"job_stages",
|
||||
"job_items",
|
||||
"job_workers",
|
||||
"job_commands",
|
||||
"job_events",
|
||||
"job_logs",
|
||||
"config_revisions",
|
||||
}
|
||||
|
||||
|
||||
SCHEMA_STATEMENTS = [
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS playlist_pools (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform TEXT NOT NULL,
|
||||
pool_kind TEXT NOT NULL,
|
||||
external_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
url TEXT,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(platform, pool_kind, external_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS playlists (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform TEXT NOT NULL,
|
||||
remote_playlist_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
url TEXT NOT NULL,
|
||||
parse_strategy TEXT NOT NULL DEFAULT 'playlist_url',
|
||||
cover_url TEXT,
|
||||
creator_name TEXT,
|
||||
play_count INTEGER,
|
||||
collected_song_count INTEGER,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(platform, remote_playlist_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS playlist_download_preferences (
|
||||
playlist_id INTEGER PRIMARY KEY,
|
||||
is_wanted INTEGER NOT NULL DEFAULT 1,
|
||||
marked_by TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS pool_playlists (
|
||||
pool_id INTEGER NOT NULL,
|
||||
playlist_id INTEGER NOT NULL,
|
||||
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY(pool_id, playlist_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS artist_pools (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform TEXT NOT NULL,
|
||||
pool_kind TEXT NOT NULL,
|
||||
external_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
source_playlist_pool_id INTEGER,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(platform, pool_kind, external_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS artists (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
artist_key TEXT NOT NULL UNIQUE,
|
||||
platform TEXT NOT NULL,
|
||||
remote_artist_id TEXT,
|
||||
name TEXT NOT NULL,
|
||||
normalized_name TEXT NOT NULL,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS pool_artists (
|
||||
pool_id INTEGER NOT NULL,
|
||||
artist_id INTEGER NOT NULL,
|
||||
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY(pool_id, artist_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS songs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform TEXT NOT NULL,
|
||||
remote_song_id TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
singers TEXT,
|
||||
album TEXT,
|
||||
duration_seconds INTEGER,
|
||||
ext TEXT,
|
||||
file_size_bytes INTEGER,
|
||||
quality_label TEXT,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(platform, remote_song_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS playlist_songs (
|
||||
playlist_id INTEGER NOT NULL,
|
||||
song_id INTEGER NOT NULL,
|
||||
position INTEGER,
|
||||
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY(playlist_id, song_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS artist_songs (
|
||||
artist_id INTEGER NOT NULL,
|
||||
song_id INTEGER NOT NULL,
|
||||
discovered_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY(artist_id, song_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS storage_backends (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
backend_type TEXT NOT NULL,
|
||||
base_path TEXT,
|
||||
container_name TEXT,
|
||||
config_json TEXT,
|
||||
is_default INTEGER NOT NULL DEFAULT 0,
|
||||
is_active INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS file_assets (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
song_id INTEGER NOT NULL,
|
||||
quality_label TEXT,
|
||||
ext TEXT,
|
||||
file_size_bytes INTEGER,
|
||||
checksum_sha256 TEXT,
|
||||
metadata_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS file_locations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
file_asset_id INTEGER NOT NULL,
|
||||
backend_id INTEGER NOT NULL,
|
||||
container_name TEXT,
|
||||
locator TEXT NOT NULL,
|
||||
absolute_path TEXT,
|
||||
remote_file_id TEXT,
|
||||
public_url TEXT,
|
||||
download_url TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
is_primary INTEGER NOT NULL DEFAULT 1,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(file_asset_id, backend_id, locator)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS download_tasks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
song_id INTEGER NOT NULL,
|
||||
target_backend_id INTEGER,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
attempts INTEGER NOT NULL DEFAULT 0,
|
||||
last_error TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS song_backend_presence (
|
||||
song_id INTEGER NOT NULL,
|
||||
backend_id INTEGER NOT NULL,
|
||||
has_active_file INTEGER NOT NULL DEFAULT 0,
|
||||
active_file_count INTEGER NOT NULL DEFAULT 0,
|
||||
primary_file_location_id INTEGER,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY(song_id, backend_id)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS upload_tasks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
file_asset_id INTEGER NOT NULL,
|
||||
source_location_id INTEGER NOT NULL,
|
||||
target_backend_id INTEGER NOT NULL,
|
||||
target_container_name TEXT,
|
||||
target_locator TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
attempts INTEGER NOT NULL DEFAULT 0,
|
||||
last_error TEXT,
|
||||
queued_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
started_at TEXT,
|
||||
finished_at TEXT,
|
||||
updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(file_asset_id, target_backend_id, target_locator)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_runs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_type TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'queued',
|
||||
priority INTEGER NOT NULL DEFAULT 100,
|
||||
requested_by TEXT,
|
||||
config_snapshot_json TEXT NOT NULL,
|
||||
sources TEXT,
|
||||
download_sources TEXT,
|
||||
playlist_scope_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
started_at TEXT,
|
||||
ended_at TEXT,
|
||||
last_error TEXT,
|
||||
resume_token TEXT
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_stages (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_run_id INTEGER NOT NULL,
|
||||
stage_type TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
seq_no INTEGER NOT NULL DEFAULT 0,
|
||||
total_items INTEGER NOT NULL DEFAULT 0,
|
||||
pending_items INTEGER NOT NULL DEFAULT 0,
|
||||
running_items INTEGER NOT NULL DEFAULT 0,
|
||||
success_items INTEGER NOT NULL DEFAULT 0,
|
||||
failed_items INTEGER NOT NULL DEFAULT 0,
|
||||
skipped_items INTEGER NOT NULL DEFAULT 0,
|
||||
started_at TEXT,
|
||||
ended_at TEXT,
|
||||
last_error TEXT,
|
||||
UNIQUE(job_run_id, stage_type)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_items (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_stage_id INTEGER NOT NULL,
|
||||
item_type TEXT NOT NULL,
|
||||
item_key TEXT NOT NULL,
|
||||
playlist_pool_id INTEGER,
|
||||
playlist_id INTEGER,
|
||||
song_id INTEGER,
|
||||
file_location_id INTEGER,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
attempt_count INTEGER NOT NULL DEFAULT 0,
|
||||
max_attempts INTEGER NOT NULL DEFAULT 3,
|
||||
worker_id INTEGER,
|
||||
started_at TEXT,
|
||||
ended_at TEXT,
|
||||
last_error TEXT,
|
||||
last_error_code TEXT,
|
||||
payload_json TEXT,
|
||||
UNIQUE(job_stage_id, item_key)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_workers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_run_id INTEGER,
|
||||
job_stage_id INTEGER,
|
||||
worker_name TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'idle',
|
||||
current_job_item_id INTEGER,
|
||||
current_song_id INTEGER,
|
||||
current_playlist_id INTEGER,
|
||||
current_display_text TEXT,
|
||||
heartbeat_at TEXT,
|
||||
last_progress_text TEXT,
|
||||
processed_count INTEGER NOT NULL DEFAULT 0,
|
||||
error_count INTEGER NOT NULL DEFAULT 0,
|
||||
downloaded_bytes INTEGER,
|
||||
total_bytes INTEGER,
|
||||
speed_bytes_per_sec INTEGER,
|
||||
progress_percent REAL
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_commands (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_run_id INTEGER NOT NULL,
|
||||
command_type TEXT NOT NULL,
|
||||
target_item_id INTEGER,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
applied_at TEXT,
|
||||
payload_json TEXT
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_run_id INTEGER NOT NULL,
|
||||
job_stage_id INTEGER,
|
||||
job_item_id INTEGER,
|
||||
worker_id INTEGER,
|
||||
event_type TEXT NOT NULL,
|
||||
message TEXT,
|
||||
details_json TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS job_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_run_id INTEGER NOT NULL,
|
||||
job_stage_id INTEGER,
|
||||
worker_id INTEGER,
|
||||
level TEXT NOT NULL DEFAULT 'info',
|
||||
message TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS config_revisions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source_type TEXT NOT NULL DEFAULT 'env_file',
|
||||
file_path TEXT NOT NULL,
|
||||
content_text TEXT NOT NULL,
|
||||
content_hash TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
applied_at TEXT,
|
||||
note TEXT,
|
||||
UNIQUE(source_type, file_path, content_hash)
|
||||
)
|
||||
""",
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_playlist_download_preferences_is_wanted
|
||||
ON playlist_download_preferences (is_wanted, updated_at DESC)
|
||||
""",
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_pool_playlists_playlist_id
|
||||
ON pool_playlists (playlist_id, pool_id)
|
||||
""",
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_playlist_songs_song_id
|
||||
ON playlist_songs (song_id, playlist_id)
|
||||
""",
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_file_assets_song_id
|
||||
ON file_assets (song_id)
|
||||
""",
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_job_items_running_song_id
|
||||
ON job_items (song_id, status)
|
||||
""",
|
||||
]
|
||||
|
||||
|
||||
def connect_database(db_path: str | Path) -> sqlite3.Connection:
    """Open a SQLite connection configured for this package.

    Creates the parent directory if needed, uses ``sqlite3.Row`` as the row
    factory, enables foreign keys and applies the busy timeout. WAL journaling
    and ``synchronous = NORMAL`` are attempted on a best-effort basis.

    Args:
        db_path: Location of the database file.

    Returns:
        The configured connection. The caller owns it and must close it.

    Raises:
        sqlite3.Error: If a mandatory setup statement fails; the connection is
            closed before the error propagates.
    """
    path = Path(db_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(path, timeout=SQLITE_BUSY_TIMEOUT_MS / 1000)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute(f"PRAGMA busy_timeout = {SQLITE_BUSY_TIMEOUT_MS}")
        # Best effort: an OperationalError here is deliberately ignored.
        with suppress(sqlite3.OperationalError):
            conn.execute("PRAGMA journal_mode = WAL")
        with suppress(sqlite3.OperationalError):
            conn.execute("PRAGMA synchronous = NORMAL")
    except BaseException:
        # Do not leak the handle when setup fails with a non-suppressed error
        # (e.g. DatabaseError for a file that is not a database).
        conn.close()
        raise
    return conn
|
||||
|
||||
|
||||
def ensure_default_local_backend(conn: sqlite3.Connection, library_root: str | Path) -> None:
    """Upsert the ``default-local`` storage backend row.

    Inserts a ``local_fs`` backend whose base path is the resolved
    ``library_root`` and marks it as default. If a row with that name already
    exists, its type, base path, default flag and ``updated_at`` are
    refreshed. The statement is not committed here.
    """
    upsert_sql = """
        INSERT INTO storage_backends (name, backend_type, base_path, is_default)
        VALUES (?, ?, ?, 1)
        ON CONFLICT(name) DO UPDATE SET
            backend_type = excluded.backend_type,
            base_path = excluded.base_path,
            is_default = excluded.is_default,
            updated_at = CURRENT_TIMESTAMP
        """
    base_path = str(Path(library_root).resolve())
    row_values = ("default-local", "local_fs", base_path)
    conn.execute(upsert_sql, row_values)
|
||||
|
||||
|
||||
_JOB_WORKER_THROUGHPUT_COLUMNS: dict[str, str] = {
|
||||
"downloaded_bytes": "INTEGER",
|
||||
"total_bytes": "INTEGER",
|
||||
"speed_bytes_per_sec": "INTEGER",
|
||||
"progress_percent": "REAL",
|
||||
}
|
||||
|
||||
_PLAYLIST_COLUMNS: dict[str, str] = {
|
||||
"play_count": "INTEGER",
|
||||
"collected_song_count": "INTEGER",
|
||||
}
|
||||
|
||||
|
||||
def _ensure_table_columns(
|
||||
conn: sqlite3.Connection,
|
||||
*,
|
||||
table_name: str,
|
||||
required_columns: dict[str, str],
|
||||
) -> None:
|
||||
table_exists = conn.execute(
|
||||
"SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
|
||||
(table_name,),
|
||||
).fetchone()
|
||||
if table_exists is None:
|
||||
return
|
||||
existing_columns = {
|
||||
str(row["name"])
|
||||
for row in conn.execute(f"PRAGMA table_info({table_name})").fetchall()
|
||||
}
|
||||
for column_name, column_type in required_columns.items():
|
||||
if column_name in existing_columns:
|
||||
continue
|
||||
conn.execute(
|
||||
f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
|
||||
)
|
||||
|
||||
|
||||
def _ensure_job_worker_throughput_columns(conn: sqlite3.Connection) -> None:
|
||||
_ensure_table_columns(
|
||||
conn,
|
||||
table_name="job_workers",
|
||||
required_columns=_JOB_WORKER_THROUGHPUT_COLUMNS,
|
||||
)
|
||||
|
||||
|
||||
def _ensure_playlist_columns(conn: sqlite3.Connection) -> None:
|
||||
_ensure_table_columns(
|
||||
conn,
|
||||
table_name="playlists",
|
||||
required_columns=_PLAYLIST_COLUMNS,
|
||||
)
|
||||
|
||||
|
||||
def initialize_database(
    db_path: str | Path,
    default_library_root: str | Path | None = None,
) -> sqlite3.Connection:
    """Open the database, create the schema and apply column migrations.

    Args:
        db_path: Location of the database file.
        default_library_root: When given, the directory is created and
            registered as the default local storage backend.

    Returns:
        The open connection with all changes committed. The caller owns it
        and must close it.

    Raises:
        Exception: Any error from schema creation, migration or directory
            creation; the connection is closed before the error propagates.
    """
    conn = connect_database(db_path)
    try:
        for statement in SCHEMA_STATEMENTS:
            conn.execute(statement)
        # Bring tables created by an older schema up to date.
        _ensure_job_worker_throughput_columns(conn)
        _ensure_playlist_columns(conn)
        if default_library_root is not None:
            Path(default_library_root).mkdir(parents=True, exist_ok=True)
            ensure_default_local_backend(conn, default_library_root)
        conn.commit()
    except BaseException:
        # Do not leak the handle when initialization fails part-way.
        conn.close()
        raise
    return conn
|
||||
@@ -0,0 +1,332 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from musicdl.modules.utils import SongInfo, safeextractfromdict, seconds2hms
|
||||
|
||||
|
||||
def _parse_duration_seconds(value) -> int:
|
||||
try:
|
||||
return max(int(float(value or 0)), 0)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def _has_positive_value(value) -> bool:
|
||||
try:
|
||||
return float(value or 0) > 0
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _normalize_text(value, default: str = "NULL") -> str:
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return default
|
||||
return text
|
||||
|
||||
|
||||
def _join_artist_names(value) -> str:
|
||||
if isinstance(value, (list, tuple)):
|
||||
names = []
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
name = str(item.get("name", "")).strip()
|
||||
else:
|
||||
name = str(item or "").strip()
|
||||
if name and name not in names:
|
||||
names.append(name)
|
||||
return ", ".join(names) if names else "NULL"
|
||||
text = str(value or "").replace("/", ", ").strip()
|
||||
return text or "NULL"
|
||||
|
||||
|
||||
def _normalize_audio_ext(value: str | None) -> str:
|
||||
return str(value or "").strip().lower().lstrip(".")
|
||||
|
||||
|
||||
def _remove_suffix(value: str, suffix: str) -> str:
|
||||
text = str(value or "")
|
||||
token = str(suffix or "")
|
||||
if token and text.endswith(token):
|
||||
return text[: -len(token)]
|
||||
return text
|
||||
|
||||
|
||||
def _remove_prefix(value: str, prefix: str) -> str:
|
||||
text = str(value or "")
|
||||
token = str(prefix or "")
|
||||
if token and text.startswith(token):
|
||||
return text[len(token) :]
|
||||
return text
|
||||
|
||||
|
||||
def guess_rough_audio_format(source: str, search_result: dict) -> str:
    """Guess ``"flac"`` or ``"mp3"`` from a raw search result.

    The metadata inspected depends on ``source``: size fields for
    ``QQMusicClient`` and ``NeteaseMusicClient``, format text for
    ``KuwoMusicClient``. Lossless hints are checked before lossy ones.

    Returns:
        ``"flac"``, ``"mp3"``, or ``""`` when nothing matches or the source
        is not one of the three handled above.
    """

    def _any_positive(mapping, keys) -> bool:
        # True as soon as one of the keys holds a positive size.
        return any(_has_positive_value(mapping.get(key)) for key in keys)

    def _any_quality_size(quality_keys) -> bool:
        # True as soon as one quality entry reports a positive "size".
        return any(
            _has_positive_value(safeextractfromdict(search_result, [quality_key, "size"], 0))
            for quality_key in quality_keys
        )

    source_name = str(source or "")

    if source_name == "QQMusicClient":
        file_meta = safeextractfromdict(search_result, ["file"], {}) or {}
        # Nested "file" sizes are consulted first, then the flat size fields.
        if _any_positive(file_meta, ("size_hires", "size_try", "size_flac", "size_ape")):
            return "flac"
        if _any_positive(file_meta, ("size_320mp3", "size_mp3", "size_128mp3")):
            return "mp3"
        if _any_positive(search_result, ("sizeflac", "sizeape")):
            return "flac"
        if _any_positive(search_result, ("size320", "size128", "sizeogg")):
            return "mp3"
        return ""

    if source_name == "KuwoMusicClient":
        meta_text = str(search_result.get("MINFO") or search_result.get("formats") or "").lower()
        token_groups = (
            ("flac", ("flac", "ape", "wav", "lossless", "hires")),
            ("mp3", ("mp3", "320kmp3", "192kmp3", "128kmp3")),
        )
        for audio_format, tokens in token_groups:
            if any(token in meta_text for token in tokens):
                return audio_format
        return ""

    if source_name == "NeteaseMusicClient":
        if _any_quality_size(("hr", "sq")):
            return "flac"
        if _any_quality_size(("h", "m", "l")):
            return "mp3"
        return ""

    return ""
|
||||
|
||||
|
||||
def build_deferred_song_info(
    source: str,
    raw_search_result: dict,
    identifier,
    song_name,
    singers,
    album: str | None = None,
    duration_s: int | float = 0,
    cover_url: str | None = None,
    ext: str | None = None,
) -> SongInfo:
    """Build a ``SongInfo`` that carries metadata but no download details.

    The raw search result is deep-copied into ``raw_data`` next to a
    ``deferred_search`` flag. File size and download URL are left unset, the
    lyric is ``"NULL"``, and text fields fall back to ``"NULL"`` when empty.
    """
    seconds = _parse_duration_seconds(duration_s)
    # Placeholder text when the duration is unknown or zero.
    duration_text = seconds2hms(seconds) if seconds > 0 else "-:-:-"
    cleaned_cover = str(cover_url or "").strip() or None
    raw_payload = {
        "search": copy.deepcopy(raw_search_result or {}),
        "deferred_search": True,
    }
    return SongInfo(
        raw_data=raw_payload,
        source=str(source),
        song_name=_normalize_text(song_name),
        singers=_normalize_text(singers),
        album=_normalize_text(album),
        ext=_normalize_audio_ext(ext),
        file_size_bytes=None,
        file_size=None,
        identifier=str(identifier or "").strip(),
        duration_s=seconds,
        duration=duration_text,
        lyric="NULL",
        cover_url=cleaned_cover,
        download_url=None,
        download_url_status={},
    )
|
||||
|
||||
|
||||
def _apply_work_dir(client, playlist_name: str, song_infos: list[SongInfo]) -> list[SongInfo]:
|
||||
if not song_infos:
|
||||
return []
|
||||
if hasattr(client, "_constructuniqueworkdir") and callable(client._constructuniqueworkdir):
|
||||
work_dir = client._constructuniqueworkdir(keyword=playlist_name)
|
||||
for song_info in song_infos:
|
||||
song_info.work_dir = work_dir
|
||||
if hasattr(client, "_removeduplicates") and callable(client._removeduplicates):
|
||||
return client._removeduplicates(song_infos=song_infos)
|
||||
return song_infos
|
||||
|
||||
|
||||
def _extract_playlist_id_from_url(playlist_url: str, query_keys: tuple[str, ...] = ("id", "pid", "bangId")) -> str:
|
||||
parsed = urlparse(str(playlist_url or "").strip())
|
||||
query_candidates = [parsed.query]
|
||||
fragment = str(parsed.fragment or "").strip()
|
||||
if fragment:
|
||||
fragment_url = fragment if "://" in fragment else f"https://placeholder{fragment if fragment.startswith('/') else '/' + fragment}"
|
||||
query_candidates.append(urlparse(fragment_url).query)
|
||||
for query_text in query_candidates:
|
||||
parsed_query = parse_qs(query_text, keep_blank_values=True)
|
||||
for query_key in query_keys:
|
||||
candidate = str((parsed_query.get(query_key) or [""])[0]).strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
for path_part in reversed([part for part in parsed.path.split("/") if part]):
|
||||
candidate = _remove_suffix(_remove_suffix(str(path_part), ".html"), ".htm").strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
return ""
|
||||
|
||||
|
||||
def build_netease_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Build deferred ``SongInfo`` objects for every track of a NetEase playlist.

    Steps:
      1. Resolve the playlist id from ``playlist_url`` and request the
         playlist detail.
      2. Index the tracks already embedded in that response by id.
      3. Request details for the referenced tracks that are still missing,
         in slices of up to 200 ids (non-numeric ids are not requested).
      4. Emit one ``SongInfo`` per track reference, in playlist order,
         skipping references for which no detail is known.

    Returns:
        The songs after ``_apply_work_dir``; an empty list when the id cannot
        be resolved or the playlist lists no tracks.
    """
    detail_url = "https://interface3.music.163.com/api/v3/song/detail"
    batch_size = 200

    overrides = copy.deepcopy(request_overrides or {})
    overrides.setdefault("timeout", (10, 30))

    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id",))
    if not playlist_id:
        return []

    response = client.post("https://music.163.com/api/v6/playlist/detail", data={"id": playlist_id}, **overrides)
    response.raise_for_status()
    playlist_info = safeextractfromdict(response.json(), ["playlist"], {}) or {}
    track_refs = safeextractfromdict(playlist_info, ["trackIds"], []) or []
    playlist_name = _normalize_text(playlist_info.get("name"), f"playlist-{playlist_id}")
    if not track_refs:
        return []

    # Details already embedded in the playlist response, keyed by track id.
    details_by_id: dict[str, dict] = {}
    for embedded in safeextractfromdict(playlist_info, ["tracks"], []) or []:
        embedded_id = str(embedded.get("id") or "").strip()
        if embedded_id:
            details_by_id[embedded_id] = embedded

    # Normalized id of each reference, aligned with track_refs.
    ref_ids = [str(ref.get("id") or "").strip() for ref in track_refs]
    missing_ids = [ref_id for ref_id in ref_ids if ref_id and ref_id not in details_by_id]

    for start in range(0, len(missing_ids), batch_size):
        window = missing_ids[start : start + batch_size]
        numeric_ids = [ref_id for ref_id in window if ref_id.isdigit()]
        if not numeric_ids:
            continue
        payload = json.dumps(
            [{"id": int(ref_id), "v": 0} for ref_id in numeric_ids],
            ensure_ascii=False,
            separators=(",", ":"),
        )
        detail_response = client.post(detail_url, data={"c": payload}, **overrides)
        detail_response.raise_for_status()
        for fetched in detail_response.json().get("songs", []) or []:
            fetched_id = str(fetched.get("id") or "").strip()
            if fetched_id:
                details_by_id[fetched_id] = fetched

    song_infos: list[SongInfo] = []
    for track_id in ref_ids:
        track_info = details_by_id.get(track_id)
        if not track_id or not isinstance(track_info, dict):
            continue
        # "dt" is divided by 1000 to obtain seconds.
        duration_value = 0
        if str(track_info.get("dt", "")).strip():
            try:
                duration_value = float(track_info.get("dt", 0) or 0) / 1000
            except Exception:
                duration_value = 0
        song_infos.append(
            build_deferred_song_info(
                source=client.source,
                raw_search_result=track_info,
                identifier=track_id,
                song_name=track_info.get("name"),
                singers=_join_artist_names(track_info.get("ar") or []),
                album=safeextractfromdict(track_info, ["al", "name"], None),
                duration_s=duration_value,
                cover_url=safeextractfromdict(track_info, ["al", "picUrl"], None),
                ext=guess_rough_audio_format(client.source, track_info),
            )
        )
    return _apply_work_dir(client, playlist_name, song_infos)
|
||||
|
||||
|
||||
def build_qq_raw_track_song_infos(client, raw_tracks: list[dict], playlist_name: str | None = None) -> list[SongInfo]:
    """Convert raw QQ track dicts into deferred ``SongInfo`` objects.

    Tracks without any identifier (``mid``, ``songmid``, ``songid`` or
    ``id``) are skipped. Title, album and album-mid are read from either the
    nested or the flat field names. The result is passed through
    ``_apply_work_dir``, with ``"playlist"`` as the fallback name.
    """
    id_keys = ("mid", "songmid", "songid", "id")
    title_keys = ("title", "songname", "name")

    song_infos: list[SongInfo] = []
    for track in raw_tracks or []:
        # First truthy identifier among the known key spellings.
        track_id = next(filter(None, (track.get(key) for key in id_keys)), None)
        if not track_id:
            continue

        cover_mid = safeextractfromdict(track, ["album", "mid"], "") or track.get("albummid")
        cover_url = None
        if cover_mid:
            cover_url = f"https://y.gtimg.cn/music/photo_new/T002R800x800M000{cover_mid}.jpg"

        # First truthy title; otherwise the value stored under "name".
        title = next(filter(None, (track.get(key) for key in title_keys[:-1])), None)
        if not title:
            title = track.get(title_keys[-1])

        album_name = safeextractfromdict(track, ["album", "title"], None) or track.get("albumname")

        song_infos.append(
            build_deferred_song_info(
                source=client.source,
                raw_search_result=track,
                identifier=track_id,
                song_name=title,
                singers=_join_artist_names(track.get("singer") or []),
                album=album_name,
                duration_s=track.get("interval", 0),
                cover_url=cover_url,
                ext=guess_rough_audio_format(client.source, track),
            )
        )
    resolved_name = _normalize_text(playlist_name, "playlist")
    return _apply_work_dir(client, resolved_name, song_infos)
|
||||
|
||||
|
||||
def build_qq_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Fetch a QQ Music playlist by URL and build deferred song infos for it.

    Returns an empty list when no playlist id can be extracted from
    ``playlist_url``. HTTP errors surface through ``raise_for_status``.
    """
    overrides = copy.deepcopy(request_overrides or {})
    if "timeout" not in overrides:
        overrides["timeout"] = (10, 30)

    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id", "disstid"))
    if not playlist_id:
        return []

    query = {"disstid": str(playlist_id), "type": "1", "json": "1", "utf8": "1", "onlysong": "0", "format": "json"}
    response = client.get(
        "https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg",
        headers={"Referer": f"https://y.qq.com/n/ryqq/playlist/{playlist_id}"},
        params=query,
        **overrides,
    )
    response.raise_for_status()
    payload = response.json()

    # Probe the known locations of the track list; the first non-empty one wins.
    raw_tracks = []
    for location in (["cdlist", 0, "songlist"], ["cdlist", 0, "list"], ["songlist"]):
        candidate = safeextractfromdict(payload, location, [])
        if candidate:
            raw_tracks = candidate
            break

    playlist_name = safeextractfromdict(payload, ["cdlist", 0, "dissname"], None)
    if not playlist_name:
        playlist_name = f"playlist-{playlist_id}"
    return build_qq_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_name)
|
||||
|
||||
|
||||
def build_kuwo_raw_track_song_infos(client, raw_tracks: list[dict], playlist_name: str | None = None) -> list[SongInfo]:
    """Turn raw Kuwo track dicts into deferred ``SongInfo`` entries.

    Both the upper-case field names (``MUSICRID``, ``SONGNAME``, ...) and the
    lower-case ones are accepted. Tracks whose id is empty after the
    ``MUSIC_`` prefix is removed are skipped.
    """
    collected: list[SongInfo] = []
    for raw in raw_tracks or []:
        raw_id = raw.get("MUSICRID") or raw.get("musicrid") or raw.get("rid") or ""
        identifier = _remove_prefix(str(raw_id), "MUSIC_")
        if not identifier:
            continue
        duration = raw.get("DURATION") or raw.get("duration", 0)
        entry = build_deferred_song_info(
            source=client.source,
            raw_search_result=raw,
            identifier=identifier,
            song_name=raw.get("SONGNAME") or raw.get("name"),
            singers=raw.get("ARTIST") or raw.get("artist"),
            album=raw.get("ALBUM") or raw.get("album"),
            duration_s=duration,
            cover_url=raw.get("hts_MVPIC") or raw.get("albumpic") or raw.get("pic"),
            ext=guess_rough_audio_format(client.source, raw),
        )
        collected.append(entry)
    return _apply_work_dir(client, _normalize_text(playlist_name, "playlist"), collected)
|
||||
|
||||
|
||||
def build_kuwo_playlist_song_infos(client, playlist_url: str, request_overrides: dict | None = None) -> list[SongInfo]:
    """Fetch every page of a Kuwo playlist and build deferred song infos.

    Pages of 100 tracks are requested until a page comes back empty or the
    number of collected tracks reaches the ``data.total`` reported by the
    API. Returns an empty list when no playlist id can be extracted from
    ``playlist_url``. HTTP errors surface through ``raise_for_status``.

    Tracks are de-duplicated on the same identifier that
    ``build_kuwo_raw_track_song_infos`` derives (``MUSICRID`` / ``musicrid``
    / ``rid`` with the ``MUSIC_`` prefix removed), so both functions agree on
    what counts as "the same track".
    """
    request_overrides = copy.deepcopy(request_overrides or {})
    request_overrides.setdefault("timeout", (10, 30))
    playlist_id = _extract_playlist_id_from_url(playlist_url, query_keys=("id", "pid"))
    if not playlist_id:
        return []

    raw_tracks: list[dict] = []
    page = 1
    playlist_result_first = {}
    while True:
        response = client.get(
            f"https://m.kuwo.cn/newh5app/wapi/api/www/playlist/playListInfo?pid={playlist_id}&pn={page}&rn=100",
            **request_overrides,
        )
        response.raise_for_status()
        playlist_result = response.json()
        page_tracks = safeextractfromdict(playlist_result, ["data", "musicList"], []) or []
        if not page_tracks:
            break
        raw_tracks.extend(page_tracks)
        page += 1
        # Keep the first page around: the playlist name is read from it below.
        if not playlist_result_first:
            playlist_result_first = copy.deepcopy(playlist_result)
        if float(safeextractfromdict(playlist_result, ["data", "total"], 0) or 0) <= len(raw_tracks):
            break

    # A later duplicate replaces the earlier entry but keeps its position.
    deduped: dict[str, dict] = {}
    for track in raw_tracks:
        track_key = _remove_prefix(
            str(track.get("MUSICRID") or track.get("musicrid") or track.get("rid") or ""),
            "MUSIC_",
        )
        if not track_key:
            # No usable id: build_kuwo_raw_track_song_infos skips such tracks
            # anyway, so they must not collapse onto a shared "" key here.
            continue
        deduped[track_key] = track

    playlist_name = safeextractfromdict(playlist_result_first, ["data", "name"], None) or f"playlist-{playlist_id}"
    return build_kuwo_raw_track_song_infos(client, list(deduped.values()), playlist_name=playlist_name)
|
||||
@@ -0,0 +1,684 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import signal
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .models import deserialize_song_info, normalize_source_name
|
||||
from .repository import CatalogRepository
|
||||
from .resolver import DEFAULT_DOWNLOAD_SOURCES, MultiSourceSongResolver, SOURCE_CLIENT_NAMES, normalize_audio_ext
|
||||
from .resolver_stats import ResolverStatsRepository, default_resolver_stats_db_path
|
||||
from .runtime import build_download_relative_dir
|
||||
from musicdl.modules.utils.lyric import LyricSearchClient
|
||||
from musicdl.modules.utils.misc import shortenpathsinsonginfos
|
||||
from musicdl.modules.utils.songinfoutils import SongInfoUtils
|
||||
|
||||
# Audio extensions that CatalogDownloader._detect_quality_label labels "lossless".
LOSSLESS_EXTENSIONS = {"flac", "wav", "alac", "ape", "wv", "tta", "dsf", "dff"}
# Default thread-pool size used by CatalogDownloader when worker_count is not given.
DEFAULT_DOWNLOAD_WORKERS = 10
# Default limit, in seconds, applied by the lyric-search timeout guard.
DEFAULT_LYRIC_SEARCH_TIMEOUT_SECONDS = 20
|
||||
|
||||
|
||||
class _LyricSearchTimeout(BaseException):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
class ResolvedDownloadPayload:
    """Everything ``download_resolved_song`` needs to download one song."""

    # Catalog row (as a plain dict) that the payload was resolved from.
    row: dict[str, object]
    # Human-readable "name / singers" label used in progress updates.
    display_text: str
    # Library root the download run was started with.
    default_root: Path
    # Root actually chosen for this file (may differ after a disk-space prompt).
    target_root: Path
    # Identifier returned by the repository for the backend at ``target_root``.
    backend_id: int
    # Expected file size in bytes, or None when unknown.
    expected_bytes: int | None
    # Song-info object produced by the multi-source resolver.
    resolved_song_info: object
|
||||
|
||||
|
||||
def _progress_percent(completed: int | None, total: int | None) -> int:
|
||||
normalized_total = max(int(total or 0), 0)
|
||||
normalized_completed = max(int(completed or 0), 0)
|
||||
if normalized_total <= 0:
|
||||
return 0
|
||||
if normalized_completed >= normalized_total:
|
||||
return 100
|
||||
return int((normalized_completed * 100) / normalized_total)
|
||||
|
||||
|
||||
def _format_progress_text(downloaded_bytes: int | None, total_bytes: int | None) -> str:
|
||||
downloaded_value = max(int(downloaded_bytes or 0), 0)
|
||||
total_value = max(int(total_bytes or 0), downloaded_value)
|
||||
return f"{downloaded_value / 1024 / 1024:.2f}MB/{total_value / 1024 / 1024:.2f}MB"
|
||||
|
||||
|
||||
class DownloadPlanner:
    """Builds the queue of catalog songs that still need to be downloaded."""

    def __init__(self, repository: CatalogRepository):
        self.repository = repository

    def build_download_queue(
        self,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
    ) -> list[dict]:
        """Return pending rows as dicts, each with an added integer ``song_id``.

        Rows whose song already has an active local file are left out.
        """
        pending_rows = self.repository.list_pending_download_songs(
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        queue = []
        for pending in pending_rows:
            song_id = int(pending["id"])
            if not self.repository.song_has_active_local_file(song_id):
                queue.append({**dict(pending), "song_id": song_id})
        return queue
|
||||
|
||||
|
||||
class CatalogDownloader:
|
||||
def __init__(
    self,
    repository: CatalogRepository,
    work_dir: str = "musicdl_outputs/catalogsync",
    worker_count: int = DEFAULT_DOWNLOAD_WORKERS,
):
    """Set up the downloader's state and its multi-source resolver.

    ``worker_count`` is clamped to at least 1. Resolver statistics are kept
    in a database whose path is derived from ``repository.db_path``.
    """
    self.repository = repository
    self.work_dir = work_dir
    self.worker_count = max(1, worker_count)

    # Client cache, filled lazily by get_client and guarded by _client_lock.
    self._clients: dict[str, object] = {}
    self._client_lock = threading.Lock()

    # Active library root, guarded by _space_lock.
    self._space_lock = threading.Lock()
    self._current_library_root: Path | None = None

    self._lyric_search_timeout_seconds = DEFAULT_LYRIC_SEARCH_TIMEOUT_SECONDS

    stats_db_path = default_resolver_stats_db_path(self.repository.db_path)
    stats_repository = ResolverStatsRepository(stats_db_path)
    self._resolver = MultiSourceSongResolver(
        client_factory=lambda platform: self.get_client(platform),
        request_overrides_factory=lambda timeout: self._request_overrides(timeout),
        resolver_stats_repo=stats_repository,
    )
|
||||
|
||||
@contextmanager
|
||||
def _lyric_search_timeout_guard(self):
|
||||
timeout_seconds = float(self._lyric_search_timeout_seconds or 0)
|
||||
if timeout_seconds <= 0:
|
||||
yield
|
||||
return
|
||||
if threading.current_thread() is not threading.main_thread():
|
||||
yield
|
||||
return
|
||||
if not hasattr(signal, "SIGALRM") or not hasattr(signal, "setitimer"):
|
||||
yield
|
||||
return
|
||||
|
||||
def _handle_timeout(_signum, _frame):
|
||||
raise _LyricSearchTimeout()
|
||||
|
||||
previous_handler = signal.getsignal(signal.SIGALRM)
|
||||
signal.signal(signal.SIGALRM, _handle_timeout)
|
||||
signal.setitimer(signal.ITIMER_REAL, timeout_seconds)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
signal.setitimer(signal.ITIMER_REAL, 0)
|
||||
signal.signal(signal.SIGALRM, previous_handler)
|
||||
|
||||
@staticmethod
|
||||
def _request_overrides(timeout: tuple[int, int]) -> dict:
|
||||
return {"timeout": timeout}
|
||||
|
||||
def get_client(self, platform: str):
    """Return the music client for ``platform`` that belongs to the calling thread.

    Clients are cached under a ``"<thread ident>:<platform>"`` key. A missing
    entry is created while holding ``_client_lock``, after checking the cache
    a second time under the lock.
    """
    platform = normalize_source_name(platform)
    cache_key = f"{threading.get_ident()}:{platform}"
    if cache_key in self._clients:
        return self._clients[cache_key]

    with self._client_lock:
        if cache_key not in self._clients:
            from musicdl.modules import BuildMusicClient

            client_config = {
                "type": SOURCE_CLIENT_NAMES[platform],
                "disable_print": True,
                "maintain_session": False,
                "work_dir": self.work_dir,
                "search_size_per_source": 5,
                "search_size_per_page": 5,
                "strict_limit_search_size_per_page": True,
            }
            self._clients[cache_key] = BuildMusicClient(client_config)
    return self._clients[cache_key]
|
||||
|
||||
def ensure_space(self, root_path: str | Path, required_bytes: int | None) -> Path:
    """Return a library root with at least ``required_bytes`` free.

    The current library root, if one is already set, takes precedence over
    ``root_path``. While free space is insufficient the user is prompted on
    stdin for another directory; an empty answer raises ``RuntimeError``.
    The chosen root is remembered as the current library root. Everything
    runs while holding ``_space_lock``.
    """
    with self._space_lock:
        root = self._current_library_root or Path(root_path).resolve()
        root.mkdir(parents=True, exist_ok=True)
        size_known = not (required_bytes is None or required_bytes <= 0)
        if size_known:
            while shutil.disk_usage(root).free < required_bytes:
                answer = input("磁盘空间不足,请输入新的下载目录继续: ").strip()
                if not answer:
                    raise RuntimeError("Disk space is insufficient and no new directory was provided")
                root = Path(answer).resolve()
                root.mkdir(parents=True, exist_ok=True)
        self._current_library_root = root
        return root
|
||||
|
||||
def _initialize_library_root(self, root_path: str | Path) -> Path:
|
||||
normalized_root = Path(root_path).resolve()
|
||||
with self._space_lock:
|
||||
if self._current_library_root is None:
|
||||
self._current_library_root = normalized_root
|
||||
return self._current_library_root
|
||||
|
||||
@staticmethod
|
||||
def _normalize_singers(value: object) -> str | None:
|
||||
if not isinstance(value, str):
|
||||
return None
|
||||
text = value.strip()
|
||||
if not text or text.upper() == "NULL":
|
||||
return None
|
||||
return text
|
||||
|
||||
@staticmethod
def _detect_download_platform(song_info: object, fallback_platform: str) -> str:
    """Return the normalized source of ``song_info``, or the normalized fallback.

    The fallback is used when the song info's ``source`` normalizes to
    ``"unknown"``.
    """
    detected = normalize_source_name(getattr(song_info, "source", None))
    return normalize_source_name(fallback_platform) if detected == "unknown" else detected
|
||||
|
||||
def resolve_song_info_for_download(
    self,
    row: dict,
    song_info: object,
    download_sources: list[str] | None = None,
    progress_callback=None,
) -> object:
    """Delegate to the multi-source resolver and return its result.

    An empty or missing ``download_sources`` falls back to
    ``DEFAULT_DOWNLOAD_SOURCES``.
    """
    effective_sources = download_sources if download_sources else DEFAULT_DOWNLOAD_SOURCES
    return self._resolver.resolve_song_info(
        row=row,
        snapshot_song_info=song_info,
        download_sources=effective_sources,
        progress_callback=progress_callback,
    )
|
||||
|
||||
@staticmethod
|
||||
def _detect_quality_label(song_info: object, actual_ext: str | None, fallback: str | None = None) -> str | None:
|
||||
raw_data = getattr(song_info, "raw_data", None)
|
||||
if isinstance(raw_data, dict):
|
||||
quality = raw_data.get("quality")
|
||||
if quality:
|
||||
return str(quality)
|
||||
normalized_ext = normalize_audio_ext(actual_ext or getattr(song_info, "ext", None))
|
||||
if normalized_ext in LOSSLESS_EXTENSIONS:
|
||||
return "lossless"
|
||||
if normalized_ext:
|
||||
return "standard"
|
||||
return fallback
|
||||
|
||||
@staticmethod
|
||||
def _build_display_text(row: dict) -> str:
|
||||
display_name = str(row.get("name") or row.get("id") or "")
|
||||
singers = str(row.get("singers") or "").strip()
|
||||
return f"{display_name} / {singers}".strip(" /")
|
||||
|
||||
@staticmethod
|
||||
def _normalize_lyric_text(value: object) -> str | None:
|
||||
if not isinstance(value, str):
|
||||
return None
|
||||
text = value.replace("\r\n", "\n").strip()
|
||||
if not text or text.upper() == "NULL":
|
||||
return None
|
||||
return text
|
||||
|
||||
def _resolve_lyrics_text(self, *, song_info: object | None, row: dict[str, object] | None = None) -> str | None:
|
||||
lyric_text = self._normalize_lyric_text(getattr(song_info, "lyric", None))
|
||||
if lyric_text:
|
||||
return lyric_text
|
||||
row = row or {}
|
||||
title = self._normalize_lyric_text(getattr(song_info, "song_name", None)) or self._normalize_lyric_text(row.get("name"))
|
||||
singers = self._normalize_singers(getattr(song_info, "singers", None)) or self._normalize_singers(row.get("singers"))
|
||||
if not title or not singers:
|
||||
return None
|
||||
try:
|
||||
with self._lyric_search_timeout_guard():
|
||||
_lyric_result, lyric = LyricSearchClient.search(track_name=title, artist_name=singers)
|
||||
except _LyricSearchTimeout:
|
||||
return None
|
||||
return self._normalize_lyric_text(lyric)
|
||||
|
||||
def _sync_lyrics_for_saved_song(
|
||||
self,
|
||||
*,
|
||||
row: dict[str, object],
|
||||
song_info: object | None,
|
||||
saved_path: Path,
|
||||
overwrite_lyrics: bool,
|
||||
worker_callback=None,
|
||||
display_text: str | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
lyric_text = self._resolve_lyrics_text(song_info=song_info, row=row)
|
||||
if not lyric_text:
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
worker_callback,
|
||||
display_text=display_text,
|
||||
last_progress_text="lyrics unavailable",
|
||||
)
|
||||
return "skipped"
|
||||
if hasattr(song_info, "lyric"):
|
||||
song_info.lyric = lyric_text
|
||||
lrc_path = saved_path.with_suffix(".lrc")
|
||||
if lrc_path.exists() and not overwrite_lyrics:
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
worker_callback,
|
||||
display_text=display_text,
|
||||
last_progress_text="lyrics exists, skipped",
|
||||
)
|
||||
return "skipped"
|
||||
saved = SongInfoUtils.savelrctofile(saved_path, lyric_text, overwrite=overwrite_lyrics)
|
||||
if saved:
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
worker_callback,
|
||||
display_text=display_text,
|
||||
last_progress_text="lyrics saved",
|
||||
)
|
||||
return "saved"
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
worker_callback,
|
||||
display_text=display_text,
|
||||
last_progress_text="lyrics skipped",
|
||||
)
|
||||
return "skipped"
|
||||
except Exception as exc:
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
worker_callback,
|
||||
display_text=display_text,
|
||||
last_progress_text=f"lyrics failed: {type(exc).__name__}: {exc}",
|
||||
)
|
||||
return "failed"
|
||||
|
||||
@staticmethod
|
||||
def _emit_worker_progress(
|
||||
row: dict,
|
||||
progress_callback,
|
||||
*,
|
||||
display_text: str | None,
|
||||
downloaded_bytes: int | None = None,
|
||||
total_bytes: int | None = None,
|
||||
speed_bytes_per_sec: int | None = None,
|
||||
progress_percent: int | None = None,
|
||||
last_progress_text: str | None = None,
|
||||
) -> None:
|
||||
if progress_callback is None:
|
||||
return
|
||||
state: dict[str, object] = {
|
||||
"current_song_id": int(row["id"]) if row.get("id") is not None else None,
|
||||
"current_playlist_id": row.get("playlist_id"),
|
||||
"current_display_text": display_text,
|
||||
}
|
||||
if downloaded_bytes is not None:
|
||||
state["downloaded_bytes"] = int(downloaded_bytes)
|
||||
if total_bytes is not None:
|
||||
state["total_bytes"] = int(total_bytes)
|
||||
if speed_bytes_per_sec is not None:
|
||||
state["speed_bytes_per_sec"] = int(speed_bytes_per_sec)
|
||||
if progress_percent is not None:
|
||||
state["progress_percent"] = int(progress_percent)
|
||||
if last_progress_text is not None:
|
||||
state["last_progress_text"] = str(last_progress_text)
|
||||
progress_callback(**state)
|
||||
|
||||
def _monitor_save_path(
|
||||
self,
|
||||
*,
|
||||
save_path: Path,
|
||||
expected_bytes: int | None,
|
||||
progress_callback,
|
||||
stop_event: threading.Event,
|
||||
row: dict,
|
||||
display_text: str | None,
|
||||
interval_seconds: float = 0.02,
|
||||
) -> None:
|
||||
last_size = 0
|
||||
last_change_at = time.monotonic()
|
||||
while not stop_event.wait(interval_seconds):
|
||||
if not save_path.exists():
|
||||
continue
|
||||
try:
|
||||
current_size = int(save_path.stat().st_size)
|
||||
except OSError:
|
||||
continue
|
||||
if current_size <= last_size:
|
||||
continue
|
||||
now = time.monotonic()
|
||||
delta_bytes = current_size - last_size
|
||||
delta_seconds = max(now - last_change_at, 1e-6)
|
||||
total_bytes = int(expected_bytes or current_size)
|
||||
self._emit_worker_progress(
|
||||
row,
|
||||
progress_callback,
|
||||
display_text=display_text,
|
||||
downloaded_bytes=current_size,
|
||||
total_bytes=total_bytes,
|
||||
speed_bytes_per_sec=int(delta_bytes / delta_seconds),
|
||||
progress_percent=_progress_percent(current_size, total_bytes),
|
||||
last_progress_text=_format_progress_text(current_size, total_bytes),
|
||||
)
|
||||
last_size = current_size
|
||||
last_change_at = now
|
||||
|
||||
def resolve_song_row(
    self,
    row,
    library_root: str | Path,
    download_sources: list[str] | None = None,
    worker_callback=None,
) -> ResolvedDownloadPayload | None:
    """Resolve a snapshot into a downloadable payload and choose the active local target.

    Args:
        row: Catalog row (anything ``dict()`` accepts); its ``metadata_json``
            is expected to hold a JSON object with a ``snapshot`` entry.
        library_root: Default library root for this run.
        download_sources: Sources handed to the resolver; None uses the
            resolver default.
        worker_callback: Optional progress callback, called with keyword
            arguments.

    Returns:
        The payload for ``download_resolved_song``, or None when the row has
        no usable snapshot or the resolver produced nothing.
    """
    row_dict = dict(row)
    default_root = self._initialize_library_root(library_root)
    self.repository.ensure_local_backend(default_root, name="default-local", is_default=True)
    display_text = self._build_display_text(row_dict)
    self._emit_worker_progress(
        row_dict,
        worker_callback,
        display_text=display_text,
    )

    metadata = json.loads(row_dict["metadata_json"]) if row_dict.get("metadata_json") else {}
    # Valid JSON need not be an object; treat anything else as "no snapshot"
    # (the same guard sync_local_lyrics uses) instead of failing on .get().
    song_info = deserialize_song_info(metadata.get("snapshot")) if isinstance(metadata, dict) else None
    if song_info is None:
        return None

    resolve_progress_callback = None
    if worker_callback is not None:

        def resolve_progress_callback(message):
            return self._emit_worker_progress(
                row_dict,
                worker_callback,
                display_text=display_text,
                last_progress_text=message,
            )

    resolved_song_info = self.resolve_song_info_for_download(
        row=row_dict,
        song_info=song_info,
        download_sources=download_sources,
        progress_callback=resolve_progress_callback,
    )
    if resolved_song_info is None:
        return None

    # Prefer the size reported by the resolved song, then the catalog row.
    size_hint = getattr(resolved_song_info, "file_size_bytes", None) or row_dict.get("file_size_bytes")
    target_root = self.ensure_space(default_root, size_hint)
    is_default_root = target_root.resolve() == default_root
    backend_id = self.repository.ensure_local_backend(
        target_root,
        name="default-local" if is_default_root else None,
        is_default=is_default_root,
    )
    # A size of 0 means "unknown" and is stored as None.
    expected_bytes = int(size_hint or 0) or None
    return ResolvedDownloadPayload(
        row=row_dict,
        display_text=display_text,
        default_root=default_root,
        target_root=target_root,
        backend_id=backend_id,
        expected_bytes=expected_bytes,
        resolved_song_info=resolved_song_info,
    )
|
||||
|
||||
def download_resolved_song(
    self,
    resolved_payload: ResolvedDownloadPayload,
    worker_callback=None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> bool:
    """Download one resolved song, record the local file and optionally sync lyrics.

    Args:
        resolved_payload: Payload produced by ``resolve_song_row``.
        worker_callback: Optional progress callback, called with keyword
            arguments. When given, a daemon thread watches the target file
            and reports its growth during the download.
        lyrics_enabled: Whether to write an ``.lrc`` file after the download.
        overwrite_lyrics: Whether an existing ``.lrc`` file may be replaced.

    Returns:
        True when the client reported a downloaded song, False when it
        returned nothing.
    """
    row = resolved_payload.row
    song_info = resolved_payload.resolved_song_info
    # The platform comes from the resolved song, falling back to the row's platform.
    download_platform = self._detect_download_platform(song_info, str(row["platform"]))
    client = self.get_client(download_platform)
    singers = self._normalize_singers(getattr(song_info, "singers", None)) or self._normalize_singers(
        row.get("singers")
    )
    relative_dir = build_download_relative_dir(platform=download_platform, singers=singers)
    target_dir = resolved_payload.target_root / relative_dir
    target_dir.mkdir(parents=True, exist_ok=True)
    song_info.work_dir = str(target_dir)
    # Presumably clears a cached save path so it is derived from the new
    # work_dir -- TODO confirm against the SongInfo implementation.
    if hasattr(song_info, "_save_path"):
        song_info._save_path = None
    save_path: Path | None = None
    monitor_stop: threading.Event | None = None
    monitor_thread: threading.Thread | None = None
    self._emit_worker_progress(
        row,
        worker_callback,
        display_text=resolved_payload.display_text,
        last_progress_text=f"starting download via {download_platform}",
    )
    # Progress monitoring needs a callback to report to and a known save path.
    if worker_callback is not None and hasattr(song_info, "save_path"):
        shortenpathsinsonginfos([song_info])
        save_path = Path(song_info.save_path)
        monitor_stop = threading.Event()
        monitor_thread = threading.Thread(
            target=self._monitor_save_path,
            kwargs={
                "save_path": save_path,
                "expected_bytes": resolved_payload.expected_bytes,
                "progress_callback": worker_callback,
                "stop_event": monitor_stop,
                "row": row,
                "display_text": resolved_payload.display_text,
            },
            daemon=True,
            name=f"download-monitor-{row.get('id')}",
        )
        monitor_thread.start()
    try:
        downloaded = client.download(
            [song_info],
            num_threadings=1,
            request_overrides=self._request_overrides((10, 60)),
            auto_supplement_song=False,
        )
    except TypeError:
        # Retry without request_overrides; presumably for clients whose
        # download() does not accept that argument. Note that any TypeError
        # raised inside the first call also triggers this retry.
        downloaded = client.download(
            [song_info],
            num_threadings=1,
            auto_supplement_song=False,
        )
    finally:
        # Always stop the monitor thread, even when the download raised.
        if monitor_stop is not None:
            monitor_stop.set()
        if monitor_thread is not None:
            monitor_thread.join(timeout=1.0)
    # NOTE(review): the source's indentation was lost; this final progress
    # report is assumed to follow the try/finally rather than sit inside the
    # finally block -- confirm against the original file.
    if save_path is not None and save_path.exists():
        try:
            final_size = int(save_path.stat().st_size)
        except OSError:
            final_size = 0
        if final_size > 0:
            total_bytes = int(resolved_payload.expected_bytes or final_size)
            self._emit_worker_progress(
                row,
                worker_callback,
                display_text=resolved_payload.display_text,
                downloaded_bytes=final_size,
                total_bytes=total_bytes,
                progress_percent=_progress_percent(final_size, total_bytes),
                last_progress_text=_format_progress_text(final_size, total_bytes),
            )
    if not downloaded:
        return False
    saved_song = downloaded[0]
    saved_path = Path(saved_song.save_path)
    # Stored relative to the target root; relative_to raises ValueError if
    # the saved file is not located under that root.
    relative_path = saved_path.relative_to(resolved_payload.target_root).as_posix()
    actual_size = saved_path.stat().st_size if saved_path.exists() else row.get("file_size_bytes")
    actual_ext = saved_path.suffix.lstrip(".") or row.get("ext")
    self.repository.record_local_file(
        song_id=int(row["id"]),
        backend_id=resolved_payload.backend_id,
        relative_path=relative_path,
        file_size_bytes=actual_size,
        ext=actual_ext,
        quality_label=self._detect_quality_label(song_info, actual_ext, fallback=row.get("quality_label")),
    )
    if lyrics_enabled:
        # Use the downloaded song's lyrics when it has any, else the resolved song info.
        lyrics_song_info = saved_song if self._normalize_lyric_text(getattr(saved_song, "lyric", None)) else song_info
        self._sync_lyrics_for_saved_song(
            row=row,
            song_info=lyrics_song_info,
            saved_path=saved_path,
            overwrite_lyrics=overwrite_lyrics,
            worker_callback=worker_callback,
            display_text=resolved_payload.display_text,
        )
    return True
|
||||
|
||||
def download_song_row(
    self,
    row,
    library_root: str | Path,
    download_sources: list[str] | None = None,
    worker_callback=None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> bool:
    """Resolve one catalog row and download it.

    Returns False when the row cannot be resolved; otherwise returns the
    result of ``download_resolved_song``.
    """
    payload = self.resolve_song_row(
        row=row,
        library_root=library_root,
        download_sources=download_sources,
        worker_callback=worker_callback,
    )
    if payload is not None:
        return self.download_resolved_song(
            resolved_payload=payload,
            worker_callback=worker_callback,
            lyrics_enabled=lyrics_enabled,
            overwrite_lyrics=overwrite_lyrics,
        )
    return False
|
||||
|
||||
def download_pending(
    self,
    library_root: str | Path,
    sources: list[str] | None = None,
    limit: int | None = None,
    playlist_ids: list[int] | None = None,
    download_sources: list[str] | None = None,
    lyrics_enabled: bool = True,
    overwrite_lyrics: bool = False,
) -> int:
    """Download all pending songs on a thread pool and return how many succeeded.

    An exception raised by any single download propagates out of this method
    when that download's result is collected.
    """
    queue = DownloadPlanner(self.repository).build_download_queue(
        sources=sources,
        limit=limit,
        playlist_ids=playlist_ids,
    )
    default_root = self._initialize_library_root(library_root)
    self.repository.ensure_local_backend(default_root, name="default-local", is_default=True)

    # Arguments that are identical for every submitted download.
    shared_kwargs = {
        "library_root": default_root,
        "download_sources": download_sources,
        "lyrics_enabled": lyrics_enabled,
        "overwrite_lyrics": overwrite_lyrics,
    }
    with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
        futures = [executor.submit(self.download_song_row, row=row, **shared_kwargs) for row in queue]
        return sum(1 for future in as_completed(futures) if future.result())
|
||||
|
||||
def sync_local_lyrics(
    self,
    sources: list[str] | None = None,
    playlist_ids: list[int] | None = None,
    limit: int | None = None,
    overwrite_lyrics: bool = False,
    progress_callback=None,
) -> dict[str, int]:
    """Write ``.lrc`` files for songs that already exist locally.

    Rows are processed on a thread pool. ``progress_callback``, when given,
    receives the running counters as keyword arguments once before work
    starts and once after each row. Returns the final counters: ``total``,
    ``processed``, ``saved``, ``skipped`` and ``failed``.
    """
    rows = self.repository.list_local_songs_for_lyrics(
        sources=sources,
        playlist_ids=playlist_ids,
        limit=limit,
    )
    summary = {"total": len(rows), "processed": 0, "saved": 0, "skipped": 0, "failed": 0}
    status_messages = {
        "saved": "lyrics saved",
        "skipped": "lyrics skipped",
        "failed": "lyrics failed",
    }

    def emit_progress(*, row_dict: dict[str, object] | None = None, display_text: str | None = None, last_status: str | None = None, last_progress_text: str | None = None) -> None:
        if progress_callback is None:
            return
        state: dict[str, object] = dict(summary)
        state["progress_percent"] = _progress_percent(summary["processed"], summary["total"])
        if row_dict is not None:
            raw_song_id = row_dict.get("id")
            state["current_song_id"] = int(raw_song_id) if raw_song_id is not None else None
            state["current_playlist_id"] = row_dict.get("playlist_id")
        optional_fields = (
            ("current_display_text", display_text),
            ("last_status", last_status),
            ("last_progress_text", last_progress_text),
        )
        for field_name, field_value in optional_fields:
            if field_value is not None:
                state[field_name] = field_value
        progress_callback(**state)

    def process_row(row) -> tuple[dict[str, object], str, str, str]:
        row_dict = dict(row)
        display_text = self._build_display_text(row_dict)
        try:
            local_file_path = row_dict.get("local_file_path")
            if not local_file_path:
                return row_dict, display_text, "failed", "missing local file path"
            saved_path = Path(str(local_file_path))
            if not saved_path.exists():
                return row_dict, display_text, "failed", "local file missing"

            metadata = json.loads(row_dict["metadata_json"]) if row_dict.get("metadata_json") else {}
            song_info = deserialize_song_info(metadata.get("snapshot")) if isinstance(metadata, dict) else None
            status = self._sync_lyrics_for_saved_song(
                row=row_dict,
                song_info=song_info,
                saved_path=saved_path,
                overwrite_lyrics=overwrite_lyrics,
                display_text=display_text,
            )
            # Any unexpected status value is counted as a failure.
            if status not in status_messages:
                status = "failed"
            return row_dict, display_text, status, status_messages[status]
        except Exception as exc:
            return row_dict, display_text, "failed", f"lyrics failed: {type(exc).__name__}: {exc}"

    emit_progress()
    with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
        futures = [executor.submit(process_row, row) for row in rows]
        for future in as_completed(futures):
            row_dict, display_text, status, status_text = future.result()
            summary["processed"] += 1
            summary[status] += 1
            emit_progress(
                row_dict=row_dict,
                display_text=display_text,
                last_status=status,
                last_progress_text=status_text,
            )
    return summary
|
||||
@@ -0,0 +1,118 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
from uuid import uuid4
|
||||
|
||||
from .runtime import sanitize_path_component
|
||||
|
||||
# Separates the unique storage prefix from the user-facing file name in
# multi-playlist bundle file names (see create_multi_playlist_bundle and
# bundle_download_filename).
INTERNAL_BUNDLE_NAME_SEPARATOR = "--"
|
||||
|
||||
|
||||
def default_bundle_root() -> Path:
    """Return ``<system temp dir>/musicdl-catalogsync/bundles``, creating it if needed."""
    bundle_dir = Path(tempfile.gettempdir()).joinpath("musicdl-catalogsync", "bundles")
    bundle_dir.mkdir(parents=True, exist_ok=True)
    return bundle_dir
|
||||
|
||||
|
||||
def build_single_playlist_bundle_filename(
    *,
    platform: str,
    playlist_id: int,
    playlist_name: str,
) -> str:
    """Build ``playlist-<platform>-<id>-<name>.zip`` from sanitized components.

    An empty platform falls back to ``"unknown"`` and an empty playlist name
    falls back to ``"playlist-<id>"``.
    """
    platform_part = sanitize_path_component(str(platform or ""), "unknown")
    numeric_id = int(playlist_id)
    name_part = sanitize_path_component(str(playlist_name or ""), f"playlist-{numeric_id}")
    return "-".join(("playlist", platform_part, str(numeric_id), name_part)) + ".zip"
|
||||
|
||||
|
||||
def build_multi_playlist_bundle_filename(*, created_at: datetime | None = None) -> str:
    """Return ``playlists-export-YYYYMMDD-HHMMSS.zip`` for ``created_at`` (default: now)."""
    moment = created_at or datetime.now()
    return f"playlists-export-{moment:%Y%m%d-%H%M%S}.zip"
|
||||
|
||||
|
||||
def bundle_download_filename(bundle_path_or_name: str | Path) -> str:
    """Return the user-facing file name for a stored bundle.

    Everything up to and including the first internal separator is dropped
    from the file name; names without the separator are returned unchanged.
    """
    filename = Path(bundle_path_or_name).name
    _storage_prefix, separator, friendly_name = filename.partition(INTERNAL_BUNDLE_NAME_SEPARATOR)
    return friendly_name if separator else filename
|
||||
|
||||
|
||||
def resolve_bundle_download_path(bundle_root: Path, bundle_name: str) -> Path | None:
    """Map a bundle name (without ``.zip``) to its path under ``bundle_root``.

    Returns None for a blank name and for any name that path sanitizing
    would change, so only already-safe names resolve to a path. Whether the
    file exists is not checked.
    """
    requested = str(bundle_name or "").strip()
    if not requested:
        return None
    sanitized = sanitize_path_component(requested, "")
    if sanitized and sanitized == requested:
        return Path(bundle_root) / f"{requested}.zip"
    return None
|
||||
|
||||
|
||||
def create_single_playlist_bundle(
    *,
    playlist_dir: Path,
    bundle_root: Path,
    platform: str,
    playlist_id: int,
    playlist_name: str,
) -> Path:
    """Zip one playlist directory into ``bundle_root`` and return the archive path.

    Archive members are stored under the playlist directory's own name.

    Raises:
        FileNotFoundError: ``playlist_dir`` does not exist or is not a directory.
    """
    source_dir = Path(playlist_dir)
    if not (source_dir.exists() and source_dir.is_dir()):
        raise FileNotFoundError(f"playlist directory not found: {source_dir}")

    output_root = Path(bundle_root)
    output_root.mkdir(parents=True, exist_ok=True)
    archive_name = build_single_playlist_bundle_filename(
        platform=platform,
        playlist_id=playlist_id,
        playlist_name=playlist_name,
    )
    bundle_path = output_root / archive_name
    _write_zip_from_directories(bundle_path, [(source_dir, source_dir.name)])
    return bundle_path
|
||||
|
||||
|
||||
def create_multi_playlist_bundle(
    *,
    playlist_dirs: Iterable[Path],
    bundle_root: Path,
    created_at: datetime | None = None,
) -> Path:
    """Zip several playlist directories into one archive under ``bundle_root``.

    The file is stored under a collision-resistant name made of a timestamp,
    eight hex characters of a UUID, ``INTERNAL_BUNDLE_NAME_SEPARATOR`` and the
    friendly name from ``build_multi_playlist_bundle_filename``. Inside the
    archive every playlist lives under ``playlists/<directory name>``.

    Raises:
        FileNotFoundError: An entry is missing or is not a directory.
        ValueError: ``playlist_dirs`` yielded nothing.
    """
    source_dirs: list[Path] = []
    for entry in playlist_dirs:
        candidate = Path(entry)
        if not (candidate.exists() and candidate.is_dir()):
            raise FileNotFoundError(f"playlist directory not found: {candidate}")
        source_dirs.append(candidate)
    if not source_dirs:
        raise ValueError("playlist_dirs is required")

    output_dir = Path(bundle_root)
    output_dir.mkdir(parents=True, exist_ok=True)

    download_name = build_multi_playlist_bundle_filename(created_at=created_at)
    # The storage prefix always uses "now", independent of created_at.
    storage_prefix = datetime.now().strftime("%Y%m%d%H%M%S%f") + "-" + uuid4().hex[:8]
    archive_path = output_dir / (storage_prefix + INTERNAL_BUNDLE_NAME_SEPARATOR + download_name)

    members = [(source_dir, f"playlists/{source_dir.name}") for source_dir in source_dirs]
    _write_zip_from_directories(archive_path, members)
    return archive_path
|
||||
|
||||
|
||||
def _write_zip_from_directories(bundle_path: Path, directories: list[tuple[Path, str]]) -> None:
    """Pack every regular file under the given directories into one ZIP archive.

    The archive is written to a sibling ``<name>.partial`` file and moved over
    ``bundle_path`` only once it is complete. A failure midway therefore
    neither leaves an incomplete archive at the download path nor destroys a
    previously written bundle.

    Args:
        bundle_path: Final location of the archive; replaced if it exists.
        directories: ``(source_dir, zip_root)`` pairs. Each file is stored as
            ``<zip_root>/<path relative to source_dir>`` in sorted path order.
            Directory entries themselves (including empty directories) are
            not stored.

    Raises:
        Exception: Whatever reading the sources or writing the archive raises
            is propagated after the partial file has been removed.
    """
    staging_path = bundle_path.with_name(bundle_path.name + ".partial")
    # Never pack the archive (old or in-progress) into itself when the bundle
    # location happens to sit inside one of the source trees.
    excluded = {bundle_path.resolve(), staging_path.resolve()}
    try:
        with zipfile.ZipFile(staging_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for source_dir, zip_root in directories:
                for child in sorted(source_dir.rglob("*")):
                    if not child.is_file() or child.resolve() in excluded:
                        continue
                    relative_path = child.relative_to(source_dir).as_posix()
                    archive.write(child, arcname=f"{zip_root}/{relative_path}")
        # Path.replace overwrites an existing destination in one step.
        staging_path.replace(bundle_path)
    except BaseException:
        if staging_path.exists():
            staging_path.unlink()
        raise
|
||||
@@ -0,0 +1,179 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from .models import PlaylistCandidate
|
||||
|
||||
|
||||
# Platform identifiers accepted by build_playlist_candidate; any other value is rejected there.
SUPPORTED_PLATFORMS = {"netease", "qq", "kuwo"}
|
||||
|
||||
|
||||
@dataclass
class ParsedPlaylistFile:
    """Outcome of parsing a manual playlist list file."""

    # Unique candidates, in the order they were first seen in the file.
    entries: list[PlaylistCandidate]
    # Number of lines the parser counted, blank and comment lines included.
    total_lines: int
    # Non-blank, non-comment lines that could not be turned into a candidate.
    skipped_lines: int
|
||||
|
||||
|
||||
def infer_platform_from_url(url: str) -> str | None:
    """Guess the music platform from the host of ``url``.

    The host is taken from ``hostname`` rather than ``netloc`` so that a port
    or user-info part does not defeat the match, and sub-domains are matched
    on a label boundary so look-alike hosts such as ``notkuwo.cn`` are not
    mistaken for ``kuwo.cn``.

    Returns:
        ``"netease"``, ``"qq"`` or ``"kuwo"``; ``None`` when the host is
        absent or not recognised.
    """
    host = (urlparse(url).hostname or "").lower()

    def under(domain: str) -> bool:
        # True for the domain itself and for any of its sub-domains.
        return host == domain or host.endswith("." + domain)

    if host in {"music.163.com", "163.com"}:
        return "netease"
    if under("y.qq.com") or host == "qq.com":
        return "qq"
    if under("kuwo.cn"):
        return "kuwo"
    return None
|
||||
|
||||
|
||||
def build_playlist_candidate(platform: str, url: str) -> PlaylistCandidate | None:
    """Build a playlist candidate for ``url`` using the named platform's rules.

    ``platform`` is compared case-insensitively after trimming; ``url`` is
    trimmed before being handed to the platform-specific builder.

    Returns:
        The candidate, or ``None`` when the platform is not in
        ``SUPPORTED_PLATFORMS``, the URL is blank, or the platform builder
        rejects the URL.
    """
    platform_key = platform.strip().lower()
    cleaned_url = url.strip()
    if not cleaned_url or platform_key not in SUPPORTED_PLATFORMS:
        return None

    builders = {
        "netease": _build_netease_candidate,
        "qq": _build_qq_candidate,
        "kuwo": _build_kuwo_candidate,
    }
    builder = builders.get(platform_key)
    return builder(cleaned_url) if builder is not None else None
|
||||
|
||||
|
||||
def parse_playlist_file(path: str | Path) -> ParsedPlaylistFile:
    """Parse a text file listing playlist URLs, one per line.

    Each line is either ``platform,url`` or a bare URL whose platform is
    inferred from its host. Blank lines and lines starting with ``#`` are
    ignored. Candidates are de-duplicated on ``playlist_key``, keeping the
    first occurrence; repeats are dropped without counting as skipped.

    The file is read as ``utf-8-sig`` so a leading byte-order mark (as written
    by some Windows editors) does not become part of the first line and make
    it unparseable.

    Returns:
        A ``ParsedPlaylistFile``. ``total_lines`` counts every line, plus one
        extra empty line when the text ends with a line break;
        ``skipped_lines`` counts non-blank, non-comment lines that produced no
        candidate.
    """
    playlist_path = Path(path)
    raw_text = playlist_path.read_text(encoding="utf-8-sig")
    lines = raw_text.splitlines()
    # splitlines() drops the empty segment after a final line break; restore
    # it so the count matches what an editor shows as the last (empty) line.
    if raw_text.endswith(("\n", "\r")):
        lines.append("")

    entries: list[PlaylistCandidate] = []
    seen: set[str] = set()
    skipped_lines = 0

    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        platform: str | None = None
        url = line
        if "," in line:
            # Everything before the first comma is treated as the platform.
            platform_text, url_text = line.split(",", 1)
            platform = platform_text.strip().lower()
            url = url_text.strip()

        if platform is None:
            platform = infer_platform_from_url(url)

        candidate = build_playlist_candidate(platform or "", url)
        if candidate is None:
            skipped_lines += 1
            continue
        if candidate.playlist_key in seen:
            continue
        seen.add(candidate.playlist_key)
        entries.append(candidate)

    return ParsedPlaylistFile(entries=entries, total_lines=len(lines), skipped_lines=skipped_lines)
|
||||
|
||||
|
||||
def _build_netease_candidate(url: str) -> PlaylistCandidate | None:
    """Turn a NetEase playlist URL into a candidate with a canonical URL.

    The host must be ``music.163.com`` or ``163.com``, the path (or the path
    inside the ``#`` fragment) must end with ``/playlist``, and an ``id``
    query value must be present in either place. Returns ``None`` otherwise.
    """
    parts = urlparse(url)
    if parts.netloc.lower() not in {"music.163.com", "163.com"}:
        return None

    is_playlist_page = _path_matches(parts.path, "/playlist") or _fragment_path_matches(
        parts.fragment, "/playlist"
    )
    if not is_playlist_page:
        return None

    playlist_id = _extract_query_value(parts, "id")
    if not playlist_id:
        return None

    return PlaylistCandidate(
        platform="netease",
        pool_kind="manual_file",
        remote_id=playlist_id,
        name=playlist_id,
        url=f"https://music.163.com/#/playlist?id={playlist_id}",
    )
|
||||
|
||||
|
||||
def _build_qq_candidate(url: str) -> PlaylistCandidate | None:
    """Turn a QQ Music playlist or toplist URL into a candidate.

    The host must be ``qq.com``, ``y.qq.com`` or a sub-domain of ``y.qq.com``
    (port and user-info are ignored). The remote ID is the path segment that
    directly follows ``playlist`` or ``toplist``; a URL that ends at the
    marker has no ID and is rejected rather than using the marker as the ID.
    ``playlist`` takes precedence when both markers appear.

    Returns:
        A candidate with a canonical ``y.qq.com/n/ryqq/...`` URL (toplists
        also carry ``parse_strategy="qq_toplist"``), or ``None``.
    """
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    if not (host in {"y.qq.com", "qq.com"} or host.endswith(".y.qq.com")):
        return None

    path_parts = [part for part in parsed.path.split("/") if part]

    def id_after(marker: str) -> str:
        # Segment right after the marker, or "" when the marker is absent or last.
        if marker not in path_parts:
            return ""
        position = path_parts.index(marker) + 1
        return path_parts[position].strip() if position < len(path_parts) else ""

    if "playlist" in path_parts:
        remote_id = id_after("playlist")
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://y.qq.com/n/ryqq/playlist/{remote_id}",
        )

    if "toplist" in path_parts:
        remote_id = id_after("toplist")
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://y.qq.com/n/ryqq/toplist/{remote_id}",
            parse_strategy="qq_toplist",
        )

    return None
|
||||
|
||||
|
||||
def _build_kuwo_candidate(url: str) -> PlaylistCandidate | None:
    """Turn a Kuwo playlist or rank-list URL into a candidate.

    The host must be ``kuwo.cn`` or one of its sub-domains, matched on a label
    boundary so ``notkuwo.cn`` is rejected; port and user-info are ignored.

    * ``.../playlist_detail/<id>``: the ID is the segment right after
      ``playlist_detail``; a URL ending at the marker is rejected.
    * ``.../rankList?bangId=<id>``: the ID comes from the ``bangId`` query
      value; the candidate uses ``parse_strategy="kuwo_toplist"`` and records
      the ID under ``metadata["bang_id"]``.

    Returns ``None`` for anything else.
    """
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    if not (host == "kuwo.cn" or host.endswith(".kuwo.cn")):
        return None

    path_parts = [part for part in parsed.path.split("/") if part]

    if "playlist_detail" in path_parts:
        position = path_parts.index("playlist_detail") + 1
        remote_id = path_parts[position].strip() if position < len(path_parts) else ""
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="kuwo",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://www.kuwo.cn/playlist_detail/{remote_id}",
        )

    if "rankList" in path_parts:
        remote_id = _extract_query_value(parsed, "bangId")
        if not remote_id:
            return None
        return PlaylistCandidate(
            platform="kuwo",
            pool_kind="manual_file",
            remote_id=remote_id,
            name=remote_id,
            url=f"https://www.kuwo.cn/rankList?bangId={remote_id}",
            parse_strategy="kuwo_toplist",
            metadata={"bang_id": remote_id},
        )

    return None
|
||||
|
||||
|
||||
def _extract_query_value(parsed, key: str) -> str | None:
    """Return the first non-blank value of ``key`` from a parsed URL.

    The regular query string is checked first, then the query string embedded
    in the ``#`` fragment (as in ``/#/playlist?id=1``). Values are trimmed.

    Args:
        parsed: Result of ``urlparse``; its ``query`` and ``fragment`` are read.
        key: Query parameter name.

    Returns:
        The trimmed value, or ``None`` when neither place has a non-blank one.
    """
    query_sources = (parsed.query, urlparse(parsed.fragment).query)
    for raw_query in query_sources:
        values = parse_qs(raw_query).get(key)
        if not values:
            continue
        first = values[0].strip()
        if first:
            return first
    return None
|
||||
|
||||
|
||||
def _path_matches(path: str, expected_suffix: str) -> bool:
    """Report whether ``path``, ignoring trailing slashes, ends with ``expected_suffix``."""
    trimmed_path = path.rstrip("/")
    return trimmed_path.endswith(expected_suffix)
|
||||
|
||||
|
||||
def _fragment_path_matches(fragment: str, expected_suffix: str) -> bool:
    """Report whether the path inside a URL fragment ends with ``expected_suffix``.

    The fragment is parsed as a URL of its own so a query part such as
    ``?id=1`` is excluded; trailing slashes are ignored. An empty fragment
    never matches.
    """
    if fragment:
        fragment_path = urlparse(fragment).path
        return fragment_path.rstrip("/").endswith(expected_suffix)
    return False
|
||||
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Maps a source label to its short platform name. Both the "*MusicClient"
# class-style names and the already-short names are listed, so looking up an
# already-normalised value returns it unchanged.
SOURCE_NAME_MAP = {
    # "*MusicClient" style names
    "NeteaseMusicClient": "netease",
    "QQMusicClient": "qq",
    "KuwoMusicClient": "kuwo",
    # short names map to themselves
    "netease": "netease",
    "qq": "qq",
    "kuwo": "kuwo",
}
|
||||
|
||||
# Separators between artist names, with any surrounding whitespace: "/", ASCII
# comma, full-width comma (U+FF0C), ideographic comma (U+3001), "&" and "|".
# The non-ASCII separators are written as escapes so the pattern survives
# editors or tools that normalise full-width punctuation to ASCII.
ARTIST_SPLIT_RE = re.compile(r"\s*[/,\uFF0C\u3001&|]\s*")
|
||||
|
||||
|
||||
def remove_suffix(value: str, suffix: str) -> str:
    """Return ``value`` without a trailing ``suffix``.

    ``value`` is returned unchanged when ``suffix`` is empty or ``value`` does
    not end with it.
    """
    if not suffix or not value.endswith(suffix):
        return value
    return value[: len(value) - len(suffix)]
|
||||
|
||||
|
||||
def normalize_source_name(source: str | None) -> str:
    """Map a source label to its short lowercase platform name.

    Known labels are looked up in ``SOURCE_NAME_MAP``. Unknown labels fall back
    to the label with a trailing ``MusicClient`` removed, lowercased. A missing
    or empty label becomes ``"unknown"``.
    """
    if not source:
        return "unknown"
    fallback = remove_suffix(str(source), "MusicClient").lower()
    return SOURCE_NAME_MAP.get(source, fallback)
|
||||
|
||||
|
||||
def get_field(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a dict (by item) or any other object (by attribute).

    Returns ``default`` when the key or attribute is absent.
    """
    return obj.get(key, default) if isinstance(obj, dict) else getattr(obj, key, default)
|
||||
|
||||
|
||||
def serialize_song_info(song_info: Any) -> dict[str, Any]:
    """Convert a song-info value into a plain dict snapshot.

    Resolution order:

    1. ``None`` gives an empty dict.
    2. A dict gives a shallow copy.
    3. An object with a callable ``todict`` gives whatever that returns.
    4. An object with ``__dict__`` gives its attributes whose names do not
       start with an underscore.
    5. Anything else gives an empty dict.
    """
    if song_info is None:
        return {}
    if isinstance(song_info, dict):
        return dict(song_info)

    exporter = getattr(song_info, "todict", None)
    if callable(exporter):
        return exporter()

    if not hasattr(song_info, "__dict__"):
        return {}
    return {name: value for name, value in vars(song_info).items() if not name.startswith("_")}
|
||||
|
||||
|
||||
def deserialize_song_info(snapshot: dict[str, Any] | None):
    """Rebuild a ``SongInfo`` from a snapshot dict via ``SongInfo.fromdict``.

    Returns ``None`` for a missing or empty snapshot. ``SongInfo`` is imported
    only when there is something to rebuild.
    """
    if snapshot:
        from musicdl.modules.utils.data import SongInfo

        return SongInfo.fromdict(snapshot)
    return None
|
||||
|
||||
|
||||
def parse_size_to_bytes(file_size: Any) -> int | None:
    """Convert a file-size value into a whole number of bytes.

    Args:
        file_size: A number (taken as bytes and truncated to ``int``) or a
            label such as ``"3.5 MB"``, ``"10KiB"`` or ``"512B"``. Labels are
            case-insensitive, ``KiB``-style units are treated like ``KB``, and
            every unit step is a factor of 1024.

    Returns:
        The size in bytes, or ``None`` when the value is missing, is not a
        finite number, or is not a recognisable size label (including ``""``
        and ``"NULL"``). Arbitrary objects such as lists or dicts also give
        ``None`` instead of raising.
    """
    if file_size is None:
        return None
    if isinstance(file_size, (int, float)):
        try:
            return int(file_size)
        except (ValueError, OverflowError):
            # int() rejects NaN (ValueError) and infinities (OverflowError).
            return None

    text = str(file_size).strip().upper().replace("IB", "B")
    match = re.match(r"^([0-9]+(?:\.[0-9]+)?)\s*([KMGTP]?B)$", text)
    if not match:
        return None

    value = float(match.group(1))
    unit = match.group(2)
    multiplier = {
        "B": 1,
        "KB": 1024,
        "MB": 1024**2,
        "GB": 1024**3,
        "TB": 1024**4,
        "PB": 1024**5,
    }[unit]
    return int(value * multiplier)
|
||||
|
||||
|
||||
def dedupe_preserve_order(values: list[str]) -> list[str]:
    """Trim each string, drop blanks and repeats, and keep first-seen order."""
    trimmed = (value.strip() for value in values)
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(item for item in trimmed if item))


def extract_artist_names(raw_data: dict | None, singers_text: str | None = None) -> list[str]:
    """Collect distinct artist names from raw search data and a singers label.

    Names are gathered, in this order, from ``raw_data["search"]``:

    * list-valued ``ar`` / ``artists`` / ``singer`` entries, taking the
      ``name`` of each dict item;
    * string-valued ``artist`` / ``ARTIST`` / ``author`` / ``singerName`` /
      ``singers`` entries, split with ``ARTIST_SPLIT_RE``;

    and finally from ``singers_text``, split the same way.

    Args:
        raw_data: Raw payload; only its ``"search"`` mapping is read. A
            missing payload, or a missing or non-dict ``"search"`` entry, is
            treated as empty rather than raising.
        singers_text: Optional display string listing the singers.

    Returns:
        Trimmed, de-duplicated names in first-seen order.
    """
    search_data = raw_data.get("search") if isinstance(raw_data, dict) else None
    if not isinstance(search_data, dict):
        search_data = {}

    candidates: list[str] = []

    for key in ("ar", "artists", "singer"):
        value = search_data.get(key)
        if isinstance(value, list):
            candidates.extend(
                str(item["name"]) for item in value if isinstance(item, dict) and item.get("name")
            )

    for key in ("artist", "ARTIST", "author", "singerName", "singers"):
        value = search_data.get(key)
        if isinstance(value, str):
            candidates.extend(ARTIST_SPLIT_RE.split(value))

    if singers_text:
        candidates.extend(ARTIST_SPLIT_RE.split(str(singers_text)))

    return dedupe_preserve_order(candidates)
|
||||
|
||||
|
||||
@dataclass
class PlaylistCandidate:
    """A playlist discovered on a platform, identified by platform and remote ID."""

    # Short platform name, e.g. "netease", "qq" or "kuwo".
    platform: str
    # Label of the pool the candidate came from (e.g. "manual_file").
    pool_kind: str
    # Identifier of the playlist on the platform.
    remote_id: str
    # Display name of the playlist.
    name: str
    # URL of the playlist.
    url: str
    # Name of the parsing strategy to use for this playlist.
    parse_strategy: str = "playlist_url"
    cover_url: str | None = None
    creator_name: str | None = None
    play_count: int | None = None
    collected_song_count: int | None = None
    # Free-form extra data; every instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def playlist_key(self) -> str:
        """Key of the form ``<platform>:<remote_id>``."""
        return "{}:{}".format(self.platform, self.remote_id)
|
||||
|
||||
|
||||
@dataclass
class CatalogSong:
    """A song in the catalog, identified by platform and remote song ID."""

    # Short platform name (see normalize_source_name).
    platform: str
    # Identifier of the song on the platform.
    remote_song_id: str
    name: str | None = None
    singers: str | None = None
    album: str | None = None
    ext: str | None = None
    # Size in bytes, when known or parseable from the label.
    file_size_bytes: int | None = None
    # Size exactly as reported by the source.
    file_size_label: str | None = None
    quality_label: str | None = None
    # Free-form extra data; every instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def song_key(self) -> str:
        """Key of the form ``<platform>:<remote_song_id>``."""
        return "{}:{}".format(self.platform, self.remote_song_id)

    @classmethod
    def from_song_info(cls, song_info: Any) -> "CatalogSong":
        """Build a catalog song from a song-info dict or object.

        Fields are read with ``get_field``, so both mappings and attribute
        objects work. ``file_size_bytes`` is used when present; otherwise it is
        parsed from ``file_size``. The identifier is passed through ``str()``,
        so a missing identifier becomes the literal string ``"None"``. The raw
        data and a serialised snapshot of the input are kept under
        ``metadata``.
        """
        raw = get_field(song_info, "raw_data", {}) or {}
        size_label = get_field(song_info, "file_size")
        size_bytes = get_field(song_info, "file_size_bytes")
        if size_bytes is None:
            size_bytes = parse_size_to_bytes(size_label)

        return cls(
            platform=normalize_source_name(get_field(song_info, "source")),
            remote_song_id=str(get_field(song_info, "identifier")),
            name=get_field(song_info, "song_name"),
            singers=get_field(song_info, "singers"),
            album=get_field(song_info, "album"),
            ext=get_field(song_info, "ext"),
            file_size_bytes=size_bytes,
            file_size_label=size_label,
            quality_label=raw.get("quality"),
            metadata={"raw_data": raw, "snapshot": serialize_song_info(song_info)},
        )
|
||||
@@ -0,0 +1,12 @@
|
||||
from .models import ItemStatus, JobItem, JobRun, JobStatus, JobStage, StageStatus
|
||||
from .repository import OpsRepository
|
||||
|
||||
__all__ = [
|
||||
"ItemStatus",
|
||||
"JobItem",
|
||||
"JobRun",
|
||||
"JobStatus",
|
||||
"JobStage",
|
||||
"OpsRepository",
|
||||
"StageStatus",
|
||||
]
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
def _parse_sources(value: str | None) -> list[str]:
    """Split a comma-separated source list into trimmed, non-empty names."""
    if not value:
        return []
    names = (piece.strip() for piece in value.split(","))
    return [name for name in names if name]
|
||||
|
||||
|
||||
def _normalize_env_value(raw_value: str) -> str:
    """Strip one pair of matching quotes from an env-file value.

    If the trimmed value is wrapped in a matching pair of single or double
    quotes, the text between them is returned. Otherwise the value is returned
    exactly as given, surrounding whitespace included.
    """
    candidate = raw_value.strip()
    is_quoted = (
        len(candidate) >= 2
        and candidate[0] in {"'", '"'}
        and candidate[-1] == candidate[0]
    )
    return candidate[1:-1] if is_quoted else raw_value
|
||||
|
||||
|
||||
def _parse_env(content: str) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines of an env file into a dict.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines with
    an empty key are ignored. A leading ``export `` is dropped. Values go
    through ``_normalize_env_value``; when a key repeats, the last one wins.
    """
    parsed: dict[str, str] = {}
    for raw_line in content.splitlines():
        entry = raw_line.lstrip()
        if not entry.strip() or entry.startswith("#"):
            continue
        if entry.startswith("export "):
            entry = entry[len("export ") :]
        name, separator, value = entry.partition("=")
        if not separator:
            continue
        name = name.strip()
        if name:
            parsed[name] = _normalize_env_value(value)
    return parsed
|
||||
|
||||
|
||||
class CatalogsyncEnvManager:
    """Reads the catalog-sync env file and tracks its revisions in the ops repository."""

    def __init__(
        self,
        *,
        db_path: str | Path,
        env_file_path: str | Path,
        repository: OpsRepository | None = None,
    ):
        """Remember the env file location and the repository for revisions.

        An ``OpsRepository`` is created on ``db_path`` when none is supplied.
        """
        self.env_file_path = Path(env_file_path)
        self.repository = repository or OpsRepository(db_path)

    def load_current(self) -> dict[str, str]:
        """Return the parsed env file, or an empty dict when the file is absent."""
        if self.env_file_path.exists():
            return _parse_env(self.env_file_path.read_text(encoding="utf-8"))
        return {}

    def build_job_snapshot(self) -> dict[str, Any]:
        """Return the current env values plus a parsed ``download_sources`` list.

        ``download_sources`` is derived from the ``DOWNLOAD_SOURCES`` entry.
        """
        current = self.load_current()
        return {
            **current,
            "download_sources": _parse_sources(current.get("DOWNLOAD_SOURCES")),
        }

    def save_revision(self, note: str | None = None, source_type: str = "env_file") -> int:
        """Store the env file's current text as a new config revision.

        A missing file is stored as empty text. The SHA-256 of the UTF-8
        encoded text is stored alongside it.

        Returns:
            Whatever ``create_config_revision`` returns (annotated as ``int``).
        """
        text = self.env_file_path.read_text(encoding="utf-8") if self.env_file_path.exists() else ""
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        return self.repository.create_config_revision(
            source_type=source_type,
            file_path=str(self.env_file_path.resolve()),
            content_text=text,
            content_hash=digest,
            note=note,
        )

    def list_revisions(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return stored config revisions, limited to ``limit`` entries."""
        return self.repository.list_config_revisions(limit=limit)

    def apply_revision(self, revision_id: int) -> None:
        """Write a stored revision back to the env file and mark it applied.

        Raises:
            ValueError: No revision with ``revision_id`` exists.
        """
        revision = self.repository.get_config_revision(revision_id)
        if revision is None:
            raise ValueError(f"config revision not found: {revision_id}")
        target = self.env_file_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(revision["content_text"], encoding="utf-8")
        self.repository.mark_config_revision_applied(revision_id)
|
||||
@@ -0,0 +1,466 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from musicdl.catalogsync.downloader import CatalogDownloader
|
||||
from musicdl.catalogsync.repository import CatalogRepository
|
||||
from musicdl.catalogsync.services import CatalogSyncService
|
||||
from musicdl.catalogsync.uploader import CatalogUploader
|
||||
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
# Reason text and code recorded when an item is skipped as a non-music
# resource. The text reads roughly "non-music resource (audio chart entry)".
NON_MUSIC_RESOURCE_REASON = "非音乐资源(有声榜条目)"
NON_MUSIC_RESOURCE_CODE = "NON_MUSIC_RESOURCE"
|
||||
|
||||
|
||||
@dataclass
class ResolvedStageDownloadTask:
    """A resolved job item queued for downloading.

    Created by ``DownloadStageExecutor.process_resolve_item`` and consumed by
    ``DownloadStageExecutor.process_download_task``.
    """

    # Job item being processed.
    item_id: int
    # Playlist the download row belongs to, if any.
    playlist_id: int | None
    # Download row the item was resolved from.
    row: dict[str, object]
    # Opaque value returned by the downloader's resolve step.
    resolved_payload: object
|
||||
|
||||
|
||||
def _format_error(exc: Exception) -> str:
    """Render an exception as ``<ClassName>: <message>``."""
    return ": ".join((type(exc).__name__, str(exc)))
|
||||
|
||||
|
||||
class _TransitionUpdateError(RuntimeError):
    """Raised when a job-item status transition reports that it was not applied."""
|
||||
|
||||
|
||||
def _ensure_transition_applied(applied: bool, *, item_id: int, action: str) -> None:
    """Raise unless a status transition reported success.

    Args:
        applied: Result returned by the repository transition call.
        item_id: Job item the transition was for (used in the message).
        action: Name of the repository call (used in the message).

    Raises:
        _TransitionUpdateError: ``applied`` is falsy.
    """
    if not applied:
        raise _TransitionUpdateError(
            f"CAS transition failed for item {item_id}: {action} returned False"
        )
|
||||
|
||||
|
||||
def _mark_failed_or_raise(ops_repo: OpsRepository, *, item_id: int, error_message: str, cause: Exception) -> None:
    """Mark an item failed, raising if the repository did not apply the change.

    Raises:
        RuntimeError: ``mark_item_failed`` returned a falsy value; the error
            is chained from ``cause``.
    """
    recorded = ops_repo.mark_item_failed(item_id=item_id, error_message=error_message)
    if not recorded:
        raise RuntimeError(
            f"CAS transition failed for item {item_id}: mark_item_failed returned False while handling error: {error_message}"
        ) from cause
|
||||
|
||||
|
||||
def _mark_non_music_resource_skipped_or_raise(ops_repo: OpsRepository, *, item_id: int) -> None:
    """Mark an item skipped as a non-music resource.

    Raises:
        _TransitionUpdateError: ``mark_item_skipped`` reported that the change
            was not applied.
    """
    applied = ops_repo.mark_item_skipped(
        item_id=item_id,
        reason_message=NON_MUSIC_RESOURCE_REASON,
        reason_code=NON_MUSIC_RESOURCE_CODE,
    )
    _ensure_transition_applied(applied, item_id=item_id, action="mark_item_skipped")
|
||||
|
||||
|
||||
def _is_non_music_resource_download_row(row: dict[str, object] | None) -> bool:
    """Report whether a download row refers to a non-music resource.

    A row qualifies when either:

    * its ``remote_song_id`` starts with ``qqtop_`` (case-insensitive, after
      trimming), or
    * its ``metadata_json`` decodes to nested dicts where
      ``snapshot -> raw_data -> search -> qq_toplist_fallback`` is truthy.

    A missing row, missing or undecodable metadata, or any non-dict level on
    that path gives ``False``.
    """
    record = row or {}
    if str(record.get("remote_song_id") or "").strip().lower().startswith("qqtop_"):
        return True

    encoded = record.get("metadata_json")
    if not encoded:
        return False
    try:
        node = json.loads(str(encoded))
    except Exception:
        # Best effort: metadata that cannot be decoded simply does not qualify.
        return False

    # Walk metadata -> snapshot -> raw_data -> search, requiring a dict at each level.
    for key in ("snapshot", "raw_data", "search"):
        if not isinstance(node, dict):
            return False
        node = node.get(key)
    if not isinstance(node, dict):
        return False
    return bool(node.get("qq_toplist_fallback"))
|
||||
|
||||
|
||||
class CollectStageExecutor:
    """Runs "collect" job items: gathers playlists for one source per item."""

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the sync service, creating defaults as needed."""
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.service = service or CatalogSyncService(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Collect playlists for the item's source and record the outcome.

        The item is claimed first unless ``already_claimed`` is set. Worker
        state is updated as the collection progresses. On success the item is
        marked succeeded with the collected counts; on any exception it is
        marked failed with the formatted error.

        Raises:
            _TransitionUpdateError: Marking the item succeeded was not applied
                (re-raised after the item has been marked failed).
            RuntimeError: Marking the item failed was not applied either.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            item = self.ops_repo.get_item(item_id)
            if item is None:
                raise RuntimeError(f"Unknown item: {item_id}")
            source = str(item.payload.get("source") or "").strip()
            if not source:
                raise RuntimeError(f"Collect item {item_id} is missing source")
            display_text = f"collect:{source}"

            def report(progress_text: str):
                # Publish the latest progress line for this worker and item.
                return self.ops_repo.update_worker_state(
                    worker_name=worker_name,
                    current_job_item_id=item_id,
                    status="running",
                    current_display_text=display_text,
                    last_progress_text=progress_text,
                )

            report("starting playlist collection")
            counts = self.service.collect_playlists(
                sources=[source],
                include_playlist_square=bool(item.payload.get("include_playlist_square", True)),
                include_toplist=bool(item.payload.get("include_toplist", True)),
                progress_callback=lambda event_type, payload: report(
                    self._format_progress_text(event_type, payload)
                ),
            )
            succeeded = self.ops_repo.mark_item_succeeded(item_id=item_id, result_payload={"counts": counts})
            _ensure_transition_applied(succeeded, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            # A failed status transition must still surface to the caller.
            if isinstance(exc, _TransitionUpdateError):
                raise

    @staticmethod
    def _format_progress_text(event_type: str, payload: dict[str, object]) -> str:
        """Turn a collection progress event into a short status line.

        Known events are ``playlist_square_page``, ``toplist_collected`` and
        ``source_finished``; any other event name is shown with underscores
        replaced by spaces. Missing or falsy numbers are shown as 0.
        """
        if event_type == "playlist_square_page":
            page = int(payload.get("page") or 0)
            total = int(payload.get("total") or 0)
            new_count = int(payload.get("new_count") or 0)
            if payload.get("duplicate_page"):
                return f"page {page}: duplicate page detected, stopping at {total}"
            return f"page {page}: +{new_count}, total {total}"

        if event_type == "toplist_collected":
            return f"toplist: {int(payload.get('count') or 0)}"

        if event_type == "source_finished":
            counts = payload.get("counts")
            if not isinstance(counts, dict):
                counts = {}
            playlist_square = int(counts.get("playlist_square") or 0)
            toplist = int(counts.get("toplist") or 0)
            return f"done: square {playlist_square}, toplist {toplist}"

        return str(event_type).replace("_", " ")
|
||||
|
||||
|
||||
class DownloadStageExecutor:
    """Runs "download" job items, either in one step or split into resolve and download.

    ``process_item`` resolves and downloads in a single call.
    ``process_resolve_item`` only resolves and puts a
    ``ResolvedStageDownloadTask`` on a queue; ``process_download_task`` later
    downloads such a task.
    """

    def __init__(
        self,
        db_path: str | Path,
        library_root: str | Path,
        download_sources: list[str] | None = None,
        downloader: CatalogDownloader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the downloader, creating defaults as needed.

        ``download_sources`` is copied into a new list.
        """
        self.db_path = Path(db_path)
        self.library_root = Path(library_root)
        self.download_sources = list(download_sources or [])
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.downloader = downloader or CatalogDownloader(repository=self.catalog_repo)

    def _state_reporter(self, *, item_id: int, worker_name: str):
        """Return a callback that publishes "running" worker state for the item."""

        def report(**state):
            return self.ops_repo.update_worker_state(
                worker_name=worker_name,
                current_job_item_id=item_id,
                status="running",
                **state,
            )

        return report

    def _finish_if_already_downloaded(self, row: dict[str, object], *, item_id: int, worker_name: str) -> bool:
        """Mark the item succeeded when its song already has an active local file.

        Returns:
            ``True`` when the item was completed here, ``False`` when a
            download is still needed.
        """
        song_id = int(row.get("id") or row.get("song_id") or 0)
        if song_id <= 0 or not self.catalog_repo.song_has_active_local_file(song_id):
            return False
        self.ops_repo.update_worker_state(
            worker_name=worker_name,
            current_job_item_id=item_id,
            status="running",
            current_song_id=song_id,
            current_playlist_id=row.get("playlist_id"),
            current_display_text=str(row.get("name") or row.get("id") or song_id),
            last_progress_text="already downloaded",
        )
        applied = self.ops_repo.mark_item_succeeded(
            item_id=item_id,
            result_payload={"already_downloaded": True},
        )
        _ensure_transition_applied(applied, item_id=item_id, action="mark_item_succeeded")
        return True

    def _record_unavailable(self, row: dict[str, object] | None, *, item_id: int, error_message: str) -> None:
        """Record an item that produced nothing: skipped for non-music rows, failed otherwise."""
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        applied = self.ops_repo.mark_item_failed(item_id=item_id, error_message=error_message)
        _ensure_transition_applied(applied, item_id=item_id, action="mark_item_failed")

    def _record_download_outcome(self, succeeded, row: dict[str, object] | None, *, item_id: int) -> None:
        """Record the result of a download call that returned normally."""
        if succeeded:
            applied = self.ops_repo.mark_item_succeeded(item_id=item_id)
            _ensure_transition_applied(applied, item_id=item_id, action="mark_item_succeeded")
            return
        self._record_unavailable(row, item_id=item_id, error_message="download returned no file")

    def _record_exception(self, exc: Exception, row: dict[str, object] | None, *, item_id: int) -> None:
        """Record an item whose processing raised: skipped for non-music rows, failed otherwise."""
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        _mark_failed_or_raise(
            self.ops_repo,
            item_id=item_id,
            error_message=_format_error(exc),
            cause=exc,
        )

    def process_resolve_item(
        self,
        item_id: int,
        worker_name: str,
        *,
        ready_queue,
        already_claimed: bool = False,
    ) -> None:
        """Resolve one item and queue it for download.

        The item is claimed first unless ``already_claimed`` is set. If the
        song already has an active local file the item is marked succeeded.
        If resolving returns ``None`` the item is marked skipped (non-music
        row) or failed. Otherwise a ``ResolvedStageDownloadTask`` is put on
        ``ready_queue``.

        Exceptions from the repository or the downloader are not caught here
        and propagate to the caller.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)

        row = self.ops_repo.build_download_row(item_id=item_id)
        if self._finish_if_already_downloaded(row, item_id=item_id, worker_name=worker_name):
            return

        resolved_payload = self.downloader.resolve_song_row(
            row=row,
            library_root=self.library_root,
            download_sources=self.download_sources,
            worker_callback=self._state_reporter(item_id=item_id, worker_name=worker_name),
        )
        if resolved_payload is None:
            self._record_unavailable(
                row,
                item_id=item_id,
                error_message="resolve returned no downloadable song",
            )
            return

        ready_queue.put(
            ResolvedStageDownloadTask(
                item_id=item_id,
                playlist_id=row.get("playlist_id"),
                row=row,
                resolved_payload=resolved_payload,
            )
        )

    def process_download_task(self, task: ResolvedStageDownloadTask, worker_name: str) -> None:
        """Download a previously resolved task and record the outcome.

        Success marks the item succeeded. A falsy result or an exception marks
        it skipped (non-music row) or failed.

        Raises:
            _TransitionUpdateError: A status transition was not applied.
            RuntimeError: Marking the item failed was not applied.
        """
        try:
            succeeded = self.downloader.download_resolved_song(
                resolved_payload=task.resolved_payload,
                worker_callback=self._state_reporter(item_id=task.item_id, worker_name=worker_name),
            )
            self._record_download_outcome(succeeded, task.row, item_id=task.item_id)
        except Exception as exc:
            self._record_exception(exc, task.row, item_id=task.item_id)
            # A failed status transition must still surface to the caller.
            if isinstance(exc, _TransitionUpdateError):
                raise

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Resolve and download one item in a single step and record the outcome.

        The item is claimed first unless ``already_claimed`` is set. If the
        song already has an active local file the item is marked succeeded
        without downloading. A falsy download result or an exception marks the
        item skipped (non-music row) or failed.

        Raises:
            _TransitionUpdateError: A status transition was not applied.
            RuntimeError: Marking the item failed was not applied.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)

        # Stays None if building the row itself fails, so the handler can cope.
        row: dict[str, object] | None = None
        try:
            row = self.ops_repo.build_download_row(item_id=item_id)
            if self._finish_if_already_downloaded(row, item_id=item_id, worker_name=worker_name):
                return
            succeeded = self.downloader.download_song_row(
                row=row,
                library_root=self.library_root,
                download_sources=self.download_sources,
                worker_callback=self._state_reporter(item_id=item_id, worker_name=worker_name),
            )
            self._record_download_outcome(succeeded, row, item_id=item_id)
        except Exception as exc:
            self._record_exception(exc, row, item_id=item_id)
            # A failed status transition must still surface to the caller.
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
|
||||
|
||||
class SyncStageExecutor:
    """Runs "sync" job items, using one sync service instance per thread."""

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        service_factory: Callable[[], CatalogSyncService] | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and decide how sync services are obtained.

        ``service_factory`` wins when given; otherwise a fixed ``service`` is
        reused; otherwise a new ``CatalogSyncService`` on the catalog
        repository is built on demand.
        """
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)

        if service_factory is None:
            if service is None:

                def service_factory() -> CatalogSyncService:
                    return CatalogSyncService(repository=self.catalog_repo)

            else:

                def service_factory() -> CatalogSyncService:
                    return service

        self._service_factory = service_factory
        self._service_local = threading.local()

    def _get_service(self) -> CatalogSyncService:
        """Return the calling thread's service, creating it on first use."""
        cached = getattr(self._service_local, "service", None)
        if cached is not None:
            return cached
        created = self._service_factory()
        self._service_local.service = created
        return created

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Sync the playlist behind one item and record the outcome.

        The item is claimed first unless ``already_claimed`` is set. On
        success the item is marked succeeded with the linked count; on any
        exception it is marked failed with the formatted error.

        Raises:
            _TransitionUpdateError: Marking the item succeeded was not applied
                (re-raised after the item has been marked failed).
            RuntimeError: Marking the item failed was not applied either.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            playlist_row = self.ops_repo.get_playlist_row_for_item(item_id=item_id)
            linked_count = int(self._get_service().sync_playlist_row(playlist_row))
            applied = self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"linked_count": linked_count},
            )
            _ensure_transition_applied(applied, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            # A failed status transition must still surface to the caller.
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
|
||||
|
||||
class UploadStageExecutor:
    """Runs "upload" job items against a named backend."""

    def __init__(
        self,
        db_path: str | Path,
        backend_name: str,
        uploader: CatalogUploader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the uploader, creating defaults as needed."""
        self.db_path = Path(db_path)
        self.backend_name = str(backend_name)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.uploader = uploader or CatalogUploader(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Upload the file behind one item and record the outcome.

        The item is claimed first unless ``already_claimed`` is set. The
        uploader's result is converted to a string: ``"succeeded"`` marks the
        item succeeded, anything else marks it failed with that result in the
        message. Exceptions mark the item failed with the formatted error.

        Raises:
            _TransitionUpdateError: A status transition was not applied
                (re-raised after the item has been marked failed).
            RuntimeError: Marking the item failed was not applied either.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            upload_row = self.ops_repo.get_upload_row_for_item(item_id=item_id)
            outcome = str(
                self.uploader.process_upload_task_row(
                    task_row=upload_row,
                    backend_name=self.backend_name,
                )
            )
            if outcome == "succeeded":
                action = "mark_item_succeeded"
                applied = self.ops_repo.mark_item_succeeded(item_id=item_id)
            else:
                action = "mark_item_failed"
                applied = self.ops_repo.mark_item_failed(
                    item_id=item_id,
                    error_message=f"upload result: {outcome}",
                )
            _ensure_transition_applied(applied, item_id=item_id, action=action)
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            # A failed status transition must still surface to the caller.
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Lane names returned by job_lane_type().
DOWNLOAD_LANE = "download"
GENERAL_LANE = "general"

# Ordered stage types that make up each known job type.
JOB_STAGE_SEQUENCES: dict[str, tuple[str, ...]] = {
    "catalog_sync": ("collect", "sync", "download"),
    "collect_only": ("collect",),
    "sync_only": ("sync",),
    "sync_download": ("sync", "download"),
    "download_only": ("download",),
    "upload_only": ("upload",),
    "download_upload": ("download", "upload"),
}
|
||||
|
||||
def job_has_stage(job_type: str, stage_type: str) -> bool:
    """Return whether ``stage_type`` is part of the stage sequence of ``job_type``.

    Both arguments are converted with ``str``; unknown job types have no stages.
    """
    stages = JOB_STAGE_SEQUENCES.get(str(job_type), ())
    wanted = str(stage_type)
    return wanted in stages
|
||||
|
||||
|
||||
def job_lane_type(job_type: str) -> str:
    """Return ``DOWNLOAD_LANE`` for job types with a download stage, else ``GENERAL_LANE``."""
    return DOWNLOAD_LANE if job_has_stage(job_type, "download") else GENERAL_LANE
|
||||
|
||||
|
||||
def primary_stage_type(job_type: str) -> str | None:
    """Return the first of download, upload, sync, collect that the job type contains.

    Returns ``None`` when the job type contains none of these stages.
    """
    preference = ("download", "upload", "sync", "collect")
    return next(
        (candidate for candidate in preference if job_has_stage(job_type, candidate)),
        None,
    )
|
||||
|
||||
|
||||
def display_name(job_type: str, playlist_scope: dict[str, Any] | None = None) -> str:
    """Return a human-readable label for a job type.

    Some labels differ when the job is scoped, i.e. when
    ``playlist_scope["playlist_ids"]`` is a non-empty ``list``. Unknown job
    types are returned as ``str(job_type)``.
    """
    scope = playlist_scope or {}
    scoped_ids = scope.get("playlist_ids")
    scoped = isinstance(scoped_ids, list) and len(scoped_ids) > 0

    if scoped:
        sync_label = "Sync Selected Playlists"
        sync_download_label = "Sync Then Download"
        download_label = "Download Selected Playlists"
    else:
        sync_label = "Sync"
        sync_download_label = "Sync Then Download All"
        download_label = "Download"

    labels = {
        "catalog_sync": "Full Pipeline",
        "collect_only": "Collect",
        "sync_only": sync_label,
        "sync_download": sync_download_label,
        "download_only": download_label,
        "upload_only": "Upload",
        "download_upload": "Download Then Upload",
    }
    key = str(job_type)
    return labels.get(key, key)
|
||||
@@ -0,0 +1,402 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sqlite3
|
||||
from contextlib import contextmanager, suppress
|
||||
from pathlib import Path, PurePath
|
||||
from typing import Any
|
||||
|
||||
from musicdl.catalogsync.db import connect_database
|
||||
|
||||
|
||||
_COPY_SUFFIX_RE = re.compile(r" \(\d+\)(?=(\.[^.]+)?$)")
|
||||
|
||||
|
||||
class LocalDedupeBlockedError(RuntimeError):
    """Raised when deduplication is refused because jobs or items are running."""
|
||||
|
||||
|
||||
def _coerce_int(value: Any) -> int | None:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _row_value(row: sqlite3.Row | dict[str, Any], key: str) -> Any:
|
||||
if isinstance(row, sqlite3.Row):
|
||||
try:
|
||||
return row[key]
|
||||
except IndexError:
|
||||
return None
|
||||
return row.get(key)
|
||||
|
||||
|
||||
def _path_for_location(row: sqlite3.Row | dict[str, Any]) -> Path | None:
    """Build the filesystem path for a location row.

    A non-blank ``absolute_path`` wins. Otherwise the path is ``base_path``
    joined with ``locator``; ``None`` is returned when either of those is blank.
    """

    def _text(column: str) -> str:
        # Missing or NULL columns are treated as empty text.
        return str(_row_value(row, column) or "").strip()

    absolute = _text("absolute_path")
    if absolute:
        return Path(absolute)
    base = _text("base_path")
    relative = _text("locator")
    if base and relative:
        return Path(base) / relative
    return None
|
||||
|
||||
|
||||
def _resolved_path(path: Path | None) -> Path | None:
|
||||
if path is None:
|
||||
return None
|
||||
with suppress(OSError, RuntimeError):
|
||||
return path.resolve(strict=False)
|
||||
return path
|
||||
|
||||
|
||||
def _paths_match(left: Path | None, right: Path | None) -> bool:
    """Return whether both paths are given and resolve to the same path.

    A missing path on either side never matches.
    """
    if left is not None and right is not None:
        return _resolved_path(left) == _resolved_path(right)
    return False
|
||||
|
||||
|
||||
def _has_copy_suffix(locator: str | None) -> bool:
    """Return whether the locator's file name matches ``_COPY_SUFFIX_RE``.

    Only the final path component is inspected; ``None`` is treated as an
    empty locator.
    """
    file_name = PurePath(str(locator or "")).name
    return _COPY_SUFFIX_RE.search(file_name) is not None
|
||||
|
||||
|
||||
def _location_payload(row: sqlite3.Row | dict[str, Any]) -> dict[str, Any]:
    """Convert a joined location row into a plain dict with on-disk facts.

    Besides the row columns, the result carries ``file_exists``,
    ``actual_file_size_bytes`` (``None`` when the file is missing or cannot be
    stat-ed) and the computed path under the private ``_path`` key.
    """
    path = _path_for_location(row)
    exists_on_disk = path is not None and path.exists()

    size_on_disk = None
    if exists_on_disk:
        try:
            size_on_disk = int(path.stat().st_size)
        except OSError:
            # The file may vanish or be unreadable between the two calls.
            size_on_disk = None

    payload: dict[str, Any] = {}
    payload["id"] = int(row["location_id"])
    for int_column in ("file_asset_id", "song_id", "backend_id"):
        payload[int_column] = int(row[int_column])
    for text_column in ("backend_name", "locator", "absolute_path"):
        payload[text_column] = str(row[text_column] or "")
    payload["file_exists"] = exists_on_disk
    payload["file_size_bytes"] = _coerce_int(row["file_size_bytes"])
    payload["actual_file_size_bytes"] = size_on_disk
    payload["song_name"] = str(row["song_name"] or "")
    payload["singers"] = str(row["singers"] or "")
    payload["_path"] = path
    return payload
|
||||
|
||||
|
||||
def _location_sort_key(location: dict[str, Any]) -> tuple[int, int, int, int]:
    """Return the sort key used to pick which location of a group to keep.

    Lower sorts first: existing files, then locators without a copy suffix,
    then shorter locators, then smaller ids.
    """
    locator = location["locator"]
    missing_rank = int(not location["file_exists"])
    copy_rank = int(_has_copy_suffix(locator))
    return (missing_rank, copy_rank, len(locator), int(location["id"]))
|
||||
|
||||
|
||||
def _duplicate_size_bytes(location: dict[str, Any]) -> int:
|
||||
size_value = location.get("actual_file_size_bytes")
|
||||
if size_value is None:
|
||||
size_value = location.get("file_size_bytes")
|
||||
return max(int(size_value or 0), 0)
|
||||
|
||||
|
||||
class LocalMaintenanceService:
    """Find and clean up duplicate active file locations on ``local_fs`` backends.

    A duplicate group is a ``(file_asset_id, backend_id)`` pair that has more
    than one active ``file_locations`` row on a backend of type ``local_fs``.
    """

    def __init__(self, db_path: str | Path):
        """Remember the database path; no connection is opened here."""
        self.db_path = Path(db_path)

    def _connect(self) -> sqlite3.Connection:
        """Open a connection through ``connect_database``."""
        return connect_database(self.db_path)

    @contextmanager
    def _connection(self):
        """Yield a connection that is committed on success and always closed.

        When the body raises, the commit is skipped and the connection is
        closed without committing.
        """
        conn = self._connect()
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    def scan_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Return a summary of duplicate groups plus up to ``sample_limit`` groups."""
        with self._connection() as conn:
            groups = self._load_duplicate_groups(conn)
            scanned_row = conn.execute(
                """
                SELECT COUNT(*) AS count_value
                FROM file_locations AS fl
                JOIN storage_backends AS sb ON sb.id = fl.backend_id
                WHERE fl.status = 'active'
                  AND sb.backend_type = 'local_fs'
                """
            ).fetchone()
            return self._build_scan_payload(
                groups,
                scanned_active_local_location_count=int(scanned_row["count_value"]) if scanned_row else 0,
                sample_limit=sample_limit,
            )

    def dedupe_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Deactivate duplicate locations, delete their files and return a fresh scan.

        For every duplicate group the kept location becomes primary, upload
        tasks and job items that reference a duplicate are repointed to it,
        and the duplicate rows are set to ``inactive``. Files are removed only
        after the database transaction has been committed, so a failed
        transaction never leaves active rows pointing at deleted files.

        Returns:
            The payload of ``scan_local_duplicates`` with an extra
            ``"execution"`` entry holding the counters of this run.

        Raises:
            LocalDedupeBlockedError: When jobs or job items are running.
        """
        execution = {
            "deduped_group_count": 0,
            "inactive_location_count": 0,
            "deleted_file_count": 0,
            "released_bytes": 0,
            "repointed_upload_task_count": 0,
            "repointed_job_item_count": 0,
        }
        # (path, size) pairs to unlink once the transaction is committed.
        pending_deletions: list[tuple[Path, int]] = []

        with self._connection() as conn:
            self._raise_if_running_work(conn)
            affected_pairs: set[tuple[int, int]] = set()
            for group in self._load_duplicate_groups(conn):
                duplicates = list(group["duplicates"])
                if not duplicates:
                    continue
                keep = group["keep"]
                keep_id = int(keep["id"])
                execution["deduped_group_count"] += 1
                conn.execute(
                    """
                    UPDATE file_locations
                    SET
                        is_primary = CASE WHEN id = ? THEN 1 ELSE 0 END,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE file_asset_id = ? AND backend_id = ?
                    """,
                    (
                        keep_id,
                        int(group["file_asset_id"]),
                        int(group["backend_id"]),
                    ),
                )
                for duplicate in duplicates:
                    self._retire_duplicate(
                        conn,
                        keep_id=keep_id,
                        duplicate_id=int(duplicate["id"]),
                        execution=execution,
                    )
                    duplicate_path = duplicate["_path"]
                    # Never schedule the kept file itself for deletion.
                    if (
                        duplicate_path is not None
                        and duplicate_path.exists()
                        and not _paths_match(duplicate_path, keep["_path"])
                    ):
                        pending_deletions.append(
                            (duplicate_path, _duplicate_size_bytes(duplicate))
                        )
                affected_pairs.add((int(group["song_id"]), int(group["backend_id"])))

            for song_id, backend_id in affected_pairs:
                self._refresh_song_backend_presence_with_connection(
                    conn,
                    song_id=song_id,
                    backend_id=backend_id,
                )

        # The transaction is committed at this point; removal is best effort.
        for duplicate_path, size_bytes in pending_deletions:
            with suppress(OSError):
                duplicate_path.unlink()
                execution["deleted_file_count"] += 1
                execution["released_bytes"] += size_bytes

        payload = self.scan_local_duplicates(sample_limit=sample_limit)
        payload["execution"] = execution
        return payload

    @staticmethod
    def _retire_duplicate(
        conn: sqlite3.Connection,
        *,
        keep_id: int,
        duplicate_id: int,
        execution: dict[str, int],
    ) -> None:
        """Repoint references from one duplicate to the kept row and deactivate it."""
        upload_cursor = conn.execute(
            """
            UPDATE upload_tasks
            SET
                source_location_id = ?,
                updated_at = CURRENT_TIMESTAMP
            WHERE source_location_id = ?
            """,
            (keep_id, duplicate_id),
        )
        execution["repointed_upload_task_count"] += max(upload_cursor.rowcount, 0)

        item_cursor = conn.execute(
            """
            UPDATE job_items
            SET file_location_id = ?
            WHERE file_location_id = ?
            """,
            (keep_id, duplicate_id),
        )
        execution["repointed_job_item_count"] += max(item_cursor.rowcount, 0)

        inactive_cursor = conn.execute(
            """
            UPDATE file_locations
            SET
                status = 'inactive',
                is_primary = 0,
                updated_at = CURRENT_TIMESTAMP
            WHERE id = ? AND status = 'active'
            """,
            (duplicate_id,),
        )
        execution["inactive_location_count"] += max(inactive_cursor.rowcount, 0)

    def _raise_if_running_work(self, conn: sqlite3.Connection) -> None:
        """Raise ``LocalDedupeBlockedError`` when any job run or job item is running."""
        running_jobs_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_runs WHERE status = 'running'"
        ).fetchone()
        running_items_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_items WHERE status = 'running'"
        ).fetchone()
        running_jobs = int(running_jobs_row["count_value"]) if running_jobs_row else 0
        running_items = int(running_items_row["count_value"]) if running_items_row else 0
        if running_jobs > 0 or running_items > 0:
            raise LocalDedupeBlockedError(
                f"cannot dedupe while jobs or items are running (jobs={running_jobs}, items={running_items})"
            )

    def _load_duplicate_groups(self, conn: sqlite3.Connection) -> list[dict[str, Any]]:
        """Load duplicate groups and choose the location to keep in each.

        Within a group the locations are ordered by ``_location_sort_key``;
        the first one is ``keep`` and the rest are ``duplicates``. Groups are
        returned sorted by song id, file asset id and backend id.
        """
        rows = conn.execute(
            """
            WITH duplicate_pairs AS (
                SELECT fl.file_asset_id, fl.backend_id
                FROM file_locations AS fl
                JOIN storage_backends AS sb ON sb.id = fl.backend_id
                WHERE fl.status = 'active'
                  AND sb.backend_type = 'local_fs'
                GROUP BY fl.file_asset_id, fl.backend_id
                HAVING COUNT(*) > 1
            )
            SELECT
                fl.id AS location_id,
                fl.file_asset_id,
                fa.song_id,
                fl.backend_id,
                sb.name AS backend_name,
                sb.base_path,
                fl.locator,
                fl.absolute_path,
                COALESCE(fa.file_size_bytes, s.file_size_bytes) AS file_size_bytes,
                s.name AS song_name,
                s.singers
            FROM file_locations AS fl
            JOIN duplicate_pairs AS dp
              ON dp.file_asset_id = fl.file_asset_id
             AND dp.backend_id = fl.backend_id
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            JOIN songs AS s ON s.id = fa.song_id
            JOIN storage_backends AS sb ON sb.id = fl.backend_id
            WHERE fl.status = 'active'
            ORDER BY fl.file_asset_id ASC, fl.backend_id ASC, fl.id ASC
            """
        ).fetchall()

        grouped: dict[tuple[int, int], list[dict[str, Any]]] = {}
        for row in rows:
            location = _location_payload(row)
            key = (int(location["file_asset_id"]), int(location["backend_id"]))
            grouped.setdefault(key, []).append(location)

        groups: list[dict[str, Any]] = []
        for (file_asset_id, backend_id), locations in grouped.items():
            ordered_locations = sorted(locations, key=_location_sort_key)
            keep = ordered_locations[0]
            groups.append(
                {
                    "file_asset_id": int(file_asset_id),
                    "backend_id": int(backend_id),
                    "backend_name": keep["backend_name"],
                    "song_id": int(keep["song_id"]),
                    "song_name": keep["song_name"],
                    "singers": keep["singers"],
                    "keep": keep,
                    "duplicates": ordered_locations[1:],
                }
            )
        groups.sort(
            key=lambda group: (
                int(group["song_id"]),
                int(group["file_asset_id"]),
                int(group["backend_id"]),
            )
        )
        return groups

    def _build_scan_payload(
        self,
        groups: list[dict[str, Any]],
        *,
        scanned_active_local_location_count: int,
        sample_limit: int,
    ) -> dict[str, Any]:
        """Assemble the scan result: totals over all groups plus a group sample.

        A falsy ``sample_limit`` falls back to 20 and the limit is at least 1.
        """
        normalized_sample_limit = max(int(sample_limit or 20), 1)
        return {
            "summary": {
                "duplicate_group_count": len(groups),
                "duplicate_location_count": sum(len(group["duplicates"]) for group in groups),
                "duplicate_file_size_bytes": sum(
                    _duplicate_size_bytes(location)
                    for group in groups
                    for location in group["duplicates"]
                ),
                "scanned_active_local_location_count": int(scanned_active_local_location_count),
            },
            "groups": [self._serialize_group(group) for group in groups[:normalized_sample_limit]],
        }

    @staticmethod
    def _serialize_group(group: dict[str, Any]) -> dict[str, Any]:
        """Return the public representation of one duplicate group."""
        return {
            "file_asset_id": int(group["file_asset_id"]),
            "backend_id": int(group["backend_id"]),
            "backend_name": str(group["backend_name"]),
            "song_id": int(group["song_id"]),
            "song_name": str(group["song_name"]),
            "singers": str(group["singers"]),
            "keep": LocalMaintenanceService._serialize_location(group["keep"]),
            "duplicates": [
                LocalMaintenanceService._serialize_location(location)
                for location in group["duplicates"]
            ],
        }

    @staticmethod
    def _serialize_location(location: dict[str, Any]) -> dict[str, Any]:
        """Return the public representation of one location (without ``_path``)."""
        return {
            "id": int(location["id"]),
            "locator": str(location["locator"]),
            "absolute_path": str(location["absolute_path"]),
            "file_exists": bool(location["file_exists"]),
            "file_size_bytes": _coerce_int(location["file_size_bytes"]),
            "actual_file_size_bytes": _coerce_int(location["actual_file_size_bytes"]),
        }

    @staticmethod
    def _refresh_song_backend_presence_with_connection(
        conn: sqlite3.Connection,
        *,
        song_id: int,
        backend_id: int,
    ) -> None:
        """Recompute and upsert the ``song_backend_presence`` row for one pair.

        The row records how many active locations the song has on the backend
        and the smallest active location id as ``primary_file_location_id``.
        """
        summary = conn.execute(
            """
            SELECT
                COUNT(*) AS active_file_count,
                MIN(fl.id) AS primary_file_location_id
            FROM file_locations AS fl
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            WHERE fa.song_id = ?
              AND fl.backend_id = ?
              AND fl.status = 'active'
            """,
            (int(song_id), int(backend_id)),
        ).fetchone()
        active_file_count = int(summary["active_file_count"]) if summary else 0
        has_active_file = 1 if active_file_count > 0 else 0
        primary_file_location_id = summary["primary_file_location_id"] if summary else None
        conn.execute(
            """
            INSERT INTO song_backend_presence (
                song_id,
                backend_id,
                has_active_file,
                active_file_count,
                primary_file_location_id
            )
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(song_id, backend_id) DO UPDATE SET
                has_active_file = excluded.has_active_file,
                active_file_count = excluded.active_file_count,
                primary_file_location_id = excluded.primary_file_location_id,
                updated_at = CURRENT_TIMESTAMP
            """,
            (
                int(song_id),
                int(backend_id),
                has_active_file,
                active_file_count,
                primary_file_location_id,
            ),
        )
|
||||
@@ -0,0 +1,93 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """String-valued states of a job run."""

    QUEUED = "queued"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"
    COMPLETED = "completed"
    COMPLETED_WITH_ERRORS = "completed_with_errors"
    FAILED = "failed"
    CANCELED = "canceled"
|
||||
|
||||
|
||||
class StageStatus(str, Enum):
    """String-valued states of a job stage."""

    PENDING = "pending"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"
|
||||
|
||||
|
||||
class ItemStatus(str, Enum):
    """String-valued states of a job item."""

    PENDING = "pending"
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    FAILED = "failed"
    INTERRUPTED = "interrupted"
    SKIPPED = "skipped"
    CANCELED = "canceled"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobRun:
    """Immutable record describing one job run."""

    # Identity and scheduling.
    id: int
    job_type: str
    status: JobStatus
    priority: int
    requested_by: str | None

    # Configuration captured for the run.
    config_snapshot: dict[str, Any]
    sources: list[str]
    download_sources: list[str]
    playlist_scope: dict[str, Any]

    # Timestamps (stored as text or None) and bookkeeping.
    created_at: str | None
    started_at: str | None
    ended_at: str | None
    last_error: str | None
    resume_token: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobStage:
    """Immutable record describing one stage of a job run."""

    # Identity and position within the job.
    id: int
    job_run_id: int
    stage_type: str
    seq_no: int
    status: StageStatus

    # Item counters.
    total_items: int
    pending_items: int
    running_items: int
    success_items: int
    failed_items: int
    skipped_items: int

    # Timestamps (stored as text or None) and last error.
    started_at: str | None
    ended_at: str | None
    last_error: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobItem:
    """Immutable record describing one work item of a job stage."""

    # Identity.
    id: int
    job_stage_id: int
    item_type: str
    item_key: str

    # Optional references to the entities the item concerns.
    playlist_pool_id: int | None
    playlist_id: int | None
    song_id: int | None
    file_location_id: int | None

    # Execution state.
    status: ItemStatus
    attempt_count: int
    max_attempts: int
    worker_id: int | None
    started_at: str | None
    ended_at: str | None
    last_error: str | None
    last_error_code: str | None
    payload: dict[str, Any]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,896 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from collections import Counter
|
||||
from concurrent.futures import Future, ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from queue import Queue
|
||||
from typing import Any
|
||||
|
||||
from musicdl.catalogsync.catalog_export import run_catalog_export_command
|
||||
from musicdl.catalogsync.downloader import DownloadPlanner
|
||||
from musicdl.catalogsync.repository import CatalogRepository
|
||||
from musicdl.catalogsync.services import CatalogSyncService
|
||||
from musicdl.catalogsync.uploader import CatalogUploader
|
||||
|
||||
from .jobdefs import DOWNLOAD_LANE, JOB_STAGE_SEQUENCES, job_lane_type
|
||||
from .executors import (
|
||||
CollectStageExecutor,
|
||||
DownloadStageExecutor,
|
||||
SyncStageExecutor,
|
||||
UploadStageExecutor,
|
||||
)
|
||||
from .models import JobStatus, StageStatus
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
DEFAULT_DOWNLOAD_WORKERS = 10
|
||||
DEFAULT_SYNC_WORKERS = 4
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _unique_preserve_order(values: list[str]) -> list[str]:
|
||||
normalized: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
item = str(value).strip()
|
||||
if not item or item in seen:
|
||||
continue
|
||||
normalized.append(item)
|
||||
seen.add(item)
|
||||
return normalized
|
||||
|
||||
|
||||
def _split_csv(value: Any) -> list[str]:
|
||||
if isinstance(value, list):
|
||||
return [str(item).strip() for item in value if str(item).strip()]
|
||||
if not value:
|
||||
return []
|
||||
return [part.strip() for part in str(value).split(",") if part.strip()]
|
||||
|
||||
|
||||
def _int_value(value: Any, default: int) -> int:
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return parsed if parsed > 0 else default
|
||||
|
||||
|
||||
class OpsRunner:
|
||||
def __init__(
    self,
    repository: OpsRepository,
    sleep_seconds: float = 1.0,
    *,
    download_lane_concurrency: int = 1,
    general_lane_concurrency: int = 3,
):
    """Set up lane limits, the job thread pool and bookkeeping structures.

    Args:
        repository: Ops repository; its ``db_path`` is also used for the
            catalog repository.
        sleep_seconds: Idle wait between loop iterations, at least 0.1.
        download_lane_concurrency: Accepted but not read by this constructor.
        general_lane_concurrency: Limit for the general lane, at least 1.
    """
    self.repository = repository
    self.db_path = Path(self.repository.db_path)
    self.catalog_repo = CatalogRepository(self.db_path)
    self.sleep_seconds = max(float(sleep_seconds), 0.1)

    # NOTE(review): the download lane is pinned to 1 regardless of the
    # download_lane_concurrency argument; confirm this is intentional.
    self.download_lane_concurrency = 1
    self.general_lane_concurrency = max(int(general_lane_concurrency), 1)
    pool_size = self.download_lane_concurrency + self.general_lane_concurrency
    self._job_pool = ThreadPoolExecutor(max_workers=pool_size)

    self._futures: dict[int, Future[None]] = {}
    self._futures_lock = threading.Lock()
    self._playlist_export_lock = threading.Lock()
    self._catalog_export_lock = threading.Lock()
    self._exported_stage_playlists: set[tuple[int, int]] = set()
|
||||
|
||||
def recover_incomplete_jobs(self) -> None:
    """Pause, clean up and resume every job the repository reports as recoverable.

    For each such job the items still marked running are flagged as
    interrupted, a ``recovery_requeued`` event is recorded and the job is
    resumed.
    """
    repo = self.repository
    for job in repo.list_recoverable_jobs():
        job_id = job.id
        repo.pause_job_for_recovery(job_id)
        for running_item in repo.list_running_items(job_id):
            repo.mark_item_interrupted(
                running_item.id,
                last_error="Recovery interrupted running item after runner restart.",
            )
        repo.add_job_event(
            job_id,
            "recovery_requeued",
            "Recovered incomplete job and re-queued resumable work.",
        )
        repo.resume_job(job_id)
|
||||
|
||||
def apply_pending_commands(self) -> None:
    """Apply every pending command and mark each one as applied.

    ``pause``, ``resume`` and ``cancel`` are forwarded to the repository.
    ``retry_item`` and ``force_retry_item`` requeue the target item (the
    latter with ``force=True``); a missing target or a rejected requeue is
    recorded as a job event. Any other command type is recorded as ignored.
    """
    repo = self.repository
    # Command types that map directly onto one repository method.
    job_level_methods = {
        "pause": "request_job_pause",
        "resume": "resume_job",
        "cancel": "cancel_job",
    }
    retry_types = ("retry_item", "force_retry_item")

    for command in repo.list_pending_commands():
        command_type = str(command["command_type"])
        job_id = int(command["job_run_id"])
        command_id = int(command["id"])
        target_item_id = command["target_item_id"]
        details = {"command_type": command_type, "command_id": command_id}

        if command_type in job_level_methods:
            getattr(repo, job_level_methods[command_type])(job_id)
        elif command_type in retry_types:
            if target_item_id is None:
                repo.add_job_event(
                    job_id,
                    "ignored_command",
                    f"{command_type} command missing target_item_id.",
                    details=details,
                )
            elif not repo.requeue_item(
                int(target_item_id),
                force=(command_type == "force_retry_item"),
                job_id=job_id,
            ):
                repo.add_job_event(
                    job_id,
                    "retry_rejected",
                    f"{command_type} command rejected.",
                    item_id=int(target_item_id),
                    details=details,
                )
        else:
            repo.add_job_event(
                job_id,
                "ignored_command",
                "Unsupported command type.",
                details=details,
            )

        repo.mark_command_applied(command_id)
|
||||
|
||||
def reconcile_pause_state(self, job_id: int) -> None:
    """Finalize a requested pause once the job has no running items left."""
    repo = self.repository
    if not repo.job_has_running_items(job_id):
        repo.finalize_pause(job_id)
|
||||
|
||||
def run_forever(self, stop_event=None) -> None:
    """Recover incomplete jobs, then loop until ``stop_event`` is set.

    After an iteration that did no work the loop waits ``sleep_seconds``,
    using ``stop_event.wait`` when an event was given and ``time.sleep``
    otherwise. Without a stop event the loop never ends.
    """
    self.recover_incomplete_jobs()
    idle_wait = time.sleep if stop_event is None else stop_event.wait
    while not (stop_event is not None and stop_event.is_set()):
        if self.loop_once():
            # Work was done; poll again immediately.
            continue
        idle_wait(self.sleep_seconds)
|
||||
|
||||
def loop_once(self) -> bool:
    """Run one scheduler iteration and report whether anything happened.

    Returns ``True`` when commands were pending before they were applied,
    or when at least one job was reaped or started.
    """
    commands_were_pending = bool(self.repository.list_pending_commands())
    self.apply_pending_commands()
    reaped = self._reap_finished_jobs()
    launched = self._start_eligible_jobs()
    if commands_were_pending or reaped or launched:
        return True
    return False
|
||||
|
||||
def _reap_finished_jobs(self) -> int:
|
||||
finished_count = 0
|
||||
finished_futures: list[tuple[int, Future[None]]] = []
|
||||
with self._futures_lock:
|
||||
for job_id, future in list(self._futures.items()):
|
||||
if not future.done():
|
||||
continue
|
||||
del self._futures[job_id]
|
||||
finished_futures.append((job_id, future))
|
||||
for job_id, future in finished_futures:
|
||||
try:
|
||||
future.result()
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job_id,
|
||||
"job_future_error",
|
||||
str(exc),
|
||||
)
|
||||
job = self.repository.get_job(job_id)
|
||||
if job is not None and job.status not in {
|
||||
JobStatus.COMPLETED,
|
||||
JobStatus.COMPLETED_WITH_ERRORS,
|
||||
JobStatus.FAILED,
|
||||
JobStatus.CANCELED,
|
||||
JobStatus.PAUSED,
|
||||
}:
|
||||
self.repository.mark_job_finished(
|
||||
job_id,
|
||||
status=JobStatus.FAILED,
|
||||
last_error=str(exc),
|
||||
)
|
||||
finished_count += 1
|
||||
return finished_count
|
||||
|
||||
def _submit_job(self, job_id: int) -> bool:
|
||||
with self._futures_lock:
|
||||
if job_id in self._futures:
|
||||
return False
|
||||
self._futures[job_id] = self._job_pool.submit(self._run_job, job_id)
|
||||
return True
|
||||
|
||||
def _start_eligible_jobs(self) -> int:
    """Submit active jobs and claim queued jobs while their lane has capacity.

    Active jobs with a pending pause request are reconciled instead of
    submitted. Queued jobs are claimed only while the number of jobs in
    their lane is below that lane's limit.

    Returns:
        The number of jobs newly submitted to the thread pool.
    """
    repo = self.repository
    active_jobs = repo.list_active_jobs()
    lane_counts = Counter(job_lane_type(job.job_type) for job in active_jobs)
    started = 0

    for job in active_jobs:
        if job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job.id)
        elif self._submit_job(job.id):
            started += 1

    for candidate in repo.list_queued_jobs():
        lane = job_lane_type(candidate.job_type)
        if lane == DOWNLOAD_LANE:
            capacity = self.download_lane_concurrency
        else:
            capacity = self.general_lane_concurrency
        if lane_counts[lane] >= capacity:
            continue
        claimed = repo.claim_job_if_queued(candidate.id)
        if claimed is None:
            # Not claimable; leave the lane count unchanged.
            continue
        lane_counts[lane] += 1
        if self._submit_job(claimed.id):
            started += 1
    return started
|
||||
|
||||
def _run_job(self, job_id: int) -> None:
    """Drive one job through its stages until it finishes, pauses or is canceled.

    The job status is re-read before and after every stage so that cancel
    and pause requests are honored between stages. Any exception is logged
    with its traceback, recorded as a ``job_execution_error`` event, and the
    job is marked failed unless it already has a final or paused status.
    """

    def _halted(job) -> bool:
        # True when the job is gone or a cancel/pause request was handled.
        if job is None:
            return True
        if job.status == JobStatus.CANCELED:
            self.repository.finalize_canceled_job(job_id)
            return True
        if job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job_id)
            return True
        return False

    try:
        current_job = self.repository.get_job(job_id)
        if _halted(current_job) or current_job.status == JobStatus.PAUSED:
            return
        if not self.repository.mark_job_running(job_id):
            # The status changed underneath us; honor a cancel/pause and stop.
            _halted(self.repository.get_job(job_id))
            return
        current_job = self.repository.get_job(job_id)
        if current_job is None:
            return
        self._ensure_job_stages(current_job)

        while True:
            current_job = self.repository.get_job(job_id)
            if _halted(current_job):
                return

            stage = self._next_runnable_stage(job_id)
            if stage is None:
                if self._job_is_finished(job_id):
                    self._finalize_job(job_id)
                    return
                stages = self.repository.list_job_stages(job_id)
                if any(
                    stage_row.status in {StageStatus.PAUSED, StageStatus.PAUSE_REQUESTED}
                    for stage_row in stages
                ):
                    self.repository.pause_job_for_recovery(job_id)
                    return
                raise RuntimeError("Job has no runnable stages but is not finished.")

            self._run_stage(current_job, stage)

            if _halted(self.repository.get_job(job_id)):
                return
            if self._job_is_finished(job_id):
                self._finalize_job(job_id)
                return
    except Exception as exc:
        # Only str(exc) is persisted below, so keep the traceback in the log.
        logger.exception("Job %s failed during execution", job_id)
        self.repository.add_job_event(
            job_id,
            "job_execution_error",
            str(exc),
        )
        job = self.repository.get_job(job_id)
        if job is not None and job.status not in {
            JobStatus.COMPLETED,
            JobStatus.COMPLETED_WITH_ERRORS,
            JobStatus.FAILED,
            JobStatus.CANCELED,
            JobStatus.PAUSED,
        }:
            self.repository.mark_job_finished(
                job_id,
                status=JobStatus.FAILED,
                last_error=str(exc),
            )
|
||||
|
||||
def _ensure_job_stages(self, job) -> None:
|
||||
existing = self.repository.list_job_stages(job.id)
|
||||
if existing:
|
||||
return
|
||||
for seq_no, stage_type in enumerate(
|
||||
JOB_STAGE_SEQUENCES.get(str(job.job_type), []), start=1
|
||||
):
|
||||
self.repository.create_stage(job_run_id=job.id, stage_type=stage_type, seq_no=seq_no)
|
||||
|
||||
def _next_runnable_stage(self, job_id: int):
|
||||
for stage in self.repository.list_job_stages(job_id):
|
||||
if stage.status in {StageStatus.PENDING, StageStatus.RUNNING}:
|
||||
return stage
|
||||
return None
|
||||
|
||||
def _job_sources(self, job) -> list[str]:
    """Return the job's sources, de-duplicated in order.

    ``job.sources`` is used when non-empty; otherwise the ``SOURCES`` entry
    of the config snapshot is split on commas.
    """
    chosen = job.sources or _split_csv(job.config_snapshot.get("SOURCES"))
    return _unique_preserve_order(list(chosen))
|
||||
|
||||
def _job_download_sources(self, job) -> list[str]:
    """Return the job's download sources, de-duplicated in order.

    The first non-empty candidate wins: ``job.download_sources``, then the
    ``download_sources`` config entry, then ``DOWNLOAD_SOURCES``.
    """
    chosen = job.download_sources
    if not chosen:
        chosen = _split_csv(job.config_snapshot.get("download_sources"))
    if not chosen:
        chosen = _split_csv(job.config_snapshot.get("DOWNLOAD_SOURCES"))
    return _unique_preserve_order(list(chosen))
|
||||
|
||||
def _job_playlist_ids(self, job) -> list[int] | None:
|
||||
raw_value = job.playlist_scope.get("playlist_ids")
|
||||
if not isinstance(raw_value, list):
|
||||
return None
|
||||
playlist_ids = []
|
||||
for item in raw_value:
|
||||
try:
|
||||
playlist_ids.append(int(item))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return playlist_ids or None
|
||||
|
||||
def _resolve_library_root(self, job) -> Path:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
|
||||
if library_dir:
|
||||
return Path(str(library_dir)).resolve()
|
||||
try:
|
||||
backend = self.catalog_repo.get_backend(self.catalog_repo.get_default_backend_id())
|
||||
except Exception:
|
||||
backend = None
|
||||
if backend and backend["base_path"]:
|
||||
return Path(str(backend["base_path"])).resolve()
|
||||
raise RuntimeError("No library root configured for download stage")
|
||||
|
||||
def _resolve_playlists_root(self, job) -> Path | None:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
root_dir = mapping.get("ROOT_DIR") or mapping.get("root_dir")
|
||||
if root_dir:
|
||||
path = Path(str(root_dir)).resolve() / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
|
||||
if library_dir:
|
||||
path = Path(str(library_dir)).resolve().parent / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
library_root = self.catalog_repo.get_default_local_library_root()
|
||||
if library_root is None:
|
||||
return None
|
||||
path = library_root.parent / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
def _mark_playlist_exported(self, stage_id: int, playlist_id: int) -> bool:
|
||||
key = (int(stage_id), int(playlist_id))
|
||||
with self._playlist_export_lock:
|
||||
if key in self._exported_stage_playlists:
|
||||
return False
|
||||
self._exported_stage_playlists.add(key)
|
||||
return True
|
||||
|
||||
def _forget_playlist_exported(self, stage_id: int, playlist_id: int) -> None:
|
||||
key = (int(stage_id), int(playlist_id))
|
||||
with self._playlist_export_lock:
|
||||
self._exported_stage_playlists.discard(key)
|
||||
|
||||
def _export_playlist_artifacts_for_playlist_if_ready(self, job, stage, playlist_id: int | None) -> bool:
|
||||
if str(stage.stage_type) != "download" or playlist_id is None:
|
||||
return False
|
||||
scoped_playlist_ids = self._job_playlist_ids(job)
|
||||
normalized_playlist_id = int(playlist_id)
|
||||
if not scoped_playlist_ids or normalized_playlist_id not in scoped_playlist_ids:
|
||||
return False
|
||||
if self.repository.playlist_has_open_items(stage.id, normalized_playlist_id):
|
||||
return False
|
||||
if not self._mark_playlist_exported(stage.id, normalized_playlist_id):
|
||||
return False
|
||||
|
||||
playlists_root = self._resolve_playlists_root(job)
|
||||
if playlists_root is None:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_skipped",
|
||||
"Playlists root is not configured for scoped download export.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
service = CatalogSyncService(
|
||||
repository=self.catalog_repo,
|
||||
playlists_root=playlists_root,
|
||||
)
|
||||
try:
|
||||
folder_path = service.ensure_playlist_artifacts_for_playlist(normalized_playlist_id)
|
||||
except Exception as exc:
|
||||
self._forget_playlist_exported(stage.id, normalized_playlist_id)
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
if folder_path is None:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_skipped",
|
||||
"Playlist export row is unavailable.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_ready",
|
||||
f"Exported playlist artifacts for playlist {normalized_playlist_id}.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id, "playlist_dir": str(folder_path)},
|
||||
)
|
||||
return True
|
||||
|
||||
def _refresh_ready_playlist_artifacts(self, job, stage) -> list[int]:
|
||||
if str(stage.stage_type) != "download":
|
||||
return []
|
||||
playlist_ids = self._job_playlist_ids(job)
|
||||
if not playlist_ids:
|
||||
return []
|
||||
exported_ids: list[int] = []
|
||||
for playlist_id in playlist_ids:
|
||||
if self._export_playlist_artifacts_for_playlist_if_ready(job, stage, int(playlist_id)):
|
||||
exported_ids.append(int(playlist_id))
|
||||
return exported_ids
|
||||
|
||||
def _resolve_backend_name(self, job) -> str:
|
||||
value = (
|
||||
job.config_snapshot.get("OBJECT_BACKEND_NAME")
|
||||
or job.config_snapshot.get("object_backend_name")
|
||||
or ""
|
||||
)
|
||||
return str(value).strip()
|
||||
|
||||
def _worker_count(self, job, stage_type: str) -> int:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
if stage_type == "download":
|
||||
return _int_value(mapping.get("DOWNLOAD_WORKERS"), DEFAULT_DOWNLOAD_WORKERS)
|
||||
if stage_type == "sync":
|
||||
return _int_value(mapping.get("SYNC_WORKERS"), DEFAULT_SYNC_WORKERS)
|
||||
if stage_type == "upload":
|
||||
return _int_value(mapping.get("UPLOAD_WORKERS"), 4)
|
||||
return 1
|
||||
|
||||
def _download_stage_worker_split(self, total_workers: int) -> tuple[int, int]:
|
||||
normalized_total = max(int(total_workers or 0), 1)
|
||||
if normalized_total == 1:
|
||||
return 1, 0
|
||||
if normalized_total == 2:
|
||||
return 1, 1
|
||||
if normalized_total <= 5:
|
||||
download_workers = 1
|
||||
else:
|
||||
download_workers = 2
|
||||
resolver_workers = max(1, normalized_total - download_workers)
|
||||
return resolver_workers, download_workers
|
||||
|
||||
def _materialize_stage_items(self, job, stage) -> None:
|
||||
refreshed_stage = self.repository.get_stage(stage.id)
|
||||
if refreshed_stage is None or refreshed_stage.total_items > 0:
|
||||
return
|
||||
|
||||
playlist_ids = self._job_playlist_ids(job)
|
||||
if stage.stage_type == "collect":
|
||||
for source in self._job_sources(job):
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="collect_source",
|
||||
item_key=f"collect:{source}",
|
||||
payload={
|
||||
"source": source,
|
||||
"include_playlist_square": True,
|
||||
"include_toplist": True,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "sync":
|
||||
if playlist_ids:
|
||||
playlist_rows = self.catalog_repo.list_playlists_by_ids(playlist_ids)
|
||||
else:
|
||||
playlist_rows = self.catalog_repo.list_playlists(sources=self._job_sources(job))
|
||||
for row in playlist_rows:
|
||||
playlist_id = int(row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="playlist_sync",
|
||||
item_key=f"playlist:{playlist_id}",
|
||||
playlist_id=playlist_id,
|
||||
payload={"playlist_row": dict(row)},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "download":
|
||||
planner = DownloadPlanner(self.catalog_repo)
|
||||
for row in planner.build_download_queue(
|
||||
sources=self._job_sources(job),
|
||||
playlist_ids=playlist_ids,
|
||||
):
|
||||
song_id = int(row.get("song_id") or row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="song_download",
|
||||
item_key=f"song:{song_id}",
|
||||
song_id=song_id,
|
||||
playlist_id=row.get("playlist_id"),
|
||||
payload={"row": dict(row)},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "upload":
|
||||
backend_name = self._resolve_backend_name(job)
|
||||
if not backend_name:
|
||||
return
|
||||
uploader = CatalogUploader(self.catalog_repo)
|
||||
uploader.enqueue_missing_uploads(
|
||||
backend_name=backend_name,
|
||||
sources=self._job_sources(job) or None,
|
||||
playlist_ids=playlist_ids,
|
||||
)
|
||||
backend = self.catalog_repo.get_backend_by_name(backend_name)
|
||||
if backend is None:
|
||||
return
|
||||
rows = self.catalog_repo.list_pending_upload_tasks(target_backend_id=int(backend["id"]))
|
||||
for row in rows:
|
||||
upload_task_id = int(row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="file_upload",
|
||||
item_key=f"upload:{upload_task_id}",
|
||||
file_location_id=row["source_location_id"],
|
||||
payload={
|
||||
"upload_task_id": upload_task_id,
|
||||
"upload_row": dict(row),
|
||||
},
|
||||
)
|
||||
|
||||
def _build_executor(self, job, stage):
|
||||
if stage.stage_type == "collect":
|
||||
return CollectStageExecutor(self.db_path, ops_repo=self.repository)
|
||||
if stage.stage_type == "sync":
|
||||
return SyncStageExecutor(self.db_path, ops_repo=self.repository)
|
||||
if stage.stage_type == "download":
|
||||
return DownloadStageExecutor(
|
||||
self.db_path,
|
||||
library_root=self._resolve_library_root(job),
|
||||
download_sources=self._job_download_sources(job),
|
||||
ops_repo=self.repository,
|
||||
)
|
||||
if stage.stage_type == "upload":
|
||||
backend_name = self._resolve_backend_name(job)
|
||||
if not backend_name:
|
||||
raise RuntimeError("No object backend configured for upload stage")
|
||||
return UploadStageExecutor(
|
||||
self.db_path,
|
||||
backend_name=backend_name,
|
||||
ops_repo=self.repository,
|
||||
)
|
||||
raise RuntimeError(f"Unsupported stage type: {stage.stage_type}")
|
||||
|
||||
def _export_playlist_artifacts_for_job(self, job, stage) -> None:
|
||||
exported_ids = self._refresh_ready_playlist_artifacts(job, stage)
|
||||
playlist_ids = self._job_playlist_ids(job) or []
|
||||
if str(stage.stage_type) != "download" or not playlist_ids:
|
||||
return
|
||||
try:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_exported",
|
||||
f"Refreshed playlist export folders for {len(exported_ids)} playlists.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_ids": exported_ids, "scoped_playlist_ids": playlist_ids},
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to persist playlist_exported event for job %s stage %s.",
|
||||
job.id,
|
||||
stage.id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def _run_catalog_export_for_stage(self, job, stage) -> None:
|
||||
if str(stage.stage_type) != "download":
|
||||
return
|
||||
|
||||
with self._catalog_export_lock:
|
||||
refreshed_job = self.repository.get_job(job.id) or job
|
||||
if refreshed_job.status in {
|
||||
JobStatus.CANCELED,
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.PAUSED,
|
||||
}:
|
||||
return
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"catalog_export_started",
|
||||
"Started post-download catalog export command.",
|
||||
stage_id=stage.id,
|
||||
)
|
||||
try:
|
||||
result = run_catalog_export_command(refreshed_job.config_snapshot)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"catalog_export_failed",
|
||||
f"Catalog export command raised an error: {exc}",
|
||||
stage_id=stage.id,
|
||||
details={"error": str(exc) or exc.__class__.__name__},
|
||||
)
|
||||
return
|
||||
|
||||
details: dict[str, Any] = {}
|
||||
if result.command:
|
||||
details["command"] = result.command
|
||||
if result.workdir:
|
||||
details["workdir"] = result.workdir
|
||||
if result.returncode is not None:
|
||||
details["returncode"] = result.returncode
|
||||
if result.stdout:
|
||||
details["stdout"] = result.stdout
|
||||
if result.stderr:
|
||||
details["stderr"] = result.stderr
|
||||
|
||||
normalized_status = str(result.status).strip().lower()
|
||||
if normalized_status == "succeeded":
|
||||
event_type = "catalog_export_succeeded"
|
||||
message = "Catalog export command completed successfully."
|
||||
elif normalized_status == "skipped":
|
||||
event_type = "catalog_export_skipped"
|
||||
message = "Catalog export command was skipped."
|
||||
else:
|
||||
event_type = "catalog_export_failed"
|
||||
message = "Catalog export command failed."
|
||||
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
event_type,
|
||||
message,
|
||||
stage_id=stage.id,
|
||||
details=details or None,
|
||||
)
|
||||
|
||||
def _run_stage_with_single_pool(self, job, stage, executor, worker_count: int) -> None:
|
||||
def worker_loop(worker_index: int) -> None:
|
||||
worker_name = f"{stage.stage_type}-{worker_index + 1}"
|
||||
while True:
|
||||
active_job = self.repository.get_job(job.id)
|
||||
if active_job is None or active_job.status in {
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.CANCELED,
|
||||
}:
|
||||
return
|
||||
item = self.repository.claim_next_stage_item(stage.id, worker_name)
|
||||
if item is None:
|
||||
return
|
||||
try:
|
||||
executor.process_item(item.id, worker_name, already_claimed=True)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
item.playlist_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=item.id,
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=worker_count) as pool:
|
||||
futures = [pool.submit(worker_loop, index) for index in range(worker_count)]
|
||||
for future in futures:
|
||||
future.result()
|
||||
|
||||
def _run_download_stage_pipeline(self, job, stage, executor, worker_count: int) -> None:
|
||||
resolver_workers, download_workers = self._download_stage_worker_split(worker_count)
|
||||
if download_workers == 0:
|
||||
self._run_stage_with_single_pool(job, stage, executor, worker_count)
|
||||
return
|
||||
|
||||
ready_queue: Queue = Queue(maxsize=max(1, download_workers * 2))
|
||||
stop_event = threading.Event()
|
||||
sentinel = object()
|
||||
|
||||
def resolver_loop(worker_index: int) -> None:
|
||||
worker_name = f"resolve-{worker_index + 1}"
|
||||
while not stop_event.is_set():
|
||||
active_job = self.repository.get_job(job.id)
|
||||
if active_job is None or active_job.status in {
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.CANCELED,
|
||||
}:
|
||||
stop_event.set()
|
||||
return
|
||||
item = self.repository.claim_next_stage_item(stage.id, worker_name)
|
||||
if item is None:
|
||||
return
|
||||
try:
|
||||
executor.process_resolve_item(
|
||||
item.id,
|
||||
worker_name,
|
||||
ready_queue=ready_queue,
|
||||
already_claimed=True,
|
||||
)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
item.playlist_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=item.id,
|
||||
)
|
||||
|
||||
def download_loop(worker_index: int) -> None:
|
||||
worker_name = f"download-{worker_index + 1}"
|
||||
while True:
|
||||
task = ready_queue.get()
|
||||
if task is sentinel:
|
||||
return
|
||||
try:
|
||||
executor.process_download_task(task, worker_name)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
getattr(task, "playlist_id", None),
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=getattr(task, "item_id", None),
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=resolver_workers + download_workers) as pool:
|
||||
resolver_futures = [pool.submit(resolver_loop, index) for index in range(resolver_workers)]
|
||||
download_futures = [pool.submit(download_loop, index) for index in range(download_workers)]
|
||||
for future in resolver_futures:
|
||||
future.result()
|
||||
for _ in range(download_workers):
|
||||
ready_queue.put(sentinel)
|
||||
for future in download_futures:
|
||||
future.result()
|
||||
|
||||
def _run_stage(self, job, stage) -> None:
    """Execute one stage of a job from start to its terminal state.

    Steps: mark a pending stage as running, materialize its items, run
    the matching worker pool, then - unless the job was canceled or a
    pause was requested meanwhile - mark the stage COMPLETED or FAILED and
    trigger the playlist and catalog exports. A stage without items is
    completed immediately. A stage that still has open items after the
    pool returns is left as it is.
    """
    repo = self.repository

    def export_after_finish() -> None:
        # Reload the stage so the export hooks see its final state.
        final_stage = repo.get_stage(stage.id)
        if final_stage is not None:
            self._export_playlist_artifacts_for_job(job, final_stage)
            self._run_catalog_export_for_stage(job, final_stage)

    if stage.status == StageStatus.PENDING:
        repo.mark_stage_running(stage.id)
        repo.add_job_event(
            job.id,
            "stage_started",
            f"Started stage {stage.stage_type}.",
            stage_id=stage.id,
        )

    self._materialize_stage_items(job, stage)
    refreshed_stage = repo.get_stage(stage.id)
    if refreshed_stage is None:
        return
    if refreshed_stage.total_items == 0:
        repo.mark_stage_finished(stage.id, status=StageStatus.COMPLETED)
        export_after_finish()
        return

    executor = self._build_executor(job, refreshed_stage)
    worker_count = self._worker_count(job, refreshed_stage.stage_type)
    run_pool = (
        self._run_download_stage_pipeline
        if refreshed_stage.stage_type == "download"
        else self._run_stage_with_single_pool
    )
    run_pool(job, refreshed_stage, executor, worker_count)

    current_job = repo.get_job(job.id)
    if current_job is not None:
        if current_job.status == JobStatus.CANCELED:
            repo.finalize_canceled_job(job.id)
            return
        if current_job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job.id)
            return

    current_stage = repo.get_stage(stage.id)
    if current_stage is None:
        return
    if repo.stage_has_open_items(stage.id):
        return

    if current_stage.failed_items > 0:
        repo.mark_stage_finished(
            stage.id,
            status=StageStatus.FAILED,
            last_error="One or more stage items failed.",
        )
    else:
        repo.mark_stage_finished(stage.id, status=StageStatus.COMPLETED)
    export_after_finish()
|
||||
|
||||
def _job_is_finished(self, job_id: int) -> bool:
|
||||
stages = self.repository.list_job_stages(job_id)
|
||||
if not stages:
|
||||
return True
|
||||
return all(
|
||||
stage.status in {StageStatus.COMPLETED, StageStatus.FAILED, StageStatus.SKIPPED}
|
||||
for stage in stages
|
||||
)
|
||||
|
||||
def _finalize_job(self, job_id: int) -> None:
    """Mark the job finished, as COMPLETED or COMPLETED_WITH_ERRORS.

    The job completes with errors when any stage is FAILED or reports
    failed items. A job without stages completes cleanly.
    """
    stages = self.repository.list_job_stages(job_id)
    if not stages:
        self.repository.mark_job_finished(job_id, status=JobStatus.COMPLETED)
        return

    has_errors = False
    for stage in stages:
        if stage.status == StageStatus.FAILED or stage.failed_items > 0:
            has_errors = True
            break

    if has_errors:
        final_status = JobStatus.COMPLETED_WITH_ERRORS
        final_error = "One or more stage items failed."
    else:
        final_status = JobStatus.COMPLETED
        final_error = None
    self.repository.mark_job_finished(
        job_id,
        status=final_status,
        last_error=final_error,
    )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,265 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from .runtime import sanitize_path_component
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)

# File and directory names used inside each exported playlist folder.
PLAYLIST_META_FILENAME = ".playlist_meta.json"
PLAYLIST_YAML_FILENAME = "playlist.yaml"
PLAYLIST_COVERS_DIRNAME = "covers"
# Cover images larger than this (10 MiB) are not saved.
MAX_COVER_BYTES = 10 * 1024 * 1024

# Characters a string may consist of to be considered for unquoted output.
_YAML_SAFE_TEXT_RE = re.compile(r"^[A-Za-z0-9_./%+\- :]+$")
# Substrings that force quoting.
_SPECIAL_YAML_TOKENS = (": ", "#", "[", "]", "{", "}", ",", "&", "*", "!", "|", ">", "'", '"', "@", "`")
_COVER_EXT_RE = re.compile(r"\.(jpg|jpeg|png|webp|gif|bmp)$", re.IGNORECASE)

_CONTENT_TYPE_TO_EXT = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "image/gif": ".gif",
    "image/bmp": ".bmp",
}

# Plain words that YAML parsers commonly resolve to null or a boolean.
_YAML_RESERVED_WORDS = frozenset(
    {"null", "~", "true", "false", "yes", "no", "on", "off", "y", "n"}
)


def _is_plain_yaml_text(text: str) -> bool:
    """Return True when ``text`` can be emitted unquoted and read back as a string."""
    # fullmatch, not match: with match the pattern's "$" also accepts a
    # trailing newline, which would then be written into the document raw.
    if not _YAML_SAFE_TEXT_RE.fullmatch(text):
        return False
    if any(token in text for token in _SPECIAL_YAML_TOKENS):
        return False
    # Edge whitespace is dropped by parsers; a trailing colon reads as a key.
    if text != text.strip() or text.endswith(":"):
        return False
    first = text[0]
    # Leading indicator characters, and anything that could resolve as a
    # number or date, are quoted so the value stays a string.
    if first in "-?:%+." or first.isdigit():
        return False
    return text.lower() not in _YAML_RESERVED_WORDS


def yaml_scalar(value: Any) -> str:
    """Render ``value`` as a single-line YAML scalar.

    ``None`` and ``""`` become ``null``; booleans become ``true``/``false``;
    integers and integral floats are written without a fraction; other
    floats use ``str()`` (``.inf``/``-.inf``/``.nan`` for non-finite ones).
    Any other value is stringified and written unquoted only when that is
    unambiguous, otherwise as a JSON (double-quoted) string.
    """
    if value in (None, ""):
        return "null"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, int):
        # No float round trip: float(value) overflows for very large ints.
        return str(int(value))
    if isinstance(value, float):
        if value != value:  # NaN is the only value unequal to itself
            return ".nan"
        if value in (float("inf"), float("-inf")):
            return ".inf" if value > 0 else "-.inf"
        return str(int(value)) if value.is_integer() else str(value)
    text = str(value)
    if _is_plain_yaml_text(text):
        return text
    return json.dumps(text, ensure_ascii=False)
|
||||
|
||||
|
||||
def build_playlist_dir_name(playlist_name: str | None, playlist_id: int) -> str:
    """Return the folder name for a playlist: ``<sanitized name>_<id>``.

    The name is passed through ``sanitize_path_component`` with
    ``playlist-<id>`` as its fallback argument.
    """
    numeric_id = int(playlist_id)
    fallback_name = f"playlist-{numeric_id}"
    safe_name = sanitize_path_component(str(playlist_name or ""), fallback_name)
    return f"{safe_name}_{numeric_id}"
|
||||
|
||||
|
||||
def build_playlist_meta_payload(playlist: dict[str, Any]) -> dict[str, Any]:
    """Build the identity record for a playlist export.

    ``playlist_id`` is coerced to ``int`` (0 when missing); ``platform``,
    ``remote_playlist_id`` and ``name`` are coerced to ``str`` ("" when
    missing or falsy).
    """
    payload: dict[str, Any] = {"playlist_id": int(playlist.get("id") or 0)}
    for field_name in ("platform", "remote_playlist_id", "name"):
        payload[field_name] = str(playlist.get(field_name) or "")
    return payload
|
||||
|
||||
|
||||
def read_playlist_meta(playlist_dir: Path) -> dict[str, Any] | None:
    """Load the playlist meta JSON stored in ``playlist_dir``.

    Returns ``None`` when the file is missing, cannot be read or parsed,
    or does not contain a JSON object.
    """
    meta_path = playlist_dir / PLAYLIST_META_FILENAME
    if not meta_path.exists():
        return None
    try:
        raw_text = meta_path.read_text(encoding="utf-8")
        payload = json.loads(raw_text)
    except Exception:
        return None
    if isinstance(payload, dict):
        return payload
    return None
|
||||
|
||||
|
||||
def locate_playlist_dir(playlists_root: Path, playlist: dict[str, Any]) -> Path | None:
    """Find the existing export folder of ``playlist`` under ``playlists_root``.

    The folder named after the current playlist name and id is preferred.
    Failing that, sub-directories whose name ends with ``_<id>`` or whose
    meta file records the same playlist id are candidates, and the most
    recently modified one wins. Returns ``None`` when the root does not
    exist, the playlist id is not positive, or nothing matches.
    """
    if not playlists_root.exists():
        return None
    playlist_id = int(playlist.get("id") or 0)
    if playlist_id <= 0:
        return None

    expected_name = build_playlist_dir_name(str(playlist.get("name") or ""), playlist_id)
    preferred = playlists_root / expected_name
    if preferred.exists():
        return preferred

    id_suffix = f"_{playlist_id}"
    matches: list[Path] = []
    for entry in playlists_root.iterdir():
        if not entry.is_dir():
            continue
        if entry.name.endswith(id_suffix):
            matches.append(entry)
            continue
        # The folder name does not carry the id; fall back to its meta file.
        meta = read_playlist_meta(entry)
        if meta and int(meta.get("playlist_id") or 0) == playlist_id:
            matches.append(entry)

    if not matches:
        return None
    return max(matches, key=lambda path: path.stat().st_mtime)
|
||||
|
||||
|
||||
def ensure_playlist_dir(playlists_root: Path, playlist: dict[str, Any]) -> Path:
    """Return the playlist's export folder, creating it when none exists.

    An existing folder found by ``locate_playlist_dir`` is reused; otherwise
    a folder named by ``build_playlist_dir_name`` is created under
    ``playlists_root``.
    """
    located = locate_playlist_dir(playlists_root, playlist)
    if located is not None:
        return located
    dir_name = build_playlist_dir_name(
        str(playlist.get("name") or ""),
        int(playlist.get("id") or 0),
    )
    created = playlists_root / dir_name
    created.mkdir(parents=True, exist_ok=True)
    return created
|
||||
|
||||
|
||||
def _guess_cover_extension(url: str | None, content_type: str | None) -> str:
    """Pick a file extension (with leading dot) for a cover image.

    A known image extension at the end of the URL path wins (lower-cased).
    Otherwise the media type of ``content_type`` (parameters ignored) is
    looked up, defaulting to ``.jpg``.
    """
    url_path = str(urlparse(str(url or "")).path or "")
    found = _COVER_EXT_RE.search(url_path)
    if found is not None:
        return "." + str(found.group(1)).lower()
    media_type = str(content_type or "").split(";", 1)[0].strip().lower()
    return _CONTENT_TYPE_TO_EXT.get(media_type, ".jpg")
|
||||
|
||||
|
||||
def download_cover_file(
    *,
    cover_url: str,
    covers_dir: Path,
    file_stem: str,
    timeout: tuple[int, int] = (10, 20),
) -> str | None:
    """Download a cover image into ``covers_dir``.

    Args:
        cover_url: Image URL; blank values are ignored.
        covers_dir: Directory the file is written to.
        file_stem: File name without extension; it is sanitized first.
        timeout: Timeout value handed to ``requests.get``.

    Returns:
        The path of the saved file relative to the playlist folder
        (``covers/<file name>``), or ``None`` when the URL is blank, the
        download fails, the body is empty, or it exceeds ``MAX_COVER_BYTES``.
    """
    normalized_url = str(cover_url or "").strip()
    if not normalized_url:
        return None

    chunks: list[bytes] = []
    received = 0
    oversized = False
    try:
        # Stream the body so the size cap is enforced while downloading,
        # rather than after an arbitrarily large response has been
        # buffered; the context manager releases the connection.
        with requests.get(normalized_url, timeout=timeout, stream=True) as response:
            response.raise_for_status()
            content_type = response.headers.get("Content-Type")
            for chunk in response.iter_content(chunk_size=64 * 1024):
                received += len(chunk)
                if received > MAX_COVER_BYTES:
                    oversized = True
                    break
                chunks.append(chunk)
    except Exception:
        LOGGER.warning("Failed to download cover image: %s", normalized_url, exc_info=True)
        return None

    if oversized:
        LOGGER.warning(
            "Skipped oversized cover image (> %d bytes): %s",
            MAX_COVER_BYTES,
            normalized_url,
        )
        return None
    content = b"".join(chunks)
    if not content:
        return None

    extension = _guess_cover_extension(normalized_url, content_type)
    normalized_stem = sanitize_path_component(file_stem, "cover")
    filename = f"{normalized_stem}{extension}"
    destination_path = covers_dir / filename
    destination_path.write_bytes(content)
    return f"{PLAYLIST_COVERS_DIRNAME}/{filename}"
|
||||
|
||||
|
||||
def serialize_playlist_yaml(playlist: dict[str, Any], items: list[dict[str, Any]]) -> str:
    """Render a playlist and its songs as a YAML document.

    The document starts with the playlist fields, followed by a ``songs``
    sequence (``songs: []`` when ``items`` is empty). Each song carries an
    ``uploaded_locations`` sequence, empty unless the song's
    ``uploaded_locations`` value is a non-empty list. All values are
    rendered with ``yaml_scalar``. The text ends with a newline.
    """
    lines = [
        "playlist_id: " + yaml_scalar(playlist.get("id")),
        "playlist_name: " + yaml_scalar(playlist.get("name")),
        "platform: " + yaml_scalar(playlist.get("platform")),
        "play_count: " + yaml_scalar(playlist.get("play_count")),
        "playlist_cover_url: " + yaml_scalar(playlist.get("cover_url")),
        "playlist_cover_file: " + yaml_scalar(playlist.get("cover_file")),
    ]
    if not items:
        lines.append("songs: []")
        return "\n".join(lines) + "\n"

    # Indentation matters: every key of a list item has to sit in the same
    # column as the first key after its "- " marker, otherwise the output
    # does not parse. Songs use 2/4 spaces, nested locations 6/8.
    lines.append("songs:")
    for song in items:
        uploaded_locations = song.get("uploaded_locations")
        normalized_locations = (
            list(uploaded_locations)
            if isinstance(uploaded_locations, list)
            else []
        )
        lines.append("  - local_song_id: " + yaml_scalar(song.get("song_id")))
        lines.append("    platform_song_id: " + yaml_scalar(song.get("remote_song_id")))
        lines.append("    platform: " + yaml_scalar(song.get("platform")))
        lines.append("    name: " + yaml_scalar(song.get("name")))
        lines.append("    singers: " + yaml_scalar(song.get("singers")))
        lines.append("    album: " + yaml_scalar(song.get("album")))
        lines.append("    ext: " + yaml_scalar(song.get("ext")))
        lines.append("    file_size_bytes: " + yaml_scalar(song.get("file_size_bytes")))
        lines.append("    cover_url: " + yaml_scalar(song.get("cover_url")))
        lines.append("    cover_file: " + yaml_scalar(song.get("cover_file")))
        lines.append("    local_file_path: " + yaml_scalar(song.get("local_file_path")))
        if not normalized_locations:
            lines.append("    uploaded_locations: []")
            continue
        lines.append("    uploaded_locations:")
        for location in normalized_locations:
            payload = dict(location or {})
            lines.append("      - backend_name: " + yaml_scalar(payload.get("backend_name")))
            lines.append("        backend_type: " + yaml_scalar(payload.get("backend_type")))
            lines.append("        uploaded_url: " + yaml_scalar(payload.get("url")))
            lines.append("        container_name: " + yaml_scalar(payload.get("container_name")))
            lines.append("        locator: " + yaml_scalar(payload.get("locator")))
    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def write_playlist_artifacts(
    *,
    playlist: dict[str, Any],
    songs: list[dict[str, Any]],
    playlists_root: Path,
) -> Path:
    """Write the export folder of one playlist and return its path.

    The folder (found or created under ``playlists_root``) receives a
    ``covers`` sub-directory with the downloaded playlist and song covers,
    the playlist YAML file and the playlist meta JSON file. Each distinct
    cover URL that downloads successfully is only fetched once per call.
    """
    playlists_root.mkdir(parents=True, exist_ok=True)
    playlist_dir = ensure_playlist_dir(playlists_root, playlist)
    covers_dir = playlist_dir / PLAYLIST_COVERS_DIRNAME
    covers_dir.mkdir(parents=True, exist_ok=True)

    # cover URL -> relative path of the file it was saved to
    saved_covers: dict[str, str] = {}

    playlist_cover_url = str(playlist.get("cover_url") or "").strip()
    playlist_cover_file = None
    if playlist_cover_url:
        playlist_cover_file = download_cover_file(
            cover_url=playlist_cover_url,
            covers_dir=covers_dir,
            file_stem="playlist-cover",
        )
        if playlist_cover_file:
            saved_covers[playlist_cover_url] = playlist_cover_file

    normalized_songs: list[dict[str, Any]] = []
    for position, song in enumerate(songs, start=1):
        entry = dict(song)
        song_cover_url = str(entry.get("cover_url") or "").strip()
        song_cover_file = saved_covers.get(song_cover_url) if song_cover_url else None
        if song_cover_url and not song_cover_file:
            id_part = sanitize_path_component(
                str(entry.get("remote_song_id") or entry.get("song_id") or position),
                str(position),
            )
            song_cover_file = download_cover_file(
                cover_url=song_cover_url,
                covers_dir=covers_dir,
                file_stem=f"song-{position}-{id_part}",
            )
            if song_cover_file:
                saved_covers[song_cover_url] = song_cover_file
        entry["cover_url"] = song_cover_url or None
        entry["cover_file"] = song_cover_file
        normalized_songs.append(entry)

    playlist_payload = {
        "id": int(playlist.get("id") or 0),
        "name": str(playlist.get("name") or ""),
        "platform": str(playlist.get("platform") or ""),
        "play_count": playlist.get("play_count"),
        "cover_url": playlist_cover_url or None,
        "cover_file": playlist_cover_file,
    }
    yaml_text = serialize_playlist_yaml(playlist_payload, normalized_songs)
    (playlist_dir / PLAYLIST_YAML_FILENAME).write_text(yaml_text, encoding="utf-8")

    meta_text = json.dumps(build_playlist_meta_payload(playlist), ensure_ascii=False, indent=2)
    (playlist_dir / PLAYLIST_META_FILENAME).write_text(meta_text, encoding="utf-8")
    return playlist_dir
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,378 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import re
|
||||
from typing import Any, Callable
|
||||
|
||||
from .models import normalize_source_name, parse_size_to_bytes
|
||||
|
||||
|
||||
# Source name -> client class name.
SOURCE_CLIENT_NAMES = {
    "netease": "NeteaseMusicClient",
    "qq": "QQMusicClient",
    "kuwo": "KuwoMusicClient",
    "migu": "MiguMusicClient",
    "qianqian": "QianqianMusicClient",
    "kugou": "KugouMusicClient",
}

# Default list of download source names.
DEFAULT_DOWNLOAD_SOURCES = ["qq", "kuwo", "migu", "qianqian", "kugou", "netease"]
DEFAULT_FALLBACK_RANK_WARMUP_ATTEMPTS = 1000
# File extensions (without dot) treated as lossless audio.
LOSSLESS_EXTENSIONS = {"flac", "wav", "alac", "ape", "wv", "tta", "dsf", "dff"}
# Matches "/", ",", "&" or "|" together with any surrounding whitespace.
ARTIST_SEPARATOR_RE = re.compile(r"\s*(?:/|,|&|\|)\s*")
|
||||
|
||||
|
||||
def normalize_audio_ext(value: str | None) -> str:
    """Normalize an audio extension: trimmed, lower-cased, no leading dots.

    ``None`` and other falsy values yield ``""``.
    """
    trimmed = str(value or "").strip()
    return trimmed.lower().lstrip(".")
|
||||
|
||||
|
||||
def normalize_keyword(value: str | None) -> str:
    """Lower-case ``value`` and collapse every whitespace run to one space.

    Leading and trailing whitespace is removed; falsy values yield ``""``.
    """
    lowered = str(value or "").strip().lower()
    words = lowered.split()
    return " ".join(words)
|
||||
|
||||
|
||||
def normalize_artist_keyword(value: str | None) -> str:
    """Normalize an artist string for comparison.

    On top of ``normalize_keyword``, the separators ``& / \\ , | ;`` are
    turned into spaces and the resulting whitespace is collapsed again.
    """
    separators_to_space = str.maketrans({token: " " for token in "&/\\,|;"})
    without_separators = normalize_keyword(value).translate(separators_to_space)
    return " ".join(without_separators.split())
|
||||
|
||||
|
||||
def dedupe_preserve_order(values: list[str]) -> list[str]:
    """Normalize source names and drop repeats, keeping first-seen order.

    Every value is passed through ``normalize_source_name``; the returned
    list holds the normalized names.
    """
    # dict keys are unique and keep insertion order.
    unique_names = dict.fromkeys(normalize_source_name(value) for value in values)
    return list(unique_names)
|
||||
|
||||
|
||||
def candidate_file_size_bytes(song_info: Any) -> int:
    """Return the candidate's file size in bytes, or 0 when unknown.

    A positive numeric ``file_size_bytes`` attribute is used directly
    (truncated to ``int``). Otherwise the ``file_size`` attribute is
    handed to ``parse_size_to_bytes``.
    """
    declared = getattr(song_info, "file_size_bytes", None)
    if isinstance(declared, (int, float)) and declared > 0:
        return int(declared)
    parsed = parse_size_to_bytes(getattr(song_info, "file_size", None))
    return int(parsed or 0)
|
||||
|
||||
|
||||
def search_result_quality_group(song_info: Any) -> int:
    """Classify a search result by audio format; lower is better.

    Extensions are read, in order, from the ``ext`` attribute, the ``codec``
    attribute and ``download_url_status["probe_status"]["ext"]``. The first
    one that is lossless or ``mp3`` decides the result.

    Returns:
        0 for an extension in ``LOSSLESS_EXTENSIONS``, 1 for ``mp3``,
        2 when no candidate extension is recognized.
    """
    ext_candidates = [
        normalize_audio_ext(getattr(song_info, "ext", None)),
        normalize_audio_ext(getattr(song_info, "codec", None)),
    ]
    download_url_status = getattr(song_info, "download_url_status", None)
    if isinstance(download_url_status, dict):
        probe_status = download_url_status.get("probe_status")
        # Only a mapping can carry an "ext"; any other probe_status value is
        # ignored instead of raising AttributeError on .get().
        if isinstance(probe_status, dict):
            ext_candidates.append(normalize_audio_ext(probe_status.get("ext")))
    for ext in ext_candidates:
        if not ext:
            continue
        if ext in LOSSLESS_EXTENSIONS:
            return 0
        if ext == "mp3":
            return 1
    return 2
|
||||
|
||||
|
||||
def song_info_match_priority(candidate_song_info: Any, target_song_info: Any) -> int:
    """Score how closely a candidate matches the target; lower is closer.

    Returns:
        0 when source and non-empty identifier both match,
        1 when normalized song name and singers both match,
        2 when only the normalized song name matches,
        99 when nothing matches.
    """

    def _identifier(info: Any) -> str:
        return str(getattr(info, "identifier", "") or "").strip()

    same_source = normalize_source_name(
        getattr(candidate_song_info, "source", None)
    ) == normalize_source_name(getattr(target_song_info, "source", None))
    candidate_id = _identifier(candidate_song_info)
    target_id = _identifier(target_song_info)
    candidate_title = normalize_keyword(getattr(candidate_song_info, "song_name", None))
    target_title = normalize_keyword(getattr(target_song_info, "song_name", None))
    candidate_artists = normalize_artist_keyword(getattr(candidate_song_info, "singers", None))
    target_artists = normalize_artist_keyword(getattr(target_song_info, "singers", None))

    # Equality with a non-empty value implies the other side is non-empty too.
    if same_source and candidate_id and candidate_id == target_id:
        return 0

    titles_match = bool(candidate_title) and candidate_title == target_title
    if not titles_match:
        return 99
    artists_match = bool(candidate_artists) and candidate_artists == target_artists
    return 1 if artists_match else 2
|
||||
|
||||
|
||||
def match_priority_group(match_priority: int) -> int:
    """Bucket a match priority into a coarse confidence group.

    Returns:
        0 for priorities up to 1, 99 for priorities of 99 or more,
        1 for everything in between.
    """
    if match_priority <= 1:
        return 0
    return 99 if match_priority >= 99 else 1
|
||||
|
||||
|
||||
def is_high_confidence_match(match_priority: int) -> bool:
    """Return True when *match_priority* falls in confidence group 0."""
    group = match_priority_group(match_priority)
    return group == 0
|
||||
|
||||
|
||||
def build_resolve_keyword(song_info: Any, row: dict[str, Any]) -> str:
    """Build the search keyword for a song from its name and singers.

    Values are taken, in order, from ``song_info.song_name``, ``row["name"]``,
    ``song_info.singers`` and ``row["singers"]``. Blank values, the literal
    ``NULL`` (any case) and exact repeats are dropped, and the rest are joined
    with single spaces. When nothing usable remains, the stripped identifier
    (``song_info.identifier`` or ``row["remote_song_id"]``) is returned.
    """
    raw_values = (
        getattr(song_info, "song_name", None),
        row.get("name"),
        getattr(song_info, "singers", None),
        row.get("singers"),
    )
    # Dict keys give order-preserving de-duplication of the kept parts.
    parts: dict[str, None] = {}
    for raw in raw_values:
        text = str(raw or "").strip()
        if not text or text.upper() == "NULL":
            continue
        parts.setdefault(text, None)
    if not parts:
        identifier = getattr(song_info, "identifier", None) or row.get("remote_song_id") or ""
        return str(identifier).strip()
    return " ".join(parts)
|
||||
|
||||
|
||||
def merge_resolved_song_info(base_song_info: Any, resolved_song_info: Any) -> Any:
    """Combine a freshly resolved song with metadata from the original record.

    Args:
        base_song_info: The song record being resolved; never mutated.
        resolved_song_info: A candidate that may carry a valid download URL.

    Returns:
        A deep copy of ``base_song_info`` when the candidate is falsy or its
        ``with_valid_download_url`` flag is not truthy. Otherwise a deep copy
        of the candidate in which ``work_dir`` comes from the base record
        (when it has one), ``raw_data`` is a dict holding the base record's
        ``search`` payload (unless the candidate has its own) and
        ``deferred_search`` set to False, and empty fields are back-filled
        from the base record.
    """
    if not resolved_song_info or not getattr(resolved_song_info, "with_valid_download_url", False):
        return copy.deepcopy(base_song_info)

    merged = copy.deepcopy(resolved_song_info)
    merged.work_dir = getattr(base_song_info, "work_dir", getattr(merged, "work_dir", None))

    if not isinstance(getattr(merged, "raw_data", None), dict):
        merged.raw_data = {}
    base_raw_data = getattr(base_song_info, "raw_data", None)
    if isinstance(base_raw_data, dict) and "search" in base_raw_data and "search" not in merged.raw_data:
        merged.raw_data["search"] = copy.deepcopy(base_raw_data["search"])
    merged.raw_data["deferred_search"] = False

    for attr in ("source", "root_source"):
        if not getattr(merged, attr, None):
            setattr(merged, attr, getattr(base_song_info, attr, None))

    # Tuples rather than sets: membership then relies on equality only, so an
    # unhashable attribute value (e.g. a list of singers) cannot raise TypeError.
    empty_current_values = (None, "", "NULL", "-:-:-")
    empty_fallback_values = (None, "", "NULL")
    for attr in ("song_name", "singers", "album", "duration_s", "duration", "cover_url"):
        current_value = getattr(merged, attr, None)
        fallback_value = getattr(base_song_info, attr, None)
        if current_value in empty_current_values and fallback_value not in empty_fallback_values:
            setattr(merged, attr, fallback_value)

    for attr in ("ext", "file_size_bytes", "file_size"):
        if not getattr(merged, attr, None):
            setattr(merged, attr, getattr(base_song_info, attr, None))
    return merged
|
||||
|
||||
|
||||
class MultiSourceSongResolver:
    """Find a downloadable version of a catalog song across several sources.

    The song's own ("preferred") source is tried first: its stored search
    result is refreshed, then the source is searched by keyword. The remaining
    sources are searched afterwards in the order given by
    ``_rank_fallback_sources``.
    """

    def __init__(
        self,
        client_factory: Callable[[str], object],
        request_overrides_factory: Callable[[tuple[int, int]], dict] | None = None,
        resolver_stats_repo: Any | None = None,
        warmup_attempts: int = DEFAULT_FALLBACK_RANK_WARMUP_ATTEMPTS,
    ):
        """Store the collaborators used during resolution.

        Args:
            client_factory: Called with a source name; returns that source's client.
            request_overrides_factory: Called with a timeout tuple; returns the
                mapping passed to clients as ``request_overrides``. When falsy,
                ``{"timeout": timeout}`` is used.
            resolver_stats_repo: Optional object providing
                ``rank_fallback_sources`` and ``record_fallback_result``.
            warmup_attempts: Forwarded to the stats repo when ranking; values
                below zero are clamped to 0.
        """
        self.client_factory = client_factory
        self.request_overrides_factory = request_overrides_factory or (
            lambda timeout: {"timeout": timeout}
        )
        self.resolver_stats_repo = resolver_stats_repo
        self.warmup_attempts = max(0, int(warmup_attempts))

    @staticmethod
    def _has_valid_download_url(song_info: Any) -> bool:
        """Return the truthiness of ``song_info.with_valid_download_url``."""
        flag = getattr(song_info, "with_valid_download_url", False)
        return bool(flag)

    @staticmethod
    def _candidate_sort_key(entry: tuple[Any, int, int]) -> tuple[int, int, int, int, int]:
        """Sort key for ``(song, match_priority, source_rank)`` entries, best first.

        Orders by confidence group, then format quality, then larger file
        size, then source rank, then the raw match priority.
        """
        song_info, match_priority, source_rank = entry
        return (
            match_priority_group(match_priority),
            search_result_quality_group(song_info),
            -candidate_file_size_bytes(song_info),
            source_rank,
            match_priority,
        )

    def _request_overrides(self, timeout: tuple[int, int]) -> dict:
        """Return a fresh dict copy of the factory's overrides for *timeout*."""
        overrides = self.request_overrides_factory(timeout)
        return dict(overrides)

    @staticmethod
    def _emit_progress(progress_callback: Callable[[str], None] | None, message: str) -> None:
        """Send *message* (as ``str``) to the callback when one is supplied."""
        if progress_callback is not None:
            progress_callback(str(message))

    def _refresh_song_info(self, client: object, song_info: Any) -> Any:
        """Try to obtain a valid download URL from the stored search result.

        Returns a deep copy of ``song_info`` when it is already valid, when it
        has no dict under ``raw_data["search"]``, or when neither parser
        produces a valid song. Otherwise returns the first valid parser result
        (official API first, third-party second) merged onto ``song_info``.
        """
        if self._has_valid_download_url(song_info):
            return copy.deepcopy(song_info)
        raw_data = getattr(song_info, "raw_data", None)
        search_result = raw_data.get("search") if isinstance(raw_data, dict) else None
        if not isinstance(search_result, dict):
            return copy.deepcopy(song_info)

        request_overrides = self._request_overrides((10, 30))
        base_kwargs = {
            "search_result": search_result,
            "request_overrides": request_overrides,
        }

        # Parser failures are treated as "no result" rather than errors.
        third_party_song = None
        if hasattr(client, "_parsewiththirdpartapis"):
            try:
                third_party_song = client._parsewiththirdpartapis(**base_kwargs)
            except Exception:
                third_party_song = None

        official_song = None
        if hasattr(client, "_parsewithofficialapiv1"):
            try:
                call_kwargs = dict(base_kwargs)
                if third_party_song is not None:
                    call_kwargs["song_info_flac"] = third_party_song
                official_song = client._parsewithofficialapiv1(**call_kwargs)
            except TypeError:
                # Retry without the optional song_info_flac argument.
                try:
                    official_song = client._parsewithofficialapiv1(**base_kwargs)
                except Exception:
                    official_song = None
            except Exception:
                official_song = None

        usable = next(
            (song for song in (official_song, third_party_song) if self._has_valid_download_url(song)),
            None,
        )
        if usable is None:
            return copy.deepcopy(song_info)
        return merge_resolved_song_info(song_info, usable)

    def _search_source_candidates(self, source: str, keyword: str) -> list[Any]:
        """Search *source* for *keyword*; any failure yields an empty list."""
        if not keyword:
            return []
        try:
            found = self.client_factory(source).search(
                keyword=keyword,
                num_threadings=1,
                request_overrides=self._request_overrides((10, 30)),
                rule={},
            )
        except Exception:
            return []
        return list(found or [])

    def _pick_best_candidate(self, candidates: list[Any], target_song_info: Any, source_rank: int) -> Any:
        """Return the best valid, matching candidate, or None if there is none.

        Candidates without a valid download URL or with a match priority of 99
        or more are ignored; the rest are ordered by ``_candidate_sort_key``.
        """
        ranked: list[tuple[Any, int, int]] = []
        for candidate in candidates:
            if self._has_valid_download_url(candidate):
                priority = song_info_match_priority(candidate, target_song_info)
                if priority < 99:
                    ranked.append((candidate, priority, source_rank))
        if not ranked:
            return None
        # min() returns the first minimal entry, like a stable sort's head.
        return min(ranked, key=self._candidate_sort_key)[0]

    def _build_target_song_info(self, row: dict[str, Any], snapshot_song_info: Any):
        """Return a copy of the snapshot, or a SongInfo built from *row*."""
        if snapshot_song_info is not None:
            return copy.deepcopy(snapshot_song_info)

        from musicdl.modules.utils.data import SongInfo

        platform_key = normalize_source_name(row.get("platform"))
        return SongInfo(
            source=SOURCE_CLIENT_NAMES.get(platform_key),
            identifier=str(row.get("remote_song_id") or row.get("id") or ""),
            song_name=row.get("name"),
            singers=row.get("singers"),
            album=row.get("album"),
            ext=row.get("ext"),
            file_size_bytes=row.get("file_size_bytes"),
            raw_data={},
        )

    def _rank_fallback_sources(self, origin_source: str, fallback_sources: list[str]) -> list[str]:
        """Order fallback sources using the stats repo when one is configured.

        The result always contains exactly the de-duplicated input sources.
        Sources unknown to the input are dropped from the repo's ranking and
        sources it omitted are appended in their original order. A repo
        failure falls back to the input order.
        """
        ordered = dedupe_preserve_order(list(fallback_sources))
        if self.resolver_stats_repo is None or len(ordered) <= 1:
            return ordered
        try:
            repo_ranking = self.resolver_stats_repo.rank_fallback_sources(
                origin_source,
                ordered,
                warmup_attempts=self.warmup_attempts,
            )
        except Exception:
            return ordered
        ranked = [
            source
            for source in dedupe_preserve_order(list(repo_ranking or []))
            if source in ordered
        ]
        omitted = [source for source in ordered if source not in ranked]
        return ranked + omitted

    def _record_fallback_result(self, origin_source: str, candidate_source: str, *, succeeded: bool) -> None:
        """Report a fallback outcome to the stats repo; failures are ignored."""
        repo = self.resolver_stats_repo
        if repo is None:
            return
        try:
            repo.record_fallback_result(origin_source, candidate_source, succeeded=succeeded)
        except Exception:
            return

    def _attempt_preferred_source(
        self,
        preferred_source: str,
        target_song_info: Any,
        keyword: str,
        collected: list[tuple[Any, int, int]],
    ) -> Any:
        """Try the preferred source; return a high-confidence song or None.

        Every usable candidate found is appended to ``collected`` as
        ``(song, match_priority, 0)`` so the caller can fall back to it later,
        even if a later step in this method raises.
        """
        preferred_rank = 0
        client = self.client_factory(preferred_source)
        refreshed = self._refresh_song_info(client, target_song_info)
        if self._has_valid_download_url(refreshed):
            merged = merge_resolved_song_info(target_song_info, refreshed)
            priority = song_info_match_priority(merged, target_song_info)
            collected.append((merged, priority, preferred_rank))
            if is_high_confidence_match(priority):
                return merged

        searched = self._search_source_candidates(preferred_source, keyword)
        best = self._pick_best_candidate(searched, target_song_info, preferred_rank)
        if best is None:
            return None
        merged = merge_resolved_song_info(target_song_info, best)
        priority = song_info_match_priority(merged, target_song_info)
        collected.append((merged, priority, preferred_rank))
        return merged if is_high_confidence_match(priority) else None

    def resolve_song_info(
        self,
        row: dict[str, Any],
        snapshot_song_info: Any,
        download_sources: list[str] | None = None,
        progress_callback: Callable[[str], None] | None = None,
    ) -> Any:
        """Resolve *row* to the best available song info.

        Args:
            row: Catalog row; ``platform``, ``name``, ``singers`` and id fields are read.
            snapshot_song_info: Previously stored song info, or None to build one from *row*.
            download_sources: Sources to consider; defaults to ``DEFAULT_DOWNLOAD_SOURCES``.
            progress_callback: Optional receiver of one message per attempted source.

        Returns:
            A high-confidence result from the preferred source if one exists;
            otherwise the first fallback source's best match; otherwise the
            best lower-confidence preferred-source candidate; otherwise the
            unresolved target song info.
        """
        target = self._build_target_song_info(row=row, snapshot_song_info=snapshot_song_info)
        preferred_source = normalize_source_name(getattr(target, "source", None) or row.get("platform"))
        ordered_sources = dedupe_preserve_order(list(download_sources or DEFAULT_DOWNLOAD_SOURCES))
        keyword = build_resolve_keyword(target, row)

        preferred_candidates: list[tuple[Any, int, int]] = []
        fallback_sources = [name for name in ordered_sources if name != preferred_source]
        ranked_fallbacks = self._rank_fallback_sources(preferred_source, fallback_sources)
        try_preferred = preferred_source not in {"", "unknown", None}
        total_attempts = len(ranked_fallbacks) + (1 if try_preferred else 0)

        if try_preferred:
            preferred_rank = 0
            self._emit_progress(
                progress_callback,
                f"resolving source {preferred_source} ({preferred_rank + 1}/{total_attempts})",
            )
            try:
                confident = self._attempt_preferred_source(
                    preferred_source, target, keyword, preferred_candidates
                )
            except Exception:
                # Best effort: a failing preferred source just defers to the fallbacks.
                confident = None
            if confident is not None:
                return confident

        first_position = 2 if try_preferred else 1
        for position, source in enumerate(ranked_fallbacks, start=first_position):
            self._emit_progress(
                progress_callback,
                f"resolving source {source} ({position}/{total_attempts})",
            )
            searched = self._search_source_candidates(source, keyword)
            best = self._pick_best_candidate(searched, target, position - 1)
            self._record_fallback_result(preferred_source, source, succeeded=best is not None)
            if best is not None:
                return merge_resolved_song_info(target, best)

        if not preferred_candidates:
            return target
        return min(preferred_candidates, key=self._candidate_sort_key)[0]
|
||||
@@ -0,0 +1,166 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
# Lock wait in milliseconds; connect_resolver_stats_database applies it both as
# the sqlite3.connect timeout (converted to seconds) and as PRAGMA busy_timeout.
SQLITE_BUSY_TIMEOUT_MS = 30000
# File name that default_resolver_stats_db_path places beside the given database.
RESOLVER_STATS_DB_FILENAME = "resolver_stats.db"
|
||||
|
||||
|
||||
# DDL executed in order by initialize_resolver_stats_database. Both statements
# use IF NOT EXISTS, so re-running them against an existing database is harmless.
SCHEMA_STATEMENTS = [
    # One row of counters per (origin_source, candidate_source) pair.
    """
    CREATE TABLE IF NOT EXISTS resolver_source_stats (
        origin_source TEXT NOT NULL,
        candidate_source TEXT NOT NULL,
        attempt_count INTEGER NOT NULL DEFAULT 0,
        resolve_success_count INTEGER NOT NULL DEFAULT 0,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        last_attempt_at TEXT,
        last_success_at TEXT,
        PRIMARY KEY(origin_source, candidate_source)
    )
    """,
    # Index on origin_source, the column the ranking queries filter by.
    """
    CREATE INDEX IF NOT EXISTS idx_resolver_source_stats_origin_source
    ON resolver_source_stats (origin_source)
    """,
]
|
||||
|
||||
|
||||
def default_resolver_stats_db_path(db_path: str | Path) -> Path:
    """Return the stats database path located in the same directory as *db_path*."""
    containing_dir = Path(db_path).parent
    return containing_dir.joinpath(RESOLVER_STATS_DB_FILENAME)
|
||||
|
||||
|
||||
def connect_resolver_stats_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the stats database, creating its parent directory if needed.

    The connection uses ``sqlite3.Row`` rows and the module's busy timeout.
    WAL journaling and NORMAL synchronous mode are requested on a best-effort
    basis: an ``OperationalError`` from either pragma is ignored.
    """
    target = Path(db_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(target, timeout=SQLITE_BUSY_TIMEOUT_MS / 1000)
    connection.row_factory = sqlite3.Row
    connection.execute(f"PRAGMA busy_timeout = {SQLITE_BUSY_TIMEOUT_MS}")
    for optional_pragma in ("PRAGMA journal_mode = WAL", "PRAGMA synchronous = NORMAL"):
        with suppress(sqlite3.OperationalError):
            connection.execute(optional_pragma)
    return connection
|
||||
|
||||
|
||||
def initialize_resolver_stats_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the stats database and make sure its schema exists.

    Args:
        db_path: Location of the SQLite file.

    Returns:
        An open connection with every statement in ``SCHEMA_STATEMENTS``
        applied and committed. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If a schema statement or the commit fails. The
            connection is closed before the error propagates.
    """
    conn = connect_resolver_stats_database(db_path)
    try:
        for statement in SCHEMA_STATEMENTS:
            conn.execute(statement)
        conn.commit()
    except BaseException:
        # The caller never receives the handle on failure, so release it here.
        conn.close()
        raise
    return conn
|
||||
|
||||
|
||||
class ResolverStatsRepository:
    """Persist and query per-source fallback success statistics in SQLite.

    Each operation opens its own connection and closes it before returning.
    """

    def __init__(self, db_path: str | Path):
        """Remember *db_path* and make sure the schema exists."""
        self.db_path = Path(db_path)
        initialize_resolver_stats_database(self.db_path).close()

    def record_fallback_result(
        self,
        origin_source: str,
        candidate_source: str,
        *,
        succeeded: bool,
    ) -> None:
        """Count one fallback attempt for ``(origin_source, candidate_source)``.

        The row is inserted on first use and updated afterwards:
        ``attempt_count`` always grows by one, while ``resolve_success_count``
        and ``last_success_at`` change only when *succeeded* is true.
        """
        success_flag = int(succeeded)
        parameters = (origin_source, candidate_source, success_flag, success_flag)
        conn = connect_resolver_stats_database(self.db_path)
        try:
            conn.execute(
                """
                INSERT INTO resolver_source_stats (
                    origin_source,
                    candidate_source,
                    attempt_count,
                    resolve_success_count,
                    created_at,
                    updated_at,
                    last_attempt_at,
                    last_success_at
                )
                VALUES (
                    ?, ?, 1, ?,
                    CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP,
                    CASE WHEN ? THEN CURRENT_TIMESTAMP ELSE NULL END
                )
                ON CONFLICT(origin_source, candidate_source) DO UPDATE SET
                    attempt_count = attempt_count + 1,
                    resolve_success_count = (
                        resolve_success_count + excluded.resolve_success_count
                    ),
                    updated_at = CURRENT_TIMESTAMP,
                    last_attempt_at = CURRENT_TIMESTAMP,
                    last_success_at = CASE
                        WHEN excluded.resolve_success_count > 0
                        THEN CURRENT_TIMESTAMP
                        ELSE last_success_at
                    END
                """,
                parameters,
            )
            conn.commit()
        finally:
            conn.close()

    def rank_fallback_sources(
        self,
        origin_source: str,
        fallback_sources: Iterable[str],
        *,
        warmup_attempts: int = 1000,
    ) -> list[str]:
        """Order *fallback_sources* by smoothed historical success rate.

        The input order is returned unchanged when there are fewer than two
        sources, or when ``warmup_attempts`` is positive and the total number
        of recorded attempts for *origin_source* is still below it. Otherwise
        sources are sorted by ``(successes + 1) / (attempts + 2)`` descending,
        with ties keeping their input order.
        """
        candidates = list(fallback_sources)
        if len(candidates) <= 1:
            return candidates

        conn = connect_resolver_stats_database(self.db_path)
        try:
            if warmup_attempts > 0:
                total_row = conn.execute(
                    """
                    SELECT COALESCE(SUM(attempt_count), 0) AS total_attempt_count
                    FROM resolver_source_stats
                    WHERE origin_source = ?
                    """,
                    (origin_source,),
                ).fetchone()
                recorded_attempts = int(total_row["total_attempt_count"] if total_row else 0)
                if recorded_attempts < warmup_attempts:
                    return candidates

            placeholders = ", ".join(["?"] * len(candidates))
            stat_rows = conn.execute(
                f"""
                SELECT candidate_source, attempt_count, resolve_success_count
                FROM resolver_source_stats
                WHERE origin_source = ? AND candidate_source IN ({placeholders})
                """,
                (origin_source, *candidates),
            ).fetchall()
        finally:
            conn.close()

        stats_by_source: dict[str, tuple[int, int]] = {}
        for stat_row in stat_rows:
            stats_by_source[str(stat_row["candidate_source"])] = (
                int(stat_row["attempt_count"]),
                int(stat_row["resolve_success_count"]),
            )
        input_position = {name: position for position, name in enumerate(candidates)}

        def _ranking_key(name: str) -> tuple[float, int]:
            attempts, successes = stats_by_source.get(name, (0, 0))
            # The +1 / +2 smoothing gives sources without history a rate of 0.5.
            rate = (successes + 1) / (attempts + 2)
            return (-rate, input_position[name])

        return sorted(candidates, key=_ranking_key)
|
||||
@@ -0,0 +1,88 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Matches one character that sanitize_path_component replaces with "_":
# any of < > : " / \ | ? * or a control character in the range 0x00-0x1f.
INVALID_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
# Port that parse_web_port falls back to when the configured value is unusable.
DEFAULT_WEB_PORT = 18080
|
||||
|
||||
|
||||
def sanitize_path_component(value: str, fallback: str) -> str:
    """Turn *value* into a single safe path component.

    Surrounding whitespace is trimmed, characters matched by
    ``INVALID_PATH_CHARS_RE`` become underscores, and trailing dots and spaces
    are removed. *fallback* is returned when nothing is left.
    """
    trimmed = (value or "").strip()
    substituted = INVALID_PATH_CHARS_RE.sub("_", trimmed)
    cleaned = substituted.rstrip(". ")
    return cleaned if cleaned else fallback
|
||||
|
||||
|
||||
def pick_first_artist_name(singers: str | None) -> str:
    """Return the first artist in *singers* as a safe path component.

    The string is split on ``/``, ``,``, ``&`` and ``|``; the first non-blank
    piece is sanitized. ``"Unknown Artist"`` is returned when no piece
    qualifies or when sanitizing leaves nothing.
    """
    pieces = re.split(r"\s*(?:/|,|&|\|)\s*", singers or "")
    first_named = next((piece for piece in pieces if piece.strip()), None)
    if first_named is None:
        return "Unknown Artist"
    return sanitize_path_component(first_named, "Unknown Artist")
|
||||
|
||||
|
||||
def build_download_relative_dir(platform: str, singers: str | None) -> Path:
    """Return ``<platform>/<first artist>`` as a relative path.

    The platform component falls back to ``"unknown"`` when sanitizing leaves
    it empty.
    """
    platform_component = sanitize_path_component(platform, "unknown")
    artist_component = pick_first_artist_name(singers)
    return Path(platform_component) / artist_component
|
||||
|
||||
|
||||
def parse_web_port(value: str | int | None, fallback: int = DEFAULT_WEB_PORT) -> int:
    """Parse *value* as a TCP port number.

    Returns *fallback* when the value cannot be converted by ``int()`` or lies
    outside the range 1-65535.
    """
    try:
        port = int(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return fallback
    return port if 1 <= port <= 65535 else fallback
|
||||
|
||||
|
||||
@dataclass
class CatalogSyncRuntimeConfig:
    """Filesystem locations and web settings for one catalog-sync installation."""

    root_dir: Path
    app_home: Path
    library_dir: Path
    db_path: Path
    env_file: Path
    input_dir: Path
    log_dir: Path
    python_bin: str
    venv_dir: Path
    web_host: str
    web_port: int
    download_layout: str

    @classmethod
    def from_mapping(cls, mapping: dict[str, str]) -> "CatalogSyncRuntimeConfig":
        """Build a config from a string mapping such as parsed environment values.

        ``ROOT_DIR`` is required (``KeyError`` when absent). Every other key is
        optional: ``APP_HOME`` and ``LIBRARY_DIR`` default to sub-directories
        of the root, and the remaining paths default to locations under the
        application home.
        """

        def _path(key: str, default: Path) -> Path:
            return Path(mapping.get(key, default))

        root_dir = Path(mapping["ROOT_DIR"])
        app_home = _path("APP_HOME", root_dir / "catalogsync")
        library_dir = _path("LIBRARY_DIR", root_dir / "library")
        web_port = parse_web_port(mapping.get("WEB_PORT"), fallback=DEFAULT_WEB_PORT)
        return cls(
            root_dir=root_dir,
            app_home=app_home,
            library_dir=library_dir,
            db_path=_path("DB_PATH", app_home / "data" / "catalogsync.db"),
            env_file=_path("ENV_FILE", app_home / "config" / "catalogsync.env"),
            input_dir=_path("INPUT_DIR", app_home / "inputs"),
            log_dir=_path("LOG_DIR", app_home / "logs"),
            python_bin=mapping.get("PYTHON_BIN", "python3"),
            venv_dir=_path("VENV_DIR", app_home / "app" / ".venv"),
            web_host=mapping.get("WEB_HOST", "127.0.0.1"),
            web_port=web_port,
            download_layout=mapping.get("DOWNLOAD_LAYOUT", "platform_first_artist"),
        )

    def ensure_directories(self) -> None:
        """Create every directory the installation writes to, parents included."""
        required_dirs = (
            self.root_dir,
            self.library_dir,
            self.app_home / "app",
            self.app_home / "bin",
            self.app_home / "config",
            self.db_path.parent,
            self.env_file.parent,
            self.input_dir,
            self.log_dir,
        )
        for directory in required_dirs:
            directory.mkdir(parents=True, exist_ok=True)
|
||||
@@ -0,0 +1,643 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import inspect
|
||||
import logging
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict
|
||||
|
||||
import requests
|
||||
from urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
from .collectors import KuwoCollector, NeteaseCollector, QQCollector, parse_kuwo_toplist_html
|
||||
from .deferred import (
|
||||
build_kuwo_playlist_song_infos,
|
||||
build_kuwo_raw_track_song_infos,
|
||||
build_netease_playlist_song_infos,
|
||||
build_qq_playlist_song_infos,
|
||||
build_qq_raw_track_song_infos,
|
||||
)
|
||||
from .models import CatalogSong, extract_artist_names
|
||||
from .playlist_artifacts import write_playlist_artifacts
|
||||
from .repository import CatalogRepository
|
||||
|
||||
|
||||
# Maps a platform key to the client "type" that CatalogSyncService.get_client
# passes to BuildMusicClient.
SOURCE_CLIENT_NAMES = {
    "netease": "NeteaseMusicClient",
    "qq": "QQMusicClient",
    "kuwo": "KuwoMusicClient",
}

# Names given to the playlist pools created per platform and pool kind.
SOURCE_POOL_NAMES = {
    "netease": {"playlist_square": "网易云歌单广场", "toplist": "网易云排行榜"},
    "qq": {"playlist_square": "QQ 音乐歌单广场", "toplist": "QQ 音乐排行榜"},
    "kuwo": {"playlist_square": "酷我歌单广场", "toplist": "酷我排行榜"},
}

LOGGER = logging.getLogger(__name__)

# Page size requested per platform when paging through the playlist square;
# _build_playlist_square_kwargs uses 30 for platforms not listed here.
PLAYLIST_SQUARE_PAGE_SIZES = {
    "netease": 35,
    "qq": 30,
    "kuwo": 30,
}

# Progress callbacks receive an event type and a payload dict.
PlaylistProgressCallback = Callable[[str, Dict[str, Any]], None]
|
||||
|
||||
|
||||
class CatalogSyncService:
|
||||
def __init__(
|
||||
self,
|
||||
repository: CatalogRepository,
|
||||
collectors: dict[str, object] | None = None,
|
||||
work_dir: str = "musicdl_outputs/catalogsync",
|
||||
playlists_root: str | Path | None = None,
|
||||
):
|
||||
self.repository = repository
|
||||
self.collectors = collectors or {
|
||||
"netease": NeteaseCollector(),
|
||||
"qq": QQCollector(),
|
||||
"kuwo": KuwoCollector(),
|
||||
}
|
||||
self.work_dir = work_dir
|
||||
self.playlists_root = Path(playlists_root).resolve() if playlists_root else None
|
||||
self._clients: dict[str, object] = {}
|
||||
|
||||
def get_client(self, platform: str):
    """Return the music client for *platform*, building it on first use.

    Raises:
        KeyError: If *platform* has no entry in ``SOURCE_CLIENT_NAMES``.
    """
    try:
        return self._clients[platform]
    except KeyError:
        pass

    from musicdl.modules import BuildMusicClient

    client_config = {
        "type": SOURCE_CLIENT_NAMES[platform],
        "disable_print": True,
        "maintain_session": False,
        "work_dir": self.work_dir,
        "search_size_per_source": 1,
        "search_size_per_page": 1,
        "strict_limit_search_size_per_page": True,
    }
    self._clients[platform] = BuildMusicClient(client_config)
    return self._clients[platform]
|
||||
|
||||
def store_playlist_candidates(
|
||||
self,
|
||||
platform: str,
|
||||
pool_kind: str,
|
||||
pool_name: str,
|
||||
candidates: list,
|
||||
pool_external_id: str | None = None,
|
||||
) -> int:
|
||||
pool_id = self.repository.upsert_playlist_pool(
|
||||
platform=platform,
|
||||
pool_kind=pool_kind,
|
||||
external_id=pool_external_id or pool_kind,
|
||||
name=pool_name,
|
||||
)
|
||||
for candidate in candidates:
|
||||
playlist_id = self.repository.upsert_playlist(candidate)
|
||||
self.repository.link_pool_playlist(pool_id, playlist_id)
|
||||
return pool_id
|
||||
|
||||
def collect_playlists(
|
||||
self,
|
||||
sources: list[str],
|
||||
include_playlist_square: bool = True,
|
||||
include_toplist: bool = True,
|
||||
progress_callback: PlaylistProgressCallback | None = None,
|
||||
) -> dict[str, int]:
|
||||
counts = {"playlist_square": 0, "toplist": 0}
|
||||
for source in sources:
|
||||
collector = self.collectors[source]
|
||||
self._emit_progress(
|
||||
progress_callback,
|
||||
"source_started",
|
||||
source=source,
|
||||
include_playlist_square=include_playlist_square,
|
||||
include_toplist=include_toplist,
|
||||
)
|
||||
if include_playlist_square:
|
||||
try:
|
||||
counts["playlist_square"] += self._collect_playlist_square(
|
||||
source,
|
||||
collector,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
except Exception as exc:
|
||||
LOGGER.warning(
|
||||
"playlist_square collection failed for source=%s",
|
||||
source,
|
||||
exc_info=True,
|
||||
)
|
||||
if include_toplist:
|
||||
toplist_candidates = collector.collect_toplist()
|
||||
self.store_playlist_candidates(
|
||||
platform=source,
|
||||
pool_kind="toplist",
|
||||
pool_name=SOURCE_POOL_NAMES[source]["toplist"],
|
||||
candidates=toplist_candidates,
|
||||
)
|
||||
counts["toplist"] += len(toplist_candidates)
|
||||
self._emit_progress(
|
||||
progress_callback,
|
||||
"toplist_collected",
|
||||
source=source,
|
||||
count=len(toplist_candidates),
|
||||
)
|
||||
self._emit_progress(progress_callback, "source_finished", source=source, counts=dict(counts))
|
||||
return counts
|
||||
|
||||
def _collect_playlist_square(
|
||||
self,
|
||||
source: str,
|
||||
collector: object,
|
||||
*,
|
||||
progress_callback: PlaylistProgressCallback | None = None,
|
||||
) -> int:
|
||||
total = 0
|
||||
page = 1
|
||||
seen_remote_ids: set[str] = set()
|
||||
while True:
|
||||
candidates, should_continue = self._collect_playlist_square_page(source, collector, page)
|
||||
unique_candidates = []
|
||||
for candidate in candidates:
|
||||
remote_id = str(getattr(candidate, "remote_id", "") or "").strip()
|
||||
dedupe_key = f"{source}:{remote_id}"
|
||||
if remote_id and dedupe_key in seen_remote_ids:
|
||||
continue
|
||||
if remote_id:
|
||||
seen_remote_ids.add(dedupe_key)
|
||||
unique_candidates.append(candidate)
|
||||
if unique_candidates:
|
||||
self.store_playlist_candidates(
|
||||
platform=source,
|
||||
pool_kind="playlist_square",
|
||||
pool_name=SOURCE_POOL_NAMES[source]["playlist_square"],
|
||||
candidates=unique_candidates,
|
||||
)
|
||||
total += len(unique_candidates)
|
||||
self._emit_progress(
|
||||
progress_callback,
|
||||
"playlist_square_page",
|
||||
source=source,
|
||||
page=page,
|
||||
page_count=len(candidates),
|
||||
new_count=len(unique_candidates),
|
||||
total=total,
|
||||
duplicate_page=bool(candidates) and not bool(unique_candidates),
|
||||
)
|
||||
if (candidates and not unique_candidates) or not should_continue:
|
||||
break
|
||||
page += 1
|
||||
return total
|
||||
|
||||
def _collect_playlist_square_page(self, source: str, collector: object, page: int) -> tuple[list, bool]:
|
||||
method = collector.collect_playlist_square
|
||||
kwargs = self._build_playlist_square_kwargs(method, source, page)
|
||||
if kwargs is None:
|
||||
candidates, has_more = self._normalize_playlist_square_result(method())
|
||||
if self._is_mock_side_effect_iterator(method):
|
||||
return candidates, (bool(candidates) and (has_more is not False))
|
||||
return candidates, bool(has_more)
|
||||
|
||||
candidates, has_more = self._normalize_playlist_square_result(method(**kwargs))
|
||||
if has_more is False:
|
||||
return candidates, False
|
||||
return candidates, bool(candidates)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_playlist_square_result(result) -> tuple[list, bool | None]:
|
||||
if isinstance(result, tuple) and len(result) == 2:
|
||||
candidates = list(result[0] or [])
|
||||
has_more = result[1]
|
||||
return candidates, None if has_more is None else bool(has_more)
|
||||
if isinstance(result, dict):
|
||||
raw_candidates = result.get("candidates")
|
||||
if raw_candidates is None:
|
||||
raw_candidates = result.get("items", [])
|
||||
candidates = list(raw_candidates or [])
|
||||
has_more = result.get("has_more")
|
||||
return candidates, None if has_more is None else bool(has_more)
|
||||
return list(result or []), None
|
||||
|
||||
@staticmethod
|
||||
def _is_mock_side_effect_iterator(method) -> bool:
|
||||
side_effect = getattr(method, "side_effect", None)
|
||||
if side_effect is None:
|
||||
return False
|
||||
if isinstance(side_effect, BaseException):
|
||||
return False
|
||||
return not callable(side_effect)
|
||||
|
||||
@staticmethod
|
||||
def _build_playlist_square_kwargs(method, source: str, page: int) -> dict[str, int] | None:
|
||||
try:
|
||||
signature = inspect.signature(method)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
parameters = signature.parameters
|
||||
kwargs: dict[str, int] = {}
|
||||
page_size = PLAYLIST_SQUARE_PAGE_SIZES.get(source, 30)
|
||||
if "page" in parameters:
|
||||
kwargs["page"] = max(page, 1)
|
||||
if "page_size" in parameters:
|
||||
kwargs["page_size"] = page_size
|
||||
if "offset" in parameters and "page" not in parameters:
|
||||
kwargs["offset"] = max(page - 1, 0) * page_size
|
||||
return kwargs or None
|
||||
|
||||
@staticmethod
|
||||
def _emit_progress(
|
||||
callback: PlaylistProgressCallback | None,
|
||||
event_type: str,
|
||||
**payload: Any,
|
||||
) -> None:
|
||||
if callback is None:
|
||||
return
|
||||
callback(event_type, payload)
|
||||
|
||||
def import_manual_playlists(self, playlist_file: str | Path, candidates: list) -> list[int]:
|
||||
playlist_ids: list[int] = []
|
||||
pool_ids_by_platform: dict[str, int] = {}
|
||||
for candidate in candidates:
|
||||
pool_id = pool_ids_by_platform.get(candidate.platform)
|
||||
if pool_id is None:
|
||||
pool_id = self.repository.get_or_create_manual_file_pool(playlist_file, candidate.platform)
|
||||
pool_ids_by_platform[candidate.platform] = pool_id
|
||||
playlist_id = self.repository.upsert_playlist(candidate)
|
||||
self.repository.link_pool_playlist(pool_id, playlist_id)
|
||||
playlist_ids.append(playlist_id)
|
||||
return playlist_ids
|
||||
|
||||
def store_playlist_songs(self, playlist_id: int, source_pool_id: int, song_infos: list[object]) -> int:
    """Persist the songs of one playlist and link their artists.

    For every entry of ``song_infos`` (positions start at 1) the song is
    upserted and linked to ``playlist_id``; each artist name extracted from the
    song is upserted, linked to the artist pool derived from
    ``source_pool_id``, and linked to the song.

    Returns:
        The id of the derived artist pool.

    Raises:
        RuntimeError: when ``source_pool_id`` does not resolve to a pool row.
    """
    source_pool = self.repository.get_playlist_pool(source_pool_id)
    if not source_pool:
        raise RuntimeError(f"Unknown playlist pool: {source_pool_id}")

    derived_pool_id = self.repository.ensure_derived_artist_pool(
        platform=source_pool["platform"],
        source_pool_id=source_pool_id,
        source_pool_name=source_pool["name"],
    )

    for index, info in enumerate(song_infos):
        song = CatalogSong.from_song_info(info)
        song_id = self.repository.upsert_song(song)
        self.repository.link_playlist_song(playlist_id, song_id, index + 1)

        artist_names = extract_artist_names(song.metadata.get("raw_data"), song.singers)
        for artist_name in artist_names:
            artist_id = self.repository.upsert_artist(song.platform, artist_name)
            self.repository.link_pool_artist(derived_pool_id, artist_id)
            self.repository.link_artist_song(artist_id, song_id)

    return derived_pool_id
|
||||
|
||||
def sync_playlist_catalog(self, sources: list[str] | None = None, limit: int | None = None) -> int:
|
||||
processed = 0
|
||||
for playlist_row in self.repository.list_playlists(sources=sources, limit=limit):
|
||||
processed += self.sync_playlist_row(playlist_row)
|
||||
return processed
|
||||
|
||||
def sync_specific_playlists(self, playlist_ids: list[int]) -> int:
    """Sync only the playlists whose ids are given.

    Returns the sum of the values returned by ``sync_playlist_row`` for the
    rows the repository finds for ``playlist_ids``.
    """
    playlist_rows = self.repository.list_playlists_by_ids(playlist_ids)
    return sum(self.sync_playlist_row(row) for row in playlist_rows)
|
||||
|
||||
def sync_playlist_row(self, playlist_row) -> int:
    """Resolve one playlist's songs and store them under every linked pool.

    After storing, the playlist's play count is backfilled.

    Returns the number of resolved song infos.
    """
    resolved_songs = self.resolve_playlist_song_infos(playlist_row)
    playlist_id = int(playlist_row["id"])
    for pool_id in self.repository.get_pool_ids_for_playlist(playlist_id):
        self.store_playlist_songs(playlist_id, pool_id, resolved_songs)
    self._backfill_playlist_play_count(playlist_row)
    return len(resolved_songs)
|
||||
|
||||
def _backfill_playlist_play_count(self, playlist_row) -> None:
|
||||
playlist_id = int(playlist_row["id"])
|
||||
play_count = self.resolve_playlist_play_count(playlist_row)
|
||||
if play_count is None:
|
||||
return
|
||||
self.repository.update_playlist_play_count(playlist_id, play_count)
|
||||
|
||||
def _resolve_playlists_root(self) -> Path | None:
|
||||
if self.playlists_root is not None:
|
||||
self.playlists_root.mkdir(parents=True, exist_ok=True)
|
||||
return self.playlists_root
|
||||
library_root = self.repository.get_default_local_library_root()
|
||||
if library_root is None:
|
||||
return None
|
||||
playlists_root = library_root.parent / "playlists"
|
||||
playlists_root.mkdir(parents=True, exist_ok=True)
|
||||
return playlists_root
|
||||
|
||||
def _playlist_export_payload(self, playlist_id: int) -> tuple[dict[str, Any], list[dict[str, Any]]] | None:
|
||||
playlist_rows = self.repository.list_playlists_by_ids([int(playlist_id)])
|
||||
if not playlist_rows:
|
||||
return None
|
||||
playlist_row = dict(playlist_rows[0])
|
||||
payload = {
|
||||
"id": int(playlist_row["id"]),
|
||||
"platform": str(playlist_row["platform"] or ""),
|
||||
"remote_playlist_id": str(playlist_row["remote_playlist_id"] or ""),
|
||||
"name": str(playlist_row["name"] or ""),
|
||||
"play_count": self._coerce_int(playlist_row["play_count"]),
|
||||
"cover_url": str(playlist_row.get("cover_url") or "").strip() or None,
|
||||
}
|
||||
songs = self.repository.list_playlist_song_details(int(playlist_id), limit=5000)
|
||||
return payload, songs
|
||||
|
||||
def _write_playlist_artifacts(self, playlist_row) -> Path | None:
|
||||
playlists_root = self._resolve_playlists_root()
|
||||
if playlists_root is None:
|
||||
return None
|
||||
playlist_id = int(playlist_row["id"])
|
||||
export_payload = self._playlist_export_payload(playlist_id)
|
||||
if export_payload is None:
|
||||
return None
|
||||
playlist_payload, song_items = export_payload
|
||||
try:
|
||||
return write_playlist_artifacts(
|
||||
playlist=playlist_payload,
|
||||
songs=song_items,
|
||||
playlists_root=playlists_root,
|
||||
)
|
||||
except Exception:
|
||||
LOGGER.warning(
|
||||
"Failed to write playlist artifacts: playlist_id=%s",
|
||||
playlist_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
def ensure_playlist_artifacts_for_playlist(self, playlist_id: int) -> Path | None:
|
||||
playlist_rows = self.repository.list_playlists_by_ids([int(playlist_id)])
|
||||
if not playlist_rows:
|
||||
return None
|
||||
return self._write_playlist_artifacts(playlist_rows[0])
|
||||
|
||||
def resolve_playlist_play_count(self, playlist_row) -> int | None:
|
||||
platform = str(playlist_row["platform"] or "").strip()
|
||||
parse_strategy = str(playlist_row["parse_strategy"] or "").strip()
|
||||
remote_id = str(playlist_row["remote_playlist_id"] or "").strip()
|
||||
fallback_value = self._coerce_int(playlist_row["play_count"])
|
||||
if parse_strategy != "playlist_url" or platform not in {"netease", "qq", "kuwo"} or not remote_id:
|
||||
return fallback_value
|
||||
|
||||
try:
|
||||
if platform == "netease":
|
||||
client = self.get_client("netease")
|
||||
response = client.post(
|
||||
"https://music.163.com/api/v6/playlist/detail",
|
||||
data={"id": remote_id},
|
||||
timeout=(10, 30),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
playlist_payload = payload.get("playlist") or {}
|
||||
return self._coerce_int(playlist_payload.get("playCount")) or fallback_value
|
||||
|
||||
if platform == "qq":
|
||||
client = self.get_client("qq")
|
||||
response = client.get(
|
||||
"https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg",
|
||||
headers={"Referer": f"https://y.qq.com/n/ryqq/playlist/{remote_id}"},
|
||||
params={
|
||||
"disstid": str(remote_id),
|
||||
"type": "1",
|
||||
"json": "1",
|
||||
"utf8": "1",
|
||||
"onlysong": "0",
|
||||
"format": "json",
|
||||
},
|
||||
timeout=(10, 30),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
playlist_payload = ((payload.get("cdlist") or [{}])[0] or {}) if isinstance(payload, dict) else {}
|
||||
return self._coerce_int(playlist_payload.get("visitnum")) or fallback_value
|
||||
|
||||
client = self.get_client("kuwo")
|
||||
response = client.get(
|
||||
f"https://m.kuwo.cn/newh5app/wapi/api/www/playlist/playListInfo?pid={remote_id}&pn=1&rn=100",
|
||||
timeout=(10, 30),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
data_payload = payload.get("data") or {}
|
||||
return self._coerce_int(data_payload.get("listencnt")) or fallback_value
|
||||
except Exception:
|
||||
LOGGER.warning(
|
||||
"Failed to resolve playlist play_count during sync: platform=%s remote_id=%s",
|
||||
platform,
|
||||
remote_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return fallback_value
|
||||
|
||||
@staticmethod
|
||||
def _coerce_int(value: object) -> int | None:
|
||||
if value in (None, "") or isinstance(value, bool):
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return int(value)
|
||||
text = str(value).strip().replace(",", "")
|
||||
if not text:
|
||||
return None
|
||||
try:
|
||||
return int(float(text))
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
def resolve_playlist_song_infos(self, playlist_row) -> list[object]:
    """Fetch the song infos of a playlist according to its parse strategy.

    Supported strategies:

    * ``playlist_url``: platform-specific builder for ``netease``, ``qq`` and
      ``kuwo``; any other platform goes through its client's
      ``parseplaylist``.
    * ``netease_toplist``: same builder as a netease playlist URL.
    * ``qq_toplist`` / ``kuwo_toplist``: dedicated toplist resolvers.

    Raises:
        ValueError: for any other strategy.
    """
    strategy = playlist_row["parse_strategy"]
    if strategy == "qq_toplist":
        return self._resolve_qq_toplist(playlist_row)
    if strategy == "kuwo_toplist":
        return self._resolve_kuwo_toplist(playlist_row)

    url_based = strategy == "playlist_url"
    if not url_based and strategy != "netease_toplist":
        raise ValueError(f"Unsupported parse strategy: {strategy}")

    # Netease toplists are fetched exactly like netease playlist URLs.
    platform = playlist_row["platform"] if url_based else "netease"
    if platform == "netease":
        return build_netease_playlist_song_infos(self.get_client("netease"), playlist_row["url"])
    if platform == "qq":
        return build_qq_playlist_song_infos(self.get_client("qq"), playlist_row["url"])
    if platform == "kuwo":
        return build_kuwo_playlist_song_infos(self.get_client("kuwo"), playlist_row["url"])
    generic_client = self.get_client(platform)
    return generic_client.parseplaylist(playlist_row["url"])
|
||||
|
||||
def _resolve_qq_toplist(self, playlist_row) -> list[object]:
    """Fetch the tracks of a QQ toplist and turn them into song infos.

    The legacy ``fcg_v8_toplist_cp`` endpoint is queried first; when it yields
    no usable track dicts, ``_resolve_qq_toplist_fallback_tracks`` is used.

    NOTE: the request is sent with ``verify=False`` (TLS certificate
    verification disabled) and the resulting ``InsecureRequestWarning`` is
    silenced.
    """
    remote_id = str(playlist_row["remote_playlist_id"] or "").strip()

    query = {
        "topid": remote_id,
        "tpl": "3",
        "page": "detail",
        "type": "top",
        "format": "json",
    }
    browser_headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": "https://y.qq.com/",
        "Origin": "https://y.qq.com/",
    }
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", InsecureRequestWarning)
        response = requests.get(
            "https://c.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg",
            params=query,
            headers=browser_headers,
            timeout=15,
            verify=False,
        )
    response.raise_for_status()

    entries = response.json().get("songlist", []) or []
    track_payloads = (entry.get("data") for entry in entries if isinstance(entry, dict))
    # Keep only non-empty dict payloads.
    raw_tracks = [track for track in track_payloads if isinstance(track, dict) and track]
    if not raw_tracks:
        raw_tracks = self._resolve_qq_toplist_fallback_tracks(remote_id)

    client = self.get_client("qq")
    return build_qq_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_row["name"])
|
||||
|
||||
def _resolve_qq_toplist_fallback_tracks(self, remote_id: str) -> list[dict]:
|
||||
if not remote_id:
|
||||
return []
|
||||
request_payload = {
|
||||
"comm": {"ct": 24, "cv": 0},
|
||||
"toplist": {
|
||||
"module": "musicToplist.ToplistInfoServer",
|
||||
"method": "GetDetail",
|
||||
"param": {
|
||||
"topid": int(remote_id) if remote_id.isdigit() else remote_id,
|
||||
"offset": 0,
|
||||
"num": 100,
|
||||
"period": "",
|
||||
},
|
||||
},
|
||||
}
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", InsecureRequestWarning)
|
||||
response = requests.post(
|
||||
"https://u.y.qq.com/cgi-bin/musicu.fcg",
|
||||
json=request_payload,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
"Referer": "https://y.qq.com/",
|
||||
"Origin": "https://y.qq.com/",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout=15,
|
||||
verify=False,
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload_raw = response.json()
|
||||
payload = payload_raw if isinstance(payload_raw, dict) else {}
|
||||
toplist_data = payload.get("toplist") or {}
|
||||
toplist_inner = toplist_data.get("data") or {}
|
||||
toplist_detail = toplist_inner.get("data") or {}
|
||||
raw_items = toplist_detail.get("song") or toplist_detail.get("songlist") or []
|
||||
if not isinstance(raw_items, list):
|
||||
return []
|
||||
|
||||
fallback_tracks: list[dict] = []
|
||||
for item in raw_items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get("title") or item.get("name") or "").strip()
|
||||
singer_text = str(item.get("singerName") or item.get("singers") or "").strip()
|
||||
album_mid = str(item.get("albumMid") or item.get("albummid") or "").strip()
|
||||
if not title:
|
||||
continue
|
||||
track_id = str(
|
||||
item.get("songMid")
|
||||
or item.get("songmid")
|
||||
or item.get("mid")
|
||||
or item.get("songId")
|
||||
or item.get("songid")
|
||||
or ""
|
||||
).strip()
|
||||
if not track_id or track_id == "0":
|
||||
hash_input = f"{remote_id}|{title}|{singer_text}|{album_mid}"
|
||||
track_id = f"qqtop_{remote_id}_{hashlib.md5(hash_input.encode('utf-8')).hexdigest()[:16]}"
|
||||
singer_items = [{"name": part.strip()} for part in singer_text.split("/") if part.strip()]
|
||||
fallback_tracks.append(
|
||||
{
|
||||
"songmid": track_id,
|
||||
"title": title,
|
||||
"singer": singer_items,
|
||||
"album": {"mid": album_mid, "title": str(item.get("albumName") or "").strip()},
|
||||
"albummid": album_mid,
|
||||
"interval": item.get("interval", 0),
|
||||
"qq_toplist_fallback": True,
|
||||
"qq_toplist_remote_id": remote_id,
|
||||
}
|
||||
)
|
||||
return fallback_tracks
|
||||
|
||||
def _resolve_kuwo_toplist(self, playlist_row) -> list[object]:
    """Fetch the tracks of a Kuwo toplist and turn them into song infos.

    The toplist is looked up by name. When the stored name is blank or equal
    to the remote id, the name is resolved via ``_resolve_kuwo_toplist_name``,
    falling back to the remote id itself.

    Malformed responses (non-dict payload, null ``data``, non-list
    ``musicList``, non-dict items, null ``rid``) are tolerated: the offending
    parts are skipped instead of raising.

    NOTE: the request goes to ``kw-api.cenguigui.cn`` with ``verify=False``
    (TLS certificate verification disabled) and the resulting
    ``InsecureRequestWarning`` is silenced.
    """
    playlist_name = str(playlist_row["name"] or "").strip()
    remote_id = str(playlist_row["remote_playlist_id"] or "").strip()
    if not playlist_name or playlist_name == remote_id:
        playlist_name = self._resolve_kuwo_toplist_name(remote_id) or remote_id

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", InsecureRequestWarning)
        response = requests.get(
            "https://kw-api.cenguigui.cn",
            params={"name": playlist_name, "type": "rank", "page": "1", "limit": "100"},
            timeout=15,
            verify=False,
        )
    response.raise_for_status()

    # Walk the payload defensively: {"data": null} or a non-dict body would
    # otherwise raise AttributeError on the chained .get() calls.
    payload = response.json()
    data = payload.get("data") if isinstance(payload, dict) else None
    music_list = data.get("musicList") if isinstance(data, dict) else None
    if not isinstance(music_list, list):
        music_list = []

    raw_tracks = []
    for item in music_list:
        if not isinstance(item, dict):
            continue
        rid_value = item.get("rid")
        # A null rid must not become the truthy string "None".
        rid = "" if rid_value is None else str(rid_value).strip()
        if not rid:
            continue
        raw_tracks.append(
            {
                "musicrid": f"MUSIC_{rid}",
                "rid": rid,
                "name": item.get("name"),
                "artist": item.get("artist"),
                "album": item.get("album"),
                "albumpic": item.get("pic"),
            }
        )

    client = self.get_client("kuwo")
    return build_kuwo_raw_track_song_infos(client, raw_tracks, playlist_name=playlist_name)
|
||||
|
||||
def _resolve_kuwo_toplist_name(self, remote_id: str) -> str | None:
|
||||
if not remote_id:
|
||||
return None
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", InsecureRequestWarning)
|
||||
response = requests.get(
|
||||
"https://www.kuwo.cn/rankList",
|
||||
params={"bangId": remote_id},
|
||||
timeout=15,
|
||||
verify=False,
|
||||
)
|
||||
response.raise_for_status()
|
||||
for candidate in parse_kuwo_toplist_html(response.text):
|
||||
if candidate.remote_id == remote_id:
|
||||
return candidate.name
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _resolve_raw_tracks(client, raw_tracks: list[dict]) -> list[object]:
|
||||
song_infos = []
|
||||
for track in raw_tracks:
|
||||
song_info_flac = client._parsewiththirdpartapis(track, {})
|
||||
try:
|
||||
song_info = client._parsewithofficialapiv1(
|
||||
track,
|
||||
song_info_flac=song_info_flac,
|
||||
lossless_quality_is_sufficient=not bool(client.default_cookies),
|
||||
request_overrides={},
|
||||
)
|
||||
except Exception:
|
||||
song_info = song_info_flac
|
||||
if not song_info.with_valid_download_url:
|
||||
song_info = song_info_flac
|
||||
if song_info.with_valid_download_url:
|
||||
song_infos.append(song_info)
|
||||
return song_infos
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,262 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple
|
||||
|
||||
|
||||
# Keywords that detect_suspected_live_reason_codes() searches for in the
# lowercased song name (reason code "name_keyword").
LIVE_NAME_KEYWORDS = ("live", "现场", "演唱会")
# Keywords that detect_suspected_live_reason_codes() searches for in the
# lowercased album name (reason code "album_show_keyword").
# NOTE(review): apart from "演唱会" these look like titles of televised music
# shows -- confirm with the maintainers before extending the list.
LIVE_ALBUM_KEYWORDS = (
    "演唱会",
    "我是歌手",
    "我们的歌",
    "声生不息",
    "时光音乐会",
    "天赐的声音",
    "披荆斩棘",
    "乘风",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SuspectedLiveSong:
    """Immutable record describing one song flagged by the live-version scan.

    Instances are created by ``scan_suspected_live_songs`` from rows of the
    ``songs`` table.
    """

    song_id: int  # value of songs.id
    platform: str
    remote_song_id: str
    name: str
    singers: str
    album: str
    reason_codes: Tuple[str, ...]  # codes produced by detect_suspected_live_reason_codes
|
||||
|
||||
|
||||
def _normalize_text(value: Any) -> str:
|
||||
return str(value or "").strip()
|
||||
|
||||
|
||||
def _normalize_compact_text(value: Any) -> str:
    """Return ``value`` lowercased with whitespace, punctuation and ``_`` removed."""
    lowered = _normalize_text(value).lower()
    separators = re.compile(r"[\s\W_]+", flags=re.UNICODE)
    return separators.sub("", lowered)
|
||||
|
||||
|
||||
def _name_contains_live_keyword(lowered_name: str, keyword: str) -> bool:
    """Return whether ``keyword`` occurs in ``lowered_name`` as a live marker."""
    if keyword.isascii() and keyword.isalpha():
        # An ASCII word such as "live" must not start in the middle of another
        # ASCII word, otherwise "alive", "olive" or "deliver" would be flagged.
        pattern = r"(?<![a-z])" + re.escape(keyword)
        return re.search(pattern, lowered_name) is not None
    return keyword in lowered_name


def detect_suspected_live_reason_codes(name: Any, album: Any) -> List[str]:
    """Return the reasons why a song looks like a live/stage recording.

    Possible codes, in this order:

    * ``"name_keyword"``: the song name contains one of
      ``LIVE_NAME_KEYWORDS`` (ASCII keywords only match at the start of an
      ASCII word, so ``"Alive"`` is not flagged but ``"(Live)"`` is).
    * ``"album_show_keyword"``: the album name contains one of
      ``LIVE_ALBUM_KEYWORDS``.

    The album check is skipped when the album is blank or the literal text
    ``NULL``, and when the compacted album name equals or starts with the
    compacted song name.

    Returns an empty list when nothing matches.
    """
    name_text = _normalize_text(name)
    album_text = _normalize_text(album)
    normalized_name = name_text.lower()
    normalized_album = album_text.lower()
    reason_codes: List[str] = []

    if any(_name_contains_live_keyword(normalized_name, keyword) for keyword in LIVE_NAME_KEYWORDS):
        reason_codes.append("name_keyword")

    if not album_text or album_text.upper() == "NULL":
        return reason_codes

    compact_name = _normalize_compact_text(name_text)
    compact_album = _normalize_compact_text(album_text)
    # startswith() also covers the case where both compacted texts are equal.
    if compact_name and compact_album and compact_album.startswith(compact_name):
        return reason_codes

    if any(keyword in normalized_album for keyword in LIVE_ALBUM_KEYWORDS):
        reason_codes.append("album_show_keyword")

    return reason_codes
|
||||
|
||||
|
||||
def _connect_readonly_database(db_path: str | Path) -> sqlite3.Connection:
|
||||
path = Path(db_path).resolve()
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"Database not found: {path}")
|
||||
conn = sqlite3.connect(f"{path.as_uri()}?mode=ro", uri=True)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
def _song_scan_query(downloaded_only: bool) -> str:
|
||||
where_clause = "WHERE d.song_id IS NOT NULL" if downloaded_only else ""
|
||||
return f"""
|
||||
WITH downloaded_song_ids AS (
|
||||
SELECT DISTINCT fa.song_id
|
||||
FROM file_locations AS fl
|
||||
JOIN file_assets AS fa ON fa.id = fl.file_asset_id
|
||||
JOIN storage_backends AS sb ON sb.id = fl.backend_id
|
||||
WHERE fl.status = 'active'
|
||||
AND sb.backend_type = 'local_fs'
|
||||
)
|
||||
SELECT
|
||||
s.id,
|
||||
s.platform,
|
||||
s.remote_song_id,
|
||||
s.name,
|
||||
s.singers,
|
||||
s.album
|
||||
FROM songs AS s
|
||||
LEFT JOIN downloaded_song_ids AS d ON d.song_id = s.id
|
||||
{where_clause}
|
||||
ORDER BY s.id DESC
|
||||
"""
|
||||
|
||||
|
||||
def scan_suspected_live_songs(
    db_path: str | Path,
    *,
    downloaded_only: bool = True,
    limit: Optional[int] = None,
) -> List[SuspectedLiveSong]:
    """Scan the catalog database for songs that look like live recordings.

    Args:
        db_path: path of the SQLite database; it is opened read-only.
        downloaded_only: only consider songs with an active local file.
        limit: maximum number of matches to return; ``None`` means no limit,
            zero or a negative value yields an empty list without opening the
            database.

    Returns:
        Matching songs ordered by descending song id.

    Raises:
        FileNotFoundError: when the database file does not exist.
    """
    normalized_limit = None if limit is None else max(int(limit), 0)
    if normalized_limit == 0:
        return []

    matches: List[SuspectedLiveSong] = []
    conn = _connect_readonly_database(db_path)
    try:
        # Stream rows from the cursor instead of fetchall(): the scan can stop
        # as soon as ``limit`` matches are found and never holds the whole
        # songs table in memory.
        for row in conn.execute(_song_scan_query(downloaded_only)):
            reason_codes = detect_suspected_live_reason_codes(
                name=row["name"],
                album=row["album"],
            )
            if not reason_codes:
                continue
            matches.append(
                SuspectedLiveSong(
                    song_id=int(row["id"]),
                    platform=_normalize_text(row["platform"]),
                    remote_song_id=_normalize_text(row["remote_song_id"]),
                    name=_normalize_text(row["name"]),
                    singers=_normalize_text(row["singers"]),
                    album=_normalize_text(row["album"]),
                    reason_codes=tuple(reason_codes),
                )
            )
            if normalized_limit is not None and len(matches) >= normalized_limit:
                break
    finally:
        conn.close()
    return matches
|
||||
|
||||
|
||||
def _song_to_row(song: SuspectedLiveSong) -> Dict[str, Any]:
|
||||
return {
|
||||
"song_id": song.song_id,
|
||||
"platform": song.platform,
|
||||
"remote_song_id": song.remote_song_id,
|
||||
"name": song.name,
|
||||
"singers": song.singers,
|
||||
"album": song.album,
|
||||
"reason_codes": ",".join(song.reason_codes),
|
||||
}
|
||||
|
||||
|
||||
def _write_csv(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
|
||||
fieldnames = [
|
||||
"song_id",
|
||||
"platform",
|
||||
"remote_song_id",
|
||||
"name",
|
||||
"singers",
|
||||
"album",
|
||||
"reason_codes",
|
||||
]
|
||||
writer = csv.DictWriter(stream, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for row in rows:
|
||||
writer.writerow(row)
|
||||
|
||||
|
||||
def _write_jsonl(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
|
||||
for row in rows:
|
||||
stream.write(json.dumps(row, ensure_ascii=False) + "\n")
|
||||
|
||||
|
||||
def _write_table(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
|
||||
headers = [
|
||||
"song_id",
|
||||
"platform",
|
||||
"remote_song_id",
|
||||
"name",
|
||||
"singers",
|
||||
"album",
|
||||
"reason_codes",
|
||||
]
|
||||
stream.write("\t".join(headers) + "\n")
|
||||
for row in rows:
|
||||
stream.write("\t".join(str(row[header]) for header in headers) + "\n")
|
||||
|
||||
|
||||
def _write_report(
    songs: List[SuspectedLiveSong],
    *,
    output_format: str,
    stream: TextIO,
) -> None:
    """Render ``songs`` to ``stream`` as ``csv``, ``jsonl`` or a tab-separated table.

    Any ``output_format`` other than ``"csv"`` or ``"jsonl"`` produces the
    table.
    """
    rows = list(map(_song_to_row, songs))
    if output_format == "csv":
        write_rows = _write_csv
    elif output_format == "jsonl":
        write_rows = _write_jsonl
    else:
        write_rows = _write_table
    write_rows(rows, stream)
|
||||
|
||||
|
||||
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command line of the live-version report tool.

    ``argv`` defaults to the process arguments when ``None``. The resulting
    namespace has the attributes ``db``, ``limit``, ``include_undownloaded``,
    ``format`` and ``output``.
    """
    parser = argparse.ArgumentParser(
        description="List suspected live/stage versions without modifying catalog-sync data.",
    )
    # Declared in the order in which they should appear in --help.
    option_specs = (
        ("--db", {"required": True, "help": "Path to catalogsync.db"}),
        (
            "--limit",
            {"type": int, "default": None, "help": "Maximum number of matched songs to return."},
        ),
        (
            "--include-undownloaded",
            {
                "action": "store_true",
                "help": "Scan all songs instead of only songs with active local files.",
            },
        ),
        (
            "--format",
            {
                "choices": ("table", "csv", "jsonl"),
                "default": "table",
                "help": "Output format for stdout and optional file output.",
            },
        ),
        ("--output", {"help": "Optional path to write the report file."}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None) -> int:
    """Run the scan and print the report; return the process exit status.

    The report always goes to stdout. With ``--output`` it is additionally
    written to that file (parent directories are created). Progress
    information is printed to stderr.
    """
    options = parse_args(argv)
    flagged = scan_suspected_live_songs(
        options.db,
        downloaded_only=not options.include_undownloaded,
        limit=options.limit,
    )
    print(f"matched_song_count={len(flagged)}", file=sys.stderr)

    _write_report(flagged, output_format=options.format, stream=sys.stdout)

    if not options.output:
        return 0

    output_path = Path(options.output).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line endings to the writers (csv emits its own).
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        _write_report(flagged, output_format=options.format, stream=handle)
    print(f"wrote_report={output_path}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,450 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>{{ title or "Catalogsync Ops" }}</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
font-size: 14px;
|
||||
line-height: 1.35;
|
||||
margin: 0;
|
||||
background: #f5f7fb;
|
||||
color: #1b2533;
|
||||
}
|
||||
[hidden] {
|
||||
display: none !important;
|
||||
}
|
||||
nav {
|
||||
background: #0f172a;
|
||||
padding: 0.65rem 0.85rem;
|
||||
}
|
||||
nav a {
|
||||
color: #dbeafe;
|
||||
text-decoration: none;
|
||||
margin-right: 0.85rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
main {
|
||||
padding: 0.85rem;
|
||||
}
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
background: #fff;
|
||||
}
|
||||
th, td {
|
||||
border: 1px solid #dbe2ea;
|
||||
padding: 0.32rem 0.42rem;
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
font-size: 0.86rem;
|
||||
}
|
||||
.playlist-sort-th {
|
||||
padding: 0;
|
||||
}
|
||||
.playlist-sort-link {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.35rem;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
padding: 0.32rem 0.42rem;
|
||||
}
|
||||
.playlist-sort-link:hover {
|
||||
text-decoration: underline;
|
||||
background: #f8fafc;
|
||||
}
|
||||
.playlist-sort-indicator {
|
||||
color: #475569;
|
||||
font-size: 0.75rem;
|
||||
line-height: 1;
|
||||
}
|
||||
h1 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.7rem;
|
||||
font-size: 1.35rem;
|
||||
}
|
||||
h2, h3 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.55rem;
|
||||
}
|
||||
.card {
|
||||
background: #fff;
|
||||
border: 1px solid #dbe2ea;
|
||||
border-radius: 6px;
|
||||
padding: 0.7rem;
|
||||
margin-bottom: 0.8rem;
|
||||
}
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
|
||||
gap: 0.8rem;
|
||||
}
|
||||
form {
|
||||
display: grid;
|
||||
gap: 0.55rem;
|
||||
}
|
||||
input, select, button, textarea {
|
||||
font: inherit;
|
||||
}
|
||||
input, select, textarea {
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
padding: 0.38rem 0.48rem;
|
||||
border: 1px solid #cbd5e1;
|
||||
border-radius: 6px;
|
||||
background: #fff;
|
||||
}
|
||||
button {
|
||||
width: fit-content;
|
||||
padding: 0.38rem 0.6rem;
|
||||
border: 0;
|
||||
border-radius: 6px;
|
||||
background: #0f172a;
|
||||
color: #fff;
|
||||
cursor: pointer;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1.2;
|
||||
}
|
||||
button.secondary {
|
||||
background: #475569;
|
||||
}
|
||||
.button-grid {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.45rem;
|
||||
}
|
||||
.muted {
|
||||
color: #64748b;
|
||||
}
|
||||
.progress-cell {
|
||||
min-width: 180px;
|
||||
}
|
||||
.progress-meta {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.2rem;
|
||||
font-size: 0.78rem;
|
||||
}
|
||||
.progress-bar {
|
||||
width: 100%;
|
||||
height: 0.5rem;
|
||||
background: #e2e8f0;
|
||||
border-radius: 999px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: linear-gradient(90deg, #0f766e, #14b8a6);
|
||||
}
|
||||
.progress-note {
|
||||
margin-top: 0.25rem;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
.task-playlist-tree {
|
||||
margin-top: 0.9rem;
|
||||
}
|
||||
.task-tree-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
|
||||
gap: 0.85rem;
|
||||
align-items: start;
|
||||
}
|
||||
.task-tree-panel {
|
||||
display: grid;
|
||||
gap: 0.55rem;
|
||||
min-width: 0;
|
||||
align-content: start;
|
||||
align-self: start;
|
||||
}
|
||||
.task-tree-panel-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.task-tree-panel-head h3 {
|
||||
margin: 0;
|
||||
font-size: 1rem;
|
||||
}
|
||||
.task-tree {
|
||||
display: grid;
|
||||
gap: 0.45rem;
|
||||
}
|
||||
.task-tree-node {
|
||||
border: 1px solid #dbe2ea;
|
||||
border-radius: 6px;
|
||||
background: #f8fafc;
|
||||
}
|
||||
.task-tree-node-playlist,
|
||||
.task-tree-song {
|
||||
border-color: #e2e8f0;
|
||||
background: #fff;
|
||||
}
|
||||
.task-tree-row {
|
||||
display: grid;
|
||||
grid-template-columns: auto minmax(0, 1fr) minmax(180px, 250px) auto;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
padding: 0.5rem 0.6rem;
|
||||
}
|
||||
.task-tree-row-child {
|
||||
padding-left: 1.1rem;
|
||||
}
|
||||
.task-tree-main {
|
||||
min-width: 0;
|
||||
}
|
||||
.task-tree-title-line {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.35rem;
|
||||
align-items: center;
|
||||
}
|
||||
.task-tree-title-line strong {
|
||||
font-size: 0.88rem;
|
||||
line-height: 1.2;
|
||||
}
|
||||
.task-tree-meta-inline {
|
||||
flex: 1 1 180px;
|
||||
min-width: 0;
|
||||
font-size: 0.72rem;
|
||||
line-height: 1.15;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.task-tree-progress {
|
||||
min-width: 0;
|
||||
}
|
||||
.task-tree-state {
|
||||
max-width: 280px;
|
||||
font-size: 0.74rem;
|
||||
line-height: 1.2;
|
||||
}
|
||||
.task-tree-actions {
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
.task-tree-children {
|
||||
display: grid;
|
||||
gap: 0.4rem;
|
||||
padding: 0 0.6rem 0.55rem 0.6rem;
|
||||
}
|
||||
.task-tree-children-songs {
|
||||
padding-left: 2rem;
|
||||
}
|
||||
.task-tree-song {
|
||||
display: grid;
|
||||
grid-template-columns: 2rem minmax(0, 1fr) auto minmax(100px, 210px);
|
||||
gap: 0.45rem;
|
||||
align-items: center;
|
||||
padding: 0.45rem 0.55rem;
|
||||
}
|
||||
.task-tree-song-index {
|
||||
color: #64748b;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
.task-tree-song-note {
|
||||
color: #334155;
|
||||
font-size: 0.74rem;
|
||||
line-height: 1.2;
|
||||
}
|
||||
.tree-toggle {
|
||||
min-width: 1.55rem;
|
||||
padding: 0.16rem 0.32rem;
|
||||
font-size: 0.78rem;
|
||||
line-height: 1.05;
|
||||
border-radius: 4px;
|
||||
}
|
||||
.tree-spacer {
|
||||
display: block;
|
||||
width: 1.55rem;
|
||||
height: 1.45rem;
|
||||
}
|
||||
.inline-tree .tree-toggle {
|
||||
min-width: 2rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
}
|
||||
.tree-row-detail > td {
|
||||
background: #f8fafc;
|
||||
}
|
||||
.song-progress-table {
|
||||
margin-top: 0.35rem;
|
||||
}
|
||||
.song-note {
|
||||
color: #334155;
|
||||
}
|
||||
.mono {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
|
||||
}
|
||||
.playlist-name-button {
|
||||
border: 0;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
background: transparent;
|
||||
color: #0f4c81;
|
||||
text-decoration: underline;
|
||||
cursor: pointer;
|
||||
font: inherit;
|
||||
line-height: inherit;
|
||||
}
|
||||
.playlist-name-button:hover {
|
||||
color: #0b3a62;
|
||||
}
|
||||
.playlist-modal {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
z-index: 2000;
|
||||
}
|
||||
.playlist-modal-backdrop {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
background: rgba(15, 23, 42, 0.45);
|
||||
}
|
||||
.playlist-modal-panel {
|
||||
position: relative;
|
||||
z-index: 1;
|
||||
width: min(96vw, 1440px);
|
||||
max-height: 88vh;
|
||||
margin: 2.2vh auto;
|
||||
background: #fff;
|
||||
border: 1px solid #dbe2ea;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 22px 70px rgba(15, 23, 42, 0.28);
|
||||
display: grid;
|
||||
grid-template-rows: auto 1fr;
|
||||
overflow: hidden;
|
||||
}
|
||||
.playlist-modal-header {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
justify-content: space-between;
|
||||
gap: 0.8rem;
|
||||
padding: 0.8rem 0.9rem 0.65rem 0.9rem;
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
}
|
||||
.playlist-modal-header h2 {
|
||||
margin-bottom: 0.2rem;
|
||||
}
|
||||
.playlist-modal-meta {
|
||||
margin: 0;
|
||||
font-size: 0.78rem;
|
||||
}
|
||||
.playlist-modal-body {
|
||||
padding: 0.75rem 0.9rem 0.9rem 0.9rem;
|
||||
overflow: auto;
|
||||
}
|
||||
.playlist-modal-table-wrap {
|
||||
overflow: auto;
|
||||
}
|
||||
.playlist-song-locations {
|
||||
min-width: 160px;
|
||||
font-size: 0.74rem;
|
||||
line-height: 1.25;
|
||||
color: #334155;
|
||||
word-break: break-all;
|
||||
}
|
||||
.playlist-song-locations .muted {
|
||||
display: block;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
.playlist-modal-close {
|
||||
min-width: 2rem;
|
||||
}
|
||||
.status-tag {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
padding: 0.08rem 0.34rem;
|
||||
border-radius: 999px;
|
||||
border: 1px solid #cbd5e1;
|
||||
font-size: 0.68rem;
|
||||
line-height: 1.05;
|
||||
margin-right: 0;
|
||||
margin-bottom: 0;
|
||||
background: #f8fafc;
|
||||
color: #334155;
|
||||
}
|
||||
.status-downloaded {
|
||||
background: #dcfce7;
|
||||
border-color: #86efac;
|
||||
color: #166534;
|
||||
}
|
||||
.status-running {
|
||||
background: #dbeafe;
|
||||
border-color: #93c5fd;
|
||||
color: #1d4ed8;
|
||||
}
|
||||
.status-pending {
|
||||
background: #f1f5f9;
|
||||
border-color: #cbd5e1;
|
||||
color: #334155;
|
||||
}
|
||||
.status-failed {
|
||||
background: #fee2e2;
|
||||
border-color: #fca5a5;
|
||||
color: #991b1b;
|
||||
}
|
||||
.status-skipped {
|
||||
background: #fef3c7;
|
||||
border-color: #fcd34d;
|
||||
color: #92400e;
|
||||
}
|
||||
.status-tag.non-music {
|
||||
background: #fff7ed;
|
||||
border-color: #fdba74;
|
||||
color: #9a3412;
|
||||
}
|
||||
pre {
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
padding: 0.8rem;
|
||||
overflow: auto;
|
||||
}
|
||||
code {
|
||||
background: #eef2f7;
|
||||
padding: 0.1rem 0.3rem;
|
||||
}
|
||||
/* Viewports up to 900px wide: compact layout for the task tree. */
@media (max-width: 900px) {
|
||||
/* Collapse the task tree columns into a single track. */
.task-tree-columns {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
/* Task and song rows: an auto-sized first track plus one flexible track
   that is allowed to shrink to zero (minmax(0, 1fr)). */
.task-tree-row,
|
||||
.task-tree-song {
|
||||
grid-template-columns: auto minmax(0, 1fr);
|
||||
align-items: start;
|
||||
}
|
||||
.task-tree-actions {
|
||||
justify-content: flex-start;
|
||||
}
|
||||
.task-tree-children-songs {
|
||||
padding-left: 1rem;
|
||||
}
|
||||
/* Let the inline meta text take a full line and wrap. */
.task-tree-meta-inline {
|
||||
flex-basis: 100%;
|
||||
white-space: normal;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
<script src="/static/ops/app.js?v=20260418_playlist_playcount_v1" defer></script>
|
||||
</head>
|
||||
{# Page shell. data-sse-url and data-dashboard-api are emitted on <body> only
   when the matching context value (sse_url / dashboard_api_url) is truthy.
   Child templates supply the page body through the `content` block. #}
<body{% if sse_url %} data-sse-url="{{ sse_url }}"{% endif %}{% if dashboard_api_url %} data-dashboard-api="{{ dashboard_api_url }}"{% endif %}>
|
||||
<nav>
|
||||
<a href="/dashboard">Dashboard</a>
|
||||
<a href="/jobs">Jobs</a>
|
||||
<a href="/playlists">Playlists</a>
|
||||
<a href="/songs">Songs</a>
|
||||
<a href="/logs">Logs</a>
|
||||
<a href="/config">Config</a>
|
||||
</nav>
|
||||
<main>
|
||||
{% block content %}{% endblock %}
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,57 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
<h1>Config</h1>
|
||||
|
||||
{# Prints the env_content context value verbatim inside <pre>.
   NOTE(review): if env_content is the raw env file it may contain secrets;
   confirm the view masks them and that autoescaping is on for this template. #}
<div class="card">
|
||||
<h2>Current Env</h2>
|
||||
<pre>{{ env_content }}</pre>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Parsed Values</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Key</th>
|
||||
<th>Value</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for key, value in env_values.items() %}
|
||||
<tr>
|
||||
<td><code>{{ key }}</code></td>
|
||||
<td>{{ value }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="2">No parsed values.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Revisions</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Created</th>
|
||||
<th>Applied</th>
|
||||
<th>Note</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for revision in revisions %}
|
||||
<tr>
|
||||
<td>{{ revision.id }}</td>
|
||||
<td>{{ revision.created_at }}</td>
|
||||
<td>{{ revision.applied_at or "-" }}</td>
|
||||
<td>{{ revision.note or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No revisions.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,273 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
{# NOTE(review): done_statuses is not referenced anywhere else in this template; confirm whether it is still needed. #}
{% set done_statuses = ("completed", "completed_with_errors", "failed", "canceled") %}
|
||||
{#
  Render one top-level task node of the task tree.

  `row` is read for: id, status, display_name, job_type, scope_summary,
  queue_label / lane_type, active_worker_count, primary_progress_text and
  primary_progress_percent.

  Output: expand toggle, name / meta / status line, progress bar, the
  pause-or-resume and cancel controls, and a hidden children container
  holding a placeholder paragraph.
#}
{% macro render_task_tree_node(row) -%}
|
||||
{% set row_status = row.status or "" %}
|
||||
{% set toggle_command = "resume" if row_status in ("paused", "pause_requested") else "pause" if row_status in ("queued", "running") else "" %}
|
||||
{% set can_cancel = row_status in ("queued", "running", "paused", "pause_requested") %}
|
||||
{# Single fallback for the progress value, used by both the label and the bar width. #}
{% set progress_percent = row.primary_progress_percent or 0 %}
<section class="task-tree-node task-tree-node-task" data-task-node="{{ row.id }}">
|
||||
<div class="task-tree-row">
|
||||
<button type="button" class="tree-toggle" data-task-toggle="{{ row.id }}" aria-expanded="false" aria-label="Expand task {{ row.id }}">+</button>
|
||||
<div class="task-tree-main">
|
||||
<div class="task-tree-title-line">
|
||||
<strong data-task-name>{{ row.display_name }}</strong>
|
||||
<span class="muted task-tree-meta-inline" data-task-meta-inline>#{{ row.id }} / {{ row.job_type }} / {{ row.scope_summary }} / {{ row.queue_label or row.lane_type or "-" }} / workers {{ row.active_worker_count }}</span>
|
||||
{# Use the normalised row_status so a missing status does not render as "None" / class "status-None". #}
<span class="status-tag status-{{ row_status }}" data-task-status>{{ row_status or "-" }}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="task-tree-progress" data-task-progress>
|
||||
<div class="progress-meta">
|
||||
<span>{{ row.primary_progress_text or "-" }}</span>
|
||||
<strong>{{ progress_percent }}%</strong>
|
||||
</div>
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" style="width: {{ progress_percent }}%;"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="task-tree-actions">
|
||||
<div class="button-grid">
|
||||
{% if toggle_command %}
|
||||
<button
|
||||
type="button"
|
||||
data-task-command-toggle="{{ row.id }}"
|
||||
data-task-command-type="{{ toggle_command }}"
|
||||
>
|
||||
{% if toggle_command == "resume" %}>{% else %}||{% endif %}
|
||||
</button>
|
||||
{% else %}
|
||||
<span class="muted">-</span>
|
||||
{% endif %}
|
||||
{% if can_cancel %}
|
||||
{# The visible label is just "x"; aria-label gives the control an accessible name. #}
<button type="button" class="secondary" data-task-command-cancel="{{ row.id }}" aria-label="Cancel task {{ row.id }}">x</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="task-tree-children" data-task-children="{{ row.id }}" hidden>
|
||||
<p class="muted">Expand to load playlists...</p>
|
||||
</div>
|
||||
</section>
|
||||
{%- endmacro %}
|
||||
|
||||
<h1>Task Center</h1>
|
||||
|
||||
<div class="card">
|
||||
<div data-live-status>Live snapshot: waiting...</div>
|
||||
</div>
|
||||
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<h2>Summary</h2>
|
||||
<table>
|
||||
<tr><th>Total Jobs</th><td data-summary-field="total_jobs">{{ summary.total_jobs }}</td></tr>
|
||||
<tr><th>Queued</th><td data-summary-field="queued_jobs">{{ summary.queued_jobs }}</td></tr>
|
||||
<tr><th>Queued Download Jobs</th><td data-summary-field="queued_download_jobs">{{ summary.queued_download_jobs }}</td></tr>
|
||||
<tr><th>Running</th><td data-summary-field="running_jobs">{{ summary.running_jobs }}</td></tr>
|
||||
<tr><th>Paused</th><td data-summary-field="paused_jobs">{{ summary.paused_jobs }}</td></tr>
|
||||
<tr><th>Failed / Errors</th><td data-summary-field="failed_jobs">{{ summary.failed_jobs }}</td></tr>
|
||||
<tr><th>Downloaded Songs</th><td data-download-field="downloaded_songs">{{ download_stats.downloaded_songs }}</td></tr>
|
||||
<tr><th>Running Songs</th><td data-download-field="running_song_items">{{ download_stats.running_song_items }}</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Quick Actions</h2>
|
||||
<div class="button-grid">
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="catalog_sync" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="sources" value="{{ default_sources }}" />
|
||||
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
|
||||
<button type="submit">Full Pipeline</button>
|
||||
</form>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="collect_only" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="sources" value="{{ default_sources }}" />
|
||||
<button type="submit">Collect</button>
|
||||
</form>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="sync_only" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="sources" value="{{ default_sources }}" />
|
||||
<button type="submit">Sync</button>
|
||||
</form>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="download_only" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
|
||||
<button type="submit">Download</button>
|
||||
</form>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="upload_only" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="download_sources" value="{{ default_download_sources }}" />
|
||||
<button type="submit">Upload</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Create Job</h2>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<label>
|
||||
Job Type
|
||||
<select name="job_type">
|
||||
{% for value, label in job_type_options %}
|
||||
<option value="{{ value }}">{{ label }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Requested By
|
||||
<input type="text" name="requested_by" value="ops-console" />
|
||||
</label>
|
||||
<label>
|
||||
Collect Sources
|
||||
<input type="text" name="sources" value="{{ default_sources }}" />
|
||||
</label>
|
||||
<label>
|
||||
Download Sources
|
||||
<input type="text" name="download_sources" value="{{ default_download_sources }}" />
|
||||
</label>
|
||||
<button type="submit">Create Job</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Playlist Coverage</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Platform</th>
|
||||
<th>Pool Kind</th>
|
||||
<th>Pool Name</th>
|
||||
<th>Playlists</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody data-playlist-sources-body>
|
||||
{% for row in playlist_sources %}
|
||||
<tr>
|
||||
<td>{{ row.platform }}</td>
|
||||
<td>{{ row.pool_kind }}</td>
|
||||
<td>{{ row.pool_name }}</td>
|
||||
<td>{{ row.playlist_count }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No playlist sources collected yet.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div
|
||||
class="card"
|
||||
data-maintenance-panel="local-duplicates"
|
||||
data-scan-api="{{ maintenance_local_duplicates_scan_api }}"
|
||||
data-dedupe-api="{{ maintenance_local_duplicates_dedupe_api }}"
|
||||
>
|
||||
<h2>Maintenance</h2>
|
||||
<div class="button-grid">
|
||||
<button type="button" data-maintenance-action="scan">Scan Duplicate Local Copies</button>
|
||||
<button type="button" class="secondary" data-maintenance-action="dedupe">Run Local Dedupe</button>
|
||||
</div>
|
||||
<p class="muted" data-maintenance-status>No local duplicate scan has been run yet.</p>
|
||||
<div data-maintenance-result>
|
||||
<p class="muted">Scan first to inspect duplicate local file copies before dedupe.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="task-tree-panel-head">
|
||||
<h2>Task Center</h2>
|
||||
<span class="muted" data-task-center-transfer>Down {{ transfer_stats.download_speed_text }} | Up {{ transfer_stats.upload_speed_text }}</span>
|
||||
</div>
|
||||
<div class="task-tree-columns">
|
||||
<section class="task-tree-panel">
|
||||
<div class="task-tree-panel-head">
|
||||
<h3>Doing</h3>
|
||||
<span class="muted">Task -> Playlist -> Song</span>
|
||||
</div>
|
||||
<div class="task-tree" data-task-tree-root="doing">
|
||||
{% for row in doing_task_rows %}
|
||||
{{ render_task_tree_node(row) }}
|
||||
{% else %}
|
||||
<p class="muted" data-task-tree-empty>No active tasks.</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</section>
|
||||
<section class="task-tree-panel">
|
||||
<div class="task-tree-panel-head">
|
||||
<h3>Recent Done</h3>
|
||||
<span class="muted">Task -> Playlist</span>
|
||||
</div>
|
||||
<div class="task-tree" data-task-tree-root="done">
|
||||
{% for row in done_task_rows %}
|
||||
{{ render_task_tree_node(row) }}
|
||||
{% else %}
|
||||
<p class="muted" data-task-tree-empty>No recently finished tasks.</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<h2>Active Workers</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Worker</th>
|
||||
<th>Status</th>
|
||||
<th>Stage</th>
|
||||
<th>Current Item</th>
|
||||
<th>Progress</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody data-workers-body>
|
||||
{% for worker in workers %}
|
||||
<tr>
|
||||
<td>{{ worker.worker_name }}</td>
|
||||
<td>{{ worker.status }}</td>
|
||||
<td>{{ worker.stage_type or "-" }}</td>
|
||||
<td>{{ worker.display_text or "-" }}</td>
|
||||
<td>{{ worker.last_progress_text or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No active workers.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Running Items</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Job</th>
|
||||
<th>Worker</th>
|
||||
<th>Stage</th>
|
||||
<th>Item</th>
|
||||
<th>Started</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody data-running-items-body>
|
||||
{% for item in running_items %}
|
||||
<tr>
|
||||
<td><a href="/jobs/{{ item.job_run_id }}">{{ item.job_run_id }}</a></td>
|
||||
<td>{{ item.worker_name or "-" }}</td>
|
||||
<td>{{ item.stage_type }}</td>
|
||||
<td>{{ item.display_name }}</td>
|
||||
<td>{{ item.started_at or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No running items.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,223 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
<p><a href="/dashboard">Back to Dashboard</a></p>
|
||||
<h1>Job {{ job.id }}</h1>
|
||||
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<table>
|
||||
<tr><th>Type</th><td>{{ job.job_type }}</td></tr>
|
||||
<tr><th>Status</th><td>{{ job.status }}</td></tr>
|
||||
<tr><th>Requested By</th><td>{{ job.requested_by or "-" }}</td></tr>
|
||||
<tr><th>Created</th><td>{{ job.created_at or "-" }}</td></tr>
|
||||
<tr><th>Started</th><td>{{ job.started_at or "-" }}</td></tr>
|
||||
<tr><th>Ended</th><td>{{ job.ended_at or "-" }}</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Job Commands</h2>
|
||||
<div class="button-grid">
|
||||
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="command_type" value="pause" />
|
||||
<button type="submit">暂停任务</button>
|
||||
</form>
|
||||
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="command_type" value="resume" />
|
||||
<button type="submit">继续任务</button>
|
||||
</form>
|
||||
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="command_type" value="cancel" />
|
||||
<button type="submit" class="secondary">取消任务</button>
|
||||
</form>
|
||||
</div>
|
||||
{# Posts a retry_item command for one item. target_item_id is required so the
   browser blocks submission when no item id has been entered. #}
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="command_type" value="retry_item" />
|
||||
<label>
|
||||
Retry Item Id
|
||||
<input type="number" name="target_item_id" min="1" required />
|
||||
</label>
|
||||
<button type="submit">Retry Item</button>
|
||||
</form>
|
||||
{# Posts a force_retry_item command for one item. target_item_id is required so
   the browser blocks submission when no item id has been entered. #}
<form action="{{ command_endpoint }}" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="command_type" value="force_retry_item" />
|
||||
<label>
|
||||
Force Retry Item Id
|
||||
<input type="number" name="target_item_id" min="1" required />
|
||||
</label>
|
||||
<button type="submit">Force Retry Item</button>
|
||||
<p class="muted">Use this when a single item needs to be replayed from scratch.</p>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Download Stats</h2>
|
||||
<table>
|
||||
<tr><th>Total Songs</th><td>{{ download_stats.total_songs }}</td></tr>
|
||||
<tr><th>Downloaded Songs</th><td>{{ download_stats.downloaded_songs }}</td></tr>
|
||||
<tr><th>Local Files</th><td>{{ download_stats.local_file_locations }}</td></tr>
|
||||
<tr><th>Running Songs</th><td>{{ download_stats.running_song_items }}</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Stages</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Stage</th>
|
||||
<th>Status</th>
|
||||
<th>Total</th>
|
||||
<th>Pending</th>
|
||||
<th>Running</th>
|
||||
<th>Succeeded</th>
|
||||
<th>Failed</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for stage in stages %}
|
||||
<tr>
|
||||
<td>{{ stage.id }}</td>
|
||||
<td>{{ stage.stage_type }}</td>
|
||||
<td>{{ stage.status }}</td>
|
||||
<td>{{ stage.total_items }}</td>
|
||||
<td>{{ stage.pending_items }}</td>
|
||||
<td>{{ stage.running_items }}</td>
|
||||
<td>{{ stage.success_items }}</td>
|
||||
<td>{{ stage.failed_items }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="8">No stages.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Playlist Progress</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Playlist</th>
|
||||
<th>Progress</th>
|
||||
<th>Total Songs</th>
|
||||
<th>Downloaded</th>
|
||||
<th>Running</th>
|
||||
<th>Pending</th>
|
||||
<th>Failed</th>
|
||||
<th>Skipped</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for playlist in playlist_progress %}
|
||||
<tr>
|
||||
<td>{{ playlist.playlist_id }}</td>
|
||||
<td>{{ playlist.playlist_name }}</td>
|
||||
<td class="progress-cell">
|
||||
<div class="progress-meta">
|
||||
<span>{{ playlist.downloaded_songs or 0 }} / {{ playlist.total_songs or 0 }}</span>
|
||||
<strong>{{ playlist.progress_percent or 0 }}%</strong>
|
||||
</div>
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" style="width: {{ playlist.progress_percent or 0 }}%;"></div>
|
||||
</div>
|
||||
</td>
|
||||
<td>{{ playlist.total_songs or 0 }}</td>
|
||||
<td>{{ playlist.downloaded_songs or 0 }}</td>
|
||||
<td>{{ playlist.running_songs or 0 }}</td>
|
||||
<td>{{ playlist.pending_songs or 0 }}</td>
|
||||
<td>{{ playlist.failed_songs or 0 }}</td>
|
||||
<td>{{ playlist.skipped_songs or 0 }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="9">No playlist-scoped progress for this job.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Workers</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Worker</th>
|
||||
<th>Status</th>
|
||||
<th>Stage</th>
|
||||
<th>Current Song / Playlist</th>
|
||||
<th>Progress</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for worker in workers %}
|
||||
<tr>
|
||||
<td>{{ worker.worker_name }}</td>
|
||||
<td>{{ worker.status }}</td>
|
||||
<td>{{ worker.stage_type or "-" }}</td>
|
||||
<td>{{ worker.display_text or "-" }}</td>
|
||||
<td>{{ worker.last_progress_text or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No workers recorded yet.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Running Items</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Worker</th>
|
||||
<th>Stage</th>
|
||||
<th>Item</th>
|
||||
<th>Started</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for item in running_items %}
|
||||
<tr>
|
||||
<td>{{ item.worker_name or "-" }}</td>
|
||||
<td>{{ item.stage_type }}</td>
|
||||
<td>{{ item.display_name }}</td>
|
||||
<td>{{ item.started_at or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No running items.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Commands</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Type</th>
|
||||
<th>Status</th>
|
||||
<th>Created</th>
|
||||
<th>Applied</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for command in commands %}
|
||||
<tr>
|
||||
<td>{{ command.id }}</td>
|
||||
<td>{{ command.command_type }}</td>
|
||||
<td>{{ command.status }}</td>
|
||||
<td>{{ command.created_at }}</td>
|
||||
<td>{{ command.applied_at or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No commands.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,30 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
{# Fallback job listing: one table row per entry in `jobs`, with the id linking
   to /jobs/<id>. requested_by and created_at fall back to "-" when empty; the
   for/else branch renders a single placeholder row when `jobs` is empty. #}
<h1>Jobs Archive</h1>
|
||||
<p class="muted">Use <a href="/dashboard">Dashboard</a> for the main task center. This page stays available for fallback browsing.</p>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Type</th>
|
||||
<th>Status</th>
|
||||
<th>Requested By</th>
|
||||
<th>Created</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in jobs %}
|
||||
<tr>
|
||||
<td><a href="/jobs/{{ job.id }}">{{ job.id }}</a></td>
|
||||
<td>{{ job.job_type }}</td>
|
||||
<td>{{ job.status }}</td>
|
||||
<td>{{ job.requested_by or "-" }}</td>
|
||||
<td>{{ job.created_at or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No jobs found.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,29 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
{# Flat table of `events`: id, job_run_id, event_type, message and created_at.
   message falls back to "-" when empty; the for/else branch renders a single
   placeholder row when there are no events. #}
<h1>Events</h1>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Job ID</th>
|
||||
<th>Event Type</th>
|
||||
<th>Message</th>
|
||||
<th>Created</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for event in events %}
|
||||
<tr>
|
||||
<td>{{ event.id }}</td>
|
||||
<td>{{ event.job_run_id }}</td>
|
||||
<td>{{ event.event_type }}</td>
|
||||
<td>{{ event.message or "-" }}</td>
|
||||
<td>{{ event.created_at }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="5">No events.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,276 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
<section data-playlists-page>
|
||||
<h1>Playlists</h1>
|
||||
|
||||
<div class="card">
|
||||
<h2>Playlist Coverage</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Platform</th>
|
||||
<th>Pool Kind</th>
|
||||
<th>Pool Name</th>
|
||||
<th>Playlists</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for row in playlist_sources %}
|
||||
<tr>
|
||||
<td>{{ row.platform }}</td>
|
||||
<td>{{ row.pool_kind }}</td>
|
||||
<td>{{ row.pool_name }}</td>
|
||||
<td>{{ row.playlist_count }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No playlist sources collected yet.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Filters</h2>
|
||||
<form method="get" action="/playlists">
|
||||
<input type="hidden" name="sort_by" value="{{ filters.sort_by }}" />
|
||||
<input type="hidden" name="sort_dir" value="{{ filters.sort_dir }}" />
|
||||
<div class="grid">
|
||||
<label>
|
||||
Keyword
|
||||
<input type="text" name="keyword" value="{{ filters.keyword }}" placeholder="Name / remote id" />
|
||||
</label>
|
||||
<label>
|
||||
Platform
|
||||
<select name="platform">
|
||||
<option value="">All</option>
|
||||
{% for option in filter_options.platforms %}
|
||||
<option value="{{ option }}" {% if filters.platform == option %}selected{% endif %}>{{ option }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Pool Kind
|
||||
<select name="pool_kind">
|
||||
<option value="">All</option>
|
||||
{% for option in filter_options.pool_kinds %}
|
||||
<option value="{{ option }}" {% if filters.pool_kind == option %}selected{% endif %}>{{ option }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Status
|
||||
<select name="status">
|
||||
<option value="" {% if not filters.status %}selected{% endif %}>All</option>
|
||||
<option value="unsynced" {% if filters.status == "unsynced" %}selected{% endif %}>Unsynced</option>
|
||||
<option value="not_downloaded" {% if filters.status == "not_downloaded" %}selected{% endif %}>Not Downloaded</option>
|
||||
<option value="downloading" {% if filters.status == "downloading" %}selected{% endif %}>Downloading</option>
|
||||
<option value="partial" {% if filters.status == "partial" %}selected{% endif %}>Partial</option>
|
||||
<option value="downloaded" {% if filters.status == "downloaded" %}selected{% endif %}>Downloaded</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Wanted
|
||||
<select name="wanted_only">
|
||||
<option value="" {% if not filters.wanted_only %}selected{% endif %}>All</option>
|
||||
<option value="1" {% if filters.wanted_only %}selected{% endif %}>Wanted only</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>
|
||||
Page Size
|
||||
<select name="page_size">
|
||||
<option value="20" {% if filters.page_size == 20 %}selected{% endif %}>20</option>
|
||||
<option value="50" {% if filters.page_size == 50 %}selected{% endif %}>50</option>
|
||||
<option value="100" {% if filters.page_size == 100 %}selected{% endif %}>100</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
<button type="submit">Apply Filters</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="button-grid">
|
||||
<button type="button" data-playlist-select-all>Select All On Page</button>
|
||||
<button type="button" class="secondary" data-playlist-clear-selection>Clear Selection</button>
|
||||
<span>Selected: <strong data-playlist-selection-count>0</strong></span>
|
||||
<form action="/api/jobs" method="post" data-json-form data-success="reload">
|
||||
<input type="hidden" name="job_type" value="collect_only" />
|
||||
<input type="hidden" name="requested_by" value="ops-console" />
|
||||
<input type="hidden" name="sources" value="{{ default_sources }}" />
|
||||
<button type="submit" class="secondary">Collect Playlist Sources</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="button-grid" style="margin-top: 0.8rem;">
|
||||
<button type="button" data-playlist-action="sync">Sync Selected Playlists</button>
|
||||
<button type="button" data-playlist-action="download">Download Selected Playlists</button>
|
||||
<button type="button" class="secondary" data-playlist-action="export-selected">Export Selected</button>
|
||||
<button type="button" class="secondary" data-playlist-action="mark-wanted">Mark Wanted</button>
|
||||
<button type="button" class="secondary" data-playlist-action="unmark-wanted">Unmark Wanted</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Select</th>
|
||||
<th class="playlist-sort-th">
|
||||
<a class="playlist-sort-link" data-playlist-sort-link="id" href="{{ sort_links.id.href }}">
|
||||
<span>ID</span>
|
||||
{% if sort_links.id.indicator %}
|
||||
<span class="playlist-sort-indicator" data-playlist-sort-indicator="id">{{ sort_links.id.indicator }}</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</th>
|
||||
<th class="playlist-sort-th">
|
||||
<a class="playlist-sort-link" data-playlist-sort-link="platform" href="{{ sort_links.platform.href }}">
|
||||
<span>Platform</span>
|
||||
{% if sort_links.platform.indicator %}
|
||||
<span class="playlist-sort-indicator" data-playlist-sort-indicator="platform">{{ sort_links.platform.indicator }}</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</th>
|
||||
<th>Remote ID</th>
|
||||
<th class="playlist-sort-th">
|
||||
<a class="playlist-sort-link" data-playlist-sort-link="name" href="{{ sort_links.name.href }}">
|
||||
<span>Name</span>
|
||||
{% if sort_links.name.indicator %}
|
||||
<span class="playlist-sort-indicator" data-playlist-sort-indicator="name">{{ sort_links.name.indicator }}</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</th>
|
||||
<th class="playlist-sort-th">
|
||||
<a class="playlist-sort-link" data-playlist-sort-link="play_count" href="{{ sort_links.play_count.href }}">
|
||||
<span>热度/播放量</span>
|
||||
{% if sort_links.play_count.indicator %}
|
||||
<span class="playlist-sort-indicator" data-playlist-sort-indicator="play_count">{{ sort_links.play_count.indicator }}</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</th>
|
||||
<th>Pools</th>
|
||||
<th>Songs</th>
|
||||
<th>Downloaded</th>
|
||||
<th>Progress</th>
|
||||
<th>Status</th>
|
||||
<th>Wanted</th>
|
||||
<th>Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for playlist in playlists %}
|
||||
<tr>
|
||||
{# Row selection checkbox. It has no visible label, so aria-label provides its accessible name. #}
<td>
|
||||
<input type="checkbox" data-playlist-checkbox value="{{ playlist.id }}" aria-label="Select playlist {{ playlist.id }}" />
|
||||
</td>
|
||||
<td>{{ playlist.id }}</td>
|
||||
<td>{{ playlist.platform }}</td>
|
||||
<td>{{ playlist.remote_playlist_id }}</td>
|
||||
<td>
|
||||
{% if (playlist.song_count or 0) > 0 %}
|
||||
<button
|
||||
type="button"
|
||||
class="playlist-name-button"
|
||||
data-playlist-open-songs="{{ playlist.id }}"
|
||||
data-playlist-name="{{ playlist.name }}"
|
||||
data-playlist-platform="{{ playlist.platform }}"
|
||||
data-playlist-remote-id="{{ playlist.remote_playlist_id }}"
|
||||
>{{ playlist.name }}</button>
|
||||
{% else %}
|
||||
{{ playlist.name }}
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>{{ playlist.play_count if playlist.play_count is not none else "-" }}</td>
|
||||
<td>{{ playlist.pool_names or "-" }}</td>
|
||||
<td>
|
||||
<div>{{ playlist.display_song_count or 0 }}</div>
|
||||
{% if playlist.is_song_count_estimated %}
|
||||
<div class="muted">Collected {{ playlist.collected_song_count }}</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>{{ playlist.downloaded_song_count or 0 }}</td>
|
||||
<td class="progress-cell">
|
||||
<div class="progress-meta">
|
||||
<span>{{ playlist.downloaded_song_count or 0 }} / {{ playlist.song_count or 0 }}</span>
|
||||
<strong>{{ playlist.progress_percent or 0 }}%</strong>
|
||||
</div>
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" style="width: {{ playlist.progress_percent or 0 }}%;"></div>
|
||||
</div>
|
||||
{% if (playlist.song_count or 0) == 0 or playlist.running_download_song_count %}
|
||||
<div class="progress-note muted">
|
||||
{% if (playlist.song_count or 0) == 0 and playlist.collected_song_count is not none %}
|
||||
Collected {{ playlist.collected_song_count }}, sync recommended
|
||||
{% elif (playlist.song_count or 0) == 0 %}
|
||||
0 songs, sync recommended
|
||||
{% elif playlist.running_download_song_count %}
|
||||
Running {{ playlist.running_download_song_count }}
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>{{ playlist.state_label or playlist.state_code or "-" }}</td>
|
||||
<td>{% if playlist.is_wanted %}Yes{% else %}No{% endif %}</td>
|
||||
<td>{{ playlist.updated_at }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="13">No playlists.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<div class="card" data-playlist-pagination>
|
||||
<p>
|
||||
Page {{ playlist_page.page }} / {{ playlist_page.total_pages if playlist_page.total_pages > 0 else 1 }}
|
||||
- Total {{ playlist_page.total_count }} playlists
|
||||
</p>
|
||||
<div class="button-grid">
|
||||
{% if previous_page_url %}
|
||||
<a href="{{ previous_page_url }}">Previous</a>
|
||||
{% else %}
|
||||
<span class="muted">Previous</span>
|
||||
{% endif %}
|
||||
{% if next_page_url %}
|
||||
<a href="{{ next_page_url }}">Next</a>
|
||||
{% else %}
|
||||
<span class="muted">Next</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="playlist-modal" data-playlist-songs-modal hidden>
|
||||
<div class="playlist-modal-backdrop" data-playlist-modal-close></div>
|
||||
<div class="playlist-modal-panel" role="dialog" aria-modal="true" aria-labelledby="playlist-modal-title">
|
||||
<div class="playlist-modal-header">
|
||||
<div>
|
||||
<h2 id="playlist-modal-title" data-playlist-modal-title>Playlist Songs</h2>
|
||||
<p class="playlist-modal-meta muted" data-playlist-modal-meta>-</p>
|
||||
</div>
|
||||
<div class="button-grid">
|
||||
<button type="button" class="secondary" data-playlist-export disabled>Export</button>
|
||||
<button type="button" class="secondary playlist-modal-close" data-playlist-modal-close aria-label="Close">x</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="playlist-modal-body">
|
||||
<p class="muted" data-playlist-modal-state>Select a playlist to preview songs.</p>
|
||||
<div class="playlist-modal-table-wrap" data-playlist-modal-table-wrap hidden>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Song ID</th>
|
||||
<th>Name</th>
|
||||
<th>Singers</th>
|
||||
<th>Size</th>
|
||||
<th>Format</th>
|
||||
<th>Local</th>
|
||||
<th>Uploaded</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody data-playlist-songs-body>
|
||||
<tr><td colspan="7">No songs.</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,97 @@
|
||||
{% extends "ops/base.html" %}
|
||||
{% block content %}
|
||||
<h1>Songs</h1>
|
||||
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<h2>Download Stats</h2>
|
||||
<table>
|
||||
<tr><th>Total Songs</th><td>{{ download_stats.total_songs }}</td></tr>
|
||||
<tr><th>Downloaded Songs</th><td>{{ download_stats.downloaded_songs }}</td></tr>
|
||||
<tr><th>Local Files</th><td>{{ download_stats.local_file_locations }}</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Active Workers</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Worker</th>
|
||||
<th>Status</th>
|
||||
<th>Stage</th>
|
||||
<th>Current Song / Playlist</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for worker in workers %}
|
||||
<tr>
|
||||
<td>{{ worker.worker_name }}</td>
|
||||
<td>{{ worker.status }}</td>
|
||||
<td>{{ worker.stage_type or "-" }}</td>
|
||||
<td>{{ worker.display_text or "-" }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No active workers.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Running Items</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Job</th>
|
||||
<th>Worker</th>
|
||||
<th>Stage</th>
|
||||
<th>Item</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for item in running_items %}
|
||||
<tr>
|
||||
<td><a href="/jobs/{{ item.job_run_id }}">{{ item.job_run_id }}</a></td>
|
||||
<td>{{ item.worker_name or "-" }}</td>
|
||||
<td>{{ item.stage_type }}</td>
|
||||
<td>{{ item.display_name }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="4">No running items.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Song Catalog</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Platform</th>
|
||||
<th>Remote ID</th>
|
||||
<th>Name</th>
|
||||
<th>Singers</th>
|
||||
<th>Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for song in songs %}
|
||||
<tr>
|
||||
<td>{{ song.id }}</td>
|
||||
<td>{{ song.platform }}</td>
|
||||
<td>{{ song.remote_song_id }}</td>
|
||||
<td>{{ song.name }}</td>
|
||||
<td>{{ song.singers or "-" }}</td>
|
||||
<td>{{ song.updated_at }}</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr><td colspan="6">No songs.</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,221 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
from .repository import CatalogRepository
|
||||
|
||||
|
||||
def load_backend_config(backend_row) -> dict:
    """Decode the JSON stored in the backend row's ``config_json`` column.

    A missing or empty ``config_json`` value is decoded as an empty JSON
    object, so the result is ``{}`` in that case.
    """
    raw_config = backend_row["config_json"]
    if not raw_config:
        raw_config = "{}"
    return json.loads(raw_config)
|
||||
|
||||
|
||||
def normalize_prefix(value: str | None) -> str:
    """Return *value* as text with outer whitespace and outer slashes removed.

    Falsy input (``None``, ``""``) yields an empty string.
    """
    text = str(value) if value else ""
    trimmed = text.strip()
    return trimmed.strip("/")
|
||||
|
||||
|
||||
def build_target_locator(base_prefix: str | None, relative_locator: str) -> str:
    """Join the backend's base prefix and a relative locator with one slash.

    The relative locator is stripped of surrounding whitespace and leading
    slashes. When the normalized prefix is empty, the cleaned relative
    locator is returned on its own.
    """
    relative_part = str(relative_locator).strip().lstrip("/")
    prefix_part = normalize_prefix(base_prefix)
    if prefix_part:
        return f"{prefix_part}/{relative_part}"
    return relative_part
|
||||
|
||||
|
||||
def derive_public_url(public_base_url: str | None, locator: str, base_prefix: str | None) -> str | None:
    """Build the public URL for *locator* under *public_base_url*.

    Returns ``None`` when no base URL is configured. When the base URL
    already ends with ``/<prefix>`` and the locator starts with
    ``<prefix>/``, the prefix is dropped from the locator so it does not
    appear twice in the result. An empty remaining locator yields the base
    URL itself.
    """
    root_url = str(public_base_url or "").strip().rstrip("/")
    if not root_url:
        return None

    prefix = normalize_prefix(base_prefix)
    object_path = str(locator).strip().lstrip("/")

    # Only de-duplicate when BOTH the base URL and the locator carry the prefix.
    if prefix and root_url.endswith(f"/{prefix}"):
        leading_segment = f"{prefix}/"
        if object_path.startswith(leading_segment):
            object_path = object_path[len(leading_segment) :]

    object_path = object_path.lstrip("/")
    return f"{root_url}/{object_path}" if object_path else root_url
|
||||
|
||||
|
||||
def build_s3_client(backend_row):
    """Create a boto3 S3 client from a storage backend row.

    Credentials are read from the environment variables
    ``<prefix>_ACCESS_KEY_ID``, ``<prefix>_SECRET_ACCESS_KEY`` and
    (optionally) ``<prefix>_SESSION_TOKEN``, where ``<prefix>`` is the
    backend config's ``credential_env_prefix``.

    Config keys read from ``config_json``: ``credential_env_prefix``,
    ``addressing_style``, ``region`` and ``endpoint``.

    Raises:
        RuntimeError: if ``credential_env_prefix`` is not configured, if the
            access key or secret key is missing from the environment, or if
            boto3 cannot be imported.
    """
    config = load_backend_config(backend_row)
    credential_env_prefix = str(config.get("credential_env_prefix") or "").strip()
    if not credential_env_prefix:
        raise RuntimeError("Object storage backend is missing credential_env_prefix")

    access_key_id = os.getenv(f"{credential_env_prefix}_ACCESS_KEY_ID")
    secret_access_key = os.getenv(f"{credential_env_prefix}_SECRET_ACCESS_KEY")
    session_token = os.getenv(f"{credential_env_prefix}_SESSION_TOKEN")
    if not access_key_id or not secret_access_key:
        raise RuntimeError(f"Missing credentials for backend {backend_row['name']}")

    # Imported lazily so the module can be loaded without boto3 installed.
    try:
        import boto3
        from botocore.config import Config
    except ImportError as exc:
        raise RuntimeError("boto3 is required for object storage uploads") from exc

    # Only the two recognised addressing styles are forwarded; any other
    # value leaves the client on its default configuration.
    addressing_style = str(config.get("addressing_style") or "").strip().lower()
    client_config = None
    if addressing_style in {"path", "virtual"}:
        client_config = Config(s3={"addressing_style": addressing_style})

    # A region of "auto" is treated the same as no region at all.
    region = str(config.get("region") or "").strip()
    if region.lower() == "auto":
        region = ""

    # Normalise a blank endpoint to None, mirroring the region handling:
    # an empty endpoint string is not a valid URL for the client.
    endpoint = str(config.get("endpoint") or "").strip()

    return boto3.client(
        "s3",
        endpoint_url=endpoint or None,
        region_name=region or None,
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        aws_session_token=session_token or None,
        config=client_config,
    )
|
||||
|
||||
|
||||
class S3CompatibleUploader:
    """Uploads local files through an S3 client for one storage backend."""

    def __init__(self, backend_row, client=None):
        """Keep the backend row, decode its config and resolve the client.

        When *client* is falsy a new one is built from *backend_row*.
        """
        self.backend = backend_row
        self.config = load_backend_config(backend_row)
        self.client = client if client else build_s3_client(backend_row)

    def upload_file(self, local_path: Path, container_name: str, locator: str) -> dict[str, str | None]:
        """Upload *local_path* to *container_name* under the key *locator*.

        Returns a dict with ``public_url`` (derived from the backend config,
        possibly ``None``) and ``download_url`` (always ``None``).
        """
        self.client.upload_file(str(local_path), container_name, locator, ExtraArgs=None)
        settings = self.config
        public_url = derive_public_url(
            settings.get("public_base_url"),
            locator,
            settings.get("base_prefix"),
        )
        return {"public_url": public_url, "download_url": None}
|
||||
|
||||
|
||||
class CatalogUploader:
    """Queues and executes uploads of local files to an object storage backend.

    All persistence (backend lookup, task queue, status updates, remote file
    records) goes through the injected repository; the S3 client is obtained
    from ``client_factory``.
    """

    def __init__(
        self,
        repository: CatalogRepository,
        worker_count: int = 4,
        client_factory=None,
    ):
        """Store collaborators.

        Args:
            repository: data access object used for every queue operation.
            worker_count: number of worker threads used by :meth:`run`;
                values below 1 are raised to 1.
            client_factory: callable taking a backend row and returning an
                S3 client. Defaults to ``build_s3_client``.
        """
        self.repository = repository
        self.worker_count = max(1, worker_count)
        # The default is wrapped in a lambda so ``build_s3_client`` is looked
        # up when the factory is called, not when the uploader is constructed.
        self.client_factory = client_factory or (lambda backend_row: build_s3_client(backend_row))

    def get_backend(self, backend_name: str):
        """Return the backend row named *backend_name*.

        Raises:
            RuntimeError: if no such backend exists or its ``backend_type``
                is not ``"object_storage"``.
        """
        backend = self.repository.get_backend_by_name(backend_name)
        if backend is None:
            raise RuntimeError(f"Unknown backend: {backend_name}")
        if backend["backend_type"] != "object_storage":
            raise RuntimeError(f"Backend {backend_name} is not object storage")
        return backend

    def enqueue_missing_uploads(
        self,
        backend_name: str,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
    ) -> int:
        """Enqueue an upload task for every candidate the repository reports.

        Returns the number of distinct task ids returned by the repository.
        """
        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        candidates = self.repository.list_missing_object_upload_candidates(
            target_backend_id=backend_id,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )
        # Count each task id once. NOTE(review): presumably the repository
        # can hand back an already existing task id for a duplicate
        # candidate - confirm against enqueue_upload_task.
        seen_task_ids: set[int] = set()
        for candidate in candidates:
            task_id = self.repository.enqueue_upload_task(
                file_asset_id=int(candidate["file_asset_id"]),
                source_location_id=int(candidate["source_location_id"]),
                target_backend_id=backend_id,
                target_container_name=candidate["target_container_name"],
                target_locator=candidate["target_locator"],
            )
            seen_task_ids.add(task_id)
        return len(seen_task_ids)

    def process_upload_task_row(self, task_row, backend_name: str) -> str:
        """Process one already-fetched task row against *backend_name*.

        Returns ``"succeeded"`` or ``"failed"`` for a real task, and
        ``"skipped"`` when *task_row* is ``None`` (previously this case
        crashed with ``TypeError`` inside ``_process_task``).
        """
        backend = self.get_backend(backend_name)
        if task_row is None:
            return "skipped"
        source_path_text = task_row["absolute_path"]
        uploader = None
        # Only build a client when there is actually a file to upload.
        if source_path_text and Path(source_path_text).exists():
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))
        return self._process_task(task_row, backend, uploader)

    def run(self, backend_name: str, limit: int | None = None) -> dict[str, int]:
        """Drain pending upload tasks for *backend_name* with a thread pool.

        Args:
            backend_name: name of an object storage backend.
            limit: when given, at most as many tasks as the repository lists
                as pending under this limit are claimed and processed. When
                ``None`` the workers keep claiming until the queue is empty.

        Returns:
            A summary dict with the keys ``queued``, ``succeeded``,
            ``failed``, ``skipped`` and ``workers``.
        """
        import threading  # local import: only needed for the claim budget

        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        pending_tasks = self.repository.list_pending_upload_tasks(target_backend_id=backend_id, limit=limit)

        # Share one uploader between workers, but only create it when at
        # least one listed task has a source file that exists.
        uploader = None
        if any(row["absolute_path"] and Path(row["absolute_path"]).exists() for row in pending_tasks):
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))

        # Claim budget: without it the workers would ignore ``limit`` and
        # process every pending task, contradicting the "queued" count.
        budget_lock = threading.Lock()
        remaining = len(pending_tasks) if limit is not None else None

        def claim_task():
            nonlocal remaining
            if remaining is not None:
                with budget_lock:
                    if remaining <= 0:
                        return None
                    remaining -= 1
            return self.repository.claim_next_upload_task(target_backend_id=backend_id)

        def worker():
            local_summary = {"succeeded": 0, "failed": 0, "skipped": 0}
            while True:
                task = claim_task()
                if task is None:
                    break
                result = self._process_task(task, backend, uploader)
                local_summary[result] += 1
            return local_summary

        summary = {
            "queued": len(pending_tasks),
            "succeeded": 0,
            "failed": 0,
            "skipped": 0,
            "workers": self.worker_count,
        }
        with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
            futures = [executor.submit(worker) for _ in range(self.worker_count)]
            for future in as_completed(futures):
                worker_summary = future.result()
                for key in ("succeeded", "failed", "skipped"):
                    summary[key] += int(worker_summary[key])
        return summary

    def _process_task(self, task, backend, uploader: S3CompatibleUploader | None) -> str:
        """Upload one task's source file and record the outcome.

        Marks the task ``failed`` when the source file is missing or when
        any step of the upload/recording raises; otherwise records the
        remote file and marks the task ``succeeded``. Returns the status
        string that was written.
        """
        task_id = int(task["id"])
        source_path_text = task["absolute_path"]
        source_path = Path(source_path_text) if source_path_text else None
        if source_path is None or not source_path.exists():
            missing_path = str(source_path) if source_path is not None else "<missing>"
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"Source file does not exist: {missing_path}",
            )
            return "failed"

        try:
            # Build a one-off uploader when the caller did not supply one;
            # done inside the try so client errors fail the task.
            active_uploader = uploader or S3CompatibleUploader(backend, client=self.client_factory(backend))
            # The task's own container wins; the backend's is the fallback.
            container_name = task["target_container_name"] or backend["container_name"]
            result = active_uploader.upload_file(
                local_path=source_path,
                container_name=container_name,
                locator=task["target_locator"],
            )
            self.repository.record_remote_file(
                file_asset_id=int(task["file_asset_id"]),
                backend_id=int(task["target_backend_id"]),
                container_name=container_name,
                locator=task["target_locator"],
                public_url=result["public_url"],
                download_url=result["download_url"],
            )
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="succeeded",
                last_error=None,
            )
            return "succeeded"
        except Exception as exc:
            # Deliberately broad: any failure is persisted on the task row
            # instead of aborting the worker.
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"{type(exc).__name__}: {exc}",
            )
            return "failed"
|
||||
Reference in New Issue
Block a user