Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,262 @@
from __future__ import annotations
import argparse
import csv
import json
import re
import sqlite3
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple
# Substrings looked for in the lowercased song name; any hit yields the
# "name_keyword" reason code.
LIVE_NAME_KEYWORDS = (
    "live",
    "现场",
    "演唱会",
)

# Substrings looked for in the lowercased album title; any hit yields the
# "album_show_keyword" reason code.
# NOTE(review): apart from the first entry these look like titles of televised
# music shows -- confirm with the catalog owners before extending the list.
LIVE_ALBUM_KEYWORDS = (
    "演唱会",
    "我是歌手",
    "我们的歌",
    "声生不息",
    "时光音乐会",
    "天赐的声音",
    "披荆斩棘",
    "乘风",
)
@dataclass(frozen=True)
class SuspectedLiveSong:
    """Immutable record describing one song flagged as a possible live version."""

    # Primary key of the row in the ``songs`` table.
    song_id: int
    # Source platform and that platform's own identifier for the song.
    platform: str
    remote_song_id: str
    # Descriptive columns copied from the ``songs`` row.
    name: str
    singers: str
    album: str
    # Why the song was flagged, e.g. "name_keyword" / "album_show_keyword".
    reason_codes: Tuple[str, ...]
def _normalize_text(value: Any) -> str:
    """Return *value* as a string with surrounding whitespace removed.

    ``None`` becomes the empty string. Every other value is passed through
    ``str()``, so falsy-but-meaningful values such as ``0`` are kept as
    ``"0"`` rather than being collapsed to ``""``.
    """
    # Test for None explicitly: a truthiness check would also discard 0/False.
    if value is None:
        return ""
    return str(value).strip()
def _normalize_compact_text(value: Any) -> str:
    """Return *value* lowercased with separator characters stripped out.

    The value is first cleaned by ``_normalize_text`` and lowercased; every
    run of whitespace, non-word characters or underscores is then deleted so
    that only word characters remain.
    """
    lowered = _normalize_text(value).lower()
    separator_runs = r"[\s\W_]+"
    return re.sub(separator_runs, "", lowered, flags=re.UNICODE)
def detect_suspected_live_reason_codes(name: Any, album: Any) -> List[str]:
    """Return the reason codes that mark a song as a suspected live version.

    Args:
        name: Song title; normalized with ``_normalize_text`` before matching.
        album: Album title; normalized the same way.

    Returns:
        A list holding zero or more of ``"name_keyword"`` and
        ``"album_show_keyword"``, in that order.
    """
    title = _normalize_text(name)
    record = _normalize_text(album)
    found: List[str] = []

    # Name check: case-insensitive substring match against the name keywords.
    title_lower = title.lower()
    for keyword in LIVE_NAME_KEYWORDS:
        if keyword in title_lower:
            found.append("name_keyword")
            break

    # An empty album or the literal placeholder "NULL" carries no information.
    if record == "" or record.upper() == "NULL":
        return found

    # Skip the album check when the compacted album title begins with (or
    # equals) the compacted song title; an exact match is a prefix match too.
    title_key = _normalize_compact_text(title)
    record_key = _normalize_compact_text(record)
    if title_key and record_key and record_key.startswith(title_key):
        return found

    # Album check: case-insensitive substring match against the album keywords.
    record_lower = record.lower()
    if any(keyword in record_lower for keyword in LIVE_ALBUM_KEYWORDS):
        found.append("album_show_keyword")
    return found
def _connect_readonly_database(db_path: str | Path) -> sqlite3.Connection:
    """Open the SQLite file at *db_path* in read-only mode.

    Args:
        db_path: Location of the database file; resolved to an absolute path.

    Returns:
        A connection whose ``row_factory`` is ``sqlite3.Row``.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    resolved = Path(db_path).resolve()
    if resolved.exists():
        # A file: URI with mode=ro makes SQLite refuse writes on this connection.
        read_only_uri = resolved.as_uri() + "?mode=ro"
        connection = sqlite3.connect(read_only_uri, uri=True)
        connection.row_factory = sqlite3.Row
        return connection
    raise FileNotFoundError(f"Database not found: {resolved}")
def _song_scan_query(downloaded_only: bool) -> str:
    """Build the SQL statement that lists candidate songs, newest id first.

    The statement defines ``downloaded_song_ids`` (songs that have an active
    file location on a ``local_fs`` backend) and LEFT JOINs it onto ``songs``.

    Args:
        downloaded_only: When true, keep only songs present in
            ``downloaded_song_ids``; when false, return every song.

    Returns:
        The SQL text; it takes no bound parameters.
    """
    if downloaded_only:
        row_filter = "WHERE d.song_id IS NOT NULL"
    else:
        row_filter = ""
    # The leading and trailing empty entries reproduce the newline right after
    # the opening and right before the closing of the statement text.
    statement_lines = (
        "",
        "WITH downloaded_song_ids AS (",
        "SELECT DISTINCT fa.song_id",
        "FROM file_locations AS fl",
        "JOIN file_assets AS fa ON fa.id = fl.file_asset_id",
        "JOIN storage_backends AS sb ON sb.id = fl.backend_id",
        "WHERE fl.status = 'active'",
        "AND sb.backend_type = 'local_fs'",
        ")",
        "SELECT",
        "s.id,",
        "s.platform,",
        "s.remote_song_id,",
        "s.name,",
        "s.singers,",
        "s.album",
        "FROM songs AS s",
        "LEFT JOIN downloaded_song_ids AS d ON d.song_id = s.id",
        row_filter,
        "ORDER BY s.id DESC",
        "",
    )
    return "\n".join(statement_lines)
def scan_suspected_live_songs(
    db_path: str | Path,
    *,
    downloaded_only: bool = True,
    limit: Optional[int] = None,
) -> List[SuspectedLiveSong]:
    """Scan the catalog database and return songs that look like live versions.

    Args:
        db_path: Path to the SQLite database; opened read-only.
        downloaded_only: When true, only songs with an active local file are
            scanned; when false, every song is scanned.
        limit: Maximum number of matches to return. ``None`` means no cap;
            zero or a negative number yields an empty list without opening
            the database.

    Returns:
        Matching songs in descending ``songs.id`` order, each carrying the
        reason codes that triggered the match.

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    normalized_limit = None if limit is None else max(int(limit), 0)
    if normalized_limit == 0:
        return []

    matches: List[SuspectedLiveSong] = []
    conn = _connect_readonly_database(db_path)
    try:
        # Iterate the cursor lazily instead of fetchall(): once `limit` matches
        # are collected the loop stops and the remaining rows are never read.
        for row in conn.execute(_song_scan_query(downloaded_only)):
            reason_codes = detect_suspected_live_reason_codes(
                name=row["name"],
                album=row["album"],
            )
            if not reason_codes:
                continue
            matches.append(
                SuspectedLiveSong(
                    song_id=int(row["id"]),
                    platform=_normalize_text(row["platform"]),
                    remote_song_id=_normalize_text(row["remote_song_id"]),
                    name=_normalize_text(row["name"]),
                    singers=_normalize_text(row["singers"]),
                    album=_normalize_text(row["album"]),
                    reason_codes=tuple(reason_codes),
                )
            )
            if normalized_limit is not None and len(matches) >= normalized_limit:
                break
    finally:
        # Close the connection whether the scan finished, stopped early or failed.
        conn.close()
    return matches
def _song_to_row(song: SuspectedLiveSong) -> Dict[str, Any]:
    """Flatten *song* into a plain dict suitable for the report writers.

    Scalar fields are copied unchanged; ``reason_codes`` is joined into a
    single comma-separated string.
    """
    copied_fields = (
        "song_id",
        "platform",
        "remote_song_id",
        "name",
        "singers",
        "album",
    )
    row: Dict[str, Any] = {}
    for field_name in copied_fields:
        row[field_name] = getattr(song, field_name)
    row["reason_codes"] = ",".join(song.reason_codes)
    return row
def _write_csv(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
    """Write *rows* to *stream* as CSV: a header line, then one line per row.

    Columns are emitted in a fixed order; each row dict is expected to use
    exactly those column names as keys.
    """
    columns = "song_id platform remote_song_id name singers album reason_codes".split()
    csv_writer = csv.DictWriter(stream, fieldnames=columns)
    csv_writer.writeheader()
    csv_writer.writerows(rows)
def _write_jsonl(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
    """Write *rows* to *stream* as JSON Lines, one JSON object per line.

    Non-ASCII characters are written as-is rather than as ``\\u`` escapes.
    """
    for record in rows:
        encoded = json.dumps(record, ensure_ascii=False)
        stream.write(f"{encoded}\n")
def _write_table(rows: Iterable[Dict[str, Any]], stream: TextIO) -> None:
    """Write *rows* to *stream* as tab-separated text with a header line.

    Every cell is converted with ``str()``; a row missing one of the columns
    raises ``KeyError``.
    """
    columns = (
        "song_id",
        "platform",
        "remote_song_id",
        "name",
        "singers",
        "album",
        "reason_codes",
    )
    stream.write("\t".join(columns) + "\n")
    for record in rows:
        cells = [str(record[column]) for column in columns]
        stream.write("\t".join(cells) + "\n")
def _write_report(
    songs: List[SuspectedLiveSong],
    *,
    output_format: str,
    stream: TextIO,
) -> None:
    """Render *songs* to *stream* in the requested format.

    Args:
        songs: Matches to report.
        output_format: ``"csv"`` or ``"jsonl"``; any other value produces the
            tab-separated table.
        stream: Writable text stream receiving the report.
    """
    rows = list(map(_song_to_row, songs))
    if output_format == "csv":
        emit = _write_csv
    elif output_format == "jsonl":
        emit = _write_jsonl
    else:
        # The table layout is the fallback for every other format name.
        emit = _write_table
    emit(rows, stream)
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the live-song scanner.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``db``, ``limit``, ``include_undownloaded``,
        ``format`` and ``output`` attributes.
    """
    cli = argparse.ArgumentParser(
        description="List suspected live/stage versions without modifying catalog-sync data.",
    )
    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output lists them in the same sequence.
    option_specs = (
        ("--db", {"required": True, "help": "Path to catalogsync.db"}),
        (
            "--limit",
            {
                "type": int,
                "default": None,
                "help": "Maximum number of matched songs to return.",
            },
        ),
        (
            "--include-undownloaded",
            {
                "action": "store_true",
                "help": "Scan all songs instead of only songs with active local files.",
            },
        ),
        (
            "--format",
            {
                "choices": ("table", "csv", "jsonl"),
                "default": "table",
                "help": "Output format for stdout and optional file output.",
            },
        ),
        ("--output", {"help": "Optional path to write the report file."}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args(argv)
def main(argv: Optional[List[str]] = None) -> int:
    """Run the scanner CLI: scan the database, print the report, save a copy.

    The match count and the saved file path go to stderr so that stdout
    carries only the report itself.

    Args:
        argv: Command-line arguments; ``None`` means use ``sys.argv[1:]``.

    Returns:
        Process exit status, always ``0`` when no exception is raised.
    """
    options = parse_args(argv)
    matched = scan_suspected_live_songs(
        options.db,
        downloaded_only=not options.include_undownloaded,
        limit=options.limit,
    )
    print(f"matched_song_count={len(matched)}", file=sys.stderr)
    _write_report(matched, output_format=options.format, stream=sys.stdout)

    if not options.output:
        return 0

    destination = Path(options.output).resolve()
    # Create missing parent directories so the report file can be opened.
    destination.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line endings to the writers (csv emits its own).
    with destination.open("w", encoding="utf-8", newline="") as report_file:
        _write_report(matched, output_format=options.format, stream=report_file)
    print(f"wrote_report={destination}", file=sys.stderr)
    return 0
if __name__ == "__main__":
    # Script entry point: the return value of main() becomes the exit status.
    sys.exit(main())