Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,16 @@
<div class="playlist-wrap">
<div class="playlist-card">
<a href="https://www.kuwo.cn/playlist_detail/3694434192" title="Kuwo Playlist A">
<img src="https://img1.kuwo.cn/playlist/a.jpg" />
<span class="name">Kuwo Playlist A</span>
<span class="num">321.5万</span>
</a>
</div>
<div class="playlist-card">
<a href="https://www.kuwo.cn/playlist_detail/3690729662" title="Kuwo Playlist B">
<img src="https://img1.kuwo.cn/playlist/b.jpg" />
<span class="name">Kuwo Playlist B</span>
<span class="num">6789</span>
</a>
</div>
</div>
@@ -0,0 +1,15 @@
<script>
window.__NUXT__=(function(a,b,c,d,e,f,g,h){
return {
data: [{
bangMenu: [{
name: f,
list: [
{sourceid:a, intro:"rule", name:b, id:c, source:"2", pic:"https://zimg.kuwo.cn/bang/a.png", pub:e, listencnt:g},
{sourceid:"93", intro:"rule", name:d, id:"489929", source:"2", pic:"https://zimg.kuwo.cn/bang/b.png", pub:e, listencnt:h}
]
}]
}]
}
})("16","Kuwo Top A","489927","Kuwo Top B","Today","Official","1234567","765432");
</script>
@@ -0,0 +1,24 @@
<div id="m-pl-container">
<ul>
<li>
<div class="u-cover u-cover-1">
<img src="https://p1.music.126.net/cover-a.jpg" />
<span class="nb">123万</span>
<a class="msk" href="/playlist?id=7583298906" title="华语清新收藏夹"></a>
</div>
<p class="dec">
<a title="华语清新收藏夹">华语清新收藏夹</a>
</p>
</li>
<li>
<div class="u-cover u-cover-1">
<img src="https://p1.music.126.net/cover-b.jpg" />
<span class="nb">4567</span>
<a class="msk" href="/playlist?id=9345678901" title="深夜循环"></a>
</div>
<p class="dec">
<a title="深夜循环">深夜循环</a>
</p>
</li>
</ul>
</div>
@@ -0,0 +1,19 @@
{
"code": 200,
"list": [
{
"id": 19723756,
"name": "Toplist A",
"coverImgUrl": "https://p1.music.126.net/top-a.jpg",
"updateFrequency": "Just updated",
"playCount": 7654321
},
{
"id": 3779629,
"name": "Toplist B",
"coverImgUrl": "https://p1.music.126.net/top-b.jpg",
"updateFrequency": "Daily",
"subscribedCount": 345678
}
]
}
@@ -0,0 +1,25 @@
{
"code": 0,
"data": {
"list": [
{
"dissid": "7707261125",
"dissname": "甜度爆表 | 旋律说唱狙击少女心",
"imgurl": "http://qpic.y.qq.com/music_cover/a.jpg",
"listennum": 8526515,
"creator": {
"name": "我想要两颗西柚"
}
},
{
"dissid": "7578943835",
"dissname": "丧系Rap丨渐渐不再期待任何东西",
"imgurl": "http://qpic.y.qq.com/music_cover/b.jpg",
"listennum": 1807460,
"creator": {
"name": "半杯柠檬茶"
}
}
]
}
}
@@ -0,0 +1,19 @@
{
"code": 0,
"data": {
"topList": [
{
"id": 4,
"topTitle": "巅峰榜·流行指数",
"picUrl": "http://y.gtimg.cn/music/photo_new/a.jpg",
"listenCount": 7953220
},
{
"id": 26,
"topTitle": "巅峰榜·热歌",
"picUrl": "http://y.gtimg.cn/music/photo_new/b.jpg",
"listenCount": 19600000
}
]
}
}
@@ -0,0 +1,130 @@
import os
import subprocess
import tempfile
from pathlib import Path
from unittest.mock import patch
from musicdl.catalogsync.catalog_export import run_catalog_export_command
def test_run_catalog_export_command_skips_when_command_missing() -> None:
    """An empty config plus an empty environment yields a ``skipped`` result."""
    # Clear the whole environment so no export command can be picked up from it.
    with patch.dict(os.environ, {}, clear=True):
        outcome = run_catalog_export_command({})

    assert outcome.status == "skipped"
    assert outcome.returncode is None
    assert outcome.stdout == ""
    assert outcome.stderr == ""
def test_run_catalog_export_command_succeeds_with_config_values() -> None:
    """Explicit config values are used even when the environment disagrees."""
    command = "python -c \"print('export ok')\""
    # Conflicting environment entries; the call below must not use them.
    conflicting_env = {
        "CATALOG_EXPORT_COMMAND": "python -c \"raise SystemExit(9)\"",
        "CATALOG_EXPORT_WORKDIR": "C:/ignored",
    }
    fake_completed = subprocess.CompletedProcess(
        args=command,
        returncode=0,
        stdout="export ok\n",
        stderr="",
    )

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        workdir = str(Path(tmpdir))
        config = {
            "CATALOG_EXPORT_COMMAND": command,
            "CATALOG_EXPORT_WORKDIR": workdir,
        }
        with patch.dict(os.environ, conflicting_env, clear=False):
            with patch(
                "musicdl.catalogsync.catalog_export.subprocess.run",
                return_value=fake_completed,
            ) as mocked_run:
                result = run_catalog_export_command(config)

    mocked_run.assert_called_once_with(
        command,
        shell=True,
        cwd=workdir,
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.status == "succeeded"
    assert result.returncode == 0
    assert result.stdout == "export ok\n"
    assert result.stderr == ""
def test_run_catalog_export_command_fails_with_env_values() -> None:
    """With an empty config the environment supplies the command; non-zero exit means failed."""
    command = "python -c \"import sys; sys.stderr.write('boom')\""
    fake_completed = subprocess.CompletedProcess(
        args=command,
        returncode=5,
        stdout="",
        stderr="boom",
    )

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        workdir = str(Path(tmpdir))
        env_values = {
            "CATALOG_EXPORT_COMMAND": command,
            "CATALOG_EXPORT_WORKDIR": workdir,
        }
        with patch.dict(os.environ, env_values, clear=False):
            with patch(
                "musicdl.catalogsync.catalog_export.subprocess.run",
                return_value=fake_completed,
            ) as mocked_run:
                result = run_catalog_export_command({})

    mocked_run.assert_called_once_with(
        command,
        shell=True,
        cwd=workdir,
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.status == "failed"
    assert result.returncode == 5
    assert result.stdout == ""
    assert result.stderr == "boom"
def test_run_catalog_export_command_fails_when_subprocess_raises() -> None:
    """An ``OSError`` from ``subprocess.run`` becomes a failed result carrying its message."""
    command = "python -c \"print('never runs')\""

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        workdir = str(Path(tmpdir))
        env_values = {
            "CATALOG_EXPORT_COMMAND": command,
            "CATALOG_EXPORT_WORKDIR": workdir,
        }
        with patch.dict(os.environ, env_values, clear=False):
            with patch(
                "musicdl.catalogsync.catalog_export.subprocess.run",
                side_effect=OSError("bad cwd"),
            ) as mocked_run:
                result = run_catalog_export_command({})

    mocked_run.assert_called_once_with(
        command,
        shell=True,
        cwd=workdir,
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.status == "failed"
    assert result.command == command
    assert result.workdir == workdir
    # No process ever ran, so there is no exit code and no captured stdout.
    assert result.returncode is None
    assert result.stdout == ""
    assert result.stderr == "bad cwd"
+477
View File
@@ -0,0 +1,477 @@
import sqlite3
import tempfile
import unittest
from contextlib import closing
from pathlib import Path
from unittest.mock import ANY, patch
from click.testing import CliRunner
class CatalogCliTests(unittest.TestCase):
    """Check that each catalogsync CLI command parses its options and calls its collaborator.

    Application-level collaborators are replaced with mocks, so these tests
    only cover argument parsing, defaults and call wiring.
    """

    # CliRunner removes a variable for the duration of an invocation when its
    # override value is None.  Tests asserting the built-in worker default use
    # this so an ambient DOWNLOAD_WORKERS (developer shell, CI) cannot change
    # the outcome.
    _WITHOUT_WORKER_ENV = {"DOWNLOAD_WORKERS": None}

    @staticmethod
    def _invoke(args, env=None):
        """Run the CLI in-process with *args* and optional environment overrides."""
        from musicdl.catalogsync.cli import cli

        return CliRunner().invoke(cli, args, env=env)

    @staticmethod
    def _patched_application():
        """Return a patcher replacing the application class the CLI instantiates."""
        return patch("musicdl.catalogsync.cli.CatalogSyncApplication")

    def test_init_db_command_creates_sqlite_file(self):
        """``init-db`` exits cleanly and leaves a database containing a ``songs`` table."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            result = self._invoke(
                ["init-db", "--db", str(db_path), "--library-root", str(library_root)]
            )
            self.assertEqual(0, result.exit_code, msg=result.output)
            self.assertTrue(db_path.exists())
            with closing(sqlite3.connect(db_path)) as conn:
                table_names = {
                    row[0]
                    for row in conn.execute(
                        "SELECT name FROM sqlite_master WHERE type = 'table'"
                    ).fetchall()
                }
            self.assertIn("songs", table_names)

    def test_init_db_command_creates_resolver_stats_side_db(self):
        """``init-db`` also creates the resolver-stats database derived from ``--db``."""
        from musicdl.catalogsync.resolver_stats import default_resolver_stats_db_path

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            result = self._invoke(
                ["init-db", "--db", str(db_path), "--library-root", str(library_root)]
            )
            self.assertEqual(0, result.exit_code, msg=result.output)
            resolver_stats_db_path = default_resolver_stats_db_path(db_path)
            self.assertTrue(resolver_stats_db_path.exists())

    def test_run_command_wires_collect_sync_and_download_steps(self):
        """``run`` without a playlist file collects, syncs, then downloads with the given options."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "run",
                        "--db", str(db_path),
                        "--sources", "netease,qq",
                        "--download-sources", "qq,kuwo,migu",
                        "--library-root", str(Path(tmpdir) / "library"),
                        "--workers", "3",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.collect_playlists.assert_called_once()
            app.sync_playlist_catalog.assert_called_once()
            app.download_pending.assert_called_once_with(
                ["netease", "qq"],
                limit=None,
                workers=3,
                download_sources=["qq", "kuwo", "migu"],
                lyrics_enabled=True,
                overwrite_lyrics=False,
            )

    def test_run_command_uses_playlist_file_branch_without_collect(self):
        """``run --playlist-file`` skips collection and delegates to ``run_playlist_file``."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            playlist_file = Path(tmpdir) / "playlists.txt"
            playlist_file.write_text(
                "https://music.163.com/#/playlist?id=17745989905\n",
                encoding="utf-8",
            )
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "run",
                        "--db", str(db_path),
                        "--library-root", str(Path(tmpdir) / "library"),
                        "--playlist-file", str(playlist_file),
                        "--download-sources", "qq,kuwo",
                    ],
                    # The assertion below expects the built-in worker default.
                    env=self._WITHOUT_WORKER_ENV,
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.collect_playlists.assert_not_called()
            app.run_playlist_file.assert_called_once_with(
                playlist_file=str(playlist_file),
                limit=None,
                workers=10,
                download_sources=["qq", "kuwo"],
                lyrics_enabled=True,
                overwrite_lyrics=False,
            )

    def test_download_command_defaults_workers_to_ten(self):
        """``download`` without ``--workers`` and without DOWNLOAD_WORKERS uses ten workers."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "download",
                        "--db", str(db_path),
                        "--sources", "netease,qq",
                        "--download-sources", "qq,kuwo,migu",
                        "--library-root", str(Path(tmpdir) / "library"),
                    ],
                    # Make the "default" genuinely the default, whatever the host env holds.
                    env=self._WITHOUT_WORKER_ENV,
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.download_pending.assert_called_once_with(
                ["netease", "qq"],
                limit=None,
                workers=10,
                download_sources=["qq", "kuwo", "migu"],
                lyrics_enabled=True,
                overwrite_lyrics=False,
            )

    def test_download_command_reads_workers_from_download_workers_env(self):
        """``download`` takes its worker count from DOWNLOAD_WORKERS when the flag is absent."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "download",
                        "--db", str(db_path),
                        "--sources", "netease,qq",
                        "--download-sources", "qq,kuwo,migu",
                        "--library-root", str(Path(tmpdir) / "library"),
                    ],
                    env={"DOWNLOAD_WORKERS": "8"},
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.download_pending.assert_called_once_with(
                ["netease", "qq"],
                limit=None,
                workers=8,
                download_sources=["qq", "kuwo", "migu"],
                lyrics_enabled=True,
                overwrite_lyrics=False,
            )

    def test_download_command_forwards_workers(self):
        """An explicit ``--workers`` value is passed through to ``download_pending``."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "download",
                        "--db", str(db_path),
                        "--sources", "netease,qq",
                        "--download-sources", "qq,kuwo,migu",
                        "--library-root", str(Path(tmpdir) / "library"),
                        "--workers", "5",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.download_pending.assert_called_once_with(
                ["netease", "qq"],
                limit=None,
                workers=5,
                download_sources=["qq", "kuwo", "migu"],
                lyrics_enabled=True,
                overwrite_lyrics=False,
            )

    def test_download_command_forwards_lyrics_flags(self):
        """``--no-lyrics`` and ``--overwrite-lyrics`` map onto the lyrics keyword arguments."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "download",
                        "--db", str(db_path),
                        "--sources", "netease",
                        "--download-sources", "qq",
                        "--library-root", str(Path(tmpdir) / "library"),
                        "--no-lyrics",
                        "--overwrite-lyrics",
                    ],
                    # The assertion below expects the built-in worker default.
                    env=self._WITHOUT_WORKER_ENV,
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.download_pending.assert_called_once_with(
                ["netease"],
                limit=None,
                workers=10,
                download_sources=["qq"],
                lyrics_enabled=False,
                overwrite_lyrics=True,
            )

    def test_lyrics_command_wires_application_method_and_filters(self):
        """``lyrics`` forwards its filters and prints each progress callback it receives."""

        def sync_local_lyrics_side_effect(*args, **kwargs):
            # Drive the callback supplied by the CLI twice: at start and at completion.
            progress_callback = kwargs["progress_callback"]
            progress_callback(
                total=10,
                processed=0,
                saved=0,
                skipped=0,
                failed=0,
                progress_percent=0,
            )
            progress_callback(
                total=10,
                processed=10,
                saved=7,
                skipped=2,
                failed=1,
                progress_percent=100,
            )
            return {"total": 10, "processed": 10, "saved": 7, "skipped": 2, "failed": 1}

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                app.sync_local_lyrics.side_effect = sync_local_lyrics_side_effect
                result = self._invoke(
                    [
                        "lyrics",
                        "--db", str(db_path),
                        "--sources", "netease,qq",
                        "--playlist-ids", "12,15",
                        "--limit", "200",
                        "--workers", "8",
                        "--overwrite-lyrics",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.sync_local_lyrics.assert_called_once_with(
                sources=["netease", "qq"],
                playlist_ids=[12, 15],
                limit=200,
                workers=8,
                progress_callback=ANY,
                overwrite_lyrics=True,
            )
            self.assertIn("Lyrics progress: 0/10 (0%)", result.output)
            self.assertIn("Lyrics progress: 10/10 (100%)", result.output)

    def test_register_object_backend_command_wires_application_method(self):
        """``register-object-backend`` maps its options onto ``register_object_backend``."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "register-object-backend",
                        "--db", str(db_path),
                        "--backend", "main-s3",
                        "--bucket", "music-bucket",
                        "--endpoint", "https://s3.example.com",
                        "--region", "auto",
                        "--base-prefix", "music",
                        "--credential-env-prefix", "CATALOGSYNC_MAIN_S3",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.register_object_backend.assert_called_once_with(
                backend_name="main-s3",
                container_name="music-bucket",
                endpoint="https://s3.example.com",
                region="auto",
                base_prefix="music",
                credential_env_prefix="CATALOGSYNC_MAIN_S3",
                addressing_style=None,
                public_base_url=None,
            )

    def test_upload_command_wires_application_method_and_filters(self):
        """``upload`` parses its comma-separated filters and forwards them to ``upload_files``."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            with self._patched_application() as app_cls:
                app = app_cls.return_value
                result = self._invoke(
                    [
                        "upload",
                        "--db", str(db_path),
                        "--backend", "main-s3",
                        "--sources", "netease,qq",
                        "--playlist-ids", "12,15",
                        "--limit", "200",
                        "--workers", "4",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            app.upload_files.assert_called_once_with(
                backend_name="main-s3",
                sources=["netease", "qq"],
                playlist_ids=[12, 15],
                limit=200,
                workers=4,
            )

    def test_serve_command_wires_ops_web_app_and_uvicorn(self):
        """``serve`` builds the ops web app and hands it to ``uvicorn.run`` with host and port."""
        fake_app = object()
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            env_file = Path(tmpdir) / "catalogsync.env"
            with patch(
                "musicdl.catalogsync.cli.create_ops_web_app",
                return_value=fake_app,
            ) as create_app_mock, patch(
                "musicdl.catalogsync.cli.uvicorn.run"
            ) as uvicorn_run_mock:
                result = self._invoke(
                    [
                        "serve",
                        "--db", str(db_path),
                        "--env-file", str(env_file),
                        "--host", "0.0.0.0",
                        "--port", "19090",
                    ]
                )
            self.assertEqual(0, result.exit_code, msg=result.output)
            create_app_mock.assert_called_once_with(
                db_path=str(db_path),
                env_path=str(env_file),
            )
            uvicorn_run_mock.assert_called_once_with(
                fake_app,
                host="0.0.0.0",
                port=19090,
            )

    def test_serve_command_rejects_out_of_range_port(self):
        """An out-of-range ``--port`` fails option validation before the app is created."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            env_file = Path(tmpdir) / "catalogsync.env"
            with patch("musicdl.catalogsync.cli.create_ops_web_app") as create_app_mock:
                result = self._invoke(
                    [
                        "serve",
                        "--db", str(db_path),
                        "--env-file", str(env_file),
                        "--port", "70000",
                    ]
                )
            self.assertNotEqual(0, result.exit_code)
            self.assertIn("is not in the range", result.output)
            create_app_mock.assert_not_called()


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,121 @@
import json
import unittest
from pathlib import Path
from unittest.mock import patch
# Directory holding the recorded HTML/JSON payloads, located next to this module.
FIXTURES_DIR = Path(__file__).resolve().parent.joinpath("fixtures")


def load_fixture(name: str) -> str:
    """Return the UTF-8 text of the fixture file called *name*."""
    fixture_path = FIXTURES_DIR / name
    return fixture_path.read_text(encoding="utf-8")
class CollectorParsingTests(unittest.TestCase):
    """Feed recorded fixtures to each platform's collector parsers and check the results."""

    def test_netease_playlist_square_parser_extracts_playlist_candidates(self):
        """NetEase playlist-square HTML yields two candidates with normalised play counts."""
        from musicdl.catalogsync.collectors.netease import parse_playlist_square_html

        html = load_fixture("netease_playlist_square.html")
        items = parse_playlist_square_html(html)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("netease", first.platform)
        self.assertEqual("7583298906", first.remote_id)
        self.assertEqual("playlist_url", first.parse_strategy)
        self.assertEqual("https://music.163.com/#/playlist?id=7583298906", first.url)
        # The fixture shows "123万" for the first card and "4567" for the second.
        self.assertEqual(1230000, first.play_count)
        self.assertEqual(4567, items[1].play_count)

    def test_netease_toplist_parser_extracts_playlist_style_rankings(self):
        """NetEase toplist JSON yields playlist-style entries with a play count each."""
        from musicdl.catalogsync.collectors.netease import parse_toplist_payload

        payload = json.loads(load_fixture("netease_toplist.json"))
        items = parse_toplist_payload(payload)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("19723756", first.remote_id)
        self.assertEqual("netease_toplist", first.parse_strategy)
        self.assertEqual("https://music.163.com/#/playlist?id=19723756", first.url)
        self.assertEqual(7654321, first.play_count)
        # The second fixture entry carries only "subscribedCount".
        self.assertEqual(345678, items[1].play_count)

    def test_qq_playlist_square_parser_extracts_playlist_candidates(self):
        """QQ playlist-square JSON yields two playlist-URL candidates."""
        from musicdl.catalogsync.collectors.qq import parse_playlist_square_payload

        payload = json.loads(load_fixture("qq_playlist_square.json"))
        items = parse_playlist_square_payload(payload)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("qq", first.platform)
        self.assertEqual("7707261125", first.remote_id)
        self.assertEqual("playlist_url", first.parse_strategy)
        self.assertEqual("https://y.qq.com/n/ryqq/playlist/7707261125", first.url)

    def test_qq_playlist_square_parser_extracts_collected_song_count_when_present(self):
        """A ``songnum`` field in the QQ payload is exposed as ``collected_song_count``."""
        from musicdl.catalogsync.collectors.qq import parse_playlist_square_payload

        entry = {
            "dissid": "7707261125",
            "dissname": "QQ Count Playlist",
            "songnum": 42,
            "creator": {"name": "Collector"},
        }
        items = parse_playlist_square_payload({"data": {"list": [entry]}})

        self.assertEqual(1, len(items))
        self.assertEqual(42, items[0].collected_song_count)

    def test_qq_toplist_parser_marks_entries_for_special_detail_resolution(self):
        """QQ toplist entries carry the ``qq_toplist`` strategy and a toplist URL."""
        from musicdl.catalogsync.collectors.qq import parse_toplist_payload

        payload = json.loads(load_fixture("qq_toplist.json"))
        items = parse_toplist_payload(payload)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("4", first.remote_id)
        self.assertEqual("qq_toplist", first.parse_strategy)
        self.assertEqual("https://y.qq.com/n/ryqq/toplist/4", first.url)

    def test_kuwo_playlist_square_parser_extracts_ssr_playlist_links(self):
        """Kuwo playlist-square HTML yields two candidates with normalised play counts."""
        from musicdl.catalogsync.collectors.kuwo import parse_playlist_square_html

        html = load_fixture("kuwo_playlist_square.html")
        items = parse_playlist_square_html(html)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("kuwo", first.platform)
        self.assertEqual("3694434192", first.remote_id)
        self.assertEqual("playlist_url", first.parse_strategy)
        self.assertEqual("https://www.kuwo.cn/playlist_detail/3694434192", first.url)
        # The fixture shows "321.5万" for the first card and "6789" for the second.
        self.assertEqual(3215000, first.play_count)
        self.assertEqual(6789, items[1].play_count)

    def test_kuwo_toplist_parser_extracts_rank_entries_from_ssr_state(self):
        """Kuwo toplist HTML yields rank entries read from the embedded ``__NUXT__`` state."""
        from musicdl.catalogsync.collectors.kuwo import parse_toplist_html

        html = load_fixture("kuwo_toplist.html")
        items = parse_toplist_html(html)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("489927", first.remote_id)
        self.assertEqual("kuwo_toplist", first.parse_strategy)
        self.assertEqual("16", first.metadata["sourceid"])
        self.assertEqual("https://www.kuwo.cn/rankList?bangId=489927", first.url)
        self.assertEqual(1234567, first.play_count)
        self.assertEqual(765432, items[1].play_count)

    def test_kuwo_toplist_parser_can_fallback_without_node(self):
        """The Kuwo toplist parser still returns entries when ``subprocess.run`` raises."""
        from musicdl.catalogsync.collectors.kuwo import parse_toplist_html

        html = load_fixture("kuwo_toplist.html")
        with patch(
            "musicdl.catalogsync.collectors.kuwo.subprocess.run",
            side_effect=RuntimeError("node missing"),
        ):
            items = parse_toplist_html(html)

        self.assertEqual(2, len(items))
        first = items[0]
        self.assertEqual("489927", first.remote_id)
        self.assertEqual("16", first.metadata["sourceid"])
        self.assertEqual(1234567, first.play_count)


if __name__ == "__main__":
    unittest.main()
+720
View File
@@ -0,0 +1,720 @@
import sqlite3
import tempfile
import unittest
from contextlib import closing
import json
from pathlib import Path
class DatabaseSchemaTests(unittest.TestCase):
    """Verify connection pragmas, fresh schema creation and upgrades of older schemas."""

    def test_connect_database_enables_sqlite_busy_timeout_and_wal(self):
        """A connection reports the configured busy timeout, foreign keys on, and WAL mode."""
        from musicdl.catalogsync.db import SQLITE_BUSY_TIMEOUT_MS, connect_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            with closing(connect_database(database_file)) as conn:
                timeout_row = conn.execute("PRAGMA busy_timeout").fetchone()
                foreign_keys_row = conn.execute("PRAGMA foreign_keys").fetchone()
                journal_row = conn.execute("PRAGMA journal_mode").fetchone()
            self.assertEqual(SQLITE_BUSY_TIMEOUT_MS, timeout_row[0])
            self.assertEqual(1, foreign_keys_row[0])
            self.assertEqual("wal", str(journal_row[0]).lower())

    def test_initialize_database_creates_expected_tables_and_default_backend(self):
        """Initialisation creates every required table and a ``default-local`` backend row."""
        from musicdl.catalogsync.db import REQUIRED_TABLES, initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            conn = initialize_database(database_file, default_library_root=library_root)
            conn.close()
            del conn
            with closing(sqlite3.connect(database_file)) as verify_conn:
                master_rows = verify_conn.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'table'"
                ).fetchall()
                existing_tables = {name for (name, *_rest) in master_rows}
                self.assertTrue(REQUIRED_TABLES.issubset(existing_tables))
                backend_row = verify_conn.execute(
                    """
                    SELECT backend_type, base_path, is_default
                    FROM storage_backends
                    WHERE name = ?
                    """,
                    ("default-local",),
                ).fetchone()
                del verify_conn
            expected_row = ("local_fs", str(library_root.resolve()), 1)
            self.assertEqual(expected_row, backend_row)

    def test_initialize_database_creates_upload_tables(self):
        """Initialisation creates the ``song_backend_presence`` and ``upload_tasks`` tables."""
        from musicdl.catalogsync.db import initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            conn = initialize_database(database_file)
            conn.close()
            with closing(sqlite3.connect(database_file)) as verify_conn:
                master_rows = verify_conn.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'table'"
                ).fetchall()
            existing_tables = {row[0] for row in master_rows}
            for expected_table in ("song_backend_presence", "upload_tasks"):
                self.assertIn(expected_table, existing_tables)

    def test_initialize_database_creates_playlist_download_preferences_table_and_indexes(self):
        """Initialisation creates the preferences table and the expected lookup indexes."""
        from musicdl.catalogsync.db import initialize_database

        expected_indexes = (
            "idx_playlist_download_preferences_is_wanted",
            "idx_pool_playlists_playlist_id",
            "idx_playlist_songs_song_id",
            "idx_file_assets_song_id",
            "idx_job_items_running_song_id",
        )
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            conn = initialize_database(database_file)
            conn.close()
            with closing(sqlite3.connect(database_file)) as verify_conn:
                table_rows = verify_conn.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'table'"
                ).fetchall()
                index_rows = verify_conn.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'index'"
                ).fetchall()
            table_names = {row[0] for row in table_rows}
            index_names = {row[0] for row in index_rows}
            self.assertIn("playlist_download_preferences", table_names)
            for index_name in expected_indexes:
                self.assertIn(index_name, index_names)

    def test_initialize_database_is_idempotent_for_default_backend(self):
        """Initialising twice leaves exactly one ``default-local`` backend row."""
        from musicdl.catalogsync.db import initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            for _attempt in range(2):
                initialize_database(database_file, default_library_root=library_root).close()
            with closing(sqlite3.connect(database_file)) as conn:
                count_row = conn.execute(
                    "SELECT COUNT(*) FROM storage_backends WHERE name = ?",
                    ("default-local",),
                ).fetchone()
                backend_count = count_row[0]
                del conn
            self.assertEqual(1, backend_count)

    def test_initialize_database_upgrades_job_workers_with_throughput_columns(self):
        """A pre-existing ``job_workers`` table gains the throughput columns on initialisation."""
        from musicdl.catalogsync.db import initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            # Seed an older table layout that lacks the throughput columns.
            with closing(sqlite3.connect(database_file)) as seed_conn:
                seed_conn.execute(
                    """
                    CREATE TABLE job_workers (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    job_run_id INTEGER,
                    job_stage_id INTEGER,
                    worker_name TEXT NOT NULL,
                    status TEXT NOT NULL DEFAULT 'idle',
                    current_job_item_id INTEGER,
                    current_song_id INTEGER,
                    current_playlist_id INTEGER,
                    current_display_text TEXT,
                    heartbeat_at TEXT,
                    last_progress_text TEXT,
                    processed_count INTEGER NOT NULL DEFAULT 0,
                    error_count INTEGER NOT NULL DEFAULT 0
                    )
                    """
                )
                seed_conn.commit()
            initialize_database(database_file).close()
            with closing(sqlite3.connect(database_file)) as verify_conn:
                info_rows = verify_conn.execute("PRAGMA table_info(job_workers)").fetchall()
            # Index 1 of each table_info row is the column name.
            columns = {row[1] for row in info_rows}
            for added_column in (
                "downloaded_bytes",
                "total_bytes",
                "speed_bytes_per_sec",
                "progress_percent",
            ):
                self.assertIn(added_column, columns)

    def test_initialize_database_upgrades_playlists_with_play_count_column(self):
        """A pre-existing ``playlists`` table without ``play_count`` gains that column."""
        from musicdl.catalogsync.db import initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            with closing(sqlite3.connect(database_file)) as seed_conn:
                seed_conn.execute(
                    """
                    CREATE TABLE playlists (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    platform TEXT NOT NULL,
                    remote_playlist_id TEXT NOT NULL,
                    name TEXT NOT NULL,
                    url TEXT NOT NULL,
                    parse_strategy TEXT NOT NULL DEFAULT 'playlist_url',
                    cover_url TEXT,
                    creator_name TEXT,
                    metadata_json TEXT,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(platform, remote_playlist_id)
                    )
                    """
                )
                seed_conn.commit()
            initialize_database(database_file).close()
            with closing(sqlite3.connect(database_file)) as verify_conn:
                info_rows = verify_conn.execute("PRAGMA table_info(playlists)").fetchall()
            columns = {row[1] for row in info_rows}
            self.assertIn("play_count", columns)

    def test_initialize_database_upgrades_playlists_with_collected_song_count_column(self):
        """A ``playlists`` table that already has ``play_count`` gains ``collected_song_count``."""
        from musicdl.catalogsync.db import initialize_database

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            database_file = Path(tmpdir) / "catalogsync.db"
            with closing(sqlite3.connect(database_file)) as seed_conn:
                seed_conn.execute(
                    """
                    CREATE TABLE playlists (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    platform TEXT NOT NULL,
                    remote_playlist_id TEXT NOT NULL,
                    name TEXT NOT NULL,
                    url TEXT NOT NULL,
                    parse_strategy TEXT NOT NULL DEFAULT 'playlist_url',
                    cover_url TEXT,
                    creator_name TEXT,
                    play_count INTEGER,
                    metadata_json TEXT,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(platform, remote_playlist_id)
                    )
                    """
                )
                seed_conn.commit()
            initialize_database(database_file).close()
            with closing(sqlite3.connect(database_file)) as verify_conn:
                info_rows = verify_conn.execute("PRAGMA table_info(playlists)").fetchall()
            columns = {row[1] for row in info_rows}
            self.assertIn("collected_song_count", columns)
class CatalogRepositoryUploadTests(unittest.TestCase):
def test_upsert_object_storage_backend_inserts_and_updates_config(self):
    """Upserting the same backend name twice keeps its id and overwrites its settings."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.repository import CatalogRepository

    # Settings that are identical in both upserts.
    unchanged = {
        "name": "main-s3",
        "endpoint": "https://s3.example.com",
        "credential_env_prefix": "CATALOGSYNC_MAIN_S3",
    }
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        database_file = Path(tmpdir) / "catalogsync.db"
        initialize_database(database_file).close()
        repo = CatalogRepository(database_file)
        backend_id = repo.upsert_object_storage_backend(
            container_name="bucket-a",
            region="auto",
            base_prefix="music",
            **unchanged,
        )
        repo.upsert_object_storage_backend(
            container_name="bucket-b",
            region="cn-shanghai",
            base_prefix="archive",
            **unchanged,
        )
        backend = repo.get_backend_by_name("main-s3")
        config = json.loads(backend["config_json"])

        self.assertEqual(backend_id, int(backend["id"]))
        self.assertEqual("object_storage", backend["backend_type"])
        self.assertEqual("bucket-b", backend["container_name"])
        self.assertEqual("https://s3.example.com", config["endpoint"])
        self.assertEqual("cn-shanghai", config["region"])
        self.assertEqual("archive", config["base_prefix"])
def test_record_remote_file_creates_updates_location_and_refreshes_presence(self):
    """Recording one remote locator twice updates its row and the backend presence summary."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    locator = "music/qq/Singer A/song-a.mp3"
    first_url = "https://cdn.example.com/music/qq/Singer A/song-a.mp3"
    second_url = "https://cdn.example.com/music/qq/Singer A/song-a-v2.mp3"

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        database_file = Path(tmpdir) / "catalogsync.db"
        library_root = Path(tmpdir) / "library"
        initialize_database(database_file, default_library_root=library_root).close()
        repo = CatalogRepository(database_file)

        song = CatalogSong(
            platform="qq",
            remote_song_id="song-a",
            name="Song A",
            ext="mp3",
            file_size_bytes=80,
            quality_label="standard",
        )
        song_id = repo.upsert_song(song)
        local_backend_id = repo.get_default_backend_id()
        file_asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=local_backend_id,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        object_backend_id = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )

        # Same asset, backend and locator both times; only the public URL differs.
        for public_url in (first_url, second_url):
            repo.record_remote_file(
                file_asset_id=file_asset_id,
                backend_id=object_backend_id,
                container_name="bucket-a",
                locator=locator,
                public_url=public_url,
                download_url=None,
            )

        remote_row = repo._fetchone(
            """
            SELECT *
            FROM file_locations
            WHERE file_asset_id = ? AND backend_id = ? AND locator = ?
            """,
            (file_asset_id, object_backend_id, locator),
        )
        presence_row = repo.get_song_backend_presence(
            song_id=song_id,
            backend_id=object_backend_id,
        )

        self.assertIsNone(remote_row["absolute_path"])
        self.assertEqual(0, int(remote_row["is_primary"]))
        self.assertEqual("active", remote_row["status"])
        self.assertEqual(second_url, remote_row["public_url"])
        self.assertEqual(1, int(presence_row["has_active_file"]))
        self.assertEqual(1, int(presence_row["active_file_count"]))
        self.assertEqual(
            int(remote_row["id"]),
            int(presence_row["primary_file_location_id"]),
        )
def test_enqueue_upload_task_deduplicates_and_list_pending_supports_status_update(self):
    """Enqueuing the same upload twice returns one task id; success removes it from pending."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        database_file = Path(tmpdir) / "catalogsync.db"
        library_root = Path(tmpdir) / "library"
        initialize_database(database_file, default_library_root=library_root).close()
        repo = CatalogRepository(database_file)

        song = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        song_id = repo.upsert_song(song)
        local_backend_id = repo.get_default_backend_id()
        file_asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=local_backend_id,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        source_location = repo._fetchone(
            """
            SELECT id
            FROM file_locations
            WHERE file_asset_id = ? AND backend_id = ?
            ORDER BY id ASC
            LIMIT 1
            """,
            (file_asset_id, local_backend_id),
        )
        object_backend_id = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )

        # Enqueue the identical request twice to exercise de-duplication.
        task_request = {
            "file_asset_id": file_asset_id,
            "source_location_id": int(source_location["id"]),
            "target_backend_id": object_backend_id,
            "target_container_name": "bucket-a",
            "target_locator": "music/qq/Singer A/song-a.mp3",
        }
        first_task_id = repo.enqueue_upload_task(**task_request)
        second_task_id = repo.enqueue_upload_task(**task_request)

        pending_before = repo.list_pending_upload_tasks(target_backend_id=object_backend_id)
        repo.claim_next_upload_task(target_backend_id=object_backend_id)
        repo.mark_upload_task_status(
            task_id=first_task_id,
            status="succeeded",
            last_error=None,
        )
        pending_after = repo.list_pending_upload_tasks(target_backend_id=object_backend_id)

        self.assertEqual(first_task_id, second_task_id)
        self.assertEqual(1, len(pending_before))
        self.assertEqual(0, len(pending_after))
def test_enqueue_upload_task_requeues_failed_task_as_pending(self):
    """Re-enqueueing a failed upload must reset its row to ``pending`` with no error.

    The task is enqueued, claimed and marked ``failed`` with an error text;
    the same enqueue call is then repeated and the stored row is inspected.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
        scratch_root = Path(scratch)
        db_file = scratch_root / "catalogsync.db"
        initialize_database(db_file, default_library_root=scratch_root / "library").close()
        repo = CatalogRepository(db_file)

        song = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        song_id = repo.upsert_song(song)
        local_backend = repo.get_default_backend_id()
        asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=local_backend,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        source_row = repo._fetchone(
            """
            SELECT id
            FROM file_locations
            WHERE file_asset_id = ? AND backend_id = ?
            ORDER BY id ASC
            LIMIT 1
            """,
            (asset_id, local_backend),
        )
        s3_backend = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )

        upload_request = {
            "file_asset_id": asset_id,
            "source_location_id": int(source_row["id"]),
            "target_backend_id": s3_backend,
            "target_container_name": "bucket-a",
            "target_locator": "music/qq/Singer A/song-a.mp3",
        }
        task_id = repo.enqueue_upload_task(**upload_request)

        # Drive the task into the failed state before enqueueing it again.
        repo.claim_next_upload_task(target_backend_id=s3_backend)
        repo.mark_upload_task_status(task_id=task_id, status="failed", last_error="network error")
        repo.enqueue_upload_task(**upload_request)

        stored = repo._fetchone("SELECT status, last_error FROM upload_tasks WHERE id = ?", (task_id,))
        self.assertEqual("pending", stored["status"])
        self.assertIsNone(stored["last_error"])
def test_claim_next_upload_task_marks_row_uploading_and_clears_finished_at(self):
    """Claim a task, fail it, re-enqueue it and claim it a second time.

    Both claims must return the same task, and the stored row must end up
    ``uploading`` with two attempts, a start timestamp and no finish timestamp.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
        scratch_root = Path(scratch)
        db_file = scratch_root / "catalogsync.db"
        initialize_database(db_file, default_library_root=scratch_root / "library").close()
        repo = CatalogRepository(db_file)

        song = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        song_id = repo.upsert_song(song)
        local_backend = repo.get_default_backend_id()
        asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=local_backend,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        source_row = repo._fetchone(
            """
            SELECT id
            FROM file_locations
            WHERE file_asset_id = ? AND backend_id = ?
            ORDER BY id ASC
            LIMIT 1
            """,
            (asset_id, local_backend),
        )
        s3_backend = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )

        upload_request = {
            "file_asset_id": asset_id,
            "source_location_id": int(source_row["id"]),
            "target_backend_id": s3_backend,
            "target_container_name": "bucket-a",
            "target_locator": "music/qq/Singer A/song-a.mp3",
        }
        task_id = repo.enqueue_upload_task(**upload_request)

        # First round: claim, then fail.
        claimed_first = repo.claim_next_upload_task(target_backend_id=s3_backend)
        repo.mark_upload_task_status(task_id=task_id, status="failed", last_error="network error")

        # Second round: enqueue the same upload again and claim it once more.
        repo.enqueue_upload_task(**upload_request)
        claimed_second = repo.claim_next_upload_task(target_backend_id=s3_backend)

        stored = repo._fetchone(
            "SELECT status, attempts, started_at, finished_at FROM upload_tasks WHERE id = ?",
            (task_id,),
        )
        self.assertEqual(task_id, int(claimed_first["id"]))
        self.assertEqual(task_id, int(claimed_second["id"]))
        self.assertEqual("uploading", stored["status"])
        self.assertEqual(2, int(stored["attempts"]))
        self.assertIsNotNone(stored["started_at"])
        self.assertIsNone(stored["finished_at"])
def test_mark_upload_task_status_rejects_invalid_transition(self):
    """Marking a task ``uploading`` after it was marked ``succeeded`` must raise ``RuntimeError``."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
        scratch_root = Path(scratch)
        db_file = scratch_root / "catalogsync.db"
        initialize_database(db_file, default_library_root=scratch_root / "library").close()
        repo = CatalogRepository(db_file)

        song = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        song_id = repo.upsert_song(song)
        local_backend = repo.get_default_backend_id()
        asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=local_backend,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        source_row = repo._fetchone(
            """
            SELECT id
            FROM file_locations
            WHERE file_asset_id = ? AND backend_id = ?
            ORDER BY id ASC
            LIMIT 1
            """,
            (asset_id, local_backend),
        )
        s3_backend = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )
        task_id = repo.enqueue_upload_task(
            file_asset_id=asset_id,
            source_location_id=int(source_row["id"]),
            target_backend_id=s3_backend,
            target_container_name="bucket-a",
            target_locator="music/qq/Singer A/song-a.mp3",
        )

        # Finish the task first; only the later status change is expected to be refused.
        repo.claim_next_upload_task(target_backend_id=s3_backend)
        repo.mark_upload_task_status(task_id=task_id, status="succeeded", last_error=None)

        with self.assertRaises(RuntimeError):
            repo.mark_upload_task_status(task_id=task_id, status="uploading", last_error=None)
def test_list_missing_object_upload_candidates_skips_existing_active_remote(self):
    """Only the asset without a recorded remote file is listed as an upload candidate.

    Songs A and B both get a local file; song B additionally gets a remote
    file recorded on the object backend, so song A must be the single result.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
        scratch_root = Path(scratch)
        db_file = scratch_root / "catalogsync.db"
        initialize_database(db_file, default_library_root=scratch_root / "library").close()
        repo = CatalogRepository(db_file)

        local_backend = repo.get_default_backend_id()
        s3_backend = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )

        song_a = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        song_b = CatalogSong(platform="qq", remote_song_id="song-b", name="Song B", ext="mp3")
        song_a_id = repo.upsert_song(song_a)
        song_b_id = repo.upsert_song(song_b)

        asset_a_id = repo.record_local_file(
            song_id=song_a_id,
            backend_id=local_backend,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        asset_b_id = repo.record_local_file(
            song_id=song_b_id,
            backend_id=local_backend,
            relative_path="qq/Singer B/song-b.mp3",
            file_size_bytes=81,
            ext="mp3",
            quality_label="standard",
        )

        # Song B already has a copy on the object backend.
        repo.record_remote_file(
            file_asset_id=asset_b_id,
            backend_id=s3_backend,
            container_name="bucket-a",
            locator="music/qq/Singer B/song-b.mp3",
            public_url=None,
            download_url=None,
        )

        candidates = repo.list_missing_object_upload_candidates(target_backend_id=s3_backend)

        self.assertEqual(1, len(candidates))
        only_candidate = candidates[0]
        self.assertEqual(song_a_id, int(only_candidate["song_id"]))
        self.assertEqual(asset_a_id, int(only_candidate["file_asset_id"]))
        self.assertEqual("music/qq/Singer A/song-a.mp3", only_candidate["target_locator"])
def test_list_missing_object_upload_candidates_supports_sources_playlist_ids_and_limit(self):
    """Filter upload candidates by source, playlist id and limit.

    A qq song is linked to playlist A and a netease song to playlist B; the
    query for ``sources=["qq"]``, playlist A and ``limit=1`` must return the
    qq song only.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong, PlaylistCandidate
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
        scratch_root = Path(scratch)
        db_file = scratch_root / "catalogsync.db"
        initialize_database(db_file, default_library_root=scratch_root / "library").close()
        repo = CatalogRepository(db_file)

        candidate_a = PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id="playlist-a",
            name="Playlist A",
            url="https://y.qq.com/n/ryqq/playlist/playlist-a",
        )
        playlist_a = repo.upsert_playlist(candidate_a)
        candidate_b = PlaylistCandidate(
            platform="qq",
            pool_kind="manual_file",
            remote_id="playlist-b",
            name="Playlist B",
            url="https://y.qq.com/n/ryqq/playlist/playlist-b",
        )
        playlist_b = repo.upsert_playlist(candidate_b)

        qq_song = CatalogSong(platform="qq", remote_song_id="song-a", name="Song A", ext="mp3")
        qq_song_id = repo.upsert_song(qq_song)
        netease_song = CatalogSong(platform="netease", remote_song_id="song-b", name="Song B", ext="flac")
        netease_song_id = repo.upsert_song(netease_song)

        local_backend = repo.get_default_backend_id()
        repo.record_local_file(
            song_id=qq_song_id,
            backend_id=local_backend,
            relative_path="qq/Singer A/song-a.mp3",
            file_size_bytes=80,
            ext="mp3",
            quality_label="standard",
        )
        repo.record_local_file(
            song_id=netease_song_id,
            backend_id=local_backend,
            relative_path="netease/Singer B/song-b.flac",
            file_size_bytes=128,
            ext="flac",
            quality_label="lossless",
        )

        # One song per playlist, each at position 1.
        repo.link_playlist_song(playlist_a, qq_song_id, 1)
        repo.link_playlist_song(playlist_b, netease_song_id, 1)

        s3_backend = repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="bucket-a",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
        )
        candidates = repo.list_missing_object_upload_candidates(
            target_backend_id=s3_backend,
            sources=["qq"],
            playlist_ids=[playlist_a],
            limit=1,
        )

        self.assertEqual(1, len(candidates))
        only_candidate = candidates[0]
        self.assertEqual(qq_song_id, int(only_candidate["song_id"]))
        self.assertEqual("music/qq/Singer A/song-a.mp3", only_candidate["target_locator"])
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,183 @@
import unittest
from types import SimpleNamespace
import inspect
class FakeResponse:
    """Minimal HTTP-response stand-in that hands back a canned JSON payload."""

    def __init__(self, payload):
        # Kept as-is; ``json()`` returns this very object.
        self.payload = payload

    def raise_for_status(self):
        """Do nothing and return ``None``; this fake never signals an HTTP error."""
        return None

    def json(self):
        """Return the payload supplied at construction time."""
        return self.payload
class DeferredSongInfoTests(unittest.TestCase):
    """Tests for the deferred song-info builders in ``musicdl.catalogsync.deferred``."""

    @staticmethod
    def _stub_client(source):
        """Return a bare client stand-in carrying ``source`` and two pass-through helpers."""
        return SimpleNamespace(
            source=source,
            _constructuniqueworkdir=lambda keyword: f"/tmp/{keyword}",
            _removeduplicates=lambda song_infos: list(song_infos),
        )

    def test_deferred_module_avoids_py39_str_prefix_suffix_methods(self):
        """The module source must not call ``str.removesuffix`` / ``str.removeprefix``."""
        import musicdl.catalogsync.deferred as deferred

        module_source = inspect.getsource(deferred)
        for forbidden_call in (".removesuffix(", ".removeprefix("):
            self.assertNotIn(forbidden_call, module_source)

    def test_extract_playlist_id_from_html_path(self):
        """A ``.html`` suffix on the playlist path is not part of the extracted id."""
        from musicdl.catalogsync.deferred import _extract_playlist_id_from_url

        url = "https://www.kuwo.cn/playlist_detail/3671258656.html"
        self.assertEqual("3671258656", _extract_playlist_id_from_url(url))

    def test_build_kuwo_raw_track_song_infos_strips_music_prefix(self):
        """The ``MUSIC_`` prefix of ``MUSICRID`` is dropped from the identifier."""
        from musicdl.catalogsync.deferred import build_kuwo_raw_track_song_infos

        kuwo_track = {
            "MUSICRID": "MUSIC_123456",
            "SONGNAME": "Song A",
            "ARTIST": "Singer A",
            "ALBUM": "Album A",
            "DURATION": 180,
        }
        song_infos = build_kuwo_raw_track_song_infos(
            self._stub_client("KuwoMusicClient"),
            raw_tracks=[kuwo_track],
            playlist_name="Kuwo Playlist",
        )

        self.assertEqual(1, len(song_infos))
        self.assertEqual("123456", song_infos[0].identifier)

    def test_build_deferred_song_info_marks_snapshot_as_deferred(self):
        """The built song info copies its inputs and is flagged as a deferred search."""
        from musicdl.catalogsync.deferred import build_deferred_song_info

        song_info = build_deferred_song_info(
            source="NeteaseMusicClient",
            raw_search_result={"id": 101, "name": "Song A"},
            identifier="101",
            song_name="Song A",
            singers="Singer A",
            album="Album A",
            duration_s=215,
            cover_url="https://example.com/a.jpg",
            ext="flac",
        )

        self.assertEqual("NeteaseMusicClient", song_info.source)
        self.assertEqual("101", song_info.identifier)
        self.assertEqual("Song A", song_info.song_name)
        self.assertEqual("Singer A", song_info.singers)
        self.assertEqual("Album A", song_info.album)
        self.assertEqual("flac", song_info.ext)
        self.assertFalse(song_info.with_valid_download_url)
        raw = song_info.raw_data
        self.assertTrue(raw["deferred_search"])
        self.assertEqual(101, raw["search"]["id"])

    def test_build_netease_playlist_song_infos_fetches_missing_track_details(self):
        """Track 102 is listed in ``trackIds`` only, so it must come from the song-detail call."""
        from musicdl.catalogsync.deferred import build_netease_playlist_song_infos

        class FakeClient:
            """Records every POST and answers the two endpoints used in this test."""

            source = "NeteaseMusicClient"

            def __init__(self):
                self.calls = []

            def post(self, url, data=None, **kwargs):
                self.calls.append((url, data))
                if url.endswith("/api/v6/playlist/detail"):
                    # The playlist carries two track ids but full data for 101 only.
                    playlist_payload = {
                        "playlist": {
                            "name": "Test Playlist",
                            "trackIds": [{"id": 101}, {"id": 102}],
                            "tracks": [
                                {
                                    "id": 101,
                                    "name": "Song A",
                                    "dt": 215000,
                                    "ar": [{"name": "Singer A"}],
                                    "al": {"name": "Album A", "picUrl": "https://example.com/a.jpg"},
                                    "sq": {"size": 1},
                                }
                            ],
                        }
                    }
                    return FakeResponse(playlist_payload)
                if url.endswith("/api/v3/song/detail"):
                    detail_payload = {
                        "songs": [
                            {
                                "id": 102,
                                "name": "Song B",
                                "dt": 186000,
                                "ar": [{"name": "Singer B"}],
                                "al": {"name": "Album B", "picUrl": "https://example.com/b.jpg"},
                                "h": {"size": 1},
                            }
                        ]
                    }
                    return FakeResponse(detail_payload)
                raise AssertionError(f"Unexpected URL: {url}")

            def _constructuniqueworkdir(self, keyword):
                return f"/tmp/{keyword}"

            def _removeduplicates(self, song_infos):
                return list(song_infos)

        client = FakeClient()
        song_infos = build_netease_playlist_song_infos(client, "https://music.163.com/#/playlist?id=999")

        self.assertEqual(2, len(song_infos))
        self.assertEqual(["101", "102"], [info.identifier for info in song_infos])
        self.assertEqual(["Song A", "Song B"], [info.song_name for info in song_infos])
        self.assertTrue(all(info.raw_data["deferred_search"] for info in song_infos))
        self.assertEqual("flac", song_infos[0].ext)
        self.assertEqual("mp3", song_infos[1].ext)
        self.assertTrue(any(url.endswith("/api/v3/song/detail") for url, _ in client.calls))

    def test_build_qq_raw_track_song_infos_keeps_tracks_without_direct_download_urls(self):
        """Raw QQ tracks that carry no download URL still produce deferred song infos."""
        from musicdl.catalogsync.deferred import build_qq_raw_track_song_infos

        flac_track = {
            "songmid": "mid-a",
            "songname": "Song A",
            "interval": 210,
            "singer": [{"name": "Singer A"}],
            "albumname": "Album A",
            "albummid": "album-a",
            "sizeflac": 1024,
        }
        mp3_track = {
            "songmid": "mid-b",
            "songname": "Song B",
            "interval": 180,
            "singer": [{"name": "Singer B"}],
            "albumname": "Album B",
            "albummid": "album-b",
            "size320": 512,
        }
        song_infos = build_qq_raw_track_song_infos(
            self._stub_client("QQMusicClient"),
            raw_tracks=[flac_track, mp3_track],
            playlist_name="QQ Playlist",
        )

        self.assertEqual(2, len(song_infos))
        self.assertEqual(["mid-a", "mid-b"], [info.identifier for info in song_infos])
        self.assertEqual(["Song A", "Song B"], [info.song_name for info in song_infos])
        self.assertEqual(["flac", "mp3"], [info.ext for info in song_infos])
        self.assertTrue(all(info.raw_data["deferred_search"] for info in song_infos))
@@ -0,0 +1,96 @@
import io
import tempfile
import unittest
import zipfile
from datetime import datetime
from pathlib import Path
class ExportBundlesTests(unittest.TestCase):
    """Tests for the playlist zip-bundle helpers in ``musicdl.catalogsync.export_bundles``."""

    def _prepare_playlist_dir(self, root: Path, dirname: str) -> Path:
        """Create ``root/dirname`` holding a playlist file, a meta file and one cover; return it."""
        playlist_dir = root / dirname
        cover_file = playlist_dir / "covers" / "playlist-cover.jpg"
        cover_file.parent.mkdir(parents=True, exist_ok=True)
        text_fixtures = (
            ("playlist.yaml", "playlist_id: 1\n"),
            (".playlist_meta.json", '{"playlist_id": 1}'),
        )
        for filename, content in text_fixtures:
            (playlist_dir / filename).write_text(content, encoding="utf-8")
        cover_file.write_bytes(b"cover")
        return playlist_dir

    def test_create_single_playlist_bundle_includes_playlist_dir_files(self):
        """A single-playlist bundle is named after the playlist and contains the directory's files."""
        from musicdl.catalogsync.export_bundles import create_single_playlist_bundle

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            root = Path(scratch)
            bundle_root = root / "bundles"
            bundle_path = create_single_playlist_bundle(
                playlist_dir=self._prepare_playlist_dir(root, "Playlist One_101"),
                bundle_root=bundle_root,
                platform="qq",
                playlist_id=101,
                playlist_name="Playlist One",
            )

            self.assertTrue(bundle_path.exists())
            self.assertEqual("playlist-qq-101-Playlist One.zip", bundle_path.name)
            self.assertEqual(bundle_root.resolve(), bundle_path.parent.resolve())
            with zipfile.ZipFile(bundle_path, "r") as archive:
                members = set(archive.namelist())
            for expected_member in (
                "Playlist One_101/playlist.yaml",
                "Playlist One_101/.playlist_meta.json",
                "Playlist One_101/covers/playlist-cover.jpg",
            ):
                self.assertIn(expected_member, members)

    def test_create_multi_playlist_bundle_wraps_under_playlists_root(self):
        """A multi-playlist bundle nests every playlist directory under ``playlists/``."""
        from musicdl.catalogsync.export_bundles import create_multi_playlist_bundle

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            root = Path(scratch)
            dir_a = self._prepare_playlist_dir(root, "Playlist A_201")
            dir_b = self._prepare_playlist_dir(root, "Playlist B_202")
            bundle_root = root / "bundles"
            bundle_path = create_multi_playlist_bundle(
                playlist_dirs=[dir_a, dir_b],
                bundle_root=bundle_root,
                created_at=datetime(2026, 4, 19, 12, 34, 56),
            )

            self.assertTrue(bundle_path.exists())
            self.assertTrue(bundle_path.name.endswith("playlists-export-20260419-123456.zip"))
            self.assertEqual(bundle_root.resolve(), bundle_path.parent.resolve())
            # The archive is opened from an in-memory copy of the file's bytes.
            archive_bytes = io.BytesIO(bundle_path.read_bytes())
            with zipfile.ZipFile(archive_bytes, "r") as archive:
                members = set(archive.namelist())
            self.assertIn("playlists/Playlist A_201/playlist.yaml", members)
            self.assertIn("playlists/Playlist B_202/.playlist_meta.json", members)

    def test_create_multi_playlist_bundle_uses_unique_storage_path_for_same_timestamp(self):
        """Two bundles created with the same timestamp must land on different paths."""
        from musicdl.catalogsync.export_bundles import create_multi_playlist_bundle

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            root = Path(scratch)
            dir_a = self._prepare_playlist_dir(root, "Playlist A_201")
            dir_b = self._prepare_playlist_dir(root, "Playlist B_202")
            bundle_root = root / "bundles"
            created_at = datetime(2026, 4, 19, 12, 34, 56)

            first_path = create_multi_playlist_bundle(
                playlist_dirs=[dir_a, dir_b],
                bundle_root=bundle_root,
                created_at=created_at,
            )
            second_path = create_multi_playlist_bundle(
                playlist_dirs=[dir_a, dir_b],
                bundle_root=bundle_root,
                created_at=created_at,
            )

            self.assertNotEqual(first_path, second_path)
            self.assertTrue(first_path.exists())
            self.assertTrue(second_path.exists())
            expected_suffix = "playlists-export-20260419-123456.zip"
            self.assertTrue(first_path.name.endswith(expected_suffix))
            self.assertTrue(second_path.name.endswith(expected_suffix))
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,41 @@
import tempfile
import unittest
import warnings
from pathlib import Path
class ExistingClientLoaderTests(unittest.TestCase):
    """Checks on the existing ``musicdl`` client loader and its source modules."""

    def test_build_music_client_can_instantiate_qq_client_lazily(self):
        """``BuildMusicClient`` must build a client whose ``source`` is ``QQMusicClient``."""
        from musicdl.modules import BuildMusicClient

        with warnings.catch_warnings():
            # Silence ResourceWarning for the duration of this test.
            warnings.simplefilter("ignore", ResourceWarning)
            with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
                client = BuildMusicClient(
                    {
                        "type": "QQMusicClient",
                        "logger_handle": None,
                        "disable_print": True,
                        "work_dir": str(Path(tmpdir) / "outputs"),
                        "maintain_session": False,
                        "search_size_per_source": 1,
                        "search_size_per_page": 1,
                        "strict_limit_search_size_per_page": True,
                    }
                )
                self.assertEqual("QQMusicClient", client.source)

    def test_default_download_source_modules_enable_postponed_annotations_for_python38(self):
        """Every default download source module must import postponed annotations.

        Each module is checked inside its own ``subTest`` so that a single run
        reports all offending modules instead of stopping at the first one.
        """
        # Relative path: it is resolved against the current working directory.
        source_dir = Path("musicdl/modules/sources")
        for module_name in ("qq.py", "kuwo.py", "migu.py", "qianqian.py", "kugou.py", "netease.py"):
            with self.subTest(module=module_name):
                module_text = (source_dir / module_name).read_text(encoding="utf-8")
                self.assertIn(
                    "from __future__ import annotations",
                    module_text,
                    msg=f"{module_name} should postpone annotation evaluation for Python 3.8 compatibility",
                )
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,63 @@
import tempfile
import unittest
from pathlib import Path
class ManualPlaylistParsingTests(unittest.TestCase):
    """Tests for ``parse_playlist_file`` in ``musicdl.catalogsync.manual_playlists``."""

    @staticmethod
    def _write_playlist_file(directory, lines):
        """Write ``lines`` joined by newlines to ``playlists.txt`` in ``directory``; return the path."""
        playlist_file = Path(directory) / "playlists.txt"
        playlist_file.write_text("\n".join(lines), encoding="utf-8")
        return playlist_file

    def test_parse_playlist_file_supports_url_platform_url_and_toplist_urls(self):
        """Plain URLs, ``platform,url`` pairs and toplist URLs are parsed; the repeated URL yields no extra entry."""
        from musicdl.catalogsync.manual_playlists import parse_playlist_file

        file_lines = [
            "# comment",
            "https://music.163.com/#/playlist?id=17745989905",
            "qq,https://y.qq.com/n/ryqq/playlist/7707261125",
            "https://y.qq.com/n/ryqq/toplist/26",
            "https://www.kuwo.cn/rankList?bangId=16",
            "https://music.163.com/#/playlist?id=17745989905",
            "",
        ]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            parsed = parse_playlist_file(self._write_playlist_file(scratch, file_lines))

            self.assertEqual(7, parsed.total_lines)
            self.assertEqual(0, parsed.skipped_lines)
            self.assertEqual(4, len(parsed.entries))
            netease_entry, qq_entry, qq_toplist_entry, kuwo_toplist_entry = parsed.entries
            self.assertEqual("netease", netease_entry.platform)
            self.assertEqual("17745989905", netease_entry.remote_id)
            self.assertEqual("qq", qq_entry.platform)
            self.assertEqual("playlist_url", qq_entry.parse_strategy)
            self.assertEqual("qq_toplist", qq_toplist_entry.parse_strategy)
            self.assertEqual("kuwo_toplist", kuwo_toplist_entry.parse_strategy)

    def test_parse_playlist_file_skips_invalid_lines(self):
        """All three unsupported lines are counted as skipped and produce no entries."""
        from musicdl.catalogsync.manual_playlists import parse_playlist_file

        file_lines = [
            "not-a-url",
            "apple,https://example.com/list/1",
            "https://example.com/list/2",
        ]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            parsed = parse_playlist_file(self._write_playlist_file(scratch, file_lines))

            self.assertEqual(3, parsed.total_lines)
            self.assertEqual(3, parsed.skipped_lines)
            self.assertEqual([], parsed.entries)
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,45 @@
import unittest
from types import SimpleNamespace
class ModelNormalizationTests(unittest.TestCase):
    """Tests for the normalization helpers in ``musicdl.catalogsync.models``."""

    def test_catalog_song_from_song_info_builds_stable_platform_key(self):
        """``CatalogSong.from_song_info`` maps a QQ client song info onto the ``qq`` platform key."""
        from musicdl.catalogsync.models import CatalogSong, normalize_source_name

        song_fields = {
            "source": "QQMusicClient",
            "identifier": "000x564O1lrTDY",
            "song_name": "摆脱地心引力",
            "singers": "时代少年团",
            "album": "摆脱地心引力",
            "ext": "flac",
            "file_size_bytes": 45381227,
            "file_size": "43.29 MB",
            "raw_data": {"quality": "lossless"},
        }
        song_info = SimpleNamespace(**song_fields)

        normalized = CatalogSong.from_song_info(song_info)

        self.assertEqual("qq", normalize_source_name(song_info.source))
        self.assertEqual("qq", normalized.platform)
        self.assertEqual("000x564O1lrTDY", normalized.remote_song_id)
        self.assertEqual("qq:000x564O1lrTDY", normalized.song_key)
        self.assertEqual("lossless", normalized.quality_label)
        self.assertEqual(45381227, normalized.file_size_bytes)

    def test_extract_artist_names_merges_raw_artists_and_display_text(self):
        """Artists from the raw search data and the display text are merged without duplicates."""
        from musicdl.catalogsync.models import extract_artist_names

        raw_singers = [{"name": "周杰伦"}, {"name": "方文山"}]
        raw_data = {"search": {"singer": raw_singers}}

        names = extract_artist_names(raw_data, "周杰伦 / 方文山, 林俊杰")

        self.assertEqual(["周杰伦", "方文山", "林俊杰"], names)
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,170 @@
import tempfile
import time
import unittest
from pathlib import Path
class CatalogsyncEnvManagerTests(unittest.TestCase):
    """Tests for ``CatalogsyncEnvManager`` from ``musicdl.catalogsync.ops.config``."""

    @staticmethod
    def _new_manager(root, env_text=None):
        """Create the database under ``root`` and return ``(manager, env_path)``.

        When ``env_text`` is given it is written to the env file before the
        database is initialized; otherwise the env file is left untouched.
        """
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.config import CatalogsyncEnvManager

        db_path = root / "catalogsync.db"
        env_path = root / "catalogsync.env"
        if env_text is not None:
            env_path.write_text(env_text, encoding="utf-8")
        initialize_database(db_path).close()
        manager = CatalogsyncEnvManager(db_path=db_path, env_file_path=env_path)
        return manager, env_path

    @staticmethod
    def _revision_by_id(revisions, revision_id):
        """Return the first row of ``revisions`` whose ``id`` equals ``revision_id``."""
        return next(row for row in revisions if row["id"] == revision_id)

    def test_load_current_strips_outer_quotes_from_command_values(self):
        """A double-quoted value in the env file is loaded without its outer quotes."""
        env_lines = [
            'CATALOG_EXPORT_COMMAND="sudo /volume4/Music_Cloud/Music_Server/bin/catalog-export-root.sh"',
            "CATALOG_EXPORT_WORKDIR=/volume4/Music_Cloud/Music_Server",
            "",
        ]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, _ = self._new_manager(Path(scratch), "\n".join(env_lines))

            current = manager.load_current()

            self.assertEqual(
                "sudo /volume4/Music_Cloud/Music_Server/bin/catalog-export-root.sh",
                current["CATALOG_EXPORT_COMMAND"],
            )
            self.assertEqual(
                "/volume4/Music_Cloud/Music_Server",
                current["CATALOG_EXPORT_WORKDIR"],
            )

    def test_load_snapshot_save_revision_and_list_revisions(self):
        """Loading, snapshotting and saving a revision expose the expected values."""
        env_lines = [
            "ROOT_DIR=/music",
            "DOWNLOAD_SOURCES= qq, netease, , kuwo ,",
            "LOG_DIR=/music/logs",
            "",
        ]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, _ = self._new_manager(Path(scratch), "\n".join(env_lines))

            current = manager.load_current()
            snapshot = manager.build_job_snapshot()
            revision_id = manager.save_revision(note="initial import")
            revisions = manager.list_revisions()

            # The raw value keeps its spacing; the snapshot holds the cleaned list.
            self.assertEqual("/music", current["ROOT_DIR"])
            self.assertEqual(" qq, netease, , kuwo ,", current["DOWNLOAD_SOURCES"])
            self.assertEqual(["qq", "netease", "kuwo"], snapshot["download_sources"])
            newest = revisions[0]
            self.assertEqual(revision_id, newest["id"])
            self.assertEqual("initial import", newest["note"])
            self.assertEqual("env_file", newest["source_type"])
            self.assertTrue(newest["content_hash"])
            self.assertIsNone(newest["applied_at"])

    def test_apply_revision_overwrites_env_file_and_sets_applied_at(self):
        """Applying a saved revision restores the env file text and stamps ``applied_at``."""
        original_env = "ROOT_DIR=/music\nDOWNLOAD_SOURCES=qq,netease\n"
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, env_path = self._new_manager(Path(scratch), original_env)
            revision_id = manager.save_revision(note="baseline")

            # Change the file on disk, then roll back to the saved revision.
            env_path.write_text("ROOT_DIR=/other\nDOWNLOAD_SOURCES=migu\n", encoding="utf-8")
            manager.apply_revision(revision_id)

            restored = env_path.read_text(encoding="utf-8")
            restored_revision = self._revision_by_id(manager.list_revisions(), revision_id)
            self.assertEqual(original_env, restored)
            self.assertIsNotNone(restored_revision["applied_at"])

    def test_save_revision_same_content_returns_same_id_without_new_row(self):
        """Saving unchanged content twice returns the same id and stores a single row."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, _ = self._new_manager(Path(scratch), "ROOT_DIR=/music\nDOWNLOAD_SOURCES=qq\n")

            first_id = manager.save_revision(note="first")
            second_id = manager.save_revision(note="second")
            revisions = manager.list_revisions()

            self.assertEqual(first_id, second_id)
            self.assertEqual(1, len(revisions))

    def test_apply_revision_marks_only_target_and_keeps_first_applied_at(self):
        """Applying a revision leaves other revisions unapplied and does not move ``applied_at`` on re-apply."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, env_path = self._new_manager(Path(scratch))
            env_path.write_text("ROOT_DIR=/v1\nDOWNLOAD_SOURCES=qq\n", encoding="utf-8")
            revision_1 = manager.save_revision(note="v1")
            env_path.write_text("ROOT_DIR=/v2\nDOWNLOAD_SOURCES=netease\n", encoding="utf-8")
            revision_2 = manager.save_revision(note="v2")

            manager.apply_revision(revision_1)
            after_first_apply = manager.list_revisions()
            first_applied_at = self._revision_by_id(after_first_apply, revision_1)["applied_at"]
            self.assertIsNotNone(first_applied_at)
            untouched = self._revision_by_id(after_first_apply, revision_2)
            self.assertIsNone(untouched["applied_at"])

            # Wait a little over a second before applying the same revision again.
            time.sleep(1.1)
            manager.apply_revision(revision_1)
            after_second_apply = manager.list_revisions()
            second_applied_at = self._revision_by_id(after_second_apply, revision_1)["applied_at"]
            self.assertEqual(first_applied_at, second_applied_at)

    def test_apply_revision_raises_for_missing_revision(self):
        """Applying an id that was never saved raises ``ValueError``."""
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as scratch:
            manager, _ = self._new_manager(Path(scratch))

            with self.assertRaises(ValueError):
                manager.apply_revision(999999)
# Run the tests of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,354 @@
import tempfile
import unittest
from pathlib import Path
class OpsDatabaseSchemaTests(unittest.TestCase):
def test_initialize_database_creates_operations_tables(self):
    """Initializing the database must create every operations table.

    The database is initialized once, closed, and initialized again; the
    table list is read through the second connection. The connection is
    closed in a ``finally`` block so a failing query cannot leak it, and the
    assertion message names the tables that are missing.
    """
    from musicdl.catalogsync.db import initialize_database

    expected_tables = {
        "job_runs",
        "job_stages",
        "job_items",
        "job_workers",
        "job_commands",
        "job_events",
        "job_logs",
        "config_revisions",
    }
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        db_path = Path(tmpdir) / "catalogsync.db"
        initialize_database(db_path).close()

        verify_conn = initialize_database(db_path)
        try:
            table_rows = verify_conn.execute(
                "SELECT name FROM sqlite_master WHERE type = 'table'"
            ).fetchall()
        finally:
            verify_conn.close()

        tables = {row["name"] for row in table_rows}
        missing_tables = expected_tables - tables
        self.assertEqual(set(), missing_tables, msg=f"missing tables: {sorted(missing_tables)}")
def test_initialize_database_operations_core_columns_match_baseline(self):
from musicdl.catalogsync.db import initialize_database
expected_job_runs = {
"id",
"job_type",
"status",
"priority",
"requested_by",
"config_snapshot_json",
"sources",
"download_sources",
"playlist_scope_json",
"created_at",
"started_at",
"ended_at",
"last_error",
"resume_token",
}
expected_job_stages = {
"id",
"job_run_id",
"stage_type",
"status",
"seq_no",
"total_items",
"pending_items",
"running_items",
"success_items",
"failed_items",
"skipped_items",
"started_at",
"ended_at",
"last_error",
}
expected_job_items = {
"id",
"job_stage_id",
"item_type",
"item_key",
"playlist_pool_id",
"playlist_id",
"song_id",
"file_location_id",
"status",
"attempt_count",
"max_attempts",
"worker_id",
"started_at",
"ended_at",
"last_error",
"last_error_code",
"payload_json",
}
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
db_path = Path(tmpdir) / "catalogsync.db"
conn = initialize_database(db_path)
job_runs_cols = {
row["name"] for row in conn.execute("PRAGMA table_info(job_runs)").fetchall()
}
job_stages_cols = {
row["name"] for row in conn.execute("PRAGMA table_info(job_stages)").fetchall()
}
job_items_cols = {
row["name"] for row in conn.execute("PRAGMA table_info(job_items)").fetchall()
}
conn.close()
self.assertTrue(expected_job_runs.issubset(job_runs_cols))
self.assertTrue(expected_job_stages.issubset(job_stages_cols))
self.assertTrue(expected_job_items.issubset(job_items_cols))
def test_initialize_database_config_revisions_source_type_has_default(self):
from musicdl.catalogsync.db import initialize_database
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
db_path = Path(tmpdir) / "catalogsync.db"
conn = initialize_database(db_path)
columns = conn.execute("PRAGMA table_info(config_revisions)").fetchall()
conn.close()
source_type_col = next(row for row in columns if row["name"] == "source_type")
self.assertEqual("'env_file'", source_type_col["dflt_value"])
class OpsRepositoryTests(unittest.TestCase):
    """Exercise ``OpsRepository`` against a freshly initialised temporary database."""

    def test_create_job_requires_config_snapshot(self):
        """``create_job`` must raise ``ValueError`` when the snapshot is ``None``."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database_file = Path(workdir) / "catalogsync.db"
            initialize_database(database_file).close()
            ops = OpsRepository(database_file)
            self.assertRaises(
                ValueError,
                ops.create_job,
                job_type="catalog_sync",
                config_snapshot=None,
            )

    def test_create_and_read_job_stage_item_with_default_status(self):
        """Create a job, a stage and an item, then read each back and check its fields."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import ItemStatus, JobStatus, StageStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database_file = Path(workdir) / "catalogsync.db"
            initialize_database(database_file).close()
            ops = OpsRepository(database_file)

            new_job_id = ops.create_job(
                job_type="catalog_sync",
                config_snapshot={
                    "sources": ["qq", "netease"],
                    "download": {"enabled": True},
                },
                sources=["qq", "netease"],
                download_sources=["qq"],
            )
            new_stage_id = ops.create_stage(
                job_run_id=new_job_id,
                stage_type="collect",
                seq_no=1,
            )
            new_item_id = ops.create_item(
                job_stage_id=new_stage_id,
                item_type="song_sync",
                item_key="qq:playlist:123",
                song_id=12345,
                payload={"platform": "qq"},
            )

            stored_job = ops.get_job(new_job_id)
            stored_stage = ops.get_stage(new_stage_id)
            stored_item = ops.get_item(new_item_id)

            # Job record: status/priority were not passed in, the rest echoes the inputs.
            self.assertIsNotNone(stored_job)
            self.assertEqual(JobStatus.QUEUED, stored_job.status)
            self.assertEqual(100, stored_job.priority)
            self.assertEqual(
                {"sources": ["qq", "netease"], "download": {"enabled": True}},
                stored_job.config_snapshot,
            )
            self.assertEqual(["qq", "netease"], stored_job.sources)
            self.assertEqual(["qq"], stored_job.download_sources)

            # Stage record: counters are expected to reflect the single item created above.
            self.assertIsNotNone(stored_stage)
            self.assertEqual(StageStatus.PENDING, stored_stage.status)
            self.assertEqual(new_job_id, stored_stage.job_run_id)
            self.assertEqual(1, stored_stage.total_items)
            self.assertEqual(1, stored_stage.pending_items)
            self.assertEqual(0, stored_stage.running_items)
            self.assertEqual(0, stored_stage.success_items)
            self.assertEqual(1, stored_stage.seq_no)

            # Item record: status/attempt fields were not passed in.
            self.assertIsNotNone(stored_item)
            self.assertEqual(ItemStatus.PENDING, stored_item.status)
            self.assertEqual(new_stage_id, stored_item.job_stage_id)
            self.assertEqual(0, stored_item.attempt_count)
            self.assertEqual(3, stored_item.max_attempts)
            self.assertEqual("song_sync", stored_item.item_type)
            self.assertEqual(12345, stored_item.song_id)
            self.assertEqual("qq", stored_item.payload["platform"])

    def test_create_job_allows_empty_config_snapshot_and_reads_back(self):
        """An empty dict is an accepted snapshot and is returned unchanged by ``get_job``."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database_file = Path(workdir) / "catalogsync.db"
            initialize_database(database_file).close()
            ops = OpsRepository(database_file)
            new_job_id = ops.create_job(
                job_type="catalog_sync",
                config_snapshot={},
                sources=["qq", "netease"],
                download_sources=["qq"],
            )
            stored_job = ops.get_job(new_job_id)

            self.assertEqual({}, stored_job.config_snapshot)
            self.assertEqual(["qq", "netease"], stored_job.sources)
            self.assertEqual(["qq"], stored_job.download_sources)

    def test_create_item_with_failed_status_updates_stage_aggregate_counts(self):
        """An item created as ``FAILED`` counts as failed, not pending, on its stage."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import ItemStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database_file = Path(workdir) / "catalogsync.db"
            initialize_database(database_file).close()
            ops = OpsRepository(database_file)
            parent_job = ops.create_job(job_type="catalog_sync", config_snapshot={})
            collect_stage = ops.create_stage(
                job_run_id=parent_job,
                stage_type="collect",
                seq_no=1,
            )
            ops.create_item(
                job_stage_id=collect_stage,
                item_type="song_sync",
                item_key="qq:song:failed:1",
                status=ItemStatus.FAILED,
            )
            stored_stage = ops.get_stage(collect_stage)

            self.assertEqual(1, stored_stage.total_items)
            self.assertEqual(0, stored_stage.pending_items)
            self.assertEqual(1, stored_stage.failed_items)

    def test_list_job_playlist_progress_aggregates_download_states_per_playlist(self):
        """One playlist with a downloaded, a running and a failed song yields one progress row."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong, PlaylistCandidate
        from musicdl.catalogsync.ops.models import ItemStatus
        from musicdl.catalogsync.ops.repository import OpsRepository
        from musicdl.catalogsync.repository import CatalogRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            workspace = Path(workdir)
            database_file = workspace / "catalogsync.db"
            music_root = workspace / "library"
            initialize_database(database_file, default_library_root=music_root).close()
            catalog = CatalogRepository(database_file)
            ops = OpsRepository(database_file)

            def add_song(remote_song_id, name):
                # The three songs differ only in remote id and display name.
                return catalog.upsert_song(
                    CatalogSong(
                        platform="qq",
                        remote_song_id=remote_song_id,
                        name=name,
                        singers="Singer A",
                        ext="mp3",
                        file_size_bytes=128,
                        quality_label="standard",
                    )
                )

            candidate = PlaylistCandidate(
                platform="qq",
                pool_kind="manual_file",
                remote_id="job-progress-1",
                name="Job Progress Playlist",
                url="https://example.invalid/qq/job-progress-1",
            )
            playlist = catalog.upsert_playlist(candidate)
            pool = catalog.upsert_playlist_pool(
                platform="qq",
                pool_kind="manual_file",
                external_id="manual_file:job-progress-1",
                name="Manual Pool",
                url="https://example.invalid/pool/job-progress-1",
            )
            catalog.link_pool_playlist(pool, playlist)

            downloaded_song = add_song("job-song-1", "Downloaded Song")
            running_song = add_song("job-song-2", "Running Song")
            failed_song = add_song("job-song-3", "Failed Song")
            ordered_songs = (downloaded_song, running_song, failed_song)
            for position, song in enumerate(ordered_songs, start=1):
                catalog.link_playlist_song(playlist, song, position)

            # Only the first song gets a recorded local file.
            default_backend = catalog.get_default_backend_id()
            catalog.record_local_file(
                song_id=downloaded_song,
                backend_id=default_backend,
                relative_path="qq/Singer A/job-song-1.mp3",
                file_size_bytes=128,
                ext="mp3",
                quality_label="standard",
            )

            download_job = ops.create_job(
                job_type="download_only",
                config_snapshot={},
                playlist_scope={"playlist_ids": [playlist]},
            )
            download_stage = ops.create_stage(
                job_run_id=download_job,
                stage_type="download",
                seq_no=1,
            )
            item_states = (
                (running_song, ItemStatus.RUNNING),
                (failed_song, ItemStatus.FAILED),
            )
            for song, state in item_states:
                ops.create_item(
                    job_stage_id=download_stage,
                    item_type="song_download",
                    item_key=f"song:{song}",
                    song_id=song,
                    status=state,
                )

            progress_rows = ops.list_job_playlist_progress(download_job)

            self.assertEqual(1, len(progress_rows))
            progress = progress_rows[0]
            # Checked in this order; the first mismatch fails the test.
            expected_fields = (
                ("playlist_id", playlist),
                ("playlist_name", "Job Progress Playlist"),
                ("total_songs", 3),
                ("downloaded_songs", 1),
                ("running_songs", 1),
                ("pending_songs", 0),
                ("failed_songs", 1),
                ("skipped_songs", 0),
                ("progress_percent", 33),
            )
            for field, wanted in expected_fields:
                self.assertEqual(wanted, progress[field])
# Script entry point: run this module's tests through the unittest runner
# when the file is executed directly rather than imported.
if __name__ == "__main__":
unittest.main()
@@ -0,0 +1,901 @@
import json
from contextlib import closing
from queue import Queue
import tempfile
import threading
import unittest
import sqlite3
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
class StageExecutorTests(unittest.TestCase):
def _create_job_stage(self, repo, stage_type: str = "download") -> int:
job_id = repo.create_job(job_type="catalog_sync", config_snapshot={})
return repo.create_stage(job_run_id=job_id, stage_type=stage_type, seq_no=1)
def _insert_song_row(self, db_path: Path, *, platform: str, remote_song_id: str, name: str, singers: str) -> int:
with closing(sqlite3.connect(db_path)) as conn, conn:
cursor = conn.execute(
"""
INSERT INTO songs (platform, remote_song_id, name, singers, ext, file_size_bytes)
VALUES (?, ?, ?, ?, ?, ?)
""",
(platform, remote_song_id, name, singers, "mp3", 128),
)
return int(cursor.lastrowid)
def _insert_playlist_row(self, db_path: Path, *, platform: str, remote_playlist_id: str, name: str, url: str) -> int:
with closing(sqlite3.connect(db_path)) as conn, conn:
cursor = conn.execute(
"""
INSERT INTO playlists (platform, remote_playlist_id, name, url, parse_strategy)
VALUES (?, ?, ?, ?, ?)
""",
(platform, remote_playlist_id, name, url, "playlist_url"),
)
return int(cursor.lastrowid)
def _insert_upload_task_bundle(self, db_path: Path, *, absolute_path: str, target_locator: str) -> int:
with closing(sqlite3.connect(db_path)) as conn, conn:
song_id = int(
conn.execute(
"""
INSERT INTO songs (platform, remote_song_id, name, singers, ext, file_size_bytes)
VALUES (?, ?, ?, ?, ?, ?)
""",
("qq", "song-upload", "Song Upload", "Singer Upload", "mp3", 64),
).lastrowid
)
file_asset_id = int(
conn.execute(
"INSERT INTO file_assets (song_id, ext, file_size_bytes) VALUES (?, ?, ?)",
(song_id, "mp3", 64),
).lastrowid
)
source_location_id = int(
conn.execute(
"""
INSERT INTO file_locations (
file_asset_id, backend_id, container_name, locator, absolute_path, status, is_primary
)
VALUES (?, ?, ?, ?, ?, 'active', 1)
""",
(file_asset_id, 1, "library", "qq/Singer Upload/song-upload.mp3", absolute_path),
).lastrowid
)
task_id = int(
conn.execute(
"""
INSERT INTO upload_tasks (
file_asset_id, source_location_id, target_backend_id, target_container_name, target_locator, status
)
VALUES (?, ?, ?, ?, ?, 'pending')
""",
(file_asset_id, source_location_id, 2, "bucket-a", target_locator),
).lastrowid
)
return task_id
def test_download_executor_marks_item_succeeded_when_hook_returns_true(self):
    """A download item ends up ``SUCCEEDED`` when the patched downloader returns ``True``."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:1",
            song_id=1,
            payload={"row": {"id": 1, "platform": "qq", "name": "Song 1"}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        # Replace the real download call with a stub that reports success.
        download_hook = patch(
            "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
            return_value=True,
        )
        with download_hook:
            stage_executor.process_item(item_id=queued_item, worker_name="download-1")

        self.assertEqual(ItemStatus.SUCCEEDED, ops.get_item(queued_item).status)
def test_download_executor_skips_redownload_when_song_already_exists_locally(self):
    """An item whose song already has a recorded local file must end up ``SUCCEEDED``.

    The downloader hook is patched to raise ``AssertionError`` if it is invoked.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.repository import CatalogRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        catalog = CatalogRepository(database_file)
        existing_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="song-existing-local",
            name="Song Existing Local",
            singers="Singer Existing",
        )

        # Write a 128-byte file at the path the catalog record will point to.
        stored_relative_path = "qq/Singer Existing/song-existing-local.mp3"
        stored_file = music_root / stored_relative_path
        stored_file.parent.mkdir(parents=True, exist_ok=True)
        stored_file.write_bytes(b"x" * 128)

        local_backend = catalog.ensure_local_backend(
            music_root,
            name="default-local",
            is_default=True,
        )
        catalog.record_local_file(
            song_id=existing_song,
            backend_id=local_backend,
            relative_path=stored_relative_path,
            file_size_bytes=128,
            ext="mp3",
            quality_label="standard",
        )

        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key=f"song:{existing_song}",
            song_id=existing_song,
            payload={"row": {"id": existing_song, "platform": "qq", "name": "Song Existing Local"}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        forbidden_download = patch(
            "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
            side_effect=AssertionError("download should not be called for existing local file"),
        )
        with forbidden_download:
            stage_executor.process_item(
                item_id=queued_item,
                worker_name="download-existing-local",
            )

        self.assertEqual(ItemStatus.SUCCEEDED, ops.get_item(queued_item).status)
def test_download_executor_process_resolve_item_marks_failed_when_resolution_returns_none(self):
    """When ``resolve_song_row`` returns ``None`` the item is ``FAILED`` and nothing is queued."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        unresolved_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="song-resolve-none",
            name="Song Resolve None",
            singers="Singer Resolve",
        )
        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key=f"song:{unresolved_song}",
            song_id=unresolved_song,
            payload={"row": {"id": unresolved_song}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        handoff_queue: Queue = Queue()
        # Stub resolution on this executor's own downloader instance.
        failed_resolution = patch.object(
            stage_executor.downloader,
            "resolve_song_row",
            return_value=None,
        )
        with failed_resolution:
            stage_executor.process_resolve_item(
                item_id=queued_item,
                worker_name="resolve-1",
                ready_queue=handoff_queue,
            )

        self.assertEqual(ItemStatus.FAILED, ops.get_item(queued_item).status)
        self.assertTrue(handoff_queue.empty())
def test_download_executor_process_resolve_item_enqueues_resolved_payload(self):
    """A successful resolve queues a task carrying the item id, playlist id and payload object."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        resolvable_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="song-resolve-ok",
            name="Song Resolve OK",
            singers="Singer Resolve",
        )
        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key=f"song:{resolvable_song}",
            song_id=resolvable_song,
            playlist_id=33,
            payload={"row": {"id": resolvable_song, "playlist_id": 33}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        handoff_queue: Queue = Queue()
        # Stand-in for whatever the real resolver returns; only identity is checked below.
        canned_payload = SimpleNamespace(
            row={"id": resolvable_song, "playlist_id": 33},
            display_text="Song Resolve OK / Singer Resolve",
        )
        successful_resolution = patch.object(
            stage_executor.downloader,
            "resolve_song_row",
            return_value=canned_payload,
        )
        with successful_resolution:
            stage_executor.process_resolve_item(
                item_id=queued_item,
                worker_name="resolve-1",
                ready_queue=handoff_queue,
            )

        handed_off = handoff_queue.get_nowait()
        self.assertEqual(queued_item, handed_off.item_id)
        self.assertEqual(33, handed_off.playlist_id)
        self.assertIs(canned_payload, handed_off.resolved_payload)
def test_download_executor_process_download_task_marks_item_succeeded(self):
    """A claimed item becomes ``SUCCEEDED`` when ``download_resolved_song`` returns ``True``."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        target_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="song-download-task",
            name="Song Download Task",
            singers="Singer Download",
        )
        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key=f"song:{target_song}",
            song_id=target_song,
            payload={"row": {"id": target_song}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        # The item is claimed under the resolve worker's name before the download step runs.
        ops.claim_item(item_id=queued_item, worker_name="resolve-1")
        canned_payload = SimpleNamespace(
            row={"id": target_song},
            display_text="Song Download Task / Singer Download",
        )
        download_task = SimpleNamespace(
            item_id=queued_item,
            playlist_id=None,
            row={"id": target_song},
            resolved_payload=canned_payload,
        )
        successful_download = patch.object(
            stage_executor.downloader,
            "download_resolved_song",
            return_value=True,
        )
        with successful_download:
            stage_executor.process_download_task(
                task=download_task,
                worker_name="download-1",
            )

        self.assertEqual(ItemStatus.SUCCEEDED, ops.get_item(queued_item).status)
def test_sync_executor_marks_item_succeeded_and_records_linked_count(self):
    """The value returned by ``sync_playlist_row`` is stored as ``linked_count`` on success."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import SyncStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.services import CatalogSyncService

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        sync_stage = self._create_job_stage(ops, stage_type="sync")
        queued_item = ops.create_item(
            job_stage_id=sync_stage,
            item_type="playlist_sync",
            item_key="playlist:2",
            playlist_id=2,
            payload={"playlist_row": {"id": 2, "platform": "qq", "name": "Playlist 2"}},
        )
        stage_executor = SyncStageExecutor(db_path=database_file)
        # Patched at class level so the executor's own service picks up the stub.
        seven_links = patch.object(CatalogSyncService, "sync_playlist_row", return_value=7)
        with seven_links:
            stage_executor.process_item(item_id=queued_item, worker_name="sync-1")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.SUCCEEDED, stored_item.status)
        self.assertEqual(7, stored_item.payload.get("linked_count"))
def test_sync_executor_marks_item_failed_when_hook_raises(self):
    """An exception from ``sync_playlist_row`` marks the item ``FAILED`` and records its message."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import SyncStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.services import CatalogSyncService

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        sync_stage = self._create_job_stage(ops, stage_type="sync")
        queued_item = ops.create_item(
            job_stage_id=sync_stage,
            item_type="playlist_sync",
            item_key="playlist:22",
            playlist_id=22,
            payload={"playlist_row": {"id": 22, "platform": "qq", "name": "Playlist 22"}},
        )
        stage_executor = SyncStageExecutor(db_path=database_file)
        exploding_sync = patch.object(
            CatalogSyncService,
            "sync_playlist_row",
            side_effect=RuntimeError("sync boom"),
        )
        with exploding_sync:
            stage_executor.process_item(item_id=queued_item, worker_name="sync-2")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.FAILED, stored_item.status)
        self.assertIn("sync boom", str(stored_item.last_error))
def test_sync_executor_reuses_service_per_thread_and_isolates_across_threads(self):
    """Two items on one thread share a service; a third item on another thread builds a new one."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import SyncStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    built_services: list[object] = []

    class EchoSyncService:
        # Returns the playlist id so each item's recorded result is distinguishable.
        def sync_playlist_row(self, playlist_row):
            return int(playlist_row["id"])

    def service_factory():
        # Every factory call is recorded so the test can count constructions.
        instance = EchoSyncService()
        built_services.append(instance)
        return instance

    playlist_numbers = (31, 32, 33)
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        sync_stage = self._create_job_stage(ops, stage_type="sync")
        item_ids = [
            ops.create_item(
                job_stage_id=sync_stage,
                item_type="playlist_sync",
                item_key=f"playlist:{number}",
                playlist_id=number,
                payload={"playlist_row": {"id": number, "platform": "qq", "name": f"Playlist {number}"}},
            )
            for number in playlist_numbers
        ]
        first_id, second_id, third_id = item_ids
        stage_executor = SyncStageExecutor(
            db_path=database_file,
            service_factory=service_factory,
        )

        # Same thread twice: only one service should have been built so far.
        stage_executor.process_item(item_id=first_id, worker_name="sync-1")
        stage_executor.process_item(item_id=second_id, worker_name="sync-1")
        self.assertEqual(1, len(built_services))

        # A different thread processes the third item.
        other_thread = threading.Thread(
            target=stage_executor.process_item,
            kwargs=dict(item_id=third_id, worker_name="sync-2"),
        )
        other_thread.start()
        other_thread.join(timeout=2)
        self.assertFalse(other_thread.is_alive())

        stored_items = [ops.get_item(item_id) for item_id in item_ids]
        self.assertEqual(2, len(built_services))
        for stored in stored_items:
            self.assertEqual(ItemStatus.SUCCEEDED, stored.status)
        for number, stored in zip(playlist_numbers, stored_items):
            self.assertEqual(number, stored.payload.get("linked_count"))
def test_upload_executor_marks_item_failed_when_hook_result_not_succeeded(self):
    """A ``"failed"`` result from ``process_upload_task_row`` marks the item ``FAILED``."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import UploadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.uploader import CatalogUploader

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        upload_stage = self._create_job_stage(ops, stage_type="upload")
        queued_item = ops.create_item(
            job_stage_id=upload_stage,
            item_type="file_upload",
            item_key="upload:3",
            payload={"upload_row": {"id": 3, "target_locator": "music/qq/song.mp3"}},
        )
        stage_executor = UploadStageExecutor(db_path=database_file, backend_name="main-s3")
        failing_upload = patch.object(
            CatalogUploader,
            "process_upload_task_row",
            return_value="failed",
        )
        with failing_upload:
            stage_executor.process_item(item_id=queued_item, worker_name="upload-1")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.FAILED, stored_item.status)
        self.assertIn("failed", str(stored_item.last_error))
def test_upload_executor_marks_item_succeeded_when_hook_result_is_succeeded(self):
    """A ``"succeeded"`` result marks the item ``SUCCEEDED`` and leaves ``last_error`` unset."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import UploadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.uploader import CatalogUploader

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        upload_stage = self._create_job_stage(ops, stage_type="upload")
        queued_item = ops.create_item(
            job_stage_id=upload_stage,
            item_type="file_upload",
            item_key="upload:33",
            payload={"upload_row": {"id": 33, "target_locator": "music/qq/song-33.mp3"}},
        )
        stage_executor = UploadStageExecutor(db_path=database_file, backend_name="main-s3")
        successful_upload = patch.object(
            CatalogUploader,
            "process_upload_task_row",
            return_value="succeeded",
        )
        with successful_upload:
            stage_executor.process_item(item_id=queued_item, worker_name="upload-2")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.SUCCEEDED, stored_item.status)
        self.assertIsNone(stored_item.last_error)
def test_upload_executor_marks_item_failed_when_hook_raises(self):
    """An exception from ``process_upload_task_row`` marks the item ``FAILED`` with its message."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import UploadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.uploader import CatalogUploader

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        upload_stage = self._create_job_stage(ops, stage_type="upload")
        queued_item = ops.create_item(
            job_stage_id=upload_stage,
            item_type="file_upload",
            item_key="upload:44",
            payload={"upload_row": {"id": 44, "target_locator": "music/qq/song-44.mp3"}},
        )
        stage_executor = UploadStageExecutor(db_path=database_file, backend_name="main-s3")
        exploding_upload = patch.object(
            CatalogUploader,
            "process_upload_task_row",
            side_effect=RuntimeError("upload boom"),
        )
        with exploding_upload:
            stage_executor.process_item(item_id=queued_item, worker_name="upload-3")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.FAILED, stored_item.status)
        self.assertIn("upload boom", str(stored_item.last_error))
def test_download_executor_marks_item_failed_when_hook_raises(self):
    """An exception from ``download_song_row`` marks the item ``FAILED`` with its message."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        download_stage = self._create_job_stage(ops, stage_type="download")
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:4",
            song_id=4,
            payload={"row": {"id": 4, "platform": "qq", "name": "Song 4"}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        exploding_download = patch(
            "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
            side_effect=RuntimeError("boom"),
        )
        with exploding_download:
            stage_executor.process_item(item_id=queued_item, worker_name="download-2")

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.FAILED, stored_item.status)
        self.assertIn("boom", str(stored_item.last_error))
def test_download_executor_marks_non_music_resource_skipped_when_hook_raises(self):
    """A song flagged ``qq_toplist_fallback`` is ``SKIPPED`` when the download raises.

    The expected error code on the item is ``NON_MUSIC_RESOURCE``.
    """
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        workspace = Path(workdir)
        database_file = workspace / "catalogsync.db"
        music_root = workspace / "library"
        initialize_database(database_file, default_library_root=music_root).close()
        ops = OpsRepository(database_file)
        download_stage = self._create_job_stage(ops, stage_type="download")
        flagged_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="qqtop_75_test_skip_raise",
            name="QQ Toplist Audio Error",
            singers="Narrator",
        )

        # Attach metadata whose nested search section carries the fallback flag.
        fallback_metadata = {
            "snapshot": {
                "raw_data": {
                    "search": {"qq_toplist_fallback": True},
                }
            }
        }
        encoded_metadata = json.dumps(fallback_metadata, ensure_ascii=False)
        db_connection = sqlite3.connect(database_file)
        with closing(db_connection), db_connection:
            db_connection.execute(
                "UPDATE songs SET metadata_json = ? WHERE id = ?",
                (encoded_metadata, flagged_song),
            )

        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:qqtop-skip-raise",
            song_id=flagged_song,
            payload={"row": {"id": flagged_song, "platform": "qq", "name": "QQ Toplist Audio Error"}},
        )
        stage_executor = DownloadStageExecutor(
            db_path=database_file,
            library_root=music_root,
            download_sources=["qq"],
        )
        exploding_download = patch(
            "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
            side_effect=RuntimeError("boom"),
        )
        with exploding_download:
            stage_executor.process_item(
                item_id=queued_item,
                worker_name="download-skip-raise",
            )

        stored_item = ops.get_item(queued_item)
        self.assertEqual(ItemStatus.SKIPPED, stored_item.status)
        self.assertEqual("NON_MUSIC_RESOURCE", stored_item.last_error_code)
def test_repository_get_upload_row_for_item_backfills_skinny_payload(self):
    """A payload holding only the upload task id is expanded with fields from the database."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(
            database_file,
            default_library_root=Path(workdir) / "library",
        ).close()
        ops = OpsRepository(database_file)
        source_file = Path(workdir) / "library" / "qq" / "Singer Upload" / "song-upload.mp3"
        upload_task = self._insert_upload_task_bundle(
            database_file,
            absolute_path=str(source_file),
            target_locator="music/qq/Singer Upload/song-upload.mp3",
        )
        upload_stage = self._create_job_stage(ops, stage_type="upload")
        # The item payload deliberately carries nothing but the task id.
        queued_item = ops.create_item(
            job_stage_id=upload_stage,
            item_type="file_upload",
            item_key=f"upload:{upload_task}",
            payload={"upload_row": {"id": upload_task}},
        )

        upload_row = ops.get_upload_row_for_item(queued_item)

        self.assertEqual(upload_task, int(upload_row["id"]))
        self.assertEqual("music/qq/Singer Upload/song-upload.mp3", upload_row["target_locator"])
        for backfilled_key in ("file_asset_id", "target_backend_id", "absolute_path"):
            self.assertIn(backfilled_key, upload_row)
def test_repository_build_download_row_backfills_missing_payload_fields_from_song(self):
    """A payload holding only the song id gains ``platform`` and ``name`` from the song row."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database_file = Path(workdir) / "catalogsync.db"
        initialize_database(database_file).close()
        ops = OpsRepository(database_file)
        stored_song = self._insert_song_row(
            database_file,
            platform="qq",
            remote_song_id="song-dl-1",
            name="Song DL 1",
            singers="Singer DL",
        )
        download_stage = self._create_job_stage(ops, stage_type="download")
        # The item payload deliberately carries nothing but the song id.
        queued_item = ops.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:dl:1",
            song_id=stored_song,
            payload={"row": {"id": stored_song}},
        )

        download_row = ops.build_download_row(queued_item)

        self.assertEqual(stored_song, int(download_row["id"]))
        self.assertEqual("qq", download_row["platform"])
        self.assertEqual("Song DL 1", download_row["name"])
def test_repository_get_playlist_row_for_item_backfills_missing_payload_fields_from_playlist(self):
    # The item payload carries just the playlist id; the row read back
    # through the repository must also report the platform and a
    # "playlist_url" parse strategy.
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.repository import OpsRepository

    playlist_fields = {
        "platform": "qq",
        "remote_playlist_id": "playlist-sync-1",
        "name": "Playlist Sync 1",
        "url": "https://y.qq.com/n/ryqq/playlist/playlist-sync-1",
    }
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        database = Path(workdir) / "catalogsync.db"
        initialize_database(database).close()
        repository = OpsRepository(database)

        stored_playlist = self._insert_playlist_row(database, **playlist_fields)
        sync_stage = self._create_job_stage(repository, stage_type="sync")
        sync_item = repository.create_item(
            job_stage_id=sync_stage,
            item_type="playlist_sync",
            item_key="playlist:sync:1",
            playlist_id=stored_playlist,
            payload={"playlist_row": {"id": stored_playlist}},
        )

        fetched = repository.get_playlist_row_for_item(sync_item)

        self.assertEqual(stored_playlist, int(fetched["id"]))
        self.assertEqual("qq", fetched["platform"])
        self.assertEqual("playlist_url", fetched["parse_strategy"])
def test_download_executor_raises_when_mark_item_succeeded_returns_false(self):
    # The download is patched to report success while the repository's
    # mark_item_succeeded is patched to return False; process_item must
    # then raise RuntimeError. mark_item_failed is patched to return True.
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        base = Path(workdir)
        database = base / "catalogsync.db"
        library_dir = base / "library"
        initialize_database(database, default_library_root=library_dir).close()
        repository = OpsRepository(database)

        download_stage = self._create_job_stage(repository, stage_type="download")
        payload_row = {"id": 11, "platform": "qq", "name": "Song CAS"}
        download_item = repository.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:cas:ok-to-fail",
            song_id=11,
            payload={"row": payload_row},
        )
        executor = DownloadStageExecutor(
            db_path=database,
            library_root=library_dir,
            download_sources=["qq"],
        )

        # Context managers are entered top to bottom, exactly like the
        # equivalent nested ``with`` statements.
        with (
            patch(
                "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
                return_value=True,
            ),
            patch.object(executor.ops_repo, "mark_item_succeeded", return_value=False),
            patch.object(executor.ops_repo, "mark_item_failed", return_value=True),
            self.assertRaises(RuntimeError),
        ):
            executor.process_item(item_id=download_item, worker_name="download-cas-1")
def test_download_executor_raises_when_mark_item_failed_returns_false(self):
    # The download is patched to report failure and the repository's
    # mark_item_failed is patched to return False; process_item must raise
    # RuntimeError in that situation.
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        base = Path(workdir)
        database = base / "catalogsync.db"
        library_dir = base / "library"
        initialize_database(database, default_library_root=library_dir).close()
        repository = OpsRepository(database)

        download_stage = self._create_job_stage(repository, stage_type="download")
        payload_row = {"id": 12, "platform": "qq", "name": "Song CAS Failed"}
        download_item = repository.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:cas:failed",
            song_id=12,
            payload={"row": payload_row},
        )
        executor = DownloadStageExecutor(
            db_path=database,
            library_root=library_dir,
            download_sources=["qq"],
        )

        with (
            patch(
                "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
                return_value=False,
            ),
            patch.object(executor.ops_repo, "mark_item_failed", return_value=False),
            self.assertRaises(RuntimeError),
        ):
            executor.process_item(item_id=download_item, worker_name="download-cas-2")
def test_download_executor_marks_item_skipped_for_non_music_resource_when_no_file(self):
    # A song whose stored metadata carries the qq_toplist_fallback flag is
    # processed with the download patched to return False; the item must
    # end up SKIPPED with the expected text in last_error.
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.models import ItemStatus
    from musicdl.catalogsync.ops.repository import OpsRepository

    fallback_metadata = json.dumps(
        {"snapshot": {"raw_data": {"search": {"qq_toplist_fallback": True}}}},
        ensure_ascii=False,
    )
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        base = Path(workdir)
        database = base / "catalogsync.db"
        library_dir = base / "library"
        initialize_database(database, default_library_root=library_dir).close()
        repository = OpsRepository(database)

        download_stage = self._create_job_stage(repository, stage_type="download")
        song = self._insert_song_row(
            database,
            platform="qq",
            remote_song_id="qqtop_75_test_skip",
            name="QQ Toplist Audio",
            singers="Narrator",
        )
        # Outer manager closes the connection; inner one commits the update.
        with closing(sqlite3.connect(database)) as connection:
            with connection:
                connection.execute(
                    "UPDATE songs SET metadata_json = ? WHERE id = ?",
                    (fallback_metadata, song),
                )

        payload_row = {"id": song, "platform": "qq", "name": "QQ Toplist Audio"}
        download_item = repository.create_item(
            job_stage_id=download_stage,
            item_type="song_download",
            item_key="song:qqtop-skip",
            song_id=song,
            payload={"row": payload_row},
        )
        executor = DownloadStageExecutor(
            db_path=database,
            library_root=library_dir,
            download_sources=["qq"],
        )
        download_target = "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row"
        with patch(download_target, return_value=False):
            executor.process_item(item_id=download_item, worker_name="download-skip-1")

        processed = repository.get_item(download_item)
        self.assertEqual(ItemStatus.SKIPPED, processed.status)
        self.assertIn("非音乐资源", str(processed.last_error))
def test_download_executor_raises_when_mark_item_skipped_returns_false(self):
    # The download is patched to return False and the repository's
    # mark_item_skipped is patched to return False; process_item must raise
    # RuntimeError.
    #
    # The song is tagged with the same qq_toplist_fallback metadata that the
    # sibling skip tests write before they expect a SKIPPED item, and the
    # test verifies that the patched mark_item_skipped was really invoked.
    # Without both, the RuntimeError could originate from an unrelated path
    # and the test would pass without exercising the skip branch.
    # NOTE(review): whether the executor also recognises the "qqtop_" remote
    # id on its own is not visible from this file - confirm.
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.ops.executors import DownloadStageExecutor
    from musicdl.catalogsync.ops.repository import OpsRepository

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
        db_path = Path(tmpdir) / "catalogsync.db"
        library_root = Path(tmpdir) / "library"
        initialize_database(db_path, default_library_root=library_root).close()
        ops_repo = OpsRepository(db_path)
        stage_id = self._create_job_stage(ops_repo, stage_type="download")
        song_id = self._insert_song_row(
            db_path,
            platform="qq",
            remote_song_id="qqtop_75_test_skip_cas",
            name="QQ Toplist Audio CAS",
            singers="Narrator",
        )
        # Same metadata marker as the other skip tests in this class.
        with closing(sqlite3.connect(db_path)) as conn, conn:
            conn.execute(
                "UPDATE songs SET metadata_json = ? WHERE id = ?",
                (
                    json.dumps(
                        {
                            "snapshot": {
                                "raw_data": {
                                    "search": {"qq_toplist_fallback": True},
                                }
                            }
                        },
                        ensure_ascii=False,
                    ),
                    song_id,
                ),
            )
        item_id = ops_repo.create_item(
            job_stage_id=stage_id,
            item_type="song_download",
            item_key="song:qqtop-skip-cas",
            song_id=song_id,
            payload={"row": {"id": song_id, "platform": "qq", "name": "QQ Toplist Audio CAS"}},
        )
        executor = DownloadStageExecutor(
            db_path=db_path,
            library_root=library_root,
            download_sources=["qq"],
        )
        with patch(
            "musicdl.catalogsync.downloader.CatalogDownloader.download_song_row",
            return_value=False,
        ):
            with patch.object(
                executor.ops_repo, "mark_item_skipped", return_value=False
            ) as mark_skipped:
                with self.assertRaises(RuntimeError):
                    executor.process_item(item_id=item_id, worker_name="download-skip-cas")
        # The mock keeps its call record after the patch has been undone.
        self.assertTrue(
            mark_skipped.called,
            "RuntimeError was raised without mark_item_skipped being called",
        )
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,210 @@
import tempfile
import unittest
from pathlib import Path
class LocalMaintenanceTests(unittest.TestCase):
    """Tests for duplicate scanning and de-duplication via LocalMaintenanceService."""

    def _build_repo(self):
        """Create a scratch database; return (root dir, db path, CatalogRepository)."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.repository import CatalogRepository

        scratch = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
        # Registered cleanup removes the directory once the test is over.
        self.addCleanup(scratch.cleanup)
        workspace = Path(scratch.name)
        database = workspace / "catalogsync.db"
        initialize_database(database).close()
        catalog = CatalogRepository(database)
        return workspace, database, catalog

    def _seed_duplicate_local_files(self):
        """Record one song with two local files and write both files to disk.

        Returns a dict holding the repository, the created ids and both paths.
        """
        from musicdl.catalogsync.models import CatalogSong

        canonical_locator = "Singer A/Duplicate Song.flac"
        duplicate_locator = "Singer A/Duplicate Song (1).flac"

        root, db_path, repo = self._build_repo()
        song = CatalogSong(
            platform="qq",
            remote_song_id="song-dup-1",
            name="Duplicate Song",
            singers="Singer A",
            ext="flac",
            file_size_bytes=7,
            quality_label="lossless",
            metadata={},
        )
        song_id = repo.upsert_song(song)
        library_root = root / "library"
        backend_id = repo.ensure_local_backend(
            library_root,
            name="default-local",
            is_default=True,
        )

        # Both file records share everything except the relative path.
        file_attributes = {
            "song_id": song_id,
            "backend_id": backend_id,
            "file_size_bytes": 7,
            "ext": "flac",
            "quality_label": "lossless",
        }
        asset_id = repo.record_local_file(relative_path=canonical_locator, **file_attributes)
        repo.record_local_file(relative_path=duplicate_locator, **file_attributes)

        artist_dir = library_root / "Singer A"
        canonical_path = artist_dir / "Duplicate Song.flac"
        duplicate_path = artist_dir / "Duplicate Song (1).flac"
        artist_dir.mkdir(parents=True, exist_ok=True)
        for target in (canonical_path, duplicate_path):
            target.write_bytes(b"abcdefg")

        def location_id(locator):
            # Look up the file_locations row created for the given locator.
            row = repo._fetchone(
                "SELECT * FROM file_locations WHERE locator = ?",
                (locator,),
            )
            return int(row["id"])

        canonical_location_id = location_id(canonical_locator)
        duplicate_location_id = location_id(duplicate_locator)
        return {
            "root": root,
            "db_path": db_path,
            "repo": repo,
            "song_id": song_id,
            "backend_id": backend_id,
            "asset_id": asset_id,
            "canonical_location_id": canonical_location_id,
            "duplicate_location_id": duplicate_location_id,
            "canonical_path": canonical_path,
            "duplicate_path": duplicate_path,
        }

    def test_scan_local_duplicates_reports_groups_and_prefers_canonical_locator(self):
        # Scanning the seeded library must report exactly one duplicate
        # group that keeps the locator without the " (1)" suffix.
        from musicdl.catalogsync.ops.maintenance import LocalMaintenanceService

        fixture = self._seed_duplicate_local_files()
        scanner = LocalMaintenanceService(fixture["db_path"])
        report = scanner.scan_local_duplicates(sample_limit=10)

        summary = report["summary"]
        self.assertEqual(1, summary["duplicate_group_count"])
        self.assertEqual(1, summary["duplicate_location_count"])
        self.assertEqual(2, summary["scanned_active_local_location_count"])

        groups = report["groups"]
        self.assertEqual(1, len(groups))
        only_group = groups[0]
        self.assertEqual(fixture["song_id"], only_group["song_id"])
        self.assertEqual(fixture["backend_id"], only_group["backend_id"])
        self.assertEqual("Duplicate Song", only_group["song_name"])

        kept = only_group["keep"]
        self.assertEqual(fixture["canonical_location_id"], kept["id"])
        self.assertEqual("Singer A/Duplicate Song.flac", kept["locator"])
        self.assertTrue(kept["file_exists"])

        extras = only_group["duplicates"]
        self.assertEqual(1, len(extras))
        self.assertEqual(fixture["duplicate_location_id"], extras[0]["id"])
        self.assertEqual("Singer A/Duplicate Song (1).flac", extras[0]["locator"])

    def test_dedupe_local_duplicates_repoints_references_and_deletes_duplicate_files(self):
        # An upload task and a job item both reference the duplicate
        # location; after de-duplication they must reference the canonical
        # location, the duplicate row must be inactive and its file removed.
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.repository import OpsRepository
        from musicdl.catalogsync.ops.maintenance import LocalMaintenanceService

        fixture = self._seed_duplicate_local_files()
        catalog = fixture["repo"]
        canonical_id = fixture["canonical_location_id"]
        duplicate_id = fixture["duplicate_location_id"]
        ops = OpsRepository(fixture["db_path"])

        bucket_backend = catalog.upsert_object_storage_backend(
            name="test-bucket",
            container_name="music",
            endpoint="https://s3.example.invalid",
            region=None,
            base_prefix="catalogsync",
            credential_env_prefix="CATALOGSYNC_TEST",
            public_base_url="https://cdn.example.invalid",
        )
        upload_task = catalog.enqueue_upload_task(
            file_asset_id=fixture["asset_id"],
            source_location_id=duplicate_id,
            target_backend_id=bucket_backend,
            target_container_name="music",
            target_locator="Singer A/Duplicate Song.flac",
        )
        upload_job = ops.create_job(
            job_type="upload_only",
            config_snapshot={},
            status=JobStatus.QUEUED,
        )
        upload_stage = ops.create_stage(job_run_id=upload_job, stage_type="upload", seq_no=1)
        upload_item = ops.create_item(
            job_stage_id=upload_stage,
            item_type="song_upload",
            item_key="upload:dup-song",
            song_id=fixture["song_id"],
            file_location_id=duplicate_id,
        )

        result = LocalMaintenanceService(fixture["db_path"]).dedupe_local_duplicates(sample_limit=10)

        summary = result["summary"]
        self.assertEqual(0, summary["duplicate_group_count"])
        self.assertEqual(0, summary["duplicate_location_count"])
        execution = result["execution"]
        self.assertEqual(1, execution["deduped_group_count"])
        self.assertEqual(1, execution["inactive_location_count"])
        self.assertEqual(1, execution["deleted_file_count"])
        self.assertEqual(7, execution["released_bytes"])
        self.assertEqual(1, execution["repointed_upload_task_count"])
        self.assertEqual(1, execution["repointed_job_item_count"])

        def location_state(location_id):
            # Status / primary flag of a single file_locations row.
            return catalog._fetchone(
                "SELECT status, is_primary FROM file_locations WHERE id = ?",
                (location_id,),
            )

        duplicate_state = location_state(duplicate_id)
        self.assertEqual("inactive", duplicate_state["status"])
        self.assertEqual(0, int(duplicate_state["is_primary"]))

        canonical_state = location_state(canonical_id)
        self.assertEqual("active", canonical_state["status"])
        self.assertEqual(1, int(canonical_state["is_primary"]))

        task_row = catalog._fetchone(
            "SELECT source_location_id FROM upload_tasks WHERE id = ?",
            (upload_task,),
        )
        self.assertEqual(canonical_id, int(task_row["source_location_id"]))

        item_row = ops._fetchone(
            "SELECT file_location_id FROM job_items WHERE id = ?",
            (upload_item,),
        )
        self.assertEqual(canonical_id, int(item_row["file_location_id"]))

        presence = catalog.get_song_backend_presence(
            song_id=fixture["song_id"],
            backend_id=fixture["backend_id"],
        )
        self.assertIsNotNone(presence)
        self.assertEqual(1, int(presence["active_file_count"]))
        self.assertEqual(canonical_id, int(presence["primary_file_location_id"]))

        self.assertTrue(fixture["canonical_path"].exists())
        self.assertFalse(fixture["duplicate_path"].exists())

    def test_dedupe_local_duplicates_raises_when_jobs_or_items_are_running(self):
        # With a RUNNING job present, de-duplication must refuse to run and
        # raise LocalDedupeBlockedError.
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.maintenance import (
            LocalDedupeBlockedError,
            LocalMaintenanceService,
        )
        from musicdl.catalogsync.ops.repository import OpsRepository

        fixture = self._seed_duplicate_local_files()
        database = fixture["db_path"]
        OpsRepository(database).create_job(
            job_type="download_only",
            config_snapshot={},
            status=JobStatus.RUNNING,
        )
        maintenance = LocalMaintenanceService(database)
        with self.assertRaises(LocalDedupeBlockedError):
            maintenance.dedupe_local_duplicates()
@@ -0,0 +1,306 @@
import tempfile
import unittest
from pathlib import Path
class OpsRepositoryTaskCenterTests(unittest.TestCase):
    """Tests for worker-state bookkeeping and task-center listing in OpsRepository."""

    def test_claim_item_resets_stale_throughput_for_reused_worker(self):
        # A worker name reports progress for a first item, that item
        # succeeds, and the same worker name then claims a second item.
        # The worker row must point at the second item with zeroed counters
        # and blank progress text.
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import ItemStatus, JobStatus, StageStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        mib = 1024 * 1024
        worker = "download-1"
        # Query text assembled line by line so it does not depend on the
        # indentation of this method.
        latest_worker_sql = (
            "\n"
            "SELECT current_job_item_id, downloaded_bytes, total_bytes, speed_bytes_per_sec, progress_percent, last_progress_text\n"
            "FROM job_workers\n"
            "WHERE job_run_id = ? AND job_stage_id = ? AND worker_name = ?\n"
            "ORDER BY id DESC\n"
            "LIMIT 1\n"
        )
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database = Path(workdir) / "catalogsync.db"
            initialize_database(database).close()
            repo = OpsRepository(database)
            job_id = repo.create_job(
                job_type="download_only",
                config_snapshot={},
                status=JobStatus.RUNNING,
            )
            stage_id = repo.create_stage(
                job_run_id=job_id,
                stage_type="download",
                seq_no=1,
                status=StageStatus.RUNNING,
            )

            def add_pending_song(key, song):
                # Queue one pending download item in the stage above.
                return repo.create_item(
                    job_stage_id=stage_id,
                    item_type="song_download",
                    item_key=key,
                    song_id=song,
                    status=ItemStatus.PENDING,
                )

            first_item = add_pending_song("song:first", 101)
            repo.claim_item(item_id=first_item, worker_name=worker)
            repo.update_worker_state(
                worker_name=worker,
                status="running",
                current_job_item_id=first_item,
                downloaded_bytes=3 * mib,
                total_bytes=6 * mib,
                speed_bytes_per_sec=3 * mib,
                progress_percent=50,
                last_progress_text="3.00MB/6.00MB",
            )
            self.assertTrue(repo.mark_item_succeeded(first_item))

            second_item = add_pending_song("song:second", 102)
            repo.claim_item(item_id=second_item, worker_name=worker)

            state = repo._fetchone(latest_worker_sql, (job_id, stage_id, worker))
            self.assertEqual(second_item, int(state["current_job_item_id"]))
            for counter in (
                "downloaded_bytes",
                "total_bytes",
                "speed_bytes_per_sec",
                "progress_percent",
            ):
                self.assertEqual(0, int(state[counter] or 0))
            self.assertFalse(str(state["last_progress_text"] or "").strip())

    def test_update_worker_state_targets_matching_row_for_current_job_item(self):
        # The same worker name holds an item in two different jobs. An
        # update naming the first item must change only the first job's
        # worker row and leave the second one untouched.
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import ItemStatus, JobStatus, StageStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        worker = "download-1"
        latest_worker_id_sql = (
            "\n"
            "SELECT id\n"
            "FROM job_workers\n"
            "WHERE job_run_id = ? AND job_stage_id = ? AND worker_name = ?\n"
            "ORDER BY id DESC\n"
            "LIMIT 1\n"
        )
        both_workers_sql = (
            "\n"
            "SELECT id, current_job_item_id, current_song_id, current_display_text, speed_bytes_per_sec\n"
            "FROM job_workers\n"
            "WHERE id IN (?, ?)\n"
            "ORDER BY id ASC\n"
        )
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database = Path(workdir) / "catalogsync.db"
            initialize_database(database).close()
            repo = OpsRepository(database)

            def start_claimed_download(item_key, song):
                # New running job + stage, one pending item claimed by the
                # worker; returns (item id, id of the worker row).
                job = repo.create_job(
                    job_type="download_only",
                    config_snapshot={},
                    status=JobStatus.RUNNING,
                )
                stage = repo.create_stage(
                    job_run_id=job,
                    stage_type="download",
                    seq_no=1,
                    status=StageStatus.RUNNING,
                )
                item = repo.create_item(
                    job_stage_id=stage,
                    item_type="song_download",
                    item_key=item_key,
                    song_id=song,
                    status=ItemStatus.PENDING,
                )
                repo.claim_item(item_id=item, worker_name=worker)
                worker_row = repo._fetchone(latest_worker_id_sql, (job, stage, worker))
                return item, int(worker_row["id"])

            first_item, first_worker = start_claimed_download("song:first", 201)
            second_item, second_worker = start_claimed_download("song:second", 202)

            repo.update_worker_state(
                worker_name=worker,
                current_job_item_id=first_item,
                status="running",
                current_song_id=201,
                current_display_text="Song One",
                speed_bytes_per_sec=1234,
            )

            fetched = repo._fetchall(both_workers_sql, (first_worker, second_worker))
            by_worker = {int(entry["id"]): entry for entry in fetched}

            updated = by_worker[first_worker]
            self.assertEqual(first_item, int(updated["current_job_item_id"]))
            self.assertEqual(201, int(updated["current_song_id"]))
            self.assertEqual("Song One", updated["current_display_text"])
            self.assertEqual(1234, int(updated["speed_bytes_per_sec"] or 0))

            untouched = by_worker[second_worker]
            self.assertEqual(second_item, int(untouched["current_job_item_id"]))
            self.assertEqual(0, int(untouched["speed_bytes_per_sec"] or 0))

    def test_list_task_center_rows_computes_lane_queue_progress_and_speed(self):
        # One running download job with an active worker, one queued job and
        # one running sync job are created; the task-center rows must carry
        # the lane, queue label, progress and speed values asserted below.
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import ItemStatus, JobStatus, StageStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        mib = 1024 * 1024
        worker = "download-1"
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database = Path(workdir) / "catalogsync.db"
            initialize_database(database).close()
            repo = OpsRepository(database)

            download_job = repo.create_job(
                job_type="download_only",
                config_snapshot={},
                status=JobStatus.RUNNING,
                playlist_scope={"playlist_ids": [7]},
            )
            download_stage = repo.create_stage(
                job_run_id=download_job,
                stage_type="download",
                seq_no=1,
                status=StageStatus.RUNNING,
            )
            repo.create_item(
                job_stage_id=download_stage,
                item_type="song_download",
                item_key="song:done",
                song_id=101,
                status=ItemStatus.SUCCEEDED,
            )
            active_item = repo.create_item(
                job_stage_id=download_stage,
                item_type="song_download",
                item_key="song:running",
                song_id=102,
                status=ItemStatus.PENDING,
            )
            repo.claim_item(item_id=active_item, worker_name=worker)
            repo.update_worker_state(
                worker_name=worker,
                status="running",
                current_job_item_id=active_item,
                downloaded_bytes=3 * mib,
                total_bytes=6 * mib,
                speed_bytes_per_sec=3 * mib,
                progress_percent=50,
                last_progress_text="1 / 2 songs",
            )
            queued_job = repo.create_job(
                job_type="catalog_sync",
                config_snapshot={},
                status=JobStatus.QUEUED,
                playlist_scope={"playlist_ids": [9]},
            )
            general_job = repo.create_job(
                job_type="sync_only",
                config_snapshot={},
                status=JobStatus.RUNNING,
                playlist_scope={"playlist_ids": [11, 12]},
            )

            active_ids = [job.id for job in repo.list_active_jobs()]
            queued_ids = [job.id for job in repo.list_queued_jobs()]
            listing = repo.list_task_center_rows(limit=20)

            self.assertIn(download_job, active_ids)
            self.assertIn(general_job, active_ids)
            self.assertIn(queued_job, queued_ids)

            by_job = {int(entry["id"]): entry for entry in listing}

            download_row = by_job[download_job]
            self.assertEqual("download", download_row["lane_type"])
            self.assertEqual(1, download_row["active_worker_count"])
            self.assertEqual(50, download_row["primary_progress_percent"])
            self.assertEqual(3 * mib, download_row["speed_bytes_per_sec"])
            self.assertEqual("3.0 MB/s", download_row["speed_text"])
            self.assertIn("1 / 2 songs", str(download_row["primary_progress_text"]))

            queued_row = by_job[queued_job]
            self.assertEqual("queued #1", queued_row["queue_label"])

            general_row = by_job[general_job]
            self.assertEqual("general", general_row["lane_type"])
            self.assertEqual("2 playlists", general_row["scope_summary"])

    def test_list_task_center_rows_includes_paused_jobs(self):
        # A job created in PAUSED state must appear in the listing with
        # status "paused".
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database = Path(workdir) / "catalogsync.db"
            initialize_database(database).close()
            repo = OpsRepository(database)
            paused_job = repo.create_job(
                job_type="download_only",
                config_snapshot={},
                status=JobStatus.PAUSED,
                playlist_scope={"playlist_ids": [23]},
            )

            listing = repo.list_task_center_rows(limit=20)
            by_job = {int(entry["id"]): entry for entry in listing}

            self.assertIn(paused_job, by_job)
            self.assertEqual("paused", by_job[paused_job]["status"])

    def test_list_task_center_rows_includes_completed_jobs(self):
        # A job created in COMPLETED state must appear in the listing with
        # status "completed".
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.repository import OpsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
            database = Path(workdir) / "catalogsync.db"
            initialize_database(database).close()
            repo = OpsRepository(database)
            finished_job = repo.create_job(
                job_type="download_only",
                config_snapshot={},
                status=JobStatus.COMPLETED,
                playlist_scope={"playlist_ids": [42]},
            )

            listing = repo.list_task_center_rows(limit=20)
            by_job = {int(entry["id"]): entry for entry in listing}

            self.assertIn(finished_job, by_job)
            self.assertEqual("completed", by_job[finished_job]["status"])
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,418 @@
import tempfile
import unittest
from pathlib import Path
class PlaylistRepositoryTests(unittest.TestCase):
def setUp(self):
    """Create a scratch database plus catalog and ops repositories for each test."""
    from musicdl.catalogsync.db import initialize_database
    from musicdl.catalogsync.models import CatalogSong, PlaylistCandidate
    from musicdl.catalogsync.ops.models import ItemStatus, JobStatus, StageStatus
    from musicdl.catalogsync.ops.repository import OpsRepository
    from musicdl.catalogsync.repository import CatalogRepository

    # The project classes are imported lazily above, so keep references on
    # the instance for the helper methods to use.
    self.CatalogSong = CatalogSong
    self.PlaylistCandidate = PlaylistCandidate
    self.ItemStatus = ItemStatus
    self.JobStatus = JobStatus
    self.StageStatus = StageStatus

    self.tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    self.addCleanup(self.tmpdir.cleanup)
    workspace = Path(self.tmpdir.name)
    self.db_path = workspace / "catalogsync.db"
    self.library_root = workspace / "library"
    initialize_database(self.db_path, default_library_root=self.library_root).close()

    self.repo = CatalogRepository(self.db_path)
    self.ops_repo = OpsRepository(self.db_path)
    self.local_backend_id = self.repo.get_default_backend_id()
def _create_playlist(
    self,
    *,
    platform: str = "qq",
    pool_kind: str = "manual_file",
    remote_id: str,
    name: str,
    play_count: int | None = None,
    collected_song_count: int | None = None,
) -> int:
    """Insert a playlist, link it to a pool of the given kind, and return the playlist id."""
    candidate = self.PlaylistCandidate(
        platform=platform,
        pool_kind=pool_kind,
        remote_id=remote_id,
        name=name,
        url=f"https://example.invalid/{platform}/{remote_id}",
        play_count=play_count,
        collected_song_count=collected_song_count,
    )
    new_playlist = self.repo.upsert_playlist(candidate)
    owning_pool = self.repo.upsert_playlist_pool(
        platform=platform,
        pool_kind=pool_kind,
        external_id=f"{pool_kind}:{remote_id}",
        name=f"{pool_kind}-{platform}",
        url=f"https://example.invalid/pool/{pool_kind}/{remote_id}",
    )
    self.repo.link_pool_playlist(owning_pool, new_playlist)
    return new_playlist
def _create_song(self, *, platform: str = "qq", remote_id: str, name: str) -> int:
    """Upsert a 128-byte standard-quality mp3 song and return its id."""
    song = self.CatalogSong(
        platform=platform,
        remote_song_id=remote_id,
        name=name,
        singers="Singer A",
        ext="mp3",
        file_size_bytes=128,
        quality_label="standard",
    )
    return self.repo.upsert_song(song)
def _mark_local_downloaded(self, song_id: int, relative_path: str) -> None:
    """Record a local mp3 file for the song on the default backend."""
    file_record = {
        "song_id": song_id,
        "backend_id": self.local_backend_id,
        "relative_path": relative_path,
        "file_size_bytes": 128,
        "ext": "mp3",
        "quality_label": "standard",
    }
    self.repo.record_local_file(**file_record)
def _mark_running_download(self, song_id: int) -> None:
    """Create a running download job holding a RUNNING and a PENDING item for the song."""
    running_job = self.ops_repo.create_job(
        job_type="download_only",
        config_snapshot={},
        status=self.JobStatus.RUNNING,
    )
    running_stage = self.ops_repo.create_stage(
        job_run_id=running_job,
        stage_type="download",
        seq_no=1,
        status=self.StageStatus.RUNNING,
    )
    # Same song twice: first the running item, then the queued one.
    item_plan = (
        ("running", self.ItemStatus.RUNNING),
        ("queued", self.ItemStatus.PENDING),
    )
    for suffix, item_status in item_plan:
        self.ops_repo.create_item(
            job_stage_id=running_stage,
            item_type="song_download",
            item_key=f"song:{song_id}:{suffix}",
            song_id=song_id,
            status=item_status,
            payload={"song_id": song_id},
        )
def _create_download_item(
    self,
    *,
    song_id: int,
    job_status,
    stage_status,
    item_status,
    item_key_suffix: str,
) -> None:
    """Create a download job, stage and single item with the given statuses."""
    ops = self.ops_repo
    new_job = ops.create_job(
        job_type="download_only",
        config_snapshot={},
        status=job_status,
    )
    new_stage = ops.create_stage(
        job_run_id=new_job,
        stage_type="download",
        seq_no=1,
        status=stage_status,
    )
    key = f"song:{song_id}:{item_key_suffix}"
    ops.create_item(
        job_stage_id=new_stage,
        item_type="song_download",
        item_key=key,
        song_id=song_id,
        status=item_status,
        payload={"song_id": song_id},
    )
def test_mark_and_unmark_playlists_wanted(self):
    # Marking a playlist makes it show up in the wanted-only listing with
    # the marker's name; unmarking removes it again.
    target = self._create_playlist(remote_id="wanted-1", name="Wanted Playlist")

    def wanted_listing():
        return self.repo.list_playlist_page(page=1, page_size=20, wanted_only=True)

    self.repo.mark_playlists_wanted([target], marked_by="unit-test")
    after_mark = wanted_listing()
    self.assertEqual(1, after_mark["total_count"])
    first_entry = after_mark["items"][0]
    self.assertEqual(1, first_entry["is_wanted"])
    self.assertEqual("unit-test", first_entry["marked_by"])

    self.repo.unmark_playlists_wanted([target])
    after_unmark = wanted_listing()
    self.assertEqual(0, after_unmark["total_count"])
    self.assertEqual([], after_unmark["items"])
def test_list_playlist_page_computes_five_states_and_status_filter(self):
    # Five playlists are prepared, one per expected state code, then the
    # listing is checked unfiltered, filtered by status, and filtered by
    # status combined with wanted_only.
    playlists = {
        slug: self._create_playlist(remote_id=f"{slug}-1", name=title)
        for slug, title in (
            ("unsynced", "Unsynced Playlist"),
            ("not-downloaded", "Not Downloaded Playlist"),
            ("downloading", "Downloading Playlist"),
            ("partial", "Partial Playlist"),
            ("downloaded", "Downloaded Playlist"),
        )
    }
    # songs[0] .. songs[5] correspond to "song-1" .. "song-6".
    songs = [
        self._create_song(remote_id=f"song-{number}", name=f"Song {number}")
        for number in range(1, 7)
    ]

    links = (
        ("not-downloaded", songs[0], 1),
        ("downloading", songs[1], 1),
        ("partial", songs[2], 1),
        ("partial", songs[3], 2),
        ("downloaded", songs[4], 1),
        ("downloaded", songs[5], 2),
    )
    for slug, song, position in links:
        self.repo.link_playlist_song(playlists[slug], song, position)

    self._mark_running_download(songs[1])
    # Only one of the two "partial" songs gets a local file; both
    # "downloaded" songs do.
    for song, locator in (
        (songs[2], "qq/Singer A/song-3.mp3"),
        (songs[4], "qq/Singer A/song-5.mp3"),
        (songs[5], "qq/Singer A/song-6.mp3"),
    ):
        self._mark_local_downloaded(song, locator)

    everything = self.repo.list_playlist_page(page=1, page_size=20)
    by_name = {entry["name"]: entry for entry in everything["items"]}

    expected_states = (
        ("unsynced", "Unsynced Playlist"),
        ("not_downloaded", "Not Downloaded Playlist"),
        ("downloading", "Downloading Playlist"),
        ("partial", "Partial Playlist"),
        ("downloaded", "Downloaded Playlist"),
    )
    for state_code, title in expected_states:
        self.assertEqual(state_code, by_name[title]["state_code"])

    self.assertEqual(0, by_name["Unsynced Playlist"]["song_count"])
    self.assertEqual(1, by_name["Downloading Playlist"]["running_download_song_count"])
    self.assertEqual(1, by_name["Partial Playlist"]["downloaded_song_count"])
    self.assertEqual(2, by_name["Downloaded Playlist"]["downloaded_song_count"])
    self.assertEqual(2, by_name["Downloaded Playlist"]["song_count"])
    self.assertTrue(by_name["Unsynced Playlist"]["state_label"])

    only_partial = self.repo.list_playlist_page(page=1, page_size=20, status="partial")
    self.assertEqual(1, only_partial["total_count"])
    self.assertEqual("Partial Playlist", only_partial["items"][0]["name"])

    downloaded_id = playlists["downloaded"]
    self.repo.mark_playlists_wanted([downloaded_id], marked_by="unit-test")
    wanted_downloaded = self.repo.list_playlist_page(
        page=1,
        page_size=20,
        status="downloaded",
        wanted_only=True,
    )
    self.assertEqual(1, wanted_downloaded["total_count"])
    wanted_items = wanted_downloaded["items"]
    self.assertEqual("Downloaded Playlist", wanted_items[0]["name"])
    self.assertEqual(downloaded_id, wanted_items[0]["id"])
    self.assertNotIn(playlists["unsynced"], [entry["id"] for entry in wanted_items])
def test_list_playlist_page_supports_pagination_platform_pool_kind_and_keyword(self):
    # Three playlists across two platforms and two pool kinds are listed
    # page by page and through the platform, pool_kind and keyword filters.
    fixtures = (
        {
            "platform": "qq",
            "pool_kind": "manual_file",
            "remote_id": "qq-001",
            "name": "QQ Alpha",
            "play_count": 123456,
        },
        {
            "platform": "qq",
            "pool_kind": "playlist_square",
            "remote_id": "qq-002",
            "name": "QQ Beta",
        },
        {
            "platform": "netease",
            "pool_kind": "manual_file",
            "remote_id": "ne-003",
            "name": "NE Gamma",
        },
    )
    for spec in fixtures:
        self._create_playlist(**spec)

    first_page = self.repo.list_playlist_page(page=1, page_size=2)
    second_page = self.repo.list_playlist_page(page=2, page_size=2)
    self.assertEqual(3, first_page["total_count"])
    self.assertEqual(2, first_page["total_pages"])
    self.assertEqual(2, len(first_page["items"]))
    self.assertEqual(1, len(second_page["items"]))

    by_platform = self.repo.list_playlist_page(page=1, page_size=20, platform="netease")
    self.assertEqual(1, by_platform["total_count"])
    self.assertEqual("netease", by_platform["items"][0]["platform"])

    by_pool = self.repo.list_playlist_page(page=1, page_size=20, pool_kind="playlist_square")
    self.assertEqual(1, by_pool["total_count"])
    self.assertEqual("QQ Beta", by_pool["items"][0]["name"])

    by_keyword = self.repo.list_playlist_page(page=1, page_size=20, keyword="qq-001")
    self.assertEqual(1, by_keyword["total_count"])
    match = by_keyword["items"][0]
    self.assertEqual("QQ Alpha", match["name"])
    self.assertEqual(123456, match["play_count"])
def test_list_playlist_page_exposes_collected_song_count_for_unsynced_playlists(self):
    playlist_name = "Collected Count Playlist"
    self._create_playlist(
        platform="qq",
        pool_kind="playlist_square",
        remote_id="qq-collected-001",
        name=playlist_name,
        collected_song_count=42,
    )

    result = self.repo.list_playlist_page(page=1, page_size=20, keyword=playlist_name)
    self.assertEqual(1, result["total_count"])

    # No songs are linked, so the collected count is what gets displayed and
    # the row is flagged as an estimate.
    (first_item, *_) = result["items"]
    self.assertEqual(0, first_item["song_count"])
    self.assertEqual(42, first_item["collected_song_count"])
    self.assertEqual(42, first_item["display_song_count"])
    self.assertTrue(first_item["is_song_count_estimated"])
def test_list_playlist_page_supports_sorting_by_id_platform_name_and_play_count(self):
    # Creation order is deliberately different from the alphabetical order of
    # platform and name and from the numeric order of play_count.
    created_ids = [
        self._create_playlist(
            platform=platform,
            pool_kind="manual_file",
            remote_id=remote_id,
            name=name,
            play_count=play_count,
        )
        for platform, remote_id, name, play_count in (
            ("qq", "sort-001", "Zulu Playlist", 100),
            ("netease", "sort-002", "Alpha Playlist", 300),
            ("kuwo", "sort-003", "Mike Playlist", 200),
        )
    ]

    def sorted_column(sort_by, sort_dir, column):
        # Fetch one sorted page and project a single column out of its rows.
        listing = self.repo.list_playlist_page(
            page=1,
            page_size=20,
            sort_by=sort_by,
            sort_dir=sort_dir,
        )
        return [entry[column] for entry in listing["items"]]

    self.assertEqual(list(reversed(created_ids)), sorted_column("id", "desc", "id"))
    self.assertEqual(
        ["kuwo", "netease", "qq"],
        sorted_column("platform", "asc", "platform"),
    )
    self.assertEqual(
        ["Alpha Playlist", "Mike Playlist", "Zulu Playlist"],
        sorted_column("name", "asc", "name"),
    )
    self.assertEqual(
        [300, 200, 100],
        sorted_column("play_count", "desc", "play_count"),
    )
def test_list_playlist_page_rejects_invalid_status(self):
    # One playlist is created first so the listing is not empty when the
    # unknown status filter is applied.
    self._create_playlist(remote_id="invalid-status-1", name="Invalid Status Playlist")
    self.assertRaises(
        ValueError,
        self.repo.list_playlist_page,
        page=1,
        page_size=20,
        status="bad-status",
    )
def test_downloading_only_counts_running_items_in_running_stage_and_job(self):
    queued_name = "Queued Job Playlist"
    paused_name = "Paused Stage Playlist"

    playlist_for_queued_job = self._create_playlist(remote_id="queued-job-1", name=queued_name)
    playlist_for_paused_stage = self._create_playlist(remote_id="paused-stage-1", name=paused_name)
    queued_song = self._create_song(remote_id="queued-song-1", name="Queued Song")
    paused_song = self._create_song(remote_id="paused-song-1", name="Paused Song")
    self.repo.link_playlist_song(playlist_for_queued_job, queued_song, 1)
    self.repo.link_playlist_song(playlist_for_paused_stage, paused_song, 1)

    # Both items are RUNNING themselves, but one sits in a QUEUED job and the
    # other in a PAUSED stage.
    self._create_download_item(
        song_id=queued_song,
        job_status=self.JobStatus.QUEUED,
        stage_status=self.StageStatus.RUNNING,
        item_status=self.ItemStatus.RUNNING,
        item_key_suffix="queued-job",
    )
    self._create_download_item(
        song_id=paused_song,
        job_status=self.JobStatus.RUNNING,
        stage_status=self.StageStatus.PAUSED,
        item_status=self.ItemStatus.RUNNING,
        item_key_suffix="paused-stage",
    )

    listing = self.repo.list_playlist_page(page=1, page_size=20)
    rows_by_name = {entry["name"]: entry for entry in listing["items"]}
    for playlist_name in (queued_name, paused_name):
        row = rows_by_name[playlist_name]
        self.assertEqual("not_downloaded", row["state_code"])
        self.assertEqual(0, row["running_download_song_count"])
def test_pending_download_item_does_not_count_as_downloading(self):
    playlist_name = "Pending Download Playlist"
    playlist_id = self._create_playlist(remote_id="pending-1", name=playlist_name)
    song_id = self._create_song(remote_id="pending-song-1", name="Pending Song")
    self.repo.link_playlist_song(playlist_id, song_id, 1)

    # Job and stage are both RUNNING; only the item itself is still PENDING.
    self._create_download_item(
        song_id=song_id,
        job_status=self.JobStatus.RUNNING,
        stage_status=self.StageStatus.RUNNING,
        item_status=self.ItemStatus.PENDING,
        item_key_suffix="pending-only",
    )

    listing = self.repo.list_playlist_page(page=1, page_size=20)
    rows_by_name = {entry["name"]: entry for entry in listing["items"]}
    row = rows_by_name[playlist_name]
    self.assertEqual("not_downloaded", row["state_code"])
    self.assertEqual(0, row["running_download_song_count"])
def test_remote_or_inactive_files_do_not_count_as_downloaded(self):
    playlist_name = "Remote Inactive Playlist"
    playlist_id = self._create_playlist(remote_id="remote-inactive-1", name=playlist_name)
    song_id = self._create_song(remote_id="remote-inactive-song-1", name="Remote Inactive Song")
    self.repo.link_playlist_song(playlist_id, song_id, 1)

    # Record a local copy, then mark its location row inactive directly in
    # the database.
    asset_id = self.repo.record_local_file(
        song_id=song_id,
        backend_id=self.local_backend_id,
        relative_path="qq/Singer A/remote-inactive-song-1.mp3",
        file_size_bytes=128,
        ext="mp3",
        quality_label="standard",
    )
    deactivate_sql = "UPDATE file_locations SET status = 'inactive' WHERE file_asset_id = ?"
    self.repo._execute(deactivate_sql, (asset_id,))

    # Attach a second location for the same asset on an object-storage backend.
    remote_backend_id = self.repo.upsert_object_storage_backend(
        name="obj-test",
        container_name="bucket",
        endpoint="https://s3.example.com",
        region="auto",
        base_prefix="music",
        credential_env_prefix="CATALOGSYNC_OBJ",
    )
    self.repo.record_remote_file(
        file_asset_id=asset_id,
        backend_id=remote_backend_id,
        container_name="bucket",
        locator="music/qq/Singer A/remote-inactive-song-1.mp3",
        public_url=None,
        download_url=None,
    )

    listing = self.repo.list_playlist_page(page=1, page_size=20)
    rows_by_name = {entry["name"]: entry for entry in listing["items"]}
    row = rows_by_name[playlist_name]
    self.assertEqual("not_downloaded", row["state_code"])
    self.assertEqual(0, row["downloaded_song_count"])
# Run the tests when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,668 @@
import unittest
class MultiSourceSongResolverTests(unittest.TestCase):
def test_resolver_prefers_preferred_source_exact_candidate_before_cross_platform_search(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeClient:
def __init__(self, search_results=None, on_search=None):
self.search_results = list(search_results or [])
self.on_search = on_search
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
if self.on_search is not None:
self.on_search(keyword)
return list(self.search_results)
stale_song_info = SongInfo(
source="NeteaseMusicClient",
identifier="song-c",
song_name="Song C",
singers="Singer A / Singer B",
ext="mp3",
raw_data={"search": {"id": "song-c"}},
download_url=None,
download_url_status={},
)
netease_candidate = SongInfo(
source="NeteaseMusicClient",
identifier="song-c",
song_name="Song C",
singers="Singer A / Singer B",
ext="mp3",
file_size_bytes=1024,
file_size="1.00 MB",
raw_data={"quality": "standard"},
download_url="https://example.com/song-c.mp3",
download_url_status={"ok": True},
)
qq_candidate = SongInfo(
source="QQMusicClient",
identifier="qq-song-c",
song_name="Song C",
singers="Singer A / Singer B",
ext="flac",
file_size_bytes=4096,
file_size="4.00 MB",
raw_data={"quality": "lossless"},
download_url="https://example.com/song-c.flac",
download_url_status={"ok": True},
)
searched_sources: list[str] = []
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"netease": FakeClient([netease_candidate], on_search=lambda keyword: searched_sources.append("netease")),
"qq": FakeClient([qq_candidate], on_search=lambda keyword: searched_sources.append("qq")),
}[platform]
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "netease",
"name": "Song C",
"singers": "Singer A / Singer B",
"remote_song_id": "song-c",
},
snapshot_song_info=stale_song_info,
download_sources=["netease", "qq"],
)
self.assertEqual(["netease"], searched_sources)
self.assertEqual("NeteaseMusicClient", resolved_song_info.source)
self.assertEqual("mp3", resolved_song_info.ext)
self.assertEqual("song-c", resolved_song_info.identifier)
def test_resolver_stops_after_preferred_source_refresh_returns_downloadable_song(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class PreferredClient:
def __init__(self, refreshed_song):
self.refreshed_song = refreshed_song
self.search_called = False
def _parsewithofficialapiv1(self, search_result, request_overrides=None):
return self.refreshed_song
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.search_called = True
return []
class FallbackClient:
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
raise AssertionError("fallback source should not be searched")
snapshot_song_info = SongInfo(
source="QQMusicClient",
identifier="song-e",
song_name="Song E",
singers="Singer E",
ext="flac",
raw_data={"search": {"id": "song-e"}},
download_url=None,
download_url_status={},
)
refreshed_song = SongInfo(
source="QQMusicClient",
identifier="song-e",
song_name="Song E",
singers="Singer E",
ext="flac",
file_size_bytes=4096,
raw_data={"quality": "lossless"},
download_url="https://example.com/song-e.flac",
download_url_status={"ok": True},
)
preferred_client = PreferredClient(refreshed_song)
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": preferred_client,
"kuwo": FallbackClient(),
}[platform]
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song E",
"singers": "Singer E",
"remote_song_id": "song-e",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "kuwo"],
)
self.assertFalse(preferred_client.search_called)
self.assertEqual("QQMusicClient", resolved_song_info.source)
self.assertEqual("song-e", resolved_song_info.identifier)
def test_resolver_attempts_preferred_source_first_even_when_not_in_download_sources(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot_song_info = SongInfo(
source="NeteaseMusicClient",
identifier="song-pref",
song_name="Song Preferred",
singers="Singer Preferred",
ext="mp3",
raw_data={"search": {"id": "song-pref"}},
download_url=None,
download_url_status={},
)
netease_candidate = SongInfo(
source="NeteaseMusicClient",
identifier="song-pref",
song_name="Song Preferred",
singers="Singer Preferred",
ext="mp3",
file_size_bytes=2048,
raw_data={"quality": "standard"},
download_url="https://example.com/song-pref.mp3",
download_url_status={"ok": True},
)
search_calls = []
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"netease": FakeClient("netease", [netease_candidate], search_calls),
"qq": FakeClient("qq", [], search_calls),
"kuwo": FakeClient("kuwo", [], search_calls),
}[platform]
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "netease",
"name": "Song Preferred",
"singers": "Singer Preferred",
"remote_song_id": "song-pref",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "kuwo"],
)
self.assertEqual(["netease"], search_calls)
self.assertEqual("NeteaseMusicClient", resolved_song_info.source)
self.assertEqual("song-pref", resolved_song_info.identifier)
def test_resolver_reports_source_attempts_to_progress_callback(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeClient:
def __init__(self, search_results=None):
self.search_results = list(search_results or [])
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
return list(self.search_results)
snapshot_song_info = SongInfo(
source="NeteaseMusicClient",
identifier="song-d",
song_name="Song D",
singers="Singer D",
ext="mp3",
raw_data={"search": {"id": "song-d"}},
download_url=None,
download_url_status={},
)
kuwo_candidate = SongInfo(
source="KuwoMusicClient",
identifier="kuwo-song-d",
song_name="Song D",
singers="Singer D",
ext="flac",
file_size_bytes=4096,
raw_data={"quality": "lossless"},
download_url="https://example.com/song-d.flac",
download_url_status={"ok": True},
)
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"netease": FakeClient([]),
"qq": FakeClient([]),
"kuwo": FakeClient([kuwo_candidate]),
}[platform]
)
progress_messages: list[str] = []
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "netease",
"name": "Song D",
"singers": "Singer D",
"remote_song_id": "song-d",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "kuwo"],
progress_callback=progress_messages.append,
)
self.assertEqual(
[
"resolving source netease (1/3)",
"resolving source qq (2/3)",
"resolving source kuwo (3/3)",
],
progress_messages,
)
self.assertEqual("KuwoMusicClient", resolved_song_info.source)
self.assertEqual("kuwo-song-d", resolved_song_info.identifier)
def test_resolver_uses_ranked_top_two_fallback_sources_after_warmup(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeStatsRepo:
def __init__(self):
self.rank_call = None
self.records = []
def rank_fallback_sources(self, origin_source, fallback_sources, warmup_attempts=1000):
self.rank_call = (origin_source, list(fallback_sources), warmup_attempts)
return ["migu", "kuwo", "qianqian"]
def record_fallback_result(self, origin_source, candidate_source, *, succeeded):
self.records.append((origin_source, candidate_source, succeeded))
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot = SongInfo(
source="QQMusicClient",
identifier="song-1",
song_name="Song 1",
singers="Singer 1",
raw_data={"search": {"id": "song-1"}},
download_url=None,
download_url_status={},
)
migu_hit = SongInfo(
source="MiguMusicClient",
identifier="migu-song-1",
song_name="Song 1",
singers="Singer 1",
ext="mp3",
download_url="https://example.com/song-1.mp3",
download_url_status={"ok": True},
)
search_calls = []
stats_repo = FakeStatsRepo()
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": FakeClient("qq", [], search_calls),
"kuwo": FakeClient("kuwo", [], search_calls),
"migu": FakeClient("migu", [migu_hit], search_calls),
"qianqian": FakeClient("qianqian", [], search_calls),
}[platform],
resolver_stats_repo=stats_repo,
)
resolved = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song 1",
"singers": "Singer 1",
"remote_song_id": "song-1",
},
snapshot_song_info=snapshot,
download_sources=["qq", "kuwo", "migu", "qianqian"],
)
self.assertEqual(
("qq", ["kuwo", "migu", "qianqian"], 1000),
stats_repo.rank_call,
)
self.assertEqual(["qq", "migu"], search_calls)
self.assertEqual([("qq", "migu", True)], stats_repo.records)
self.assertEqual("MiguMusicClient", resolved.source)
def test_resolver_continues_after_ranked_top_two_fail(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeStatsRepo:
def __init__(self):
self.records = []
def rank_fallback_sources(self, origin_source, fallback_sources, warmup_attempts=1000):
return ["migu", "kuwo", "qianqian"]
def record_fallback_result(self, origin_source, candidate_source, *, succeeded):
self.records.append((origin_source, candidate_source, succeeded))
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot = SongInfo(
source="QQMusicClient",
identifier="song-2",
song_name="Song 2",
singers="Singer 2",
raw_data={"search": {"id": "song-2"}},
download_url=None,
download_url_status={},
)
qianqian_hit = SongInfo(
source="QianqianMusicClient",
identifier="qianqian-song-2",
song_name="Song 2",
singers="Singer 2",
ext="mp3",
download_url="https://example.com/song-2.mp3",
download_url_status={"ok": True},
)
search_calls = []
stats_repo = FakeStatsRepo()
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": FakeClient("qq", [], search_calls),
"migu": FakeClient("migu", [], search_calls),
"kuwo": FakeClient("kuwo", [], search_calls),
"qianqian": FakeClient("qianqian", [qianqian_hit], search_calls),
}[platform],
resolver_stats_repo=stats_repo,
)
resolved = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song 2",
"singers": "Singer 2",
"remote_song_id": "song-2",
},
snapshot_song_info=snapshot,
download_sources=["qq", "kuwo", "migu", "qianqian"],
)
self.assertEqual(["qq", "migu", "kuwo", "qianqian"], search_calls)
self.assertEqual(
[
("qq", "migu", False),
("qq", "kuwo", False),
("qq", "qianqian", True),
],
stats_repo.records,
)
self.assertEqual("QianqianMusicClient", resolved.source)
def test_resolver_continues_to_fallback_when_preferred_client_factory_raises(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot_song_info = SongInfo(
source="QQMusicClient",
identifier="song-pref-fail",
song_name="Song Preferred Fail",
singers="Singer Preferred Fail",
raw_data={"search": {"id": "song-pref-fail"}},
download_url=None,
download_url_status={},
)
migu_hit = SongInfo(
source="MiguMusicClient",
identifier="migu-song-pref-fail",
song_name="Song Preferred Fail",
singers="Singer Preferred Fail",
ext="mp3",
download_url="https://example.com/song-pref-fail.mp3",
download_url_status={"ok": True},
)
search_calls = []
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"kuwo": FakeClient("kuwo", [], search_calls),
"migu": FakeClient("migu", [migu_hit], search_calls),
}[platform]
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song Preferred Fail",
"singers": "Singer Preferred Fail",
"remote_song_id": "song-pref-fail",
},
snapshot_song_info=snapshot_song_info,
download_sources=["kuwo", "migu"],
)
self.assertEqual(["kuwo", "migu"], search_calls)
self.assertEqual("MiguMusicClient", resolved_song_info.source)
self.assertEqual("migu-song-pref-fail", resolved_song_info.identifier)
def test_resolver_uses_configured_fallback_order_when_rank_lookup_raises(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeStatsRepo:
def __init__(self):
self.records = []
def rank_fallback_sources(self, origin_source, fallback_sources, warmup_attempts=1000):
raise RuntimeError("rank unavailable")
def record_fallback_result(self, origin_source, candidate_source, *, succeeded):
self.records.append((origin_source, candidate_source, succeeded))
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot_song_info = SongInfo(
source="QQMusicClient",
identifier="song-rank-fail",
song_name="Song Rank Fail",
singers="Singer Rank Fail",
raw_data={"search": {"id": "song-rank-fail"}},
download_url=None,
download_url_status={},
)
kuwo_hit = SongInfo(
source="KuwoMusicClient",
identifier="kuwo-song-rank-fail",
song_name="Song Rank Fail",
singers="Singer Rank Fail",
ext="mp3",
download_url="https://example.com/song-rank-fail.mp3",
download_url_status={"ok": True},
)
search_calls = []
stats_repo = FakeStatsRepo()
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": FakeClient("qq", [], search_calls),
"kuwo": FakeClient("kuwo", [kuwo_hit], search_calls),
"migu": FakeClient("migu", [], search_calls),
}[platform],
resolver_stats_repo=stats_repo,
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song Rank Fail",
"singers": "Singer Rank Fail",
"remote_song_id": "song-rank-fail",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "kuwo", "migu"],
)
self.assertEqual(["qq", "kuwo"], search_calls)
self.assertEqual([("qq", "kuwo", True)], stats_repo.records)
self.assertEqual("KuwoMusicClient", resolved_song_info.source)
self.assertEqual("kuwo-song-rank-fail", resolved_song_info.identifier)
def test_resolver_continues_when_record_fallback_result_raises(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeStatsRepo:
def rank_fallback_sources(self, origin_source, fallback_sources, warmup_attempts=1000):
return ["migu", "kuwo"]
def record_fallback_result(self, origin_source, candidate_source, *, succeeded):
raise RuntimeError("record unavailable")
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot_song_info = SongInfo(
source="QQMusicClient",
identifier="song-record-fail",
song_name="Song Record Fail",
singers="Singer Record Fail",
raw_data={"search": {"id": "song-record-fail"}},
download_url=None,
download_url_status={},
)
migu_hit = SongInfo(
source="MiguMusicClient",
identifier="migu-song-record-fail",
song_name="Song Record Fail",
singers="Singer Record Fail",
ext="mp3",
download_url="https://example.com/song-record-fail.mp3",
download_url_status={"ok": True},
)
search_calls = []
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": FakeClient("qq", [], search_calls),
"migu": FakeClient("migu", [migu_hit], search_calls),
"kuwo": FakeClient("kuwo", [], search_calls),
}[platform],
resolver_stats_repo=FakeStatsRepo(),
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song Record Fail",
"singers": "Singer Record Fail",
"remote_song_id": "song-record-fail",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "kuwo", "migu"],
)
self.assertEqual(["qq", "migu"], search_calls)
self.assertEqual("MiguMusicClient", resolved_song_info.source)
self.assertEqual("migu-song-record-fail", resolved_song_info.identifier)
def test_resolver_continues_when_first_fallback_client_factory_raises(self):
from musicdl.catalogsync.resolver import MultiSourceSongResolver
from musicdl.modules.utils.data import SongInfo
class FakeStatsRepo:
def rank_fallback_sources(self, origin_source, fallback_sources, warmup_attempts=1000):
return ["migu", "kuwo"]
def record_fallback_result(self, origin_source, candidate_source, *, succeeded):
return None
class FakeClient:
def __init__(self, source, search_results, calls):
self.source = source
self.search_results = list(search_results or [])
self.calls = calls
def search(self, keyword, num_threadings=1, request_overrides=None, rule=None, main_process_context=None):
self.calls.append(self.source)
return list(self.search_results)
snapshot_song_info = SongInfo(
source="QQMusicClient",
identifier="song-fallback-factory-fail",
song_name="Song Fallback Factory Fail",
singers="Singer Fallback Factory Fail",
raw_data={"search": {"id": "song-fallback-factory-fail"}},
download_url=None,
download_url_status={},
)
kuwo_hit = SongInfo(
source="KuwoMusicClient",
identifier="kuwo-song-fallback-factory-fail",
song_name="Song Fallback Factory Fail",
singers="Singer Fallback Factory Fail",
ext="mp3",
download_url="https://example.com/song-fallback-factory-fail.mp3",
download_url_status={"ok": True},
)
search_calls = []
resolver = MultiSourceSongResolver(
client_factory=lambda platform: {
"qq": FakeClient("qq", [], search_calls),
"kuwo": FakeClient("kuwo", [kuwo_hit], search_calls),
}[platform],
resolver_stats_repo=FakeStatsRepo(),
)
resolved_song_info = resolver.resolve_song_info(
row={
"platform": "qq",
"name": "Song Fallback Factory Fail",
"singers": "Singer Fallback Factory Fail",
"remote_song_id": "song-fallback-factory-fail",
},
snapshot_song_info=snapshot_song_info,
download_sources=["qq", "migu", "kuwo"],
)
self.assertEqual(["qq", "kuwo"], search_calls)
self.assertEqual("KuwoMusicClient", resolved_song_info.source)
self.assertEqual("kuwo-song-fallback-factory-fail", resolved_song_info.identifier)
# Run the resolver tests when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,149 @@
from concurrent.futures import ThreadPoolExecutor
import tempfile
import unittest
from pathlib import Path
def _fetch_stats_row(db_path: Path, origin_source: str, candidate_source: str):
    """Return the ``resolver_source_stats`` row for one origin/candidate pair.

    A dedicated connection is opened for the lookup and always closed again.
    The result is whatever ``fetchone()`` yields for the query.
    """
    from musicdl.catalogsync.resolver_stats import connect_resolver_stats_database

    query = """
        SELECT attempt_count, resolve_success_count,
        created_at, updated_at, last_attempt_at, last_success_at
        FROM resolver_source_stats
        WHERE origin_source = ? AND candidate_source = ?
        """
    connection = connect_resolver_stats_database(db_path)
    try:
        cursor = connection.execute(query, (origin_source, candidate_source))
        return cursor.fetchone()
    finally:
        connection.close()
class ResolverStatsRepositoryTests(unittest.TestCase):
    """Tests for the resolver statistics store and its fallback ranking.

    Every test works on a database file inside its own temporary directory.
    """

    def test_initialize_resolver_stats_database_creates_stats_table_with_timestamps(self):
        from musicdl.catalogsync.resolver_stats import initialize_resolver_stats_database

        timestamp_columns = {"created_at", "updated_at", "last_attempt_at", "last_success_at"}
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            connection = initialize_resolver_stats_database(Path(tmpdir) / "resolver_stats.db")
            try:
                table_rows = connection.execute(
                    "SELECT name FROM sqlite_master WHERE type = 'table'"
                ).fetchall()
                column_rows = connection.execute(
                    "PRAGMA table_info(resolver_source_stats)"
                ).fetchall()
            finally:
                connection.close()
            existing_tables = {entry["name"] for entry in table_rows}
            existing_columns = {entry["name"] for entry in column_rows}

        self.assertIn("resolver_source_stats", existing_tables)
        self.assertTrue(timestamp_columns <= existing_columns)

    def test_record_fallback_result_tracks_attempt_and_success_timestamps(self):
        from musicdl.catalogsync.resolver_stats import ResolverStatsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            stats_db = Path(tmpdir) / "resolver_stats.db"
            repo = ResolverStatsRepository(stats_db)

            # Snapshot the row after a failed attempt and again after a success.
            repo.record_fallback_result("qq", "kuwo", succeeded=False)
            after_failure = _fetch_stats_row(stats_db, "qq", "kuwo")
            repo.record_fallback_result("qq", "kuwo", succeeded=True)
            after_success = _fetch_stats_row(stats_db, "qq", "kuwo")

        self.assertEqual(1, int(after_failure["attempt_count"]))
        self.assertEqual(0, int(after_failure["resolve_success_count"]))
        for column in ("created_at", "updated_at", "last_attempt_at"):
            self.assertIsNotNone(after_failure[column])
        self.assertIsNone(after_failure["last_success_at"])

        self.assertEqual(2, int(after_success["attempt_count"]))
        self.assertEqual(1, int(after_success["resolve_success_count"]))
        for column in ("created_at", "updated_at", "last_attempt_at", "last_success_at"):
            self.assertIsNotNone(after_success[column])

    def test_repository_operations_can_run_from_non_creator_thread(self):
        from musicdl.catalogsync.resolver_stats import ResolverStatsRepository

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo = ResolverStatsRepository(Path(tmpdir) / "resolver_stats.db")
            # The repository is created on this thread; both operations are
            # submitted to a single worker thread.
            with ThreadPoolExecutor(max_workers=1) as worker:
                pending_record = worker.submit(
                    repo.record_fallback_result,
                    "qq",
                    "kuwo",
                    succeeded=True,
                )
                pending_rank = worker.submit(
                    repo.rank_fallback_sources,
                    "qq",
                    ["kuwo", "migu"],
                    warmup_attempts=0,
                )
            self.assertIsNone(pending_record.exception())
            self.assertEqual(["kuwo", "migu"], pending_rank.result())

    def test_rank_fallback_sources_keeps_config_order_before_warmup(self):
        from musicdl.catalogsync.resolver_stats import ResolverStatsRepository

        configured_order = ["kuwo", "migu", "qianqian"]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo = ResolverStatsRepository(Path(tmpdir) / "resolver_stats.db")
            # A single recorded attempt is far below the warm-up threshold.
            repo.record_fallback_result("qq", "kuwo", succeeded=True)
            ranking = repo.rank_fallback_sources(
                "qq",
                list(configured_order),
                warmup_attempts=1000,
            )

        self.assertEqual(configured_order, ranking)

    def test_rank_fallback_sources_reorders_after_warmup_per_origin_source(self):
        from musicdl.catalogsync.resolver_stats import ResolverStatsRepository

        # (candidate, outcome, number of attempts): 1000 attempts in total.
        history = (
            ("migu", True, 800),
            ("kuwo", False, 200),
        )
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo = ResolverStatsRepository(Path(tmpdir) / "resolver_stats.db")
            for candidate, outcome, attempts in history:
                for _ in range(attempts):
                    repo.record_fallback_result("qq", candidate, succeeded=outcome)
            ranking = repo.rank_fallback_sources(
                "qq",
                ["kuwo", "migu", "qianqian"],
                warmup_attempts=1000,
            )

        self.assertEqual(["migu", "qianqian", "kuwo"], ranking)

    def test_rank_fallback_sources_uses_config_order_as_tie_breaker(self):
        from musicdl.catalogsync.resolver_stats import ResolverStatsRepository

        configured_order = ["kuwo", "migu", "qianqian"]
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo = ResolverStatsRepository(Path(tmpdir) / "resolver_stats.db")
            # kuwo and migu get identical histories: five successes each.
            for _ in range(5):
                repo.record_fallback_result("qq", "kuwo", succeeded=True)
                repo.record_fallback_result("qq", "migu", succeeded=True)
            ranking = repo.rank_fallback_sources(
                "qq",
                list(configured_order),
                warmup_attempts=10,
            )

        self.assertEqual(configured_order, ranking)
@@ -0,0 +1,353 @@
import tempfile
import unittest
from pathlib import Path
import shutil
import subprocess
class RuntimeLayoutTests(unittest.TestCase):
def test_runtime_config_builds_defaults_from_root_dir(self):
    from musicdl.catalogsync.runtime import CatalogSyncRuntimeConfig

    settings = {
        "ROOT_DIR": "/volume4/Music_Cloud",
        "PYTHON_BIN": "python3",
    }
    config = CatalogSyncRuntimeConfig.from_mapping(settings)

    # Every default path hangs off ROOT_DIR.
    root_dir = Path("/volume4/Music_Cloud")
    app_home = root_dir / "catalogsync"
    self.assertEqual(app_home, config.app_home)
    self.assertEqual(root_dir / "library", config.library_dir)
    self.assertEqual(app_home / "data" / "catalogsync.db", config.db_path)
    self.assertEqual(app_home / "config" / "catalogsync.env", config.env_file)

    # Defaults that do not depend on ROOT_DIR.
    self.assertEqual("127.0.0.1", config.web_host)
    self.assertEqual(18080, config.web_port)
    self.assertEqual("platform_first_artist", config.download_layout)
def test_catalogsync_modules_avoid_python310_only_dataclass_slots_for_nas_python38(self):
for relative_path in (
"musicdl/catalogsync/runtime.py",
"musicdl/catalogsync/downloader.py",
"musicdl/catalogsync/ops/executors.py",
):
with self.subTest(relative_path=relative_path):
source = Path(relative_path).read_text(encoding="utf-8")
self.assertNotIn("@dataclass(slots=True)", source)
def test_runtime_config_reads_web_fields_from_mapping(self):
    from musicdl.catalogsync.runtime import CatalogSyncRuntimeConfig

    # WEB_PORT is supplied as a string and compared below as an int.
    overrides = {
        "ROOT_DIR": "/volume4/Music_Cloud",
        "ENV_FILE": "/etc/catalogsync.env",
        "WEB_HOST": "0.0.0.0",
        "WEB_PORT": "19090",
    }
    config = CatalogSyncRuntimeConfig.from_mapping(overrides)

    observed = (config.env_file, config.web_host, config.web_port)
    self.assertEqual(Path("/etc/catalogsync.env"), observed[0])
    self.assertEqual("0.0.0.0", observed[1])
    self.assertEqual(19090, observed[2])
def test_runtime_config_falls_back_when_web_port_invalid_or_out_of_range(self):
    from musicdl.catalogsync.runtime import CatalogSyncRuntimeConfig

    # Empty, non-numeric, zero, negative and too-large values must all fall
    # back to the default port. Each value runs in its own subTest so that a
    # failure names the offending input and the remaining values still run.
    for raw_value in ("", "abc", "0", "-1", "70000"):
        with self.subTest(raw_value=raw_value):
            config = CatalogSyncRuntimeConfig.from_mapping(
                {
                    "ROOT_DIR": "/volume4/Music_Cloud",
                    "WEB_PORT": raw_value,
                }
            )
            self.assertEqual(18080, config.web_port)
def test_runtime_config_ensure_directories_creates_expected_tree(self):
from musicdl.catalogsync.runtime import CatalogSyncRuntimeConfig
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
root_dir = Path(tmpdir) / "Music_Cloud"
config = CatalogSyncRuntimeConfig.from_mapping({"ROOT_DIR": str(root_dir)})
config.ensure_directories()
self.assertTrue((root_dir / "library").is_dir())
self.assertTrue((root_dir / "catalogsync" / "app").is_dir())
self.assertTrue((root_dir / "catalogsync" / "bin").is_dir())
self.assertTrue((root_dir / "catalogsync" / "config").is_dir())
self.assertTrue((root_dir / "catalogsync" / "data").is_dir())
self.assertTrue((root_dir / "catalogsync" / "inputs").is_dir())
self.assertTrue((root_dir / "catalogsync" / "logs").is_dir())
def test_runtime_config_ensure_directories_respects_override_paths(self):
from musicdl.catalogsync.runtime import CatalogSyncRuntimeConfig
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
root_dir = Path(tmpdir) / "Music_Cloud"
db_path = root_dir / "state" / "db" / "catalogsync.db"
input_dir = root_dir / "state" / "manual_inputs"
log_dir = root_dir / "state" / "runtime_logs"
config = CatalogSyncRuntimeConfig.from_mapping(
{
"ROOT_DIR": str(root_dir),
"DB_PATH": str(db_path),
"INPUT_DIR": str(input_dir),
"LOG_DIR": str(log_dir),
}
)
config.ensure_directories()
self.assertTrue(db_path.parent.is_dir())
self.assertTrue(input_dir.is_dir())
self.assertTrue(log_dir.is_dir())
def test_build_download_relative_dir_uses_platform_and_first_artist(self):
from musicdl.catalogsync.runtime import build_download_relative_dir
relative_dir = build_download_relative_dir(
platform="qq",
singers="Singer A / Singer B",
)
self.assertEqual(Path("qq") / "Singer A", relative_dir)
def test_build_download_relative_dir_falls_back_to_unknown_artist(self):
from musicdl.catalogsync.runtime import build_download_relative_dir
relative_dir = build_download_relative_dir(
platform="netease",
singers="",
)
self.assertEqual(Path("netease") / "Unknown Artist", relative_dir)
def test_catalogsync_env_example_contains_required_keys(self):
template = Path(
"scripts/catalogsync/templates/catalogsync.env.example"
).read_text(encoding="utf-8")
self.assertIn("ROOT_DIR=", template)
self.assertIn("APP_HOME=", template)
self.assertIn("LIBRARY_DIR=", template)
self.assertIn("DB_PATH=", template)
self.assertIn("INPUT_DIR=", template)
self.assertIn("LOG_DIR=", template)
self.assertIn("ENV_FILE=", template)
self.assertIn("WEB_HOST=", template)
self.assertIn("WEB_PORT=", template)
self.assertIn("PYTHON_BIN=", template)
self.assertIn("VENV_DIR=", template)
self.assertIn("DOWNLOAD_LAYOUT=platform_first_artist", template)
self.assertIn("DOWNLOAD_SOURCES=", template)
self.assertIn("OBJECT_BACKEND_NAME=", template)
self.assertIn("OBJECT_BUCKET=", template)
self.assertIn("OBJECT_ENDPOINT=", template)
self.assertIn("OBJECT_CREDENTIAL_ENV_PREFIX=", template)
self.assertIn("SYNC_WORKERS=", template)
self.assertIn("UPLOAD_WORKERS=", template)
def test_requirements_include_eval_type_backport_for_python38_pydantic_web_models(self):
requirements = Path("requirements.txt").read_text(encoding="utf-8")
self.assertIn("eval_type_backport", requirements)
def test_requirements_include_jinja2_for_ops_web_templates(self):
requirements = Path("requirements.txt").read_text(encoding="utf-8")
self.assertIn("jinja2", requirements)
def test_bootstrap_script_uses_remote_host_parameter_name(self):
script = Path("scripts/catalogsync/bootstrap_to_linux.ps1").read_text(
encoding="utf-8"
)
self.assertIn("[string]$RemoteHost", script)
self.assertNotIn("[string]$Host,", script)
def test_bootstrap_script_uses_remote_path_quoting_helpers(self):
script = Path("scripts/catalogsync/bootstrap_to_linux.ps1").read_text(
encoding="utf-8"
)
self.assertIn("function Quote-RemotePathForPosixShell", script)
self.assertIn("function New-ScpRemoteTarget", script)
self.assertIn("Quote-RemotePathForPosixShell -Path", script)
self.assertIn("New-ScpRemoteTarget -Remote $Remote", script)
self.assertIn("upload_all.sh", script)
self.assertIn("install_runtime.sh", script)
self.assertIn("serve_console.sh", script)
self.assertIn("README.md", script)
self.assertIn("LICENSE", script)
self.assertNotIn("${Remote}:$AppHome/app/", script)
self.assertNotIn("${Remote}:$AppHome/bin/", script)
def test_bootstrap_script_refreshes_env_example_when_template_changes(self):
script = Path("scripts/catalogsync/bootstrap_to_linux.ps1").read_text(
encoding="utf-8"
)
self.assertNotIn("if (($EnvCheck | Select-Object -Last 1).Trim() -eq \"missing\")", script)
self.assertIn("catalogsync.env.example", script)
def test_bootstrap_script_is_parseable_by_powershell(self):
powershell = shutil.which("pwsh") or shutil.which("powershell")
if powershell is None:
self.skipTest("PowerShell is unavailable in current environment.")
script_path = Path("scripts/catalogsync/bootstrap_to_linux.ps1").resolve()
escaped_path = str(script_path).replace("'", "''")
parse_command = (
"$tokens=$null; $errors=$null; "
f"[void][System.Management.Automation.Language.Parser]::ParseFile('{escaped_path}', [ref]$tokens, [ref]$errors); "
"if ($errors.Count -gt 0) { $errors | ForEach-Object { Write-Error $_.Message }; exit 1 }"
)
result = subprocess.run(
[powershell, "-NoProfile", "-Command", parse_command],
capture_output=True,
text=True,
check=False,
)
self.assertEqual(
0,
result.returncode,
msg=f"PowerShell parser errors:\n{result.stderr or result.stdout}",
)
def test_runtime_script_templates_pass_bash_syntax_check(self):
bash = shutil.which("bash")
if bash is None:
self.skipTest("bash is unavailable in current environment.")
for template_name in (
"load_env.sh",
"download_from_file.sh",
"download_all.sh",
"upload_all.sh",
"install_runtime.sh",
"serve_console.sh",
):
script_path = Path(
f"scripts/catalogsync/templates/{template_name}"
).resolve()
result = subprocess.run(
[bash, "-n", script_path.as_posix()],
capture_output=True,
text=True,
check=False,
)
self.assertEqual(
0,
result.returncode,
msg=f"bash -n failed for {template_name}:\n{result.stderr or result.stdout}",
)
def test_runtime_script_template_uses_configured_library_dir(self):
for template_name in ("download_from_file.sh", "download_all.sh"):
script = Path(
f"scripts/catalogsync/templates/{template_name}"
).read_text(encoding="utf-8")
self.assertIn("LIBRARY_DIR", script)
self.assertIn("DB_PATH", script)
self.assertIn("INPUT_DIR", script)
self.assertIn("LOG_DIR", script)
self.assertIn("PYTHON_BIN", script)
self.assertIn("PYTHONPATH", script)
self.assertIn("VENV_DIR", script)
self.assertIn('if [[ -n "${VENV_DIR:-}" && -x "${VENV_DIR}/bin/python" ]]', script)
self.assertIn("musicdl.catalogsync.cli run", script)
def test_upload_runtime_script_template_uses_upload_command_and_object_backend_vars(self):
script = Path("scripts/catalogsync/templates/upload_all.sh").read_text(
encoding="utf-8"
)
self.assertIn("DB_PATH", script)
self.assertIn("LOG_DIR", script)
self.assertIn("PYTHON_BIN", script)
self.assertIn("PYTHONPATH", script)
self.assertIn("VENV_DIR", script)
self.assertIn('if [[ -n "${VENV_DIR:-}" && -x "${VENV_DIR}/bin/python" ]]', script)
self.assertIn("OBJECT_BACKEND_NAME", script)
self.assertIn("OBJECT_BUCKET", script)
self.assertIn("OBJECT_ENDPOINT", script)
self.assertIn("OBJECT_CREDENTIAL_ENV_PREFIX", script)
self.assertIn("musicdl.catalogsync.cli register-object-backend", script)
self.assertIn("musicdl.catalogsync.cli upload", script)
def test_install_runtime_script_template_sets_up_venv_and_nas_requirements(self):
script = Path("scripts/catalogsync/templates/install_runtime.sh").read_text(
encoding="utf-8"
)
self.assertIn("VENV_DIR", script)
self.assertIn("PYTHON_BIN", script)
self.assertIn("requirements.nas.txt", script)
self.assertIn('APP_DIR="${APP_HOME}/app"', script)
self.assertIn('REQUIREMENTS_FILE="${APP_DIR}/requirements.txt"', script)
self.assertIn('SETUP_FILE="${APP_DIR}/setup.py"', script)
self.assertIn("grep -v '^nodejs-wheel$'", script)
self.assertIn('"${PYTHON_BIN}" -m venv "${VENV_DIR}"', script)
self.assertIn('"${RUNTIME_PYTHON_BIN}" -m pip install -r "${NAS_REQUIREMENTS_FILE}"', script)
self.assertIn('"${RUNTIME_PYTHON_BIN}" -m pip install --no-deps -e "${APP_DIR}"', script)
load_config_index = script.index('load_env_file "${CONFIG_FILE}"')
app_dir_index = script.index('APP_DIR="${APP_HOME}/app"')
self.assertGreater(app_dir_index, load_config_index)
def test_runtime_script_templates_include_preflight_and_log_file(self):
for template_name in (
"download_from_file.sh",
"download_all.sh",
"upload_all.sh",
"install_runtime.sh",
"serve_console.sh",
):
script = Path(
f"scripts/catalogsync/templates/{template_name}"
).read_text(encoding="utf-8")
self.assertIn('[[ -f "${CONFIG_FILE}" ]]', script)
self.assertIn('command -v "${PYTHON_BIN}"', script)
self.assertIn("for required_var in", script)
self.assertIn("DB_PATH", script)
self.assertIn("LOG_DIR", script)
self.assertIn("PYTHON_BIN", script)
self.assertIn('LOG_FILE="${LOG_DIR}/', script)
self.assertIn('exec > >(tee -a "${LOG_FILE}") 2>&1', script)
def test_runtime_script_templates_use_safe_env_loader(self):
helper = Path("scripts/catalogsync/templates/load_env.sh").read_text(
encoding="utf-8"
)
self.assertIn("load_env_file()", helper)
for template_name in (
"download_from_file.sh",
"download_all.sh",
"upload_all.sh",
"install_runtime.sh",
"serve_console.sh",
"deploy_and_restart.sh",
):
script = Path(
f"scripts/catalogsync/templates/{template_name}"
).read_text(encoding="utf-8")
self.assertIn('source "${SCRIPT_DIR}/load_env.sh"', script)
self.assertIn('load_env_file "${CONFIG_FILE}"', script)
self.assertNotIn('source "${CONFIG_FILE}"', script)
def test_serve_console_runtime_script_template_uses_serve_command_and_web_vars(self):
script = Path("scripts/catalogsync/templates/serve_console.sh").read_text(
encoding="utf-8"
)
self.assertIn("DB_PATH", script)
self.assertIn("ENV_FILE", script)
self.assertIn("WEB_HOST", script)
self.assertIn("WEB_PORT", script)
self.assertIn("LOG_DIR", script)
self.assertIn("PYTHON_BIN", script)
self.assertIn("PYTHONPATH", script)
self.assertIn("VENV_DIR", script)
self.assertIn("validate_port", script)
self.assertIn("WEB_PORT must be an integer in range 1..65535", script)
self.assertIn(
'if [[ -n "${VENV_DIR:-}" && -x "${VENV_DIR}/bin/python" ]]',
script,
)
self.assertIn("musicdl.catalogsync.cli serve", script)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,133 @@
import tempfile
import unittest
from pathlib import Path
class SuspectedLiveDetectionTests(unittest.TestCase):
    """Expected reason codes from ``detect_suspected_live_reason_codes``."""

    @staticmethod
    def _detect(name, album):
        """Run the detector for one song name / album pair."""
        from musicdl.catalogsync.suspected_live import detect_suspected_live_reason_codes

        return detect_suspected_live_reason_codes(name=name, album=album)

    def test_detect_reason_codes_matches_name_keyword(self):
        """A song named "Song A Live" is flagged with ``name_keyword`` only."""
        detected = self._detect("Song A Live", "Studio Album")
        self.assertEqual(["name_keyword"], detected)

    def test_detect_reason_codes_matches_album_show_keyword(self):
        """The show album used here is flagged with ``album_show_keyword`` only."""
        detected = self._detect("年轻的战场", "听!我们的歌")
        self.assertEqual(["album_show_keyword"], detected)

    def test_detect_reason_codes_ignores_album_that_repeats_song_name(self):
        """An album title that starts with the song name produces no reason codes."""
        detected = self._detect("Song A", "Song A 演唱会特别版")
        self.assertEqual([], detected)
class SuspectedLiveScannerTests(unittest.TestCase):
    """Database-backed checks for ``scan_suspected_live_songs``."""

    def _build_repo(self):
        """Create a fresh catalog database in a temporary directory.

        Returns:
            A ``(root, db_path, repository)`` tuple. The directory is removed
            by a cleanup registered on the test case.
        """
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.repository import CatalogRepository

        workspace = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
        self.addCleanup(workspace.cleanup)
        root = Path(workspace.name)
        db_path = root / "catalogsync.db"
        # The connection returned by the initializer is not needed afterwards.
        connection = initialize_database(db_path)
        connection.close()
        return root, db_path, CatalogRepository(db_path)

    @staticmethod
    def _add_song(repo, **fields):
        """Upsert a ``CatalogSong`` built from ``fields`` and return its song id."""
        from musicdl.catalogsync.models import CatalogSong

        return repo.upsert_song(CatalogSong(**fields))

    def test_scan_suspected_live_songs_returns_downloaded_candidates_by_default(self):
        """Without options only the song that has a local file is reported."""
        from musicdl.catalogsync.suspected_live import scan_suspected_live_songs

        root, db_path, repo = self._build_repo()
        local_backend = repo.ensure_local_backend(
            root / "library",
            name="default-local",
            is_default=True,
        )

        # Show-album song with a recorded local file: the expected match.
        expected_id = self._add_song(
            repo,
            platform="qq",
            remote_song_id="downloaded-live-1",
            name="年轻的战场",
            singers="张杰",
            album="听!我们的歌",
        )
        repo.record_local_file(
            song_id=expected_id,
            backend_id=local_backend,
            relative_path="qq/张杰/年轻的战场.flac",
            file_size_bytes=8,
            ext="flac",
            quality_label="lossless",
        )
        # Show-album song without any local file.
        self._add_song(
            repo,
            platform="netease",
            remote_song_id="undownloaded-live-1",
            name="Besame Mucho",
            singers="王晰",
            album="我是歌手第四季 第6期",
        )
        # Plain studio track, also without a local file.
        self._add_song(
            repo,
            platform="qq",
            remote_song_id="downloaded-studio-1",
            name="Studio Song",
            singers="Singer A",
            album="Studio Album",
        )

        found = scan_suspected_live_songs(db_path)

        self.assertEqual(1, len(found))
        only_match = found[0]
        self.assertEqual(expected_id, only_match.song_id)
        self.assertEqual(("album_show_keyword",), only_match.reason_codes)

    def test_scan_suspected_live_songs_can_include_undownloaded(self):
        """With ``downloaded_only=False`` both songs come back, newest id first."""
        from musicdl.catalogsync.suspected_live import scan_suspected_live_songs

        _, db_path, repo = self._build_repo()
        older_id = self._add_song(
            repo,
            platform="qq",
            remote_song_id="song-live-1",
            name="Song A",
            singers="Singer A",
            album="我们的歌",
        )
        newer_id = self._add_song(
            repo,
            platform="netease",
            remote_song_id="song-live-2",
            name="Song B",
            singers="Singer B",
            album="我是歌手第四季 第6期",
        )

        found = scan_suspected_live_songs(
            db_path,
            downloaded_only=False,
            limit=2,
        )

        returned_ids = [match.song_id for match in found]
        self.assertEqual([newer_id, older_id], returned_ids)
@@ -0,0 +1,374 @@
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
class ObjectStorageUploaderTests(unittest.TestCase):
    """Queueing and upload behaviour of ``CatalogUploader`` for an object backend.

    Uploads never reach a real service: tests that expect a transfer patch
    ``musicdl.catalogsync.uploader.build_s3_client`` with an in-memory fake.
    """

    def setUp(self):
        """Snapshot ``os.environ`` so credential changes never outlive a test."""
        # patch.dict restores the mapping to its original content when stopped,
        # which also brings back variables that the helpers below pop.
        env_snapshot = patch.dict(os.environ)
        env_snapshot.start()
        self.addCleanup(env_snapshot.stop)

    def _set_minimal_credentials(self):
        """Export the access key pair under the ``CATALOGSYNC_MAIN_S3`` prefix."""
        os.environ["CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID"] = "access-key"
        os.environ["CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY"] = "secret-key"

    def _clear_minimal_credentials(self):
        """Remove every ``CATALOGSYNC_MAIN_S3`` credential variable, if present."""
        os.environ.pop("CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID", None)
        os.environ.pop("CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY", None)
        os.environ.pop("CATALOGSYNC_MAIN_S3_SESSION_TOKEN", None)

    @staticmethod
    def _open_repository(tmpdir, *, with_library_root=True):
        """Initialize a catalog database inside ``tmpdir``.

        Args:
            tmpdir: Directory that receives ``catalogsync.db``.
            with_library_root: When true, ``<tmpdir>/library`` is passed to the
                initializer as ``default_library_root``.

        Returns:
            A ``(repository, library_root)`` tuple.
        """
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.repository import CatalogRepository

        db_path = Path(tmpdir) / "catalogsync.db"
        library_root = Path(tmpdir) / "library"
        if with_library_root:
            initialize_database(db_path, default_library_root=library_root).close()
        else:
            initialize_database(db_path).close()
        return CatalogRepository(db_path), library_root

    @staticmethod
    def _register_main_s3_backend(repo, **extra_fields):
        """Upsert the ``main-s3`` object backend and return the id it reports."""
        return repo.upsert_object_storage_backend(
            name="main-s3",
            container_name="music-bucket",
            endpoint="https://s3.example.com",
            region="auto",
            base_prefix="music",
            credential_env_prefix="CATALOGSYNC_MAIN_S3",
            **extra_fields,
        )

    @staticmethod
    def _record_local_file(
        repo,
        library_root,
        *,
        song_id,
        backend_id,
        relative_path,
        file_size_bytes,
        ext,
        quality_label,
        content=None,
    ):
        """Record a local file location and optionally create the file on disk.

        The file is only written when ``content`` is given, which lets a test
        register a location whose source file does not exist.
        """
        if content is not None:
            target = library_root / relative_path
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(content)
        repo.record_local_file(
            song_id=song_id,
            backend_id=backend_id,
            relative_path=relative_path,
            file_size_bytes=file_size_bytes,
            ext=ext,
            quality_label=quality_label,
        )

    def test_enqueue_missing_uploads_creates_task_with_mirrored_locator(self):
        """One local file yields one pending task whose locator mirrors its path."""
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, library_root = self._open_repository(tmpdir)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                    ext="flac",
                    file_size_bytes=128,
                    quality_label="lossless",
                )
            )
            self._record_local_file(
                repo,
                library_root,
                song_id=song_id,
                backend_id=repo.get_default_backend_id(),
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
                content=b"audio",
            )
            # Query with the id the repository reports rather than assuming
            # the object backend is always stored as row 2.
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            queued = uploader.enqueue_missing_uploads(backend_name="main-s3")
            tasks = repo.list_pending_upload_tasks(target_backend_id=backend_id)

            self.assertEqual(1, queued)
            self.assertEqual(1, len(tasks))
            self.assertEqual("music/qq/Singer A/song-a.flac", tasks[0]["target_locator"])

    def test_run_records_remote_location_and_presence(self):
        """A successful run stores an active, non-primary remote file location."""
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.uploader import CatalogUploader

        class FakeS3Client:
            """Collects ``upload_file`` calls instead of transferring data."""

            def __init__(self):
                self.uploads = []

            def upload_file(self, filename, bucket, key, ExtraArgs=None):
                self.uploads.append((filename, bucket, key, ExtraArgs))

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, library_root = self._open_repository(tmpdir)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                    ext="flac",
                    file_size_bytes=128,
                    quality_label="lossless",
                )
            )
            self._record_local_file(
                repo,
                library_root,
                song_id=song_id,
                backend_id=repo.get_default_backend_id(),
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
                content=b"audio",
            )
            backend_id = self._register_main_s3_backend(
                repo,
                public_base_url="https://cdn.example.com/music",
            )

            uploader = CatalogUploader(repository=repo, worker_count=2)
            uploader.enqueue_missing_uploads(backend_name="main-s3")
            self._set_minimal_credentials()
            fake_client = FakeS3Client()
            try:
                with patch(
                    "musicdl.catalogsync.uploader.build_s3_client",
                    return_value=fake_client,
                ):
                    summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            remote_location = repo._fetchone(
                """
                SELECT locator, public_url, is_primary, status, absolute_path
                FROM file_locations
                WHERE backend_id = ?
                ORDER BY id DESC
                LIMIT 1
                """,
                (backend_id,),
            )
            has_backend_file = repo.song_has_active_backend_file(song_id, backend_id)

            self.assertEqual(1, summary["succeeded"])
            self.assertEqual(1, len(fake_client.uploads))
            self.assertEqual("music/qq/Singer A/song-a.flac", remote_location["locator"])
            self.assertEqual(
                "https://cdn.example.com/music/qq/Singer A/song-a.flac",
                remote_location["public_url"],
            )
            self.assertEqual(0, remote_location["is_primary"])
            self.assertEqual("active", remote_location["status"])
            self.assertIsNone(remote_location["absolute_path"])
            self.assertTrue(has_backend_file)

    def test_run_marks_task_failed_when_source_file_is_missing(self):
        """A recorded file that is absent on disk ends as a failed upload task."""
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, library_root = self._open_repository(tmpdir)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-missing",
                    name="Missing",
                    singers="Singer A",
                    ext="mp3",
                    file_size_bytes=64,
                    quality_label="standard",
                )
            )
            # No content: the location is recorded but the file is never created.
            self._record_local_file(
                repo,
                library_root,
                song_id=song_id,
                backend_id=repo.get_default_backend_id(),
                relative_path="qq/Singer A/song-missing.mp3",
                file_size_bytes=64,
                ext="mp3",
                quality_label="standard",
            )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            uploader.enqueue_missing_uploads(backend_name="main-s3")
            self._set_minimal_credentials()
            try:
                summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            task_row = repo._fetchone(
                "SELECT status, last_error FROM upload_tasks WHERE target_backend_id = ? ORDER BY id DESC LIMIT 1",
                (backend_id,),
            )

            self.assertEqual(1, summary["failed"])
            self.assertEqual("failed", task_row["status"])
            self.assertIn("does not exist", task_row["last_error"])

    def test_build_s3_client_requires_credentials(self):
        """``build_s3_client`` raises ``RuntimeError`` when no credentials are set."""
        from musicdl.catalogsync.uploader import build_s3_client

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, _ = self._open_repository(tmpdir, with_library_root=False)
            backend_id = self._register_main_s3_backend(repo)
            backend = repo.get_backend(backend_id)

            self._clear_minimal_credentials()
            with self.assertRaises(RuntimeError):
                build_s3_client(backend)

    def test_enqueue_missing_uploads_includes_multiple_local_versions(self):
        """Two local files of the same song are queued as two separate tasks."""
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, library_root = self._open_repository(tmpdir)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                )
            )
            local_backend_id = repo.get_default_backend_id()
            self._record_local_file(
                repo,
                library_root,
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
                content=b"flac",
            )
            self._record_local_file(
                repo,
                library_root,
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.mp3",
                file_size_bytes=64,
                ext="mp3",
                quality_label="standard",
                content=b"mp3",
            )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            queued = uploader.enqueue_missing_uploads(backend_name="main-s3")
            tasks = repo.list_pending_upload_tasks(target_backend_id=backend_id)

            self.assertEqual(2, queued)
            self.assertEqual(2, len(tasks))

    def test_run_with_multiple_workers_does_not_duplicate_uploads(self):
        """Three workers upload four files exactly once each."""
        import threading
        import time

        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.uploader import CatalogUploader

        class FakeS3Client:
            """Slow fake client that records uploads under a lock."""

            def __init__(self):
                self.lock = threading.Lock()
                self.uploads = []

            def upload_file(self, filename, bucket, key, ExtraArgs=None):
                # The short delay keeps several uploads in flight at once.
                time.sleep(0.02)
                with self.lock:
                    self.uploads.append((filename, bucket, key))

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            repo, library_root = self._open_repository(tmpdir)
            local_backend_id = repo.get_default_backend_id()
            for index in range(4):
                song_id = repo.upsert_song(
                    CatalogSong(
                        platform="qq",
                        remote_song_id=f"song-{index}",
                        name=f"Song {index}",
                        singers="Singer A",
                        ext="mp3",
                        file_size_bytes=64 + index,
                        quality_label="standard",
                    )
                )
                self._record_local_file(
                    repo,
                    library_root,
                    song_id=song_id,
                    backend_id=local_backend_id,
                    relative_path=f"qq/Singer A/song-{index}.mp3",
                    file_size_bytes=64 + index,
                    ext="mp3",
                    quality_label="standard",
                    content=b"audio",
                )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=3)
            uploader.enqueue_missing_uploads(backend_name="main-s3")
            self._set_minimal_credentials()
            fake_client = FakeS3Client()
            try:
                with patch(
                    "musicdl.catalogsync.uploader.build_s3_client",
                    return_value=fake_client,
                ):
                    summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            pending_rows = repo.list_pending_upload_tasks(target_backend_id=backend_id)
            remote_rows = repo._fetchall(
                "SELECT locator FROM file_locations WHERE backend_id = ? ORDER BY locator ASC",
                (backend_id,),
            )

            self.assertEqual(4, summary["succeeded"])
            self.assertEqual(0, summary["failed"])
            self.assertEqual(0, len(pending_rows))
            self.assertEqual(4, len(fake_client.uploads))
            # Distinct object keys prove that no file was uploaded twice.
            self.assertEqual(4, len({item[2] for item in fake_client.uploads}))
            self.assertEqual(4, len(remote_rows))
if __name__ == "__main__":
    # Run the test cases of this module when the file is executed directly.
    unittest.main()