# 211 lines, 8.7 KiB, Python
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
|
|
class LocalMaintenanceTests(unittest.TestCase):
    """Tests for ``LocalMaintenanceService`` duplicate scanning and deduping.

    Every test works against a fresh database created inside a temporary
    directory. Project modules are imported inside the helpers/tests rather
    than at module level.
    """

    # Locators of the two local files recorded for the same song. The same
    # strings are used for the catalog rows, the files on disk and the
    # assertions, so they are defined once here.
    CANONICAL_LOCATOR = "Singer A/Duplicate Song.flac"
    DUPLICATE_LOCATOR = "Singer A/Duplicate Song (1).flac"

    # Bytes written to both files; the recorded file size is derived from it
    # so the catalog rows always match what is on disk.
    FILE_CONTENT = b"abcdefg"

    def _build_repo(self):
        """Create an initialized database in a temporary directory.

        Returns:
            A ``(root, db_path, repo)`` tuple: the temporary directory path,
            the database file path, and a ``CatalogRepository`` opened on it.
        """
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.repository import CatalogRepository

        tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
        # Remove the directory when the test finishes, pass or fail.
        self.addCleanup(tmpdir.cleanup)
        root = Path(tmpdir.name)
        db_path = root / "catalogsync.db"
        # Only the initialization side effect is needed; the returned handle
        # is closed immediately.
        initialize_database(db_path).close()
        return root, db_path, CatalogRepository(db_path)

    def _fetch_required_row(self, repo, sql, params):
        """Run ``repo._fetchone(sql, params)`` and fail the test if no row comes back.

        Indexing a missing row would raise an unhelpful ``TypeError``; asserting
        here reports which query returned nothing instead.
        """
        row = repo._fetchone(sql, params)
        self.assertIsNotNone(row, f"no row returned for {sql!r} with {params!r}")
        return row

    def _seed_duplicate_local_files(self):
        """Seed one song with two local files on the same backend.

        Both files are recorded in the catalog and written to disk with
        identical content under ``<root>/library``.

        Returns:
            A dict with the keys ``root``, ``db_path``, ``repo``, ``song_id``,
            ``backend_id``, ``asset_id``, ``canonical_location_id``,
            ``duplicate_location_id``, ``canonical_path`` and
            ``duplicate_path``.
        """
        from musicdl.catalogsync.models import CatalogSong

        root, db_path, repo = self._build_repo()
        file_size = len(self.FILE_CONTENT)

        song_id = repo.upsert_song(
            CatalogSong(
                platform="qq",
                remote_song_id="song-dup-1",
                name="Duplicate Song",
                singers="Singer A",
                ext="flac",
                file_size_bytes=file_size,
                quality_label="lossless",
                metadata={},
            )
        )
        library_root = root / "library"
        backend_id = repo.ensure_local_backend(
            library_root,
            name="default-local",
            is_default=True,
        )

        # The first recorded file supplies the asset id handed back to tests.
        asset_id = repo.record_local_file(
            song_id=song_id,
            backend_id=backend_id,
            relative_path=self.CANONICAL_LOCATOR,
            file_size_bytes=file_size,
            ext="flac",
            quality_label="lossless",
        )
        repo.record_local_file(
            song_id=song_id,
            backend_id=backend_id,
            relative_path=self.DUPLICATE_LOCATOR,
            file_size_bytes=file_size,
            ext="flac",
            quality_label="lossless",
        )

        # Joining the "Singer A/..." locator onto the library root yields the
        # same path as joining the directory and file name separately.
        canonical_path = library_root / self.CANONICAL_LOCATOR
        duplicate_path = library_root / self.DUPLICATE_LOCATOR
        canonical_path.parent.mkdir(parents=True, exist_ok=True)
        canonical_path.write_bytes(self.FILE_CONTENT)
        duplicate_path.write_bytes(self.FILE_CONTENT)

        location_sql = "SELECT * FROM file_locations WHERE locator = ?"
        canonical_row = self._fetch_required_row(
            repo, location_sql, (self.CANONICAL_LOCATOR,)
        )
        duplicate_row = self._fetch_required_row(
            repo, location_sql, (self.DUPLICATE_LOCATOR,)
        )

        return {
            "root": root,
            "db_path": db_path,
            "repo": repo,
            "song_id": song_id,
            "backend_id": backend_id,
            "asset_id": asset_id,
            "canonical_location_id": int(canonical_row["id"]),
            "duplicate_location_id": int(duplicate_row["id"]),
            "canonical_path": canonical_path,
            "duplicate_path": duplicate_path,
        }

    def test_scan_local_duplicates_reports_groups_and_prefers_canonical_locator(self):
        """Scanning reports one group that keeps the un-suffixed locator."""
        from musicdl.catalogsync.ops.maintenance import LocalMaintenanceService

        seeded = self._seed_duplicate_local_files()
        service = LocalMaintenanceService(seeded["db_path"])

        payload = service.scan_local_duplicates(sample_limit=10)

        summary = payload["summary"]
        self.assertEqual(1, summary["duplicate_group_count"])
        self.assertEqual(1, summary["duplicate_location_count"])
        self.assertEqual(2, summary["scanned_active_local_location_count"])

        self.assertEqual(1, len(payload["groups"]))
        group = payload["groups"][0]
        self.assertEqual(seeded["song_id"], group["song_id"])
        self.assertEqual(seeded["backend_id"], group["backend_id"])
        self.assertEqual("Duplicate Song", group["song_name"])

        kept = group["keep"]
        self.assertEqual(seeded["canonical_location_id"], kept["id"])
        self.assertEqual(self.CANONICAL_LOCATOR, kept["locator"])
        self.assertTrue(kept["file_exists"])

        self.assertEqual(1, len(group["duplicates"]))
        duplicate = group["duplicates"][0]
        self.assertEqual(seeded["duplicate_location_id"], duplicate["id"])
        self.assertEqual(self.DUPLICATE_LOCATOR, duplicate["locator"])

    def test_dedupe_local_duplicates_repoints_references_and_deletes_duplicate_files(self):
        """Deduping repoints references to the kept location and removes the duplicate file.

        An upload task and a job item are both created against the duplicate
        location; after deduping they must reference the canonical location.
        """
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.repository import OpsRepository
        from musicdl.catalogsync.ops.maintenance import LocalMaintenanceService

        seeded = self._seed_duplicate_local_files()
        repo = seeded["repo"]
        ops_repo = OpsRepository(seeded["db_path"])
        canonical_id = seeded["canonical_location_id"]
        duplicate_id = seeded["duplicate_location_id"]

        remote_backend_id = repo.upsert_object_storage_backend(
            name="test-bucket",
            container_name="music",
            endpoint="https://s3.example.invalid",
            region=None,
            base_prefix="catalogsync",
            credential_env_prefix="CATALOGSYNC_TEST",
            public_base_url="https://cdn.example.invalid",
        )
        upload_task_id = repo.enqueue_upload_task(
            file_asset_id=seeded["asset_id"],
            source_location_id=duplicate_id,
            target_backend_id=remote_backend_id,
            target_container_name="music",
            target_locator="Singer A/Duplicate Song.flac",
        )
        job_id = ops_repo.create_job(
            job_type="upload_only",
            config_snapshot={},
            status=JobStatus.QUEUED,
        )
        stage_id = ops_repo.create_stage(job_run_id=job_id, stage_type="upload", seq_no=1)
        item_id = ops_repo.create_item(
            job_stage_id=stage_id,
            item_type="song_upload",
            item_key="upload:dup-song",
            song_id=seeded["song_id"],
            file_location_id=duplicate_id,
        )

        service = LocalMaintenanceService(seeded["db_path"])
        payload = service.dedupe_local_duplicates(sample_limit=10)

        summary = payload["summary"]
        self.assertEqual(0, summary["duplicate_group_count"])
        self.assertEqual(0, summary["duplicate_location_count"])

        execution = payload["execution"]
        self.assertEqual(1, execution["deduped_group_count"])
        self.assertEqual(1, execution["inactive_location_count"])
        self.assertEqual(1, execution["deleted_file_count"])
        self.assertEqual(len(self.FILE_CONTENT), execution["released_bytes"])
        self.assertEqual(1, execution["repointed_upload_task_count"])
        self.assertEqual(1, execution["repointed_job_item_count"])

        location_sql = "SELECT status, is_primary FROM file_locations WHERE id = ?"
        duplicate_location = self._fetch_required_row(repo, location_sql, (duplicate_id,))
        self.assertEqual("inactive", duplicate_location["status"])
        self.assertEqual(0, int(duplicate_location["is_primary"]))
        canonical_location = self._fetch_required_row(repo, location_sql, (canonical_id,))
        self.assertEqual("active", canonical_location["status"])
        self.assertEqual(1, int(canonical_location["is_primary"]))

        upload_task = self._fetch_required_row(
            repo,
            "SELECT source_location_id FROM upload_tasks WHERE id = ?",
            (upload_task_id,),
        )
        self.assertEqual(canonical_id, int(upload_task["source_location_id"]))
        job_item = self._fetch_required_row(
            ops_repo,
            "SELECT file_location_id FROM job_items WHERE id = ?",
            (item_id,),
        )
        self.assertEqual(canonical_id, int(job_item["file_location_id"]))

        presence = repo.get_song_backend_presence(
            song_id=seeded["song_id"],
            backend_id=seeded["backend_id"],
        )
        self.assertIsNotNone(presence)
        self.assertEqual(1, int(presence["active_file_count"]))
        self.assertEqual(canonical_id, int(presence["primary_file_location_id"]))

        self.assertTrue(seeded["canonical_path"].exists())
        self.assertFalse(seeded["duplicate_path"].exists())

    def test_dedupe_local_duplicates_raises_when_jobs_or_items_are_running(self):
        """Deduping raises ``LocalDedupeBlockedError`` while a job is RUNNING.

        Only the running-job case is exercised here; no running item is created.
        """
        from musicdl.catalogsync.ops.models import JobStatus
        from musicdl.catalogsync.ops.maintenance import (
            LocalDedupeBlockedError,
            LocalMaintenanceService,
        )
        from musicdl.catalogsync.ops.repository import OpsRepository

        seeded = self._seed_duplicate_local_files()
        ops_repo = OpsRepository(seeded["db_path"])
        ops_repo.create_job(
            job_type="download_only",
            config_snapshot={},
            status=JobStatus.RUNNING,
        )
        service = LocalMaintenanceService(seeded["db_path"])

        with self.assertRaises(LocalDedupeBlockedError):
            service.dedupe_local_duplicates()
|