import os import tempfile import unittest from pathlib import Path from unittest.mock import patch class ObjectStorageUploaderTests(unittest.TestCase): def _set_minimal_credentials(self): os.environ["CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID"] = "access-key" os.environ["CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY"] = "secret-key" def _clear_minimal_credentials(self): os.environ.pop("CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID", None) os.environ.pop("CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY", None) os.environ.pop("CATALOGSYNC_MAIN_S3_SESSION_TOKEN", None) def test_enqueue_missing_uploads_creates_task_with_mirrored_locator(self): from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.models import CatalogSong from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import CatalogUploader with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" library_root = Path(tmpdir) / "library" initialize_database(db_path, default_library_root=library_root).close() repo = CatalogRepository(db_path) song_id = repo.upsert_song( CatalogSong( platform="qq", remote_song_id="song-a", name="Song A", singers="Singer A", ext="flac", file_size_bytes=128, quality_label="lossless", ) ) backend_id = repo.get_default_backend_id() local_file = library_root / "qq" / "Singer A" / "song-a.flac" local_file.parent.mkdir(parents=True, exist_ok=True) local_file.write_bytes(b"audio") repo.record_local_file( song_id=song_id, backend_id=backend_id, relative_path="qq/Singer A/song-a.flac", file_size_bytes=128, ext="flac", quality_label="lossless", ) repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", ) uploader = CatalogUploader(repository=repo, worker_count=2) queued = uploader.enqueue_missing_uploads(backend_name="main-s3") tasks = repo.list_pending_upload_tasks(target_backend_id=2) self.assertEqual(1, queued) self.assertEqual(1, len(tasks)) self.assertEqual("music/qq/Singer A/song-a.flac", tasks[0]["target_locator"]) def test_run_records_remote_location_and_presence(self): from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.models import CatalogSong from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import CatalogUploader class FakeS3Client: def __init__(self): self.uploads = [] def upload_file(self, filename, bucket, key, ExtraArgs=None): self.uploads.append((filename, bucket, key, ExtraArgs)) with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" library_root = Path(tmpdir) / "library" initialize_database(db_path, default_library_root=library_root).close() repo = CatalogRepository(db_path) song_id = repo.upsert_song( CatalogSong( platform="qq", remote_song_id="song-a", name="Song A", singers="Singer A", ext="flac", file_size_bytes=128, quality_label="lossless", ) ) local_backend_id = repo.get_default_backend_id() local_file = library_root / "qq" / "Singer A" / "song-a.flac" local_file.parent.mkdir(parents=True, exist_ok=True) local_file.write_bytes(b"audio") repo.record_local_file( song_id=song_id, backend_id=local_backend_id, relative_path="qq/Singer A/song-a.flac", file_size_bytes=128, ext="flac", quality_label="lossless", ) backend_id = repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", public_base_url="https://cdn.example.com/music", ) uploader = CatalogUploader(repository=repo, worker_count=2) uploader.enqueue_missing_uploads(backend_name="main-s3") self._set_minimal_credentials() fake_client = FakeS3Client() try: with patch( "musicdl.catalogsync.uploader.build_s3_client", return_value=fake_client, ): summary = uploader.run(backend_name="main-s3") finally: self._clear_minimal_credentials() remote_location = repo._fetchone( """ SELECT locator, public_url, is_primary, status, absolute_path FROM file_locations WHERE backend_id = ? ORDER BY id DESC LIMIT 1 """, (backend_id,), ) has_backend_file = repo.song_has_active_backend_file(song_id, backend_id) self.assertEqual(1, summary["succeeded"]) self.assertEqual(1, len(fake_client.uploads)) self.assertEqual("music/qq/Singer A/song-a.flac", remote_location["locator"]) self.assertEqual("https://cdn.example.com/music/qq/Singer A/song-a.flac", remote_location["public_url"]) self.assertEqual(0, remote_location["is_primary"]) self.assertEqual("active", remote_location["status"]) self.assertIsNone(remote_location["absolute_path"]) self.assertTrue(has_backend_file) def test_run_marks_task_failed_when_source_file_is_missing(self): from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.models import CatalogSong from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import CatalogUploader with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" library_root = Path(tmpdir) / "library" initialize_database(db_path, default_library_root=library_root).close() repo = CatalogRepository(db_path) song_id = repo.upsert_song( CatalogSong( platform="qq", remote_song_id="song-missing", name="Missing", singers="Singer A", ext="mp3", file_size_bytes=64, quality_label="standard", ) ) local_backend_id = repo.get_default_backend_id() repo.record_local_file( song_id=song_id, backend_id=local_backend_id, relative_path="qq/Singer A/song-missing.mp3", file_size_bytes=64, ext="mp3", quality_label="standard", ) backend_id = repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", ) uploader = CatalogUploader(repository=repo, worker_count=2) uploader.enqueue_missing_uploads(backend_name="main-s3") self._set_minimal_credentials() try: summary = uploader.run(backend_name="main-s3") finally: self._clear_minimal_credentials() task_row = repo._fetchone( "SELECT status, last_error FROM upload_tasks WHERE target_backend_id = ? ORDER BY id DESC LIMIT 1", (backend_id,), ) self.assertEqual(1, summary["failed"]) self.assertEqual("failed", task_row["status"]) self.assertIn("does not exist", task_row["last_error"]) def test_build_s3_client_requires_credentials(self): from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import build_s3_client with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" initialize_database(db_path).close() repo = CatalogRepository(db_path) backend_id = repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", ) backend = repo.get_backend(backend_id) self._clear_minimal_credentials() with self.assertRaises(RuntimeError): build_s3_client(backend) def test_enqueue_missing_uploads_includes_multiple_local_versions(self): from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.models import CatalogSong from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import CatalogUploader with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" library_root = Path(tmpdir) / "library" initialize_database(db_path, default_library_root=library_root).close() repo = CatalogRepository(db_path) song_id = repo.upsert_song( CatalogSong( platform="qq", remote_song_id="song-a", name="Song A", singers="Singer A", ) ) local_backend_id = repo.get_default_backend_id() file_a = library_root / "qq" / "Singer A" / "song-a.flac" file_a.parent.mkdir(parents=True, exist_ok=True) file_a.write_bytes(b"flac") file_b = library_root / "qq" / "Singer A" / "song-a.mp3" file_b.write_bytes(b"mp3") repo.record_local_file( song_id=song_id, backend_id=local_backend_id, relative_path="qq/Singer A/song-a.flac", file_size_bytes=128, ext="flac", quality_label="lossless", ) repo.record_local_file( song_id=song_id, backend_id=local_backend_id, relative_path="qq/Singer A/song-a.mp3", file_size_bytes=64, ext="mp3", quality_label="standard", ) backend_id = repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", ) uploader = CatalogUploader(repository=repo, worker_count=2) queued = uploader.enqueue_missing_uploads(backend_name="main-s3") tasks = repo.list_pending_upload_tasks(target_backend_id=backend_id) self.assertEqual(2, queued) self.assertEqual(2, len(tasks)) def test_run_with_multiple_workers_does_not_duplicate_uploads(self): import threading import time from musicdl.catalogsync.db import initialize_database from musicdl.catalogsync.models import CatalogSong from musicdl.catalogsync.repository import CatalogRepository from musicdl.catalogsync.uploader import CatalogUploader class FakeS3Client: def __init__(self): self.lock = threading.Lock() self.uploads = [] def upload_file(self, filename, bucket, key, ExtraArgs=None): time.sleep(0.02) with self.lock: self.uploads.append((filename, bucket, key)) with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: db_path = Path(tmpdir) / "catalogsync.db" library_root = Path(tmpdir) / "library" initialize_database(db_path, default_library_root=library_root).close() repo = CatalogRepository(db_path) local_backend_id = repo.get_default_backend_id() for index in range(4): song_id = repo.upsert_song( CatalogSong( platform="qq", remote_song_id=f"song-{index}", name=f"Song {index}", singers="Singer A", ext="mp3", file_size_bytes=64 + index, quality_label="standard", ) ) local_file = library_root / "qq" / "Singer A" / f"song-{index}.mp3" local_file.parent.mkdir(parents=True, exist_ok=True) local_file.write_bytes(b"audio") repo.record_local_file( song_id=song_id, backend_id=local_backend_id, relative_path=f"qq/Singer A/song-{index}.mp3", file_size_bytes=64 + index, ext="mp3", quality_label="standard", ) backend_id = repo.upsert_object_storage_backend( name="main-s3", container_name="music-bucket", endpoint="https://s3.example.com", region="auto", base_prefix="music", credential_env_prefix="CATALOGSYNC_MAIN_S3", ) uploader = CatalogUploader(repository=repo, worker_count=3) uploader.enqueue_missing_uploads(backend_name="main-s3") self._set_minimal_credentials() fake_client = FakeS3Client() try: with patch("musicdl.catalogsync.uploader.build_s3_client", return_value=fake_client): summary = uploader.run(backend_name="main-s3") finally: self._clear_minimal_credentials() pending_rows = repo.list_pending_upload_tasks(target_backend_id=backend_id) remote_rows = repo._fetchall( "SELECT locator FROM file_locations WHERE backend_id = ? ORDER BY locator ASC", (backend_id,), ) self.assertEqual(4, summary["succeeded"]) self.assertEqual(0, summary["failed"]) self.assertEqual(0, len(pending_rows)) self.assertEqual(4, len(fake_client.uploads)) self.assertEqual(4, len({item[2] for item in fake_client.uploads})) self.assertEqual(4, len(remote_rows)) if __name__ == "__main__": unittest.main()