# Source: musicdl-catalog-sync-suite/catalog-sync/tests/catalogsync/test_uploader.py
# (Python, 375 lines, 16 KiB)

import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
class ObjectStorageUploaderTests(unittest.TestCase):
    """Exercise ``CatalogUploader`` and ``build_s3_client`` on a throwaway catalog.

    Each test creates its own database inside a temporary directory and
    registers an object-storage backend named ``main-s3``. Project modules
    are imported inside the individual tests rather than at module level.
    """

    def setUp(self):
        """Snapshot ``os.environ`` so credential changes never outlive a test."""
        environ_snapshot = patch.dict(os.environ)
        environ_snapshot.start()
        # Restores the exact pre-test environment, including variables that a
        # test removed via ``_clear_minimal_credentials``.
        self.addCleanup(environ_snapshot.stop)

    def _set_minimal_credentials(self):
        """Set fake access-key variables under the ``CATALOGSYNC_MAIN_S3`` prefix."""
        os.environ["CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID"] = "access-key"
        os.environ["CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY"] = "secret-key"

    def _clear_minimal_credentials(self):
        """Remove every ``CATALOGSYNC_MAIN_S3`` credential variable, if present."""
        os.environ.pop("CATALOGSYNC_MAIN_S3_ACCESS_KEY_ID", None)
        os.environ.pop("CATALOGSYNC_MAIN_S3_SECRET_ACCESS_KEY", None)
        os.environ.pop("CATALOGSYNC_MAIN_S3_SESSION_TOKEN", None)

    def _register_main_s3_backend(self, repo, **overrides):
        """Register the ``main-s3`` backend shared by the tests.

        Args:
            repo: Repository exposing ``upsert_object_storage_backend``.
            **overrides: Extra or replacement keyword arguments for the upsert
                (for example ``public_base_url``).

        Returns:
            The value returned by ``upsert_object_storage_backend``; the tests
            use it as the backend id.
        """
        settings = {
            "name": "main-s3",
            "container_name": "music-bucket",
            "endpoint": "https://s3.example.com",
            "region": "auto",
            "base_prefix": "music",
            "credential_env_prefix": "CATALOGSYNC_MAIN_S3",
        }
        settings.update(overrides)
        return repo.upsert_object_storage_backend(**settings)

    def test_enqueue_missing_uploads_creates_task_with_mirrored_locator(self):
        """One local file yields one task whose locator is prefix + relative path."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            initialize_database(db_path, default_library_root=library_root).close()
            repo = CatalogRepository(db_path)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                    ext="flac",
                    file_size_bytes=128,
                    quality_label="lossless",
                )
            )
            local_backend_id = repo.get_default_backend_id()
            local_file = library_root / "qq" / "Singer A" / "song-a.flac"
            local_file.parent.mkdir(parents=True, exist_ok=True)
            local_file.write_bytes(b"audio")
            repo.record_local_file(
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
            )
            # Query with the id the upsert returned instead of assuming the
            # object-storage backend is always row 2.
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            queued = uploader.enqueue_missing_uploads(backend_name="main-s3")
            tasks = repo.list_pending_upload_tasks(target_backend_id=backend_id)

            self.assertEqual(1, queued)
            self.assertEqual(1, len(tasks))
            self.assertEqual(
                "music/qq/Singer A/song-a.flac", tasks[0]["target_locator"]
            )

    def test_run_records_remote_location_and_presence(self):
        """A successful run stores a non-primary, active remote location row."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import CatalogUploader

        class FakeS3Client:
            """Stand-in client that records ``upload_file`` calls."""

            def __init__(self):
                self.uploads = []

            def upload_file(self, filename, bucket, key, ExtraArgs=None):
                self.uploads.append((filename, bucket, key, ExtraArgs))

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            initialize_database(db_path, default_library_root=library_root).close()
            repo = CatalogRepository(db_path)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                    ext="flac",
                    file_size_bytes=128,
                    quality_label="lossless",
                )
            )
            local_backend_id = repo.get_default_backend_id()
            local_file = library_root / "qq" / "Singer A" / "song-a.flac"
            local_file.parent.mkdir(parents=True, exist_ok=True)
            local_file.write_bytes(b"audio")
            repo.record_local_file(
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
            )
            backend_id = self._register_main_s3_backend(
                repo, public_base_url="https://cdn.example.com/music"
            )

            uploader = CatalogUploader(repository=repo, worker_count=2)
            uploader.enqueue_missing_uploads(backend_name="main-s3")

            self._set_minimal_credentials()
            fake_client = FakeS3Client()
            try:
                # Replace the client factory so no real S3 client is built.
                with patch(
                    "musicdl.catalogsync.uploader.build_s3_client",
                    return_value=fake_client,
                ):
                    summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            remote_location = repo._fetchone(
                """
                SELECT locator, public_url, is_primary, status, absolute_path
                FROM file_locations
                WHERE backend_id = ?
                ORDER BY id DESC
                LIMIT 1
                """,
                (backend_id,),
            )
            has_backend_file = repo.song_has_active_backend_file(song_id, backend_id)

            self.assertEqual(1, summary["succeeded"])
            self.assertEqual(1, len(fake_client.uploads))
            self.assertEqual(
                "music/qq/Singer A/song-a.flac", remote_location["locator"]
            )
            self.assertEqual(
                "https://cdn.example.com/music/qq/Singer A/song-a.flac",
                remote_location["public_url"],
            )
            self.assertEqual(0, remote_location["is_primary"])
            self.assertEqual("active", remote_location["status"])
            self.assertIsNone(remote_location["absolute_path"])
            self.assertTrue(has_backend_file)

    def test_run_marks_task_failed_when_source_file_is_missing(self):
        """A recorded file that is absent on disk fails its upload task."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            initialize_database(db_path, default_library_root=library_root).close()
            repo = CatalogRepository(db_path)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-missing",
                    name="Missing",
                    singers="Singer A",
                    ext="mp3",
                    file_size_bytes=64,
                    quality_label="standard",
                )
            )
            local_backend_id = repo.get_default_backend_id()
            # The file is recorded in the catalog but deliberately never
            # written to disk.
            repo.record_local_file(
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-missing.mp3",
                file_size_bytes=64,
                ext="mp3",
                quality_label="standard",
            )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            uploader.enqueue_missing_uploads(backend_name="main-s3")

            self._set_minimal_credentials()
            try:
                summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            task_row = repo._fetchone(
                "SELECT status, last_error FROM upload_tasks WHERE target_backend_id = ? ORDER BY id DESC LIMIT 1",
                (backend_id,),
            )

            self.assertEqual(1, summary["failed"])
            self.assertEqual("failed", task_row["status"])
            self.assertIn("does not exist", task_row["last_error"])

    def test_build_s3_client_requires_credentials(self):
        """``build_s3_client`` raises ``RuntimeError`` without credential variables."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import build_s3_client

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            initialize_database(db_path).close()
            repo = CatalogRepository(db_path)
            backend_id = self._register_main_s3_backend(repo)
            backend = repo.get_backend(backend_id)

            # The environment snapshot taken in setUp undoes this removal
            # once the test finishes.
            self._clear_minimal_credentials()
            with self.assertRaises(RuntimeError):
                build_s3_client(backend)

    def test_enqueue_missing_uploads_includes_multiple_local_versions(self):
        """Two local files recorded for one song produce two upload tasks."""
        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import CatalogUploader

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            initialize_database(db_path, default_library_root=library_root).close()
            repo = CatalogRepository(db_path)
            song_id = repo.upsert_song(
                CatalogSong(
                    platform="qq",
                    remote_song_id="song-a",
                    name="Song A",
                    singers="Singer A",
                )
            )
            local_backend_id = repo.get_default_backend_id()
            file_a = library_root / "qq" / "Singer A" / "song-a.flac"
            file_a.parent.mkdir(parents=True, exist_ok=True)
            file_a.write_bytes(b"flac")
            file_b = library_root / "qq" / "Singer A" / "song-a.mp3"
            file_b.write_bytes(b"mp3")
            repo.record_local_file(
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.flac",
                file_size_bytes=128,
                ext="flac",
                quality_label="lossless",
            )
            repo.record_local_file(
                song_id=song_id,
                backend_id=local_backend_id,
                relative_path="qq/Singer A/song-a.mp3",
                file_size_bytes=64,
                ext="mp3",
                quality_label="standard",
            )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=2)
            queued = uploader.enqueue_missing_uploads(backend_name="main-s3")
            tasks = repo.list_pending_upload_tasks(target_backend_id=backend_id)

            self.assertEqual(2, queued)
            self.assertEqual(2, len(tasks))

    def test_run_with_multiple_workers_does_not_duplicate_uploads(self):
        """Three workers upload four files exactly once each."""
        import threading
        import time

        from musicdl.catalogsync.db import initialize_database
        from musicdl.catalogsync.models import CatalogSong
        from musicdl.catalogsync.repository import CatalogRepository
        from musicdl.catalogsync.uploader import CatalogUploader

        class FakeS3Client:
            """Stand-in client that records uploads under a lock."""

            def __init__(self):
                self.lock = threading.Lock()
                self.uploads = []

            def upload_file(self, filename, bucket, key, ExtraArgs=None):
                # Short pause so calls made from different threads can overlap.
                time.sleep(0.02)
                with self.lock:
                    self.uploads.append((filename, bucket, key))

        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
            db_path = Path(tmpdir) / "catalogsync.db"
            library_root = Path(tmpdir) / "library"
            initialize_database(db_path, default_library_root=library_root).close()
            repo = CatalogRepository(db_path)
            local_backend_id = repo.get_default_backend_id()
            for index in range(4):
                song_id = repo.upsert_song(
                    CatalogSong(
                        platform="qq",
                        remote_song_id=f"song-{index}",
                        name=f"Song {index}",
                        singers="Singer A",
                        ext="mp3",
                        file_size_bytes=64 + index,
                        quality_label="standard",
                    )
                )
                local_file = library_root / "qq" / "Singer A" / f"song-{index}.mp3"
                local_file.parent.mkdir(parents=True, exist_ok=True)
                local_file.write_bytes(b"audio")
                repo.record_local_file(
                    song_id=song_id,
                    backend_id=local_backend_id,
                    relative_path=f"qq/Singer A/song-{index}.mp3",
                    file_size_bytes=64 + index,
                    ext="mp3",
                    quality_label="standard",
                )
            backend_id = self._register_main_s3_backend(repo)

            uploader = CatalogUploader(repository=repo, worker_count=3)
            uploader.enqueue_missing_uploads(backend_name="main-s3")

            self._set_minimal_credentials()
            fake_client = FakeS3Client()
            try:
                with patch(
                    "musicdl.catalogsync.uploader.build_s3_client",
                    return_value=fake_client,
                ):
                    summary = uploader.run(backend_name="main-s3")
            finally:
                self._clear_minimal_credentials()

            pending_rows = repo.list_pending_upload_tasks(target_backend_id=backend_id)
            remote_rows = repo._fetchall(
                "SELECT locator FROM file_locations WHERE backend_id = ? ORDER BY locator ASC",
                (backend_id,),
            )

            self.assertEqual(4, summary["succeeded"])
            self.assertEqual(0, summary["failed"])
            self.assertEqual(0, len(pending_rows))
            self.assertEqual(4, len(fake_client.uploads))
            # Distinct object keys confirm no file was uploaded twice.
            self.assertEqual(4, len({item[2] for item in fake_client.uploads}))
            self.assertEqual(4, len(remote_rows))
if __name__ == "__main__":
    # Run the test suite only when this file is executed directly.
    unittest.main()