Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
from .models import ItemStatus, JobItem, JobRun, JobStatus, JobStage, StageStatus
|
||||
from .repository import OpsRepository
|
||||
|
||||
__all__ = [
|
||||
"ItemStatus",
|
||||
"JobItem",
|
||||
"JobRun",
|
||||
"JobStatus",
|
||||
"JobStage",
|
||||
"OpsRepository",
|
||||
"StageStatus",
|
||||
]
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
def _parse_sources(value: str | None) -> list[str]:
|
||||
if not value:
|
||||
return []
|
||||
return [item.strip() for item in value.split(",") if item and item.strip()]
|
||||
|
||||
|
||||
def _normalize_env_value(raw_value: str) -> str:
|
||||
stripped_value = raw_value.strip()
|
||||
if (
|
||||
len(stripped_value) >= 2
|
||||
and stripped_value[0] == stripped_value[-1]
|
||||
and stripped_value[0] in {"'", '"'}
|
||||
):
|
||||
return stripped_value[1:-1]
|
||||
return raw_value
|
||||
|
||||
|
||||
def _parse_env(content: str) -> dict[str, str]:
    """Parse env-file text into a ``{key: value}`` mapping.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are ignored. A leading ``export `` is dropped. Each value is
    passed through ``_normalize_env_value``; a later assignment to the same
    key replaces an earlier one.
    """
    export_prefix = "export "
    parsed: dict[str, str] = {}
    for raw_line in content.splitlines():
        entry = raw_line.lstrip()
        # Nothing to read on blank lines or comment lines.
        if not entry.strip() or entry.startswith("#"):
            continue
        if entry.startswith(export_prefix):
            entry = entry[len(export_prefix):]
        # Split on the first "=" only so values may themselves contain "=".
        name, separator, raw_value = entry.partition("=")
        name = name.strip()
        if not separator or not name:
            continue
        parsed[name] = _normalize_env_value(raw_value)
    return parsed
|
||||
|
||||
|
||||
class CatalogsyncEnvManager:
    """Reads an env file and stores/restores its revisions via a repository."""

    def __init__(
        self,
        *,
        db_path: str | Path,
        env_file_path: str | Path,
        repository: OpsRepository | None = None,
    ):
        """Bind the manager to an env file and a revision repository.

        When *repository* is not supplied, an ``OpsRepository`` is created
        from *db_path*.
        """
        self.env_file_path = Path(env_file_path)
        self.repository = repository or OpsRepository(db_path)

    def load_current(self) -> dict[str, str]:
        """Return the parsed env file, or an empty mapping if it is absent."""
        env_file = self.env_file_path
        if env_file.exists():
            return _parse_env(env_file.read_text(encoding="utf-8"))
        return {}

    def build_job_snapshot(self) -> dict[str, Any]:
        """Return the current env values plus a parsed ``download_sources`` list.

        ``download_sources`` is derived from the ``DOWNLOAD_SOURCES`` entry.
        """
        current = self.load_current()
        sources = _parse_sources(current.get("DOWNLOAD_SOURCES"))
        snapshot: dict[str, Any] = {**current}
        snapshot["download_sources"] = sources
        return snapshot

    def save_revision(self, note: str | None = None, source_type: str = "env_file") -> int:
        """Record the env file's current text as a new config revision.

        A missing file is stored as empty text. The SHA-256 hex digest of the
        UTF-8 encoded text is stored alongside it. Returns whatever
        ``create_config_revision`` returns.
        """
        env_file = self.env_file_path
        content = env_file.read_text(encoding="utf-8") if env_file.exists() else ""
        digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
        return self.repository.create_config_revision(
            source_type=source_type,
            file_path=str(env_file.resolve()),
            content_text=content,
            content_hash=digest,
            note=note,
        )

    def list_revisions(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return up to *limit* stored revisions, as reported by the repository."""
        return self.repository.list_config_revisions(limit=limit)

    def apply_revision(self, revision_id: int) -> None:
        """Write a stored revision back to the env file and mark it applied.

        Raises:
            ValueError: if the repository has no revision with *revision_id*.
        """
        revision = self.repository.get_config_revision(revision_id)
        if revision is None:
            raise ValueError(f"config revision not found: {revision_id}")
        target = self.env_file_path
        # The env file's directory may not exist yet.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(revision["content_text"], encoding="utf-8")
        self.repository.mark_config_revision_applied(revision_id)
|
||||
@@ -0,0 +1,466 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from musicdl.catalogsync.downloader import CatalogDownloader
|
||||
from musicdl.catalogsync.repository import CatalogRepository
|
||||
from musicdl.catalogsync.services import CatalogSyncService
|
||||
from musicdl.catalogsync.uploader import CatalogUploader
|
||||
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
# Machine-readable reason code recorded when an item is skipped as non-music.
NON_MUSIC_RESOURCE_CODE = "NON_MUSIC_RESOURCE"
# Human-readable reason stored with that skip (runtime text, kept verbatim).
NON_MUSIC_RESOURCE_REASON = "非音乐资源(有声榜条目)"
|
||||
|
||||
|
||||
@dataclass
class ResolvedStageDownloadTask:
    """Hand-off record passed from the resolve step to the download step."""

    # Job item the task belongs to.
    item_id: int
    # Playlist taken from the download row's "playlist_id", if any.
    playlist_id: int | None
    # Download row the payload was resolved from.
    row: dict[str, object]
    # Opaque value returned by the downloader's resolve call.
    resolved_payload: object
|
||||
|
||||
|
||||
def _format_error(exc: Exception) -> str:
|
||||
return f"{type(exc).__name__}: {exc}"
|
||||
|
||||
|
||||
class _TransitionUpdateError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
def _ensure_transition_applied(applied: bool, *, item_id: int, action: str) -> None:
    """Raise unless a state-transition call reported success.

    Args:
        applied: Result returned by the transition call.
        item_id: Job item the transition targeted (used in the message).
        action: Name of the transition call (used in the message).

    Raises:
        _TransitionUpdateError: if *applied* is falsy.
    """
    if not applied:
        raise _TransitionUpdateError(
            f"CAS transition failed for item {item_id}: {action} returned False"
        )
|
||||
|
||||
|
||||
def _mark_failed_or_raise(ops_repo: OpsRepository, *, item_id: int, error_message: str, cause: Exception) -> None:
|
||||
if ops_repo.mark_item_failed(item_id=item_id, error_message=error_message):
|
||||
return
|
||||
raise RuntimeError(
|
||||
f"CAS transition failed for item {item_id}: mark_item_failed returned False while handling error: {error_message}"
|
||||
) from cause
|
||||
|
||||
|
||||
def _mark_non_music_resource_skipped_or_raise(ops_repo: OpsRepository, *, item_id: int) -> None:
    """Mark an item as skipped for being a non-music resource.

    Raises:
        _TransitionUpdateError: if ``mark_item_skipped`` reports the update
            was not applied.
    """
    skipped = ops_repo.mark_item_skipped(
        item_id=item_id,
        reason_message=NON_MUSIC_RESOURCE_REASON,
        reason_code=NON_MUSIC_RESOURCE_CODE,
    )
    _ensure_transition_applied(skipped, item_id=item_id, action="mark_item_skipped")
|
||||
|
||||
|
||||
def _is_non_music_resource_download_row(row: dict[str, object] | None) -> bool:
|
||||
row = row or {}
|
||||
remote_song_id = str(row.get("remote_song_id") or "").strip().lower()
|
||||
if remote_song_id.startswith("qqtop_"):
|
||||
return True
|
||||
metadata_json = row.get("metadata_json")
|
||||
if not metadata_json:
|
||||
return False
|
||||
try:
|
||||
metadata = json.loads(str(metadata_json))
|
||||
except Exception:
|
||||
return False
|
||||
if not isinstance(metadata, dict):
|
||||
return False
|
||||
snapshot = metadata.get("snapshot")
|
||||
if not isinstance(snapshot, dict):
|
||||
return False
|
||||
raw_data = snapshot.get("raw_data")
|
||||
if not isinstance(raw_data, dict):
|
||||
return False
|
||||
search = raw_data.get("search")
|
||||
if not isinstance(search, dict):
|
||||
return False
|
||||
return bool(search.get("qq_toplist_fallback"))
|
||||
|
||||
|
||||
class CollectStageExecutor:
    """Processes "collect" job items by collecting playlists for one source."""

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the sync service, creating defaults as needed."""
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.service = service or CatalogSyncService(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Collect playlists for the item's source and record the outcome.

        The item is claimed first unless *already_claimed* is set. Any
        failure is recorded via ``_mark_failed_or_raise``; the original
        exception is re-raised only when it is a ``_TransitionUpdateError``.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            item = self.ops_repo.get_item(item_id)
            if item is None:
                raise RuntimeError(f"Unknown item: {item_id}")
            settings = item.payload
            source = str(settings.get("source") or "").strip()
            if not source:
                raise RuntimeError(f"Collect item {item_id} is missing source")
            display_text = f"collect:{source}"

            def report(progress_text):
                # Publish this worker's current progress line.
                return self.ops_repo.update_worker_state(
                    worker_name=worker_name,
                    current_job_item_id=item_id,
                    status="running",
                    current_display_text=display_text,
                    last_progress_text=progress_text,
                )

            def on_progress(event_type, payload):
                return report(self._format_progress_text(event_type, payload))

            report("starting playlist collection")
            counts = self.service.collect_playlists(
                sources=[source],
                include_playlist_square=bool(settings.get("include_playlist_square", True)),
                include_toplist=bool(settings.get("include_toplist", True)),
                progress_callback=on_progress,
            )
            succeeded = self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"counts": counts},
            )
            _ensure_transition_applied(succeeded, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            # Transition errors are surfaced to the caller even after the
            # item has been marked failed; other errors stop here.
            if isinstance(exc, _TransitionUpdateError):
                raise

    @staticmethod
    def _format_progress_text(event_type: str, payload: dict[str, object]) -> str:
        """Turn a collection progress event into a short status line.

        Unknown event types are shown as the event name with underscores
        replaced by spaces. Missing or falsy numeric fields count as 0.
        """

        def as_int(mapping, key):
            return int(mapping.get(key) or 0)

        if event_type == "playlist_square_page":
            page = as_int(payload, "page")
            total = as_int(payload, "total")
            new_count = as_int(payload, "new_count")
            if payload.get("duplicate_page"):
                return f"page {page}: duplicate page detected, stopping at {total}"
            return f"page {page}: +{new_count}, total {total}"

        if event_type == "toplist_collected":
            return f"toplist: {as_int(payload, 'count')}"

        if event_type == "source_finished":
            counts = payload.get("counts")
            if not isinstance(counts, dict):
                counts = {}
            playlist_square = as_int(counts, "playlist_square")
            toplist = as_int(counts, "toplist")
            return f"done: square {playlist_square}, toplist {toplist}"

        return str(event_type).replace("_", " ")
|
||||
|
||||
|
||||
class DownloadStageExecutor:
    """Processes "download" job items, in one step or as resolve + download.

    ``process_item`` resolves and downloads in a single call.
    ``process_resolve_item`` and ``process_download_task`` split the same
    work in two, handing a ``ResolvedStageDownloadTask`` across a queue.

    The logic the three entry points share (worker-state callback,
    already-downloaded short-circuit, skip-or-fail settlement and failure
    recording) lives in private helpers so it cannot drift between them.
    """

    def __init__(
        self,
        db_path: str | Path,
        library_root: str | Path,
        download_sources: list[str] | None = None,
        downloader: CatalogDownloader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the downloader, creating defaults as needed."""
        self.db_path = Path(db_path)
        self.library_root = Path(library_root)
        # Copy so later changes to the caller's list do not leak in.
        self.download_sources = list(download_sources or [])
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.downloader = downloader or CatalogDownloader(repository=self.catalog_repo)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _worker_callback(self, item_id: int, worker_name: str):
        """Build the callback the downloader uses to publish worker state."""

        def report(**state):
            return self.ops_repo.update_worker_state(
                worker_name=worker_name,
                current_job_item_id=item_id,
                status="running",
                **state,
            )

        return report

    def _finish_if_already_downloaded(
        self,
        *,
        item_id: int,
        worker_name: str,
        row: dict[str, object],
    ) -> bool:
        """Mark the item succeeded when its song already has a local file.

        Returns ``True`` when the item was settled this way, else ``False``.
        """
        song_id = int(row.get("id") or row.get("song_id") or 0)
        if song_id <= 0 or not self.catalog_repo.song_has_active_local_file(song_id):
            return False
        self.ops_repo.update_worker_state(
            worker_name=worker_name,
            current_job_item_id=item_id,
            status="running",
            current_song_id=song_id,
            current_playlist_id=row.get("playlist_id"),
            current_display_text=str(row.get("name") or row.get("id") or song_id),
            last_progress_text="already downloaded",
        )
        _ensure_transition_applied(
            self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"already_downloaded": True},
            ),
            item_id=item_id,
            action="mark_item_succeeded",
        )
        return True

    def _settle_without_file(
        self,
        *,
        item_id: int,
        row: dict[str, object] | None,
        error_message: str,
    ) -> None:
        """Settle an item that produced nothing: skip non-music rows, else fail."""
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        _ensure_transition_applied(
            self.ops_repo.mark_item_failed(item_id=item_id, error_message=error_message),
            item_id=item_id,
            action="mark_item_failed",
        )

    def _settle_download_result(
        self,
        *,
        item_id: int,
        row: dict[str, object] | None,
        succeeded: object,
    ) -> None:
        """Record the outcome of a download call that returned normally."""
        if succeeded:
            _ensure_transition_applied(
                self.ops_repo.mark_item_succeeded(item_id=item_id),
                item_id=item_id,
                action="mark_item_succeeded",
            )
            return
        self._settle_without_file(
            item_id=item_id,
            row=row,
            error_message="download returned no file",
        )

    def _record_failure(
        self,
        *,
        item_id: int,
        row: dict[str, object] | None,
        exc: Exception,
    ) -> None:
        """Record an exception: skip non-music rows, otherwise mark failed."""
        if _is_non_music_resource_download_row(row):
            _mark_non_music_resource_skipped_or_raise(self.ops_repo, item_id=item_id)
            return
        _mark_failed_or_raise(
            self.ops_repo,
            item_id=item_id,
            error_message=_format_error(exc),
            cause=exc,
        )

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------

    def process_resolve_item(
        self,
        item_id: int,
        worker_name: str,
        *,
        ready_queue,
        already_claimed: bool = False,
    ) -> None:
        """Resolve one item and queue it for download.

        The item is claimed first unless *already_claimed* is set. Items
        whose song already has a local file are marked succeeded; items that
        resolve to nothing are skipped (non-music) or marked failed.
        Otherwise a ``ResolvedStageDownloadTask`` is put on *ready_queue*.

        NOTE(review): unlike ``process_item`` there is no try/except here,
        so exceptions propagate without this method marking the item failed.
        Confirm the caller handles that.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        row = self.ops_repo.build_download_row(item_id=item_id)
        if self._finish_if_already_downloaded(item_id=item_id, worker_name=worker_name, row=row):
            return
        resolved_payload = self.downloader.resolve_song_row(
            row=row,
            library_root=self.library_root,
            download_sources=self.download_sources,
            worker_callback=self._worker_callback(item_id, worker_name),
        )
        if resolved_payload is None:
            self._settle_without_file(
                item_id=item_id,
                row=row,
                error_message="resolve returned no downloadable song",
            )
            return
        ready_queue.put(
            ResolvedStageDownloadTask(
                item_id=item_id,
                playlist_id=row.get("playlist_id"),
                row=row,
                resolved_payload=resolved_payload,
            )
        )

    def process_download_task(self, task: ResolvedStageDownloadTask, worker_name: str) -> None:
        """Download a previously resolved task and record the outcome.

        Exceptions are recorded on the item and swallowed, except that a
        ``_TransitionUpdateError`` is re-raised after being recorded.
        """
        try:
            succeeded = self.downloader.download_resolved_song(
                resolved_payload=task.resolved_payload,
                worker_callback=self._worker_callback(task.item_id, worker_name),
            )
            self._settle_download_result(item_id=task.item_id, row=task.row, succeeded=succeeded)
        except Exception as exc:
            self._record_failure(item_id=task.item_id, row=task.row, exc=exc)
            if isinstance(exc, _TransitionUpdateError):
                raise

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Resolve and download one item in a single call, recording the outcome.

        The item is claimed first unless *already_claimed* is set.
        Exceptions are recorded on the item and swallowed, except that a
        ``_TransitionUpdateError`` is re-raised after being recorded.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        # Stays None if building the row itself fails; the failure path
        # accepts None.
        row: dict[str, object] | None = None
        try:
            row = self.ops_repo.build_download_row(item_id=item_id)
            if self._finish_if_already_downloaded(item_id=item_id, worker_name=worker_name, row=row):
                return
            succeeded = self.downloader.download_song_row(
                row=row,
                library_root=self.library_root,
                download_sources=self.download_sources,
                worker_callback=self._worker_callback(item_id, worker_name),
            )
            self._settle_download_result(item_id=item_id, row=row, succeeded=succeeded)
        except Exception as exc:
            self._record_failure(item_id=item_id, row=row, exc=exc)
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
|
||||
|
||||
class SyncStageExecutor:
    """Processes "sync" job items using a per-thread sync service."""

    def __init__(
        self,
        db_path: str | Path,
        service: CatalogSyncService | None = None,
        service_factory: Callable[[], CatalogSyncService] | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and decide how services are created.

        *service_factory* wins when given; otherwise a supplied *service*
        is reused by every thread; otherwise each thread gets a new
        ``CatalogSyncService`` built on this executor's catalog repository.
        """
        self.db_path = Path(db_path)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)

        def shared_service():
            return service

        def fresh_service():
            return CatalogSyncService(repository=self.catalog_repo)

        if service_factory is not None:
            factory = service_factory
        elif service is not None:
            factory = shared_service
        else:
            factory = fresh_service
        self._service_factory = factory
        self._service_local = threading.local()

    def _get_service(self) -> CatalogSyncService:
        """Return the calling thread's service, creating it on first use."""
        cached = getattr(self._service_local, "service", None)
        if cached is not None:
            return cached
        created = self._service_factory()
        self._service_local.service = created
        return created

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Sync the playlist behind one item and record the linked count.

        The item is claimed first unless *already_claimed* is set. Failures
        are recorded via ``_mark_failed_or_raise``; the original exception
        is re-raised only when it is a ``_TransitionUpdateError``.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            playlist_row = self.ops_repo.get_playlist_row_for_item(item_id=item_id)
            service = self._get_service()
            linked_count = int(service.sync_playlist_row(playlist_row))
            succeeded = self.ops_repo.mark_item_succeeded(
                item_id=item_id,
                result_payload={"linked_count": linked_count},
            )
            _ensure_transition_applied(succeeded, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
|
||||
|
||||
class UploadStageExecutor:
    """Processes "upload" job items against one named storage backend."""

    def __init__(
        self,
        db_path: str | Path,
        backend_name: str,
        uploader: CatalogUploader | None = None,
        ops_repo: OpsRepository | None = None,
    ):
        """Wire up repositories and the uploader, creating defaults as needed."""
        self.db_path = Path(db_path)
        self.backend_name = str(backend_name)
        self.ops_repo = ops_repo or OpsRepository(self.db_path)
        self.catalog_repo = CatalogRepository(self.db_path)
        self.uploader = uploader or CatalogUploader(repository=self.catalog_repo)

    def process_item(self, item_id: int, worker_name: str, *, already_claimed: bool = False) -> None:
        """Upload the file behind one item and record the outcome.

        The item is claimed first unless *already_claimed* is set. An
        uploader result other than ``"succeeded"`` marks the item failed
        with that result in the message. Exceptions are recorded via
        ``_mark_failed_or_raise``; the original exception is re-raised only
        when it is a ``_TransitionUpdateError``.
        """
        if not already_claimed:
            self.ops_repo.claim_item(item_id=item_id, worker_name=worker_name)
        try:
            upload_row = self.ops_repo.get_upload_row_for_item(item_id=item_id)
            outcome = str(
                self.uploader.process_upload_task_row(
                    task_row=upload_row,
                    backend_name=self.backend_name,
                )
            )
            if outcome != "succeeded":
                failed = self.ops_repo.mark_item_failed(
                    item_id=item_id,
                    error_message=f"upload result: {outcome}",
                )
                _ensure_transition_applied(failed, item_id=item_id, action="mark_item_failed")
            else:
                succeeded = self.ops_repo.mark_item_succeeded(item_id=item_id)
                _ensure_transition_applied(succeeded, item_id=item_id, action="mark_item_succeeded")
        except Exception as exc:
            _mark_failed_or_raise(
                self.ops_repo,
                item_id=item_id,
                error_message=_format_error(exc),
                cause=exc,
            )
            if isinstance(exc, _TransitionUpdateError):
                raise
|
||||
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Lane names returned by job_lane_type(): jobs with a download stage use the
# download lane, everything else the general lane.
DOWNLOAD_LANE = "download"
GENERAL_LANE = "general"

# Ordered stage names for every known job type.
JOB_STAGE_SEQUENCES: dict[str, tuple[str, ...]] = dict(
    [
        ("catalog_sync", ("collect", "sync", "download")),
        ("collect_only", ("collect",)),
        ("sync_only", ("sync",)),
        ("sync_download", ("sync", "download")),
        ("download_only", ("download",)),
        ("upload_only", ("upload",)),
        ("download_upload", ("download", "upload")),
    ]
)
|
||||
|
||||
def job_has_stage(job_type: str, stage_type: str) -> bool:
    """Report whether *job_type* includes *stage_type* in its stage sequence.

    Both arguments are compared as strings; unknown job types have no stages.
    """
    stages = JOB_STAGE_SEQUENCES.get(str(job_type))
    if stages is None:
        return False
    return str(stage_type) in stages
|
||||
|
||||
|
||||
def job_lane_type(job_type: str) -> str:
    """Return the lane for *job_type*: download lane if it downloads, else general."""
    return DOWNLOAD_LANE if job_has_stage(job_type, "download") else GENERAL_LANE
|
||||
|
||||
|
||||
def primary_stage_type(job_type: str) -> str | None:
    """Return the job's primary stage, or ``None`` for a job with no stages.

    The first match in the order download, upload, sync, collect wins.
    """
    priority = ("download", "upload", "sync", "collect")
    return next(
        (stage_type for stage_type in priority if job_has_stage(job_type, stage_type)),
        None,
    )
|
||||
|
||||
|
||||
def display_name(job_type: str, playlist_scope: dict[str, Any] | None = None) -> str:
    """Return a human-readable label for *job_type*.

    A job is "scoped" when *playlist_scope* carries a non-empty
    ``playlist_ids`` list; some job types have a different label when
    scoped. Unknown job types are returned as their string form.
    """
    scope = playlist_scope or {}
    playlist_ids = scope.get("playlist_ids")
    is_scoped = isinstance(playlist_ids, list) and len(playlist_ids) > 0

    labels = {
        "catalog_sync": "Full Pipeline",
        "collect_only": "Collect",
        "sync_only": "Sync",
        "sync_download": "Sync Then Download All",
        "download_only": "Download",
        "upload_only": "Upload",
        "download_upload": "Download Then Upload",
    }
    if is_scoped:
        # Labels that change when the job targets selected playlists.
        labels["sync_only"] = "Sync Selected Playlists"
        labels["sync_download"] = "Sync Then Download"
        labels["download_only"] = "Download Selected Playlists"

    key = str(job_type)
    return labels.get(key, key)
|
||||
@@ -0,0 +1,402 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sqlite3
|
||||
from contextlib import contextmanager, suppress
|
||||
from pathlib import Path, PurePath
|
||||
from typing import Any
|
||||
|
||||
from musicdl.catalogsync.db import connect_database
|
||||
|
||||
|
||||
_COPY_SUFFIX_RE = re.compile(r" \(\d+\)(?=(\.[^.]+)?$)")
|
||||
|
||||
|
||||
class LocalDedupeBlockedError(RuntimeError):
    """Raised when local dedupe is refused because jobs or items are running."""
|
||||
|
||||
|
||||
def _coerce_int(value: Any) -> int | None:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _row_value(row: sqlite3.Row | dict[str, Any], key: str) -> Any:
|
||||
if isinstance(row, sqlite3.Row):
|
||||
try:
|
||||
return row[key]
|
||||
except IndexError:
|
||||
return None
|
||||
return row.get(key)
|
||||
|
||||
|
||||
def _path_for_location(row: sqlite3.Row | dict[str, Any]) -> Path | None:
    """Work out the filesystem path a location row points at.

    A non-blank ``absolute_path`` wins. Otherwise the path is ``base_path``
    joined with ``locator``; if either of those is blank, ``None`` is
    returned.
    """

    def text(column):
        return str(_row_value(row, column) or "").strip()

    absolute_path = text("absolute_path")
    if absolute_path:
        return Path(absolute_path)

    base_path = text("base_path")
    locator = text("locator")
    if base_path and locator:
        return Path(base_path) / locator
    return None
|
||||
|
||||
|
||||
def _resolved_path(path: Path | None) -> Path | None:
|
||||
if path is None:
|
||||
return None
|
||||
with suppress(OSError, RuntimeError):
|
||||
return path.resolve(strict=False)
|
||||
return path
|
||||
|
||||
|
||||
def _paths_match(left: Path | None, right: Path | None) -> bool:
    """Report whether two paths are equal once resolved.

    A missing path on either side never matches.
    """
    if left is not None and right is not None:
        return _resolved_path(left) == _resolved_path(right)
    return False
|
||||
|
||||
|
||||
def _has_copy_suffix(locator: str | None) -> bool:
    """Report whether the locator's file name matches ``_COPY_SUFFIX_RE``.

    Only the final path component is examined; ``None`` counts as empty.
    """
    file_name = PurePath(str(locator or "")).name
    return _COPY_SUFFIX_RE.search(file_name) is not None
|
||||
|
||||
|
||||
def _location_payload(row: sqlite3.Row | dict[str, Any]) -> dict[str, Any]:
    """Build the working dict for one file-location row.

    Alongside the row's columns the result records whether the file exists
    on disk, its size on disk when that can be read, and the computed path
    under the private ``"_path"`` key.
    """
    path = _path_for_location(row)
    file_exists = path is not None and path.exists()

    actual_file_size_bytes = None
    if file_exists:
        try:
            actual_file_size_bytes = int(path.stat().st_size)
        except OSError:
            # Size is best-effort; leave it unknown if stat fails.
            pass

    def text(column):
        return str(row[column] or "")

    payload: dict[str, Any] = {}
    payload["id"] = int(row["location_id"])
    payload["file_asset_id"] = int(row["file_asset_id"])
    payload["song_id"] = int(row["song_id"])
    payload["backend_id"] = int(row["backend_id"])
    payload["backend_name"] = text("backend_name")
    payload["locator"] = text("locator")
    payload["absolute_path"] = text("absolute_path")
    payload["file_exists"] = file_exists
    payload["file_size_bytes"] = _coerce_int(row["file_size_bytes"])
    payload["actual_file_size_bytes"] = actual_file_size_bytes
    payload["song_name"] = text("song_name")
    payload["singers"] = text("singers")
    payload["_path"] = path
    return payload
|
||||
|
||||
|
||||
def _location_sort_key(location: dict[str, Any]) -> tuple[int, int, int, int]:
    """Sort key for a location dict; lower tuples sort first.

    Order of preference: file exists on disk, locator has no copy suffix,
    shorter locator, lower id.
    """
    locator = location["locator"]
    missing_rank = int(not location["file_exists"])
    copy_rank = int(_has_copy_suffix(locator))
    return (missing_rank, copy_rank, len(locator), int(location["id"]))
|
||||
|
||||
|
||||
def _duplicate_size_bytes(location: dict[str, Any]) -> int:
|
||||
size_value = location.get("actual_file_size_bytes")
|
||||
if size_value is None:
|
||||
size_value = location.get("file_size_bytes")
|
||||
return max(int(size_value or 0), 0)
|
||||
|
||||
|
||||
class LocalMaintenanceService:
|
||||
def __init__(self, db_path: str | Path):
|
||||
self.db_path = Path(db_path)
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
return connect_database(self.db_path)
|
||||
|
||||
@contextmanager
|
||||
def _connection(self):
|
||||
conn = self._connect()
|
||||
try:
|
||||
yield conn
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def scan_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
|
||||
with self._connection() as conn:
|
||||
groups = self._load_duplicate_groups(conn)
|
||||
scanned_row = conn.execute(
|
||||
"""
|
||||
SELECT COUNT(*) AS count_value
|
||||
FROM file_locations AS fl
|
||||
JOIN storage_backends AS sb ON sb.id = fl.backend_id
|
||||
WHERE fl.status = 'active'
|
||||
AND sb.backend_type = 'local_fs'
|
||||
"""
|
||||
).fetchone()
|
||||
return self._build_scan_payload(
|
||||
groups,
|
||||
scanned_active_local_location_count=int(scanned_row["count_value"]) if scanned_row else 0,
|
||||
sample_limit=sample_limit,
|
||||
)
|
||||
|
||||
def dedupe_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
|
||||
with self._connection() as conn:
|
||||
self._raise_if_running_work(conn)
|
||||
groups = self._load_duplicate_groups(conn)
|
||||
execution = {
|
||||
"deduped_group_count": 0,
|
||||
"inactive_location_count": 0,
|
||||
"deleted_file_count": 0,
|
||||
"released_bytes": 0,
|
||||
"repointed_upload_task_count": 0,
|
||||
"repointed_job_item_count": 0,
|
||||
}
|
||||
affected_pairs: set[tuple[int, int]] = set()
|
||||
for group in groups:
|
||||
keep = group["keep"]
|
||||
duplicates = list(group["duplicates"])
|
||||
if not duplicates:
|
||||
continue
|
||||
execution["deduped_group_count"] += 1
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE file_locations
|
||||
SET
|
||||
is_primary = CASE WHEN id = ? THEN 1 ELSE 0 END,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE file_asset_id = ? AND backend_id = ?
|
||||
""",
|
||||
(
|
||||
int(keep["id"]),
|
||||
int(group["file_asset_id"]),
|
||||
int(group["backend_id"]),
|
||||
),
|
||||
)
|
||||
for duplicate in duplicates:
|
||||
duplicate_id = int(duplicate["id"])
|
||||
upload_cursor = conn.execute(
|
||||
"""
|
||||
UPDATE upload_tasks
|
||||
SET
|
||||
source_location_id = ?,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE source_location_id = ?
|
||||
""",
|
||||
(int(keep["id"]), duplicate_id),
|
||||
)
|
||||
execution["repointed_upload_task_count"] += max(upload_cursor.rowcount, 0)
|
||||
item_cursor = conn.execute(
|
||||
"""
|
||||
UPDATE job_items
|
||||
SET file_location_id = ?
|
||||
WHERE file_location_id = ?
|
||||
""",
|
||||
(int(keep["id"]), duplicate_id),
|
||||
)
|
||||
execution["repointed_job_item_count"] += max(item_cursor.rowcount, 0)
|
||||
inactive_cursor = conn.execute(
|
||||
"""
|
||||
UPDATE file_locations
|
||||
SET
|
||||
status = 'inactive',
|
||||
is_primary = 0,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = ? AND status = 'active'
|
||||
""",
|
||||
(duplicate_id,),
|
||||
)
|
||||
execution["inactive_location_count"] += max(inactive_cursor.rowcount, 0)
|
||||
duplicate_path = duplicate["_path"]
|
||||
if (
|
||||
duplicate_path is not None
|
||||
and duplicate_path.exists()
|
||||
and not _paths_match(duplicate_path, keep["_path"])
|
||||
):
|
||||
duplicate_size_bytes = _duplicate_size_bytes(duplicate)
|
||||
with suppress(OSError):
|
||||
duplicate_path.unlink()
|
||||
execution["deleted_file_count"] += 1
|
||||
execution["released_bytes"] += duplicate_size_bytes
|
||||
affected_pairs.add((int(group["song_id"]), int(group["backend_id"])))
|
||||
for song_id, backend_id in affected_pairs:
|
||||
self._refresh_song_backend_presence_with_connection(
|
||||
conn,
|
||||
song_id=song_id,
|
||||
backend_id=backend_id,
|
||||
)
|
||||
payload = self.scan_local_duplicates(sample_limit=sample_limit)
|
||||
payload["execution"] = execution
|
||||
return payload
|
||||
|
||||
def _raise_if_running_work(self, conn: sqlite3.Connection) -> None:
|
||||
running_jobs_row = conn.execute(
|
||||
"SELECT COUNT(*) AS count_value FROM job_runs WHERE status = 'running'"
|
||||
).fetchone()
|
||||
running_items_row = conn.execute(
|
||||
"SELECT COUNT(*) AS count_value FROM job_items WHERE status = 'running'"
|
||||
).fetchone()
|
||||
running_jobs = int(running_jobs_row["count_value"]) if running_jobs_row else 0
|
||||
running_items = int(running_items_row["count_value"]) if running_items_row else 0
|
||||
if running_jobs > 0 or running_items > 0:
|
||||
raise LocalDedupeBlockedError(
|
||||
f"cannot dedupe while jobs or items are running (jobs={running_jobs}, items={running_items})"
|
||||
)
|
||||
|
||||
def _load_duplicate_groups(self, conn: sqlite3.Connection) -> list[dict[str, Any]]:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
WITH duplicate_pairs AS (
|
||||
SELECT fl.file_asset_id, fl.backend_id
|
||||
FROM file_locations AS fl
|
||||
JOIN storage_backends AS sb ON sb.id = fl.backend_id
|
||||
WHERE fl.status = 'active'
|
||||
AND sb.backend_type = 'local_fs'
|
||||
GROUP BY fl.file_asset_id, fl.backend_id
|
||||
HAVING COUNT(*) > 1
|
||||
)
|
||||
SELECT
|
||||
fl.id AS location_id,
|
||||
fl.file_asset_id,
|
||||
fa.song_id,
|
||||
fl.backend_id,
|
||||
sb.name AS backend_name,
|
||||
sb.base_path,
|
||||
fl.locator,
|
||||
fl.absolute_path,
|
||||
COALESCE(fa.file_size_bytes, s.file_size_bytes) AS file_size_bytes,
|
||||
s.name AS song_name,
|
||||
s.singers
|
||||
FROM file_locations AS fl
|
||||
JOIN duplicate_pairs AS dp
|
||||
ON dp.file_asset_id = fl.file_asset_id
|
||||
AND dp.backend_id = fl.backend_id
|
||||
JOIN file_assets AS fa ON fa.id = fl.file_asset_id
|
||||
JOIN songs AS s ON s.id = fa.song_id
|
||||
JOIN storage_backends AS sb ON sb.id = fl.backend_id
|
||||
WHERE fl.status = 'active'
|
||||
ORDER BY fl.file_asset_id ASC, fl.backend_id ASC, fl.id ASC
|
||||
"""
|
||||
).fetchall()
|
||||
grouped: dict[tuple[int, int], list[dict[str, Any]]] = {}
|
||||
for row in rows:
|
||||
location = _location_payload(row)
|
||||
key = (int(location["file_asset_id"]), int(location["backend_id"]))
|
||||
grouped.setdefault(key, []).append(location)
|
||||
|
||||
groups: list[dict[str, Any]] = []
|
||||
for (file_asset_id, backend_id), locations in grouped.items():
|
||||
ordered_locations = sorted(locations, key=_location_sort_key)
|
||||
keep = ordered_locations[0]
|
||||
groups.append(
|
||||
{
|
||||
"file_asset_id": int(file_asset_id),
|
||||
"backend_id": int(backend_id),
|
||||
"backend_name": keep["backend_name"],
|
||||
"song_id": int(keep["song_id"]),
|
||||
"song_name": keep["song_name"],
|
||||
"singers": keep["singers"],
|
||||
"keep": keep,
|
||||
"duplicates": ordered_locations[1:],
|
||||
}
|
||||
)
|
||||
groups.sort(
|
||||
key=lambda group: (
|
||||
int(group["song_id"]),
|
||||
int(group["file_asset_id"]),
|
||||
int(group["backend_id"]),
|
||||
)
|
||||
)
|
||||
return groups
|
||||
|
||||
def _build_scan_payload(
|
||||
self,
|
||||
groups: list[dict[str, Any]],
|
||||
*,
|
||||
scanned_active_local_location_count: int,
|
||||
sample_limit: int,
|
||||
) -> dict[str, Any]:
|
||||
normalized_sample_limit = max(int(sample_limit or 20), 1)
|
||||
return {
|
||||
"summary": {
|
||||
"duplicate_group_count": len(groups),
|
||||
"duplicate_location_count": sum(len(group["duplicates"]) for group in groups),
|
||||
"duplicate_file_size_bytes": sum(
|
||||
_duplicate_size_bytes(location)
|
||||
for group in groups
|
||||
for location in group["duplicates"]
|
||||
),
|
||||
"scanned_active_local_location_count": int(scanned_active_local_location_count),
|
||||
},
|
||||
"groups": [self._serialize_group(group) for group in groups[:normalized_sample_limit]],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _serialize_group(group: dict[str, Any]) -> dict[str, Any]:
    """Convert one duplicate group into a plain, type-coerced dict.

    Scalar fields are coerced with ``int``/``str``; the ``keep`` location and
    every entry of ``duplicates`` go through
    ``LocalMaintenanceService._serialize_location``.
    """
    scalar_casts = (
        ("file_asset_id", int),
        ("backend_id", int),
        ("backend_name", str),
        ("song_id", int),
        ("song_name", str),
        ("singers", str),
    )
    serialized: dict[str, Any] = {
        field: cast(group[field]) for field, cast in scalar_casts
    }
    to_location = LocalMaintenanceService._serialize_location
    serialized["keep"] = to_location(group["keep"])
    serialized["duplicates"] = [to_location(entry) for entry in group["duplicates"]]
    return serialized
|
||||
|
||||
@staticmethod
|
||||
def _serialize_location(location: dict[str, Any]) -> dict[str, Any]:
    """Project a location payload onto the fields exposed in scan results.

    ``id`` is coerced with ``int``, the two path-like fields with ``str``,
    ``file_exists`` with ``bool`` and both size fields with ``_coerce_int``.
    """
    serialized: dict[str, Any] = {"id": int(location["id"])}
    for text_field in ("locator", "absolute_path"):
        serialized[text_field] = str(location[text_field])
    serialized["file_exists"] = bool(location["file_exists"])
    for size_field in ("file_size_bytes", "actual_file_size_bytes"):
        serialized[size_field] = _coerce_int(location[size_field])
    return serialized
|
||||
|
||||
@staticmethod
|
||||
def _refresh_song_backend_presence_with_connection(
|
||||
conn: sqlite3.Connection,
|
||||
*,
|
||||
song_id: int,
|
||||
backend_id: int,
|
||||
) -> None:
|
||||
summary = conn.execute(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(*) AS active_file_count,
|
||||
MIN(fl.id) AS primary_file_location_id
|
||||
FROM file_locations AS fl
|
||||
JOIN file_assets AS fa ON fa.id = fl.file_asset_id
|
||||
WHERE fa.song_id = ?
|
||||
AND fl.backend_id = ?
|
||||
AND fl.status = 'active'
|
||||
""",
|
||||
(int(song_id), int(backend_id)),
|
||||
).fetchone()
|
||||
active_file_count = int(summary["active_file_count"]) if summary else 0
|
||||
has_active_file = 1 if active_file_count > 0 else 0
|
||||
primary_file_location_id = summary["primary_file_location_id"] if summary else None
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO song_backend_presence (
|
||||
song_id,
|
||||
backend_id,
|
||||
has_active_file,
|
||||
active_file_count,
|
||||
primary_file_location_id
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(song_id, backend_id) DO UPDATE SET
|
||||
has_active_file = excluded.has_active_file,
|
||||
active_file_count = excluded.active_file_count,
|
||||
primary_file_location_id = excluded.primary_file_location_id,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
""",
|
||||
(
|
||||
int(song_id),
|
||||
int(backend_id),
|
||||
has_active_file,
|
||||
active_file_count,
|
||||
primary_file_location_id,
|
||||
),
|
||||
)
|
||||
@@ -0,0 +1,93 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """String-valued states a job run can be in.

    Members are ``str`` subclasses, so each compares equal to its raw value.
    """

    # Not yet finished.
    QUEUED = "queued"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"

    # End states.
    COMPLETED = "completed"
    COMPLETED_WITH_ERRORS = "completed_with_errors"
    FAILED = "failed"
    CANCELED = "canceled"
|
||||
|
||||
|
||||
class StageStatus(str, Enum):
    """String-valued states of a single job stage.

    Members are ``str`` subclasses, so each compares equal to its raw value.
    """

    PENDING = "pending"
    RUNNING = "running"
    PAUSE_REQUESTED = "pause_requested"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"
|
||||
|
||||
|
||||
class ItemStatus(str, Enum):
    """String-valued states of a single work item inside a stage.

    Members are ``str`` subclasses, so each compares equal to its raw value.
    """

    PENDING = "pending"
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    FAILED = "failed"
    INTERRUPTED = "interrupted"
    SKIPPED = "skipped"
    CANCELED = "canceled"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobRun:
    """Immutable record of one job run.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Identity and scheduling.
    id: int
    job_type: str
    status: JobStatus
    priority: int
    requested_by: str | None

    # Configuration captured for the run.
    config_snapshot: dict[str, Any]
    sources: list[str]
    download_sources: list[str]
    playlist_scope: dict[str, Any]

    # Timestamps are carried as strings (or None when unset).
    created_at: str | None
    started_at: str | None
    ended_at: str | None

    last_error: str | None
    resume_token: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobStage:
    """Immutable record of one stage belonging to a job run.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Identity and ordering within the parent run.
    id: int
    job_run_id: int
    stage_type: str
    seq_no: int
    status: StageStatus

    # Item counters.
    total_items: int
    pending_items: int
    running_items: int
    success_items: int
    failed_items: int
    skipped_items: int

    # Timestamps are carried as strings (or None when unset).
    started_at: str | None
    ended_at: str | None

    last_error: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class JobItem:
    """Immutable record of one work item belonging to a job stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Identity within the parent stage.
    id: int
    job_stage_id: int
    item_type: str
    item_key: str

    # Optional references to the entity the item is about.
    playlist_pool_id: int | None
    playlist_id: int | None
    song_id: int | None
    file_location_id: int | None

    # Execution bookkeeping.
    status: ItemStatus
    attempt_count: int
    max_attempts: int
    worker_id: int | None

    # Timestamps are carried as strings (or None when unset).
    started_at: str | None
    ended_at: str | None

    last_error: str | None
    last_error_code: str | None
    payload: dict[str, Any]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,896 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from collections import Counter
|
||||
from concurrent.futures import Future, ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from queue import Queue
|
||||
from typing import Any
|
||||
|
||||
from musicdl.catalogsync.catalog_export import run_catalog_export_command
|
||||
from musicdl.catalogsync.downloader import DownloadPlanner
|
||||
from musicdl.catalogsync.repository import CatalogRepository
|
||||
from musicdl.catalogsync.services import CatalogSyncService
|
||||
from musicdl.catalogsync.uploader import CatalogUploader
|
||||
|
||||
from .jobdefs import DOWNLOAD_LANE, JOB_STAGE_SEQUENCES, job_lane_type
|
||||
from .executors import (
|
||||
CollectStageExecutor,
|
||||
DownloadStageExecutor,
|
||||
SyncStageExecutor,
|
||||
UploadStageExecutor,
|
||||
)
|
||||
from .models import JobStatus, StageStatus
|
||||
from .repository import OpsRepository
|
||||
|
||||
|
||||
DEFAULT_DOWNLOAD_WORKERS = 10
|
||||
DEFAULT_SYNC_WORKERS = 4
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _unique_preserve_order(values: list[str]) -> list[str]:
|
||||
normalized: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
item = str(value).strip()
|
||||
if not item or item in seen:
|
||||
continue
|
||||
normalized.append(item)
|
||||
seen.add(item)
|
||||
return normalized
|
||||
|
||||
|
||||
def _split_csv(value: Any) -> list[str]:
|
||||
if isinstance(value, list):
|
||||
return [str(item).strip() for item in value if str(item).strip()]
|
||||
if not value:
|
||||
return []
|
||||
return [part.strip() for part in str(value).split(",") if part.strip()]
|
||||
|
||||
|
||||
def _int_value(value: Any, default: int) -> int:
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return parsed if parsed > 0 else default
|
||||
|
||||
|
||||
class OpsRunner:
|
||||
def __init__(
    self,
    repository: OpsRepository,
    sleep_seconds: float = 1.0,
    *,
    download_lane_concurrency: int = 1,
    general_lane_concurrency: int = 3,
):
    """Set up the runner's job pool, locks and catalog repository.

    Args:
        repository: Ops repository; its ``db_path`` is also used to open a
            ``CatalogRepository``.
        sleep_seconds: Idle delay between loop iterations, floored at 0.1.
        download_lane_concurrency: Accepted but not applied; the download
            lane is fixed at 1.
        general_lane_concurrency: Concurrent jobs allowed on the general
            lane, floored at 1.
    """
    # NOTE(review): the download_lane_concurrency argument is ignored and the
    # lane is pinned to a single job. Presumably deliberate; confirm.
    download_slots = 1
    general_slots = max(int(general_lane_concurrency), 1)

    self.repository = repository
    self.sleep_seconds = max(float(sleep_seconds), 0.1)
    self.download_lane_concurrency = download_slots
    self.general_lane_concurrency = general_slots
    # One pool thread per job that may run concurrently across both lanes.
    self._job_pool = ThreadPoolExecutor(max_workers=download_slots + general_slots)
    self._futures: dict[int, Future[None]] = {}
    self._futures_lock = threading.Lock()
    self._playlist_export_lock = threading.Lock()
    self._catalog_export_lock = threading.Lock()
    # (stage_id, playlist_id) pairs whose artifacts were already exported.
    self._exported_stage_playlists: set[tuple[int, int]] = set()
    self.db_path = Path(self.repository.db_path)
    self.catalog_repo = CatalogRepository(self.db_path)
|
||||
|
||||
def recover_incomplete_jobs(self) -> None:
    """Pause, clean up and resume every job the repository lists as recoverable.

    For each such job: pause it for recovery, mark each of its running items
    as interrupted, record a ``recovery_requeued`` event, then resume it.
    """
    repo = self.repository
    for job in repo.list_recoverable_jobs():
        job_id = job.id
        repo.pause_job_for_recovery(job_id)
        for running_item in repo.list_running_items(job_id):
            repo.mark_item_interrupted(
                running_item.id,
                last_error="Recovery interrupted running item after runner restart.",
            )
        repo.add_job_event(
            job_id,
            "recovery_requeued",
            "Recovered incomplete job and re-queued resumable work.",
        )
        repo.resume_job(job_id)
|
||||
|
||||
def apply_pending_commands(self) -> None:
    """Apply every pending operator command, then mark it as applied.

    ``pause``/``resume``/``cancel`` map to the matching repository call.
    ``retry_item``/``force_retry_item`` requeue the target item (with
    ``force`` False/True) and log an event when the target is missing or the
    requeue is rejected. Any other command type is logged as ignored.
    """
    repo = self.repository
    # command type -> repository method taking only the job id
    job_level_methods = {
        "pause": "request_job_pause",
        "resume": "resume_job",
        "cancel": "cancel_job",
    }
    # command type -> value of the ``force`` flag passed to requeue_item
    retry_force_flags = {"retry_item": False, "force_retry_item": True}

    for command in repo.list_pending_commands():
        command_type = str(command["command_type"])
        job_id = int(command["job_run_id"])
        command_id = int(command["id"])
        target_item_id = command["target_item_id"]
        details = {"command_type": command_type, "command_id": command_id}

        if command_type in job_level_methods:
            getattr(repo, job_level_methods[command_type])(job_id)
        elif command_type in retry_force_flags:
            if target_item_id is None:
                repo.add_job_event(
                    job_id,
                    "ignored_command",
                    f"{command_type} command missing target_item_id.",
                    details=details,
                )
            elif not repo.requeue_item(
                int(target_item_id),
                force=retry_force_flags[command_type],
                job_id=job_id,
            ):
                repo.add_job_event(
                    job_id,
                    "retry_rejected",
                    f"{command_type} command rejected.",
                    item_id=int(target_item_id),
                    details=details,
                )
        else:
            repo.add_job_event(
                job_id,
                "ignored_command",
                "Unsupported command type.",
                details=details,
            )

        # Every command is marked applied, including ignored/rejected ones.
        repo.mark_command_applied(command_id)
|
||||
|
||||
def reconcile_pause_state(self, job_id: int) -> None:
    """Finalize a requested pause once the job has no running items left."""
    still_running = self.repository.job_has_running_items(job_id)
    if not still_running:
        self.repository.finalize_pause(job_id)
|
||||
|
||||
def run_forever(self, stop_event=None) -> None:
    """Recover incomplete jobs, then loop until ``stop_event`` is set.

    Args:
        stop_event: Optional event-like object with ``is_set()`` and
            ``wait(timeout)``. Without it the loop never terminates.

    After an iteration that did work the loop continues immediately;
    otherwise it idles for ``self.sleep_seconds``.
    """
    self.recover_incomplete_jobs()

    # Waiting on the event (when given) lets a stop request cut the idle short.
    idle = time.sleep if stop_event is None else stop_event.wait

    while True:
        if stop_event is not None and stop_event.is_set():
            break
        if not self.loop_once():
            idle(self.sleep_seconds)
|
||||
|
||||
def loop_once(self) -> bool:
    """Run one scheduler pass and report whether anything happened.

    Applies pending commands, reaps finished job futures and starts eligible
    jobs. Returns True when commands were pending beforehand or when either
    of the other two steps reported a non-zero count.
    """
    commands_were_pending = bool(self.repository.list_pending_commands())
    self.apply_pending_commands()
    reaped = self._reap_finished_jobs()
    launched = self._start_eligible_jobs()
    return any((commands_were_pending, reaped, launched))
|
||||
|
||||
def _reap_finished_jobs(self) -> int:
|
||||
finished_count = 0
|
||||
finished_futures: list[tuple[int, Future[None]]] = []
|
||||
with self._futures_lock:
|
||||
for job_id, future in list(self._futures.items()):
|
||||
if not future.done():
|
||||
continue
|
||||
del self._futures[job_id]
|
||||
finished_futures.append((job_id, future))
|
||||
for job_id, future in finished_futures:
|
||||
try:
|
||||
future.result()
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job_id,
|
||||
"job_future_error",
|
||||
str(exc),
|
||||
)
|
||||
job = self.repository.get_job(job_id)
|
||||
if job is not None and job.status not in {
|
||||
JobStatus.COMPLETED,
|
||||
JobStatus.COMPLETED_WITH_ERRORS,
|
||||
JobStatus.FAILED,
|
||||
JobStatus.CANCELED,
|
||||
JobStatus.PAUSED,
|
||||
}:
|
||||
self.repository.mark_job_finished(
|
||||
job_id,
|
||||
status=JobStatus.FAILED,
|
||||
last_error=str(exc),
|
||||
)
|
||||
finished_count += 1
|
||||
return finished_count
|
||||
|
||||
def _submit_job(self, job_id: int) -> bool:
|
||||
with self._futures_lock:
|
||||
if job_id in self._futures:
|
||||
return False
|
||||
self._futures[job_id] = self._job_pool.submit(self._run_job, job_id)
|
||||
return True
|
||||
|
||||
def _start_eligible_jobs(self) -> int:
|
||||
started_count = 0
|
||||
active_jobs = self.repository.list_active_jobs()
|
||||
lane_counts = Counter(job_lane_type(job.job_type) for job in active_jobs)
|
||||
for active_job in active_jobs:
|
||||
if active_job.status == JobStatus.PAUSE_REQUESTED:
|
||||
self.reconcile_pause_state(active_job.id)
|
||||
continue
|
||||
if self._submit_job(active_job.id):
|
||||
started_count += 1
|
||||
for queued_job in self.repository.list_queued_jobs():
|
||||
lane_type = job_lane_type(queued_job.job_type)
|
||||
lane_limit = (
|
||||
self.download_lane_concurrency
|
||||
if lane_type == DOWNLOAD_LANE
|
||||
else self.general_lane_concurrency
|
||||
)
|
||||
if lane_counts[lane_type] >= lane_limit:
|
||||
continue
|
||||
claimed = self.repository.claim_job_if_queued(queued_job.id)
|
||||
if claimed is None:
|
||||
continue
|
||||
lane_counts[lane_type] += 1
|
||||
if self._submit_job(claimed.id):
|
||||
started_count += 1
|
||||
return started_count
|
||||
|
||||
def _run_job(self, job_id: int) -> None:
|
||||
try:
|
||||
current_job = self.repository.get_job(job_id)
|
||||
if current_job is None:
|
||||
return
|
||||
if current_job.status == JobStatus.CANCELED:
|
||||
self.repository.finalize_canceled_job(job_id)
|
||||
return
|
||||
if current_job.status == JobStatus.PAUSE_REQUESTED:
|
||||
self.reconcile_pause_state(job_id)
|
||||
return
|
||||
if current_job.status == JobStatus.PAUSED:
|
||||
return
|
||||
if not self.repository.mark_job_running(job_id):
|
||||
current_job = self.repository.get_job(job_id)
|
||||
if current_job is not None:
|
||||
if current_job.status == JobStatus.CANCELED:
|
||||
self.repository.finalize_canceled_job(job_id)
|
||||
elif current_job.status == JobStatus.PAUSE_REQUESTED:
|
||||
self.reconcile_pause_state(job_id)
|
||||
return
|
||||
current_job = self.repository.get_job(job_id)
|
||||
if current_job is None:
|
||||
return
|
||||
self._ensure_job_stages(current_job)
|
||||
|
||||
while True:
|
||||
current_job = self.repository.get_job(job_id)
|
||||
if current_job is None:
|
||||
return
|
||||
if current_job.status == JobStatus.CANCELED:
|
||||
self.repository.finalize_canceled_job(job_id)
|
||||
return
|
||||
if current_job.status == JobStatus.PAUSE_REQUESTED:
|
||||
self.reconcile_pause_state(job_id)
|
||||
return
|
||||
|
||||
stage = self._next_runnable_stage(job_id)
|
||||
if stage is None:
|
||||
if self._job_is_finished(job_id):
|
||||
self._finalize_job(job_id)
|
||||
return
|
||||
stages = self.repository.list_job_stages(job_id)
|
||||
if any(
|
||||
stage_row.status in {StageStatus.PAUSED, StageStatus.PAUSE_REQUESTED}
|
||||
for stage_row in stages
|
||||
):
|
||||
self.repository.pause_job_for_recovery(job_id)
|
||||
return
|
||||
raise RuntimeError("Job has no runnable stages but is not finished.")
|
||||
|
||||
self._run_stage(current_job, stage)
|
||||
|
||||
refreshed_job = self.repository.get_job(job_id)
|
||||
if refreshed_job is None:
|
||||
return
|
||||
if refreshed_job.status == JobStatus.CANCELED:
|
||||
self.repository.finalize_canceled_job(job_id)
|
||||
return
|
||||
if refreshed_job.status == JobStatus.PAUSE_REQUESTED:
|
||||
self.reconcile_pause_state(job_id)
|
||||
return
|
||||
if self._job_is_finished(job_id):
|
||||
self._finalize_job(job_id)
|
||||
return
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job_id,
|
||||
"job_execution_error",
|
||||
str(exc),
|
||||
)
|
||||
job = self.repository.get_job(job_id)
|
||||
if job is not None and job.status not in {
|
||||
JobStatus.COMPLETED,
|
||||
JobStatus.COMPLETED_WITH_ERRORS,
|
||||
JobStatus.FAILED,
|
||||
JobStatus.CANCELED,
|
||||
JobStatus.PAUSED,
|
||||
}:
|
||||
self.repository.mark_job_finished(
|
||||
job_id,
|
||||
status=JobStatus.FAILED,
|
||||
last_error=str(exc),
|
||||
)
|
||||
|
||||
def _ensure_job_stages(self, job) -> None:
|
||||
existing = self.repository.list_job_stages(job.id)
|
||||
if existing:
|
||||
return
|
||||
for seq_no, stage_type in enumerate(
|
||||
JOB_STAGE_SEQUENCES.get(str(job.job_type), []), start=1
|
||||
):
|
||||
self.repository.create_stage(job_run_id=job.id, stage_type=stage_type, seq_no=seq_no)
|
||||
|
||||
def _next_runnable_stage(self, job_id: int):
|
||||
for stage in self.repository.list_job_stages(job_id):
|
||||
if stage.status in {StageStatus.PENDING, StageStatus.RUNNING}:
|
||||
return stage
|
||||
return None
|
||||
|
||||
def _job_sources(self, job) -> list[str]:
    """Return the job's sources, de-duplicated in first-seen order.

    ``job.sources`` wins when it is non-empty; otherwise the ``SOURCES``
    entry of the config snapshot is parsed as CSV. A missing (``None``)
    snapshot is treated as empty, matching the other snapshot readers in
    this class.
    """
    declared = job.sources
    if not declared:
        snapshot = job.config_snapshot or {}
        declared = _split_csv(snapshot.get("SOURCES"))
    return _unique_preserve_order(list(declared))
|
||||
|
||||
def _job_download_sources(self, job) -> list[str]:
    """Return the job's download sources, de-duplicated in first-seen order.

    Precedence: ``job.download_sources``, then the ``download_sources``
    snapshot entry, then ``DOWNLOAD_SOURCES``; the first non-empty one is
    used. A missing (``None``) snapshot is treated as empty, matching the
    other snapshot readers in this class.
    """
    declared = job.download_sources
    if not declared:
        snapshot = job.config_snapshot or {}
        declared = (
            _split_csv(snapshot.get("download_sources"))
            or _split_csv(snapshot.get("DOWNLOAD_SOURCES"))
        )
    return _unique_preserve_order(list(declared))
|
||||
|
||||
def _job_playlist_ids(self, job) -> list[int] | None:
|
||||
raw_value = job.playlist_scope.get("playlist_ids")
|
||||
if not isinstance(raw_value, list):
|
||||
return None
|
||||
playlist_ids = []
|
||||
for item in raw_value:
|
||||
try:
|
||||
playlist_ids.append(int(item))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return playlist_ids or None
|
||||
|
||||
def _resolve_library_root(self, job) -> Path:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
|
||||
if library_dir:
|
||||
return Path(str(library_dir)).resolve()
|
||||
try:
|
||||
backend = self.catalog_repo.get_backend(self.catalog_repo.get_default_backend_id())
|
||||
except Exception:
|
||||
backend = None
|
||||
if backend and backend["base_path"]:
|
||||
return Path(str(backend["base_path"])).resolve()
|
||||
raise RuntimeError("No library root configured for download stage")
|
||||
|
||||
def _resolve_playlists_root(self, job) -> Path | None:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
root_dir = mapping.get("ROOT_DIR") or mapping.get("root_dir")
|
||||
if root_dir:
|
||||
path = Path(str(root_dir)).resolve() / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
library_dir = mapping.get("LIBRARY_DIR") or mapping.get("library_dir")
|
||||
if library_dir:
|
||||
path = Path(str(library_dir)).resolve().parent / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
library_root = self.catalog_repo.get_default_local_library_root()
|
||||
if library_root is None:
|
||||
return None
|
||||
path = library_root.parent / "playlists"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
def _mark_playlist_exported(self, stage_id: int, playlist_id: int) -> bool:
|
||||
key = (int(stage_id), int(playlist_id))
|
||||
with self._playlist_export_lock:
|
||||
if key in self._exported_stage_playlists:
|
||||
return False
|
||||
self._exported_stage_playlists.add(key)
|
||||
return True
|
||||
|
||||
def _forget_playlist_exported(self, stage_id: int, playlist_id: int) -> None:
|
||||
key = (int(stage_id), int(playlist_id))
|
||||
with self._playlist_export_lock:
|
||||
self._exported_stage_playlists.discard(key)
|
||||
|
||||
def _export_playlist_artifacts_for_playlist_if_ready(self, job, stage, playlist_id: int | None) -> bool:
|
||||
if str(stage.stage_type) != "download" or playlist_id is None:
|
||||
return False
|
||||
scoped_playlist_ids = self._job_playlist_ids(job)
|
||||
normalized_playlist_id = int(playlist_id)
|
||||
if not scoped_playlist_ids or normalized_playlist_id not in scoped_playlist_ids:
|
||||
return False
|
||||
if self.repository.playlist_has_open_items(stage.id, normalized_playlist_id):
|
||||
return False
|
||||
if not self._mark_playlist_exported(stage.id, normalized_playlist_id):
|
||||
return False
|
||||
|
||||
playlists_root = self._resolve_playlists_root(job)
|
||||
if playlists_root is None:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_skipped",
|
||||
"Playlists root is not configured for scoped download export.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
service = CatalogSyncService(
|
||||
repository=self.catalog_repo,
|
||||
playlists_root=playlists_root,
|
||||
)
|
||||
try:
|
||||
folder_path = service.ensure_playlist_artifacts_for_playlist(normalized_playlist_id)
|
||||
except Exception as exc:
|
||||
self._forget_playlist_exported(stage.id, normalized_playlist_id)
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
if folder_path is None:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_skipped",
|
||||
"Playlist export row is unavailable.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id},
|
||||
)
|
||||
return False
|
||||
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_export_ready",
|
||||
f"Exported playlist artifacts for playlist {normalized_playlist_id}.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_id": normalized_playlist_id, "playlist_dir": str(folder_path)},
|
||||
)
|
||||
return True
|
||||
|
||||
def _refresh_ready_playlist_artifacts(self, job, stage) -> list[int]:
|
||||
if str(stage.stage_type) != "download":
|
||||
return []
|
||||
playlist_ids = self._job_playlist_ids(job)
|
||||
if not playlist_ids:
|
||||
return []
|
||||
exported_ids: list[int] = []
|
||||
for playlist_id in playlist_ids:
|
||||
if self._export_playlist_artifacts_for_playlist_if_ready(job, stage, int(playlist_id)):
|
||||
exported_ids.append(int(playlist_id))
|
||||
return exported_ids
|
||||
|
||||
def _resolve_backend_name(self, job) -> str:
|
||||
value = (
|
||||
job.config_snapshot.get("OBJECT_BACKEND_NAME")
|
||||
or job.config_snapshot.get("object_backend_name")
|
||||
or ""
|
||||
)
|
||||
return str(value).strip()
|
||||
|
||||
def _worker_count(self, job, stage_type: str) -> int:
|
||||
mapping = dict(job.config_snapshot or {})
|
||||
if stage_type == "download":
|
||||
return _int_value(mapping.get("DOWNLOAD_WORKERS"), DEFAULT_DOWNLOAD_WORKERS)
|
||||
if stage_type == "sync":
|
||||
return _int_value(mapping.get("SYNC_WORKERS"), DEFAULT_SYNC_WORKERS)
|
||||
if stage_type == "upload":
|
||||
return _int_value(mapping.get("UPLOAD_WORKERS"), 4)
|
||||
return 1
|
||||
|
||||
def _download_stage_worker_split(self, total_workers: int) -> tuple[int, int]:
|
||||
normalized_total = max(int(total_workers or 0), 1)
|
||||
if normalized_total == 1:
|
||||
return 1, 0
|
||||
if normalized_total == 2:
|
||||
return 1, 1
|
||||
if normalized_total <= 5:
|
||||
download_workers = 1
|
||||
else:
|
||||
download_workers = 2
|
||||
resolver_workers = max(1, normalized_total - download_workers)
|
||||
return resolver_workers, download_workers
|
||||
|
||||
def _materialize_stage_items(self, job, stage) -> None:
|
||||
refreshed_stage = self.repository.get_stage(stage.id)
|
||||
if refreshed_stage is None or refreshed_stage.total_items > 0:
|
||||
return
|
||||
|
||||
playlist_ids = self._job_playlist_ids(job)
|
||||
if stage.stage_type == "collect":
|
||||
for source in self._job_sources(job):
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="collect_source",
|
||||
item_key=f"collect:{source}",
|
||||
payload={
|
||||
"source": source,
|
||||
"include_playlist_square": True,
|
||||
"include_toplist": True,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "sync":
|
||||
if playlist_ids:
|
||||
playlist_rows = self.catalog_repo.list_playlists_by_ids(playlist_ids)
|
||||
else:
|
||||
playlist_rows = self.catalog_repo.list_playlists(sources=self._job_sources(job))
|
||||
for row in playlist_rows:
|
||||
playlist_id = int(row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="playlist_sync",
|
||||
item_key=f"playlist:{playlist_id}",
|
||||
playlist_id=playlist_id,
|
||||
payload={"playlist_row": dict(row)},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "download":
|
||||
planner = DownloadPlanner(self.catalog_repo)
|
||||
for row in planner.build_download_queue(
|
||||
sources=self._job_sources(job),
|
||||
playlist_ids=playlist_ids,
|
||||
):
|
||||
song_id = int(row.get("song_id") or row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="song_download",
|
||||
item_key=f"song:{song_id}",
|
||||
song_id=song_id,
|
||||
playlist_id=row.get("playlist_id"),
|
||||
payload={"row": dict(row)},
|
||||
)
|
||||
return
|
||||
|
||||
if stage.stage_type == "upload":
|
||||
backend_name = self._resolve_backend_name(job)
|
||||
if not backend_name:
|
||||
return
|
||||
uploader = CatalogUploader(self.catalog_repo)
|
||||
uploader.enqueue_missing_uploads(
|
||||
backend_name=backend_name,
|
||||
sources=self._job_sources(job) or None,
|
||||
playlist_ids=playlist_ids,
|
||||
)
|
||||
backend = self.catalog_repo.get_backend_by_name(backend_name)
|
||||
if backend is None:
|
||||
return
|
||||
rows = self.catalog_repo.list_pending_upload_tasks(target_backend_id=int(backend["id"]))
|
||||
for row in rows:
|
||||
upload_task_id = int(row["id"])
|
||||
self.repository.create_item(
|
||||
job_stage_id=stage.id,
|
||||
item_type="file_upload",
|
||||
item_key=f"upload:{upload_task_id}",
|
||||
file_location_id=row["source_location_id"],
|
||||
payload={
|
||||
"upload_task_id": upload_task_id,
|
||||
"upload_row": dict(row),
|
||||
},
|
||||
)
|
||||
|
||||
def _build_executor(self, job, stage):
|
||||
if stage.stage_type == "collect":
|
||||
return CollectStageExecutor(self.db_path, ops_repo=self.repository)
|
||||
if stage.stage_type == "sync":
|
||||
return SyncStageExecutor(self.db_path, ops_repo=self.repository)
|
||||
if stage.stage_type == "download":
|
||||
return DownloadStageExecutor(
|
||||
self.db_path,
|
||||
library_root=self._resolve_library_root(job),
|
||||
download_sources=self._job_download_sources(job),
|
||||
ops_repo=self.repository,
|
||||
)
|
||||
if stage.stage_type == "upload":
|
||||
backend_name = self._resolve_backend_name(job)
|
||||
if not backend_name:
|
||||
raise RuntimeError("No object backend configured for upload stage")
|
||||
return UploadStageExecutor(
|
||||
self.db_path,
|
||||
backend_name=backend_name,
|
||||
ops_repo=self.repository,
|
||||
)
|
||||
raise RuntimeError(f"Unsupported stage type: {stage.stage_type}")
|
||||
|
||||
def _export_playlist_artifacts_for_job(self, job, stage) -> None:
|
||||
exported_ids = self._refresh_ready_playlist_artifacts(job, stage)
|
||||
playlist_ids = self._job_playlist_ids(job) or []
|
||||
if str(stage.stage_type) != "download" or not playlist_ids:
|
||||
return
|
||||
try:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"playlist_exported",
|
||||
f"Refreshed playlist export folders for {len(exported_ids)} playlists.",
|
||||
stage_id=stage.id,
|
||||
details={"playlist_ids": exported_ids, "scoped_playlist_ids": playlist_ids},
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to persist playlist_exported event for job %s stage %s.",
|
||||
job.id,
|
||||
stage.id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def _run_catalog_export_for_stage(self, job, stage) -> None:
|
||||
if str(stage.stage_type) != "download":
|
||||
return
|
||||
|
||||
with self._catalog_export_lock:
|
||||
refreshed_job = self.repository.get_job(job.id) or job
|
||||
if refreshed_job.status in {
|
||||
JobStatus.CANCELED,
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.PAUSED,
|
||||
}:
|
||||
return
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"catalog_export_started",
|
||||
"Started post-download catalog export command.",
|
||||
stage_id=stage.id,
|
||||
)
|
||||
try:
|
||||
result = run_catalog_export_command(refreshed_job.config_snapshot)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"catalog_export_failed",
|
||||
f"Catalog export command raised an error: {exc}",
|
||||
stage_id=stage.id,
|
||||
details={"error": str(exc) or exc.__class__.__name__},
|
||||
)
|
||||
return
|
||||
|
||||
details: dict[str, Any] = {}
|
||||
if result.command:
|
||||
details["command"] = result.command
|
||||
if result.workdir:
|
||||
details["workdir"] = result.workdir
|
||||
if result.returncode is not None:
|
||||
details["returncode"] = result.returncode
|
||||
if result.stdout:
|
||||
details["stdout"] = result.stdout
|
||||
if result.stderr:
|
||||
details["stderr"] = result.stderr
|
||||
|
||||
normalized_status = str(result.status).strip().lower()
|
||||
if normalized_status == "succeeded":
|
||||
event_type = "catalog_export_succeeded"
|
||||
message = "Catalog export command completed successfully."
|
||||
elif normalized_status == "skipped":
|
||||
event_type = "catalog_export_skipped"
|
||||
message = "Catalog export command was skipped."
|
||||
else:
|
||||
event_type = "catalog_export_failed"
|
||||
message = "Catalog export command failed."
|
||||
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
event_type,
|
||||
message,
|
||||
stage_id=stage.id,
|
||||
details=details or None,
|
||||
)
|
||||
|
||||
def _run_stage_with_single_pool(self, job, stage, executor, worker_count: int) -> None:
|
||||
def worker_loop(worker_index: int) -> None:
|
||||
worker_name = f"{stage.stage_type}-{worker_index + 1}"
|
||||
while True:
|
||||
active_job = self.repository.get_job(job.id)
|
||||
if active_job is None or active_job.status in {
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.CANCELED,
|
||||
}:
|
||||
return
|
||||
item = self.repository.claim_next_stage_item(stage.id, worker_name)
|
||||
if item is None:
|
||||
return
|
||||
try:
|
||||
executor.process_item(item.id, worker_name, already_claimed=True)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
item.playlist_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=item.id,
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=worker_count) as pool:
|
||||
futures = [pool.submit(worker_loop, index) for index in range(worker_count)]
|
||||
for future in futures:
|
||||
future.result()
|
||||
|
||||
def _run_download_stage_pipeline(self, job, stage, executor, worker_count: int) -> None:
|
||||
resolver_workers, download_workers = self._download_stage_worker_split(worker_count)
|
||||
if download_workers == 0:
|
||||
self._run_stage_with_single_pool(job, stage, executor, worker_count)
|
||||
return
|
||||
|
||||
ready_queue: Queue = Queue(maxsize=max(1, download_workers * 2))
|
||||
stop_event = threading.Event()
|
||||
sentinel = object()
|
||||
|
||||
def resolver_loop(worker_index: int) -> None:
|
||||
worker_name = f"resolve-{worker_index + 1}"
|
||||
while not stop_event.is_set():
|
||||
active_job = self.repository.get_job(job.id)
|
||||
if active_job is None or active_job.status in {
|
||||
JobStatus.PAUSE_REQUESTED,
|
||||
JobStatus.CANCELED,
|
||||
}:
|
||||
stop_event.set()
|
||||
return
|
||||
item = self.repository.claim_next_stage_item(stage.id, worker_name)
|
||||
if item is None:
|
||||
return
|
||||
try:
|
||||
executor.process_resolve_item(
|
||||
item.id,
|
||||
worker_name,
|
||||
ready_queue=ready_queue,
|
||||
already_claimed=True,
|
||||
)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
item.playlist_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=item.id,
|
||||
)
|
||||
|
||||
def download_loop(worker_index: int) -> None:
|
||||
worker_name = f"download-{worker_index + 1}"
|
||||
while True:
|
||||
task = ready_queue.get()
|
||||
if task is sentinel:
|
||||
return
|
||||
try:
|
||||
executor.process_download_task(task, worker_name)
|
||||
self._export_playlist_artifacts_for_playlist_if_ready(
|
||||
job,
|
||||
stage,
|
||||
getattr(task, "playlist_id", None),
|
||||
)
|
||||
except Exception as exc:
|
||||
self.repository.add_job_event(
|
||||
job.id,
|
||||
"item_execution_error",
|
||||
str(exc),
|
||||
stage_id=stage.id,
|
||||
item_id=getattr(task, "item_id", None),
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=resolver_workers + download_workers) as pool:
|
||||
resolver_futures = [pool.submit(resolver_loop, index) for index in range(resolver_workers)]
|
||||
download_futures = [pool.submit(download_loop, index) for index in range(download_workers)]
|
||||
for future in resolver_futures:
|
||||
future.result()
|
||||
for _ in range(download_workers):
|
||||
ready_queue.put(sentinel)
|
||||
for future in download_futures:
|
||||
future.result()
|
||||
|
||||
def _run_stage(self, job, stage) -> None:
    """Execute one stage of ``job`` and record its outcome.

    Steps, in order:
      1. A pending stage is marked running and a ``stage_started`` event is
         added.
      2. Stage items are materialized; a stage with zero items is completed
         right away.
      3. Items are processed by the download pipeline (stage type
         ``"download"``) or by the single worker pool (any other type).
      4. A canceled or pause-requested job is handed to the matching
         repository/reconcile call and the stage is left as it is.
      5. Otherwise, once the stage has no open items, it is marked FAILED if
         any item failed and COMPLETED if none did, and the playlist/catalog
         exports run.
    """

    def export_for_finished_stage() -> None:
        # The exports receive the stage as re-read after it was marked
        # finished; they are skipped if the stage can no longer be found.
        finished = self.repository.get_stage(stage.id)
        if finished is None:
            return
        self._export_playlist_artifacts_for_job(job, finished)
        self._run_catalog_export_for_stage(job, finished)

    if stage.status == StageStatus.PENDING:
        self.repository.mark_stage_running(stage.id)
        self.repository.add_job_event(
            job.id,
            "stage_started",
            f"Started stage {stage.stage_type}.",
            stage_id=stage.id,
        )

    self._materialize_stage_items(job, stage)
    materialized = self.repository.get_stage(stage.id)
    if materialized is None:
        return
    if materialized.total_items == 0:
        # Nothing to process: the stage is complete by definition.
        self.repository.mark_stage_finished(stage.id, status=StageStatus.COMPLETED)
        export_for_finished_stage()
        return

    executor = self._build_executor(job, materialized)
    worker_count = self._worker_count(job, materialized.stage_type)
    if materialized.stage_type == "download":
        run_items = self._run_download_stage_pipeline
    else:
        run_items = self._run_stage_with_single_pool
    run_items(job, materialized, executor, worker_count)

    # The workers stop early on cancel/pause; in that case the stage must not
    # be finalized here.
    latest_job = self.repository.get_job(job.id)
    if latest_job is not None:
        if latest_job.status == JobStatus.CANCELED:
            self.repository.finalize_canceled_job(job.id)
            return
        if latest_job.status == JobStatus.PAUSE_REQUESTED:
            self.reconcile_pause_state(job.id)
            return

    latest_stage = self.repository.get_stage(stage.id)
    if latest_stage is None or self.repository.stage_has_open_items(stage.id):
        return

    if latest_stage.failed_items > 0:
        finish_kwargs = {
            "status": StageStatus.FAILED,
            "last_error": "One or more stage items failed.",
        }
    else:
        finish_kwargs = {"status": StageStatus.COMPLETED}
    self.repository.mark_stage_finished(stage.id, **finish_kwargs)
    export_for_finished_stage()
|
||||
|
||||
def _job_is_finished(self, job_id: int) -> bool:
|
||||
stages = self.repository.list_job_stages(job_id)
|
||||
if not stages:
|
||||
return True
|
||||
return all(
|
||||
stage.status in {StageStatus.COMPLETED, StageStatus.FAILED, StageStatus.SKIPPED}
|
||||
for stage in stages
|
||||
)
|
||||
|
||||
def _finalize_job(self, job_id: int) -> None:
    """Mark the job finished, choosing the final status from its stages.

    A job with no stages is marked COMPLETED. Otherwise the job is
    COMPLETED_WITH_ERRORS (with a generic ``last_error`` message) if any
    stage is FAILED or reports failed items, and COMPLETED if none does.
    """
    stages = self.repository.list_job_stages(job_id)
    if not stages:
        self.repository.mark_job_finished(job_id, status=JobStatus.COMPLETED)
        return

    has_errors = False
    for stage in stages:
        # A stage counts as errored if it failed outright or if any of its
        # items failed.
        if stage.status == StageStatus.FAILED or stage.failed_items > 0:
            has_errors = True
            break

    if has_errors:
        final_status = JobStatus.COMPLETED_WITH_ERRORS
        error_text = "One or more stage items failed."
    else:
        final_status = JobStatus.COMPLETED
        error_text = None
    self.repository.mark_job_finished(
        job_id,
        status=final_status,
        last_error=error_text,
    )
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user