222 lines
9.1 KiB
Python
222 lines
9.1 KiB
Python
from __future__ import annotations

import json
import os
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from .repository import CatalogRepository
|
|
|
|
|
|
def load_backend_config(backend_row) -> dict:
    """Decode the JSON configuration stored on a backend row.

    Args:
        backend_row: Mapping-like row exposing a ``config_json`` entry that
            holds the serialized configuration (or a falsy value).

    Returns:
        The decoded configuration object. A missing, empty, whitespace-only
        or JSON ``null`` value yields an empty dict.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or decodes to something other than
            a JSON object.
    """
    raw_config = backend_row["config_json"]
    if not raw_config:
        return {}
    # A blank string is treated like a missing configuration instead of
    # failing with a decode error.
    if isinstance(raw_config, (str, bytes, bytearray)) and not raw_config.strip():
        return {}

    parsed = json.loads(raw_config)
    if parsed is None:
        return {}
    if not isinstance(parsed, dict):
        # Callers use ``.get`` on the result, so fail here with a clear
        # message rather than later with an AttributeError.
        raise ValueError(
            f"Backend config_json must decode to a JSON object, got {type(parsed).__name__}"
        )
    return parsed
|
|
|
|
|
|
def normalize_prefix(value: str | None) -> str:
    """Return *value* as a bare prefix without outer whitespace or slashes.

    A falsy *value* (``None``, ``""``) becomes the empty string. Surrounding
    whitespace is removed first, then any leading and trailing ``/``
    characters; slashes inside the prefix are kept.
    """
    text = str(value) if value else ""
    without_padding = text.strip()
    return without_padding.strip("/")
|
|
|
|
|
|
def build_target_locator(base_prefix: str | None, relative_locator: str) -> str:
    """Join a backend prefix and a relative locator into a target locator.

    The relative locator is stripped of surrounding whitespace and leading
    slashes. When the normalized prefix is empty the relative part is
    returned on its own; otherwise the two are joined with a single ``/``.
    """
    relative_part = str(relative_locator).strip().lstrip("/")
    prefix_part = normalize_prefix(base_prefix)
    return f"{prefix_part}/{relative_part}" if prefix_part else relative_part
|
|
|
|
|
|
def derive_public_url(public_base_url: str | None, locator: str, base_prefix: str | None) -> str | None:
    """Build the public URL of an object from the backend's public base URL.

    Returns ``None`` when no public base URL is configured. If the base URL
    already ends with ``/<prefix>`` and the locator starts with ``<prefix>/``,
    that leading prefix is dropped from the locator so it is not repeated in
    the resulting URL. An empty locator yields the base URL itself.
    """
    root = str(public_base_url or "").strip().rstrip("/")
    if not root:
        return None

    prefix = normalize_prefix(base_prefix)
    key = str(locator).strip().lstrip("/")

    duplicated_head = f"{prefix}/"
    if prefix and root.endswith(f"/{prefix}") and key.startswith(duplicated_head):
        key = key[len(duplicated_head):]

    # Removing the prefix can expose further leading slashes.
    key = key.lstrip("/")
    return f"{root}/{key}" if key else root
|
|
|
|
|
|
def build_s3_client(backend_row):
    """Create a boto3 S3 client from a backend row's stored configuration.

    The configuration keys read are ``credential_env_prefix``,
    ``addressing_style``, ``region`` and ``endpoint``. Credentials come from
    the environment variables ``<prefix>_ACCESS_KEY_ID``,
    ``<prefix>_SECRET_ACCESS_KEY`` and, optionally, ``<prefix>_SESSION_TOKEN``.

    Args:
        backend_row: Mapping-like row with ``config_json`` and ``name``.

    Returns:
        The client returned by ``boto3.client("s3", ...)``.

    Raises:
        RuntimeError: If ``credential_env_prefix`` is not configured, if the
            access key or secret key variable is unset or empty, or if boto3
            cannot be imported.
    """
    config = load_backend_config(backend_row)

    credential_env_prefix = str(config.get("credential_env_prefix") or "").strip()
    if not credential_env_prefix:
        raise RuntimeError("Object storage backend is missing credential_env_prefix")

    access_key_id = os.getenv(f"{credential_env_prefix}_ACCESS_KEY_ID")
    secret_access_key = os.getenv(f"{credential_env_prefix}_SECRET_ACCESS_KEY")
    session_token = os.getenv(f"{credential_env_prefix}_SESSION_TOKEN")
    if not access_key_id or not secret_access_key:
        raise RuntimeError(f"Missing credentials for backend {backend_row['name']}")

    # Imported lazily so the module can be loaded without boto3 installed.
    try:
        import boto3
        from botocore.config import Config
    except ImportError as exc:
        raise RuntimeError("boto3 is required for object storage uploads") from exc

    # Only the two explicit addressing styles are forwarded; anything else
    # leaves the client on its default configuration.
    addressing_style = str(config.get("addressing_style") or "").strip().lower()
    client_config = None
    if addressing_style in {"path", "virtual"}:
        client_config = Config(s3={"addressing_style": addressing_style})

    # "auto" is handled like an unset region.
    region = str(config.get("region") or "").strip()
    if region.lower() == "auto":
        region = ""

    # A blank endpoint is passed as None, like the region and session token,
    # instead of being handed to boto3 as an empty string.
    endpoint = str(config.get("endpoint") or "").strip()

    return boto3.client(
        "s3",
        endpoint_url=endpoint or None,
        region_name=region or None,
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        aws_session_token=session_token or None,
        config=client_config,
    )
|
|
|
|
|
|
class S3CompatibleUploader:
    """Uploads local files to a single S3-compatible backend."""

    def __init__(self, backend_row, client=None):
        """Store the backend row and its decoded config, and pick a client.

        A truthy *client* is used as given; otherwise one is built from the
        backend row with ``build_s3_client``.
        """
        self.backend = backend_row
        self.config = load_backend_config(backend_row)
        if client:
            self.client = client
        else:
            self.client = build_s3_client(backend_row)

    def upload_file(self, local_path: Path, container_name: str, locator: str) -> dict[str, str | None]:
        """Upload *local_path* to *container_name* under the key *locator*.

        Returns a dict with ``public_url`` (derived from the backend's
        ``public_base_url`` and ``base_prefix`` settings, possibly ``None``)
        and ``download_url`` (always ``None``).
        """
        self.client.upload_file(str(local_path), container_name, locator, ExtraArgs=None)

        settings = self.config
        public_url = derive_public_url(
            settings.get("public_base_url"),
            locator,
            settings.get("base_prefix"),
        )
        return {"public_url": public_url, "download_url": None}
|
|
|
|
|
|
class CatalogUploader:
    """Queue and run uploads of catalog files to an object-storage backend.

    Persistence (backend lookup, task queueing and claiming, status updates,
    remote file records) is delegated to the repository; the transfer itself
    is delegated to ``S3CompatibleUploader``.
    """

    def __init__(
        self,
        repository: CatalogRepository,
        worker_count: int = 4,
        client_factory=None,
    ):
        """Set up the uploader.

        Args:
            repository: Repository used for all catalog reads and writes.
            worker_count: Number of worker threads used by ``run``; values
                below 1 are raised to 1.
            client_factory: Callable taking a backend row and returning a
                storage client. Defaults to ``build_s3_client``.
        """
        self.repository = repository
        self.worker_count = max(1, worker_count)
        # The lambda resolves build_s3_client at call time, not here.
        self.client_factory = client_factory or (lambda backend_row: build_s3_client(backend_row))

    def get_backend(self, backend_name: str):
        """Return the backend row called *backend_name*.

        Raises:
            RuntimeError: If no such backend exists or its ``backend_type``
                is not ``"object_storage"``.
        """
        backend = self.repository.get_backend_by_name(backend_name)
        if backend is None:
            raise RuntimeError(f"Unknown backend: {backend_name}")
        if backend["backend_type"] != "object_storage":
            raise RuntimeError(f"Backend {backend_name} is not object storage")
        return backend

    def enqueue_missing_uploads(
        self,
        backend_name: str,
        sources: list[str] | None = None,
        limit: int | None = None,
        playlist_ids: list[int] | None = None,
    ) -> int:
        """Create upload tasks for the candidates the repository reports.

        Args:
            backend_name: Name of the target object-storage backend.
            sources: Passed through to the repository candidate query.
            limit: Passed through to the repository candidate query.
            playlist_ids: Passed through to the repository candidate query.

        Returns:
            The number of distinct task ids returned by the repository while
            enqueueing, so candidates that map to the same task count once.
        """
        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        candidates = self.repository.list_missing_object_upload_candidates(
            target_backend_id=backend_id,
            sources=sources,
            limit=limit,
            playlist_ids=playlist_ids,
        )

        seen_task_ids: set[int] = set()
        for candidate in candidates:
            task_id = self.repository.enqueue_upload_task(
                file_asset_id=int(candidate["file_asset_id"]),
                source_location_id=int(candidate["source_location_id"]),
                target_backend_id=backend_id,
                target_container_name=candidate["target_container_name"],
                target_locator=candidate["target_locator"],
            )
            seen_task_ids.add(task_id)
        return len(seen_task_ids)

    def process_upload_task_row(self, task_row, backend_name: str) -> str:
        """Process one already-fetched task row against *backend_name*.

        Returns:
            ``"succeeded"`` or ``"failed"`` as reported by ``_process_task``,
            or ``"skipped"`` when *task_row* is ``None``.

        Raises:
            RuntimeError: If the backend is unknown or not object storage.
        """
        backend = self.get_backend(backend_name)
        if task_row is None:
            # There is no task to update, so report it as skipped instead of
            # failing on a subscript of None further down.
            return "skipped"

        source_path_text = task_row["absolute_path"]
        uploader = None
        # Only build a client when the source file is present; a missing file
        # is reported as failed without needing one.
        if source_path_text and Path(source_path_text).exists():
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))
        return self._process_task(task_row, backend, uploader)

    def run(self, backend_name: str, limit: int | None = None) -> dict[str, int]:
        """Claim and process pending upload tasks on worker threads.

        Args:
            backend_name: Name of the target object-storage backend.
            limit: Maximum number of tasks to claim in this run. ``None``
                means workers keep claiming until the repository has no task
                left; a value of 0 or less claims nothing.

        Returns:
            A summary with ``queued`` (pending tasks listed before starting),
            ``succeeded``, ``failed``, ``skipped`` and ``workers``.

        Raises:
            RuntimeError: If the backend is unknown or not object storage.
        """
        backend = self.get_backend(backend_name)
        backend_id = int(backend["id"])
        pending_tasks = self.repository.list_pending_upload_tasks(target_backend_id=backend_id, limit=limit)

        # One uploader is shared by all workers, and only built when at least
        # one listed task has a source file on disk.
        uploader = None
        if any(row["absolute_path"] and Path(row["absolute_path"]).exists() for row in pending_tasks):
            uploader = S3CompatibleUploader(backend, client=self.client_factory(backend))

        # Workers claim tasks themselves, so the limit has to be enforced on
        # the claims and not only on the listing above.
        remaining_claims = None if limit is None else max(0, limit)
        budget_lock = threading.Lock()

        def reserve_claim() -> bool:
            """Take one slot from the shared claim budget, if any is left."""
            nonlocal remaining_claims
            if remaining_claims is None:
                return True
            with budget_lock:
                if remaining_claims <= 0:
                    return False
                remaining_claims -= 1
                return True

        def worker():
            local_summary = {"succeeded": 0, "failed": 0, "skipped": 0}
            while reserve_claim():
                task = self.repository.claim_next_upload_task(target_backend_id=backend_id)
                if task is None:
                    break
                result = self._process_task(task, backend, uploader)
                local_summary[result] += 1
            return local_summary

        summary = {
            "queued": len(pending_tasks),
            "succeeded": 0,
            "failed": 0,
            "skipped": 0,
            "workers": self.worker_count,
        }
        with ThreadPoolExecutor(max_workers=self.worker_count) as executor:
            futures = [executor.submit(worker) for _ in range(self.worker_count)]
            for future in as_completed(futures):
                # result() re-raises anything a worker raised.
                worker_summary = future.result()
                for key in ("succeeded", "failed", "skipped"):
                    summary[key] += int(worker_summary[key])
        return summary

    def _process_task(self, task, backend, uploader: S3CompatibleUploader | None) -> str:
        """Upload the file of one task and record the outcome.

        Args:
            task: Task row; ``absolute_path``, ``id``, ``file_asset_id``,
                ``target_backend_id``, ``target_container_name`` and
                ``target_locator`` are read.
            backend: Backend row; ``container_name`` is the fallback when the
                task has no target container.
            uploader: Uploader to reuse, or ``None`` to build one for this
                task.

        Returns:
            ``"succeeded"`` when the upload and the bookkeeping went through,
            otherwise ``"failed"``. The task status is updated either way.
        """
        task_id = int(task["id"])
        source_path_text = task["absolute_path"]
        source_path = Path(source_path_text) if source_path_text else None

        if source_path is None or not source_path.exists():
            missing_path = str(source_path) if source_path is not None else "<missing>"
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"Source file does not exist: {missing_path}",
            )
            return "failed"

        try:
            active_uploader = uploader or S3CompatibleUploader(backend, client=self.client_factory(backend))
            container_name = task["target_container_name"] or backend["container_name"]
            locator = task["target_locator"]
            result = active_uploader.upload_file(
                local_path=source_path,
                container_name=container_name,
                locator=locator,
            )
            self.repository.record_remote_file(
                file_asset_id=int(task["file_asset_id"]),
                backend_id=int(task["target_backend_id"]),
                container_name=container_name,
                locator=locator,
                public_url=result["public_url"],
                download_url=result["download_url"],
            )
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="succeeded",
                last_error=None,
            )
            return "succeeded"
        except Exception as exc:
            # Any failure is stored on the task so one bad task does not stop
            # the worker that is processing the queue.
            self.repository.mark_upload_task_status(
                task_id=task_id,
                status="failed",
                last_error=f"{type(exc).__name__}: {exc}",
            )
            return "failed"
|