Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,402 @@
from __future__ import annotations
import re
import sqlite3
from contextlib import contextmanager, suppress
from pathlib import Path, PurePath
from typing import Any
from musicdl.catalogsync.db import connect_database
# Matches a " (N)" copy marker (a space followed by parenthesised digits) that
# sits either at the very end of a name or directly before one trailing
# ".ext" extension, e.g. "song (1).mp3" or "song (2)".
_COPY_SUFFIX_RE = re.compile(r" \(\d+\)(?=(\.[^.]+)?$)")
class LocalDedupeBlockedError(RuntimeError):
    """Signals that local deduplication was refused and must not proceed."""
def _coerce_int(value: Any) -> int | None:
try:
return int(value)
except (TypeError, ValueError):
return None
def _row_value(row: sqlite3.Row | dict[str, Any], key: str) -> Any:
if isinstance(row, sqlite3.Row):
try:
return row[key]
except IndexError:
return None
return row.get(key)
def _path_for_location(row: sqlite3.Row | dict[str, Any]) -> Path | None:
    """Derive the filesystem path described by a location row.

    A non-blank ``absolute_path`` wins outright. Otherwise the path is
    ``base_path / locator``; if either of those is blank or missing the
    result is ``None``.
    """

    def _text(column: str) -> str:
        # Missing / NULL columns collapse to an empty string.
        return str(_row_value(row, column) or "").strip()

    explicit = _text("absolute_path")
    if explicit:
        return Path(explicit)

    root = _text("base_path")
    relative = _text("locator")
    if root and relative:
        return Path(root) / relative
    return None
def _resolved_path(path: Path | None) -> Path | None:
if path is None:
return None
with suppress(OSError, RuntimeError):
return path.resolve(strict=False)
return path
def _paths_match(left: Path | None, right: Path | None) -> bool:
    """Tell whether two paths are equal once both have been resolved.

    If either side is ``None`` the answer is always ``False``.
    """
    if left is not None and right is not None:
        return _resolved_path(left) == _resolved_path(right)
    return False
def _has_copy_suffix(locator: str | None) -> bool:
    """Report whether the locator's final path component matches ``_COPY_SUFFIX_RE``.

    ``None`` and empty locators are treated as an empty name.
    """
    file_name = PurePath(str(locator or "")).name
    return _COPY_SUFFIX_RE.search(file_name) is not None
def _location_payload(row: sqlite3.Row | dict[str, Any]) -> dict[str, Any]:
    """Turn one joined location row into a plain dict, probing the disk.

    Besides the normalised column values, the result carries
    ``file_exists`` (whether the derived path exists right now),
    ``actual_file_size_bytes`` (size from ``stat`` or ``None``) and
    ``_path`` (the derived ``Path`` or ``None``).
    """
    path = _path_for_location(row)
    file_exists = path is not None and path.exists()

    actual_file_size_bytes = None
    if file_exists:
        try:
            actual_file_size_bytes = int(path.stat().st_size)
        except OSError:
            # The file may vanish or be unreadable between exists() and stat().
            pass

    payload: dict[str, Any] = {"id": int(row["location_id"])}
    for column in ("file_asset_id", "song_id", "backend_id"):
        payload[column] = int(row[column])
    for column in ("backend_name", "locator", "absolute_path"):
        payload[column] = str(row[column] or "")
    payload["file_exists"] = file_exists
    payload["file_size_bytes"] = _coerce_int(row["file_size_bytes"])
    payload["actual_file_size_bytes"] = actual_file_size_bytes
    for column in ("song_name", "singers"):
        payload[column] = str(row[column] or "")
    payload["_path"] = path
    return payload
def _location_sort_key(location: dict[str, Any]) -> tuple[int, int, int, int]:
    """Build the ordering key used to pick which location to keep.

    Lower sorts first: existing files before missing ones, names without a
    copy suffix before names with one, shorter locators before longer ones,
    and finally the smaller location id.
    """
    missing_rank = int(not location["file_exists"])
    locator = location["locator"]
    copy_rank = int(_has_copy_suffix(locator))
    return (missing_rank, copy_rank, len(locator), int(location["id"]))
def _duplicate_size_bytes(location: dict[str, Any]) -> int:
size_value = location.get("actual_file_size_bytes")
if size_value is None:
size_value = location.get("file_size_bytes")
return max(int(size_value or 0), 0)
class LocalMaintenanceService:
    """Find and remove duplicate active file locations on ``local_fs`` backends.

    A "duplicate group" is a set of two or more active ``file_locations`` rows
    that share the same ``file_asset_id`` and ``backend_id``. One row per
    group is kept; the rest are reported by :meth:`scan_local_duplicates` or
    deactivated (and their files removed) by :meth:`dedupe_local_duplicates`.
    """

    def __init__(self, db_path: str | Path):
        """Remember the database location; no connection is opened here."""
        self.db_path = Path(db_path)

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection through the project's ``connect_database``."""
        return connect_database(self.db_path)

    @contextmanager
    def _connection(self):
        """Yield a fresh connection, committing on success and always closing.

        If the managed block raises, ``commit`` is skipped and the connection
        is closed without committing.
        """
        conn = self._connect()
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    def scan_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Report duplicate groups without modifying anything.

        Args:
            sample_limit: Maximum number of groups serialised into ``groups``.

        Returns:
            A dict with a ``summary`` of counts/sizes and a ``groups`` sample.
        """
        with self._connection() as conn:
            groups = self._load_duplicate_groups(conn)
            scanned_row = conn.execute(
                """
                SELECT COUNT(*) AS count_value
                FROM file_locations AS fl
                JOIN storage_backends AS sb ON sb.id = fl.backend_id
                WHERE fl.status = 'active'
                  AND sb.backend_type = 'local_fs'
                """
            ).fetchone()
            return self._build_scan_payload(
                groups,
                scanned_active_local_location_count=int(scanned_row["count_value"]) if scanned_row else 0,
                sample_limit=sample_limit,
            )

    def dedupe_local_duplicates(self, *, sample_limit: int = 20) -> dict[str, Any]:
        """Collapse every duplicate group down to its kept location.

        For each group the kept row becomes the primary, references from
        ``upload_tasks`` and ``job_items`` are repointed to it, and the other
        rows are marked inactive. Duplicate files are removed from disk only
        after the database changes have been committed.

        Args:
            sample_limit: Forwarded to the follow-up scan.

        Returns:
            The payload of a fresh :meth:`scan_local_duplicates` call with an
            extra ``execution`` dict holding the counters of this run.

        Raises:
            LocalDedupeBlockedError: If any job run or job item is running.
        """
        execution = {
            "deduped_group_count": 0,
            "inactive_location_count": 0,
            "deleted_file_count": 0,
            "released_bytes": 0,
            "repointed_upload_task_count": 0,
            "repointed_job_item_count": 0,
        }
        # (duplicate location, path of the kept location) pairs whose files
        # are to be removed once the transaction is safely committed.
        pending_deletions: list[tuple[dict[str, Any], Path | None]] = []

        with self._connection() as conn:
            self._raise_if_running_work(conn)
            groups = self._load_duplicate_groups(conn)
            affected_pairs: set[tuple[int, int]] = set()
            for group in groups:
                keep = group["keep"]
                duplicates = list(group["duplicates"])
                if not duplicates:
                    continue
                keep_id = int(keep["id"])
                execution["deduped_group_count"] += 1
                # Make the kept row the only primary for this asset/backend.
                conn.execute(
                    """
                    UPDATE file_locations
                    SET
                        is_primary = CASE WHEN id = ? THEN 1 ELSE 0 END,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE file_asset_id = ? AND backend_id = ?
                    """,
                    (
                        keep_id,
                        int(group["file_asset_id"]),
                        int(group["backend_id"]),
                    ),
                )
                for duplicate in duplicates:
                    duplicate_id = int(duplicate["id"])
                    upload_cursor = conn.execute(
                        """
                        UPDATE upload_tasks
                        SET
                            source_location_id = ?,
                            updated_at = CURRENT_TIMESTAMP
                        WHERE source_location_id = ?
                        """,
                        (keep_id, duplicate_id),
                    )
                    execution["repointed_upload_task_count"] += max(upload_cursor.rowcount, 0)
                    item_cursor = conn.execute(
                        """
                        UPDATE job_items
                        SET file_location_id = ?
                        WHERE file_location_id = ?
                        """,
                        (keep_id, duplicate_id),
                    )
                    execution["repointed_job_item_count"] += max(item_cursor.rowcount, 0)
                    inactive_cursor = conn.execute(
                        """
                        UPDATE file_locations
                        SET
                            status = 'inactive',
                            is_primary = 0,
                            updated_at = CURRENT_TIMESTAMP
                        WHERE id = ? AND status = 'active'
                        """,
                        (duplicate_id,),
                    )
                    execution["inactive_location_count"] += max(inactive_cursor.rowcount, 0)
                    if duplicate["_path"] is not None:
                        pending_deletions.append((duplicate, keep["_path"]))
                affected_pairs.add((int(group["song_id"]), int(group["backend_id"])))
            for song_id, backend_id in affected_pairs:
                self._refresh_song_backend_presence_with_connection(
                    conn,
                    song_id=song_id,
                    backend_id=backend_id,
                )

        # The transaction is committed at this point. Files are removed only
        # now so that a failed statement or commit (which discards the row
        # changes) can never leave active rows pointing at deleted files.
        for duplicate, keep_path in pending_deletions:
            duplicate_path = duplicate["_path"]
            # Never delete the file the kept location itself points at.
            if not duplicate_path.exists() or _paths_match(duplicate_path, keep_path):
                continue
            duplicate_size_bytes = _duplicate_size_bytes(duplicate)
            try:
                duplicate_path.unlink()
            except OSError:
                # Best effort: the row is already inactive; just don't count it.
                continue
            execution["deleted_file_count"] += 1
            execution["released_bytes"] += duplicate_size_bytes

        # The rescan opens its own connection, so it has to run after the
        # commit above in order to observe the deduplicated state.
        payload = self.scan_local_duplicates(sample_limit=sample_limit)
        payload["execution"] = execution
        return payload

    def _raise_if_running_work(self, conn: sqlite3.Connection) -> None:
        """Raise ``LocalDedupeBlockedError`` if any job run or item is 'running'."""
        running_jobs_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_runs WHERE status = 'running'"
        ).fetchone()
        running_items_row = conn.execute(
            "SELECT COUNT(*) AS count_value FROM job_items WHERE status = 'running'"
        ).fetchone()
        running_jobs = int(running_jobs_row["count_value"]) if running_jobs_row else 0
        running_items = int(running_items_row["count_value"]) if running_items_row else 0
        if running_jobs > 0 or running_items > 0:
            raise LocalDedupeBlockedError(
                f"cannot dedupe while jobs or items are running (jobs={running_jobs}, items={running_items})"
            )

    def _load_duplicate_groups(self, conn: sqlite3.Connection) -> list[dict[str, Any]]:
        """Load all duplicate groups and choose the location to keep in each.

        Returns:
            One dict per (file asset, backend) pair with more than one active
            ``local_fs`` location, sorted by song id, file asset id and
            backend id. ``keep`` is the first location by
            ``_location_sort_key``; ``duplicates`` are the remaining ones.
        """
        rows = conn.execute(
            """
            WITH duplicate_pairs AS (
                SELECT fl.file_asset_id, fl.backend_id
                FROM file_locations AS fl
                JOIN storage_backends AS sb ON sb.id = fl.backend_id
                WHERE fl.status = 'active'
                  AND sb.backend_type = 'local_fs'
                GROUP BY fl.file_asset_id, fl.backend_id
                HAVING COUNT(*) > 1
            )
            SELECT
                fl.id AS location_id,
                fl.file_asset_id,
                fa.song_id,
                fl.backend_id,
                sb.name AS backend_name,
                sb.base_path,
                fl.locator,
                fl.absolute_path,
                COALESCE(fa.file_size_bytes, s.file_size_bytes) AS file_size_bytes,
                s.name AS song_name,
                s.singers
            FROM file_locations AS fl
            JOIN duplicate_pairs AS dp
                ON dp.file_asset_id = fl.file_asset_id
                AND dp.backend_id = fl.backend_id
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            JOIN songs AS s ON s.id = fa.song_id
            JOIN storage_backends AS sb ON sb.id = fl.backend_id
            WHERE fl.status = 'active'
            ORDER BY fl.file_asset_id ASC, fl.backend_id ASC, fl.id ASC
            """
        ).fetchall()
        grouped: dict[tuple[int, int], list[dict[str, Any]]] = {}
        for row in rows:
            location = _location_payload(row)
            key = (int(location["file_asset_id"]), int(location["backend_id"]))
            grouped.setdefault(key, []).append(location)
        groups: list[dict[str, Any]] = []
        for (file_asset_id, backend_id), locations in grouped.items():
            ordered_locations = sorted(locations, key=_location_sort_key)
            keep = ordered_locations[0]
            groups.append(
                {
                    "file_asset_id": int(file_asset_id),
                    "backend_id": int(backend_id),
                    "backend_name": keep["backend_name"],
                    "song_id": int(keep["song_id"]),
                    "song_name": keep["song_name"],
                    "singers": keep["singers"],
                    "keep": keep,
                    "duplicates": ordered_locations[1:],
                }
            )
        groups.sort(
            key=lambda group: (
                int(group["song_id"]),
                int(group["file_asset_id"]),
                int(group["backend_id"]),
            )
        )
        return groups

    def _build_scan_payload(
        self,
        groups: list[dict[str, Any]],
        *,
        scanned_active_local_location_count: int,
        sample_limit: int,
    ) -> dict[str, Any]:
        """Assemble the scan result: totals over all groups plus a sample.

        A falsy ``sample_limit`` falls back to 20 and the limit is never
        allowed below 1; the summary always covers every group.
        """
        normalized_sample_limit = max(int(sample_limit or 20), 1)
        return {
            "summary": {
                "duplicate_group_count": len(groups),
                "duplicate_location_count": sum(len(group["duplicates"]) for group in groups),
                "duplicate_file_size_bytes": sum(
                    _duplicate_size_bytes(location)
                    for group in groups
                    for location in group["duplicates"]
                ),
                "scanned_active_local_location_count": int(scanned_active_local_location_count),
            },
            "groups": [self._serialize_group(group) for group in groups[:normalized_sample_limit]],
        }

    @staticmethod
    def _serialize_group(group: dict[str, Any]) -> dict[str, Any]:
        """Return the public form of a group with serialised locations."""
        return {
            "file_asset_id": int(group["file_asset_id"]),
            "backend_id": int(group["backend_id"]),
            "backend_name": str(group["backend_name"]),
            "song_id": int(group["song_id"]),
            "song_name": str(group["song_name"]),
            "singers": str(group["singers"]),
            "keep": LocalMaintenanceService._serialize_location(group["keep"]),
            "duplicates": [
                LocalMaintenanceService._serialize_location(location)
                for location in group["duplicates"]
            ],
        }

    @staticmethod
    def _serialize_location(location: dict[str, Any]) -> dict[str, Any]:
        """Return the public fields of a location (the internal ``_path`` is dropped)."""
        return {
            "id": int(location["id"]),
            "locator": str(location["locator"]),
            "absolute_path": str(location["absolute_path"]),
            "file_exists": bool(location["file_exists"]),
            "file_size_bytes": _coerce_int(location["file_size_bytes"]),
            "actual_file_size_bytes": _coerce_int(location["actual_file_size_bytes"]),
        }

    @staticmethod
    def _refresh_song_backend_presence_with_connection(
        conn: sqlite3.Connection,
        *,
        song_id: int,
        backend_id: int,
    ) -> None:
        """Recompute and upsert the ``song_backend_presence`` row for one pair.

        The row records how many active locations the song has on the backend
        and the smallest active location id (``NULL`` when there are none).
        The caller owns the transaction; nothing is committed here.
        """
        summary = conn.execute(
            """
            SELECT
                COUNT(*) AS active_file_count,
                MIN(fl.id) AS primary_file_location_id
            FROM file_locations AS fl
            JOIN file_assets AS fa ON fa.id = fl.file_asset_id
            WHERE fa.song_id = ?
              AND fl.backend_id = ?
              AND fl.status = 'active'
            """,
            (int(song_id), int(backend_id)),
        ).fetchone()
        active_file_count = int(summary["active_file_count"]) if summary else 0
        has_active_file = 1 if active_file_count > 0 else 0
        primary_file_location_id = summary["primary_file_location_id"] if summary else None
        conn.execute(
            """
            INSERT INTO song_backend_presence (
                song_id,
                backend_id,
                has_active_file,
                active_file_count,
                primary_file_location_id
            )
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(song_id, backend_id) DO UPDATE SET
                has_active_file = excluded.has_active_file,
                active_file_count = excluded.active_file_count,
                primary_file_location_id = excluded.primary_file_location_id,
                updated_at = CURRENT_TIMESTAMP
            """,
            (
                int(song_id),
                int(backend_id),
                has_active_file,
                active_file_count,
                primary_file_location_id,
            ),
        )