Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
from .kuwo import KuwoCollector, parse_playlist_square_html as parse_kuwo_playlist_square_html, parse_toplist_html as parse_kuwo_toplist_html
|
||||
from .netease import NeteaseCollector, parse_playlist_square_html as parse_netease_playlist_square_html, parse_toplist_payload as parse_netease_toplist_payload
|
||||
from .qq import QQCollector, parse_playlist_square_payload as parse_qq_playlist_square_payload, parse_toplist_payload as parse_qq_toplist_payload
|
||||
|
||||
# Public API of this package: one collector class per platform plus the
# platform-prefixed aliases of each module's parse helpers imported above.
__all__ = [
    "KuwoCollector",
    "NeteaseCollector",
    "QQCollector",
    "parse_kuwo_playlist_square_html",
    "parse_kuwo_toplist_html",
    "parse_netease_playlist_square_html",
    "parse_netease_toplist_payload",
    "parse_qq_playlist_square_payload",
    "parse_qq_toplist_payload",
]
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
@dataclass
class BaseCollector:
    """Shared HTTP plumbing for the platform collectors.

    Holds default request headers and a ``requests.Session`` and exposes a
    single ``get`` helper that raises on HTTP error statuses.
    """

    # Headers sent with every request; each instance gets its own dict.
    headers: dict[str, str] = field(default_factory=lambda: {"User-Agent": "Mozilla/5.0"})
    # Session reused for all requests issued through this collector.
    session: requests.Session = field(default_factory=requests.Session)

    def get(self, url: str, **kwargs):
        """Issue a GET request through the shared session.

        Args:
            url: Address to fetch.
            **kwargs: Extra keyword arguments forwarded to ``Session.get``.
                ``timeout`` overrides the 15 second default; ``headers`` is
                merged on top of the collector's default headers.

        Returns:
            The ``requests.Response`` for a successful request.

        Raises:
            requests.HTTPError: If the response has an error status code.
        """
        # Treat caller-supplied timeout/headers as overrides; passing them
        # alongside the explicit keywords would raise a duplicate-keyword
        # TypeError.
        kwargs.setdefault("timeout", 15)
        extra_headers = kwargs.pop("headers", None)
        merged_headers = {**self.headers, **extra_headers} if extra_headers else self.headers
        response = self.session.get(url, headers=merged_headers, **kwargs)
        response.raise_for_status()
        return response
|
||||
@@ -0,0 +1,260 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
# Kuwo playlist-square page fetched by KuwoCollector.collect_playlist_square.
PLAYLIST_SQUARE_URL = "https://www.kuwo.cn/playlist"
# Kuwo ranking page fetched by KuwoCollector.collect_toplist.
TOPLIST_URL = "https://www.kuwo.cn/rankList"
# Captures everything assigned to window.__NUXT__ inside an inline <script> tag.
NUXT_SCRIPT_RE = re.compile(r"<script>\s*window\.__NUXT__=(.*?)</script>", re.DOTALL)
# Splits an immediately-invoked `(function(params){return body})(args)` payload
# into its parameter list, returned expression and call arguments.
NUXT_FUNCTION_RE = re.compile(
    r"^\(function\((?P<params>.*?)\)\s*\{\s*return\s+(?P<body>.*)\}\)\((?P<args>.*)\)\s*;?\s*$",
    re.DOTALL,
)
|
||||
# Multipliers for the Chinese magnitude suffixes that appear in displayed
# counts (万 = ten thousand, 亿 = hundred million).
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}


def _parse_play_count(value: object) -> int | None:
    """Normalize a scraped play count to an integer.

    Accepts real numbers, plain digit strings (whitespace and thousands
    separators are ignored) and strings carrying a 万/亿 suffix such as
    ``"1.2万"``.

    Args:
        value: Raw value taken from a page or an API payload.

    Returns:
        The count as an ``int`` (fractions are truncated), or ``None`` when
        the value is missing, a bool, non-finite, or not recognizable.
    """
    from decimal import Decimal  # local import keeps the block self-contained

    if value in (None, ""):
        return None
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # NaN and +/-infinity have no integer form.
            return None
    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, on which int() raises ValueError.
    if text.isdecimal():
        return int(text)
    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    # Scale in exact decimal arithmetic: with binary floats 0.57 * 10_000 is
    # 5699.999999999999, which truncates to 5699 instead of 5700.
    return int(Decimal(match.group(1)) * multiplier)
|
||||
|
||||
|
||||
def split_js_arguments(text: str) -> list[str]:
    """Split a JavaScript argument list on its top-level commas.

    Commas inside quoted strings or inside ``()``, ``[]`` or ``{}`` do not
    split. Each piece is stripped of surrounding whitespace and empty pieces
    are dropped.

    Args:
        text: The text between the parentheses of a call; falsy input is
            treated as an empty string.

    Returns:
        The individual argument source fragments, in order.
    """
    pieces: list[str] = []
    buffer: list[str] = []
    active_quote = ""
    escaped = False
    nesting = 0

    def flush() -> None:
        # Emit the buffered fragment (if non-blank) and start a new one.
        chunk = "".join(buffer).strip()
        if chunk:
            pieces.append(chunk)
        buffer.clear()

    for ch in str(text or ""):
        # A comma separates arguments only outside strings and brackets.
        if ch == "," and not (escaped or active_quote or nesting):
            flush()
            continue
        buffer.append(ch)
        if escaped:
            # The character after a backslash is taken literally.
            escaped = False
        elif active_quote:
            if ch == "\\":
                escaped = True
            elif ch == active_quote:
                active_quote = ""
        elif ch in "'\"":
            active_quote = ch
        elif ch in "([{":
            nesting += 1
        elif ch in ")]}":
            # Never go negative on unbalanced closers.
            nesting = max(nesting - 1, 0)
    flush()
    return pieces
|
||||
|
||||
|
||||
def resolve_js_value(token: str, variables: dict[str, object] | None = None):
    """Translate one JavaScript literal or identifier into a Python value.

    Resolution order: a name found in ``variables``; ``true``/``false``/
    ``null``; a quoted string (decoded with the JSON string rules); an int or
    float. Anything else is returned unchanged as a string.

    Args:
        token: Source fragment to resolve; falsy input resolves to ``None``.
        variables: Optional mapping of identifier names to already-resolved
            values.

    Returns:
        The resolved Python value, or the stripped token itself when it is
        not understood.
    """
    token = str(token or "").strip()
    variables = variables or {}
    if not token:
        return None
    if token in variables:
        return variables[token]
    if token in {"true", "false", "null"}:
        return {"true": True, "false": False, "null": None}[token]
    if token.startswith(("'", '"')) and token.endswith(("'", '"')):
        normalized = token
        if token.startswith("'") and token.endswith("'"):
            # Re-quote single-quoted strings as JSON: backslashes are kept
            # literally and embedded double quotes are escaped.
            normalized = '"' + token[1:-1].replace("\\", "\\\\").replace('"', '\\"') + '"'
        try:
            return json.loads(normalized)
        except json.JSONDecodeError:
            # JS-only escapes (\x41, \') or mismatched quotes are not valid
            # JSON; return the token verbatim, like any other unknown token,
            # instead of aborting the whole parse.
            return token
    try:
        if "." in token:
            return float(token)
        return int(token)
    except ValueError:
        return token
|
||||
|
||||
|
||||
def extract_kuwo_bang_menu_items(script_body: str) -> list[dict]:
    """Pull ranking entries out of a ``window.__NUXT__`` function payload.

    The payload is expected in the ``(function(params){return body})(args)``
    form matched by ``NUXT_FUNCTION_RE``. Call arguments are bound to the
    parameter names so that identifiers used inside the body can be resolved,
    then the body is scanned with a regular expression for objects carrying
    ``sourceid``/``name``/``id``/``source``/``pic``/``pub`` (and optionally a
    play-count field).

    Args:
        script_body: Text assigned to ``window.__NUXT__``.

    Returns:
        One dict per matched entry with the keys ``sourceid``, ``name``,
        ``id``, ``source``, ``pic``, ``pub`` and ``play_count``; entries with
        a falsy ``id`` are left out. Empty when the payload has another shape
        or does not mention ``bangMenu``.
    """
    parsed = NUXT_FUNCTION_RE.match(str(script_body or "").strip())
    if parsed is None:
        return []

    param_names = [
        name.strip()
        for name in str(parsed.group("params") or "").split(",")
        if name.strip()
    ]
    arg_values = [resolve_js_value(raw) for raw in split_js_arguments(parsed.group("args") or "")]
    # Positional binding of the function's parameters to its call arguments.
    bindings = dict(zip(param_names, arg_values))

    function_body = str(parsed.group("body") or "")
    if "bangMenu" not in function_body:
        return []

    entry_re = re.compile(
        r"\{sourceid:(?P<sourceid>[^,]+),.*?name:(?P<name>[^,]+),\s*id:(?P<id>[^,]+),\s*source:(?P<source>[^,]+),\s*pic:(?P<pic>[^,]+),\s*pub:(?P<pub>[^,}\]]+)(?:,\s*(?:listencnt|playCount|listenCount):(?P<play_count>[^,}\]]+))?",
        re.DOTALL,
    )
    field_names = ("sourceid", "name", "id", "source", "pic", "pub", "play_count")

    results: list[dict] = []
    for hit in entry_re.finditer(function_body):
        record = {}
        for field_name in field_names:
            # An unmatched optional group yields None, which resolves to None.
            record[field_name] = resolve_js_value(hit.group(field_name), bindings)
        if record.get("id"):
            results.append(record)
    return results
|
||||
|
||||
|
||||
def extract_nuxt_state(html: str) -> dict | None:
    """Evaluate the page's ``window.__NUXT__`` assignment and return it.

    The inline script is executed by a local ``node`` binary and the
    resulting object is serialized to JSON and parsed back in Python.

    SECURITY NOTE(review): the script text is taken verbatim from the fetched
    page, so this runs remote-controlled JavaScript with the full privileges
    of the Node process (filesystem, network, child processes). Only use it
    against trusted sources, or run Node in a sandbox or restricted account.

    Args:
        html: Page markup; falsy input is treated as empty.

    Returns:
        The decoded state dict, or ``None`` when the script tag is missing,
        Node is unavailable, fails or times out, or the output is not a JSON
        object.
    """
    match = NUXT_SCRIPT_RE.search(str(html or ""))
    if not match:
        return None
    script_body = match.group(1)
    node_script = (
        "const window = {}; "
        f"window.__NUXT__={script_body}; "
        "process.stdout.write(JSON.stringify(window.__NUXT__));"
    )
    try:
        # Feed the script through stdin ("-") rather than `-e`: a large state
        # blob can exceed the OS limit on the size of one command-line
        # argument, which previously failed silently.
        completed = subprocess.run(
            ["node", "-"],
            input=node_script.encode("utf-8"),
            check=True,
            capture_output=True,
            timeout=10,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # Missing binary, encoding problem, non-zero exit or timeout: this is
        # a best-effort path, so callers fall back to regex extraction.
        return None
    output = completed.stdout.decode("utf-8", errors="ignore").strip()
    if not output:
        return None
    try:
        state = json.loads(output)
    except json.JSONDecodeError:
        return None
    # Callers use dict methods on the result; reject any other JSON type.
    return state if isinstance(state, dict) else None
|
||||
|
||||
|
||||
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Extract playlist candidates from Kuwo's playlist-square markup.

    Every anchor whose ``href`` contains ``playlist_detail/`` yields one
    candidate. The playlist ID is the last path segment of the link, and
    duplicate IDs are reported once.

    Args:
        html: Markup of the playlist-square page.

    Returns:
        Candidates in document order with ``platform="kuwo"`` and
        ``pool_kind="playlist_square"``.
    """
    soup = BeautifulSoup(html, "lxml")
    items: list[PlaylistCandidate] = []
    seen: set[str] = set()
    for anchor in soup.select("a[href*='playlist_detail/']"):
        href = anchor.get("href", "").strip()
        # Use only the path part, so a query string or fragment on the link
        # never ends up inside the ID.
        path = href.split("#", 1)[0].split("?", 1)[0]
        remote_id = path.rstrip("/").split("/")[-1]
        if not remote_id or remote_id in seen:
            continue
        seen.add(remote_id)
        if href.startswith(("http://", "https://")):
            absolute_url = href
        elif href.startswith("//"):
            # Protocol-relative link: only the scheme is missing.
            absolute_url = f"https:{href}"
        else:
            # Site-relative link, with or without a leading slash.
            absolute_url = f"https://www.kuwo.cn/{href.lstrip('/')}"
        name = anchor.get("title") or anchor.get_text(strip=True) or remote_id
        # A missing <img> falls back to an empty dict, giving cover=None.
        cover = (anchor.find("img") or {}).get("src")
        play_count_node = anchor.select_one(".num")
        items.append(
            PlaylistCandidate(
                platform="kuwo",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=name,
                url=absolute_url,
                cover_url=cover,
                play_count=_parse_play_count(
                    play_count_node.get_text(" ", strip=True) if play_count_node else None
                ),
            )
        )
    return items
|
||||
|
||||
|
||||
def _extract_toplist_play_count(entry: dict) -> int | None:
    """Return the first parseable play count found on a ranking entry.

    The keys ``listencnt``, ``play_count``, ``playCount`` and ``listenCount``
    are tried in that order.

    Args:
        entry: One ranking entry.

    Returns:
        The parsed count, or ``None`` when no key holds a usable value.
    """
    count_keys = ("listencnt", "play_count", "playCount", "listenCount")
    # Lazy generator: parsing stops at the first key that yields a value.
    parsed_counts = (_parse_play_count(entry.get(key)) for key in count_keys)
    return next((count for count in parsed_counts if count is not None), None)
|
||||
|
||||
|
||||
def _kuwo_toplist_candidate(entry: dict) -> PlaylistCandidate | None:
    """Build a toplist candidate from one ranking entry, or None without an id."""
    raw_id = entry.get("id")
    # A present-but-null id must not turn into the literal string "None".
    remote_id = "" if raw_id is None else str(raw_id).strip()
    if not remote_id:
        return None
    return PlaylistCandidate(
        platform="kuwo",
        pool_kind="toplist",
        remote_id=remote_id,
        name=entry.get("name") or remote_id,
        url=f"https://www.kuwo.cn/rankList?bangId={remote_id}",
        cover_url=entry.get("pic"),
        parse_strategy="kuwo_toplist",
        play_count=_extract_toplist_play_count(entry),
        metadata={"sourceid": str(entry.get("sourceid", "")), "pub": entry.get("pub")},
    )


def parse_toplist_html(html: str) -> list[PlaylistCandidate]:
    """Extract ranking (toplist) candidates from Kuwo's rank-list page.

    The page state is first obtained with ``extract_nuxt_state``; entries are
    then read from ``data[*].bangMenu[*].list``. When no state is available,
    the raw ``window.__NUXT__`` script is scanned with
    ``extract_kuwo_bang_menu_items`` instead.

    Args:
        html: Markup of the rank-list page.

    Returns:
        Candidates with ``platform="kuwo"`` and ``pool_kind="toplist"``;
        entries without an id are skipped.
    """
    state = extract_nuxt_state(html)
    if state:
        entries = [
            entry
            for group in state.get("data", []) or []
            for menu in group.get("bangMenu", []) or []
            for entry in menu.get("list", []) or []
        ]
    else:
        # Regex fallback; search the markup once and reuse the match.
        script_match = NUXT_SCRIPT_RE.search(html)
        entries = extract_kuwo_bang_menu_items(script_match.group(1) if script_match else "")

    candidates = (_kuwo_toplist_candidate(entry) for entry in entries)
    return [candidate for candidate in candidates if candidate is not None]
|
||||
|
||||
|
||||
class KuwoCollector(BaseCollector):
    """Collector for Kuwo's playlist-square and ranking pages."""

    def collect_playlist_square(self, page: int = 1, page_size: int = 30) -> list[PlaylistCandidate]:
        """Fetch one playlist-square page and parse its playlist links.

        Args:
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Requested page length; values below 1 are treated as 1.

        Returns:
            The candidates parsed from the returned markup.
        """
        page_number = max(page, 1)
        rows = max(page_size, 1)
        # The page takes its paging values as the string parameters pn / rn.
        query = {"pn": str(page_number), "rn": str(rows)}
        page_html = self.get(PLAYLIST_SQUARE_URL, params=query).text
        return parse_playlist_square_html(page_html)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the rank-list page and parse its ranking entries."""
        page_html = self.get(TOPLIST_URL).text
        return parse_toplist_html(page_html)
|
||||
@@ -0,0 +1,113 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
# NetEase playlist discovery page fetched by NeteaseCollector.collect_playlist_square.
PLAYLIST_SQUARE_URL = "https://music.163.com/discover/playlist"
# NetEase endpoint fetched by NeteaseCollector.collect_toplist; its response is parsed as JSON.
TOPLIST_API_URL = "https://music.163.com/api/toplist/detail"
|
||||
|
||||
# Multipliers for the Chinese magnitude suffixes that appear in displayed
# counts (万 = ten thousand, 亿 = hundred million).
_COUNT_UNIT_MULTIPLIERS = {
    "万": 10_000,
    "亿": 100_000_000,
}


def _parse_play_count(value: object) -> int | None:
    """Normalize a scraped play count to an integer.

    Accepts real numbers, plain digit strings (whitespace and thousands
    separators are ignored) and strings carrying a 万/亿 suffix such as
    ``"1.2万"``.

    Args:
        value: Raw value taken from a page or an API payload.

    Returns:
        The count as an ``int`` (fractions are truncated), or ``None`` when
        the value is missing, a bool, non-finite, or not recognizable.
    """
    from decimal import Decimal  # local import keeps the block self-contained

    if value in (None, ""):
        return None
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # NaN and +/-infinity have no integer form.
            return None
    text = re.sub(r"\s+", "", str(value)).replace(",", "")
    if not text:
        return None
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, on which int() raises ValueError.
    if text.isdecimal():
        return int(text)
    match = re.search(r"([0-9]+(?:\.[0-9]+)?)([万亿])", text)
    if not match:
        return None
    multiplier = _COUNT_UNIT_MULTIPLIERS.get(match.group(2))
    if multiplier is None:
        return None
    # Scale in exact decimal arithmetic: with binary floats 0.57 * 10_000 is
    # 5699.999999999999, which truncates to 5699 instead of 5700.
    return int(Decimal(match.group(1)) * multiplier)
|
||||
|
||||
|
||||
def parse_playlist_square_html(html: str) -> list[PlaylistCandidate]:
    """Extract playlist candidates from NetEase's playlist discovery markup.

    Every ``a.msk`` anchor linking to ``/playlist?id=...`` yields one
    candidate; duplicate IDs are reported once.

    Args:
        html: Markup of the discovery page.

    Returns:
        Candidates in document order with ``platform="netease"`` and
        ``pool_kind="playlist_square"``.
    """
    soup = BeautifulSoup(html, "lxml")
    items: list[PlaylistCandidate] = []
    seen: set[str] = set()
    for anchor in soup.select("a.msk[href*='/playlist?id=']"):
        href = anchor.get("href", "")
        # Read exactly the playlist id value. Splitting on "id=" would also
        # keep trailing parameters, or pick up a later "...id=" parameter
        # such as "uid=".
        id_match = re.search(r"/playlist\?id=([^&#]*)", href)
        remote_id = id_match.group(1).strip() if id_match else ""
        if not remote_id or remote_id in seen:
            continue
        seen.add(remote_id)
        # The play-count badge is looked up under the anchor's parent element.
        cover_node = anchor.parent if anchor.parent else anchor
        play_count_node = cover_node.select_one(".nb")
        items.append(
            PlaylistCandidate(
                platform="netease",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=anchor.get("title") or remote_id,
                url=f"https://music.163.com/#/playlist?id={remote_id}",
                # Nearest <img> preceding the anchor in document order.
                cover_url=(anchor.find_previous("img") or {}).get("src"),
                play_count=_parse_play_count(
                    play_count_node.get_text(" ", strip=True) if play_count_node else None
                ),
            )
        )
    return items
|
||||
|
||||
|
||||
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert NetEase's toplist JSON payload into candidates.

    Args:
        payload: Decoded response; its ``list`` key holds the ranking
            entries. A missing or null payload or list yields no candidates.

    Returns:
        One candidate per entry that has an id, with ``platform="netease"``
        and ``pool_kind="toplist"``.
    """
    items: list[PlaylistCandidate] = []
    for entry in (payload or {}).get("list") or []:
        raw_id = entry.get("id")
        # A present-but-null id must not turn into the literal string "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        items.append(
            PlaylistCandidate(
                platform="netease",
                pool_kind="toplist",
                remote_id=remote_id,
                name=entry.get("name") or remote_id,
                url=f"https://music.163.com/#/playlist?id={remote_id}",
                cover_url=entry.get("coverImgUrl"),
                parse_strategy="netease_toplist",
                # Falls back to the subscriber count when playCount is
                # missing or zero.
                play_count=_parse_play_count(
                    entry.get("playCount") or entry.get("subscribedCount")
                ),
                metadata={"update_frequency": entry.get("updateFrequency")},
            )
        )
    return items
|
||||
|
||||
|
||||
class NeteaseCollector(BaseCollector):
    """Collector for NetEase's playlist discovery page and toplist API."""

    def collect_playlist_square(
        self,
        category: str = "全部",
        order: str = "hot",
        page: int = 1,
        page_size: int = 35,
        offset: int | None = None,
    ) -> list[PlaylistCandidate]:
        """Fetch one page of the playlist discovery listing.

        Args:
            category: Value sent as the ``cat`` query parameter.
            order: Value sent as the ``order`` query parameter.
            page: 1-based page number, used only when ``offset`` is omitted.
            page_size: Page length used to derive the offset; values below 1
                are treated as 1.
            offset: Explicit item offset; takes precedence over ``page``.

        Returns:
            The candidates parsed from the returned markup.
        """
        # NOTE(review): page_size only feeds the offset computation; no page
        # length is sent to the server. Confirm the server's page length
        # matches, otherwise pages may overlap or leave gaps.
        if offset is None:
            pages_to_skip = max(page - 1, 0)
            per_page = max(page_size, 1)
            offset = pages_to_skip * per_page
        query = {"cat": category, "order": order, "offset": offset}
        page_html = self.get(PLAYLIST_SQUARE_URL, params=query).text
        return parse_playlist_square_html(page_html)

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist endpoint and parse its JSON payload."""
        payload = self.get(TOPLIST_API_URL).json()
        return parse_toplist_payload(payload)
|
||||
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import requests
|
||||
|
||||
from ..models import PlaylistCandidate
|
||||
from .base import BaseCollector
|
||||
|
||||
|
||||
# QQ Music endpoint queried by QQCollector.collect_playlist_square (JSON response).
PLAYLIST_SQUARE_URL = "https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg"
# QQ Music endpoint queried by QQCollector.collect_toplist (JSON response).
TOPLIST_URL = "https://c.y.qq.com/v8/fcg-bin/fcg_myqq_toplist.fcg"
|
||||
|
||||
|
||||
def _extract_collected_song_count(entry: dict) -> int | None:
    """Return the song count carried by a payload entry, if any.

    The keys ``songnum``, ``song_num``, ``songCount``, ``song_count``,
    ``trackCount`` and ``track_count`` are tried in order; the first one
    holding a number or a digit string wins.

    Args:
        entry: One playlist or toplist entry.

    Returns:
        The count as an ``int``, or ``None`` when no key has a usable value.
    """
    for key in ("songnum", "song_num", "songCount", "song_count", "trackCount", "track_count"):
        value = entry.get(key)
        if isinstance(value, bool):
            # bool is an int subclass but never a meaningful count.
            continue
        if isinstance(value, (int, float)):
            try:
                return int(value)
            except (ValueError, OverflowError):
                # NaN / infinity: try the next key instead of raising.
                continue
        if isinstance(value, str):
            digits = value.strip()
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, on which int() raises.
            if digits.isdecimal():
                return int(digits)
    return None
|
||||
|
||||
|
||||
def parse_playlist_square_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert QQ Music's playlist-square JSON payload into candidates.

    Args:
        payload: Decoded response; entries are read from ``data.list``. A
            missing or null ``data`` or ``list`` yields no candidates.

    Returns:
        One candidate per entry that has a ``dissid``, with ``platform="qq"``
        and ``pool_kind="playlist_square"``.
    """
    # `"data": null` is a present key, so a .get default would not apply.
    data = (payload or {}).get("data") or {}
    items: list[PlaylistCandidate] = []
    for entry in data.get("list") or []:
        raw_id = entry.get("dissid")
        # A present-but-null id must not turn into the literal string "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        creator = entry.get("creator") or {}
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="playlist_square",
                remote_id=remote_id,
                name=entry.get("dissname") or remote_id,
                url=f"https://y.qq.com/n/ryqq/playlist/{remote_id}",
                cover_url=entry.get("imgurl"),
                creator_name=creator.get("name"),
                play_count=entry.get("listennum"),
                collected_song_count=_extract_collected_song_count(entry),
            )
        )
    return items
|
||||
|
||||
|
||||
def parse_toplist_payload(payload: dict) -> list[PlaylistCandidate]:
    """Convert QQ Music's toplist JSON payload into candidates.

    Args:
        payload: Decoded response; entries are read from ``data.topList``. A
            missing or null ``data`` or ``topList`` yields no candidates.

    Returns:
        One candidate per entry that has an ``id``, with ``platform="qq"``
        and ``pool_kind="toplist"``.
    """
    # `"data": null` is a present key, so a .get default would not apply.
    data = (payload or {}).get("data") or {}
    items: list[PlaylistCandidate] = []
    for entry in data.get("topList") or []:
        raw_id = entry.get("id")
        # A present-but-null id must not turn into the literal string "None".
        remote_id = "" if raw_id is None else str(raw_id).strip()
        if not remote_id:
            continue
        items.append(
            PlaylistCandidate(
                platform="qq",
                pool_kind="toplist",
                remote_id=remote_id,
                name=entry.get("topTitle") or remote_id,
                url=f"https://y.qq.com/n/ryqq/toplist/{remote_id}",
                cover_url=entry.get("picUrl"),
                play_count=entry.get("listenCount"),
                collected_song_count=_extract_collected_song_count(entry),
                parse_strategy="qq_toplist",
            )
        )
    return items
|
||||
|
||||
|
||||
class QQCollector(BaseCollector):
    """Collector for QQ Music's playlist-square and toplist endpoints."""

    def __init__(self, headers: dict[str, str] | None = None, session: requests.Session | None = None):
        """Set up headers and session, adding the QQ ``Referer``/``Origin`` headers.

        Args:
            headers: Optional base headers; copied, never modified in place.
            session: Optional session to reuse; a new one is created otherwise.
        """
        # Copy the caller's dict: the update() below would otherwise leak the
        # QQ-specific headers into an object the caller still owns.
        base_headers = dict(headers) if headers else {"User-Agent": "Mozilla/5.0"}
        super().__init__(headers=base_headers, session=session or requests.Session())
        self.headers.update({"Referer": "https://y.qq.com/", "Origin": "https://y.qq.com/"})

    def collect_playlist_square(
        self,
        category_id: int = 10000000,
        sort_id: int = 5,
        page: int = 1,
        page_size: int = 30,
    ) -> list[PlaylistCandidate]:
        """Fetch one page of the playlist square.

        Args:
            category_id: Value sent as ``categoryId``.
            sort_id: Value sent as ``sortId``.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Page length; values below 1 are treated as 1.

        Returns:
            The candidates parsed from the JSON response.
        """
        page_index = max(page - 1, 0)
        # Clamp like the sibling collectors do; a zero or negative size would
        # otherwise produce a negative end index.
        per_page = max(page_size, 1)
        params = {
            "picmid": "1",
            "rnd": "0.1",
            "g_tk": "732560869",
            "loginUin": "0",
            "hostUin": "0",
            "format": "json",
            "inCharset": "utf8",
            "outCharset": "utf-8",
            "notice": "0",
            "platform": "yqq.json",
            "needNewCode": "0",
            "categoryId": str(category_id),
            "sortId": str(sort_id),
            # sin / ein are the inclusive start and end item indexes.
            "sin": str(page_index * per_page),
            "ein": str((page_index + 1) * per_page - 1),
        }
        response = self.get(PLAYLIST_SQUARE_URL, params=params)
        return parse_playlist_square_payload(response.json())

    def collect_toplist(self) -> list[PlaylistCandidate]:
        """Fetch the toplist endpoint as JSON and parse its entries."""
        response = self.get(TOPLIST_URL, params={"format": "json"})
        return parse_toplist_payload(response.json())
|
||||
Reference in New Issue
Block a user