Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,293 @@
'''
Function:
Implementation of BaseMusicClient
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import random
import pickle
import requests
from pathlib import Path
from threading import Lock
from rich.text import Text
from itertools import chain
from datetime import datetime
from collections import defaultdict
from pathvalidate import sanitize_filepath
from concurrent.futures import ThreadPoolExecutor, as_completed
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn, MofNCompleteColumn, ProgressColumn, Task
from ..utils import LoggerHandle, AudioLinkTester, SongInfo, SongInfoUtils, HLSDownloader, touchdir, usedownloadheaderscookies, usesearchheaderscookies, useparseheaderscookies, cookies2dict, cookies2string, shortenpathsinsonginfos, optionalimport, optionalimportfrom
# Fallback User-Agent string: build_user_agent() returns it when fake_useragent cannot be imported or raises.
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
def build_user_agent() -> str:
    '''Return a User-Agent header value.

    Tries the optional ``fake_useragent.UserAgent`` class (looked up through
    ``optionalimportfrom``) and returns its ``random`` attribute. If the class
    is unavailable, or anything raises along the way, DEFAULT_USER_AGENT is
    returned instead.
    '''
    user_agent = DEFAULT_USER_AGENT
    try:
        factory = optionalimportfrom("fake_useragent", "UserAgent")
        if factory is not None:
            user_agent = factory().random
    except Exception:
        # deliberate best effort: any failure simply falls back to the fixed default
        user_agent = DEFAULT_USER_AGENT
    return user_agent
'''AudioAwareColumn'''
class AudioAwareColumn(ProgressColumn):
    '''Progress column whose rendering depends on the task's "kind" field.

    - kind == "overall": renders "<completed>/<total> audios"
    - kind == "hls": renders "<completed>/<total> segments"
    - anything else (the field defaults to "download"): delegates to a DownloadColumn
    '''
    def __init__(self):
        super().__init__()
        # reused for every task that is not counted in audios / segments
        self._download_col = DownloadColumn()
    '''render'''
    def render(self, task: Task):
        '''Build the renderable for ``task`` according to ``task.fields["kind"]``.'''
        kind = task.fields.get("kind", "download")
        if kind == "overall":
            unit = "audios"
        elif kind == "hls":
            unit = "segments"
        else:
            return self._download_col.render(task)
        completed = int(task.completed)
        # a task without a known total is displayed with a total of 0
        total = 0 if task.total is None else int(task.total)
        return Text(f"{completed}/{total} {unit}")
'''BaseMusicClient'''
class BaseMusicClient():
source = 'BaseMusicClient'
def __init__(self, search_size_per_source: int = 5, auto_set_proxies: bool = False, random_update_ua: bool = False, enable_search_curl_cffi: bool = False, enable_parse_curl_cffi: bool = False, enable_download_curl_cffi: bool = False, maintain_session: bool = False, logger_handle: LoggerHandle = None, disable_print: bool = False, work_dir: str = 'musicdl_outputs',
             max_retries: int = 3, freeproxy_settings: dict = None, default_search_cookies: dict | str = None, default_download_cookies: dict | str = None, default_parse_cookies: dict | str = None, strict_limit_search_size_per_page: bool = True, search_size_per_page: int = 10, quark_parser_config: dict = None):
    '''Create the work dir, store the settings, build default headers/cookies and the HTTP session.

    Args:
        search_size_per_source: stored as-is; also caps ``search_size_per_page``.
        auto_set_proxies: when true a ``freeproxy.ProxiedSessionClient`` is created and used by ``_autosetproxies``.
        random_update_ua: read by ``get``/``post`` to refresh the session User-Agent.
        enable_search_curl_cffi / enable_parse_curl_cffi / enable_download_curl_cffi: per-phase curl_cffi switches;
            ``enable_curl_cffi`` starts out as the search switch.
        maintain_session: when true ``_initsession`` keeps an already-built session and link testers.
        logger_handle: logger to use; a new ``LoggerHandle()`` is created when falsy.
        disable_print: forwarded to every logger call.
        work_dir: root output directory, created immediately via ``touchdir``.
        max_retries: number of attempts made by ``get``/``post``.
        freeproxy_settings: overrides merged on top of the default freeproxy settings.
        default_search_cookies / default_download_cookies / default_parse_cookies: passed through ``cookies2dict``;
            ``default_cookies`` starts out as the search cookies.
        strict_limit_search_size_per_page: stored as-is.
        search_size_per_page: stored as ``min(search_size_per_source, search_size_per_page)``.
        quark_parser_config: its ``cookies`` entry is put into the quark download headers.
    '''
    # set up work dir
    touchdir(work_dir)
    # set attributes
    self.search_size_per_source = search_size_per_source
    self.auto_set_proxies = auto_set_proxies
    self.random_update_ua = random_update_ua
    self.max_retries = max_retries
    self.maintain_session = maintain_session
    self.logger_handle = logger_handle if logger_handle else LoggerHandle()
    self.disable_print = disable_print
    self.work_dir = work_dir
    self.freeproxy_settings = freeproxy_settings or {}
    self.quark_parser_config = quark_parser_config or {}
    self.default_search_cookies = cookies2dict(default_search_cookies)
    self.default_download_cookies = cookies2dict(default_download_cookies)
    self.default_parse_cookies = cookies2dict(default_parse_cookies)
    self.default_cookies = self.default_search_cookies
    self.search_size_per_page = min(search_size_per_source, search_size_per_page)
    self.strict_limit_search_size_per_page = strict_limit_search_size_per_page
    self.enable_search_curl_cffi = enable_search_curl_cffi
    self.enable_download_curl_cffi = enable_download_curl_cffi
    self.enable_parse_curl_cffi = enable_parse_curl_cffi
    self.enable_curl_cffi = self.enable_search_curl_cffi
    # get()/post() call random.choice(self.cc_impersonates) whenever enable_curl_cffi is truthy, so the list has to
    # exist if ANY phase may turn curl_cffi on; the parse flag used to be left out, which left None here.
    # NOTE(review): presumably the use*headerscookies decorators swap enable_curl_cffi per phase - confirm.
    any_curl_cffi = enable_search_curl_cffi or enable_download_curl_cffi or enable_parse_curl_cffi
    self.cc_impersonates = self._listccimpersonates() if any_curl_cffi else None
    # init requests.Session
    self.default_search_headers = {'User-Agent': build_user_agent()}
    self.default_download_headers = {'User-Agent': build_user_agent()}
    self.default_parse_headers = {'User-Agent': build_user_agent()}
    self.quark_default_download_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.225.400 QQBrowser/12.2.5544.400', 'origin': 'https://pan.quark.cn',
        'referer': 'https://pan.quark.cn/', 'accept-language': 'zh-CN,zh;q=0.9', 'cookie': cookies2string(self.quark_parser_config.get('cookies', '')),
    }
    self.quark_default_download_cookies = {} # placeholder, useless now
    self.default_headers = self.default_search_headers
    self._initsession()
    # proxied_session_client
    freeproxy = optionalimportfrom('freeproxy', 'freeproxy')
    default_freeproxy_settings = dict(disable_print=True, proxy_sources=['ProxiflyProxiedSession'], max_tries=20, init_proxied_session_cfg={})
    # caller-provided settings win over the defaults
    default_freeproxy_settings.update(self.freeproxy_settings)
    self.proxied_session_client = freeproxy.ProxiedSessionClient(**default_freeproxy_settings) if auto_set_proxies else None
'''_listccimpersonates'''
def _listccimpersonates(self):
    '''Collect browser impersonation names by scanning the installed curl_cffi package files.

    Every file under the package directory whose suffix is .py/.so/.pyd/.dll/.dylib is read as bytes and
    searched for tokens such as "chrome", "edge", "safari", "firefox" or "tor", optionally followed by a
    version/variant suffix. Returns the distinct matches as a sorted list of str.
    '''
    package_dir = Path(optionalimport('curl_cffi').__file__).resolve().parent
    scanned_suffixes = {".py", ".so", ".pyd", ".dll", ".dylib"}
    name_pattern = re.compile(rb"\b(?:chrome|edge|safari|firefox|tor)(?:\d+[a-z_]*|_android|_ios)?\b")
    found = set()
    for candidate in package_dir.rglob("*"):
        if candidate.suffix not in scanned_suffixes:
            continue
        for token in name_pattern.findall(candidate.read_bytes()):
            found.add(token.decode("utf-8", "ignore"))
    return sorted(found)
'''_initsession'''
def _initsession(self):
if self.maintain_session and getattr(self, 'session', None) and getattr(self, 'audio_link_tester', None) and getattr(self, 'quark_audio_link_tester', None): return
curl_cffi = optionalimport('curl_cffi')
self.session = requests.Session() if not self.enable_curl_cffi else curl_cffi.requests.Session()
self.session.headers = self.default_headers
self.audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.default_download_headers), cookies=copy.deepcopy(self.default_download_cookies))
self.quark_audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.quark_default_download_headers), cookies=copy.deepcopy(self.quark_default_download_cookies))
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
raise NotImplementedError('not to be implemented')
'''_constructuniqueworkdir'''
def _constructuniqueworkdir(self, keyword: str, sort_by_search_kwd_and_time: bool = True):
    '''Create and return the output directory for one search.

    The path is ``<work_dir>/<source>`` and, when ``sort_by_search_kwd_and_time`` is true, gets an extra
    ``"<YYYY-mm-dd-HH-MM-SS> <keyword>"`` component. The joined path is passed through ``sanitize_filepath``
    and then created with ``touchdir``.
    '''
    time_stamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    path_parts = [self.work_dir, self.source]
    if sort_by_search_kwd_and_time:
        path_parts.append(f'{time_stamp} {keyword}')
    work_dir = sanitize_filepath(os.path.join(*path_parts))
    touchdir(work_dir)
    return work_dir
'''_removeduplicates'''
def _removeduplicates(self, song_infos: list[SongInfo] = None) -> list[SongInfo]:
unique_song_infos, identifiers = [], set()
for song_info in song_infos:
if song_info.identifier in identifiers: continue
identifiers.add(song_info.identifier); unique_song_infos.append(song_info)
return unique_song_infos
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
raise NotImplementedError('not be implemented')
'''search'''
@usesearchheaderscookies
def search(self, keyword: str, num_threadings: int = 5, request_overrides: dict = None, rule: dict = None, main_process_context: Progress = None, main_progress_id: int = None, main_progress_lock: Lock = None):
    '''Search this source for ``keyword`` and return the de-duplicated results.

    One ``_search`` call per URL from ``_constructsearchurls`` runs on a thread pool. Results are merged,
    de-duplicated, assigned a fresh work dir and pickled to ``search_results.pkl`` inside each work dir.

    Args:
        keyword: search keyword.
        num_threadings: thread pool size.
        request_overrides: forwarded to ``_constructsearchurls`` and ``_search`` ({} when omitted).
        rule: forwarded to ``_constructsearchurls`` ({} when omitted).
        main_process_context: shared ``Progress`` to draw into; when omitted a private one is created
            and always stopped again, including when the search raises.
        main_progress_id: optional task id of an aggregate task in ``main_process_context``; its total and
            completed count are increased by the number of search URLs handled here.
        main_progress_lock: lock guarding updates of the shared progress; a private one is created when omitted.

    Returns:
        The list of unique song infos, each with ``work_dir`` set.

    Raises:
        Whatever ``_constructsearchurls`` or a ``_search`` worker raises (re-raised by ``future.result()``).
    '''
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # logging
    self.logger_handle.info(f'Start to search music files using {self.source}.', disable_print=self.disable_print)
    # construct search urls
    search_urls = self._constructsearchurls(keyword=keyword, rule=rule, request_overrides=request_overrides)
    # multi threadings for searching music files
    owns_progress = main_process_context is None
    if owns_progress:
        main_process_context = Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10)
        main_process_context.__enter__()
    try:
        if main_progress_lock is None: main_progress_lock = Lock()
        with main_progress_lock:
            progress_id = main_process_context.add_task(f"{self.source}.search >>> completed (0/{len(search_urls)})", total=len(search_urls))
            if main_progress_id is not None:
                cur_total = main_process_context.tasks[main_progress_id].total or 0
                main_process_context.update(main_progress_id, total=cur_total + len(search_urls))
                main_process_context.update(main_progress_id, description=f"Search from sources >>> completed ({int(main_process_context.tasks[main_progress_id].completed)}/{cur_total + len(search_urls)})")
        song_infos, submitted_tasks = {}, []
        with ThreadPoolExecutor(max_workers=num_threadings) as pool:
            for search_url_idx, search_url in enumerate(search_urls):
                # every worker appends into its own list, so no cross-thread sharing of result containers
                song_infos[str(search_url_idx)] = []
                submitted_tasks.append(pool.submit(self._search, keyword, search_url, request_overrides, song_infos[str(search_url_idx)], main_process_context, progress_id))
            for future in as_completed(submitted_tasks):
                future.result()
                with main_progress_lock:
                    main_process_context.advance(progress_id, 1)
                    num_searched_urls = int(main_process_context.tasks[progress_id].completed)
                    main_process_context.update(progress_id, description=f"{self.source}.search >>> completed ({num_searched_urls}/{len(search_urls)})")
                    if main_progress_id is None: continue
                    main_process_context.advance(main_progress_id, 1)
                    main_process_context.update(main_progress_id, description=f"Search from sources >>> completed ({int(main_process_context.tasks[main_progress_id].completed)}/{int(main_process_context.tasks[main_progress_id].total or 0)})")
        song_infos = list(chain.from_iterable(song_infos.values()))
        song_infos = self._removeduplicates(song_infos=song_infos)
        work_dir = self._constructuniqueworkdir(keyword=keyword)
        for song_info in song_infos:
            song_info.work_dir = work_dir
            episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
            for eps_info in episodes:
                # episodes get a sub directory named after the parent song
                eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
                touchdir(work_dir)
        # logging
        if len(song_infos) > 0:
            work_dir_to_song_info, work_dir = defaultdict(list), ', '.join(list(set([str(s.work_dir) for s in song_infos])))
            for s in song_infos:
                s.work_dir = str(s.work_dir)
                work_dir_to_song_info[s.work_dir].append(s.todict())
            for w, items in work_dir_to_song_info.items():
                touchdir(w)
                self._savetopkl(items, os.path.join(w, "search_results.pkl"))
        else:
            work_dir = self.work_dir
        self.logger_handle.info(f'Finished searching music files using {self.source}. Search results have been saved to {work_dir}, valid items: {len(song_infos)}.', disable_print=self.disable_print)
        # return
        return song_infos
    finally:
        # a Progress created here was entered manually, so it must be stopped on every exit path;
        # otherwise a failing worker would leave the live display running
        if owns_progress: main_process_context.__exit__(None, None, None)
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list[SongInfo] = [], progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
request_overrides = copy.deepcopy(request_overrides or {})
if song_info.protocol.upper() in {'HLS'}:
try:
hls_downloader = HLSDownloader(
output_dir=song_info.work_dir, proxies=request_overrides.pop('proxies', {}) or self._autosetproxies(), headers=song_info.default_download_headers or request_overrides.pop('headers', {}) or self.default_headers, cookies=request_overrides.pop('cookies', {}) or self.default_cookies,
logger_handle=self.logger_handle, verify_tls=request_overrides.pop('verify', True), timeout=request_overrides.pop('timeout', (10, 30)), disable_print=self.disable_print, request_overrides=request_overrides
)
hls_downloader.download(song_info.download_url, song_info.save_path, quality='best', keep_segments=False, temp_subdir=str(song_info.identifier), progress=progress, progress_id=song_progress_id)
downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
except Exception as err:
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Error: {err})")
elif song_info.protocol.upper() in {'HTTP'} and song_info.downloaded_contents:
try:
touchdir(song_info.work_dir)
total_size = song_info.downloaded_contents.__sizeof__()
progress.update(song_progress_id, total=total_size)
with open(song_info.save_path, "wb") as fp: fp.write(song_info.downloaded_contents)
progress.advance(song_progress_id, total_size)
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Success)")
downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
except Exception as err:
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Error: {err})")
elif song_info.protocol.upper() in {'HTTP'}:
try:
touchdir(song_info.work_dir)
if song_info.default_download_headers: request_overrides['headers'] = song_info.default_download_headers
with self.get(song_info.download_url, stream=True, **request_overrides) as resp:
resp.raise_for_status()
total_size, chunk_size, downloaded_size = int(resp.headers.get('content-length', 0)), song_info.get('chunk_size', 1024), 0
progress.update(song_progress_id, total=total_size)
with open(song_info.save_path, "wb") as fp:
for chunk in resp.iter_content(chunk_size=chunk_size):
if not chunk: continue
fp.write(chunk); downloaded_size = downloaded_size + len(chunk)
if total_size > 0: downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, total_size / 1024 / 1024)
else: progress.update(song_progress_id, total=downloaded_size); downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, downloaded_size / 1024 / 1024)
progress.advance(song_progress_id, len(chunk))
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Downloading: {downloading_text})")
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Success)")
downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
except Exception as err:
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Error: {err})")
return downloaded_song_infos
'''download'''
@usedownloadheaderscookies
def download(self, song_infos: list[SongInfo], num_threadings: int = 5, request_overrides: dict = None, auto_supplement_song: bool = True):
    '''Download ``song_infos`` concurrently and return the infos of the successful downloads.

    One progress task is added per song plus an "overall" task; each song is handed to ``_download`` on a
    thread pool. Afterwards the successful items are grouped by work dir and pickled to
    ``download_results.pkl`` inside each of those directories.

    Args:
        song_infos: items to download; first passed to ``shortenpathsinsonginfos``.
        num_threadings: thread pool size.
        request_overrides: forwarded to every ``_download`` call ({} when omitted).
        auto_supplement_song: forwarded to every ``_download`` call.

    Returns:
        The list filled by the ``_download`` workers.
    '''
    # init
    request_overrides = request_overrides or {}
    shortenpathsinsonginfos(song_infos=song_infos)
    # logging
    self.logger_handle.info(f'Start to download music files using {self.source}.', disable_print=self.disable_print)
    # multi threadings for downloading music files
    columns = [SpinnerColumn(), TextColumn("{task.description}"), BarColumn(bar_width=None), TaskProgressColumn(), AudioAwareColumn(), TransferSpeedColumn(), TimeRemainingColumn()]
    with Progress(*columns, refresh_per_second=20, expand=True) as progress:
        songs_progress_id = progress.add_task(f"{self.source}.download >>> completed (0/{len(song_infos)})", total=len(song_infos), kind='overall')
        downloaded_song_infos = []
        song_progress_ids = []
        for song_info in song_infos:
            desc = f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Preparing)"
            task_id = progress.add_task(desc, total=None, kind='download')
            song_progress_ids.append(task_id)
        with ThreadPoolExecutor(max_workers=num_threadings) as pool:
            submitted_tasks = [
                pool.submit(self._download, song_info, request_overrides, downloaded_song_infos, progress, song_progress_id, auto_supplement_song)
                for song_progress_id, song_info in zip(song_progress_ids, song_infos)
            ]
            # results are not inspected here: workers report through the shared list and the progress tasks
            for _ in as_completed(submitted_tasks):
                progress.advance(songs_progress_id, 1)
                num_downloaded_songs = int(progress.tasks[songs_progress_id].completed)
                progress.update(songs_progress_id, description=f"{self.source}.download >>> completed ({num_downloaded_songs}/{len(song_infos)})")
    # logging
    if downloaded_song_infos:
        work_dir = ', '.join(list(set([str(s.work_dir) for s in downloaded_song_infos])))
        work_dir_to_song_info = defaultdict(list)
        for s in downloaded_song_infos:
            s.work_dir = str(s.work_dir)
            work_dir_to_song_info[s.work_dir].append(s.todict())
        for w, items in work_dir_to_song_info.items():
            touchdir(w)
            self._savetopkl(items, os.path.join(w, "download_results.pkl"))
    else:
        work_dir = self.work_dir
    self.logger_handle.info(f'Finished downloading music files using {self.source}. Download results have been saved to {work_dir}, valid downloads: {len(downloaded_song_infos)}.', disable_print=self.disable_print)
    # return
    return downloaded_song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Hook for playlist parsing; this base version always raises NotImplementedError naming ``self.source``.'''
    message = f'Not supported now to parse playlist from {self.source}'
    raise NotImplementedError(message)
'''_autosetproxies'''
def _autosetproxies(self):
if not self.auto_set_proxies: return {}
try: proxies = self.proxied_session_client.getrandomproxy()
except Exception as err: self.logger_handle.error(f'{self.source}._autosetproxies >>> freeproxy lib failed to auto fetch proxies (Error: {err})', disable_print=self.disable_print); proxies = {}
return proxies
'''get'''
def get(self, url, **kwargs):
    '''Send a GET request through ``self.session`` with defaults and up to ``max_retries`` attempts.

    Defaults filled in when absent from ``kwargs``: ``cookies`` (``default_cookies``), ``timeout``
    ((10, 30)) and, when ``enable_curl_cffi`` is truthy, a random ``impersonate`` value. Proxies given by
    the caller are used on every attempt; without them ``_autosetproxies()`` is consulted per attempt.

    Returns:
        The first response whose ``raise_for_status()`` passes. If every attempt fails, the most recently
        received response (possibly with an error status), or None when no response was received at all.
    '''
    if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
    if 'timeout' not in kwargs: kwargs['timeout'] = (10, 30)
    if 'impersonate' not in kwargs and self.enable_curl_cffi: kwargs['impersonate'] = random.choice(self.cc_impersonates)
    # taken out once, before the loop: popping inside the loop lost the caller's proxies after the first failure
    caller_proxies = kwargs.pop('proxies', None)
    resp = None
    for _ in range(self.max_retries):
        if not self.maintain_session:
            self._initsession()
            if self.random_update_ua: self.session.headers.update({'User-Agent': build_user_agent()})
        proxies = caller_proxies or self._autosetproxies()
        # tracked per attempt so the logged status never comes from an earlier attempt
        attempt_resp = None
        try:
            attempt_resp = self.session.get(url, proxies=proxies, **kwargs)
            resp = attempt_resp
            attempt_resp.raise_for_status()
        except Exception as err:
            self.logger_handle.error(f'{self.source}.get >>> {url} (Error: {err}; status={getattr(attempt_resp, "status_code", None)})', disable_print=self.disable_print)
            continue
        return resp
    return resp
'''post'''
def post(self, url, **kwargs):
    '''Send a POST request through ``self.session`` with defaults and up to ``max_retries`` attempts.

    Defaults filled in when absent from ``kwargs``: ``cookies`` (``default_cookies``), ``timeout``
    ((10, 30)) and, when ``enable_curl_cffi`` is truthy, a random ``impersonate`` value. Proxies given by
    the caller are used on every attempt; without them ``_autosetproxies()`` is consulted per attempt.

    Returns:
        The first response whose ``raise_for_status()`` passes. If every attempt fails, the most recently
        received response (possibly with an error status), or None when no response was received at all.
    '''
    if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
    if 'timeout' not in kwargs: kwargs['timeout'] = (10, 30)
    if 'impersonate' not in kwargs and self.enable_curl_cffi: kwargs['impersonate'] = random.choice(self.cc_impersonates)
    # taken out once, before the loop: popping inside the loop lost the caller's proxies after the first failure
    caller_proxies = kwargs.pop('proxies', None)
    resp = None
    for _ in range(self.max_retries):
        if not self.maintain_session:
            self._initsession()
            if self.random_update_ua: self.session.headers.update({'User-Agent': build_user_agent()})
        proxies = caller_proxies or self._autosetproxies()
        # tracked per attempt so the logged status never comes from an earlier attempt
        attempt_resp = None
        try:
            attempt_resp = self.session.post(url, proxies=proxies, **kwargs)
            resp = attempt_resp
            attempt_resp.raise_for_status()
        except Exception as err:
            self.logger_handle.error(f'{self.source}.post >>> {url} (Error: {err}; status={getattr(attempt_resp, "status_code", None)})', disable_print=self.disable_print)
            continue
        return resp
    return resp
'''_savetopkl'''
def _savetopkl(self, data, file_path, auto_sanitize=True):
if auto_sanitize: file_path = sanitize_filepath(file_path)
with open(file_path, 'wb') as fp: pickle.dump(data, fp)