Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
+12
View File
@@ -0,0 +1,12 @@
'''initialize'''
from .sources import MusicClientBuilder, BaseMusicClient, BuildMusicClient
from .utils import (
# classes
BaseModuleBuilder, LoggerHandle, AudioLinkTester, WhisperLRC, QuarkParser, SongInfo, SongInfoUtils, RandomIPGenerator, LanZouYParser, HLSDownloader, LyricSearchClient,
# functions
cachecookies, resp2json, isvalidresp, safeextractfromdict, replacefile, printfullline, smarttrunctable, usesearchheaderscookies, byte2mb, printtable, usedownloadheaderscookies,
useparseheaderscookies, cookies2dict, cookies2string, touchdir, estimatedurationwithfilesizebr, estimatedurationwithfilelink, extractdurationsecondsfromlrc, optionalimportfrom,
searchdictbykey, colorize, legalizestring, shortenpathsinsonginfos, cursorpickintable, optionalimport, obtainhostname, hostmatchessuffix, seconds2hms, naiveguessextfromaudiobytes,
# lambda functions
cleanlrc,
)
@@ -0,0 +1,5 @@
'''initialize'''
from .lrts import LRTSMusicClient
from .lizhi import LizhiMusicClient
from .ximalaya import XimalayaMusicClient
from .qingting import QingtingMusicClient
@@ -0,0 +1,204 @@
'''
Function:
Implementation of LizhiMusicClient: https://www.lizhi.fm/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
from contextlib import suppress
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, SongInfo
'''LizhiMusicClient'''
class LizhiMusicClient(BaseMusicClient):
    '''
    Client for https://www.lizhi.fm/ that talks to the mobile "m.lizhi.fm/vodapi" endpoints.

    Two search types are supported: 'track' (every search hit becomes one SongInfo) and 'album'
    (search hits are grouped by uploader id and every voice of that uploader becomes an episode).
    '''
    source = 'LizhiMusicClient'
    ALLOWED_SEARCH_TYPES = ['album', 'track']
    # file-name suffixes tried in list order; every entry is exactly 7 characters long, which
    # _lizhiresolvevoice relies on when it swaps the suffix via download_url[:-7]
    MUSIC_QUALITIES = ['_ud.mp3', '_hd.mp3', '_sd.m4a']
    def __init__(self, **kwargs):
        '''
        Pop the optional "allowed_search_types" keyword, forward the remaining keywords to the base
        class, install the default request headers and initialise the session.
        '''
        self.allowed_search_types = list(set(kwargs.pop('allowed_search_types', LizhiMusicClient.ALLOWED_SEARCH_TYPES)))
        super(LizhiMusicClient, self).__init__(**kwargs)
        self.default_search_headers = {'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1', 'Referer': 'https://m.lizhi.fm'}
        self.default_download_headers = {'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Build the list of search urls for "keyword".

        Args:
            keyword: search text, sent as the "keywords" query parameter.
            rule: optional query parameters merged over the default ones.
            request_overrides: extra keyword arguments forwarded to self.get().

        Returns:
            A list of {'url': ..., 'type': ...} dicts, one per page and enabled search type.

        Note:
            Every page after the first needs the "receiptData" value of the previous page, so this
            method performs GET requests while it builds the urls. It also caps
            self.search_size_per_page at 20 as a side effect.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        self.search_size_per_page = min(self.search_size_per_page, 20)
        # search rules ('album' and 'track' query the very same endpoint; only the parser differs)
        default_rule = {'deviceId': "h5-b6ef91a9-3dbb-c716-1fdd-43ba08851150", "keywords": keyword, "page": 1, "receiptData": ""}
        default_rule.update(rule)
        base_url = 'https://m.lizhi.fm/vodapi/search/voice?'
        # construct search urls based on search rules
        search_urls, page_size = [], self.search_size_per_page
        for search_type in LizhiMusicClient.ALLOWED_SEARCH_TYPES:
            if search_type not in self.allowed_search_types:
                continue
            # receipt_data starts out bound so a failed fetch can no longer raise UnboundLocalError;
            # after a failure the last successfully fetched value (or '') is reused
            count, receipt_data = 0, ''
            while self.search_size_per_source > count:
                page_rule = copy.deepcopy(default_rule)
                page_rule['page'] = str(int(count // page_size) + 1)
                if count > 0:
                    with suppress(Exception):
                        receipt_data = resp2json(self.get(search_urls[-1]['url'], **request_overrides)).get('receiptData', '')
                    page_rule['receiptData'] = receipt_data
                search_urls.append({'url': base_url + urlencode(page_rule), 'type': search_type})
                count += page_size
        # return
        return search_urls
    '''_lizhiresolvevoice'''
    def _lizhiresolvevoice(self, voice: dict, voice_id, raw_data: dict, request_overrides: dict = None):
        '''
        Turn one voice record into a SongInfo with a tested download url.

        Shared by the track parser, the album parser and _parsewithofficialapiv1 (the name carries a
        "lizhi" prefix to stay clear of base-class helpers that are not visible from this file).

        Args:
            voice: dict that is read through the paths voicePlayProperty.trackUrl, voiceInfo.imageUrl,
                voiceInfo.duration, voiceInfo.name and userInfo.name.
            voice_id: identifier stored on the SongInfo and used to guess a url when trackUrl is unusable.
            raw_data: dict stored as SongInfo.raw_data.
            request_overrides: extra keyword arguments forwarded to the link tester.

        Returns:
            A SongInfo. Callers must check "with_valid_download_url": it is the bare
            SongInfo(source=...) when no url could be derived, and the last tested candidate when
            no quality/CDN combination passed the link test.
        '''
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        download_url = safeextractfromdict(voice, ['voicePlayProperty', 'trackUrl'], '')
        cover_url = safeextractfromdict(voice, ['voiceInfo', 'imageUrl'], None)
        if not download_url or not str(download_url).startswith('http'):
            # no usable trackUrl: rebuild the audio path from the yyyy/mm/dd segment of the cover url
            m = re.search(r'/(\d{4}/\d{2}/\d{2})(?:/|$)', str(cover_url or ""))
            if not m:
                return song_info
            download_url = f'https://cdn101.lizhi.fm/audio/{m.group(1)}/{voice_id}_sd.m4a' # cdn101 is better than cdn5
        duration_in_secs = safeextractfromdict(voice, ['voiceInfo', 'duration'], 0) or 0
        song_name = legalizestring(safeextractfromdict(voice, ['voiceInfo', 'name'], None))
        uploader = legalizestring(safeextractfromdict(voice, ['userInfo', 'name'], None))
        for quality in LizhiMusicClient.MUSIC_QUALITIES:
            # NOTE(review): the [:-7] swap assumes the url ends in a 7-character quality suffix with
            # no query string -- confirm against real trackUrl values
            download_url: str = (download_url[:-7] + quality).replace('//cdn5.lizhi.fm/audio/', '//cdn101.lizhi.fm/audio/')
            song_info = SongInfo(
                raw_data=raw_data, source=self.source, song_name=song_name, singers=uploader, album=uploader, ext=download_url.split('?')[0].split('.')[-1],
                file_size_bytes=None, file_size=None, identifier=voice_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None,
                cover_url=cover_url, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            if not song_info.with_valid_download_url:
                # same file on the cdn5 host as a fallback
                fallback_url = download_url.replace('//cdn101.lizhi.fm/audio/', '//cdn5.lizhi.fm/audio/')
                song_info.update(dict(download_url=fallback_url, download_url_status=self.audio_link_tester.test(fallback_url, request_overrides)))
            if song_info.with_valid_download_url:
                break
        if not song_info.with_valid_download_url:
            return song_info
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        return song_info
    '''_parsewithofficialapiv1'''
    def _parsewithofficialapiv1(self, search_result: dict, request_overrides: dict = None):
        '''
        Fetch the voice-info record of one search hit and resolve it into a SongInfo.

        Args:
            search_result: search hit; its voiceInfo.voiceId selects the record to fetch.
            request_overrides: extra keyword arguments forwarded to self.get() and the link tester.

        Returns:
            A SongInfo (check "with_valid_download_url" before using it).

        Raises:
            Whatever raise_for_status() raises when the voice-info request fails.
        '''
        # init
        request_overrides = request_overrides or {}
        song_id = safeextractfromdict(search_result, ['voiceInfo', 'voiceId'], '')
        # parse
        (resp := self.get(f'https://m.lizhi.fm/vodapi/voice/info/{song_id}', **request_overrides)).raise_for_status()
        download_result = resp2json(resp=resp)
        voice = safeextractfromdict(download_result, ['data', 'userVoice'], {}) or {}
        # return
        return self._lizhiresolvevoice(voice, song_id, {'search': search_result, 'download': download_result, 'lyric': {}}, request_overrides)
    '''_parsebytrack'''
    def _parsebytrack(self, search_results, song_infos: list = None, request_overrides: dict = None, progress: Progress = None):
        '''
        Append one SongInfo per downloadable search hit to "song_infos" and return that list.

        Args:
            search_results: iterable of search hits (the "data" field of the search response).
            song_infos: list to extend in place; a fresh list is created when omitted.
            request_overrides: extra keyword arguments forwarded to the link tester.
            progress: accepted for signature parity with _parsebyalbum; not used here.
        '''
        request_overrides = request_overrides or {}
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        for search_result in search_results:
            if not isinstance(search_result, dict):
                continue
            song_id = safeextractfromdict(search_result, ['voiceInfo', 'voiceId'], '')
            if not song_id:
                continue
            song_info = self._lizhiresolvevoice(search_result, song_id, {'search': search_result, 'download': {}, 'lyric': {}}, request_overrides)
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        return song_infos
    '''_parsebyalbum'''
    def _parsebyalbum(self, search_results, song_infos: list = None, request_overrides: dict = None, progress: Progress = None):
        '''
        Group search hits by uploader id, download every voice list of each uploader and append one
        SongInfo (with an "episodes" list) per uploader to "song_infos".

        Args:
            search_results: iterable of search hits (the "data" field of the search response).
            song_infos: list to extend in place; a fresh list is created when omitted.
            request_overrides: extra keyword arguments forwarded to self.get() and the link tester.
            progress: rich Progress used for the page/episode progress tasks; required.

        Returns:
            The "song_infos" list.
        '''
        request_overrides, unique_album_ids, page_size = request_overrides or {}, set(), 1000
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        for search_result in search_results:
            if not isinstance(search_result, dict):
                continue
            album_id = safeextractfromdict(search_result, ['userInfo', 'userId'], '')
            if not album_id or album_id in unique_album_ids:
                continue
            unique_album_ids.add(album_id)
            download_results, page_no, track_idx, unique_track_ids = [], 1, 0, set()
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_results, 'lyric': {}}, source=self.source, song_name=album_id, singers=legalizestring(safeextractfromdict(search_result, ['userInfo', 'name'], '')),
                album=f"{safeextractfromdict(search_result, ['userInfo', 'audioNum'], 0) or 0} Episodes", ext=None, file_size_bytes=None, file_size=None, identifier=album_id, duration_s=None, duration='-:-:-', lyric=None,
                cover_url=safeextractfromdict(search_result, ['userInfo', 'photo'], None), download_url=None, download_url_status={}, episodes=[],
            )
            # --download the voice list page by page; the loop ends on a failed request or an empty "data" page
            download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/0) pages downloaded in album {album_id}", total=0)
            while True:
                try: (resp := self.get(f'https://m.lizhi.fm/vodapi/user/{album_id}?pageNo={page_no}&pageSize={page_size}', **request_overrides)).raise_for_status()
                except Exception: break
                download_result = resp2json(resp=resp)
                if not download_result.get('data'):
                    break
                download_results.append(download_result)
                page_no += 1
                # report the number of pages actually fetched (page_no is already one ahead)
                num_pages = len(download_results)
                progress.update(download_album_pid, total=num_pages, completed=num_pages)
                progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({num_pages}/{num_pages}) pages downloaded in album {album_id}")
            # --resolve every voice into an episode
            total_episodes = sum(len(safeextractfromdict(download_result, ['data'], []) or []) for download_result in download_results)
            download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/{total_episodes}) episodes completed in album {album_id}", total=total_episodes)
            for download_result in download_results:
                for track in (safeextractfromdict(download_result, ['data'], []) or []):
                    track_idx += 1
                    progress.advance(download_album_pid, 1)
                    progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({track_idx}/{total_episodes}) episodes completed in album {album_id}")
                    if not isinstance(track, dict):
                        continue
                    eps_id = safeextractfromdict(track, ['voiceInfo', 'voiceId'], '')
                    if not eps_id or eps_id in unique_track_ids:
                        continue
                    unique_track_ids.add(eps_id)
                    eps_info = self._lizhiresolvevoice(track, eps_id, {'search': search_result, 'download': track, 'lyric': {}}, request_overrides)
                    if not eps_info.with_valid_download_url:
                        continue
                    song_info.episodes.append(eps_info)
            if not song_info.with_valid_download_url:
                continue
            # --totals are best effort: a missing duration/size on any episode leaves the placeholders untouched
            with suppress(Exception):
                song_info.duration_s = sum([eps.duration_s for eps in song_info.episodes])
                song_info.duration = seconds2hms(song_info.duration_s)
            with suppress(Exception):
                song_info.file_size = str(round(sum([float(eps.file_size.removesuffix('MB').strip()) for eps in song_info.episodes]), 2)) + ' MB'
            song_info.album = f"{len(song_info.episodes)} Episodes"
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        return song_infos
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: dict = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Run one search request and hand its "data" field to the parser of the url's search type.

        Args:
            keyword: search text (not used here; the url already carries it).
            search_url: one {'url': ..., 'type': ...} dict produced by _constructsearchurls.
            request_overrides: extra keyword arguments forwarded to self.get() and the parsers.
            song_infos: list to extend in place; a fresh list is created when omitted.
            progress: rich Progress that receives the status line; required.
            progress_id: id of the progress task whose description is updated.

        Returns:
            The "song_infos" list. Request/parsing errors are reported on the progress task, not raised.
        '''
        # init
        request_overrides = request_overrides or {}
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        search_type, search_url = search_url['type'], search_url['url']
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = resp2json(resp)['data']
            # --parse based on search type
            parsers = {'album': self._parsebyalbum, 'track': self._parsebytrack}
            parsers[search_type](search_results, song_infos=song_infos, request_overrides=request_overrides, progress=progress)
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
@@ -0,0 +1,208 @@
'''
Function:
Implementation of LRTSMusicClient: https://www.lrts.me/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
import math
from rich.progress import Progress
from urllib.parse import urlencode
from ..sources import BaseMusicClient
from ..utils import legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, byte2mb, SongInfo
'''LRTSMusicClient'''
class LRTSMusicClient(BaseMusicClient):
    '''
    Client for https://www.lrts.me/ that talks to the mobile "m.lrts.me/ajax" endpoints.

    Two search types are supported, 'book' and 'album'. Each search hit becomes one SongInfo whose
    "episodes" list holds the individually resolved sections/audios.
    '''
    source = 'LRTSMusicClient'
    ALLOWED_SEARCH_TYPES = ['album', 'book']
    def __init__(self, **kwargs):
        '''
        Pop the optional "allowed_search_types" keyword, forward the remaining keywords to the base
        class, install the default request headers and initialise the session.
        '''
        self.allowed_search_types = list(set(kwargs.pop('allowed_search_types', LRTSMusicClient.ALLOWED_SEARCH_TYPES)))
        super(LRTSMusicClient, self).__init__(**kwargs)
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "cache-control": "max-age=0", "connection": "keep-alive", "sec-ch-ua": '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"',
            "host": "m.lrts.me", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-site": "none", "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Build the list of search urls for "keyword".

        Args:
            keyword: search text, sent as the "keyWord" query parameter.
            rule: optional query parameters merged over the default ones ("pageSize" and "pageNum"
                are always overwritten per page).
            request_overrides: accepted for signature parity; not used here.

        Returns:
            A list of single-entry dicts {search_type: url}, one per page and enabled search type.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {"keyWord": keyword, "pageSize": "40", "pageNum": "1", "searchOption": "1"}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://m.lrts.me/ajax/search?'
        search_urls, page_size = [], max(self.search_size_per_page, 40)
        for search_type in LRTSMusicClient.ALLOWED_SEARCH_TYPES:
            if search_type not in self.allowed_search_types:
                continue
            count = 0
            while self.search_size_per_source > count:
                page_rule = copy.deepcopy(default_rule)
                page_rule['pageSize'] = str(page_size)
                page_rule['pageNum'] = str(int(count // page_size) + 1)
                search_urls.append({search_type: base_url + urlencode(page_rule)})
                count += page_size
        # return
        return search_urls
    '''_lrtsgetjson'''
    def _lrtsgetjson(self, url: str, request_overrides: dict):
        '''GET "url" and return the decoded response, or {} when the request or the decoding fails.'''
        try:
            (resp := self.get(url, **request_overrides)).raise_for_status()
            return resp2json(resp=resp)
        except Exception:
            return {}
    '''_lrtsbuildepisode'''
    def _lrtsbuildepisode(self, search_result: dict, info_key: str, singer_key: str, song_id, play_url: str, listen_url: str, request_overrides: dict):
        '''
        Resolve the download url of one section/audio and wrap it into a SongInfo.

        Args:
            search_result: track dict; "name", "size" and "length" are read from it and the parent
                book/album record is read from search_result[info_key].
            info_key: 'book_info' or 'album_info'.
            singer_key: key of the narrator/uploader name inside the parent record.
            song_id: identifier stored on the SongInfo.
            play_url: getPlayPath url, tried first (url read from list[0].path).
            listen_url: getListenPath url, tried when the first one yields no http url (data.path).
            request_overrides: extra keyword arguments forwarded to self.get() and the link tester.

        Returns:
            A SongInfo; "download_url" is '' when neither endpoint returned a path.
        '''
        download_result = self._lrtsgetjson(play_url, request_overrides)
        # "or ''" + str(): a null or non-string path must not blow up in startswith()/split() below
        download_url = str(safeextractfromdict(download_result, ['list', 0, 'path'], '') or '')
        if not download_url.startswith('http'):
            download_result = self._lrtsgetjson(listen_url, request_overrides)
            download_url = str(safeextractfromdict(download_result, ['data', 'path'], '') or '')
        size_in_bytes = search_result.get('size', 0) or 0
        duration_in_secs = int(float(search_result.get('length', 0.0) or 0.0))
        return SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')),
            singers=legalizestring(safeextractfromdict(search_result, [info_key, singer_key], None)), album=legalizestring(safeextractfromdict(search_result, [info_key, 'name'], None)),
            ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=float(size_in_bytes), file_size=byte2mb(size_in_bytes), identifier=song_id,
            duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=safeextractfromdict(search_result, [info_key, 'cover'], None),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
    '''_parsebookwithofficialapiv1'''
    def _parsebookwithofficialapiv1(self, section_idx, search_result: dict, request_overrides: dict = None):
        '''
        Resolve one book section into a SongInfo.

        Args:
            section_idx: 1-based position of the section in the book's track list.
            search_result: section dict carrying the parent book under "book_info".
            request_overrides: extra keyword arguments forwarded to self.get() and the link tester.
        '''
        # init
        request_overrides = request_overrides or {}
        book_id = safeextractfromdict(search_result, ['book_info', 'id'], '')
        song_id = search_result.get('id') or search_result.get('sectionId')
        # parse (both endpoints take the same query string for books)
        query = f"entityId={book_id}&entityType=3&opType=1&sections=[{section_idx}]&type=0&id={song_id}&section={section_idx}"
        song_info = self._lrtsbuildepisode(
            search_result, 'book_info', 'announcer', song_id, play_url=f"https://m.lrts.me/ajax/getPlayPath?{query}",
            listen_url=f"https://m.lrts.me/ajax/getListenPath?{query}", request_overrides=request_overrides,
        )
        # return
        return song_info
    '''_parsealbumwithofficialapiv1'''
    def _parsealbumwithofficialapiv1(self, section_idx, search_result: dict, request_overrides: dict = None):
        '''
        Resolve one album audio into a SongInfo.

        Args:
            section_idx: 1-based position of the audio in the album's track list.
            search_result: audio dict carrying the parent album under "album_info".
            request_overrides: extra keyword arguments forwarded to self.get() and the link tester.
        '''
        # init
        request_overrides = request_overrides or {}
        album_id = safeextractfromdict(search_result, ['album_info', 'id'], '')
        song_id = search_result.get('audioId') or search_result.get('sectionId')
        # parse (getPlayPath addresses the audio by id, getListenPath by position)
        song_info = self._lrtsbuildepisode(
            search_result, 'album_info', 'nickName', song_id,
            play_url=f"https://m.lrts.me/ajax/getPlayPath?entityId={album_id}&entityType=2&opType=1&sections=[{song_id}]&type=0",
            listen_url=f"https://m.lrts.me/ajax/getListenPath?entityId={album_id}&entityType=2&opType=1&sections=[{section_idx}]&type=0&id={song_id}&section={section_idx}",
            request_overrides=request_overrides,
        )
        # return
        return song_info
    '''_lrtsnewcollection'''
    def _lrtsnewcollection(self, search_result: dict, singer_key: str, download_results: list):
        '''Create the placeholder SongInfo of a book/album; episodes and totals are filled in later.'''
        return SongInfo(
            raw_data={'search': search_result, 'download': download_results, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name', None)),
            singers=legalizestring(search_result.get(singer_key)), album=f"{safeextractfromdict(search_result, ['sections'], 0) or 0} Episodes", ext=None, file_size=None,
            identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=search_result.get('cover', None), download_url=None, download_url_status={}, episodes=[],
        )
    '''_lrtsuniquetracks'''
    def _lrtsuniquetracks(self, download_results: list, id_key: str):
        '''Flatten the "list" field of every downloaded page, keeping the first dict seen per track[id_key].'''
        tracks, unique_track_ids = [], set()
        for download_result in download_results:
            for track in (safeextractfromdict(download_result, ['list'], []) or []):
                if not isinstance(track, dict) or not track.get(id_key):
                    continue
                if track.get(id_key) in unique_track_ids:
                    continue
                unique_track_ids.add(track.get(id_key))
                tracks.append(track)
        return tracks
    '''_lrtscollectepisodes'''
    def _lrtscollectepisodes(self, kind: str, tracks: list, search_result: dict, song_info, parsers: list, request_overrides: dict, progress: Progress):
        '''
        Resolve every track with "parsers" and append the downloadable ones to song_info.episodes.

        Args:
            kind: 'book' or 'album'; selects the progress wording and the f"{kind}_info" key under
                which a copy of "search_result" is attached to every track.
            tracks: de-duplicated track dicts.
            search_result: the parent book/album search hit.
            song_info: the collection SongInfo to extend.
            parsers: callables tried in order until one yields a valid download url.
            request_overrides: extra keyword arguments forwarded to the parsers.
            progress: rich Progress used for the episode progress task.
        '''
        entity_id, num_tracks = search_result['id'], len(tracks)
        pid = progress.add_task(f"{self.source}._parseby{kind} >>> (0/{num_tracks}) episodes completed in {kind} {entity_id}", total=num_tracks)
        for track_idx, track in enumerate(tracks, start=1):
            eps_info, track[f'{kind}_info'] = SongInfo(source=self.source), copy.deepcopy(search_result)
            for parser in parsers:
                # "except Exception" (not a bare except) so Ctrl-C / SystemExit still abort the run
                try: eps_info = parser(section_idx=track_idx, search_result=track, request_overrides=request_overrides)
                except Exception: continue
                if eps_info.with_valid_download_url:
                    break
            if eps_info.with_valid_download_url:
                song_info.episodes.append(eps_info)
            # advance once per track at the end of the iteration; nothing is read after the loop, so an
            # empty track list can no longer hit an unbound (or stale) loop index
            progress.advance(pid, 1)
            progress.update(pid, description=f"{self.source}._parseby{kind} >>> ({track_idx}/{num_tracks}) episodes completed in {kind} {entity_id}")
    '''_lrtsaggregate'''
    def _lrtsaggregate(self, song_info):
        '''Best-effort totals: sum episode durations and sizes, leaving the placeholders on any failure.'''
        try:
            song_info.duration_s = sum([eps.duration_s for eps in song_info.episodes])
            song_info.duration = seconds2hms(song_info.duration_s)
        except Exception:
            pass
        try:
            song_info.file_size_bytes = sum([eps.file_size_bytes for eps in song_info.episodes])
            song_info.file_size = byte2mb(song_info.file_size_bytes)
        except Exception:
            pass
    '''_parsebybook'''
    def _parsebybook(self, search_results, song_infos: list = None, request_overrides: dict = None, progress: Progress = None):
        '''
        Append one SongInfo per downloadable book found under data.bookResult.list.

        Args:
            search_results: decoded search response.
            song_infos: list to extend in place; a fresh list is created when omitted.
            request_overrides: extra keyword arguments forwarded to self.get() and the parsers.
            progress: rich Progress used for the page/episode progress tasks; required.

        Returns:
            The "song_infos" list.
        '''
        request_overrides, page_size = request_overrides or {}, 50
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        for search_result in search_results['data']['bookResult']['list']:
            if (not isinstance(search_result, dict)) or ('id' not in search_result):
                continue
            book_id, download_results = search_result['id'], []
            song_info = self._lrtsnewcollection(search_result, 'announcer', download_results)
            # --download the section menu page by page; a failed page is skipped but still counted
            num_pages = math.ceil(int(safeextractfromdict(search_result, ['sections'], 0) or 0) / page_size)
            download_book_pid = progress.add_task(f"{self.source}._parsebybook >>> (0/{num_pages}) pages downloaded in book {book_id}", total=num_pages)
            for page_num in range(1, num_pages + 1):
                try: download_results.append(resp2json(self.get(f'https://m.lrts.me/ajax/getBookMenu?bookId={book_id}&pageNum={page_num}&pageSize={page_size}&sortType=0', **request_overrides)))
                except Exception: pass
                progress.advance(download_book_pid, 1)
                progress.update(download_book_pid, description=f"{self.source}._parsebybook >>> ({page_num}/{num_pages}) pages downloaded in book {book_id}")
            # --resolve every section into an episode
            tracks = self._lrtsuniquetracks(download_results, 'id')
            self._lrtscollectepisodes('book', tracks, search_result, song_info, [self._parsebookwithofficialapiv1], request_overrides, progress)
            if not song_info.with_valid_download_url:
                continue
            self._lrtsaggregate(song_info)
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        return song_infos
    '''_parsebyalbum'''
    def _parsebyalbum(self, search_results, song_infos: list = None, request_overrides: dict = None, progress: Progress = None):
        '''
        Append one SongInfo per downloadable album found under data.albumResult.list.

        Args:
            search_results: decoded search response.
            song_infos: list to extend in place; a fresh list is created when omitted.
            request_overrides: extra keyword arguments forwarded to self.get() and the parsers.
            progress: rich Progress used for the page/episode progress tasks; required.

        Returns:
            The "song_infos" list.
        '''
        request_overrides = request_overrides or {}
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        for search_result in search_results['data']['albumResult']['list']:
            if (not isinstance(search_result, dict)) or ('id' not in search_result):
                continue
            album_id, download_results = search_result['id'], []
            song_info = self._lrtsnewcollection(search_result, 'nickName', download_results)
            # --download the audio list (single request); request_overrides is forwarded like everywhere else
            download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/1) pages downloaded in album {album_id}", total=1)
            # NOTE(review): "ablumnId" is kept exactly as found -- presumably the spelling the endpoint expects; confirm
            try: (resp := self.get(f'https://m.lrts.me/ajax/getAlbumAudios?ablumnId={album_id}&sortType=0', **request_overrides)).raise_for_status()
            except Exception: continue
            download_results.append(resp2json(resp=resp))
            progress.advance(download_album_pid, 1)
            progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> (1/1) pages downloaded in album {album_id}")
            # --resolve every audio into an episode
            tracks = self._lrtsuniquetracks(download_results, 'audioId')
            self._lrtscollectepisodes('album', tracks, search_result, song_info, [self._parsealbumwithofficialapiv1], request_overrides, progress)
            if not song_info.with_valid_download_url:
                continue
            self._lrtsaggregate(song_info)
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        return song_infos
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: dict = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Run one search request and hand the decoded response to the parser of the url's search type.

        Args:
            keyword: search text (not used here; the url already carries it).
            search_url: one single-entry {search_type: url} dict produced by _constructsearchurls.
            request_overrides: extra keyword arguments forwarded to self.get() and the parsers.
            song_infos: list to extend in place; a fresh list is created when omitted.
            progress: rich Progress that receives the status line; required.
            progress_id: id of the progress task whose description is updated.

        Returns:
            The "song_infos" list. Request/parsing errors are reported on the progress task, not raised.
        '''
        # init
        request_overrides = request_overrides or {}
        # a literal [] default would be shared (and keep growing) across calls
        song_infos = [] if song_infos is None else song_infos
        (search_type, search_url), = search_url.items()
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = resp2json(resp)
            # --parse based on search type
            parsers = {'album': self._parsebyalbum, 'book': self._parsebybook}
            parsers[search_type](search_results, song_infos=song_infos, request_overrides=request_overrides, progress=progress)
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
@@ -0,0 +1,202 @@
'''
Function:
Implementation of QingtingMusicClient: https://m.qingting.fm/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import copy
import hmac
import math
import hashlib
from rich.progress import Progress
from typing import Any, Dict, List
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urlparse, parse_qs
from ..utils import legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, byte2mb, SongInfo
'''QingtingMusicClient'''
class QingtingMusicClient(BaseMusicClient):
source = 'QingtingMusicClient'
HMAC_KEY = "99@b8#571(bb38_b"
DEVICE_ID = "66f6e3b560ad8876e52e6e67ee535c5c"
ALLOWED_SEARCH_TYPES = ['album', 'track']
def __init__(self, **kwargs):
    '''Configure search types, validate supplied credentials, set request headers and start the session.

    Keyword Args:
        allowed_search_types: optional iterable of search types; defaults to ``ALLOWED_SEARCH_TYPES``.
        Remaining keyword arguments are passed to the parent constructor.

    Raises:
        AssertionError: if search or download cookies are supplied without "qingting_id"
            together with at least one of "access_token" / "refresh_token".
    '''
    self.allowed_search_types = list(set(kwargs.pop('allowed_search_types', QingtingMusicClient.ALLOWED_SEARCH_TYPES)))
    super(QingtingMusicClient, self).__init__(**kwargs)
    # raised explicitly (same exception type as before) so the check survives `python -O`
    credential_hint = '"qingting_id", "access_token" and "refresh_token" should be configured, refer to "https://musicdl.readthedocs.io/en/latest/Quickstart.html#qingtingfm-audio-radio-download"'
    for cookies in (self.default_search_cookies, self.default_download_cookies):
        if not cookies:
            continue
        has_token = ("access_token" in cookies) or ("refresh_token" in cookies)
        if ("qingting_id" not in cookies) or (not has_token):
            raise AssertionError(credential_hint)
    user_agent = "QingTing-iOS/10.7.9.0 com.Qting.QTTour Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
    self.default_search_headers = {"User-Agent": user_agent, "QT-App-Version": "10.7.9.0"}
    self.default_download_headers = {"User-Agent": user_agent, "QT-App-Version": "10.7.9.0"}
    self.default_headers = self.default_search_headers
    # the credentials are copied into auth_info, then both cookie defaults are reset to empty dicts
    self.auth_info = copy.deepcopy(self.default_search_cookies or self.default_download_cookies)
    self.default_search_cookies = {}; self.default_download_cookies = {}
    self._initsession()
'''_auth'''
def _auth(self, request_overrides: dict = None):
    '''Exchange the stored refresh token for new credentials.

    Posts a ``refresh_token`` grant to the auth endpoint, stores a deep copy of the
    ``data`` field of the response in ``self.auth_info`` and returns that field.
    '''
    overrides = request_overrides or {}
    qingting_id = self.auth_info['qingting_id']
    refresh_token = self.auth_info['refresh_token']
    form = {"refresh_token": refresh_token, "qingting_id": qingting_id, "device_id": QingtingMusicClient.DEVICE_ID, "grant_type": "refresh_token"}
    resp = self.post("https://user.qtfm.cn/u2/api/v4/auth", headers={"Content-Type": "application/x-www-form-urlencoded"}, data=form, **overrides)
    resp.raise_for_status()
    refreshed = resp2json(resp)['data']
    self.auth_info = copy.deepcopy(refreshed)
    return refreshed
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the paginated search urls for every enabled search type.

    Args:
        keyword: search keyword, sent as the ``k`` query parameter.
        rule: optional query parameters merged over the defaults.
        request_overrides: accepted for interface parity; not used when building urls.

    Returns:
        List of url strings, one per page and enabled search type.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # credentials without an access token trigger a refresh before searching
    if self.auth_info and ("access_token" not in self.auth_info):
        self._auth()
    # search rules: sort_type should be in {"0", "1", "2"} >>> {Comprehensive Sorting, Most Popular, Latest Updates}; include should be in {"channel_ondemand", "channel_live", "program_ondemand", "people_podcaster", "all"}
    base_rule = {"k": keyword, "sort_type": '0', "page": "1", "include": "channel_ondemand", "pagesize": "30", "k_src": "direct"}
    base_rule.update(rule)
    # construct search urls based on search rules
    include_by_type = {"album": "channel_ondemand", "track": "program_ondemand"}
    endpoint = 'https://app.qtfm.cn/m-bff/v1/search/result?'
    page_size = self.search_size_per_page
    search_urls = []
    for search_type in QingtingMusicClient.ALLOWED_SEARCH_TYPES:
        if search_type not in self.allowed_search_types:
            continue
        type_rule = copy.deepcopy(base_rule)
        type_rule['include'] = include_by_type[search_type]
        offset = 0
        while self.search_size_per_source > offset:
            page_rule = copy.deepcopy(type_rule)
            page_rule['pagesize'] = str(page_size)
            page_rule['page'] = str(int(offset // page_size) + 1)
            search_urls.append(endpoint + urlencode(page_rule))
            offset += page_size
    # return
    return search_urls
'''_fetchchannelinfo'''
def _fetchchannelinfo(self, channel_id: str, request_overrides: dict = None) -> Dict[str, Any]:
    '''Request the channel endpoint for ``channel_id`` and return the decoded response.

    Raises whatever ``raise_for_status`` raises on an unsuccessful HTTP status.
    '''
    overrides = request_overrides or {}
    resp = self.get(f"https://app.qtfm.cn/m-bff/v2/channel/{channel_id}", **overrides)
    resp.raise_for_status()
    return resp2json(resp=resp)
'''_listpageprograms'''
def _listpageprograms(self, channel_id: str, page: int, page_size: int, request_overrides: dict = None) -> Dict[str, Any]:
    '''Fetch one page of a channel's program listing in ascending order.

    Args:
        channel_id: channel whose programs are listed.
        page: page number, sent as ``curpage``.
        page_size: page length, sent as ``pagesize``.
        request_overrides: extra keyword arguments forwarded to ``self.get``.

    Returns:
        The decoded response. The album parser reads the programs from its
        ``data.programs`` path, so this is annotated as a mapping rather than a list.
    '''
    request_overrides = request_overrides or {}
    url = f"https://app.qtfm.cn/m-bff/v2/channel/{channel_id}/programs"
    (resp := self.get(url, params={"order": "asc", "pagesize": str(page_size), "curpage": str(page)}, **request_overrides)).raise_for_status()
    return resp2json(resp=resp)
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, request_overrides: dict = None):
    '''Resolve one program to a stream url through the signed audiostreams endpoint.

    Candidate editions (including ``backup_editions`` when that field is a list) are tried
    from largest ``size``/``bitrate`` downwards; the first url whose SongInfo reports
    ``with_valid_download_url`` wins. When none does, the last SongInfo built (or the
    empty initial one) is returned.
    '''
    # init
    request_overrides = request_overrides or {}
    song_id = search_result.get('id') or search_result.get('Id')
    song_info = SongInfo(source=self.source)
    app_url = search_result.get('url')
    if not (song_id and app_url):
        return song_info
    # parse
    query_params = parse_qs(urlparse(str(app_url)).query, keep_blank_values=True)
    channel_id = query_params.get('channel_id')[0]
    program_id = (query_params.get('program_id') or [song_id])[0]
    assert str(song_id) == str(program_id), 'song_id and app_url are not synchronized'
    path_query = f"/m-bff/v1/audiostreams/channel/{channel_id}/program/{program_id}?access_token={self.auth_info.get('access_token', '')}&device_id={QingtingMusicClient.DEVICE_ID}&qingting_id={self.auth_info.get('qingting_id', '')}&type=play"
    # the signature is an HMAC-MD5 hex digest of the path and query
    sign = hmac.new(str(QingtingMusicClient.HMAC_KEY).encode("utf-8"), str(path_query).encode("utf-8"), hashlib.md5).hexdigest()
    resp = self.get(f"https://app.qtfm.cn{path_query}&sign={sign}", **request_overrides)
    resp.raise_for_status()
    download_result = resp2json(resp=resp)
    # channel details are optional metadata, so a failed lookup is ignored
    if 'channel_info' not in search_result:
        try:
            search_result['channel_info'] = self._fetchchannelinfo(channel_id, request_overrides)
        except Exception:
            pass
    editions = download_result['data']['editions']
    backup_editions = download_result['data'].get('backup_editions')
    if not isinstance(backup_editions, list):
        backup_editions = []
    candidate_editions: list[dict] = sorted(editions + backup_editions, key=lambda item: (item.get('size', 0), item.get('bitrate', 0)), reverse=True)
    for edition in candidate_editions:
        if not edition.get('urls'):
            continue
        # a single url string is normalised to a one-element list, stored back on the edition
        if isinstance(edition.get('urls'), str):
            edition['urls'] = [edition.get('urls')]
        for download_url in edition.get('urls'):
            if not download_url or not str(download_url).startswith('http'):
                continue
            song_name = legalizestring(search_result.get('title'))
            podcasters = safeextractfromdict(search_result, ['channel_info', 'data', 'podcasters'], []) or []
            singers = legalizestring(', '.join([person.get('nick_name') for person in podcasters if isinstance(person, dict) and person.get('nick_name')]))
            album = legalizestring(safeextractfromdict(search_result, ['channel_info', 'data', 'title'], None) or search_result.get('desc'))
            ext = download_url.split('?')[0].split('.')[-1]
            size_bytes = int(float(edition.get('size', 0) or 0)) * 1024
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=song_name, singers=singers,
                album=album, ext=ext, file_size_bytes=size_bytes, file_size=byte2mb(size_bytes), identifier=song_id, duration_s=search_result.get('duration', 0),
                duration=seconds2hms(search_result.get('duration', 0) or 0), lyric=None, cover_url=safeextractfromdict(search_result, ['cover'], None), download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            if song_info.with_valid_download_url:
                break
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsebytrack'''
def _parsebytrack(self, search_results, song_infos: list = [], request_overrides: dict = None, progress: Progress = None):
request_overrides = request_overrides or {}
for search_result in search_results['data']['data']:
if (not isinstance(search_result, dict)) or ('id' not in search_result) or (search_result.get('type') not in {'program'}): continue
song_info = SongInfo(source=self.source)
for parser in [self._parsewithofficialapiv1]:
try: song_info = parser(search_result=search_result, request_overrides=request_overrides)
except: continue
if song_info.with_valid_download_url: break
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
return song_infos
'''_parsebyalbum'''
def _parsebyalbum(self, search_results, song_infos: list = [], request_overrides: dict = None, progress: Progress = None):
request_overrides = request_overrides or {}
for search_result in search_results['data']['data']:
if (not isinstance(search_result, dict)) or ('id' not in search_result) or (search_result.get('type') not in {'channel_ondemand'}): continue
try: search_result['channel_info'] = self._fetchchannelinfo(search_result['id'], request_overrides)
except Exception: pass
download_results, page_size, tracks, unique_track_ids = [], 100, [], set()
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_results, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['podcaster', 'name'], None)),
album=f"{safeextractfromdict(search_result, ['channel_info', 'data', 'program_count'], 0) or 0} Episodes", ext=None, file_size=None, identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=search_result.get('cover', None),
download_url=None, download_url_status={}, episodes=[],
)
num_pages = math.ceil(int(safeextractfromdict(search_result, ['channel_info', 'data', 'program_count'], 0) or 0) / page_size)
download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/{num_pages}) pages downloaded in album {search_result['id']}", total=num_pages)
for page_num_idx, page_num in enumerate(range(1, num_pages + 1)):
if page_num_idx > 0:
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({page_num_idx}/{num_pages}) pages downloaded in album {search_result['id']}")
try: download_results.append(self._listpageprograms(search_result['id'], page=page_num, page_size=page_size, request_overrides=request_overrides))
except: continue
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({page_num_idx+1}/{num_pages}) pages downloaded in album {search_result['id']}")
for download_result in download_results:
for track in (safeextractfromdict(download_result, ['data', 'programs'], []) or []):
if not isinstance(track, dict) or not track.get('id'): continue
if track.get('id') in unique_track_ids: continue
unique_track_ids.add(track.get('id'))
tracks.append(track)
download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/{len(tracks)}) episodes completed in album {search_result['id']}", total=len(tracks))
for track_idx, track in enumerate(tracks):
if track_idx > 0:
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({track_idx}/{len(tracks)}) episodes completed in album {search_result['id']}")
eps_info, track['channel_info'] = SongInfo(source=self.source), search_result.get('channel_info', {})
track['url'] = f"qingtingfm://app.qingting.fm/playingview?type=ondemand&channel_id={search_result['id']}&program_id={track['id']}"
for parser in [self._parsewithofficialapiv1]:
try: eps_info = parser(search_result=track, request_overrides=request_overrides)
except: continue
if eps_info.with_valid_download_url: break
if not eps_info.with_valid_download_url: continue
song_info.episodes.append(eps_info)
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({track_idx+1}/{len(tracks)}) episodes completed in album {search_result['id']}")
if not song_info.with_valid_download_url: continue
try: song_info.duration_s = sum([eps.duration_s for eps in song_info.episodes]); song_info.duration = seconds2hms(song_info.duration_s)
except Exception: pass
try: song_info.file_size_bytes = sum([eps.file_size_bytes for eps in song_info.episodes]); song_info.file_size = byte2mb(song_info.file_size_bytes)
except Exception: pass
song_infos.append(song_info)
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
return song_infos
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and hand the decoded payload to the album or track parser.

    The parser is chosen from the ``include`` query parameter of ``search_url``.

    Args:
        keyword: search keyword; not read inside this method.
        search_url: full search url.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and to the parser.
        song_infos: list the parser appends to; a new list is created per call when omitted.
        progress: progress reporter; status updates are skipped when it is None.
        progress_id: task id used for the status updates.

    Returns:
        The ``song_infos`` list.
    '''
    # init
    request_overrides = request_overrides or {}
    # a `[]` default would be one list shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = resp2json(resp)
        # --parse based on search type
        search_type = parse_qs(urlparse(search_url).query, keep_blank_values=True).get('include')[0]
        parsers = {'channel_ondemand': self._parsebyalbum, 'program_ondemand': self._parsebytrack}
        parsers[search_type](search_results, song_infos=song_infos, request_overrides=request_overrides, progress=progress)
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # guarded so that a missing reporter cannot raise out of the error handler itself
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,205 @@
'''
Function:
Implementation of XimalayaMusicClient: https://www.ximalaya.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import time
import math
import copy
import base64
import binascii
from Crypto.Cipher import AES
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urlparse, parse_qs
from ..utils import byte2mb, resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, SongInfo
'''XimalayaMusicClient'''
class XimalayaMusicClient(BaseMusicClient):
source = 'XimalayaMusicClient'
ALLOWED_SEARCH_TYPES = ['album', 'track']
def __init__(self, **kwargs):
    '''Configure the enabled search types, the request headers and the session.

    Keyword Args:
        allowed_search_types: optional iterable of search types; defaults to ``ALLOWED_SEARCH_TYPES``.
        Remaining keyword arguments are passed to the parent constructor.
    '''
    requested_types = kwargs.pop('allowed_search_types', XimalayaMusicClient.ALLOWED_SEARCH_TYPES)
    self.allowed_search_types = list(set(requested_types))
    super(XimalayaMusicClient, self).__init__(**kwargs)
    mobile_user_agent = "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36"
    # search and download headers are separate dict objects with the same content
    self.default_search_headers = {"User-Agent": mobile_user_agent}
    self.default_download_headers = {"User-Agent": mobile_user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the paginated search urls for every enabled search type.

    Args:
        keyword: search keyword, sent as the ``kw`` query parameter.
        rule: optional query parameters merged over the defaults.
        request_overrides: accepted for interface parity; not used when building urls.

    Returns:
        List of url strings, one per page and enabled search type.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # search rules
    base_rule = {
        'appid': '0', 'condition': 'relation', 'core': 'track', 'device': 'android', 'deviceId': '9a68144e-de5b-3c60-be5e-adce947ab5ff', 'kw': keyword,
        'live': 'true', 'needSemantic': 'true', 'network': 'wifi', 'operator': '1', 'page': '1', 'paidFilter': 'false', 'plan': 'c', 'recall': 'normal',
        'rows': self.search_size_per_page, 'search_version': '2.8', 'spellchecker': 'true', 'version': '6.6.48', 'voiceAsinput': 'false',
    }
    base_rule.update(rule)
    # construct search urls based on search rules
    endpoint = 'https://searchwsa.ximalaya.com/front/v1?'
    page_size = self.search_size_per_page
    search_urls = []
    for search_type in XimalayaMusicClient.ALLOWED_SEARCH_TYPES:
        if search_type not in self.allowed_search_types:
            continue
        type_rule = copy.deepcopy(base_rule)
        type_rule['core'] = search_type
        offset = 0
        while self.search_size_per_source > offset:
            page_rule = copy.deepcopy(type_rule)
            page_rule['rows'] = str(page_size)
            page_rule['page'] = str(int(offset // page_size) + 1)
            search_urls.append(endpoint + urlencode(page_rule))
            offset += page_size
    # return
    return search_urls
'''_crackplayurl'''
def _crackplayurl(self, ciphertext: str):
if not ciphertext: return ciphertext
key = binascii.unhexlify("aaad3e4fd540b0f79dca95606e72bf93")
ciphertext = base64.urlsafe_b64decode(ciphertext + "=" * (4 - len(ciphertext) % 4))
cipher = AES.new(key, AES.MODE_ECB)
plaintext = cipher.decrypt(ciphertext)
plaintext = re.sub(r"[^\x20-\x7E]", "", plaintext.decode("utf-8"))
return plaintext
'''_parsewithcggapi'''
def _parsewithcggapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve one track through the third-party cenguigui endpoint.

    Args:
        search_result: track entry; the id is read from ``id`` or ``trackId``.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and the link tester.

    Returns:
        A populated SongInfo, or an empty one when the response has no url or its
        ``size`` text is zero or cannot be parsed.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result.get('id') or search_result.get('trackId'), SongInfo(source=self.source)
    # parse
    (resp := self.get(f"https://api-v2.cenguigui.cn/api/music/ximalaya.php?trackId={song_id}", **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    if not download_result.get('url'): return song_info
    # the size is compared numerically: a substring test for '0 MB' also rejects "10 MB", "3.50 MB", ...
    size_match = re.match(r'^\s*([0-9]*\.?[0-9]+)\s*([KMGT]?B)\s*$', str(download_result['size']), flags=re.IGNORECASE)
    if size_match is None or float(size_match.group(1)) == 0: return song_info
    download_url = download_result['url']
    size_value, size_unit = float(size_match.group(1)), size_match.group(2)
    file_size = f"{size_value:.2f} {size_unit}"
    file_size_bytes = int(size_value * {'B': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}[size_unit.upper()])
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('nickname')),
        album=legalizestring(search_result.get('album_title') or search_result.get('albumTitle')), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=file_size_bytes, file_size=file_size, identifier=song_id,
        duration_s=int(float(search_result.get('duration', 0) or 0)), duration=seconds2hms(search_result.get('duration', 0) or 0), lyric=None, cover_url=safeextractfromdict(search_result, ['cover_path'], None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    # the probe result replaces the reported size, and the extension too unless it is empty or 'NULL'
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    song_info.ext = song_info.download_url_status['probe_status']['ext'] if (song_info.download_url_status['probe_status']['ext'] and song_info.download_url_status['probe_status']['ext'] not in ('NULL',)) else song_info.ext
    return song_info
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, request_overrides: dict = None):
    '''Resolve one track through the official mobile play-page endpoint.

    The entries of ``trackInfo.playUrlList`` are tried from largest ``fileSize`` downwards;
    each url is decrypted with ``_crackplayurl`` and the first one whose SongInfo reports
    ``with_valid_download_url`` is probed and returned.

    Args:
        search_result: track entry; the id is read from ``id`` or ``trackId``.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and the link tester.

    Returns:
        The winning SongInfo, otherwise the last SongInfo built or an empty one.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result.get('id') or search_result.get('trackId'), SongInfo(source=self.source)
    # parse
    params = {"device": "web", "trackId": song_id, "trackQualityLevel": '3'}
    (resp := self.get(f"https://www.ximalaya.com/mobile-playpage/track/v3/baseInfo/{int(time.time() * 1000)}", params=params, **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    track_info = safeextractfromdict(download_result, ['trackInfo'], {})
    if not track_info or not isinstance(track_info, dict): return song_info
    # non-dict entries are dropped before sorting so the sort key cannot fail on them
    play_urls = [item for item in (safeextractfromdict(track_info, ['playUrlList'], []) or []) if isinstance(item, dict)]
    play_urls.sort(key=lambda item: int(item.get('fileSize', 0) or 0), reverse=True)
    for encrypted_url in play_urls:
        download_url: str = self._crackplayurl(encrypted_url.get('url', ''))
        if not download_url or not str(download_url).startswith('http'): continue
        # whole bytes, matching the integer byte count produced by _parsewithcggapi
        file_size_bytes = int(float(encrypted_url.get('fileSize', 0) or 0))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('nickname')),
            album=legalizestring(search_result.get('album_title') or search_result.get('albumTitle')), ext=download_url.split('?')[0].split('.')[-1] or 'mp3', file_size_bytes=file_size_bytes,
            file_size=byte2mb(encrypted_url.get('fileSize', 0)), identifier=song_id, duration_s=int(float(search_result.get('duration', 0) or 0)), duration=seconds2hms(search_result.get('duration', 0) or 0), lyric=None,
            cover_url=safeextractfromdict(search_result, ['cover_path'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        if not song_info.with_valid_download_url: continue
        # the probe result replaces the reported size, and the extension too unless it is empty or 'NULL'
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        song_info.ext = song_info.download_url_status['probe_status']['ext'] if (song_info.download_url_status['probe_status']['ext'] and song_info.download_url_status['probe_status']['ext'] not in ('NULL',)) else song_info.ext
        break
    # return
    return song_info
'''_parsebytrack'''
def _parsebytrack(self, search_results, song_infos: list = [], request_overrides: dict = None, progress: Progress = None):
request_overrides = request_overrides or {}
for search_result in search_results['response']['docs']:
if (not isinstance(search_result, dict)) or ('id' not in search_result): continue
song_info = SongInfo(source=self.source)
for parser in [self._parsewithcggapi, self._parsewithofficialapiv1]:
try: song_info = parser(search_result=search_result, request_overrides=request_overrides)
except: continue
if song_info.with_valid_download_url: break
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
return song_infos
'''_parsebyalbum'''
def _parsebyalbum(self, search_results, song_infos: list = [], request_overrides: dict = None, progress: Progress = None):
request_overrides = request_overrides or {}
for search_result in search_results['response']['docs']:
if (not isinstance(search_result, dict)) or ('id' not in search_result): continue
download_results, page_size, tracks, unique_track_ids = [], 200, [], set()
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_results, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('nickname')),
album=f"{search_result.get('tracks', 0) or 0} Episodes", ext=None, file_size=None, identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=safeextractfromdict(search_result, ['cover_path'], None),
download_url=None, download_url_status={}, episodes=[],
)
num_pages = math.ceil(int(search_result.get('tracks', 0) or 0) / page_size)
download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/{num_pages}) pages downloaded in album {search_result['id']}", total=num_pages)
for page_num_idx, page_num in enumerate(range(1, num_pages + 1)):
if page_num_idx > 0:
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({page_num_idx}/{num_pages}) pages downloaded in album {search_result['id']}")
try: resp = self.get(f'http://mobile.ximalaya.com/mobile/v1/album/track?albumId={search_result["id"]}&pageId={page_num}&pageSize={page_size}&isAsc=true', **request_overrides)
except: continue
download_results.append(resp2json(resp=resp))
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({page_num_idx+1}/{num_pages}) pages downloaded in album {search_result['id']}")
for download_result in download_results:
for track in (safeextractfromdict(download_result, ['data', 'list'], []) or []):
if not isinstance(track, dict) or not track.get('trackId'): continue
if track.get('trackId') in unique_track_ids: continue
unique_track_ids.add(track.get('trackId'))
tracks.append(track)
download_album_pid = progress.add_task(f"{self.source}._parsebyalbum >>> (0/{len(tracks)}) episodes completed in album {search_result['id']}", total=len(tracks))
for track_idx, track in enumerate(tracks):
if track_idx > 0:
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({track_idx}/{len(tracks)}) episodes completed in album {search_result['id']}")
eps_info = SongInfo(source=self.source)
for parser in [self._parsewithcggapi, self._parsewithofficialapiv1]:
try: eps_info = parser(search_result=track, request_overrides=request_overrides)
except: continue
if eps_info.with_valid_download_url: break
if not eps_info.with_valid_download_url: continue
song_info.episodes.append(eps_info)
progress.advance(download_album_pid, 1)
progress.update(download_album_pid, description=f"{self.source}._parsebyalbum >>> ({track_idx+1}/{len(tracks)}) episodes completed in album {search_result['id']}")
if not song_info.with_valid_download_url: continue
try: song_info.duration_s = sum([eps.duration_s for eps in song_info.episodes]); song_info.duration = seconds2hms(song_info.duration_s)
except Exception: pass
try: song_info.file_size_bytes = sum([eps.file_size_bytes for eps in song_info.episodes]); song_info.file_size = byte2mb(song_info.file_size_bytes)
except Exception: pass
song_infos.append(song_info)
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
return song_infos
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and hand the decoded payload to the album or track parser.

    The parser is chosen from the ``core`` query parameter of ``search_url``.

    Args:
        keyword: search keyword; not read inside this method.
        search_url: full search url.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and to the parser.
        song_infos: list the parser appends to; a new list is created per call when omitted.
        progress: progress reporter; status updates are skipped when it is None.
        progress_id: task id used for the status updates.

    Returns:
        The ``song_infos`` list.
    '''
    # init
    request_overrides = request_overrides or {}
    # a `[]` default would be one list shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = resp2json(resp)
        # --parse based on search type
        search_type = parse_qs(urlparse(search_url).query, keep_blank_values=True).get('core')[0]
        parsers = {'album': self._parsebyalbum, 'track': self._parsebytrack}
        parsers[search_type](search_results, song_infos=song_infos, request_overrides=request_overrides, progress=progress)
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # guarded so that a missing reporter cannot raise out of the error handler itself
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,6 @@
'''initialize'''
from .jbsou import JBSouMusicClient
from .tunehub import TuneHubMusicClient
from .mp3juice import MP3JuiceMusicClient
from .gdstudio import GDStudioMusicClient
from .myfreemp3 import MyFreeMP3MusicClient
@@ -0,0 +1,166 @@
'''
Function:
Implementation of GDStudioMusicClient: https://music.gdstudio.xyz/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
import time
import random
import hashlib
import json_repair
from urllib.parse import quote
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, resp2json, usesearchheaderscookies, byte2mb, estimatedurationwithfilesizebr, estimatedurationwithfilelink, seconds2hms, safeextractfromdict, cleanlrc, SongInfo, AudioLinkTester
'''GDStudioMusicClient'''
class GDStudioMusicClient(BaseMusicClient):
source = 'GDStudioMusicClient'
SUPPORTED_SITES = ['spotify', 'netease', 'kuwo', 'tidal', 'qobuz', 'joox', 'bilibili', 'apple', 'tencent', 'ytmusic'] # 'kugou', 'ximalaya', 'migu'
SITE_TO_API_MAPPER = {
'netease': 'https://music.gdstudio.xyz/api.php', 'tencent': 'https://music.gdstudio.xyz/api.php', 'tidal': 'https://music.gdstudio.xyz/api.php', 'spotify': 'https://music.gdstudio.xyz/api.php', 'kuwo': 'https://music.gdstudio.xyz/api.php', 'bilibili': 'https://music.gdstudio.xyz/api.php', 'apple': 'https://music.gdstudio.xyz/api.php',
'migu': 'https://music-api-cn.gdstudio.xyz/api.php', 'kugou': 'https://music-api-cn.gdstudio.xyz/api.php', 'ximalaya': 'https://music-api-cn.gdstudio.xyz/api.php', 'joox': 'https://music-api-hk.gdstudio.xyz/api.php', 'qobuz': 'https://music-api-us.gdstudio.xyz/api.php', 'ytmusic': 'https://music-api-us.gdstudio.xyz/api.php',
}
def __init__(self, **kwargs):
    '''Configure the enabled upstream sources, the request headers and the session.

    Keyword Args:
        allowed_music_sources: optional iterable of source names; defaults to
            ``SUPPORTED_SITES`` without its last two entries.
        Remaining keyword arguments are passed to the parent constructor.
    '''
    requested_sources = kwargs.pop('allowed_music_sources', GDStudioMusicClient.SUPPORTED_SITES[:-2])
    self.allowed_music_sources = list(set(requested_sources))
    super(GDStudioMusicClient, self).__init__(**kwargs)
    desktop_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
    self.default_search_headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'User-Agent': desktop_user_agent}
    self.default_download_headers = {'User-Agent': desktop_user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
'''_yieldcallback'''
def _yieldcallback(self):
random_num = ''.join([str(random.randint(0, 9)) for _ in range(21)])
timestamp = int(time.time() * 1000)
return f"jQuery{random_num}_{timestamp}"
'''_yieldcrc32'''
def _yieldcrc32(self, id_value: str, hostname: str = 'music.gdstudio.xyz', version: str = "2025.11.4"):
# timestamp
try: (resp := self.get('https://www.ximalaya.com/revision/time')).raise_for_status(); ts_ms = resp.text.strip()
except Exception: ts_ms = int(time.time() * 1000)
ts9 = str(ts_ms)[:9]
# version
parts = version.split("."); padded = [p if len(p) != 1 else "0" + p for p in parts]; ver_padded = "".join(padded)
# id
id_str = quote(str(id_value))
# src
src = f"{hostname}|{ver_padded}|{ts9}|{id_str}"
# return
return hashlib.md5(src.encode("utf-8")).hexdigest()[-8:].upper()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one request description per page and enabled upstream source.

    Sources served by ``https://music.gdstudio.xyz/api.php`` get a POST description
    (form ``data`` plus a ``callback`` query parameter); all other sources get a GET
    description whose ``params`` also carry ``callback`` and a ``_`` timestamp.

    Args:
        keyword: search keyword, sent as ``name`` and used for the ``s`` signature.
        rule: optional fields merged over the default search fields.
        request_overrides: accepted for interface parity; not used when building requests.

    Returns:
        List of dicts with the keys ``url``, ``method`` and ``params`` (plus ``data`` for POST).
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    enabled_sources = copy.deepcopy(self.allowed_music_sources)
    # search rules
    base_rule = {'types': 'search', 'count': self.search_size_per_page, 'pages': '1', 'name': keyword}
    base_rule.update(rule)
    # construct search urls based on search rules
    page_size = self.search_size_per_page
    search_urls = []
    for source in GDStudioMusicClient.SUPPORTED_SITES:
        if source not in enabled_sources:
            continue
        api_url = GDStudioMusicClient.SITE_TO_API_MAPPER[source]
        source_rule = copy.deepcopy(base_rule)
        source_rule['source'] = source
        offset = 0
        while self.search_size_per_source > offset:
            page_rule = copy.deepcopy(source_rule)
            page_rule['pages'] = str(int(offset // page_size) + 1)
            page_rule['count'] = str(page_size)
            page_rule['s'] = self._yieldcrc32(keyword)
            if api_url == 'https://music.gdstudio.xyz/api.php':
                search_urls.append({'url': api_url, 'data': page_rule, 'params': {'callback': self._yieldcallback()}, 'method': 'post'})
            else:
                page_rule['callback'] = self._yieldcallback()
                page_rule['_'] = str(int(time.time() * 1000))
                search_urls.append({'url': api_url, 'params': page_rule, 'method': 'get'})
            offset += page_size
    # return
    return search_urls
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Function:
        Execute one search descriptor produced by `_constructsearchurls` and turn each hit into a SongInfo;
        the download link, lyric and cover are resolved through follow-up API calls.
    Args:
        keyword: search keyword (not read by this method itself).
        search_url: descriptor dict holding at least 'url' and 'method'; remaining items are forwarded to the request call.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: rich Progress used for status reporting; reporting is skipped when it is None.
        progress_id: task id passed to `progress.update`.
    Returns:
        The `song_infos` list.
    '''
    # init
    song_infos = [] if song_infos is None else song_infos  # never share one default list between calls
    request_overrides = request_overrides or {}
    search_meta = copy.deepcopy(search_url)
    search_url, method = search_meta.pop('url'), search_meta.pop('method')
    self.default_headers, request_overrides = copy.deepcopy(self.default_headers), copy.deepcopy(request_overrides)

    def loadjsonp(text: str):
        # responses are JSONP: keep only what sits between the first "(" and the last ")"
        return json_repair.loads(text[text.index('(') + 1: text.rindex(')')])

    def callapi(source: str, payload: dict):
        # same transport as the search request itself: POST sends the payload as form data, GET folds it into the query string
        api_url = GDStudioMusicClient.SITE_TO_API_MAPPER[source]
        callback_params = {'callback': self._yieldcallback()}
        if method == 'post':
            api_resp = self.post(api_url, params=callback_params, data=payload, **request_overrides)
        else:
            api_resp = self.get(api_url, params={**callback_params, **payload, '_': str(int(time.time() * 1000))}, **request_overrides)
        api_resp.raise_for_status()
        return api_resp

    def report(status: str):
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")

    # successful
    try:
        # --search results
        (resp := getattr(self, method)(search_url, **search_meta, **request_overrides)).raise_for_status()
        search_results = loadjsonp(resp.text)
        for search_result in search_results:
            # --download results
            if (not isinstance(search_result, dict)) or ('id' not in search_result) or ('url_id' not in search_result) or ('source' not in search_result):
                continue
            root_source, song_id = search_result['source'], search_result['id']
            song_info = SongInfo(source=self.source, root_source=root_source)
            for br in [999, 740, 320, 192, 128]: # 999 and 740 mean lossless
                data_json = {'types': 'url', 'id': song_id, 'source': root_source, 'br': br, 's': self._yieldcrc32(song_id)}
                try:
                    resp = callapi(root_source, data_json)
                except Exception:
                    continue
                download_result = loadjsonp(resp.text)
                if not (download_url := download_result.get('url')):
                    continue
                if not str(download_url).startswith('http'):
                    download_url = 'https://music.gdstudio.xyz/' + download_url
                if root_source in {'bilibili'}:
                    download_url = f'https://music-proxy.gdstudio.org/{download_url}'
                download_url_status = self.audio_link_tester.test(download_url, request_overrides)
                download_url = download_url_status['final_url']
                duration_in_secs = estimatedurationwithfilesizebr(download_result.get('size', 0), download_result.get('br', br), return_seconds=True)
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                    source=self.source,
                    song_name=legalizestring(safeextractfromdict(search_result, ['name'], None)),
                    singers=legalizestring(', '.join(safeextractfromdict(search_result, ['artist'], []) or [])),
                    album=legalizestring(safeextractfromdict(search_result, ['album'], None)),
                    ext=download_url.split('?')[0].split('.')[-1],
                    file_size_bytes=download_result.get('size'),
                    file_size=byte2mb(download_result.get('size', 0)),
                    identifier=song_id,
                    duration_s=duration_in_secs,
                    duration=seconds2hms(duration_in_secs),
                    lyric=None,
                    cover_url=None,
                    download_url=download_url,
                    download_url_status=download_url_status,
                    root_source=root_source,
                )
                if root_source in {'bilibili'}:
                    # use proxy url, general test method will fail
                    song_info.download_url_status['ok'] = True if song_info.download_url_status['clen'] > 0 else False
                if song_info.with_valid_download_url:
                    break
            if not song_info.with_valid_download_url:
                continue
            song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']
            probed_ext = song_info.download_url_status['probe_status']['ext']
            if song_info.ext in {'m4s', 'mp4'}:
                song_info.ext = 'm4a'
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probed_ext in AudioLinkTester.VALID_AUDIO_EXTS):
                song_info.ext = probed_ext
            elif song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = 'mp3'
            # --lyric results
            try:
                data_json = {'types': 'lyric', 'id': search_result['lyric_id'], 'source': root_source, 's': self._yieldcrc32(search_result['lyric_id'])}
                resp = callapi(root_source, data_json)
                lyric_result = loadjsonp(resp.text)
                lyric = cleanlrc(lyric_result.get('lyric') or "") or cleanlrc(lyric_result.get('tlyric') or "") or 'NULL'
            except Exception:
                lyric_result, lyric = dict(), 'NULL'
            if not lyric or lyric == 'NULL':
                # fall back to lrclib.net, matching on artist / track / album / measured duration
                try:
                    params = {'artist_name': song_info.singers, 'track_name': song_info.song_name, 'album_name': song_info.album, 'duration': estimatedurationwithfilelink(song_info.download_url, headers=self.default_download_headers, request_overrides=request_overrides)}
                    (resp := self.get('https://lrclib.net/api/get?', params=params, **request_overrides)).raise_for_status()
                    lyric_result = resp2json(resp=resp)
                    lyric = cleanlrc(lyric_result.get('syncedLyrics') or "") or 'NULL'
                    song_info.duration_s, song_info.duration = params['duration'], seconds2hms(params['duration'])
                except Exception:
                    lyric_result, lyric = dict(), 'NULL'
            song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
            song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
            # --cover results (best effort: a failure leaves cover_url untouched)
            if root_source in {'kuwo'}:
                cdn_hosts = ["http://img1.kwcdn.kuwo.cn/star/albumcover/", "http://img2.kwcdn.kuwo.cn/star/albumcover/", "http://img3.kwcdn.kuwo.cn/star/albumcover/"]
                try:
                    # swap the "120/" size prefix for "300/" before appending to the CDN host
                    search_result['pic_id'] = '300/' + search_result['pic_id'][4:] if str(search_result['pic_id']).startswith('120/') else search_result['pic_id']
                    song_info.cover_url = cdn_hosts[0] + search_result['pic_id']
                except Exception:
                    pass
            elif root_source in {'apple'}:
                try:
                    song_info.cover_url = search_result['pic_id'].format(w=300, h=300)
                except Exception:
                    pass
            elif root_source in {'bilibili'}:
                try:
                    song_info.cover_url = search_result['pic_id']
                    song_info.cover_url = f'https:{song_info.cover_url}' if not song_info.cover_url.startswith('http') else song_info.cover_url
                except Exception:
                    pass
            else:
                try:
                    data_json = {'types': 'pic', 'id': search_result['pic_id'], 'source': root_source, 'size': 300, 's': self._yieldcrc32(search_result['pic_id'])}
                    resp = callapi(root_source, data_json)
                    cover_result = loadjsonp(resp.text)
                    song_info.cover_url = cover_result['url']
                except Exception:
                    pass
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        report('Success')
    # failure
    except Exception as err:
        report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,98 @@
'''
Function:
Implementation of JBSouMusicClient: https://www.jbsou.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
from urllib.parse import urljoin
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, resp2json, usesearchheaderscookies, seconds2hms, extractdurationsecondsfromlrc, safeextractfromdict, cleanlrc, SongInfo, AudioLinkTester
'''JBSouMusicClient'''
class JBSouMusicClient(BaseMusicClient):
# Client identifier: passed as `source=` to every SongInfo built below and used as the prefix of progress messages.
source = 'JBSouMusicClient'
# Upstream sites this client may query, in query order. The trailing [:-2] slice removes 'migu' and 'qianqian', leaving netease / qq / kugou / kuwo.
ALLOWED_SITES = ['netease', 'qq', 'kugou', 'kuwo', 'migu', 'qianqian'][:-2] # it seems qianqian and migu are useless, recorded on 2026-01-29
def __init__(self, **kwargs):
    '''
    Function:
        Configure the enabled sites, then the search / download headers, and finally open the session.
    Args:
        **kwargs: forwarded to BaseMusicClient; 'allowed_music_sources' is consumed here
                  (defaults to ALLOWED_SITES, duplicates removed).
    '''
    requested_sources = kwargs.pop('allowed_music_sources', JBSouMusicClient.ALLOWED_SITES)
    self.allowed_music_sources = list(set(requested_sources))
    super(JBSouMusicClient, self).__init__(**kwargs)
    # headers sent with search requests
    search_headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "origin": "https://www.jbsou.cn",
        "x-requested-with": "XMLHttpRequest",
        "accept": "application/json, text/javascript, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "referer": "https://www.jbsou.cn/",
    }
    # headers sent when fetching audio
    download_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
    }
    self.default_search_headers = search_headers
    self.default_download_headers = download_headers
    # the session starts out with the search headers (same dict object, not a copy)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
self.search_size_per_page = min(self.search_size_per_page, 10)
allowed_music_sources = copy.deepcopy(self.allowed_music_sources)
# construct search urls based on search rules
base_url = 'https://www.jbsou.cn/'
search_urls, page_size = [], self.search_size_per_page
for source in JBSouMusicClient.ALLOWED_SITES:
if source not in allowed_music_sources: continue
source_default_rule, count = {'input': keyword, 'filter': 'name', 'type': source, 'page': 1}, 0
source_default_rule.update(rule)
while self.search_size_per_source > count:
page_rule = copy.deepcopy(source_default_rule)
page_rule['page'] = str(int(count // page_size) + 1)
search_urls.append({'url': base_url, 'data': page_rule})
count += page_size
# return
return search_urls
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Function:
        POST one search descriptor and convert every hit into a SongInfo with resolved download / cover links and lyric.
    Args:
        keyword: search keyword (not read by this method itself).
        search_url: descriptor from `_constructsearchurls`; unpacked as keyword arguments of `self.post`.
        request_overrides: extra keyword arguments forwarded to every request; they take precedence over the built-in `timeout` / `allow_redirects` values.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: rich Progress used for status reporting; reporting is skipped when it is None.
        progress_id: task id passed to `progress.update`.
    Returns:
        The `song_infos` list.
    '''
    # init
    song_infos = [] if song_infos is None else song_infos  # never share one default list between calls
    request_overrides, base_url = request_overrides or {}, "https://www.jbsou.cn/"
    source = search_url['data']['type']

    def report(status: str):
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")

    # successful
    try:
        # --search results
        (resp := self.post(**search_url, **request_overrides)).raise_for_status()
        search_results = resp2json(resp)['data']
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('songid' not in search_result) or ('url' not in search_result):
                continue
            search_result['source'] = source
            # follow redirects so that the final audio location is stored
            download_url = urljoin(base_url, search_result['url'])
            try:
                # merged into one dict so an override of allow_redirects cannot raise "multiple values for keyword argument"
                (head_resp := self.session.head(download_url, **{'allow_redirects': True, **request_overrides})).raise_for_status()
                download_url = head_resp.url
            except Exception:
                continue
            # resolve the cover only when the hit carries one; an empty value would otherwise resolve to the site homepage
            cover_url = None
            if (raw_cover := search_result.get('cover')):
                cover_url = urljoin(base_url, raw_cover)
                try:
                    (head_resp := self.session.head(cover_url, **{'timeout': 10, 'allow_redirects': True, **request_overrides})).raise_for_status()
                    cover_url = head_resp.url
                except Exception:
                    pass  # best effort: keep the unresolved cover link
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': {}, 'lyric': {}},
                source=self.source,
                song_name=legalizestring(search_result.get('name')),
                singers=legalizestring(str(safeextractfromdict(search_result, ['artist'], "")).replace('/', ', ')),
                album=legalizestring(search_result.get('album')),
                ext=download_url.split('?')[0].split('.')[-1],
                file_size_bytes=None,
                file_size=None,
                identifier=search_result['songid'],
                duration_s=None,
                duration='-:-:-',
                lyric=None,
                cover_url=cover_url,
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                root_source=search_result['source'],
            )
            if not song_info.with_valid_download_url:
                continue
            song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']
            probed_ext = song_info.download_url_status['probe_status']['ext']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probed_ext in AudioLinkTester.VALID_AUDIO_EXTS):
                song_info.ext = probed_ext
            elif song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = 'mp3'
            # --lyric results (the duration is derived from the lyric timestamps)
            try:
                (resp := self.get(urljoin(base_url, search_result['lrc']), **request_overrides)).raise_for_status()
                lyric, lyric_result = cleanlrc(resp.text), {'lyric': resp.text}
                song_info.duration_s = extractdurationsecondsfromlrc(lyric)
                song_info.duration = seconds2hms(song_info.duration_s)
            except Exception:
                lyric_result, lyric = dict(), 'NULL'
            song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
            song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        report('Success')
    # failure
    except Exception as err:
        report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,119 @@
'''
Function:
Implementation of MP3JuiceMusicClient: https://mp3juice.co/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
import time
import base64
import json_repair
from urllib.parse import quote
from itertools import zip_longest
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, resp2json, byte2mb, SongInfo
'''MP3JuiceMusicClient'''
class MP3JuiceMusicClient(BaseMusicClient):
# Client identifier: passed as `source=` to every SongInfo built below and used as the prefix of progress messages.
source = 'MP3JuiceMusicClient'
def __init__(self, **kwargs):
    '''
    Function:
        Double the requested per-source search size, initialise the base client, then install headers and open the session.
    Args:
        **kwargs: forwarded to BaseMusicClient. When 'search_size_per_source' is given it is doubled first;
                  when it is omitted the base-class default is used unchanged instead of raising KeyError.
    '''
    # NOTE(review): the doubling presumably compensates for hits that are dropped later in `_search` - confirm.
    if 'search_size_per_source' in kwargs:
        kwargs['search_size_per_source'] = kwargs['search_size_per_source'] * 2
    super(MP3JuiceMusicClient, self).__init__(**kwargs)
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://mp3juice.sc/",
        "Origin": "https://mp3juice.sc",
    }
    # search and download use the same header values but must stay separate dict objects
    self.default_search_headers = dict(browser_headers)
    self.default_download_headers = dict(browser_headers)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_getdynamicconfig'''
def _getdynamicconfig(self, request_overrides: dict = None):
    '''
    Function:
        Download the landing page and extract the `var json = ...` configuration array embedded in its script.
    Args:
        request_overrides: extra keyword arguments forwarded to the request.
    Returns:
        The parsed configuration (consumed by `_calculateauth` and `_constructsearchurls`).
    Raises:
        ValueError: the page contains neither the `JSON.parse('...')` form nor the literal-array form.
    '''
    request_overrides = request_overrides or {}
    # the millisecond timestamp makes every request URL unique
    (resp := self.get(f"https://mp3juice.as/?t={int(time.time() * 1000)}", **request_overrides)).raise_for_status()
    # two page layouts are handled: a JSON.parse('...') call first, a bare array literal second
    match = re.search(r"var\s+json\s*=\s*JSON\.parse\('(.+?)'\);", resp.text) or re.search(r"var\s+json\s*=\s*(\[.+?\]);", resp.text)
    if match is None:
        raise ValueError('dynamic config ("var json = ...") not found in the mp3juice landing page')
    return json_repair.loads(match.group(1))
'''_calculateauth'''
def _calculateauth(self, raw_data):
data_arr, should_reverse, offset_arr, result_chars = raw_data[0], raw_data[1], raw_data[2], []; offset_len = len(offset_arr)
for t in range(len(data_arr)): result_chars.append(chr(data_arr[t] - offset_arr[offset_len - (t + 1)]))
if should_reverse: result_chars.reverse()
full_token = "".join(result_chars)
return full_token[:32]
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
config = self._getdynamicconfig(); auth_token = self._calculateauth(config)
# search rules
default_rule = {'k': auth_token, 'y': 's', 'q': base64.b64encode(quote(keyword, safe="").encode("utf-8")).decode("utf-8"), 't': str(int(time.time()))}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://mp3juice.sc/api/v1/search?'
page_rule = copy.deepcopy(default_rule)
search_urls = [{'url': base_url + urlencode(page_rule), 'auth_token': auth_token, 'param_key': chr(config[6])}]
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Function:
        Run the search request, interleave YouTube and SoundCloud hits, resolve a download link for each
        and download the audio immediately.
    Args:
        keyword: search keyword (not read by this method itself).
        search_url: descriptor from `_constructsearchurls` with 'url', 'auth_token' and 'param_key'.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: rich Progress used for status reporting; reporting is skipped when it is None.
        progress_id: task id passed to `progress.update`.
    Returns:
        The `song_infos` list.
    '''
    # init
    song_infos = [] if song_infos is None else song_infos  # never share one default list between calls
    request_overrides, search_meta = request_overrides or {}, copy.deepcopy(search_url)
    search_url, auth_token, param_key = search_meta['url'], search_meta['auth_token'], search_meta['param_key']

    def report(status: str):
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")

    # successful
    try:
        # --search results
        (resp := self.get(search_url, **{'allow_redirects': True, **request_overrides})).raise_for_status()
        payload = resp2json(resp)
        search_results_yt, search_results_sc = [], []
        # a missing or empty provider list no longer discards the other provider's hits
        for item in (payload.get("yt") or []):
            if isinstance(item, dict):
                item['root_source'] = 'YouTube'
                search_results_yt.append(item)
        for item in (payload.get("sc") or []):
            if isinstance(item, dict):
                item['root_source'] = 'SoundCloud'
                search_results_sc.append(item)
        # alternate YouTube / SoundCloud hits; the longer list supplies the tail
        search_results = [x for ab in zip_longest(search_results_yt, search_results_sc) for x in ab if x is not None]
        for search_result in search_results:
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
            # --download results
            if not isinstance(search_result, dict) or ('id' not in search_result):
                continue
            if search_result['root_source'] in ['SoundCloud'] and ('id_base64' not in search_result or 'title_base64' not in search_result):
                continue
            download_result = dict()
            # ----SoundCloud
            if search_result['root_source'] in ['SoundCloud']:
                download_url = f"https://thetacloud.org/s/{search_result['id_base64']}/{search_result['title_base64']}/"
            # ----YouTube: init -> convert -> redirect, each step returns the url of the next one
            else:
                params = {param_key: auth_token, 't': str(int(time.time()))}
                try:
                    (init_resp := self.get('https://theta.thetacloud.org/api/v1/init?', params=params, **request_overrides)).raise_for_status()
                except Exception:
                    continue
                download_result['init'] = resp2json(resp=init_resp)
                if not (convert_url := download_result['init'].get('convertURL', '')):
                    continue
                convert_url = f'{convert_url}&v={search_result["id"]}&f=mp3&t={str(int(time.time()))}'
                try:
                    (convert_resp := self.get(convert_url, **request_overrides)).raise_for_status()
                except Exception:
                    continue
                download_result['convert'] = resp2json(resp=convert_resp)
                if not (redirect_url := download_result['convert'].get('redirectURL', '')):
                    continue
                try:
                    (redirect_resp := self.get(redirect_url, **request_overrides)).raise_for_status()
                except Exception:
                    continue
                download_result['redirect'] = resp2json(resp=redirect_resp)
                if not (download_url := download_result['redirect'].get('downloadURL', '')):
                    continue
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                source=self.source,
                song_name=legalizestring(search_result.get('title')),
                singers='NULL',
                album='NULL',
                ext='mp3',
                file_size_bytes=None,
                file_size=None,
                identifier=search_result['id'],
                duration='-:-:-',
                lyric='NULL',
                cover_url=None,
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                root_source=search_result['root_source'],
            )
            if not song_info.with_valid_download_url:
                continue
            # ----you have to download the music contents immediately, otherwise the links will fail.
            try:
                (audio_resp := self.get(download_url, **request_overrides)).raise_for_status()
            except Exception:
                continue  # skip this hit instead of storing an error page or aborting the remaining hits
            song_info.downloaded_contents = audio_resp.content
            # len() is the payload size; __sizeof__() would add the bytes-object overhead
            song_info.file_size_bytes = len(song_info.downloaded_contents)
            song_info.file_size = byte2mb(song_info.file_size_bytes)
            # --append to song_infos
            song_infos.append(song_info)
        # --update progress
        report('Success')
    # failure
    except Exception as err:
        report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,122 @@
'''
Function:
Implementation of MyFreeMP3MusicClient: https://www.myfreemp3.com.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
from urllib.parse import urlparse
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, resp2json, usesearchheaderscookies, seconds2hms, extractdurationsecondsfromlrc, searchdictbykey, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''MyFreeMP3MusicClient'''
class MyFreeMP3MusicClient(BaseMusicClient):
# Client identifier: passed as `source=` to every SongInfo built below and used as the prefix of log / progress messages.
source = 'MyFreeMP3MusicClient'
def __init__(self, **kwargs):
    '''
    Function:
        Initialise the base client, choose the usable sources from the quark configuration, install headers and open the session.
    Args:
        **kwargs: forwarded unchanged to BaseMusicClient.
    '''
    super(MyFreeMP3MusicClient, self).__init__(**kwargs)
    # the 'kuake' source is only enabled when quark cookies are configured
    if self.quark_parser_config.get('cookies'):
        self.allowed_music_sources = ['kuake', 'netease']
    else:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so only "netease" source can be leveraged.')
        self.allowed_music_sources = ['netease']
    chrome_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    self.default_search_headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "priority": "u=1, i",
        "x-requested-with": "XMLHttpRequest",
        "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "origin": "https://www.myfreemp3.com.cn",
        "user-agent": chrome_user_agent,
        "sec-fetch-site": "same-origin",
    }
    self.default_download_headers = {"user-agent": chrome_user_agent}
    # the session starts out with the search headers (same dict object, not a copy)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
allowed_music_sources = copy.deepcopy(self.allowed_music_sources)
self.search_size_per_page = min(10, self.search_size_per_page)
# search rules
default_rule = {'type': 'netease', 'filter': 'name', 'page': '1', 'input': keyword}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://www.myfreemp3.com.cn/'
search_urls, page_size = [], self.search_size_per_page
for source in allowed_music_sources:
source_default_rule = copy.deepcopy(default_rule)
source_default_rule['type'], count = source, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(source_default_rule)
page_rule['page'] = str(int(count // page_size) + 1)
search_urls.append({'url': base_url, 'data': page_rule, 'source': source})
count += page_size
# return
return search_urls
'''_parseneteasesearchresult'''
def _parseneteasesearchresult(self, search_result: dict, request_overrides: dict = None):
    '''
    Function:
        Convert one 'netease' search hit into a SongInfo.
    Args:
        search_result: raw hit; must be a dict containing 'id'.
        request_overrides: extra keyword arguments forwarded to the requests; they take precedence over
                           the built-in `timeout` / `allow_redirects` values.
    Returns:
        A populated SongInfo, or an empty `SongInfo(source=self.source)` when the hit is unusable or its link is invalid.
    '''
    request_overrides = request_overrides or {}
    if (not isinstance(search_result, dict)) or ('id' not in search_result):
        return SongInfo(source=self.source)
    # merged into one dict so an override of timeout / allow_redirects cannot raise "multiple values for keyword argument"
    head_kwargs = {'timeout': 10, 'allow_redirects': True, **request_overrides}
    # the outer link redirects to the real audio file; keep the final location
    download_url = self.session.head(f'http://music.163.com/song/media/outer/url?id={search_result["id"]}.mp3', **head_kwargs).url
    # the lyric arrives inline, optionally prefixed like a data: URI
    lyric: str = cleanlrc((search_result.get('lrc', '') or '').removeprefix('data:text/plain,'))
    duration_in_secs = extractdurationsecondsfromlrc(lyric)
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': {}, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(search_result.get('title')),
        singers=legalizestring(search_result.get('author')),
        album='NULL',
        ext=download_url.split('?')[0].split('.')[-1],
        file_size='NULL',
        identifier=search_result['id'],
        duration_s=duration_in_secs,
        duration=seconds2hms(duration_in_secs),
        lyric=lyric,
        cover_url=search_result.get('pic'),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        root_source='netease',
    )
    if not song_info.with_valid_download_url:
        return SongInfo(source=self.source)
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    probed_ext = song_info.download_url_status['probe_status']['ext']
    # prefer the extension from the url, then the probed one, then fall back to mp3
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probed_ext in AudioLinkTester.VALID_AUDIO_EXTS):
        song_info.ext = probed_ext
    elif song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = 'mp3'
    return song_info
'''_parsequarksearchresult'''
def _parsequarksearchresult(self, search_result: dict, request_overrides: dict = None):
    '''
    Function:
        Convert one 'kuake' (Quark share link) search hit into a SongInfo.
    Args:
        search_result: raw hit; must be a dict containing 'url_kk'. Its 'id' key is overwritten with the last path segment of that link.
        request_overrides: extra keyword arguments forwarded to the link tester / prober.
    Returns:
        A populated SongInfo, or an empty `SongInfo(source=self.source)` when the hit is unusable or its link is invalid.
    '''
    request_overrides = request_overrides or {}
    if (not isinstance(search_result, dict)) or ('url_kk' not in search_result):
        return SongInfo(source=self.source)
    search_result['id'] = urlparse(str(search_result['url_kk'])).path.strip('/').split('/')[-1]
    quark_download_url = search_result['url_kk']
    download_result, download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
    if not download_url or not str(download_url).startswith('http'):
        return SongInfo(source=self.source)
    # first positive 'duration' value found anywhere in the parser output, else 0
    duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
    duration_in_secs = duration[0] if duration else 0
    # titles look like "<singers> - <song name>[.ext][ - extra]"; without a dash the whole title (minus extension) is the song name
    title = search_result.get('title')
    title_match = re.search(r'^\s*(.*?)\s*[-–—-]\s*(.*?)(?:\.[A-Za-z0-9]{1,5})?\s*(?:\s*[-–—-]\s*.*)?$', title.strip())
    if title_match:
        song_name, singers = title_match.group(2).strip(), title_match.group(1).strip()
    else:
        song_name, singers = re.sub(r'\.[^.]+$', '', title.strip()).strip(), ""
    lyric: str = cleanlrc((search_result.get('lrc', '') or '').removeprefix('data:text/plain,'))
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(song_name),
        singers=legalizestring(singers),
        album='NULL',
        ext='mp3',
        file_size=None,
        identifier=search_result['id'],
        duration_s=duration_in_secs,
        duration=seconds2hms(duration_in_secs),
        lyric=lyric,
        cover_url=search_result.get('pic'),
        download_url=download_url,
        download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides),
        root_source='quark',
        default_download_headers=self.quark_default_download_headers,
    )
    if not song_info.with_valid_download_url:
        return SongInfo(source=self.source)
    song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # the probed extension always wins here; anything unrecognised falls back to mp3
    song_info.ext = song_info.download_url_status['probe_status']['ext']
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = 'mp3'
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Function:
        POST one search descriptor and convert every hit with the parser that matches the descriptor's source.
    Args:
        keyword: search keyword (not read by this method itself).
        search_url: descriptor from `_constructsearchurls` holding 'url', 'source' and the request keyword arguments.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: rich Progress used for status reporting; reporting is skipped when it is None.
        progress_id: task id passed to `progress.update`.
    Returns:
        The `song_infos` list.
    '''
    # init
    song_infos = [] if song_infos is None else song_infos  # never share one default list between calls
    request_overrides = request_overrides or {}
    search_meta = copy.deepcopy(search_url)
    search_url, source = search_meta.pop('url'), search_meta.pop('source')
    parsers = {'netease': self._parseneteasesearchresult, 'kuake': self._parsequarksearchresult}

    def report(status: str):
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")

    # successful
    try:
        # an unknown source is reported as an error instead of silently yielding zero results
        if (parse_result := parsers.get(source)) is None:
            raise ValueError(f'unsupported source: {source}')
        # --search results
        (resp := self.post(search_url, **search_meta, **request_overrides)).raise_for_status()
        search_results = resp2json(resp)['data']['list']
        for search_result in search_results:
            # --download results (a hit that fails to parse is skipped, the rest continue)
            try:
                song_info = parse_result(search_result, request_overrides)
            except Exception:
                continue
            if not song_info.with_valid_download_url:
                continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        report('Success')
    # failure
    except Exception as err:
        report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,145 @@
'''
Function:
Implementation of TuneHubMusicClient: https://tunehub.sayqz.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import copy
import random
import base64
import requests
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlparse, parse_qs
from ..utils import legalizestring, resp2json, usesearchheaderscookies, seconds2hms, extractdurationsecondsfromlrc, safeextractfromdict, cleanlrc, SongInfo, AudioLinkTester
'''TuneHubMusicClient'''
class TuneHubMusicClient(BaseMusicClient):
# Client identifier used as the prefix of this client's progress messages.
source = 'TuneHubMusicClient'
# Upstream sites this client may query, in query order. The [:3] slice keeps netease / qq / kuwo and drops kugou / migu.
ALLOWED_SITES = ['netease', 'qq', 'kuwo', 'kugou', 'migu'][:3] # it seems kugou and migu are useless, recorded on 2026-01-28
# Quality identifiers; presumably tried in this order when resolving download links - the consumer is outside this excerpt, TODO confirm.
MUSIC_QUALITIES = ['flac24bit', 'flac', '320k', '128k']
# Quality identifiers; presumably for the api.baka.plus endpoint - the consumer is outside this excerpt, TODO confirm.
BAKA_MUSIC_QUALITIES = ['400', '380', '320', '128']
# Base64-encoded API keys; __init__ picks one at random, decodes it and sends it as the 'X-API-Key' search header.
# NOTE(review): these are credentials committed to source (base64 is encoding, not encryption) - consider loading them from configuration.
REQUEST_API_KEYS = ['dGhfOGYwMGQ4NzA5ZGJhOWQ0NDgwYmExOTE2NjgxNDdlMWI3YjkzNjkyMDkyMGZhNjZm', 'dGhfZDgzYzY4YjA5NDVlYzYxMjZjNDQxMzkwN2MxYzc3MmI3YmI3ZGUwODU4NWI0N2Y1']
def __init__(self, **kwargs):
    '''
    Function:
        Configure the enabled sites, pick an API key, install the search / download headers and open the session.
    Args:
        **kwargs: forwarded to BaseMusicClient; 'allowed_music_sources' is consumed here
                  (defaults to ALLOWED_SITES, duplicates removed).
    '''
    requested_sources = kwargs.pop('allowed_music_sources', TuneHubMusicClient.ALLOWED_SITES)
    self.allowed_music_sources = list(set(requested_sources))
    super(TuneHubMusicClient, self).__init__(**kwargs)
    # one of the bundled keys, chosen at random for this instance and base64-decoded
    encoded_key = random.choice(TuneHubMusicClient.REQUEST_API_KEYS)
    api_key = base64.b64decode(str(encoded_key).encode('utf-8')).decode('utf-8')
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
    self.default_search_headers = {
        'User-Agent': user_agent,
        'X-API-Key': api_key,
    }
    # the API key is deliberately left out of the download headers
    self.default_download_headers = {
        'User-Agent': user_agent,
    }
    self.default_headers = self.default_search_headers
    self._initsession()
'''_tunehubkuwosearch: https://tunehub.sayqz.com/api/v1/methods/kuwo/search'''
def _tunehubkuwosearch(self, keyword: str, page: int = 1, limit: int = 20, timeout: float = 10.0):
    '''
    Function:
        Query the kuwo search endpoint directly and normalise the hits.
    Args:
        keyword: text to search for.
        page: 1-based page number; None or values below 1 fall back to 1.
        limit: results per page; None or non-positive values fall back to 20.
        timeout: request timeout passed to `requests.get`.
    Returns:
        A list of {'id', 'name', 'artist', 'album', 'source': 'kuwo'} dicts; empty when the response has no 'abslist'.
    '''
    page_number = 1 if (page is None or int(page) < 1) else int(page)
    page_limit = 20 if (limit is None or int(limit) <= 0) else int(limit)
    endpoint = "http://search.kuwo.cn/r.s"
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
    # 'pn' is zero-based on the wire, hence page_number - 1
    query = {
        "client": "kt", "all": keyword, "pn": page_number - 1, "rn": page_limit, "uid": "794762570", "ver": "kwplayer_ar_9.2.2.1",
        "vipver": "1", "show_copyright_off": "1", "newver": "1", "ft": "music", "cluster": "0", "strategy": "2012",
        "encoding": "utf8", "rformat": "json", "vermerge": "1", "mobi": "1", "issubtitle": "1",
    }
    resp = requests.get(endpoint, params=query, headers=request_headers, timeout=timeout)
    resp.raise_for_status()
    data: dict = resp.json()
    abslist = data.get("abslist")
    if not abslist:
        return []
    # non-dict entries are ignored; the "MUSIC_" prefix is stripped from the id and "&" separates artists
    return [
        {
            "id": str(item.get("MUSICRID", "")).replace("MUSIC_", ""),
            "name": item.get("SONGNAME", ""),
            "artist": (item.get("ARTIST", "") or "").replace("&", ", "),
            "album": item.get("ALBUM") or "",
            "source": "kuwo",
        }
        for item in abslist if isinstance(item, dict)
    ]
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build the list of search jobs for `keyword`.

    For 'netease' and 'qq' a meting search URL (str) is produced. For every other allowed
    source one dict {'search_api': callable, 'rule': kwargs} is produced per result page
    until `self.search_size_per_source` items are covered; `rule` overrides the default
    kwargs except 'page' and 'limit', which are always set per page.

    The keyword is percent-encoded before it is placed in the meting URL so that characters
    such as '&', '#' or '+' cannot truncate or alter the query string.
    `request_overrides` is accepted for interface compatibility and is not used here.
    '''
    from urllib.parse import quote
    # init
    rule = rule or {}
    page_size = self.search_size_per_page
    meting_servers = {'netease': 'netease', 'qq': 'tencent'}
    local_search_apis = {'kuwo': self._tunehubkuwosearch}
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls based on search rules
    search_urls = []
    for source in TuneHubMusicClient.ALLOWED_SITES:
        if source not in self.allowed_music_sources:
            continue
        if source in meting_servers:
            search_urls.append(f"https://api.baka.plus/meting?server={meting_servers[source]}&type=search&id=0&yrc=false&keyword={encoded_keyword}")
            continue
        # locally implemented search: the keyword is passed as a request parameter, so it stays unencoded here
        source_default_rule = {'keyword': keyword, 'page': 1, 'limit': 20, 'timeout': 10.0}
        source_default_rule.update(rule)
        count = 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(source_default_rule)
            page_rule['page'] = str(int(count // page_size) + 1)
            page_rule['limit'] = str(page_size)
            search_urls.append({'search_api': local_search_apis[source], 'rule': page_rule})
            count += page_size
    # return
    return search_urls
'''_search'''
# _search: run one search job (a remote search URL or a local search callable), resolve a
# download URL for every hit, attach lyrics, and append the resulting SongInfo objects to
# `song_infos`, which is also returned. Any exception raised while processing ends the job
# and is reported through `progress`.
# NOTE(review): `song_infos` defaults to a mutable list literal, which is shared between
# calls that omit the argument — confirm every caller passes its own list.
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str | dict = None, request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
# a dict job carries a callable plus its kwargs; a str job is fetched with self.get and parsed as JSON
if isinstance(search_url, dict): search_results = search_url['search_api'](**search_url['rule'])
else: (resp := self.get(search_url, **request_overrides)).raise_for_status(); search_results = resp2json(resp)
for search_result in search_results:
# --download results
# skip entries that are not dicts, have neither 'id' nor 'url', or have no 'source'
if not isinstance(search_result, dict) or ('id' not in search_result and 'url' not in search_result) or ('source' not in search_result): continue
# when 'id' is missing, take it from the `id` query parameter of the entry's 'url'
if 'id' not in search_result: search_result['id'] = parse_qs(urlparse(str(search_result['url'])).query, keep_blank_values=True).get('id')[0]
# map the backend name to the internal source name; an unknown name raises KeyError, which the outer except reports
search_result['source'] = {'netease': 'netease', 'tencent': 'qq', 'kuwo': 'kuwo'}[search_result['source']]
# placeholder, replaced once a download URL has been resolved below
song_info = SongInfo(source=self.source, root_source=search_result['source'])
if search_result['source'] in {'netease', 'qq'}:
# netease tries every entry of BAKA_MUSIC_QUALITIES, qq only the first one
for br in (TuneHubMusicClient.BAKA_MUSIC_QUALITIES if search_result['source'] in {'netease'} else TuneHubMusicClient.BAKA_MUSIC_QUALITIES[:1]):
params = {'br': br, 'id': search_result['id'], 'server': {'netease': 'netease', 'qq': 'tencent', 'kuwo': 'kuwo'}[search_result['source']], 'type': 'url'}
# HEAD request with redirects followed: the final URL is used as the download URL; a failure moves on to the next quality
try: (resp := self.session.head('https://api.baka.plus/meting?', timeout=10, params=params, allow_redirects=True, **request_overrides)).raise_for_status(); download_url = resp.url
except Exception: continue
# the cover is resolved the same way; on failure the raw 'pic' value (or "") is kept
try: (resp := self.session.head(safeextractfromdict(search_result, ['pic'], None), timeout=10, allow_redirects=True, **request_overrides)).raise_for_status(); cover_url = resp.url
except Exception: cover_url = safeextractfromdict(search_result, ['pic'], None) or ""
song_info = SongInfo(
raw_data={'search': search_result, 'download': {}, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(search_result.get('artist')),
album=legalizestring(search_result.get('album', None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration='-:-:-',
lyric=None, cover_url=cover_url, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides), root_source=search_result['source'],
)
if song_info.root_source in ['tencent']: song_info.root_source = 'qq'
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
# prefer the probed extension when the URL-derived one is not a known audio extension; otherwise fall back to 'mp3'
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
# stop at the first quality that yields a valid download URL
if song_info.with_valid_download_url: break
elif search_result['source'] in {'kuwo'}:
for quality in TuneHubMusicClient.MUSIC_QUALITIES:
data = {'quality': quality, 'ids': search_result['id'], 'platform': search_result['source']}
# a failed parse request abandons the remaining qualities (break), unlike an unusable URL below (continue)
try: (resp := self.post('https://tunehub.sayqz.com/api/v1/parse?', timeout=10, data=data, **request_overrides)).raise_for_status(); download_result = resp2json(resp=resp)
except Exception: break
download_url = safeextractfromdict(download_result, ['data', 'data', 0, 'url'], "")
if not download_url or not download_url.startswith('http'): continue
duration_in_secs = safeextractfromdict(download_result, ['data', 'data', 0, 'info', 'duration'], 0)
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(search_result.get('artist')),
album=legalizestring(search_result.get('album', None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs,
duration=seconds2hms(duration_in_secs), lyric=safeextractfromdict(download_result, ['data', 'data', 0, 'lyrics'], None), cover_url=safeextractfromdict(download_result, ['data', 'data', 0, 'cover'], None),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides), root_source=search_result['source'],
)
# a 'lyrics' value that is a URL is moved to search_result['lrc'] so the lyric step below fetches it
if str(song_info.lyric).startswith('http'): search_result['lrc'] = song_info.lyric; song_info.lyric = None
if song_info.lyric: song_info.lyric = cleanlrc(song_info.lyric)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
# drop hits for which no valid download URL could be resolved
if not song_info.with_valid_download_url: continue
# --lyric results
# fetch the lyric from search_result['lrc']; on success the duration is recomputed with extractdurationsecondsfromlrc.
# Any failure (including a missing 'lrc' key) leaves the lyric and duration set above untouched.
try: (resp := self.get(search_result['lrc'], **request_overrides)).raise_for_status(); lyric, lyric_result = cleanlrc(resp.text), {'lyric': resp.text}; song_info.duration_s = extractdurationsecondsfromlrc(lyric); song_info.duration = seconds2hms(song_info.duration_s)
except Exception: lyric_result, lyric = dict(), 'NULL'
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1 @@
'''initialize'''
@@ -0,0 +1,2 @@
'''initialize'''
from .jsinterp import JSInterpreter, extractplayerjsglobalvar
File diff suppressed because one or more lines are too long
@@ -0,0 +1,901 @@
'''
Function:
Implementation of JSInterpreter (Refer To https://pytubefix.readthedocs.io/en/latest/index.html)
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import json
import math
import datetime
import calendar
import operator
import itertools
import contextlib
import email.utils
import collections
from functools import update_wrapper
from contextlib import suppress as compat_contextlib_suppress
'''settings'''
# strptime patterns shared by both orderings; unifiedtimestamp tries them in this order
DATE_FORMATS = (
    # textual month, optional ordinal suffix
    '%d %B %Y', '%d %b %Y',
    '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', '%B %drd %Y', '%B %dth %Y',
    '%b %d %Y', '%b %dst %Y', '%b %dnd %Y', '%b %drd %Y', '%b %dth %Y',
    '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %drd %Y %I:%M', '%b %dth %Y %I:%M',
    # numeric, year first
    '%Y %m %d', '%Y-%m-%d', '%Y.%m.%d.', '%Y/%m/%d', '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M', '%Y%m%d%H%M%S', '%Y%m%d',
    '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M', '%d.%m.%Y %H.%M',
    # ISO 8601 style
    '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M',
    # "<date> at <time>" and time-first
    '%b %d %Y at %H:%M', '%b %d %Y at %H:%M:%S', '%B %d %Y at %H:%M', '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)
# common patterns followed by the ambiguous numeric ones read as month/day
DATE_FORMATS_MONTH_FIRST = [
    *DATE_FORMATS,
    '%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', '%m/%d/%Y %H:%M:%S',
]
# timezone abbreviation -> offset from UTC in hours
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,
    'EST': -5, 'EDT': -4,
    'CST': -6, 'CDT': -5,
    'MST': -7, 'MDT': -6,
    'PST': -8, 'PDT': -7,
}
# common patterns followed by the ambiguous numeric ones read as day/month
DATE_FORMATS_DAY_FIRST = [
    *DATE_FORMATS,
    '%d-%m-%Y', '%d.%m.%Y', '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', '%d-%m-%Y %H:%M', '%H:%M %d/%m/%Y',
]
'''js2json'''
def js2json(code, vars={}, *, strict=False):
    '''
    Convert a JavaScript object/array literal into JSON text.

    `vars` maps identifier names to replacement text. With strict=False unknown identifiers
    are emitted as JSON strings and a few constructor/function call forms are simplified
    first; with strict=True an unknown identifier raises ValueError.
    '''
    # constants
    quote_chars = '\'"`'
    string_re = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in quote_chars)
    comment_re = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    skip_re = fr'\s*(?:{comment_re})?\s*'
    integer_table = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){skip_re}:?$', 16),
        (fr'(?s)^(0+[0-7]+){skip_re}:?$', 8),
    )
    passthrough_escapes = R'"\bfnrtu'

    # translate one escape sequence (or a bare double quote) inside a string literal
    def unescape(match):
        esc = match.group(1) or match.group(2)
        if esc in passthrough_escapes:
            return Rf'\{esc}'
        if esc == 'x':
            return R'\u00'
        if esc == '\n':
            return ''
        return esc

    # evaluate the expression inside ${...} of a template literal
    def expand_template(match):
        evaluated = js2json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
            return json.loads(evaluated)
        return evaluated

    # rewrite a single token matched by the tokenizer regex at the end of this function
    def convert_token(m):
        token: str = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token in ('undefined', 'void 0'):
            return 'null'
        if token.startswith(('/*', '//', '!')) or token == ',':
            return ''
        if token[0] in quote_chars:
            body = token[1:-1]
            if token[0] == '`':
                body = re.sub(r'(?s)\${([^}]+)}', expand_template, body)
            escaped = re.sub(r'(?s)(")|\\(.)', unescape, body)
            return f'"{escaped}"'
        for pattern, base in integer_table:
            hit = re.match(pattern, token)
            if hit:
                number = int(hit.group(1), base)
                return f'"{number}":' if token.endswith(':') else str(number)
        if token in vars:
            try:
                if not strict:
                    json.loads(vars[token])
            except json.JSONDecodeError:
                return json.dumps(vars[token])
            else:
                return vars[token]
        if not strict:
            return f'"{token}"'
        raise ValueError(f'Unknown value: {token}')

    # turn `new Map([...])` into a JSON object
    def map_to_object(mobj):
        pairs = json.loads(js2json(mobj.group(1) or '[]', vars=vars))
        return json.dumps(dict(pairs))

    # process
    code = re.sub(r'new Map\((\[.*?\])?\)', map_to_object, code)
    if not strict:
        loose_rewrites = (
            (r'new Date\((".+")\)', r'\g<1>'),
            (r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0))),
            (r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1'),
            (r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1'),
        )
        for pattern, replacement in loose_rewrites:
            code = re.sub(pattern, replacement, code)
    # return
    token_re = rf'''(?sx)
        {string_re}|
        {comment_re}|,(?={skip_re}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip_re}:)?|
        [0-9]+(?={skip_re}:)|
        !+
        '''
    return re.sub(token_re, convert_token, code)
'''extracttimezone'''
def extracttimezone(date_str):
    '''
    Split a trailing timezone off `date_str`.

    Returns (offset, remainder): `offset` is a datetime.timedelta (zero when no known zone
    is found) and `remainder` is `date_str` without the recognised timezone suffix.
    '''
    offset_pattern = r'''(?x)
        ^.{8,}?                                              # >=8 char non-TZ prefix, if present
        (?P<tz>Z|                                            # just the UTC Z, or
        (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                       # preceded by 4 digits or hh:mm or
        (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))            # not preceded by 3 alpha word or >= 4 alpha or 2 digits
        [ ]?                                                 # optional space
        (?P<sign>\+|-)                                       # +/-
        (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})           # hh[:]mm
        $)
        '''
    explicit = re.search(offset_pattern, date_str)
    if explicit:
        # 'Z' or a numeric +hh[:]mm / -hh[:]mm suffix
        date_str = date_str[:-len(explicit.group('tz'))]
        sign_char = explicit.group('sign')
        if not sign_char:
            return datetime.timedelta(), date_str
        sign = 1 if sign_char == '+' else -1
        offset = datetime.timedelta(hours=sign * int(explicit.group('hours')), minutes=sign * int(explicit.group('minutes')))
        return offset, date_str
    # otherwise look for an upper-case abbreviation directly after a time of day
    named = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    hours = TIMEZONE_NAMES.get(named and named.group('tz').strip())
    if hours is not None:
        date_str = date_str[:-len(named.group('tz'))]
    return datetime.timedelta(hours=hours or 0), date_str
'''dateformats'''
def dateformats(day_first=True):
    '''Return the strptime pattern list for day-first input when `day_first` is truthy, else the month-first list.'''
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
'''unifiedtimestamp'''
def unifiedtimestamp(date_str, day_first=True):
    '''
    Parse a free-form date string into a UNIX timestamp.

    `day_first` selects whether ambiguous numeric dates are read as day/month (True) or
    month/day (False). Returns None when `date_str` is not a str or cannot be parsed.
    The strptime path returns an int; the email.utils fallback returns a float because
    the timezone offset is subtracted as total_seconds().
    '''
    if not isinstance(date_str, str):
        return None
    # Drop commas, pipes and weekday names, then collapse whitespace. 'sun' has to be listed as
    # well: otherwise "Sunday, 26 Nov 2006, 19:00" keeps its weekday and no parser below accepts it.
    date_str = re.sub(r'\s+', ' ', re.sub(r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extracttimezone(date_str)
    # remove AM/PM together with an optional timezone word after it
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # remove an unrecognised upper-case timezone word that follows a time of day
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]
    # %f accepts at most six fractional digits, so cut anything beyond microseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)
    for expression in dateformats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
    # last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
    return None
'''removequotes'''
def removequotes(s):
    '''Strip one pair of matching single or double quotes surrounding `s`; anything else is returned unchanged.'''
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
'''truncatestring'''
def truncatestring(s, left, right=0):
    '''
    Shorten `s` to its first `left - 3` characters, an ellipsis and its last `right`
    characters. Strings no longer than `left + right` (and None) are returned unchanged.
    '''
    assert left > 3 and right >= 0
    if s is None:
        return s
    if len(s) <= left + right:
        return s
    head = s[:left - 3]
    tail = s[-right:] if right else ""
    return f'{head}...{tail}'
'''jsbitop'''
def jsbitop(op):
    '''
    Wrap the binary operator `op` as a bitwise operator: None, JSUndefined and NaN operands
    count as 0 and the result is masked with 0xffffffff.
    '''
    def as_operand(value):
        if value in (None, JSUndefined):
            return 0
        try:
            not_a_number = math.isnan(value)
        except TypeError:
            # value does not support a NaN test; use it as it is
            not_a_number = False
        return 0 if not_a_number else value

    def wrapped(a, b):
        result = op(as_operand(a), as_operand(b))
        return result & 0xffffffff

    return wrapped
'''jsarithop'''
def jsarithop(op):
    '''
    Wrap the binary operator `op` as an arithmetic operator: a JSUndefined operand gives NaN
    and any other falsy operand is replaced by 0.
    '''
    def wrapped(a, b):
        if JSUndefined in (a, b):
            return float('nan')
        lhs = a or 0
        rhs = b or 0
        return op(lhs, rhs)

    return wrapped
'''jsdiv'''
def jsdiv(a, b):
    '''
    Divide `a` by `b`: NaN when an operand is JSUndefined or both operands are falsy,
    positive infinity when only the divisor is falsy.
    '''
    if JSUndefined in (a, b):
        return float('nan')
    if not a and not b:
        return float('nan')
    if not b:
        return float('inf')
    return (a or 0) / b
'''jsmod'''
def jsmod(a, b):
    '''Remainder of `a` by `b`: NaN when an operand is JSUndefined or the divisor is falsy; a falsy `a` counts as 0.'''
    has_undefined = JSUndefined in (a, b)
    if has_undefined or not b:
        return float('nan')
    dividend = a or 0
    return dividend % b
'''jsexp'''
def jsexp(a, b):
    '''Raise `a` to the power `b`: a falsy exponent gives 1, otherwise a JSUndefined operand gives NaN; a falsy base counts as 0.'''
    if not b:
        return 1
    if JSUndefined in (a, b):
        return float('nan')
    base = a or 0
    return base ** b
'''jseqop'''
def jseqop(op):
    '''
    Wrap the comparison `op` as a loose (in)equality operator: None and JSUndefined are
    treated as equal to each other, every other pair is compared with `op` directly.

    The nullish test uses identity checks rather than building a set of the operands, so
    unhashable operands such as lists or dicts can be compared without a TypeError.
    '''
    def is_nullish(value):
        return value is None or value is JSUndefined

    def wrapped(a, b):
        if is_nullish(a) and is_nullish(b):
            # compare the value with itself so that null == undefined holds
            return op(a, a)
        return op(a, b)

    return wrapped
'''jscompop'''
def jscompop(op):
    '''
    Wrap the comparison `op` as a relational operator: a JSUndefined operand gives False,
    falsy operands count as 0, and when either original operand is a str both sides are
    compared as strings.
    '''
    def wrapped(a, b):
        if JSUndefined in (a, b):
            return False
        lhs, rhs = a or 0, b or 0
        if isinstance(a, str) or isinstance(b, str):
            return op(str(lhs), str(rhs))
        return op(lhs, rhs)

    return wrapped
'''jsternary'''
def jsternary(cndn, if_true=True, if_false=False):
    '''Return `if_false` when `cndn` is False, None, 0, '', JSUndefined or NaN, otherwise `if_true`.'''
    falsy = cndn in (False, None, 0, '', JSUndefined)
    if not falsy:
        try:
            falsy = math.isnan(cndn)
        except TypeError:
            # values that cannot be NaN-tested count as truthy
            falsy = False
    return if_false if falsy else if_true
'''jstypeof'''
def jstypeof(expr):
    '''
    Return the `typeof` name for a Python value: 'undefined', 'boolean', 'number', 'string',
    'function' or 'object'.

    Types are tested explicitly instead of through a dict lookup: a dict keyed by True/False
    also matches 1, 0, 1.0 and 0.0 (they compare and hash equal), which would label those
    numbers as 'boolean'.
    '''
    # bool must be tested before the numeric types because bool is a subclass of int
    if isinstance(expr, bool):
        return 'boolean'
    if expr is None:
        return 'object'
    if isinstance(expr, (str, bytes)):
        return 'string'
    if isinstance(expr, (int, float, complex)):
        return 'number'
    # JSUndefined is a class and therefore callable, so it has to be handled before callable()
    if expr is JSUndefined:
        return 'undefined'
    if callable(expr):
        return 'function'
    return 'object'
'''wrapsop'''
def wrapsop(op):
    '''Return a decorator that copies the metadata of `op` onto the decorated function and prefixes its __name__ with 'JS_'.'''
    def decorate(wrapper):
        renamed = update_wrapper(wrapper, op)
        renamed.__name__ = 'JS_' + renamed.__name__
        return renamed

    return decorate
'''jsunaryop'''
def jsunaryop(op):
    '''Adapt the one-argument callable `op` to the two-argument operator signature; the first argument is ignored.'''
    def wrapped(_, a):
        return op(a)

    return wrapsop(op)(wrapped)
'''extractplayerjsglobalvar'''
def extractplayerjsglobalvar(jscode):
    '''
    Find the global `var NAME = ...` declaration that follows "use strict" in `jscode`,
    where the value is either a quoted string with .split(...) applied or an array of
    quoted strings.

    Returns (declaration, name, value), or (None, None, None) when nothing matches.
    '''
    pattern = r'''(?x)
        (?P<q1>["\'])use\s+strict(?P=q1);\s*
        (?P<code>
            var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
            (?P<value>
                (?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
                \.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
                |\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
            )
        )[;,]
        '''
    found = re.search(pattern, jscode)
    if found is None:
        return None, None, None
    return found.group('code'), found.group("name"), found.group("value")
'''fixupnfunctioncode'''
def fixupnfunctioncode(argnames, code, full_code):
    '''
    Prepare the body of the player function for interpretation.

    When `full_code` declares a global string table (see extractplayerjsglobalvar), that
    declaration is prepended to `code`. The early-exit guard
    `;if(typeof X === "undefined") return <first argument>;` is replaced by `;`.

    The first argument name is regex-escaped before it is placed in the pattern, because
    JavaScript identifiers may contain `$`, which would otherwise act as an anchor and
    prevent the guard from being matched.

    Returns (argnames, patched_code).
    '''
    global_var, _, _ = extractplayerjsglobalvar(full_code)
    if global_var:
        code = global_var + '; ' + code
    first_arg = re.escape(argnames[0])
    guard_re = rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(["\'])undefined\1\s*\)\s*return\s+{first_arg};'
    return argnames, re.sub(guard_re, ';', code)
'''NODEFAULT'''
class NODEFAULT:
    '''Marker class; it has no members of its own.'''
'''FunctionWithRepr'''
class FunctionWithRepr:
    '''Callable wrapper around `func` whose repr() is `repr_` when given, else "<module>.<qualname>" of `func`.'''

    def __init__(self, func, repr_=None):
        self.func = func
        self.__repr = repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def __repr__(self):
        return self.__repr or f'{self.func.__module__}.{self.func.__qualname__}'
'''JSUndefined'''
class JSUndefined:
    '''Sentinel standing for `undefined`; the class object itself is used as the value.'''
'''JSBreak'''
class JSBreak(Exception):
    '''Raised when a `break` statement is interpreted; loop and switch handlers catch it.'''

    def __init__(self):
        super().__init__('Invalid break')
'''JSContinue'''
class JSContinue(Exception):
    '''Raised when a `continue` statement is interpreted; the for-loop handler catch it and moves on.'''

    def __init__(self):
        super().__init__('Invalid continue')
'''JSThrow'''
class JSThrow(Exception):
    '''Carries the value of an interpreted `throw` statement in `.error`.'''

    def __init__(self, e):
        self.error = e
        super().__init__(f'Uncaught exception {e}')
'''LocalNameSpace'''
class LocalNameSpace(collections.ChainMap):
    '''
    Scope chain for interpreted variables: assigning to a name updates the first scope that
    already holds it, otherwise the name is created in the innermost scope. Deleting is not
    supported.
    '''

    def __setitem__(self, key, value):
        owner = next((scope for scope in self.maps if key in scope), self.maps[0])
        owner[key] = value

    def __delitem__(self, key):
        raise NotImplementedError('Deleting is not supported')
'''constants'''
# Binary operators mapped to their implementation (None: handled directly in JSInterpreter._operator).
# The insertion order is significant: handleoperators tries the operators in this order.
_OPERATORS = {
    '?': None,
    '??': None,
    '||': None,
    '&&': None,
    '|': jsbitop(operator.or_),
    '^': jsbitop(operator.xor),
    '&': jsbitop(operator.and_),
    '===': operator.is_,
    '!==': operator.is_not,
    '==': jseqop(operator.eq),
    '!=': jseqop(operator.ne),
    '<=': jscompop(operator.le),
    '>=': jscompop(operator.ge),
    '<': jscompop(operator.lt),
    '>': jscompop(operator.gt),
    '>>': jsbitop(operator.rshift),
    '<<': jsbitop(operator.lshift),
    '+': jsarithop(operator.add),
    '-': jsarithop(operator.sub),
    '*': jsarithop(operator.mul),
    '%': jsmod,
    '/': jsdiv,
    '**': jsexp,
}
# keyword-style unary operators
_UNARY_OPERATORS_X = {
    'void': jsunaryop(lambda _: JSUndefined),
    'typeof': jsunaryop(jstypeof),
}
# comparison operators, excluded from the compound-assignment patterns
_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
# binary operators first, then the unary ones
_ALL_OPERATORS = dict(_OPERATORS)
_ALL_OPERATORS.update(_UNARY_OPERATORS_X)
# identifier pattern
_NAME_RE = r'[a-zA-Z_$][\w$]*'
# opening bracket -> matching closing bracket
_MATCHING_PARENS = {'(': ')', '{': '}', '[': ']'}
# characters that open a quoted string or a regex literal
_QUOTES = '\'"/'
# bracket contents with up to two further levels of nested brackets
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
'''JSInterpreter'''
class JSInterpreter:
__named_object_counter = 0
_RE_FLAGS = {'d': 1024, 'g': 2048, 'i': re.I, 'm': re.M, 's': re.S, 'u': re.U, 'y': 4096}
def __init__(self, code, objects=None):
self.code, self._functions = code, {}
self._objects = {} if objects is None else objects
'''Exception'''
class Exception(Exception):
    '''
    Interpreter error. When `expr` is given, the message is suffixed with a truncated copy
    of the offending expression.

    `cause` is accepted as a keyword because the interpreter raises this class with
    `cause=e`; the builtin Exception.__init__ rejects keyword arguments, so passing it
    through would turn every such raise into a TypeError. The cause is stored on
    `.cause` and, when it is an exception, also chained through `__cause__`.
    '''
    def __init__(self, msg, expr=None, *args, cause=None, **kwargs):
        if expr is not None:
            msg = f'{msg.rstrip()} in: {truncatestring(expr, 50, 50)}'
        super().__init__(msg, *args, **kwargs)
        self.cause = cause
        # __cause__ only accepts None or an exception instance
        if isinstance(cause, BaseException):
            self.__cause__ = cause
'''_namedobject'''
def _namedobject(self, namespace, obj):
    '''
    Store `obj` in `namespace` under a newly generated name and return that name.
    Callables that are not already a FunctionWithRepr are wrapped so that their repr
    is 'F<n>', where n is the running object counter.
    '''
    self.__named_object_counter += 1
    counter = self.__named_object_counter
    name = f'__pytubefix_jsinterp_obj{counter}'
    needs_wrapper = callable(obj) and not isinstance(obj, FunctionWithRepr)
    namespace[name] = FunctionWithRepr(obj, f'F<{counter}>') if needs_wrapper else obj
    return name
'''_regexflags'''
@classmethod
def _regexflags(cls, expr):
    '''
    Consume the regex flag letters at the start of `expr`.

    Returns (flags, rest): `flags` is the OR of the `_RE_FLAGS` values of the leading flag
    letters and `rest` is the text that follows them. Scanning stops at the first
    character that is not a flag, and that character is kept in `rest` (for
    'g.test(x)' the remainder is '.test(x)', not 'test(x)').
    '''
    flags = 0
    if not expr:
        return flags, expr
    consumed = 0
    for ch in expr:
        if ch not in cls._RE_FLAGS:
            break
        flags |= cls._RE_FLAGS[ch]
        consumed += 1
    return flags, expr[consumed:]
'''_separate'''
# _separate: generator that splits `expr` on `delim`, but only where the delimiter sits outside
# every bracket pair and outside quoted strings / regex literals. Yields the pieces in order; with
# `max_split` set, stops splitting after that many splits and yields the remainder as the last piece.
# Yields nothing at all for an empty `expr`.
@staticmethod
def _separate(expr, delim=',', max_split=None):
# characters after which a following '/' opens a regex literal and a following '+'/'-' is a sign
OP_CHARS = '+-*/%&|^=<>!,;{}:['
if not expr: return
# open-bracket depth, keyed by the closing bracket character
counters = {k: 0 for k in _MATCHING_PARENS.values()}
# `pos` is the index inside `delim` matched so far; `delim_len` is the index of its last character
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
# `after_op` starts as True so that a '/' at the very beginning opens a regex literal
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
for idx, char in enumerate(expr):
if not in_quote and char in _MATCHING_PARENS:
counters[_MATCHING_PARENS[char]] += 1
elif not in_quote and char in counters:
# unbalanced closing brackets are ignored instead of driving the depth negative
if counters[char]: counters[char] -= 1
elif not escaping:
if char in _QUOTES and in_quote in (char, None):
# toggle the quote state; a '/' only opens a literal after an operator, and a '/' inside a
# regex character group [...] does not close the literal
if in_quote or after_op or char != '/': in_quote = None if in_quote and not in_regex_char_group else char
elif in_quote == '/' and char in '[]':
in_regex_char_group = char == '['
# a backslash inside a quote escapes the next character
escaping = not escaping and in_quote and char == '\\'
# '+' / '-' directly after another operator character (possibly separated by whitespace) is a sign, not a delimiter
in_unary_op = (not in_quote and not in_regex_char_group and after_op not in (True, False) and char in '-+')
# remember the last operator character; whitespace keeps the previous state, anything else resets it
after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
pos = 0
continue
elif pos != delim_len:
# matched part of a multi-character delimiter; keep going
pos += 1
continue
# full delimiter matched at top level: emit the piece that ends just before it
yield expr[start: idx - delim_len]
start, pos = idx + 1, 0
splits += 1
if max_split and splits >= max_split: break
yield expr[start:]
'''_separateatparen'''
@classmethod
def _separateatparen(cls, expr, delim=None):
    '''
    Split `expr` at the first top-level `delim`; by default that is the bracket closing the
    one `expr` starts with. Returns (inside, rest), both stripped, with the first character
    of `expr` removed from `inside`. Raises cls.Exception when no terminator is found.
    '''
    closing = delim if delim is not None else (expr and _MATCHING_PARENS[expr[0]])
    parts = list(cls._separate(expr, closing, 1))
    if len(parts) < 2:
        raise cls.Exception(f'No terminating paren {closing}', expr)
    inside, rest = parts[0], parts[1]
    return inside[1:].strip(), rest.strip()
'''_operator'''
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
    '''
    Apply the binary operator `op` to the already evaluated `left_val` and the unevaluated
    `right_expr`.

    '||', '&&' and '??' short-circuit on the left value; '?' selects one of the two
    ':'-separated branches of `right_expr` before evaluating it. Operators without an
    implementation in _OPERATORS (including a missing `op`) return the right value.
    A failing operator implementation is re-raised as self.Exception.
    '''
    if op in ('||', '&&'):
        # '||' keeps a truthy left value, '&&' keeps a falsy one
        keep_left = jsternary(left_val) != (op == '&&')
        if keep_left:
            return left_val
    elif op == '??':
        if left_val not in (None, JSUndefined):
            return left_val
    elif op == '?':
        branches = self._separate(right_expr, ':', 1)
        right_expr = jsternary(left_val, *branches)
    right_val = self.interpretexpression(right_expr, local_vars, allow_recursion)
    implementation = _OPERATORS.get(op)
    if not implementation:
        return right_val
    try:
        return implementation(left_val, right_val)
    except Exception as e:
        raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)
'''_index'''
def _index(self, obj, idx, allow_undefined=False):
if idx == 'length': return len(obj)
try:
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
except Exception as e:
if allow_undefined: return JSUndefined
raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
'''_dump'''
def _dump(self, obj, namespace):
try: return json.dumps(obj)
except TypeError: return self._namedobject(namespace, obj)
'''handleoperators'''
# handleoperators: try to evaluate `expr` as "<left> <op> <right>". The operators of _ALL_OPERATORS
# are tried in their definition order; for the first operator that actually splits `expr`, the left
# side is interpreted and combined with the right side through _operator.
# Returns (value, True) in that case and None (implicitly) when no operator splits the expression.
def handleoperators(self, expr, local_vars, allow_recursion):
for op in _ALL_OPERATORS:
separated = list(self._separate(expr, op))
# the last piece is the right operand; everything before it is re-joined into the left operand below
right_expr = separated.pop()
while True:
# NOTE: `op in '?<>*-'` is a substring test. An empty piece left of the operator means the operator
# character directly follows another split point, so it is glued back onto the right operand.
if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip(): separated.pop()
# a '?' followed by '.' is re-attached to the right operand as well (presumably optional chaining — confirm)
elif not (separated and op == '?' and right_expr.startswith('.')): break
right_expr = f'{op}{right_expr}'
# for every operator except '-', the piece before the operator is pulled into the right operand too
if op != '-': right_expr = f'{separated.pop()}{op}{right_expr}'
# nothing left of the operator: this operator does not split the expression, try the next one
if not separated: continue
left_val = self.interpretexpression(op.join(separated), local_vars, allow_recursion)
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
'''interpretstatement'''
def interpretstatement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0: raise self.Exception('Recursion limit reached')
allow_recursion -= 1
should_return = False
sub_statements = list(self._separate(stmt, ';')) or ['']
expr = stmt = sub_statements.pop().strip()
for sub_stmt in sub_statements:
ret, should_return = self.interpretstatement(sub_stmt, local_vars, allow_recursion)
if should_return: return ret, should_return
m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
if m:
expr = stmt[len(m.group(0)):].strip()
if m.group('throw'): raise JSThrow(self.interpretexpression(expr, local_vars, allow_recursion))
should_return = not m.group('var')
if not expr: return None, should_return
if expr[0] in _QUOTES:
inner, outer = self._separate(expr, expr[0], 1)
if expr[0] == '/':
flags, outer = self._regexflags(outer)
inner = f'{inner}/{flags}'
else:
inner = json.loads(js2json(f'{inner}{expr[0]}', strict=True))
if not outer: return inner, should_return
expr = self._namedobject(local_vars, inner) + outer
if expr.startswith('new '):
obj = expr[4:]
if obj.startswith('Date('):
left, right = self._separateatparen(obj[4:])
date = unifiedtimestamp(self.interpretexpression(left, local_vars, allow_recursion), False)
if date is None: raise self.Exception(f'Failed to parse date {left!r}', expr)
expr = self._dump(int(date * 1000), local_vars) + right
else:
raise self.Exception(f'Unsupported object {obj}', expr)
if expr.startswith('void '):
left = self.interpretexpression(expr[5:], local_vars, allow_recursion)
return None, should_return
for op in _UNARY_OPERATORS_X:
if not expr.startswith(op): continue
operand = expr[len(op):]
if not operand or operand[0] != ' ': continue
op_result = self.handleoperators(expr, local_vars, allow_recursion)
if op_result: return op_result[0], should_return
if expr.startswith('{'):
inner, outer = self._separateatparen(expr)
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
def dictitem_func(key, val):
val = self.interpretexpression(val, local_vars, allow_recursion)
if re.match(_NAME_RE, key): return key, val
return self.interpretexpression(key, local_vars, allow_recursion), val
return dict(dictitem_func(k, v) for k, v in sub_expressions), should_return
inner, should_abort = self.interpretstatement(inner, local_vars, allow_recursion)
if not outer or should_abort:
return inner, should_abort or should_return
else:
expr = self._dump(inner, local_vars) + outer
if expr.startswith('('):
inner, outer = self._separateatparen(expr)
inner, should_abort = self.interpretstatement(inner, local_vars, allow_recursion)
if not outer or should_abort: return inner, should_abort or should_return
else: expr = self._dump(inner, local_vars) + outer
if expr.startswith('['):
inner, outer = self._separateatparen(expr)
name = self._namedobject(local_vars, [self.interpretexpression(item, local_vars, allow_recursion) for item in self._separate(inner)])
expr = name + outer
m = re.match(r'''(?x)
(?P<try>try)\s*\{|
(?P<if>if)\s*\(|
(?P<switch>switch)\s*\(|
(?P<for>for)\s*\(
''', expr)
md = m.groupdict() if m else {}
if md.get('if'):
cndn, expr = self._separateatparen(expr[m.end() - 1:])
if expr.startswith('{'): if_expr, expr = self._separateatparen(expr)
else: if_expr, expr = self._separateatparen(' %s;' % (expr,), delim=';')
else_expr = None
m = re.match(r'else\s*{', expr)
if m: else_expr, expr = self._separateatparen(expr[m.end() - 1:])
cndn = jsternary(self.interpretexpression(cndn, local_vars, allow_recursion))
ret, should_abort = self.interpretstatement(if_expr if cndn else else_expr, local_vars, allow_recursion)
if should_abort: return ret, True
if md.get('try'):
try_expr, expr = self._separateatparen(expr[m.end() - 1:])
err = None
try:
ret, should_abort = self.interpretstatement(try_expr, local_vars, allow_recursion)
if should_abort: return ret, True
except Exception as e:
err = e
pending = (None, False)
m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
if m:
sub_expr, expr = self._separateatparen(expr[m.end() - 1:])
if err:
catch_vars = {}
if m.group('err'): catch_vars[m.group('err')] = err.error if isinstance(err, JSThrow) else err
catch_vars = local_vars.new_child(catch_vars)
err, pending = None, self.interpretstatement(sub_expr, catch_vars, allow_recursion)
m = re.match(r'finally\s*\{', expr)
if m:
sub_expr, expr = self._separateatparen(expr[m.end() - 1:])
ret, should_abort = self.interpretstatement(sub_expr, local_vars, allow_recursion)
if should_abort: return ret, True
ret, should_abort = pending
if should_abort: return ret, True
if err: raise err
elif md.get('for'):
constructor, remaining = self._separateatparen(expr[m.end() - 1:])
if remaining.startswith('{'):
body, expr = self._separateatparen(remaining)
else:
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
if switch_m:
switch_val, remaining = self._separateatparen(remaining[switch_m.end() - 1:])
body, expr = self._separateatparen(remaining, '}')
body = 'switch(%s){%s}' % (switch_val, body)
else:
body, expr = remaining, ''
start, cndn, increment = self._separate(constructor, ';')
self.interpretexpression(start, local_vars, allow_recursion)
while True:
if not jsternary(self.interpretexpression(cndn, local_vars, allow_recursion)): break
try:
ret, should_abort = self.interpretstatement(body, local_vars, allow_recursion)
if should_abort: return ret, True
except JSBreak: break
except JSContinue: pass
self.interpretexpression(increment, local_vars, allow_recursion)
elif md.get('switch'):
switch_val, remaining = self._separateatparen(expr[m.end() - 1:])
switch_val = self.interpretexpression(switch_val, local_vars, allow_recursion)
body, expr = self._separateatparen(remaining, '}')
items = body.replace('default:', 'case default:').split('case ')[1:]
for default in (False, True):
matched = False
for item in items:
case, stmt = (i.strip() for i in self._separate(item, ':', 1))
if default: matched = matched or case == 'default'
elif not matched: matched = (case != 'default' and switch_val == self.interpretexpression(case, local_vars, allow_recursion))
if not matched: continue
try:
ret, should_abort = self.interpretstatement(stmt, local_vars, allow_recursion)
if should_abort: return ret
except JSBreak:
break
if matched: break
if md:
ret, should_abort = self.interpretstatement(expr, local_vars, allow_recursion)
return ret, should_abort or should_return
sub_expressions = list(self._separate(expr))
if len(sub_expressions) > 1:
for sub_expr in sub_expressions:
ret, should_abort = self.interpretstatement(sub_expr, local_vars, allow_recursion)
if should_abort: return ret, True
return ret, False
p =fr'''(?x)
(?P<out>{_NAME_RE})(?:\[(?P<index>{_NESTED_BRACKETS})\])?\s*
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
=(?!=)(?P<expr>.*)$
'''
m = re.match(p, expr)
if m:
left_val = local_vars.get(m.group('out'))
if not m.group('index'):
local_vars[m.group('out')] = self._operator(m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
return local_vars[m.group('out')], should_return
elif left_val in (None, JSUndefined):
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
idx = self.interpretexpression(m.group('index'), local_vars, allow_recursion)
if not isinstance(idx, (int, float)): raise self.Exception(f'List index {idx} must be integer', expr)
idx = int(idx)
left_val[idx] = self._operator(m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
return left_val[idx], should_return
for m in re.finditer(rf'''(?x)
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
(?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
var = m.group('var1') or m.group('var2')
start, end = m.span()
sign = m.group('pre_sign') or m.group('post_sign')
ret = local_vars[var]
local_vars[var] += 1 if sign[0] == '+' else -1
if m.group('pre_sign'): ret = local_vars[var]
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
if not expr: return None, should_return
m = re.match(fr'''(?x)
(?P<assign>
(?P<out>{_NAME_RE})(?:\[(?P<index>{_NESTED_BRACKETS})\])?\s*
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
=(?!=)(?P<expr>.*)$
)|(?P<return>
(?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
)|(?P<attribute>
(?P<var>{_NAME_RE})(?:
(?P<nullish>\?)?\.(?P<member>[^(]+)|
\[(?P<member2>{_NESTED_BRACKETS})\]
)\s*
)|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<function>
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
)''', expr)
if m and m.group('assign'):
left_val = local_vars.get(m.group('out'))
if not m.group('index'):
local_vars[m.group('out')] = self._operator(m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
return local_vars[m.group('out')], should_return
elif left_val in (None, JSUndefined):
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
idx = self.interpretexpression(m.group('index'), local_vars, allow_recursion)
if not isinstance(idx, (int, float)):
raise self.Exception(f'List index {idx} must be integer', expr)
idx = int(idx)
left_val[idx] = self._operator(m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
return left_val[idx], should_return
elif expr.isdigit():
return int(expr), should_return
elif expr == 'break':
raise JSBreak()
elif expr == 'continue':
raise JSContinue()
elif expr == 'undefined':
return JSUndefined, should_return
elif expr == 'NaN':
return float('NaN'), should_return
elif m and m.group('return'):
try:
return local_vars[m.group('name')], should_return
except KeyError as e:
return self.extractglobalvar(e.args[0]), should_return
with contextlib.suppress(ValueError):
return json.loads(js2json(expr, strict=True)), should_return
if m and m.group('indexing'):
val = local_vars[m.group('in')]
idx = self.interpretexpression(m.group('idx'), local_vars, allow_recursion)
return self._index(val, idx), should_return
op_result = self.handleoperators(expr, local_vars, allow_recursion)
if op_result: return op_result[0], should_return
if m and m.group('attribute'):
variable, member, nullish = m.group('var', 'member', 'nullish')
if not member: member = self.interpretexpression(m.group('member2'), local_vars, allow_recursion)
arg_str = expr[m.end():]
if arg_str.startswith('('): arg_str, remaining = self._separateatparen(arg_str)
else: arg_str, remaining = None, arg_str
def assertion(cndn, msg):
if not cndn: raise self.Exception(f'{member} {msg}', expr)
def evalmethod_func():
nonlocal member
types = {'String': str, 'Math': float, 'Array': list}
obj = local_vars.get(variable, types.get(variable, NODEFAULT))
if obj is NODEFAULT:
if variable not in self._objects:
try:
self._objects[variable] = self.extractobject(variable, local_vars)
except self.Exception:
if not nullish: raise Exception
obj = self._objects.get(variable, JSUndefined)
if nullish and obj is JSUndefined: return JSUndefined
if arg_str is None: return self._index(obj, member, nullish)
argvals = [self.interpretexpression(v, local_vars, allow_recursion) for v in self._separate(arg_str)]
if isinstance(obj, type) and member.startswith('prototype.'):
new_member, _, func_prototype = member.partition('.')[2].partition('.')
assertion(argvals, 'takes one or more arguments')
assertion(isinstance(argvals[0], obj), f'needs binding to type {obj}')
if func_prototype == 'call':
obj, *argvals = argvals
elif func_prototype == 'apply':
assertion(len(argvals) == 2, 'takes two arguments')
obj, argvals = argvals
assertion(isinstance(argvals, list), 'second argument needs to be a list')
else:
raise self.Exception(f'Unsupported Function method {func_prototype}', expr)
member = new_member
if obj == str:
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
return ''.join(map(chr, argvals))
raise self.Exception(f'Unsupported String method {member}', expr)
elif obj == float:
if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1]
raise self.Exception(f'Unsupported Math method {member}', expr)
if member == 'split':
assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) == 1, 'with limit argument is not implemented')
return obj.split(argvals[0]) if argvals[0] else list(obj)
elif member == 'join':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) == 1, 'takes exactly one argument')
return argvals[0].join(obj)
elif member == 'reverse':
assertion(not argvals, 'does not take any arguments')
obj.reverse()
return obj
elif member == 'slice':
assertion(isinstance(obj, (list, str)), 'must be applied on a list or string')
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
return obj[slice(*argvals, None)]
elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')
index, howMany = map(int, (argvals + [len(obj)])[:2])
if index < 0:
index += len(obj)
add_items = argvals[2:]
res = []
for i in range(index, min(index + howMany, len(obj))):
res.append(obj.pop(index))
for i, item in enumerate(add_items):
obj.insert(index + i, item)
return res
elif member == 'unshift':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')
for item in reversed(argvals): obj.insert(0, item)
return obj
elif member == 'pop':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(not argvals, 'does not take any arguments')
if not obj: return
return obj.pop()
elif member == 'push':
assertion(argvals, 'takes one or more arguments')
obj.extend(argvals)
return obj
elif member == 'forEach':
assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
f, this = (argvals + [''])[:2]
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
elif member == 'indexOf':
assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
idx, start = (argvals + [0])[:2]
try: return obj.index(idx, start)
except ValueError: return -1
elif member == 'charCodeAt':
assertion(isinstance(obj, str), 'must be applied on a string')
assertion(len(argvals) == 1, 'takes exactly one argument')
idx = argvals[0] if isinstance(argvals[0], int) else 0
if idx >= len(obj): return None
return ord(obj[idx])
idx = int(member) if isinstance(obj, list) else member
return obj[idx](argvals, allow_recursion=allow_recursion)
if remaining:
ret, should_abort = self.interpretstatement(self._namedobject(local_vars, evalmethod_func()) + remaining, local_vars, allow_recursion)
return ret, should_return or should_abort
else:
return evalmethod_func(), should_return
elif m and m.group('function'):
fname = m.group('fname')
argvals = [self.interpretexpression(v, local_vars, allow_recursion) for v in self._separate(m.group('args'))]
if fname in local_vars:
return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
elif fname not in self._functions:
self._functions[fname] = self.extractfunction(fname)
return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
raise self.Exception(f'Unsupported JS expression {truncatestring(expr, 20, 20) if expr != stmt else ""}', stmt)
'''interpretexpression'''
def interpretexpression(self, expr, local_vars, allow_recursion):
    '''Evaluate ``expr`` in expression position and return its value.

    The work is delegated to ``interpretstatement``. If that call reports
    that a ``return`` was executed, ``self.Exception`` is raised because an
    expression is not allowed to return.
    '''
    value, returned = self.interpretstatement(expr, local_vars, allow_recursion)
    if not returned:
        return value
    raise self.Exception('Cannot return from an expression', expr)
'''extractglobalvar'''
def extractglobalvar(self, var):
    '''Return the raw source text assigned to the global ``var`` in ``self.code``.

    The first ``var <name>=<value>;`` declaration is located and ``<value>``
    (matched non-greedily up to the first ``;``) is returned as an
    uninterpreted string.

    Raises:
        self.Exception: if ``self.code`` contains no such declaration.
    '''
    global_var = re.search(fr'''var\s?{re.escape(var)}=(?P<val>.*?);''', self.code)
    # re.search returns None on a miss; report it as an interpreter error
    # instead of letting ``None.group`` raise an opaque AttributeError.
    if global_var is None:
        raise self.Exception(f'Could not find global variable {var}')
    return global_var.group('val')
'''extractobject'''
def extractobject(self, objname, *global_stack):
    '''Collect the function-valued fields of the JS object literal ``objname``.

    ``self.code`` is searched for ``objname = { key: function(...){...}, ... };``.
    Every field found is compiled with ``buildfunction`` (sharing
    ``global_stack``) and stored under its unquoted key, wrapped in
    ``FunctionWithRepr`` with the label ``F<key>``.

    Raises:
        self.Exception: if the object literal cannot be found.
    '''
    func_name_re = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
    object_pattern = r'''(?x)
(?<![a-zA-Z$0-9.])%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), func_name_re)
    found = re.search(object_pattern, self.code)
    if found is None:
        raise self.Exception(f'Could not find object {objname}')
    field_pattern = r'''(?x)
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
''' % (func_name_re, _NAME_RE)
    methods = {}
    for field in re.finditer(field_pattern, found.group('fields')):
        params = field.group('args').split(',')
        key = removequotes(field.group('key'))
        compiled = self.buildfunction(params, field.group('code'), *global_stack)
        methods[key] = FunctionWithRepr(compiled, f'F<{key}>')
    return methods
'''extractfunctioncode'''
def extractfunctioncode(self, funcname):
    '''Find the definition of the JS function ``funcname`` in ``self.code``.

    Three declaration forms are recognised: ``function name(...)``,
    ``name = function(...)`` preceded by ``{``, ``;`` or ``,``, and
    ``var/const/let name = function(...)``.

    Returns:
        A ``(argnames, code)`` tuple: the stripped parameter names and the
        first element returned by ``_separateatparen`` for the matched
        ``{...}`` text.

    Raises:
        self.Exception: if no definition matches.
    '''
    pattern = r'''(?xs)
(?:
function\s+%(name)s|
[{;,]\s*%(name)s\s*=\s*function|
(?:var|const|let)\s+%(name)s\s*=\s*function
)\s*
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)}
    found = re.search(pattern, self.code)
    if found is None:
        raise self.Exception(f'Could not find JS function "{funcname}"')
    body, _rest = self._separateatparen(found.group('code'))
    argnames = [piece.strip() for piece in found.group('args').split(',')]
    return argnames, body
'''extractfunction'''
def extractfunction(self, funcname):
    '''Return a callable for the JS function ``funcname``.

    The located ``(argnames, code)`` pair is passed through
    ``fixupnfunctioncode`` together with ``self.code``, compiled with
    ``extractfunctionfromcode`` and wrapped in ``FunctionWithRepr`` under the
    label ``F<funcname>``.
    '''
    located = self.extractfunctioncode(funcname)
    patched = fixupnfunctioncode(*located, self.code)
    compiled = self.extractfunctionfromcode(*patched)
    return FunctionWithRepr(compiled, f'F<{funcname}>')
'''extractfunctionfromcode'''
def extractfunctionfromcode(self, argnames, code, *global_stack):
    '''Compile ``code`` into a callable, lifting anonymous inner functions first.

    Each ``function(...) {...}`` literal found in ``code`` is compiled
    recursively, registered in a fresh local mapping through ``_namedobject``
    and replaced in the text by the name that call returns. The rewritten
    code is then handed to ``buildfunction`` with that mapping placed ahead
    of ``global_stack``.
    '''
    closures = {}
    inner_re = re.compile(r'function\((?P<args>[^)]*)\)\s*{')
    inner = inner_re.search(code)
    while inner is not None:
        head_start, brace_end = inner.span()
        # The match ends just past the opening brace; step back one char so
        # the separator receives text that starts with '{'.
        inner_body, tail = self._separateatparen(code[brace_end - 1:])
        inner_args = [piece.strip() for piece in inner.group('args').split(',')]
        inner_func = self.extractfunctionfromcode(inner_args, inner_body, closures, *global_stack)
        placeholder = self._namedobject(closures, inner_func)
        code = code[:head_start] + placeholder + tail
        inner = inner_re.search(code)
    return self.buildfunction(argnames, code, closures, *global_stack)
'''callfunction'''
def callfunction(self, funcname, *args):
    '''Extract the JS function ``funcname`` and invoke it with ``args`` passed as a single tuple.'''
    js_function = self.extractfunction(funcname)
    return js_function(args)
'''buildfunction'''
def buildfunction(self, argnames, code, *global_stack):
    '''Wrap JS ``code`` in a Python callable ``resf(args, kwargs, allow_recursion)``.

    On every call the positional ``args`` are bound to ``argnames`` in the
    first mapping of the scope stack (a new dict when no stack was given),
    ``kwargs`` are merged on top, and ``code`` is run through
    ``interpretstatement`` with a ``LocalNameSpace`` built from the stack.
    The callable returns the statement's value when the interpreter signals
    an abort, otherwise ``None``.
    '''
    scopes = list(global_stack) or [{}]
    params = tuple(argnames)
    def resf(args, kwargs={}, allow_recursion=100):
        innermost = scopes[0]
        # zip_longest pads missing positional arguments with None.
        innermost.update(itertools.zip_longest(params, args, fillvalue=None))
        # kwargs is only read here, so the shared default dict is never mutated.
        innermost.update(kwargs)
        namespace = LocalNameSpace(*scopes)
        result, aborted = self.interpretstatement(code.replace('\n', ' '), namespace, allow_recursion - 1)
        return result if aborted else None
    return resf
File diff suppressed because one or more lines are too long
@@ -0,0 +1,91 @@
"""Lazy source registry to avoid importing every source at module import time."""
from __future__ import annotations
import importlib
from ..utils import BaseModuleBuilder
from .base import BaseMusicClient
# Registry of every known client: maps the public client class name to a
# (module path, attribute name) pair. Nothing is imported here; entries are
# resolved on demand by _load_client_class via importlib.
CLIENT_IMPORT_PATHS = {
# Platforms in Greater China
"QQMusicClient": ("musicdl.modules.sources.qq", "QQMusicClient"),
"KugouMusicClient": ("musicdl.modules.sources.kugou", "KugouMusicClient"),
"StreetVoiceMusicClient": ("musicdl.modules.sources.streetvoice", "StreetVoiceMusicClient"),
"SodaMusicClient": ("musicdl.modules.sources.soda", "SodaMusicClient"),
"FiveSingMusicClient": ("musicdl.modules.sources.fivesing", "FiveSingMusicClient"),
"NeteaseMusicClient": ("musicdl.modules.sources.netease", "NeteaseMusicClient"),
"QianqianMusicClient": ("musicdl.modules.sources.qianqian", "QianqianMusicClient"),
"MiguMusicClient": ("musicdl.modules.sources.migu", "MiguMusicClient"),
"KuwoMusicClient": ("musicdl.modules.sources.kuwo", "KuwoMusicClient"),
"BilibiliMusicClient": ("musicdl.modules.sources.bilibili", "BilibiliMusicClient"),
# Global Streaming / Indie
"YouTubeMusicClient": ("musicdl.modules.sources.youtube", "YouTubeMusicClient"),
"JooxMusicClient": ("musicdl.modules.sources.joox", "JooxMusicClient"),
"AppleMusicClient": ("musicdl.modules.sources.apple", "AppleMusicClient"),
"JamendoMusicClient": ("musicdl.modules.sources.jamendo", "JamendoMusicClient"),
"SoundCloudMusicClient": ("musicdl.modules.sources.soundcloud", "SoundCloudMusicClient"),
"DeezerMusicClient": ("musicdl.modules.sources.deezer", "DeezerMusicClient"),
"QobuzMusicClient": ("musicdl.modules.sources.qobuz", "QobuzMusicClient"),
"SpotifyMusicClient": ("musicdl.modules.sources.spotify", "SpotifyMusicClient"),
"TIDALMusicClient": ("musicdl.modules.sources.tidal", "TIDALMusicClient"),
# Audio / Radio
"XimalayaMusicClient": ("musicdl.modules.audiobooks.ximalaya", "XimalayaMusicClient"),
"LizhiMusicClient": ("musicdl.modules.audiobooks.lizhi", "LizhiMusicClient"),
"QingtingMusicClient": ("musicdl.modules.audiobooks.qingting", "QingtingMusicClient"),
"LRTSMusicClient": ("musicdl.modules.audiobooks.lrts", "LRTSMusicClient"),
# Aggregators / Multi-Source Gateways
"MP3JuiceMusicClient": ("musicdl.modules.common.mp3juice", "MP3JuiceMusicClient"),
"TuneHubMusicClient": ("musicdl.modules.common.tunehub", "TuneHubMusicClient"),
"GDStudioMusicClient": ("musicdl.modules.common.gdstudio", "GDStudioMusicClient"),
"MyFreeMP3MusicClient": ("musicdl.modules.common.myfreemp3", "MyFreeMP3MusicClient"),
"JBSouMusicClient": ("musicdl.modules.common.jbsou", "JBSouMusicClient"),
# Unofficial Download Sites / Scrapers
"MituMusicClient": ("musicdl.modules.thirdpartysites.mitu", "MituMusicClient"),
"BuguyyMusicClient": ("musicdl.modules.thirdpartysites.buguyy", "BuguyyMusicClient"),
"GequbaoMusicClient": ("musicdl.modules.thirdpartysites.gequbao", "GequbaoMusicClient"),
"YinyuedaoMusicClient": ("musicdl.modules.thirdpartysites.yinyuedao", "YinyuedaoMusicClient"),
"FLMP3MusicClient": ("musicdl.modules.thirdpartysites.flmp3", "FLMP3MusicClient"),
"FangpiMusicClient": ("musicdl.modules.thirdpartysites.fangpi", "FangpiMusicClient"),
"FiveSongMusicClient": ("musicdl.modules.thirdpartysites.fivesong", "FiveSongMusicClient"),
"KKWSMusicClient": ("musicdl.modules.thirdpartysites.kkws", "KKWSMusicClient"),
"GequhaiMusicClient": ("musicdl.modules.thirdpartysites.gequhai", "GequhaiMusicClient"),
"LivePOOMusicClient": ("musicdl.modules.thirdpartysites.livepoo", "LivePOOMusicClient"),
"HTQYYMusicClient": ("musicdl.modules.thirdpartysites.htqyy", "HTQYYMusicClient"),
"JCPOOMusicClient": ("musicdl.modules.thirdpartysites.jcpoo", "JCPOOMusicClient"),
"TwoT58MusicClient": ("musicdl.modules.thirdpartysites.twot58", "TwoT58MusicClient"),
"ZhuolinMusicClient": ("musicdl.modules.thirdpartysites.zhuolin", "ZhuolinMusicClient"),
}
def _load_client_class(client_name: str):
    """Import and return the class registered under ``client_name``.

    Raises ``KeyError`` when ``client_name`` is not a key of
    ``CLIENT_IMPORT_PATHS``.
    """
    module_path, attr_name = CLIENT_IMPORT_PATHS[client_name]
    return getattr(importlib.import_module(module_path), attr_name)
def _build_client_factory(client_name: str):
    """Return a factory that imports ``client_name`` only when it is called.

    The factory forwards its keyword arguments to the client class and
    carries ``client_name`` as its ``__name__``.
    """
    def factory(**kwargs):
        client_cls = _load_client_class(client_name)
        return client_cls(**kwargs)
    factory.__name__ = client_name
    return factory
class MusicClientBuilder(BaseModuleBuilder):
    # One lazy factory per registered client name; the client modules themselves
    # are imported only when a factory is invoked.
    REGISTERED_MODULES = dict(
        (registered_name, _build_client_factory(registered_name))
        for registered_name in CLIENT_IMPORT_PATHS
    )
def __getattr__(name: str):
    """Module-level attribute hook: load a registered client class on first access.

    Names that are not keys of ``CLIENT_IMPORT_PATHS`` raise ``AttributeError``.
    """
    if name not in CLIENT_IMPORT_PATHS:
        raise AttributeError(name)
    return _load_client_class(name)
# Convenience entry point: the bound ``build`` method of a module-level MusicClientBuilder instance.
BuildMusicClient = MusicClientBuilder().build
# Public names. The client class names listed here are not bound at import time;
# they are resolved through the module-level __getattr__ when first accessed.
__all__ = ["BaseMusicClient", "BuildMusicClient", "MusicClientBuilder", *CLIENT_IMPORT_PATHS.keys()]
@@ -0,0 +1,195 @@
'''
Function:
Implementation of AppleMusicClient: https://music.apple.com/{geo}/new
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import copy
import shutil
from types import SimpleNamespace
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import APPLE_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils.appleutils import AppleMusicClientDownloadSongUtils, AppleMusicClientAPIUtils, AppleMusicClientItunesApiUtils, DownloadItem, SongCodec, RemuxMode
from ..utils import touchdir, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, usedownloadheaderscookies, useparseheaderscookies, hostmatchessuffix, obtainhostname, cleanlrc, SongInfo, SongInfoUtils, AudioLinkTester
'''AppleMusicClient'''
class AppleMusicClient(BaseMusicClient):
source = 'AppleMusicClient'
def __init__(self, use_wrapper: bool = False, wrapper_account_url: str = "http://127.0.0.1:30020/", language: str = "en-US", codec: str = None, wrapper_decrypt_ip: str = "127.0.0.1:10020", **kwargs):
    '''Store the Apple Music options, install default headers and initialise the session.

    Args:
        use_wrapper: whether the wrapper service is used.
        wrapper_account_url: account URL of the wrapper service.
        language: language passed to the Apple Music API helpers.
        codec: codec to download; when ``None`` it becomes ``SongCodec.ALAC``
            with the wrapper and ``SongCodec.AAC_LEGACY`` without it.
        wrapper_decrypt_ip: address handed to the download helper.
        **kwargs: forwarded to ``BaseMusicClient.__init__``.
    '''
    super(AppleMusicClient, self).__init__(**kwargs)
    # The API helpers start unset; _constructsearchurls fills them in.
    self.apple_music_api = None
    self.itunes_api = None
    self.use_wrapper = use_wrapper
    self.wrapper_account_url = wrapper_account_url
    self.language = language
    self.account_info = {}
    if codec is None:
        codec = SongCodec.ALAC if use_wrapper else SongCodec.AAC_LEGACY
    self.codec = codec
    self.wrapper_decrypt_ip = wrapper_decrypt_ip
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
    browser_headers = {
        "accept": "*/*", "accept-language": "en-US", "origin": "https://music.apple.com", "priority": "u=1, i", "sec-fetch-site": "same-site", "sec-ch-ua-platform": '"Windows"',
        "sec-ch-ua": '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"', "sec-ch-ua-mobile": "?0", "sec-fetch-mode": "cors", "referer": "https://music.apple.com",
        "sec-fetch-dest": "empty", "user-agent": user_agent,
    }
    # Search and parse headers start out equal but must be separate dict objects,
    # because the search headers are updated in place later on.
    self.default_search_headers = dict(browser_headers)
    self.default_parse_headers = dict(browser_headers)
    self.default_download_headers = {"user-agent": user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = None, progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
    '''Download one song and append its SongInfo to ``downloaded_song_infos``.

    When ``song_info.download_url`` is a plain string the base-class
    downloader handles it. Otherwise it is treated as a ``DownloadItem``:
    the track is fetched into a temporary ``apple_id_<identifier>`` directory
    by ``AppleMusicClientDownloadSongUtils.download`` and the staged file is
    moved to ``song_info.save_path``.

    Args:
        song_info: the song to download.
        request_overrides: extra request options (defaults to ``{}``).
        downloaded_song_infos: list that receives the result; a new list is
            created when omitted.
        progress: progress display updated while downloading.
        song_progress_id: task id inside ``progress``.
        auto_supplement_song: when true, the stored copy is first passed
            through ``SongInfoUtils.supplsonginfothensavelyricsthenwritetags``.

    Returns:
        ``downloaded_song_infos``. Failures are reported through the progress
        description only and leave the list unchanged.
    '''
    request_overrides = request_overrides or {}
    # A literal [] default would be shared by every call that omits the argument.
    if downloaded_song_infos is None: downloaded_song_infos = []
    if isinstance(song_info.download_url, str):
        return super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=downloaded_song_infos, progress=progress, song_progress_id=song_progress_id, auto_supplement_song=auto_supplement_song)
    def describe(state):
        # Progress label: song name shortened to at most 13 characters plus the current state.
        return f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} ({state})"
    tmp_dir = f'apple_id_{str(song_info.identifier)}'
    try:
        touchdir(song_info.work_dir)
        touchdir(tmp_dir)
        download_item: DownloadItem = song_info.download_url
        progress.update(song_progress_id, total=1, kind='overall')
        progress.update(song_progress_id, description=describe('Downloading'))
        AppleMusicClientDownloadSongUtils.download(download_item=download_item, work_dir=tmp_dir, silent=self.disable_print, codec=self.codec, wrapper_decrypt_ip=self.wrapper_decrypt_ip, artist=song_info.singers, use_wrapper=self.use_wrapper, remux_mode=RemuxMode.FFMPEG)
        shutil.move(download_item.staged_path, song_info.save_path)
        saved_size = os.path.getsize(song_info.save_path)
        progress.update(song_progress_id, total=saved_size, kind='download')
        progress.advance(song_progress_id, saved_size)
        progress.update(song_progress_id, description=describe('Success'))
        downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
    except Exception as err:
        progress.update(song_progress_id, description=describe(f'Error: {err}'))
    finally:
        # Remove the staging directory on failure as well, so aborted downloads leave nothing behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return downloaded_song_infos
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Prepare the API helpers and headers, then build the paged catalog search URLs.

    Args:
        keyword: search term.
        rule: query parameters that override the defaults.
        request_overrides: extra request options passed to the API helpers.

    Returns:
        A list of search URLs, one per page of ``self.search_size_per_page``
        results, covering ``self.search_size_per_source`` results in total.
        The list is empty when the page size is not positive.
    '''
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    has_user_token = bool(self.default_cookies) and ('media-user-token' in self.default_cookies)
    if (not has_user_token) and (not self.use_wrapper):
        self.logger_handle.warning(f'{self.source}._constructsearchurls >>> both "media-user-token" and "use_wrapper" are not configured, so song downloads are restricted and only the preview portion of the track can be downloaded.')
    # create the API helper once: the wrapper takes precedence over cookies
    if self.use_wrapper and (not self.apple_music_api):
        self.apple_music_api = AppleMusicClientAPIUtils.createfromwrapper(wrapper_account_url=self.wrapper_account_url, request_overrides=request_overrides, language=self.language)
    elif has_user_token and (not self.apple_music_api):
        self.apple_music_api = AppleMusicClientAPIUtils.createfromnetscapecookies(cookies=self.default_cookies, request_overrides=request_overrides, language=self.language)
    if self.apple_music_api and (not self.itunes_api):
        self.itunes_api = AppleMusicClientItunesApiUtils(storefront=self.apple_music_api.storefront, language=self.apple_music_api.language)
    # make sure the session headers carry an authorization entry
    if self.apple_music_api and ('authorization' not in self.default_headers):
        self.default_search_headers = copy.deepcopy(self.apple_music_api.client.headers)
        self.default_headers = self.default_search_headers
        self._initsession()
        self.account_info = self.apple_music_api.account_info
    elif 'authorization' not in self.default_headers:
        virtual_client = SimpleNamespace(client=self.session, language=self.language)
        self.default_search_headers.update({"authorization": f"Bearer {AppleMusicClientAPIUtils.gettoken(virtual_client, request_overrides=request_overrides)}"})
        self.default_headers = self.default_search_headers
        self._initsession()
    # search rules
    # "types" used to appear twice in this literal; only the later value ever took effect,
    # so it is kept here (at the key's original position) and the dead value is dropped.
    default_rule = {
        "groups": "song", "l": "en-US", "offset": "0", "term": keyword, "types": "activities,albums,apple-curators,artists,curators,music-movies,music-videos,playlists,songs,stations,tv-episodes,uploaded-videos", "art[url]": "f", "extend": "artistUrl", "fields[albums]": "artistName,artistUrl,artwork,contentRating,editorialArtwork,editorialNotes,name,playParams,releaseDate,url,trackCount", "fields[artists]": "url,name,artwork",
        "format[resources]": "map", "include[editorial-items]": "contents", "include[songs]": "artists", "limit": "10", "omit[resource]": "autos", "platform": "web", "relate[albums]": "artists", "relate[editorial-items]": "contents", "relate[songs]": "albums", "with": "lyrics,serverBubbles",
    }
    default_rule.update(rule)
    geo = safeextractfromdict(self.account_info, ['meta', 'subscription', 'storefront'], 'us')
    # construct search urls based on search rules
    base_url = f'https://amp-api-edge.music.apple.com/v1/catalog/{geo}/search?'
    search_urls, page_size, count = [], self.search_size_per_page, 0
    if page_size <= 0:
        # A non-positive page size would never advance the offset and loop forever.
        self.logger_handle.warning(f'{self.source}._constructsearchurls >>> "search_size_per_page" must be positive, got {page_size!r}; no search urls are constructed.')
        return search_urls
    while self.search_size_per_source > count:
        page_rule = copy.deepcopy(default_rule)
        page_rule['limit'] = page_size
        page_rule['offset'] = count
        search_urls.append(base_url + urlencode(page_rule))
        count += page_size
    # return
    return search_urls
'''_parsewithnonvipofficialapiv1'''
def _parsewithnonvipofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Build a SongInfo from one search-result entry, using the preview URL found at
# attributes.previews[0].url of that entry as the download URL.
# NOTE(review): indentation is missing in this view of the file, so which of the statements
# after the SongInfo(...) construction belong to the `else:` branch must be confirmed against the real source.
# init: empty placeholder result, default request overrides, and an empty SongInfo when no song_info_flac is given
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
# return the empty placeholder unless the entry is a dict with a truthy 'id' and type 'songs'
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))) or (search_result.get('type') not in {'songs'}): return song_info
# obtain basic song_info: reuse song_info_flac when lossless is considered sufficient, it has a valid download url and its ext is listed as lossless
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
# give up (empty placeholder) when the entry has no preview url starting with 'http'
if not (download_url := safeextractfromdict(search_result, ['attributes', 'previews', 0, 'url'], '')) or not str(download_url).startswith('http'): return song_info
# durationInMillis is converted to seconds; any conversion failure falls back to 0
try: duration_in_secs = float(safeextractfromdict(search_result, ['attributes', 'durationInMillis'], 0)) / 1000
except Exception: duration_in_secs = 0
# ext is taken from the last dot-separated part of the url path (query string removed)
song_info = SongInfo(
raw_data={'search': search_result, 'download': {}, 'lyric': {}}, source=self.source, song_name=legalizestring(safeextractfromdict(search_result, ['attributes', 'name'], None)), singers=legalizestring(safeextractfromdict(search_result, ['attributes', 'artistName'], None)), album=legalizestring(safeextractfromdict(search_result, ['attributes', 'albumName'], None)),
ext=str(download_url).split('?')[0].split('.')[-1], file_size=None, identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=safeextractfromdict(search_result, ['attributes', 'artwork', 'url'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
# probe the link and take the file size from the probe result
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
# when the url-derived ext is not a known audio ext, prefer the probed ext if valid, otherwise fall back to 'mp3'
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
# fill the w/h/f placeholders of the artwork url with 600x600 jpg
if song_info.cover_url and song_info.cover_url.startswith('http'): song_info.cover_url = song_info.cover_url.format(w=600, h=600, f='jpg')
# return
return song_info
'''_parsewithvipofficialapiv1'''
def _parsewithvipofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Build a SongInfo whose download_url is a DownloadItem (not a string) obtained through
# the catalog song endpoint and AppleMusicClientDownloadSongUtils.getdownloaditem.
# NOTE(review): indentation is missing in this view of the file, so whether the final cover-url
# statement belongs to the `else:` branch must be confirmed against the real source.
# init: empty placeholder result, default request overrides, an empty SongInfo when no song_info_flac is given, and the configured codec
song_info, request_overrides, song_info_flac, codec = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source), self.codec
# return the empty placeholder unless the entry is a dict with a truthy 'id' and type 'songs'
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))) or (search_result.get('type') not in {'songs'}): return song_info
# obtain basic song_info: reuse song_info_flac when lossless is considered sufficient, it has a valid download url and its ext is listed as lossless
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
# storefront from the account info, 'us' when unavailable
geo = safeextractfromdict(self.account_info, ['meta', 'subscription', 'storefront'], 'us')
# fetch the song resource with extended asset urls, lyrics and albums; HTTP errors raise
(resp := self.get(f'https://amp-api.music.apple.com/v1/catalog/{geo}/songs/{song_id}', params={"extend": "extendedAssetUrls", "include": "lyrics,albums"}, **request_overrides)).raise_for_status()
download_item: DownloadItem = AppleMusicClientDownloadSongUtils.getdownloaditem(song_metadata=(download_result := resp2json(resp=resp))['data'][0], playlist_metadata=None, codec=codec, apple_music_api=self.apple_music_api, itunes_api=self.itunes_api, request_overrides=request_overrides, use_wrapper=self.use_wrapper)
# request the stream url once; only the status is checked, the response is not used afterwards in this method
(resp := self.get(download_item.stream_info.audio_track.stream_url, **request_overrides)).raise_for_status()
# durationInMillis is converted to seconds; any conversion failure falls back to 0
try: duration_in_secs = float(safeextractfromdict(search_result, ['attributes', 'durationInMillis'], 0)) / 1000
except Exception: duration_in_secs = 0
# file_size is the literal 'HLS' and the status is set to ok without probing
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(safeextractfromdict(search_result, ['attributes', 'name'], None)), singers=legalizestring(safeextractfromdict(search_result, ['attributes', 'artistName'], None)), album=legalizestring(safeextractfromdict(search_result, ['attributes', 'albumName'], None)),
ext=download_item.stream_info.file_format.value, file_size='HLS', identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(str(download_item.lyrics.synced)) or 'NULL', cover_url=safeextractfromdict(search_result, ['attributes', 'artwork', 'url'], None), download_url=download_item, download_url_status={'ok': True},
)
# fill the w/h/f placeholders of the artwork url with 600x600 jpg
if song_info.cover_url and song_info.cover_url.startswith('http'): song_info.cover_url = song_info.cover_url.format(w=600, h=600, f='jpg')
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every downloadable song to ``song_infos``.

    Args:
        keyword: search term (not used by this method itself).
        search_url: fully built search URL to request.
        request_overrides: extra request options (defaults to ``{}``).
        song_infos: list that receives the results; a new list is created
            when omitted.
        progress: progress display whose description is updated.
        progress_id: task id inside ``progress``.

    Returns:
        ``song_infos``. A failed request or malformed response is reported
        through the progress description; entries that fail to parse are skipped.
    '''
    # init
    request_overrides = request_overrides or {}
    # A literal [] default would be shared by every call that omits the argument.
    if song_infos is None: song_infos = []
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for song_key, search_result in dict(resp2json(resp)['resources']['songs']).items():
            search_result['song_key'] = song_key
            # --without a media-user-token cookie and without the wrapper only the non-vip (preview) parser applies
            if (not self.default_cookies or 'media-user-token' not in self.default_cookies) and (not self.use_wrapper):
                parse = self._parsewithnonvipofficialapiv1
            else:
                parse = self._parsewithvipofficialapiv1
            # --a single entry that fails to parse must not abort the whole page
            try: song_info = parse(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: continue
            # --append to song_infos
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve an Apple Music playlist url and parse every track in it.

    Args:
        playlist_url: playlist url; redirects are followed with a HEAD request before the host is validated.
        request_overrides: extra keyword arguments forwarded to the underlying requests.

    Returns:
        De-duplicated list of parsed songs with ``work_dir`` assigned; an empty list when the
        resolved url does not belong to ``APPLE_MUSIC_HOSTS``.

    Raises:
        PermissionError: when neither a "media-user-token" cookie nor ``use_wrapper`` is configured.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, APPLE_MUSIC_HOSTS)): return song_infos
    if (not self.default_cookies or 'media-user-token' not in self.default_cookies) and (not self.use_wrapper): raise PermissionError(f'{self.source}.parseplaylist >>> both "media-user-token" and "use_wrapper" are not configured, so musicdl does not have permission to parse Apple Music playlists.')
    # build the api client once; the previous unconditional cookie-based re-creation that followed these
    # branches overwrote the wrapper client and rebuilt the client on every call, so it has been removed
    if self.use_wrapper and (not self.apple_music_api):
        self.apple_music_api = AppleMusicClientAPIUtils.createfromwrapper(wrapper_account_url=self.wrapper_account_url, request_overrides=request_overrides, language=self.language)
    elif self.default_cookies and ('media-user-token' in self.default_cookies) and (not self.apple_music_api):
        self.apple_music_api = AppleMusicClientAPIUtils.createfromnetscapecookies(cookies=self.default_cookies, request_overrides=request_overrides, language=self.language)
    if self.apple_music_api and (not self.itunes_api): self.itunes_api = AppleMusicClientItunesApiUtils(storefront=self.apple_music_api.storefront, language=self.apple_music_api.language)
    # make sure the session carries an "authorization" header before any api call
    if self.apple_music_api and ('authorization' not in self.default_headers):
        self.default_search_headers = copy.deepcopy(self.apple_music_api.client.headers)
        self.default_headers = self.default_search_headers
        self._initsession()
        self.account_info = self.apple_music_api.account_info
    elif ('authorization' not in self.default_headers):
        virtual_client = SimpleNamespace(client=self.session, language=self.language)
        self.default_search_headers.update({"authorization": f"Bearer {AppleMusicClientAPIUtils.gettoken(virtual_client, request_overrides=request_overrides)}"})
        self.default_headers = self.default_search_headers
        self._initsession()
    # get tracks in playlist
    playlist_result = self.apple_music_api.getplaylist(playlist_id, request_overrides=request_overrides)
    tracks_in_playlist = safeextractfromdict(playlist_result, ['data', 0, 'relationships', 'tracks', 'data'], []) or []
    num_tracks = len(tracks_in_playlist)
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the bar is advanced for the previous track at the start of the next iteration
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({idx}/{num_tracks})")
            try: song_info = self._parsewithvipofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # account for the last track; guarded because the loop index does not exist for an empty playlist
        if num_tracks > 0:
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_tracks}/{num_tracks})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result, ['data', 0, 'attributes', 'name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos)
    work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir
        episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
            touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,293 @@
'''
Function:
Implementation of BaseMusicClient
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import random
import pickle
import requests
from pathlib import Path
from threading import Lock
from rich.text import Text
from itertools import chain
from datetime import datetime
from collections import defaultdict
from pathvalidate import sanitize_filepath
from concurrent.futures import ThreadPoolExecutor, as_completed
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn, MofNCompleteColumn, ProgressColumn, Task
from ..utils import LoggerHandle, AudioLinkTester, SongInfo, SongInfoUtils, HLSDownloader, touchdir, usedownloadheaderscookies, usesearchheaderscookies, useparseheaderscookies, cookies2dict, cookies2string, shortenpathsinsonginfos, optionalimport, optionalimportfrom
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
def build_user_agent() -> str:
    '''Return a random User-Agent from the optional ``fake_useragent`` package, else ``DEFAULT_USER_AGENT``.

    Any failure while resolving or using the optional dependency falls back to the static default.
    '''
    agent = DEFAULT_USER_AGENT
    try:
        agent_factory = optionalimportfrom("fake_useragent", "UserAgent")
        if agent_factory is not None:
            agent = agent_factory().random
    except Exception:
        # best effort only: the static default is always an acceptable answer
        agent = DEFAULT_USER_AGENT
    return agent
'''AudioAwareColumn'''
class AudioAwareColumn(ProgressColumn):
    '''Progress column that renders counters for "overall" / "hls" tasks and delegates everything else to a DownloadColumn.'''
    def __init__(self):
        super(AudioAwareColumn, self).__init__()
        # fallback renderer for ordinary byte-based download tasks
        self._download_col = DownloadColumn()
    '''render'''
    def render(self, task: Task):
        '''Render ``task`` according to its ``kind`` field (defaults to "download").'''
        kind = task.fields.get("kind", "download")
        if kind not in ("overall", "hls"):
            return self._download_col.render(task)
        unit = "audios" if kind == "overall" else "segments"
        completed = int(task.completed)
        # an unknown total is displayed as 0
        total = 0 if task.total is None else int(task.total)
        return Text(f"{completed}/{total} {unit}")
'''BaseMusicClient'''
class BaseMusicClient():
source = 'BaseMusicClient'
def __init__(self, search_size_per_source: int = 5, auto_set_proxies: bool = False, random_update_ua: bool = False, enable_search_curl_cffi: bool = False, enable_parse_curl_cffi: bool = False, enable_download_curl_cffi: bool = False, maintain_session: bool = False, logger_handle: LoggerHandle = None, disable_print: bool = False, work_dir: str = 'musicdl_outputs',
             max_retries: int = 3, freeproxy_settings: dict = None, default_search_cookies: dict | str = None, default_download_cookies: dict | str = None, default_parse_cookies: dict | str = None, strict_limit_search_size_per_page: bool = True, search_size_per_page: int = 10, quark_parser_config: dict = None):
    '''Store the client configuration, create the work dir and build the initial session.

    Args:
        search_size_per_source: number of results wanted from this source.
        auto_set_proxies: when true a ``freeproxy`` proxied-session client is created and used to pick proxies.
        random_update_ua: when true the User-Agent is refreshed whenever a session is re-created in ``get``/``post``.
        enable_search_curl_cffi / enable_parse_curl_cffi / enable_download_curl_cffi: per-phase curl_cffi switches.
        maintain_session: keep one session instead of rebuilding it for each request attempt.
        logger_handle: logger to use; a new ``LoggerHandle`` is created when omitted.
        disable_print: forwarded to every logger call.
        work_dir: root output directory (created immediately).
        max_retries: number of attempts made by ``get``/``post``.
        freeproxy_settings: overrides merged into the default freeproxy settings.
        default_search_cookies / default_download_cookies / default_parse_cookies: per-phase cookies (dict or string).
        strict_limit_search_size_per_page: read by the subclasses visible in this chunk to cap results per search url.
        search_size_per_page: page size, capped by ``search_size_per_source``.
        quark_parser_config: configuration whose ``cookies`` entry feeds the quark download headers.
    '''
    # set up work dir
    touchdir(work_dir)
    # set attributes
    self.search_size_per_source = search_size_per_source
    self.auto_set_proxies = auto_set_proxies
    self.random_update_ua = random_update_ua
    self.max_retries = max_retries
    self.maintain_session = maintain_session
    self.logger_handle = logger_handle if logger_handle else LoggerHandle()
    self.disable_print = disable_print
    self.work_dir = work_dir
    self.freeproxy_settings = freeproxy_settings or {}
    self.quark_parser_config = quark_parser_config or {}
    self.default_search_cookies = cookies2dict(default_search_cookies)
    self.default_download_cookies = cookies2dict(default_download_cookies)
    self.default_parse_cookies = cookies2dict(default_parse_cookies)
    # the search cookies are the active ones until something switches them
    self.default_cookies = self.default_search_cookies
    self.search_size_per_page = min(search_size_per_source, search_size_per_page)
    self.strict_limit_search_size_per_page = strict_limit_search_size_per_page
    self.enable_search_curl_cffi = enable_search_curl_cffi
    self.enable_download_curl_cffi = enable_download_curl_cffi
    self.enable_parse_curl_cffi = enable_parse_curl_cffi
    self.enable_curl_cffi = self.enable_search_curl_cffi
    # the impersonation list is needed as soon as ANY phase may use curl_cffi; the parse flag used to be
    # left out, leaving ``cc_impersonates`` as None for ``random.choice`` in get/post
    # NOTE(review): assumes the parse decorator activates ``enable_parse_curl_cffi`` - confirm in utils
    self.cc_impersonates = self._listccimpersonates() if (enable_search_curl_cffi or enable_download_curl_cffi or enable_parse_curl_cffi) else None
    # init requests.Session
    self.default_search_headers = {'User-Agent': build_user_agent()}
    self.default_download_headers = {'User-Agent': build_user_agent()}
    self.default_parse_headers = {'User-Agent': build_user_agent()}
    self.quark_default_download_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.225.400 QQBrowser/12.2.5544.400', 'origin': 'https://pan.quark.cn',
        'referer': 'https://pan.quark.cn/', 'accept-language': 'zh-CN,zh;q=0.9', 'cookie': cookies2string(self.quark_parser_config.get('cookies', '')),
    }
    self.quark_default_download_cookies = {} # placeholder, useless now
    self.default_headers = self.default_search_headers
    self._initsession()
    # proxied_session_client
    freeproxy = optionalimportfrom('freeproxy', 'freeproxy')
    default_freeproxy_settings = dict(disable_print=True, proxy_sources=['ProxiflyProxiedSession'], max_tries=20, init_proxied_session_cfg={})
    default_freeproxy_settings.update(self.freeproxy_settings)
    self.proxied_session_client = freeproxy.ProxiedSessionClient(**default_freeproxy_settings) if auto_set_proxies else None
'''_listccimpersonates'''
def _listccimpersonates(self):
    '''Scan the installed ``curl_cffi`` package files for browser impersonation tokens.

    Returns:
        Sorted list of unique tokens (e.g. names starting with chrome/edge/safari/firefox/tor)
        found in the package's source and binary files.
    '''
    curl_cffi = optionalimport('curl_cffi')
    package_dir = Path(curl_cffi.__file__).resolve().parent
    scanned_suffixes = {".py", ".so", ".pyd", ".dll", ".dylib"}
    token_pattern = re.compile(rb"\b(?:chrome|edge|safari|firefox|tor)(?:\d+[a-z_]*|_android|_ios)?\b")
    tokens = set()
    for candidate in package_dir.rglob("*"):
        if candidate.suffix not in scanned_suffixes:
            continue
        for raw_token in token_pattern.findall(candidate.read_bytes()):
            tokens.add(raw_token.decode("utf-8", "ignore"))
    return sorted(tokens)
'''_initsession'''
def _initsession(self):
    '''(Re)create the HTTP session and both audio link testers from the current default headers/cookies.

    Nothing is rebuilt when ``maintain_session`` is set and all three objects already exist.
    '''
    reusable_attrs = ('session', 'audio_link_tester', 'quark_audio_link_tester')
    if self.maintain_session and all(getattr(self, attr_name, None) for attr_name in reusable_attrs):
        return
    curl_cffi = optionalimport('curl_cffi')
    if self.enable_curl_cffi:
        self.session = curl_cffi.requests.Session()
    else:
        self.session = requests.Session()
    # the session shares the very same dict object as ``default_headers`` (no copy is made)
    self.session.headers = self.default_headers
    self.audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.default_download_headers), cookies=copy.deepcopy(self.default_download_cookies))
    self.quark_audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.quark_default_download_headers), cookies=copy.deepcopy(self.quark_default_download_cookies))
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the list of search urls for ``keyword``; the base class always raises NotImplementedError.'''
    reason = 'not to be implemented'
    raise NotImplementedError(reason)
'''_constructuniqueworkdir'''
def _constructuniqueworkdir(self, keyword: str, sort_by_search_kwd_and_time: bool = True):
    '''Create and return the output directory for one search / playlist.

    Args:
        keyword: label appended (after a timestamp) to the directory name.
        sort_by_search_kwd_and_time: when false the plain ``<work_dir>/<source>`` directory is used.

    Returns:
        The sanitized directory path, which has been passed to ``touchdir``.
    '''
    time_stamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    if sort_by_search_kwd_and_time:
        raw_path = os.path.join(self.work_dir, self.source, f'{time_stamp} {keyword}')
    else:
        raw_path = os.path.join(self.work_dir, self.source)
    work_dir = sanitize_filepath(raw_path)
    touchdir(work_dir)
    return work_dir
'''_removeduplicates'''
def _removeduplicates(self, song_infos: "list[SongInfo]" = None) -> "list[SongInfo]":
    '''Drop items whose ``identifier`` was already seen, keeping the first occurrence and the input order.

    Args:
        song_infos: items to filter; ``None`` (the default) is treated as an empty list.

    Returns:
        A new list containing the first item of every distinct ``identifier``.
    '''
    first_by_identifier = {}
    # ``or []`` makes the declared default usable instead of raising TypeError on iteration
    for song_info in song_infos or []:
        # dicts keep insertion order, so setdefault keeps the first item per identifier
        first_by_identifier.setdefault(song_info.identifier, song_info)
    return list(first_by_identifier.values())
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
    '''Search one url and fill ``song_infos``; the base class always raises NotImplementedError.'''
    reason = 'not be implemented'
    raise NotImplementedError(reason)
'''search'''
@usesearchheaderscookies
def search(self, keyword: str, num_threadings: int = 5, request_overrides: dict = None, rule: dict = None, main_process_context: Progress = None, main_progress_id: int = None, main_progress_lock: Lock = None):
    '''Search ``keyword`` on this source using one worker task per search url.

    Args:
        keyword: search keyword.
        num_threadings: size of the thread pool.
        request_overrides: extra keyword arguments forwarded to the requests.
        rule: overrides forwarded to ``_constructsearchurls``.
        main_process_context: shared progress object; one is created (and closed) here when omitted.
        main_progress_id: task id of an aggregate task inside ``main_process_context`` to keep in sync.
        main_progress_lock: lock guarding progress updates; a private one is created when omitted.

    Returns:
        De-duplicated list of search results with ``work_dir`` assigned. The results are also pickled
        to ``search_results.pkl`` inside each work dir.
    '''
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # logging
    self.logger_handle.info(f'Start to search music files using {self.source}.', disable_print=self.disable_print)
    # construct search urls
    search_urls = self._constructsearchurls(keyword=keyword, rule=rule, request_overrides=request_overrides)
    # multi threadings for searching music files
    owns_progress = main_process_context is None
    if owns_progress:
        main_process_context = Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10)
        main_process_context.__enter__()
    # a progress object created here must be closed on every exit path, so the rest runs under try/finally
    try:
        if main_progress_lock is None: main_progress_lock = Lock()
        with main_progress_lock:
            progress_id = main_process_context.add_task(f"{self.source}.search >>> completed (0/{len(search_urls)})", total=len(search_urls))
            if main_progress_id is not None:
                # grow the aggregate task by the number of urls this source contributes
                cur_total = main_process_context.tasks[main_progress_id].total or 0
                main_process_context.update(main_progress_id, total=cur_total + len(search_urls))
                main_process_context.update(main_progress_id, description=f"Search from sources >>> completed ({int(main_process_context.tasks[main_progress_id].completed)}/{cur_total + len(search_urls)})")
        song_infos, submitted_tasks = {}, []
        with ThreadPoolExecutor(max_workers=num_threadings) as pool:
            for search_url_idx, search_url in enumerate(search_urls):
                # every worker appends into its own list, keyed by url index
                song_infos[str(search_url_idx)] = []
                submitted_tasks.append(pool.submit(self._search, keyword, search_url, request_overrides, song_infos[str(search_url_idx)], main_process_context, progress_id))
            for future in as_completed(submitted_tasks):
                future.result()
                with main_progress_lock:
                    main_process_context.advance(progress_id, 1)
                    num_searched_urls = int(main_process_context.tasks[progress_id].completed)
                    main_process_context.update(progress_id, description=f"{self.source}.search >>> completed ({num_searched_urls}/{len(search_urls)})")
                    if main_progress_id is None: continue
                    main_process_context.advance(main_progress_id, 1)
                    main_process_context.update(main_progress_id, description=f"Search from sources >>> completed ({int(main_process_context.tasks[main_progress_id].completed)}/{int(main_process_context.tasks[main_progress_id].total or 0)})")
        song_infos = list(chain.from_iterable(song_infos.values()))
        song_infos = self._removeduplicates(song_infos=song_infos)
        work_dir = self._constructuniqueworkdir(keyword=keyword)
        for song_info in song_infos:
            song_info.work_dir = work_dir
            episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
            for eps_info in episodes:
                eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
                touchdir(work_dir)
        # logging
        if len(song_infos) > 0:
            work_dir_to_song_info, work_dir = defaultdict(list), ', '.join(list(set([str(s.work_dir) for s in song_infos])))
            for s in song_infos:
                s.work_dir = str(s.work_dir)
                work_dir_to_song_info[s.work_dir].append(s.todict())
            for w, items in work_dir_to_song_info.items():
                touchdir(w)
                self._savetopkl(items, os.path.join(w, "search_results.pkl"))
        else:
            work_dir = self.work_dir
        self.logger_handle.info(f'Finished searching music files using {self.source}. Search results have been saved to {work_dir}, valid items: {len(song_infos)}.', disable_print=self.disable_print)
    finally:
        if owns_progress: main_process_context.__exit__(None, None, None)
    # return
    return song_infos
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list[SongInfo] = None, progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
    '''Download one song and append its (optionally supplemented) info to ``downloaded_song_infos``.

    Three cases are handled, selected by ``song_info.protocol``:
      * "HLS": delegated to ``HLSDownloader``;
      * "HTTP" with ``downloaded_contents`` already in memory: the bytes are written directly;
      * "HTTP" otherwise: the url is streamed to disk chunk by chunk.
    Errors are reported through the progress description instead of being raised.

    Args:
        song_info: item to download.
        request_overrides: extra request keyword arguments (deep-copied before use).
        downloaded_song_infos: list extended in place on success; a fresh list is created when omitted.
        progress: progress object that receives the updates.
        song_progress_id: task id of this song inside ``progress``.
        auto_supplement_song: when true the result is passed through
            ``SongInfoUtils.supplsonginfothensavelyricsthenwritetags`` before being recorded.

    Returns:
        The ``downloaded_song_infos`` list.
    '''
    request_overrides = copy.deepcopy(request_overrides or {})
    # a fresh list per call: a shared ``[]`` default would accumulate results across calls
    downloaded_song_infos = [] if downloaded_song_infos is None else downloaded_song_infos
    def describe(status: str) -> str:
        # song names longer than 13 characters are shortened to 10 characters plus an ellipsis
        name = song_info.song_name
        short_name = name[:10] + '...' if len(name) > 13 else name[:13]
        return f"{self.source}.download >>> {short_name} ({status})"
    def record_success():
        # a deep copy is stored so later changes to ``song_info`` do not affect the recorded result
        downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
    if song_info.protocol.upper() in {'HLS'}:
        try:
            hls_downloader = HLSDownloader(
                output_dir=song_info.work_dir, proxies=request_overrides.pop('proxies', {}) or self._autosetproxies(), headers=song_info.default_download_headers or request_overrides.pop('headers', {}) or self.default_headers, cookies=request_overrides.pop('cookies', {}) or self.default_cookies,
                logger_handle=self.logger_handle, verify_tls=request_overrides.pop('verify', True), timeout=request_overrides.pop('timeout', (10, 30)), disable_print=self.disable_print, request_overrides=request_overrides
            )
            hls_downloader.download(song_info.download_url, song_info.save_path, quality='best', keep_segments=False, temp_subdir=str(song_info.identifier), progress=progress, progress_id=song_progress_id)
            record_success()
        except Exception as err:
            progress.update(song_progress_id, description=describe(f'Error: {err}'))
    elif song_info.protocol.upper() in {'HTTP'} and song_info.downloaded_contents:
        try:
            touchdir(song_info.work_dir)
            # len() is the number of bytes written below; ``__sizeof__()`` also counted object overhead
            total_size = len(song_info.downloaded_contents)
            progress.update(song_progress_id, total=total_size)
            with open(song_info.save_path, "wb") as fp: fp.write(song_info.downloaded_contents)
            progress.advance(song_progress_id, total_size)
            progress.update(song_progress_id, description=describe('Success'))
            record_success()
        except Exception as err:
            progress.update(song_progress_id, description=describe(f'Error: {err}'))
    elif song_info.protocol.upper() in {'HTTP'}:
        try:
            touchdir(song_info.work_dir)
            if song_info.default_download_headers: request_overrides['headers'] = song_info.default_download_headers
            with self.get(song_info.download_url, stream=True, **request_overrides) as resp:
                resp.raise_for_status()
                total_size, chunk_size, downloaded_size = int(resp.headers.get('content-length', 0)), song_info.get('chunk_size', 1024), 0
                progress.update(song_progress_id, total=total_size)
                with open(song_info.save_path, "wb") as fp:
                    for chunk in resp.iter_content(chunk_size=chunk_size):
                        if not chunk: continue
                        fp.write(chunk)
                        downloaded_size = downloaded_size + len(chunk)
                        if total_size > 0:
                            downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, total_size / 1024 / 1024)
                        else:
                            # unknown content-length: let the total follow what has been received so far
                            progress.update(song_progress_id, total=downloaded_size)
                            downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, downloaded_size / 1024 / 1024)
                        progress.advance(song_progress_id, len(chunk))
                        progress.update(song_progress_id, description=describe(f'Downloading: {downloading_text}'))
                progress.update(song_progress_id, description=describe('Success'))
                record_success()
        except Exception as err:
            progress.update(song_progress_id, description=describe(f'Error: {err}'))
    return downloaded_song_infos
'''download'''
@usedownloadheaderscookies
def download(self, song_infos: list[SongInfo], num_threadings: int = 5, request_overrides: dict = None, auto_supplement_song: bool = True):
    '''Download ``song_infos`` concurrently while showing one overall and one per-song progress task.

    Args:
        song_infos: items to download.
        num_threadings: size of the thread pool.
        request_overrides: extra request keyword arguments forwarded to ``_download``.
        auto_supplement_song: forwarded to ``_download``.

    Returns:
        List of the successfully downloaded items. They are also pickled to ``download_results.pkl``
        inside each work dir.
    '''
    # init
    request_overrides = request_overrides or {}
    shortenpathsinsonginfos(song_infos=song_infos)
    # logging
    self.logger_handle.info(f'Start to download music files using {self.source}.', disable_print=self.disable_print)
    # multi threadings for downloading music files
    layout = (SpinnerColumn(), TextColumn("{task.description}"), BarColumn(bar_width=None), TaskProgressColumn(), AudioAwareColumn(), TransferSpeedColumn(), TimeRemainingColumn())
    with Progress(*layout, refresh_per_second=20, expand=True) as progress:
        songs_progress_id = progress.add_task(f"{self.source}.download >>> completed (0/{len(song_infos)})", total=len(song_infos), kind='overall')
        downloaded_song_infos = []
        # one "download" task per song, registered up front so every song is listed as "Preparing"
        song_progress_ids = []
        for song_info in song_infos:
            preparing_text = f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Preparing)"
            song_progress_ids.append(progress.add_task(preparing_text, total=None, kind='download'))
        with ThreadPoolExecutor(max_workers=num_threadings) as pool:
            submitted_tasks = [
                pool.submit(self._download, song_info, request_overrides, downloaded_song_infos, progress, song_progress_id, auto_supplement_song)
                for song_progress_id, song_info in zip(song_progress_ids, song_infos)
            ]
            for _ in as_completed(submitted_tasks):
                progress.advance(songs_progress_id, 1)
                finished_count = int(progress.tasks[songs_progress_id].completed)
                progress.update(songs_progress_id, description=f"{self.source}.download >>> completed ({finished_count}/{len(song_infos)})")
    # logging
    if downloaded_song_infos:
        work_dir = ', '.join({str(item.work_dir) for item in downloaded_song_infos})
        records_by_dir = defaultdict(list)
        for item in downloaded_song_infos:
            item.work_dir = str(item.work_dir)
            records_by_dir[item.work_dir].append(item.todict())
        for directory, records in records_by_dir.items():
            touchdir(directory)
            self._savetopkl(records, os.path.join(directory, "download_results.pkl"))
    else:
        work_dir = self.work_dir
    self.logger_handle.info(f'Finished downloading music files using {self.source}. Download results have been saved to {work_dir}, valid downloads: {len(downloaded_song_infos)}.', disable_print=self.disable_print)
    # return
    return downloaded_song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Parse a playlist url; the base class always raises NotImplementedError naming the source.'''
    reason = f'Not supported now to parse playlist from {self.source}'
    raise NotImplementedError(reason)
'''_autosetproxies'''
def _autosetproxies(self):
    '''Return a random proxy mapping from the proxied-session client.

    Returns:
        ``{}`` when ``auto_set_proxies`` is off or when fetching a proxy fails (the failure is logged).
    '''
    if not self.auto_set_proxies:
        return {}
    try:
        chosen = self.proxied_session_client.getrandomproxy()
    except Exception as err:
        self.logger_handle.error(f'{self.source}._autosetproxies >>> freeproxy lib failed to auto fetch proxies (Error: {err})', disable_print=self.disable_print)
        return {}
    return chosen
'''get'''
def get(self, url, **kwargs):
    '''Send a GET request with default cookies/timeout, retrying up to ``max_retries`` times.

    Args:
        url: target url.
        **kwargs: forwarded to ``session.get``. ``proxies`` is honoured on every attempt; when it is
            missing or empty a proxy from ``_autosetproxies`` is used instead.

    Returns:
        The first response whose ``raise_for_status`` succeeds. When every attempt fails, the last
        response that was received (its status is an error), or ``None`` if no response was received.
    '''
    if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
    if 'timeout' not in kwargs: kwargs['timeout'] = (10, 30)
    if 'impersonate' not in kwargs and self.enable_curl_cffi: kwargs['impersonate'] = random.choice(self.cc_impersonates)
    # taken out of kwargs once, before the loop; popping inside the loop lost the caller's proxies on retries
    requested_proxies = kwargs.pop('proxies', None)
    resp = None
    for _ in range(self.max_retries):
        if not self.maintain_session:
            self._initsession()
            if self.random_update_ua: self.session.headers.update({'User-Agent': build_user_agent()})
        proxies = requested_proxies or self._autosetproxies()
        # tracked per attempt so the logged status never comes from an earlier attempt
        attempt_resp = None
        try:
            attempt_resp = self.session.get(url, proxies=proxies, **kwargs)
            resp = attempt_resp
            attempt_resp.raise_for_status()
        except Exception as err:
            status = getattr(attempt_resp, "status_code", None)
            self.logger_handle.error(f'{self.source}.get >>> {url} (Error: {err}; status={status})', disable_print=self.disable_print)
            continue
        return resp
    return resp
'''post'''
def post(self, url, **kwargs):
    '''Send a POST request with default cookies/timeout, retrying up to ``max_retries`` times.

    Args:
        url: target url.
        **kwargs: forwarded to ``session.post``. ``proxies`` is honoured on every attempt; when it is
            missing or empty a proxy from ``_autosetproxies`` is used instead.

    Returns:
        The first response whose ``raise_for_status`` succeeds. When every attempt fails, the last
        response that was received (its status is an error), or ``None`` if no response was received.
    '''
    if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
    if 'timeout' not in kwargs: kwargs['timeout'] = (10, 30)
    if 'impersonate' not in kwargs and self.enable_curl_cffi: kwargs['impersonate'] = random.choice(self.cc_impersonates)
    # taken out of kwargs once, before the loop; popping inside the loop lost the caller's proxies on retries
    requested_proxies = kwargs.pop('proxies', None)
    resp = None
    for _ in range(self.max_retries):
        if not self.maintain_session:
            self._initsession()
            if self.random_update_ua: self.session.headers.update({'User-Agent': build_user_agent()})
        proxies = requested_proxies or self._autosetproxies()
        # tracked per attempt so the logged status never comes from an earlier attempt
        attempt_resp = None
        try:
            attempt_resp = self.session.post(url, proxies=proxies, **kwargs)
            resp = attempt_resp
            attempt_resp.raise_for_status()
        except Exception as err:
            status = getattr(attempt_resp, "status_code", None)
            self.logger_handle.error(f'{self.source}.post >>> {url} (Error: {err}; status={status})', disable_print=self.disable_print)
            continue
        return resp
    return resp
'''_savetopkl'''
def _savetopkl(self, data, file_path, auto_sanitize=True):
    '''Pickle ``data`` into ``file_path``.

    Args:
        data: object to serialize with ``pickle.dump``.
        file_path: destination path.
        auto_sanitize: when true the path goes through ``sanitize_filepath`` first.
    '''
    target_path = sanitize_filepath(file_path) if auto_sanitize else file_path
    with open(target_path, 'wb') as sink:
        pickle.dump(data, sink)
@@ -0,0 +1,123 @@
'''
Function:
Implementation of BilibiliMusicClient: https://www.bilibili.com/audio/home/?type=9
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
from .base import BaseMusicClient
from urllib.parse import urlencode
from rich.progress import Progress
from ..utils import legalizestring, resp2json, usesearchheaderscookies, seconds2hms, safeextractfromdict, SongInfo, AudioLinkTester
'''BilibiliMusicClient'''
class BilibiliMusicClient(BaseMusicClient):
source = 'BilibiliMusicClient'
def __init__(self, **kwargs):
    '''Initialise the base client, then install the browser-like headers and fallback cookies used for Bilibili.'''
    super(BilibiliMusicClient, self).__init__(**kwargs)
    # search and download requests use the same header set, kept as two independent dict objects
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0", "Sec-Ch-Ua": '"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"', "Referer": "https://www.bilibili.com/", "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Fetch-Dest": "document", "Sec-Fetch-Mode": "navigate", "Sec-Fetch-Site": "none", "Sec-Fetch-User": "?1", "Accept-Encoding": "gzip, deflate", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5", "Sec-Ch-Ua-Platform": '"Windows"', "Cache-Control": "max-age=0", "Upgrade-Insecure-Requests": "1",
    }
    self.default_search_headers = browser_headers
    self.default_download_headers = copy.deepcopy(browser_headers)
    self.default_headers = self.default_search_headers
    # NOTE(review): these look like cookies captured from a real logged-in session (SESSDATA, bili_jct, ...)
    # committed to source - confirm they are meant to be public
    fallback_cookies = {
        "buvid3": "2E109C72-251F-3827-FA8E-921FA0D7EC5291319infoc", "b_nut": "1676213591", "i-wanna-go-back": "-1", "_uuid": "2B2D7A6C-8310C-1167-F548-2F1095A6E93F290252infoc", "buvid4": "31696B5F-BB23-8F2B-3310-8B3C55FB49D491966-023021222-WcoPnBbwgLUAZ6TJuAUN8Q%3D%3D", "CURRENT_FNVAL": "4048", "nostalgia_conf": "-1",
        "bili_jct": "4c583b61b86b16d812a7804078828688", "sid": "8dt1ioao", "bili_ticket": "eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDQ2MjUzNjAsImlhdCI6MTcwNDM2NjEwMCwicGx0IjotMX0.4E-V4K2y452cy6eexwY2x_q3-xgcNF2qtugddiuF8d4", "rpdid": "|(JY))RmR~|u0J'uY~YkuJ~Ru", "buvid_fp_plain": "undefined",
        "b_ut": "5", "DedeUserID__ckMd5": "66450f2302095cc5", "DedeUserID": "520271156", "FEED_LIVE_VERSION": "V8", "header_theme_version": "CLOSE", "CURRENT_QUALITY": "80", "enable_web_push": "DISABLE", "buvid_fp": "52ad4773acad74caefdb23875d5217cd", "PVID": "1", "CURRENT_PID": "418c8490-cadb-11ed-b23b-dd640f2e1c14",
        "home_feed_column": "5", "SESSDATA": "8036f42c%2C1719895843%2C19675%2A12CjATThdxG8TyQ2panBpBQcmT0gDKjexwc-zXNGiMnIQ2I9oLVmOiE9YkLao2_aawEhoSVlhGY05PVjVkZWM0T042Z2hZRXBOdElYWXhJa3RpVmZ0M3NvcWw1N0tPcGRVSmRoOVNQZnNHT1JHS05yR1Y1MUFLX3RXeXVJa3NjbEVBQkUxRVN6RFRRIIEC", "fingerprint": "847f1839b443252d91ff0df7465fa8d9",
        "hit-dyn-v2": "1", "LIVE_BUVID": "AUTO8716766313471956", "hit-new-style-dyn": "1", "bili_ticket_expires": "1704625300", "browser_resolution": "1912-924", "bp_video_offset_520271156": "883089613008142344",
    }
    # cookies supplied by the caller win; the built-in set is only a fallback
    if not self.default_search_cookies:
        self.default_search_cookies = copy.deepcopy(fallback_cookies)
    if not self.default_download_cookies:
        self.default_download_cookies = copy.deepcopy(fallback_cookies)
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one Bilibili search url per result page until ``search_size_per_source`` items are covered.

    Args:
        keyword: search keyword.
        rule: query parameters overriding the defaults (``page`` and ``page_size`` are always set per page).
        request_overrides: accepted for interface compatibility; not used to build the urls.

    Returns:
        List of search urls, one per page, in page order.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # search rules
    query_template = {'__refresh__': 'true', '_extra': '', 'page': 1, 'page_size': self.search_size_per_page, 'platform': 'pc', 'highlight': '1', 'context': '', 'single_column': '0', 'keyword': keyword, 'category_id': '', 'search_type': 'video', 'dynamic_offset': '0', 'preload': 'true', 'com2co': 'true'}
    query_template.update(rule)
    # construct search urls based on search rules
    endpoint = 'https://api.bilibili.com/x/web-interface/search/type?'
    per_page = self.search_size_per_page
    urls, covered = [], 0
    while covered < self.search_size_per_source:
        query = copy.deepcopy(query_template)
        # both keys already exist in the template, so their position in the query string is unchanged
        query.update(page_size=per_page, page=int(covered // per_page) + 1)
        urls.append(endpoint + urlencode(query))
        covered += per_page
    # return
    return urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one Bilibili search result into downloadable audio episodes.

    Args:
        search_result: one item of the search api result; must be a dict with ``id`` and ``bvid``.
        song_info_flac: previously resolved item that may be reused as-is.
        lossless_quality_is_sufficient: when true and ``song_info_flac`` is valid with an extension listed
            in ``lossless_quality_definitions``, it is returned without any request.
        lossless_quality_definitions: extensions regarded as lossless.
        request_overrides: extra keyword arguments forwarded to every request made here.

    Returns:
        An empty ``SongInfo`` when ``search_result`` is unusable, ``song_info_flac`` when it is reused,
        otherwise a list with one ``SongInfo`` per video part that has a valid audio url.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not search_result.get('id')) or (not (song_bvid := search_result.get('bvid'))): return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): return song_info_flac
    (resp := self.get(f"https://api.bilibili.com/x/web-interface/view?bvid={song_bvid}", **request_overrides)).raise_for_status()
    view_data = resp2json(resp=resp)['data']
    pages, root_title, song_info = view_data['pages'], view_data['title'], []
    episodes = [(page["cid"], page["part"]) for page in pages if isinstance(page, dict) and page.get("cid") and page.get("part")]
    def has_stream_url(audio) -> bool:
        # an audio entry is usable only if it is a dict carrying at least one url field
        return bool(isinstance(audio, dict) and (audio.get('baseUrl') or audio.get('base_url') or audio.get('backupUrl') or audio.get('backup_url')))
    # candidate audio lists, tried in this order: flac, dolby, plain dash audio
    audio_paths = (['data', 'dash', 'flac', 'audio'], ['data', 'dash', 'dolby', 'audio'], ['data', 'dash', 'audio'])
    for cid, episode_name in episodes:
        # request_overrides is forwarded here too, consistently with the view request above
        try: (resp := self.get(f"https://api.bilibili.com/x/player/playurl?fnval=16&bvid={song_bvid}&cid={cid}", **request_overrides)).raise_for_status()
        except Exception: continue
        download_result = resp2json(resp=resp)
        audios = []
        for audio_path in audio_paths:
            audios = [a for a in (safeextractfromdict(download_result, audio_path, []) or []) if has_stream_url(a)]
            if audios: break
        if not audios: continue
        # best candidate first: highest bandwidth, then largest file size
        audios_sorted = sorted(audios, key=lambda x: (x.get("bandwidth", 0) or 0, x.get("filesize", 0) or 0), reverse=True)
        if not (download_url := audios_sorted[0].get('baseUrl') or audios_sorted[0].get('base_url') or audios_sorted[0].get('backupUrl') or audios_sorted[0].get('backup_url')): continue
        if isinstance(download_url, list): download_url = download_url[0]
        eps_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(episode_name if episode_name == root_title else f'{root_title}-{episode_name}'), singers=legalizestring(search_result.get('author')),
            album=legalizestring(str(song_bvid)), ext='m4a', file_size=None, identifier=cid, duration_s=safeextractfromdict(download_result, ['data', 'dash', 'duration'], 0), duration=seconds2hms(safeextractfromdict(download_result, ['data', 'dash', 'duration'], 0)),
            lyric=None, cover_url=search_result.get('pic'), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        # cover urls that do not start with "http" are prefixed with the https scheme
        if eps_info.cover_url and (not eps_info.cover_url.startswith('http')): eps_info.cover_url = f'https:{eps_info.cover_url}'
        eps_info.download_url_status['probe_status'] = self.audio_link_tester.probe(eps_info.download_url, request_overrides)
        eps_info.file_size = eps_info.download_url_status['probe_status']['file_size']
        eps_info.ext = eps_info.download_url_status['probe_status']['ext']
        # a probed m4s/mp4 extension is stored as m4a
        if eps_info.ext in {'m4s', 'mp4'}: eps_info.ext = 'm4a'
        if (eps_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (eps_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): eps_info.ext = eps_info.download_url_status['probe_status']['ext']
        elif (eps_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): eps_info.ext = 'mp3'
        if eps_info.with_valid_download_url: song_info.append(eps_info)
        if self.strict_limit_search_size_per_page and len(song_info) >= self.search_size_per_page: break
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Request one search page and collect every downloadable result.

    Args:
        keyword: search keyword; not read by this body (search_url already carries the query).
        search_url: fully constructed search url to request.
        request_overrides: extra keyword arguments forwarded to self.get and to the parser.
        song_infos: accumulator list extended in place; a new list is created when omitted.
        progress: optional progress object whose task description receives the outcome.
        progress_id: task id used with progress.

    Returns:
        The accumulator list (the very object passed in, when one was passed).
    '''
    # init
    request_overrides = request_overrides or {}
    # a literal [] default would be created once and shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['data']['result']:
            # --parse with official apis, the parser may hand back a single SongInfo or a list of them
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if isinstance(song_info, list) and (not song_info): continue
            if isinstance(song_info, SongInfo) and (not song_info.with_valid_download_url): continue
            if isinstance(song_info, list): song_infos.extend(song_info)
            elif isinstance(song_info, SongInfo): song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                # truncate in place: rebinding the name to a slice would leave the caller's list over the cap
                del song_infos[self.search_size_per_page:]
                break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,219 @@
'''
Function:
Implementation of DeezerMusicClient: https://www.deezer.com/us/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import copy
import requests
from pathlib import Path
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import DEEZER_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse
from ..utils.deezerutils import DeezerMusicClientUtils
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import replacefile, touchdir, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, usedownloadheaderscookies, safeextractfromdict, extractdurationsecondsfromlrc, useparseheaderscookies, obtainhostname, hostmatchessuffix, byte2mb, cleanlrc, SongInfo, AudioLinkTester, SongInfoUtils, LyricSearchClient
'''DeezerMusicClient'''
class DeezerMusicClient(BaseMusicClient):
source = 'DeezerMusicClient'
def __init__(self, **kwargs):
    '''Configure the client: force a maintained session, validate cookies, install default headers.

    Args:
        **kwargs: forwarded to the base class constructor; "maintain_session" is always overridden to True.

    Raises:
        AssertionError: if any non-empty default cookie set lacks the "arl" entry.
    '''
    kwargs['maintain_session'] = True
    super(DeezerMusicClient, self).__init__(**kwargs)
    # each configured cookie set must contain "arl"; empty or unset cookie sets are accepted as they are
    arl_hint = '"arl" should be configured, refer to https://musicdl.readthedocs.io/en/latest/Quickstart.html#deezer-music-download'
    for cookies_attr in ('default_search_cookies', 'default_parse_cookies', 'default_download_cookies'):
        configured_cookies = getattr(self, cookies_attr)
        if configured_cookies: assert "arl" in configured_cookies, arl_hint
    # search and parse requests use the same header values, each gets its own dict object
    ajax_headers = {
        'Pragma': 'no-cache', 'Origin': 'https://www.deezer.com', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9', 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:135.0) Gecko/20100101 Firefox/135.0', 'DNT': '1',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Accept': '*/*', 'Cache-Control': 'no-cache', 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive', 'Referer': 'https://www.deezer.com/login',
    }
    self.default_search_headers = dict(ajax_headers)
    self.default_parse_headers = dict(ajax_headers)
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
    self.default_headers = self.default_search_headers
    # filled on first use by _setauthinfo
    self.auth_info = {}
    self._initsession()
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = None, progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
    '''Download one song through the base class and decrypt it in place when its url is an encrypted one.

    Args:
        song_info: description of the song; download_url, save_path and raw_data['id'] are read here.
        request_overrides: forwarded to the base class download.
        downloaded_song_infos: accumulator list extended in place; a new list is created when omitted.
        progress: forwarded to the base class download.
        song_progress_id: forwarded to the base class download.
        auto_supplement_song: when True the stored copy is passed through
            SongInfoUtils.supplsonginfothensavelyricsthenwritetags before being appended.

    Returns:
        The accumulator list with a deep copy of song_info appended.
    '''
    # a literal [] default would be created once and shared by every call that omits the argument
    downloaded_song_infos = [] if downloaded_song_infos is None else downloaded_song_infos
    # the base implementation is called with a throwaway result list and without the supplement step,
    # both are handled below once the file is in its final (decrypted) form
    # NOTE(review): whether the base download succeeded is not checked here, confirm against the base class
    super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=[], progress=progress, song_progress_id=song_progress_id, auto_supplement_song=False)
    if DeezerMusicClientUtils.IS_ENCRYPTED_RPATTERN.search(song_info.download_url) is not None:
        # decrypt next to the downloaded file, then move the result over the original path
        save_path = Path(song_info.save_path)
        decrypted_path = save_path.parent / f'{save_path.stem}.decrypt'
        blowfish_key = DeezerMusicClientUtils.generateblowfishkey(str(song_info.raw_data.get('id')))
        DeezerMusicClientUtils.decryptdownloadedaudiofile(src_path=str(song_info.save_path), dst_path=str(decrypted_path), blowfish_key=blowfish_key)
        replacefile(str(decrypted_path), str(song_info.save_path))
    finished_song_info = copy.deepcopy(song_info)
    if auto_supplement_song: finished_song_info = SongInfoUtils.supplsonginfothensavelyricsthenwritetags(finished_song_info, logger_handle=self.logger_handle, disable_print=self.disable_print)
    downloaded_song_infos.append(finished_song_info)
    return downloaded_song_infos
'''_setauthinfo'''
def _setauthinfo(self, request_overrides: dict = None):
if self.auth_info: return
request_overrides = request_overrides or {}
(resp := self.post('http://www.deezer.com/ajax/gw-light.php', params={'api_version': "1.0", 'api_token': 'null', 'input': '3', 'method': 'deezer.getUserData'}, **request_overrides)).raise_for_status()
self.auth_info = resp2json(resp=resp)
return self.auth_info
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}; self._setauthinfo(request_overrides=request_overrides)
if (not self.default_cookies or 'arl' not in self.default_cookies): self.logger_handle.warning(f'{self.source}._constructsearchurls >>> cookies are not configured, so song downloads are restricted and only the preview portion of the track can be downloaded.')
# search rules
default_rule = {'q': keyword, 'index': 1, 'limit': 20}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://api.deezer.com/search/track?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['limit'] = page_size
page_rule['index'] = int(count // page_size) + 1
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, is_fallback_retry: bool = False, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one search hit into a SongInfo with a download url and, when available, lyrics.

    Download urls are tried in this order: media.deezer.com get_url for each entry of
    DeezerMusicClientUtils.MUSIC_QUALITIES, a retry with the fallback track id reported in the
    track details, a url built by DeezerMusicClientUtils.getencryptedfileurl, and finally the
    preview link found in the track details.

    Args:
        search_result: dict carrying the track id under "id" or "SNG_ID".
        song_info_flac: an already resolved SongInfo that may be reused as is.
        lossless_quality_is_sufficient: reuse song_info_flac when it is valid and its ext is lossless.
        lossless_quality_definitions: extensions regarded as lossless (only read, never mutated).
        is_fallback_retry: True when this call already is the retry with the fallback track id.
        request_overrides: extra keyword arguments forwarded to every http request.

    Returns:
        The resolved SongInfo; one without a valid download url when nothing worked or the id is missing.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    self._setauthinfo(request_overrides=request_overrides)
    if (not isinstance(search_result, dict)) or (not (song_id := (search_result.get('id') or search_result.get('SNG_ID')))): return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
    else:
        # --track details, the gateway answer is preferred and the public api is the fallback
        try:
            # https keeps the session cookies off plaintext transport
            (resp := self.post('https://www.deezer.com/ajax/gw-light.php', params={'api_version': "1.0", 'api_token': safeextractfromdict(self.auth_info, ['results', 'checkForm'], None), 'input': '3', 'method': 'song.getData'}, json={'SNG_ID': song_id}, **request_overrides)).raise_for_status()
            download_result = resp2json(resp=resp)
            # explicit raise instead of assert, so the fallback still triggers when python runs with -O
            if safeextractfromdict(download_result, ['error'], None): raise ValueError(f'{self.source}._parsewithofficialapiv1 >>> song.getData reported an error for track {song_id}')
        except Exception:
            (resp := self.get(f'https://api.deezer.com/track/{song_id}', **request_overrides)).raise_for_status()
            download_result = resp2json(resp=resp)
        # --helper: build a SongInfo for one candidate url from the track details
        def _buildsonginfo(download_url, ext, file_size_bytes=None, file_size=None):
            try: duration_in_secs = float(safeextractfromdict(download_result, ['results', 'DURATION'], 0) or download_result.get('duration', 0))
            except Exception: duration_in_secs = 0
            return SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'id': song_id}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['results', 'SNG_TITLE'], None) or download_result.get('title')), singers=legalizestring(safeextractfromdict(download_result, ['results', 'ART_NAME'], None) or safeextractfromdict(download_result, ['artist', 'name'], None)),
                album=legalizestring(safeextractfromdict(download_result, ['results', 'ALB_TITLE'], None) or safeextractfromdict(download_result, ['album', 'title'], None)), ext=ext, file_size_bytes=file_size_bytes, file_size=file_size, identifier=str(song_id), duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None,
                cover_url=DeezerMusicClientUtils.getcoverurl(safeextractfromdict(download_result, ['results', 'ALB_PICTURE'], None)) or safeextractfromdict(download_result, ['album', 'cover_xl'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
        # --helper: probe the url, record the probed size and settle on a valid extension
        def _applyprobe(probed_song_info, adopt_probed_ext=False):
            probe_status = self.audio_link_tester.probe(probed_song_info.download_url, request_overrides)
            probed_song_info.download_url_status['probe_status'] = probe_status
            probed_song_info.file_size = probe_status['file_size']
            if adopt_probed_ext: probed_song_info.ext = probe_status['ext']
            if probed_song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS: probed_song_info.ext = probe_status['ext'] if (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS) else 'mp3'
        # --necessary information
        license_token = safeextractfromdict(self.auth_info, ['results', 'USER', 'OPTIONS', 'license_token'], None)
        track_token = safeextractfromdict(download_result, ['results', 'TRACK_TOKEN'], None) or download_result.get('track_token')
        track_hash = safeextractfromdict(download_result, ['results', 'MD5_ORIGIN'], None) or download_result.get('md5_origin')
        media_version = safeextractfromdict(download_result, ['results', 'MEDIA_VERSION'], None) or download_result.get('media_version')
        fallback_song_id = safeextractfromdict(download_result, ['results', 'FALLBACK', 'SNG_ID'], None) or safeextractfromdict(download_result, ['fallback', 'sng_id'], None) or safeextractfromdict(download_result, ['fallback', 'id'], None)
        # --fetch from high to low qualities
        for quality in DeezerMusicClientUtils.MUSIC_QUALITIES:
            # both tokens are required by get_url and do not change between qualities
            if not track_token or not license_token: break
            try: (resp := self.post("https://media.deezer.com/v1/get_url", json={'license_token': license_token, 'media': [{'type': "FULL", "formats": [{"cipher": "BF_CBC_STRIPE", "format": quality}]}], 'track_tokens': [track_token,]}, **request_overrides)).raise_for_status()
            except Exception: continue
            download_result['track_details'] = resp2json(resp=resp)
            candidate_results = safeextractfromdict(download_result['track_details'], ['data', 0, 'media', 0, 'sources'], []) or []
            if not (candidate_results := [c for c in candidate_results if isinstance(c, dict) and c.get('url') and str(c.get('url')).startswith('http')]): continue
            try: file_size_bytes = float(safeextractfromdict(download_result['track_details'], ['data', 0, 'media', 0, 'filesize'], 0))
            except Exception: file_size_bytes = 0
            for candidate_result in candidate_results:
                song_info = _buildsonginfo(candidate_result['url'], ext=str(candidate_result['url']).split('?')[0].split('.')[-1], file_size_bytes=int(file_size_bytes), file_size=byte2mb(file_size_bytes))
                _applyprobe(song_info)
                if song_info.with_valid_download_url: break
            if song_info.with_valid_download_url: break
        # --fallback id retry if possible
        if (not song_info.with_valid_download_url) and (not is_fallback_retry) and fallback_song_id: return self._parsewithofficialapiv1(search_result={'id': fallback_song_id}, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, lossless_quality_definitions=lossless_quality_definitions, is_fallback_retry=True, request_overrides=request_overrides)
        # --manually construct download url, pretty sketchy
        if (not song_info.with_valid_download_url) and (media_version is not None) and (track_hash is not None):
            song_info = _buildsonginfo(DeezerMusicClientUtils.getencryptedfileurl(song_id, track_hash=track_hash, media_version=media_version), ext='mp3')
            _applyprobe(song_info, adopt_probed_ext=True)
        # --use preview audio link
        if (not song_info.with_valid_download_url):
            download_url = safeextractfromdict(download_result, ['results', 'MEDIA', 0, 'HREF'], None) or download_result.get('preview')
            song_info = _buildsonginfo(download_url, ext=str(download_url).split('?')[0].split('.')[-1])
            _applyprobe(song_info)
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results, any failure on the deezer side falls back to LyricSearchClient
    try:
        (resp := self.post('https://auth.deezer.com/login/renew?jo=p&rto=c&i=c', **request_overrides)).raise_for_status()
        headers = {"Content-Type": "application/json", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Origin": "https://www.deezer.com", "Referer": "https://www.deezer.com/", "Authorization": f"Bearer {resp2json(resp=resp)['jwt']}"}
        payload = {"operationName": "GetLyrics", "variables": {"trackId": str(song_id)}, "query": "query GetLyrics($trackId: String!) { track(trackId: $trackId) { id lyrics { id text ...SynchronizedWordByWordLines ...SynchronizedLines licence copyright writers __typename } __typename } } fragment SynchronizedWordByWordLines on Lyrics { id synchronizedWordByWordLines { start end words { start end word __typename } __typename } __typename } fragment SynchronizedLines on Lyrics { id synchronizedLines { lrcTimestamp line lineTranslated milliseconds duration __typename } __typename }"}
        (resp := requests.post("https://pipe.deezer.com/api", headers=headers, json=payload, **request_overrides)).raise_for_status()
        lyric_result = resp2json(resp=resp)
        lyric = cleanlrc(DeezerMusicClientUtils.covert2lrclyrics(lyric_result['data']['track']['lyrics'])) or 'NULL'
    except Exception: lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}; self._setauthinfo(request_overrides=request_overrides)
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
for search_result in resp2json(resp=resp)['data']:
# --parse with official apis
try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
# --append to song_infos
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
# init
request_overrides = request_overrides or {}; self._setauthinfo(request_overrides=request_overrides)
playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, DEEZER_MUSIC_HOSTS)): return song_infos
if (not self.default_cookies or 'arl' not in self.default_cookies): self.logger_handle.warning(f'{self.source}.parseplaylist >>> cookies are not configured, so song downloads are restricted and only the preview portion of the track can be downloaded.')
# get tracks in playlist
tracks_in_playlist, page, page_size, playlist_result_first = [], 1, 500, {}
while True:
payload = {'playlist_id': playlist_id, 'start': (page - 1) * page_size, 'tab': 0, 'header': True, 'lang': 'de', 'nb': page_size}
try: (resp := self.post(f"https://www.deezer.com/ajax/gw-light.php?method=deezer.pagePlaylist&input=3&api_version=1.0&api_token={safeextractfromdict(self.auth_info, ['results', 'checkForm'], None)}", json=payload, **request_overrides)).raise_for_status()
except Exception: break
if not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['results', 'SONGS', 'data'], []): break
tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['results', 'SONGS', 'data'], [])); page += 1
if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
if (float(safeextractfromdict(playlist_result, ['results', 'DATA', 'NB_SONG'], 0)) <= len(tracks_in_playlist)): break
tracks_in_playlist = list({d["SNG_ID"]: d for d in tracks_in_playlist}.values())
# parse track by track in playlist
with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
for idx, track_info in enumerate(tracks_in_playlist):
if idx > 0: main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
if song_info.with_valid_download_url: song_infos.append(song_info)
main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
# post processing
playlist_name = safeextractfromdict(playlist_result_first, ['results', 'DATA', 'TITLE'], None)
song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
for song_info in song_infos:
song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
# return results
return song_infos
@@ -0,0 +1,147 @@
'''
Function:
Implementation of FiveSingMusicClient: https://5sing.kugou.com/index.html
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import re
import copy
from bs4 import BeautifulSoup
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import FIVESING_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse, urljoin
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, byte2mb, resp2json, usesearchheaderscookies, safeextractfromdict, extractdurationsecondsfromlrc, seconds2hms, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
'''FiveSingMusicClient'''
class FiveSingMusicClient(BaseMusicClient):
source = 'FiveSingMusicClient'
MUSIC_QUALITIES = ['sq', 'hq', 'lq']
def __init__(self, **kwargs):
    '''Initialise the base client, install the default headers and start the session.

    Args:
        **kwargs: forwarded unchanged to the base class constructor.
    '''
    super(FiveSingMusicClient, self).__init__(**kwargs)
    # one user agent for every request kind, search and parse requests additionally send a referer
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
    referer = "https://5sing.kugou.com/"
    self.default_search_headers = {"user-agent": user_agent, "Referer": referer}
    self.default_parse_headers = {"user-agent": user_agent, "Referer": referer}
    self.default_download_headers = {"user-agent": user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'keyword': keyword, 'sort': 1, 'page': 1, 'filter': 0, 'type': 0}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'http://search.5sing.kugou.com/home/json?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['page'] = int(count // page_size) + 1
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one search hit into a SongInfo with a download url plus lyric, album and cover.

    Args:
        search_result: dict carrying "songId" and "typeEname" (also read: "songName", "singer").
        song_info_flac: an already resolved SongInfo that may be reused as is.
        lossless_quality_is_sufficient: reuse song_info_flac when it is valid and its ext is lossless.
        lossless_quality_definitions: extensions regarded as lossless (only read, never mutated).
        request_overrides: extra keyword arguments forwarded to every http request.

    Returns:
        The resolved SongInfo; one without a valid download url when the id/type is missing or no
        quality in MUSIC_QUALITIES yields a usable link.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('songId'))) or (not (song_type := search_result.get('typeEname'))): return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
    else:
        (resp := self.get('http://mobileapi.5sing.kugou.com/song/getSongUrl', params={'songid': str(song_id), 'songtype': song_type}, **request_overrides)).raise_for_status()
        download_result: dict = resp2json(resp)
        # qualities are tried in the order given by MUSIC_QUALITIES, the first valid link wins
        for quality in FiveSingMusicClient.MUSIC_QUALITIES:
            download_url = safeextractfromdict(download_result, ['data', f'{quality}url'], '') or safeextractfromdict(download_result, ['data', f'{quality}url_backup'], '')
            if not download_url or not (str(download_url).startswith('http')): continue
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('songName')), singers=legalizestring(search_result.get('singer')), album='NULL', ext=safeextractfromdict(download_result, ['data', f'{quality}ext'], 'mp3'),
                file_size_bytes=safeextractfromdict(download_result, ['data', f'{quality}size'], 0), file_size=byte2mb(safeextractfromdict(download_result, ['data', f'{quality}size'], 0)), identifier=song_id, duration='-:-:-', lyric=None, cover_url=safeextractfromdict(download_result, ['data', 'user', 'I'], None),
                download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
            elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results, a failing request leaves lyric_result empty
    params = {'songid': str(song_id), 'songtype': song_type, 'songfields': '', 'userfields': ''}
    try: (resp := self.get('http://mobileapi.5sing.kugou.com/song/newget', params=params, **request_overrides)).raise_for_status(); lyric = cleanlrc(safeextractfromdict((lyric_result := resp2json(resp)), ['data', 'dynamicWords'], '')) or 'NULL'
    except Exception: lyric_result, lyric = dict(), 'NULL'
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    song_info.album = legalizestring(safeextractfromdict(lyric_result, ['data', 'albumName'], None))
    # keep the cover already taken from the download response when the detail lookup has none,
    # previously a failed or cover-less lookup overwrote it with None
    song_info.cover_url = safeextractfromdict(lyric_result, ['data', 'user', 'I'], None) or song_info.cover_url
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
for search_result in resp2json(resp)['list']:
# --parse with official apis
try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
# --append to song_infos
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve a 5sing playlist URL into a de-duplicated list of downloadable SongInfo objects.
    Args:
        playlist_url: playlist page URL; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every HTTP request.
    Returns:
        list of SongInfo sharing one work_dir; empty when the host is not in FIVESING_MUSIC_HOSTS.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, FIVESING_MUSIC_HOSTS)): return song_infos
    # get playlist meta from the mobile api (best effort, only used for the playlist name)
    try: playlist_result = resp2json(self.get(f'http://mobileapi.5sing.kugou.com/song/getsonglist?id={playlist_id}&songfields=ID,user', **request_overrides))
    except Exception: playlist_result = dict()
    # a non-dict payload cannot carry the scraped song_list below
    if not isinstance(playlist_result, dict): playlist_result = dict()
    # get tracks in playlist by scraping the playlist page itself
    soup, playlist_result['song_list'] = BeautifulSoup(self.get(playlist_url, **request_overrides).text, "lxml"), []
    for li in soup.select("ul.dj_songitems > li"):
        title_a, singer_a = li.select_one("span.s_title a.songlist_hits"), li.select_one("span.s_soner a")
        # --primary source of kind and id: the `songinfo` attribute, formatted as "<kind>$<id>"
        info_node = li.select_one("a.paly_btn[songinfo]") or li.select_one("a.add_btn[songinfo]")
        kind, song_id = None, None
        if info_node and info_node.has_attr("songinfo") and (m := re.match(r"([a-z]+)\$(\d+)", str(info_node["songinfo"]))):
            kind, song_id = m.group(1), m.group(2)
        # --fallback: the collect button carries songid plus a numeric songkind
        coll_btn = li.select_one("a.coll_btn[songid]")
        if (not song_id) and coll_btn:
            song_id, kind = coll_btn.get("songid"), {"1": "yc", "2": "fc", "3": "bz"}.get(coll_btn.get("songkind"))
        playlist_result['song_list'].append({
            "songName": title_a.get_text(strip=True) if title_a else None, "songId": song_id, "typeEname": kind,
            "song_url": urljoin("http://5sing.kugou.com", title_a["href"]) if title_a and title_a.has_attr("href") else None,
            "singer": singer_a.get_text(strip=True) if singer_a else None,
            "singer_url": urljoin("http://5sing.kugou.com", singer_a["href"]) if singer_a and singer_a.has_attr("href") else None,
        })
    tracks_in_playlist = playlist_result['song_list']
    # parse track by track in playlist
    num_tracks = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for num_done, track_info in enumerate(tracks_in_playlist, start=1):
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
            # exactly one advance per finished track: the bar never exceeds its total and an empty playlist needs no loop variable
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_done}/{num_tracks})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result, ['data', 'T'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,155 @@
'''
Function:
Implementation of JamendoMusicClient: https://www.jamendo.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import os
import copy
import random
import hashlib
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import JAMENDO_MUSIC_HOSTS
from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode, parse_qs
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, resp2json, usesearchheaderscookies, seconds2hms, safeextractfromdict, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester, LyricSearchClient
'''JamendoMusicClient'''
class JamendoMusicClient(BaseMusicClient):
source = 'JamendoMusicClient'
def __init__(self, **kwargs):
    '''
    Set up the Jamendo default header sets and open the session.
    Search and parse requests use the same browser-like header values (held in two separate dicts);
    downloads only send a user-agent. default_headers is the search header dict itself.
    '''
    super(JamendoMusicClient, self).__init__(**kwargs)
    # header values shared by the search and parse endpoints
    browser_api_headers = {
        "referer": "https://www.jamendo.com/search?q=musicdl", "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", "x-jam-version": "4rkl5f", "x-jam-call": "$536ab7feabd2404af7b6e54b4db74039734b58b3*0.5310391483096057~", "x-requested-with": "XMLHttpRequest",
    }
    # independent copies so that mutating one header set never leaks into the other
    self.default_search_headers = dict(browser_api_headers)
    self.default_parse_headers = dict(browser_api_headers)
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'query': keyword, 'type': 'track', 'limit': self.search_size_per_source, 'identities': 'www', 'offset': 0}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://www.jamendo.com/api/search?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['limit'] = page_size
page_rule['offset'] = count
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Build a SongInfo for one Jamendo track described by search_result.
# Args:
#   search_result: track dict from the search / playlist api; must carry a truthy `id`.
#   song_info_flac: optional, previously resolved SongInfo; reused as-is when lossless_quality_is_sufficient
#       is set, it has a valid download url and its ext is in lossless_quality_definitions.
#   lossless_quality_definitions: extensions treated as lossless (only read, never mutated here).
#   request_overrides: extra keyword arguments forwarded to every HTTP request.
# Returns:
#   SongInfo; a blank one (source only) when search_result is not a dict, has no id, or no http candidate url is found.
# init
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))): return song_info
# x-jam-call header value: "$" + sha1(path + rand).hexdigest() + "*" + rand + "~", with rand = str(random.random())
make_xjam_call_func = lambda path='/api/tracks': f"${hashlib.sha1((path + (rand := str(random.random()))).encode('utf-8')).hexdigest()}*{rand}~"
# obtain basic song_info
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
# --track details; any failure falls back to an empty dict
# NOTE(review): the bare `except:` below also swallows KeyboardInterrupt/SystemExit - `except Exception:` is presumably what was meant
(headers := copy.deepcopy(self.default_headers))['x-jam-call'] = make_xjam_call_func(path='/api/tracks')
try: (resp := self.get('https://www.jamendo.com/api/tracks?', headers=headers, params={'id[]': song_id}, **request_overrides)).raise_for_status(); download_result = resp2json(resp=resp)[0]
except: download_result = {}
# --artist details, only requested when search_result has no artist name
# NOTE(review): this request and the album one below are not guarded; an error here propagates to the caller
(headers := copy.deepcopy(self.default_headers))['x-jam-call'] = make_xjam_call_func(path='/api/artists')
artist_id = safeextractfromdict(search_result, ['artist', 'id'], None) or search_result.get('artistId') or download_result.get('artistId')
if not safeextractfromdict(search_result, ['artist', 'name'], None): download_result['artist'] = resp2json(self.get('https://www.jamendo.com/api/artists?', headers=headers, params={'id[]': artist_id}, **request_overrides))[0]
# --album details, only requested when search_result has no usable album name
(headers := copy.deepcopy(self.default_headers))['x-jam-call'] = make_xjam_call_func(path='/api/albums')
album_id = safeextractfromdict(search_result, ['album', 'id'], None) or search_result.get('albumId') or download_result.get('albumId')
if not legalizestring(safeextractfromdict(search_result, ['album', 'name'], None)): download_result['album'] = resp2json(self.get('https://www.jamendo.com/api/albums?', headers=headers, params={'id[]': album_id}, **request_overrides))[0]
# --candidate urls from the track details, in the listed order; only values starting with "http" are kept
candidate_urls = [safeextractfromdict(download_result, list(path), None) for path in [('stream', 'flac'), ('download', 'flac'), ('stream', 'mp33'), ('stream', 'mp32'), ('download', 'mp3'), ('stream', 'mp3'), ('stream', 'ogg'), ('download', 'ogg')]]
candidate_urls = [c for c in candidate_urls if c and str(c).startswith('http')]
# --prepend a copy of the first candidate whose `format` query value is replaced by "flac"
if candidate_urls: candidate_urls = [urlunsplit((*urlsplit(candidate_urls[0])[:3], urlencode([(k, 'flac' if k == 'format' else v) for k, v in parse_qsl(urlsplit(str(candidate_urls[0])).query, keep_blank_values=True)]), urlsplit(str(candidate_urls[0])).fragment))] + candidate_urls
# --nothing in the track details: fall back to the urls embedded in search_result
if not candidate_urls: candidate_urls = [safeextractfromdict(search_result, list(path), None) for path in [('download', 'mp3'), ('stream', 'mp3'), ('download', 'ogg'), ('stream', 'ogg')]]
if not (candidate_urls := [c for c in candidate_urls if c and str(c).startswith('http')]): return song_info
# --try the storage-host flac url first, then every candidate, stopping at the first valid download url
for download_url in ([f"https://prod-1.storage.jamendo.com/download/track/{song_id}/flac/"] + candidate_urls):
# ext is the `format` query value or, failing that, the path segment after /download/track/<digits>/; anything starting with "mp3" becomes "mp3"
# NOTE(review): when neither is present re.search returns None and `.group(1)` raises AttributeError
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name') or download_result.get('name')), singers=legalizestring(safeextractfromdict(search_result, ['artist', 'name'], None) or safeextractfromdict(download_result, ['artist', 'name'], None)), album=legalizestring(safeextractfromdict(search_result, ['album', 'name'], None) or safeextractfromdict(download_result, ['album', 'name'], None)),
ext=('mp3' if (f := (parse_qs(urlsplit(str(download_url)).query).get('format', [None])[0] or re.search(r'/download/track/\d+/([^/]+)/', urlsplit(str(download_url)).path).group(1))).startswith('mp3') else f), file_size_bytes=None, file_size=None, identifier=song_id, duration_s=search_result.get('duration') or download_result.get('duration', 0), duration=seconds2hms(search_result.get('duration') or download_result.get('duration')), lyric=download_result.get('lyrics') or 'NULL',
cover_url=f"https://usercontent.jamendo.com?type=album&id={album_id}&width=300&trackid={song_id}", download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
# lyrics from the api use "<br />" as line separator
if song_info.lyric and song_info.lyric not in {'NULL'}: song_info.lyric = cleanlrc(song_info.lyric.replace('<br />', '\n'))
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
# prefer the probed extension when the parsed one is not a known audio ext, else default to mp3
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
# supplement lyric results; a non-"NULL" hit from LyricSearchClient replaces the lyric found above
lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one Jamendo search page and append every downloadable hit to song_infos.
    Args:
        keyword: search keyword (not read here; the query is already encoded in search_url).
        search_url: fully built search URL to request.
        request_overrides: extra keyword arguments forwarded to every HTTP request.
        song_infos: list extended in place with valid SongInfo objects; a new list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: task id inside progress.
    Returns:
        The song_infos list (the caller's list when one was passed).
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: the former `[]` default was one object shared by every call that omitted song_infos
    if song_infos is None: song_infos = []
    # x-jam-call header value: "$" + sha1(path + rand).hexdigest() + "*" + rand + "~"
    make_xjam_call_func = lambda path='/api/search': f"${hashlib.sha1((path + (rand := str(random.random()))).encode('utf-8')).hexdigest()}*{rand}~"
    # successful
    try:
        # --search results
        (headers := copy.deepcopy(self.default_headers))['x-jam-call'] = make_xjam_call_func(path='/api/search')
        (resp := self.get(search_url, headers=headers, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp):
            # --parse with official apis, a failing item must not abort the whole page
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (progress is optional, its default is None)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve a Jamendo playlist URL into a de-duplicated list of downloadable SongInfo objects.
    Args:
        playlist_url: playlist page URL; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every HTTP request.
    Returns:
        list of SongInfo sharing one work_dir; empty when the URL has no `/playlist/<id>` segment
        or its host is not in JAMENDO_MUSIC_HOSTS.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    song_infos = []
    playlist_id = id_match.group(1) if (id_match := re.search(r'/playlist/([^/]+)', playlist_url)) else None
    # without an id the playlists api would be queried with no id filter at all
    if not playlist_id: return song_infos
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, JAMENDO_MUSIC_HOSTS)): return song_infos
    # x-jam-call header value: "$" + sha1(path + rand).hexdigest() + "*" + rand + "~"
    make_xjam_call_func = lambda path='/api/playlists': f"${hashlib.sha1((path + (rand := str(random.random()))).encode('utf-8')).hexdigest()}*{rand}~"
    # get tracks in playlist
    (headers := copy.deepcopy(self.default_headers))['x-jam-call'] = make_xjam_call_func(path='/api/playlists')
    resp = self.get('https://www.jamendo.com/api/playlists?', headers=headers, params={'id[]': playlist_id}, **request_overrides)
    playlist_result = resp2json(resp)[0]
    tracks_in_playlist = playlist_result['tracks']
    # parse track by track in playlist
    num_tracks = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for num_done, track_info in enumerate(tracks_in_playlist, start=1):
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
            # exactly one advance per finished track: the bar never exceeds its total and an empty playlist needs no loop variable
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_done}/{num_tracks})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result, ['name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,149 @@
'''
Function:
Implementation of JooxMusicClient: https://www.joox.com/intl
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import copy
import base64
import json_repair
from bs4 import BeautifulSoup
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import JOOX_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse, parse_qs
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, extractdurationsecondsfromlrc, cookies2string, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester, LyricSearchClient
'''JooxMusicClient'''
# Music client for https://www.joox.com/intl built on BaseMusicClient.
class JooxMusicClient(BaseMusicClient):
source = 'JooxMusicClient'
# Keys looked up, in this order, in the web_get_songinfo payload by _parsewithofficialapiv1;
# every key holding a truthy value becomes a candidate download url, so list order is the try order.
# NOTE(review): the list appears to run from highest to lowest quality and to end with preview/clip keys - confirm against the api.
FUZZY_MUSIC_QUALITIES = [
"master_tapeUrl", "master_tapeURL", "master_tape_url", "masterTapeUrl", "masterTapeURL", "rMasterTapeUrl", "rMasterTapeURL", "hiresUrl", "hiresURL", "hires_url", "hiResUrl", "hiResURL", "rHiresUrl", "rHiResUrl", "flacUrl", "flacURL", "flac_url", "rFlacUrl", "rflacUrl", "rFLACUrl", "apeUrl", "apeURL", "ape_url", "rApeUrl", "rapeUrl", "stereo_atmosUrl", "stereo_atmosURL", "stereo_atmos_url", "stereoAtmosUrl", "stereoAtmosURL", "atmosUrl", "atmosURL", "atmos_url", "rStereoAtmosUrl", "rAtmosUrl", "dolby448Url", "dolby448URL", "dolby448_url", "rDolby448Url",
"rDolby448URL", "dolby256Url", "dolby256URL", "dolby256_url", "rDolby256Url", "rDolby256URL", "r320Url", "r320url", "r320_url", "320Url", "320URL", "320_url", "url320", "mp3320Url", "mp3_320_url", "highUrl", "high_url", "r320oggUrl", "r320OggUrl", "r320OggURL", "r320_ogg_url", "320oggUrl", "320OggUrl", "ogg320Url", "ogg_320_url", "r192oggUrl", "r192OggUrl", "r192OggURL", "r192_ogg_url", "192oggUrl", "192OggUrl", "ogg192Url", "ogg_192_url", "r192k_mnacUrl", "r192k_mnacURL", "r192k_mnac_url", "r192kMnacUrl", "r192kMnacURL", "192k_mnacUrl", "192kMnacUrl",
"mnac192Url", "mnac_192_url", "r192mnacUrl", "r192Url", "r192url", "r192_url", "192Url", "192URL", "192_url", "url192", "m4a192Url", "aac192Url", "aac_192_url", "mp3Url", "r128Url", "r128url", "r128_url", "128Url", "128URL", "128_url", "url128", "m4a128Url", "aac128Url", "mp3128Url", "m4aUrl", "r96Url", "r96url", "r96_url", "96Url", "96URL", "96_url", "url96", "r48Url", "r48url", "r48_url", "48Url", "48URL", "48_url", "url48", "r24Url", "r24url", "r24_url", "24Url", "24URL", "24_url", "url24", "lowUrl", "low_url", "previewUrl", "preview_url", "refrainUrl",
"refrainURL", "refrain_url", "chorusUrl", "chorus_url", "clipUrl", "clip_url", "snippetUrl", "snippet_url", "trialUrl", "trial_url",
]
# (payload key, label) pairs; not referenced by the methods of this class shown here.
# NOTE(review): the labels look like bitrates in kbps - confirm before relying on them.
MUSIC_QUALITIES = [('r320Url', '320'), ('r192Url', '192'), ('mp3Url', '128'), ('m4aUrl', '96')]
def __init__(self, **kwargs):
    '''
    Set up the Joox default header sets and open the session.
    Search and parse requests use the same header values (held in two separate dicts); downloads only
    send a user-agent. Any non-empty default_*_cookies replaces the `cookie` header of its header set.
    default_headers is the search header dict itself.
    '''
    super(JooxMusicClient, self).__init__(**kwargs)
    # header values shared by the search and parse endpoints
    api_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", "cookie": "wmid=142420656; user_type=1; country=id; session_key=2a5d97d05dc8fe238150184eaf3519ad;", "x-forwarded-for": "36.73.34.109"}
    self.default_search_headers = dict(api_headers)
    self.default_parse_headers = dict(api_headers)
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
    # user supplied cookies win over the built-in cookie header
    header_cookie_pairs = (
        (self.default_search_headers, self.default_search_cookies),
        (self.default_parse_headers, self.default_parse_cookies),
        (self.default_download_headers, self.default_download_cookies),
    )
    for header_set, cookie_jar in header_cookie_pairs:
        if cookie_jar: header_set['cookie'] = cookies2string(cookie_jar)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'country': 'hk', 'lang': 'zh_TW', 'key': keyword, 'type': '0'}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://cache.api.joox.com/openjoox/v2/search_type?'
page_rule = copy.deepcopy(default_rule)
search_urls = [base_url + urlencode(page_rule)]
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, lang: str = 'zh_TW', country: str = 'hk', song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Build a SongInfo for one Joox track described by search_result.
# Args:
#   search_result: track dict from the search / playlist data; must carry a truthy `id`.
#   lang, country: forwarded as query parameters to the songinfo and lyric endpoints.
#   song_info_flac: optional, previously resolved SongInfo; reused as-is when lossless_quality_is_sufficient
#       is set, it has a valid download url and its ext is in lossless_quality_definitions.
#   lossless_quality_definitions: extensions treated as lossless (only read, never mutated here).
#   request_overrides: extra keyword arguments forwarded to every HTTP request.
# Returns:
#   SongInfo; one without a valid download url when search_result is unusable or no candidate url works.
# init
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))): return song_info
# obtain basic song_info
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
(resp := self.get('https://api.joox.com/web-fcgi-bin/web_get_songinfo', params={'songid': song_id, 'lang': lang, 'country': country}, **request_overrides)).raise_for_status()
# strip the "MusicInfoCallback(" prefix and the last character before parsing
# NOTE(review): `[:-1]` drops the last character unconditionally - presumably the closing ")" of a JSONP wrapper; confirm
download_result = json_repair.loads(resp.text.removeprefix('MusicInfoCallback(')[:-1])
# every FUZZY_MUSIC_QUALITIES key with a truthy value is a candidate, tried in list order
candidate_results: list[dict] = [{'quality': fmq, 'url': download_result.get(fmq)} for fmq in JooxMusicClient.FUZZY_MUSIC_QUALITIES if download_result.get(fmq)]
for candidate_result in candidate_results:
# ext is the text after the last "." of the url with its query string removed
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(', '.join([singer.get('name') for singer in (search_result.get('artist_list', []) or []) if isinstance(singer, dict) and singer.get('name')])),
album=legalizestring(search_result.get('album_name')), ext=str(candidate_result['url']).split('?')[0].split('.')[-1], file_size=None, identifier=song_id, duration_s=download_result.get('minterval') or 0, duration=seconds2hms(download_result.get('minterval') or 0), lyric=None, cover_url=download_result.get('imgSrc'),
download_url=candidate_result['url'], download_url_status=self.audio_link_tester.test(candidate_result['url'], request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
# prefer the probed extension when the parsed one is not a known audio ext, else default to mp3
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
# no downloadable candidate: skip the lyric lookup entirely
if not song_info.with_valid_download_url: return song_info
# supplement lyric results
params = {'musicid': song_id, 'country': country, 'lang': lang}
# the `lyric` field of the web_lyric payload is base64 encoded; any failure falls back to LyricSearchClient
try: (resp := self.get('https://api.joox.com/web-fcgi-bin/web_lyric', params=params, **request_overrides)).raise_for_status(); lyric_result: dict = json_repair.loads(resp.text.replace('MusicJsonCallback(', '')[:-1]) or {}; lyric = cleanlrc(base64.b64decode(lyric_result.get('lyric', '')).decode('utf-8')) or 'NULL'
except Exception: lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# derive the duration from the lyric when it is missing or still the '-:-:-' placeholder
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch the Joox search page and append every downloadable hit to song_infos.
    Args:
        keyword: search keyword (not read here; the query is already encoded in search_url).
        search_url: fully built search URL; its `lang` and `country` query values are reused for the
            per-track requests and fall back to 'zh_TW' / 'hk' when absent.
        request_overrides: extra keyword arguments forwarded to every HTTP request.
        song_infos: list extended in place with valid SongInfo objects; a new list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: task id inside progress.
    Returns:
        The song_infos list (the caller's list when one was passed).
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: the former `[]` default was one object shared by every call that omitted song_infos
    if song_infos is None: song_infos = []
    # a url without lang/country (e.g. the default '') must not raise before the guarded section
    parsed_search_url = parse_qs(urlparse(search_url).query, keep_blank_values=True)
    lang, country = (parsed_search_url.get('lang') or ['zh_TW'])[0], (parsed_search_url.get('country') or ['hk'])[0]
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp=resp)['tracks']:
            # --entries may be wrapped in a list, an empty wrapper is skipped instead of aborting the page
            if isinstance(search_result, list):
                if not search_result: continue
                search_result = search_result[0]
            # --parse with official apis, a failing item must not abort the whole page
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, lang=lang, country=country, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (progress is optional, its default is None)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve a Joox playlist URL into a de-duplicated list of downloadable SongInfo objects.
    Args:
        playlist_url: playlist page URL; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every HTTP request.
    Returns:
        list of SongInfo sharing one work_dir; empty when the host is not in JOOX_MUSIC_HOSTS or the
        page has no `__NEXT_DATA__` script tag.
    '''
    # init
    request_overrides, lang, country = request_overrides or {}, 'zh_TW', 'hk'
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, JOOX_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist, they are embedded as json in the page's __NEXT_DATA__ script tag
    (resp := self.get(playlist_url, **request_overrides)).raise_for_status()
    script_tag = (BeautifulSoup(resp.text, 'lxml')).find('script', id='__NEXT_DATA__')
    if not script_tag: return song_infos
    playlist_result = json_repair.loads(script_tag.string)
    playlist_tracks_node = playlist_result['props']['pageProps']['allPlaylistTracks']
    tracks_in_playlist = playlist_tracks_node['tracks']['items']
    # parse track by track in playlist
    num_tracks = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for num_done, track_info in enumerate(tracks_in_playlist, start=1):
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, lang=lang, country=country, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
            # exactly one advance per finished track: the bar never exceeds its total and an empty playlist needs no loop variable
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_done}/{num_tracks})")
    # post processing
    playlist_name = safeextractfromdict(playlist_tracks_node, ['name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,248 @@
'''
Function:
Implementation of KugouMusicClient: http://www.kugou.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import time
import random
import base64
import hashlib
import warnings
import json_repair
from .base import BaseMusicClient
from urllib.parse import urlencode
from rich.progress import Progress
from pathvalidate import sanitize_filepath
from ..utils.hosts import KUGOU_MUSIC_HOSTS
from urllib.parse import urlparse, parse_qs, urljoin
from ..utils.kugouutils import KugouMusicClientUtils, MUSIC_QUALITIES
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, byte2mb, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, optionalimport, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
warnings.filterwarnings('ignore')
'''KugouMusicClient'''
class KugouMusicClient(BaseMusicClient):
source = 'KugouMusicClient'
def __init__(self, **kwargs):
    '''
    Set up the Kugou default header sets and open the session.
    Search, parse and download requests all send the same desktop user-agent, each from its own dict.
    default_headers is the search header dict itself.
    '''
    super(KugouMusicClient, self).__init__(**kwargs)
    desktop_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'}
    # independent copies so that mutating one header set never leaks into the others
    self.default_search_headers = dict(desktop_headers)
    self.default_parse_headers = dict(desktop_headers)
    self.default_download_headers = dict(desktop_headers)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {"format": "json", "keyword": keyword, "showtype": 1, "page": 1, "pagesize": 10}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'http://mobilecdn.kugou.com/api/v3/search/song?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['pagesize'] = page_size
page_rule['page'] = int(count // page_size) + 1
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithcggapi'''
def _parsewithcggapi(self, search_result: dict, request_overrides: dict = None) -> "SongInfo":
    '''Resolve a download link for one search hit through the music-api2.cenguigui.cn endpoint.

    Quality levels are tried in a fixed order; the loop stops at the first
    SongInfo whose download url is valid, or as soon as a request fails.
    Always returns a SongInfo (an empty one when nothing was resolved).
    Raises KeyError when ``search_result`` has no ``hash`` entry.
    '''
    # init
    curl_cffi, request_overrides, file_hash = optionalimport('curl_cffi'), request_overrides or {}, search_result['hash']
    api_levels = ['lossless', 'exhigh', 'hires', 'standard', 'ogg']
    # start from an empty result so the final return can never hit an unbound name
    song_info = SongInfo(source=self.source)
    # merge defaults with caller overrides (overrides win); passing both as keywords would
    # raise "got multiple values for keyword argument" when the caller sets e.g. timeout
    request_kwargs = {'timeout': 10, 'impersonate': 'chrome131', 'verify': False, **request_overrides}

    def size_in_mb(meta):
        # the API reports size as text such as "12.34MB"; anything unparsable counts as 0
        if not isinstance(meta, dict): return 0
        size_text = str(meta.get('size', '0.00MB')).removesuffix('MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0

    # duration depends only on the search hit, so compute it once ('timelen' is divided by 1000)
    try: duration_in_secs = search_result.get('duration') or (float(search_result.get('timelen', 0) or 0) / 1000)
    except Exception: duration_in_secs = 0
    # parse
    for level in api_levels:
        try: (resp := curl_cffi.requests.get(f"https://music-api2.cenguigui.cn/?kg=&id={file_hash}&type=song&format=json&level={level}", **request_kwargs)).raise_for_status()
        except Exception: break
        download_result = json_repair.loads(resp.text)
        if 'data' not in download_result or (size_in_mb(download_result['data']) < 0.01): continue
        download_url = safeextractfromdict(download_result, ['data', 'url'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
            source=self.source,
            song_name=legalizestring(safeextractfromdict(download_result, ['data', 'name'], None)),
            singers=legalizestring(safeextractfromdict(download_result, ['data', 'artist'], None)),
            album=legalizestring(search_result.get('album_name') or safeextractfromdict(search_result, ['albuminfo', 'name'], None)),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None,
            file_size=str(safeextractfromdict(download_result, ['data', 'size'], "") or "0.00 MB").removesuffix('MB').strip() + ' MB',
            identifier=file_hash,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric='NULL',
            cover_url=safeextractfromdict(download_result, ['data', 'pic'], None),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # encrypted format like mgg, skip by default
        # NOTE(review): this prefix test also matches 'mp3' / 'm4a' -- confirm that is intended
        if song_info.ext.startswith('m'): continue
        # prefer the probed extension when the url-derived one is not a known audio type
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithjbsouapi'''
def _parsewithjbsouapi(self, search_result: dict, request_overrides: dict = None) -> "SongInfo":
    '''Resolve a download link, cover and lyric for one search hit through www.jbsou.cn.

    Returns an empty SongInfo when the API gives no usable download url.
    Raises KeyError when ``search_result`` has no ``hash`` entry, and lets the
    HTTP error of the initial POST propagate.
    '''
    # init
    request_overrides, file_hash, base_url = request_overrides or {}, search_result['hash'], 'https://www.jbsou.cn/'
    headers = {
        "accept": "application/json, text/javascript, */*; q=0.01", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "content-type": "application/x-www-form-urlencoded; charset=UTF-8", "origin": "https://www.jbsou.cn",
        "priority": "u=1, i", "referer": "https://www.jbsou.cn/", "sec-ch-ua": '"Not(A:Brand";v="8", "Chromium";v="144", "Google Chrome";v="144"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "x-requested-with": "XMLHttpRequest",
        "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
    }
    # parse
    (resp := self.post(base_url, data={'input': file_hash, 'filter': 'id', 'type': 'kugou', 'page': '1'}, headers=headers, **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    # urljoin(base, '') returns the site root, so an absent field must be rejected
    # before joining; otherwise the homepage would be treated as the audio link
    relative_download_url = safeextractfromdict(download_result, ['data', 0, 'url'], '')
    if not relative_download_url: return SongInfo(source=self.source)
    # follow redirects to obtain the final audio link
    try: download_url = self.session.head(urljoin(base_url, relative_download_url), headers=headers, allow_redirects=True, **request_overrides).url
    except Exception: return SongInfo(source=self.source)
    if not download_url or not str(download_url).startswith('http'): return SongInfo(source=self.source)
    try: duration_in_secs = search_result.get('duration') or (float(search_result.get('timelen', 0) or 0) / 1000)
    except Exception: duration_in_secs = 0
    # cover: only resolve it when the API supplied one, so the search-result cover can act as fallback
    relative_cover_url, cover_url = safeextractfromdict(download_result, ['data', 0, 'cover'], ""), None
    if relative_cover_url:
        try: cover_url = self.session.head(urljoin(base_url, relative_cover_url), headers=headers, allow_redirects=True, **request_overrides).url
        except Exception: cover_url = None
    if not cover_url: cover_url = safeextractfromdict(search_result, ['trans_param', 'union_cover'], None)
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(safeextractfromdict(download_result, ['data', 0, 'name'], None)),
        singers=legalizestring(safeextractfromdict(download_result, ['data', 0, 'artist'], None)),
        album=legalizestring(search_result.get('album_name') or safeextractfromdict(search_result, ['albuminfo', 'name'], None)),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size=None,
        identifier=str(file_hash),
        duration_s=duration_in_secs,
        duration=seconds2hms(duration_in_secs),
        lyric='NULL',
        cover_url=cover_url,
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.download_url_status['probe_status'] = probe_status
    song_info.file_size = probe_status['file_size']
    # cover urls taken from the search result may carry a '{size}' placeholder
    if song_info.cover_url and isinstance(song_info.cover_url, str) and ('{size}' in song_info.cover_url): song_info.cover_url = song_info.cover_url.format(size=300)
    # prefer the probed extension when the url-derived one is not a known audio type
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    if not song_info.with_valid_download_url: return song_info
    # lyric: skip the request when the API supplied no lrc link (joining '' would fetch the homepage)
    relative_lyric_url, lyric = safeextractfromdict(download_result, ['data', 0, 'lrc'], ""), 'NULL'
    if relative_lyric_url:
        try: (resp := self.get(urljoin(base_url, relative_lyric_url), headers=headers, allow_redirects=True, **request_overrides)).raise_for_status(); lyric = cleanlrc(resp.text)
        except Exception: lyric = 'NULL'
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    '''Try the third-party parsers in order and return the first SongInfo with a valid download url.

    Returns an empty SongInfo when cookies are configured (on the client or in
    ``request_overrides``) or when every parser fails.
    '''
    # the signature allows None, so normalise before calling .get on it
    request_overrides = request_overrides or {}
    if self.default_cookies or request_overrides.get('cookies'): return SongInfo(source=self.source)
    for imp_func in (self._parsewithcggapi, self._parsewithjbsouapi):
        # a failing parser must not stop the chain; Exception (not a bare except)
        # keeps KeyboardInterrupt / SystemExit propagating
        try:
            candidate = imp_func(search_result, request_overrides)
            if candidate.with_valid_download_url: return candidate
        except Exception:
            continue
    return SongInfo(source=self.source)
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one search hit through the Kugou endpoints and attach its lyric.

    ``song_info_flac`` is a previously resolved third-party result. It is
    returned directly when ``lossless_quality_is_sufficient`` is set and its
    extension is in ``lossless_quality_definitions``; otherwise it competes
    with (and serves as fallback for) the result obtained here.
    Returns an empty SongInfo when ``search_result`` is not a dict or has no ``hash``.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('hash'))): return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions):
        song_info = song_info_flac
    else:
        # duration depends only on the search hit ('timelen' is divided by 1000)
        try: duration_in_secs = search_result.get('duration') or (float(search_result.get('timelen', 0) or 0) / 1000)
        except Exception: duration_in_secs = 0
        for quality in MUSIC_QUALITIES:
            # work on a copy so per-request tweaks never leak into the caller's overrides
            per_request_overrides = copy.deepcopy(request_overrides)
            if ('impersonate' not in per_request_overrides) and self.enable_curl_cffi: per_request_overrides['impersonate'] = random.choice(self.cc_impersonates)
            per_request_overrides['proxies'] = per_request_overrides.pop('proxies', None) or self._autosetproxies()
            try:
                # cookies are passed separately, so they are popped from the overrides
                request_cookies = copy.deepcopy(per_request_overrides.pop('cookies', None) or self.default_cookies)
                download_result: dict = KugouMusicClientUtils.getsongurl(self.session, hash_value=song_id, quality=quality, request_overrides=per_request_overrides, cookies=request_cookies)
            except Exception:
                download_result = {}
            download_url = safeextractfromdict(download_result, ['url'], '') or safeextractfromdict(download_result, ['backupUrl'], '')
            if not download_url:
                # fallback: trackercdn endpoint keyed by md5(hash + 'kgcloudv2')
                md5_hex = hashlib.md5((str(song_id) + 'kgcloudv2').encode("utf-8")).hexdigest()
                try: (resp := self.get(f"https://trackercdn.kugou.com/i/v2/?cdnBackup=1&behavior=download&pid=1&cmd=21&appid=1001&hash={song_id}&key={md5_hex}", **request_overrides)).raise_for_status(); download_result: dict = resp2json(resp)
                except Exception: continue
                download_url = safeextractfromdict(download_result, ['url'], '') or safeextractfromdict(download_result, ['backup_url'], '') or safeextractfromdict(download_result, ['backupUrl'], '') or safeextractfromdict(download_result, ['mp3Url'], '') or safeextractfromdict(download_result, ['backupMp3Url'], '')
            # some responses carry a list of mirrors; take the first
            if download_url and isinstance(download_url, (list, tuple)): download_url = list(download_url)[0]
            if not download_url or not str(download_url).startswith('http'): continue
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                source=self.source,
                song_name=legalizestring(search_result.get('songname', None) or search_result.get('songname_original') or search_result.get('filename') or search_result.get('name')),
                singers=legalizestring(search_result.get('singername') or ', '.join([singer.get('name') for singer in (search_result.get('singerinfo') or []) if isinstance(singer, dict) and singer.get('name')])),
                album=legalizestring(search_result.get('album_name') or safeextractfromdict(search_result, ['albuminfo', 'name'], None)),
                ext=download_url.split('?')[0].split('.')[-1],
                file_size_bytes=download_result.get('fileSize', 0),
                file_size=byte2mb(download_result.get('fileSize', 0)),
                identifier=song_id,
                duration_s=duration_in_secs,
                duration=seconds2hms(duration_in_secs),
                lyric='NULL',
                cover_url=safeextractfromdict(search_result, ['trans_param', 'union_cover'], None),
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            if song_info.cover_url and isinstance(song_info.cover_url, str) and ('{size}' in song_info.cover_url): song_info.cover_url = song_info.cover_url.format(size=300)
            # prefer the probed extension when the url-derived one is not a known audio type
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
            # keep the third-party result when it compares larger than the official one
            if song_info_flac.with_valid_download_url and song_info_flac.largerthan(song_info): song_info = song_info_flac
            if song_info.with_valid_download_url: break
    if not song_info.with_valid_download_url: song_info = song_info_flac
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results
    params = {'keyword': search_result.get('filename', ''), 'duration': search_result.get('duration', '99999'), 'hash': song_id}
    try:
        (resp := self.get('http://lyrics.kugou.com/search', params=params, **request_overrides)).raise_for_status()
        lyric_result = resp2json(resp=resp)
        first_candidate = lyric_result['candidates'][0]
        (resp := self.get(f"http://lyrics.kugou.com/download?ver=1&client=pc&id={first_candidate['id']}&accesskey={first_candidate['accesskey']}&fmt=lrc&charset=utf8", **request_overrides)).raise_for_status()
        lyric_result['lyrics.kugou.com/download'] = resp2json(resp=resp)
        # the lyric payload is base64-encoded utf-8 text
        lyric = cleanlrc(base64.b64decode(lyric_result['lyrics.kugou.com/download']['content']).decode('utf-8')) or 'NULL'
    except Exception:
        # lyrics are best-effort; Exception (not a bare except) lets Ctrl+C through
        lyric_result, lyric = dict(), 'NULL'
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every resolvable hit to ``song_infos``.

    ``song_infos`` is extended in place and also returned; a fresh list is
    created when it is omitted. Errors while fetching or parsing the page are
    not raised: they are reported through ``progress`` (when one is given)
    and the results collected so far are returned.
    '''
    # init
    request_overrides = request_overrides or {}
    # a list literal as default would be shared between calls and accumulate results
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['data']['info']:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis (lossless is only "sufficient" when no cookies are configured)
            lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve every track of a Kugou playlist ("special/single") link into SongInfo objects.

    Returns a list of SongInfo whose ``work_dir`` is a directory built from
    the playlist name (or ``playlist-<id>``). Returns an empty list when the
    link is not on a Kugou host. Raises AssertionError when the link is not a
    ``special/single/`` playlist link.
    '''
    # init
    request_overrides, song_infos = request_overrides or {}, []
    # follow redirects first so short/share links resolve to the canonical url
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    parsed_url = urlparse(playlist_url)
    # playlist id: prefer the ?id= query value, otherwise the last path segment without .html/.htm
    query_ids = parse_qs(parsed_url.query, keep_blank_values=False).get('id') or []
    playlist_id = query_ids[0] if query_ids else ''
    if not playlist_id: playlist_id = parsed_url.path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm')
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, KUGOU_MUSIC_HOSTS)): return song_infos
    # raised explicitly (same exception type as before) so the check survives `python -O`
    if 'special/single/' not in parsed_url.path: raise AssertionError('kugou playlist link must look like "https://www.kugou.com/yy/special/single/6914288.html"')
    headers = {'User-Agent': 'Android9-AndroidPhone-11239-18-0-playlist-wifi', 'Host': 'gatewayretry.kugou.com', 'x-router': 'pubsongscdn.kugou.com', 'mid': '239526275778893399526700786998289824956', 'dfid': '-', 'clienttime': str(time.time()).split('.')[0]}

    def sign(api_url: str) -> str:
        # signature = md5(salt + sorted query parameters concatenated + salt)
        salt = "OIlwieks28dk2k092lksi2UIkp"
        return hashlib.md5((salt + "".join(sorted(str(api_url).split("?", 1)[1].split("&"))) + salt).encode("utf-8")).hexdigest()

    # get tracks in playlist
    tracks_in_playlist, page = [], 1
    # a page that keeps failing is retried a bounded number of times instead of forever
    max_consecutive_failures, consecutive_failures = 5, 0
    while True:
        api_url = f'http://gatewayretry.kugou.com/v2/get_other_list_file?specialid={playlist_id}&need_sort=1&module=CloudMusic&clientver=11239&pagesize=300&specalidpgc={playlist_id}&userid=0&page={page}&type=0&area_code=1&appid=1005'
        try:
            (resp := self.get(api_url + '&signature=' + sign(api_url), headers=headers, **request_overrides)).raise_for_status()
        except Exception:
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures: break
            continue
        consecutive_failures = 0
        playlist_result = resp2json(resp=resp)
        page_tracks = safeextractfromdict(playlist_result, ['data', 'info'], [])
        if not page_tracks: break
        tracks_in_playlist.extend(page_tracks); page += 1
        # stop once the reported total has been collected
        if float(safeextractfromdict(playlist_result, ['data', 'count'], 0)) <= len(tracks_in_playlist): break
    # de-duplicate by hash, keeping the last occurrence of each
    tracks_in_playlist = list({d["hash"]: d for d in tracks_in_playlist}.values())
    num_tracks = len(tracks_in_playlist)
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the bar advances when the *next* track starts; the last step is taken after the loop
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({idx}/{num_tracks})")
            song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
            lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = song_info_flac
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # final step; guarded so an empty playlist neither advances the bar nor touches the loop index
        if tracks_in_playlist:
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_tracks}/{num_tracks})")
    # post processing
    # the playlist name is read from the `specialInfo` object embedded in the page; best-effort
    try: (resp := self.get(playlist_url, headers={'referer': 'https://www.kugou.com/songlist/'}, **request_overrides)).raise_for_status(); playlist_name = json_repair.loads(re.search(r'var\s+specialInfo\s*=\s*(\{.*?\});', resp.text, re.S).group(1))['name']
    except Exception: playlist_name = None
    song_infos = self._removeduplicates(song_infos=song_infos)
    work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir
        episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
            # NOTE(review): this creates work_dir, not eps_info.work_dir -- confirm which one is intended
            touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,230 @@
'''
Function:
Implementation of KuwoMusicClient: http://www.kuwo.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import time
import random
import base64
import warnings
from .base import BaseMusicClient
from rich.progress import Progress
from pathvalidate import sanitize_filepath
from ..utils.hosts import KUWO_MUSIC_HOSTS
from ..utils.kuwoutils import KuwoMusicClientUtils
from urllib.parse import urlencode, urlparse, parse_qs
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, optionalimport, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
warnings.filterwarnings('ignore')
def remove_prefix(value: str, prefix: str) -> str:
    '''Return ``value`` without a leading ``prefix``; unchanged when the prefix is empty or absent.'''
    has_prefix = bool(prefix) and value.startswith(prefix)
    return value[len(prefix):] if has_prefix else value
def remove_suffix(value: str, suffix: str) -> str:
    '''Return ``value`` without a trailing ``suffix``; unchanged when the suffix is empty or absent.'''
    has_suffix = bool(suffix) and value.endswith(suffix)
    return value[:len(value) - len(suffix)] if has_suffix else value
'''KuwoMusicClient'''
class KuwoMusicClient(BaseMusicClient):
source = 'KuwoMusicClient'
MUSIC_QUALITIES = [(22000, 'flac'), (320, 'mp3')] # playable flac and mp3 formats
ENC_MUSIC_QUALITIES = [(4000, '4000kflac'), (2000, '2000kflac'), (320, '320kmp3'), (192, '192kmp3'), (128, '128kmp3')] # encrypted mgg format
def __init__(self, **kwargs):
    '''Forward kwargs to the base client, install the default header sets, then call _initsession().'''
    super(KuwoMusicClient, self).__init__(**kwargs)
    # the same desktop-Chrome user agent is used for search, download and parse traffic
    desktop_chrome_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'
    # each header set is its own dict object, so editing one does not affect the others
    for header_attr in ('default_search_headers', 'default_download_headers', 'default_parse_headers'):
        setattr(self, header_attr, {'user-agent': desktop_chrome_ua})
    # default_headers is the very same object as the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one Kuwo search URL per result page.

    Entries of ``rule`` override the default query parameters; ``rn`` (page
    size) and ``pn`` (zero-based page number, as a string) are then set for
    every page. URLs are produced until ``self.search_size_per_source``
    results are covered in steps of ``self.search_size_per_page``.
    ``request_overrides`` is accepted but unused. Returns the list of URLs.
    '''
    # default query, overridden by the caller's rule
    query_template = {"vipver": "1", "client": "kt", "ft": "music", "cluster": "0", "strategy": "2012", "encoding": "utf8", "rformat": "json", "mobi": "1", "issubtitle": "1", "show_copyright_off": "1", "pn": "0", "rn": "10", "all": keyword}
    query_template.update(rule or {})
    endpoint = 'http://www.kuwo.cn/search/searchMusicBykeyWord?'
    per_page = self.search_size_per_page
    urls, covered = [], 0
    while self.search_size_per_source > covered:
        page_query = {**query_template, 'rn': per_page, 'pn': str(int(covered // per_page))}
        urls.append(endpoint + urlencode(page_query))
        covered += per_page
    return urls
'''_parsewithcggapi'''
def _parsewithcggapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a download link for one search hit through the kw-api.cenguigui.cn endpoint.

    Each quality level is requested with curl_cffi first and with ``self.get``
    as fallback; the loop stops at the first SongInfo whose download url is
    valid. Always returns a SongInfo (an empty one when nothing was resolved).
    An HTTP error of the fallback request propagates to the caller.
    '''
    # init
    curl_cffi, request_overrides, song_id = optionalimport('curl_cffi'), request_overrides or {}, remove_prefix(str(search_result.get('MUSICRID') or search_result.get('musicrid')), 'MUSIC_')
    # reversed list minus its first three entries ("jymaster", "sur", "hifi")
    api_levels = ["acc", "wma", "ogg", "standard", "exhigh", "ape", "lossless", "hires", "zp", "hifi", "sur", "jymaster"][::-1][3:]
    # start from an empty result so the final return can never hit an unbound name
    song_info = SongInfo(source=self.source)
    # merge defaults with caller overrides (overrides win); passing both as keywords would
    # raise "got multiple values for keyword argument" when the caller sets e.g. timeout
    curl_kwargs = {'timeout': 10, 'impersonate': 'chrome131', 'verify': False, **request_overrides}
    fallback_kwargs = {'timeout': 10, **request_overrides}

    def size_in_mb(meta):
        # the API reports size as text such as "12.34MB"; anything unparsable counts as 0
        if not isinstance(meta, dict): return 0
        size_text = remove_suffix(str(meta.get('size', '0.00MB')), 'MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0

    # parse
    for level in api_levels:
        api_url = f"https://kw-api.cenguigui.cn/?id={song_id}&type=song&level={level}&format=json"
        try: (resp := curl_cffi.requests.get(api_url, **curl_kwargs)).raise_for_status()
        except Exception: (resp := self.get(api_url, **fallback_kwargs)).raise_for_status()
        download_result = resp2json(resp=resp)
        if 'data' not in download_result or (size_in_mb(download_result['data']) < 0.01): continue
        download_url = safeextractfromdict(download_result, ['data', 'url'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        duration_in_secs = safeextractfromdict(download_result, ['data', 'duration'], 0)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
            source=self.source,
            song_name=legalizestring(safeextractfromdict(download_result, ['data', 'name'], None)),
            singers=legalizestring(safeextractfromdict(download_result, ['data', 'artist'], None)),
            album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None,
            file_size=remove_suffix(str(safeextractfromdict(download_result, ['data', 'size'], "") or "0.00"), 'MB').strip() + ' MB',
            identifier=str(song_id),
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lyric'], 'NULL')) or 'NULL',
            cover_url=safeextractfromdict(download_result, ['data', 'pic'], ""),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # prefer the probed extension when the url-derived one is not a known audio type
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithyyy001api'''
def _parsewithyyy001api(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a download link for one search hit through the api.yyy001.com endpoint.

    Every quality level is requested up to five times, each time with a
    randomly chosen API key, until the response code is 200. The loop stops
    at the first SongInfo whose download url is valid. Always returns a
    SongInfo (an empty one when nothing was resolved).
    '''
    # init
    request_overrides, song_id = request_overrides or {}, remove_prefix(str(search_result.get('MUSICRID') or search_result.get('musicrid')), 'MUSIC_')
    api_qualities = ["ff", "p", "h"]
    # API keys are stored base64-encoded
    encoded_api_keys = ['YzJmNjBlZDYtOTlmZC0xNjJlLWM0NzAtYjIxNDkwOGViNWI0YjYzYzFhN2E=', 'NTVjNTY3YzItNTJlNS1kMzdiLTE1N2MtMDE0MDIxNzEwYzc1NzY2OWNkYjc=', 'OTY4M2MwNzQtY2E3ZS01ZGYwLTUyZGEtMWEzNGZiNjVhOTZhZGU2NTczYjU=', 'OTdkZjQ0OTUtYzRjOS01MmFhLTNlODAtZjliZGFiODU1Y2UxZWIwN2JlZDk=']
    max_attempts = 5
    # start from an empty result so the final return can never hit an unbound name
    song_info = SongInfo(source=self.source)
    # merge the default timeout with caller overrides (overrides win); passing both as
    # keywords would raise "got multiple values for keyword argument 'timeout'"
    request_kwargs = {'timeout': 10, **request_overrides}

    def decode_key(encoded: str) -> str:
        return base64.b64decode(str(encoded).encode('utf-8')).decode('utf-8')

    # parse
    for quality in api_qualities:
        resp = None
        for attempt in range(max_attempts):
            candidate = self.get(f"https://api.yyy001.com/api/kwmusic/?apikey={decode_key(random.choice(encoded_api_keys))}&action=music_url&music_id={song_id}&quality={quality}", **request_kwargs)
            if candidate.json()['code'] in {'200', 200}:
                resp = candidate
                break
            # brief pause before retrying; pointless after the last attempt
            if attempt < max_attempts - 1: time.sleep(1)
        # every attempt was rejected: nothing to parse for this quality
        if resp is None: continue
        download_result = resp2json(resp=resp)
        download_url = safeextractfromdict(download_result, ['data', 'url'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        ext = download_url.split('?')[0].split('.')[-1]
        duration_in_secs = search_result.get('DURATION') or search_result.get('duration')
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
            source=self.source,
            song_name=legalizestring(search_result.get('SONGNAME') or search_result.get('name')),
            singers=legalizestring(search_result.get('ARTIST') or search_result.get('artist')),
            album=legalizestring(search_result.get('ALBUM') or search_result.get('album')),
            ext=ext,
            file_size_bytes=None,
            file_size=None,
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric='NULL',
            cover_url=search_result.get('hts_MVPIC') or search_result.get('albumpic'),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # prefer the probed extension when the url-derived one is not a known audio type
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    '''Try the third-party parsers in order and return the first SongInfo with a valid download url.

    Returns an empty SongInfo when cookies are configured (on the client or in
    ``request_overrides``) or when every parser fails.
    '''
    # the signature allows None, so normalise before calling .get on it
    request_overrides = request_overrides or {}
    if self.default_cookies or request_overrides.get('cookies'): return SongInfo(source=self.source)
    for imp_func in (self._parsewithcggapi, self._parsewithyyy001api):
        # a failing parser must not stop the chain; Exception (not a bare except)
        # keeps KeyboardInterrupt / SystemExit propagating
        try:
            candidate = imp_func(search_result, request_overrides)
            if candidate.with_valid_download_url: return candidate
        except Exception:
            continue
    return SongInfo(source=self.source)
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one search hit through the mobi.kuwo.cn endpoint and attach its lyric.

    ``song_info_flac`` is a previously resolved third-party result. It is
    returned directly when ``lossless_quality_is_sufficient`` is set and its
    extension is in ``lossless_quality_definitions``; otherwise it competes
    with (and serves as fallback for) the result obtained here.
    Returns an empty SongInfo when ``search_result`` is not a dict or carries
    neither ``MUSICRID`` nor ``musicrid``.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    # validate before touching search_result: calling .get on a non-dict would raise
    if not isinstance(search_result, dict): return song_info
    raw_rid = search_result.get('MUSICRID') or search_result.get('musicrid')
    if not raw_rid: return song_info
    song_id = remove_prefix(str(raw_rid), 'MUSIC_')
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions):
        song_info = song_info_flac
    else:
        for quality in KuwoMusicClient.MUSIC_QUALITIES:
            # quality is a (bitrate, format) pair; only the format name goes into the query
            query = f"user=0&corp=kuwo&source=kwplayer_ar_5.1.0.0_B_jiakong_vh.apk&p2p=1&type=convert_url2&sig=0&format={quality[1]}&rid={song_id}"
            try: (resp := self.get(f"http://mobi.kuwo.cn/mobi.s?f=kuwo&q={KuwoMusicClientUtils.encryptquery(query)}", headers={"user-agent": "okhttp/3.10.0"}, **request_overrides)).raise_for_status(); download_result = resp.text
            except Exception: continue
            # the response is plain text; take the first http link found in it
            url_match = re.search(r'http[^\s$\"]+', download_result)
            if not url_match: continue
            download_url = url_match.group(0)
            ext = download_url.split('?')[0].split('.')[-1]
            duration_in_secs = search_result.get('DURATION') or search_result.get('duration')
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                source=self.source,
                song_name=legalizestring(search_result.get('SONGNAME') or search_result.get('name')),
                singers=legalizestring(search_result.get('ARTIST') or search_result.get('artist')),
                album=legalizestring(search_result.get('ALBUM') or search_result.get('album')),
                ext=ext,
                file_size_bytes=None,
                file_size=None,
                identifier=song_id,
                duration_s=duration_in_secs,
                duration=seconds2hms(duration_in_secs),
                lyric='NULL',
                cover_url=search_result.get('hts_MVPIC') or search_result.get('albumpic'),
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            # prefer the probed extension when the url-derived one is not a known audio type
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
            # keep the third-party result when it compares larger than the official one
            if song_info_flac.with_valid_download_url and song_info_flac.largerthan(song_info): song_info = song_info_flac
            if song_info.with_valid_download_url: break
    if not song_info.with_valid_download_url: song_info = song_info_flac
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results (best-effort: any failure leaves the lyric untouched)
    encoded_params = KuwoMusicClientUtils.buildlyricsparams(song_id, True)
    try:
        (resp := self.get(f"http://newlyric.kuwo.cn/newlyric.lrc?{encoded_params}", **request_overrides)).raise_for_status()
        lyric_result = {'content': resp.content}
        lyric = cleanlrc(KuwoMusicClientUtils.convertrawlrc(KuwoMusicClientUtils.decodelyrics(resp.content, True))) or 'NULL'
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every resolvable hit to ``song_infos``.

    ``song_infos`` is extended in place and also returned; a fresh list is
    created when it is omitted. Errors while fetching or parsing the page are
    not raised: they are reported through ``progress`` (when one is given)
    and the results collected so far are returned.
    '''
    # init
    request_overrides = request_overrides or {}
    # a list literal as default would be shared between calls and accumulate results
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['abslist']:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis (lossless is only "sufficient" when no cookies are configured)
            lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve a Kuwo playlist url into a list of SongInfo objects.

    Args:
        playlist_url: playlist url; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every request
            (a default timeout of (10, 30) is added when none is given).

    Returns:
        De-duplicated list of songs with a valid download url, each assigned a
        shared work_dir. An empty list is returned when the resolved host is
        not one of KUWO_MUSIC_HOSTS.
    '''
    # init
    request_overrides = request_overrides or {}
    request_overrides.setdefault('timeout', (10, 30))
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    song_infos = []
    # prefer the ?id=... query parameter, otherwise use the last path segment without .html/.htm
    # (explicit checks instead of assert + bare except, which breaks under -O and swallows KeyboardInterrupt)
    query_ids = parse_qs(urlparse(playlist_url).query, keep_blank_values=False).get('id') or ['']
    playlist_id = query_ids[0] or remove_suffix(remove_suffix(urlparse(playlist_url).path.strip('/').split('/')[-1], '.html'), '.htm')
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, KUWO_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist
    tracks_in_playlist, page, playlist_result_first = [], 1, {}
    while True:
        try:
            (resp := self.get(f"https://m.kuwo.cn/newh5app/wapi/api/www/playlist/playListInfo?pid={playlist_id}&pn={page}&rn=100", **request_overrides)).raise_for_status()
        except Exception:
            break
        if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['data', 'musicList'], [])): break
        tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['data', 'musicList'], [])); page += 1
        # keep the first page around, the playlist name is read from it later
        if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        if (float(safeextractfromdict(playlist_result, ['data', 'total'], 0)) <= len(tracks_in_playlist)): break
    # drop repeated tracks, keyed by musicrid
    tracks_in_playlist = list({d["musicrid"]: d for d in tracks_in_playlist}.values())
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
        for idx, track_info in enumerate(tracks_in_playlist):
            # the bar is advanced for the previous track here and once more after the loop
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
            song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
            lossless_quality_is_sufficient = False if self.default_cookies or request_overrides.get('cookies') else True
            try:
                song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception:
                song_info = song_info_flac
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if song_info.with_valid_download_url: song_infos.append(song_info)
        main_process_context.advance(main_progress_id, 1)
        main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['data', 'name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        # NOTE(review): touchdir receives work_dir, not eps_info.work_dir - confirm this is intended
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,168 @@
'''
Function:
Implementation of MiguMusicClient: https://music.migu.cn/v5/#/musicLibrary
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import requests
from .base import BaseMusicClient
from rich.progress import Progress
from pathvalidate import sanitize_filepath
from ..utils.hosts import MIGU_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse, parse_qs, urlsplit, urljoin
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, byte2mb, resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
'''MiguMusicClient'''
class MiguMusicClient(BaseMusicClient):
source = 'MiguMusicClient'
MUSIC_QUALITIES = {'LQ': 'mp3', 'PQ': 'mp3', 'HQ': 'mp3', 'SQ': 'flac', 'ZQ': 'flac', 'Z3D': 'flac', 'ZQ24': 'flac', 'ZQ32': 'flac'}
def __init__(self, **kwargs):
    '''Initialise the base client, install the Migu request headers and open the session.'''
    super(MiguMusicClient, self).__init__(**kwargs)
    # headers sent to the c.musicapp.migu.cn api, the same values are used for searching and parsing
    api_headers = {
        "accept": "application/json, text/plain, */*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "activityid": "v4_zt_2022_music", "appid": "ce", "channel": "014X031", "connection": "keep-alive", "deviceid": "E60C6B2F-7F11-4362-9FCE-6F1CC86E0F18",
        "host": "c.musicapp.migu.cn", "hwid": "", "imei": "", "h5page": "", "imsi": "", "location-info": "", "mgm-user-agent": "", "oaid": "", "uid": "", "location-data": "",
        "logid": "h5page[1808]", "mgm-network-operators": "02", "mgm-network-standard": "03", "mgm-network-type": "03", "recommendstatus": "1",
        "referer": "https://y.migu.cn/app/v4/zt/2022/music/index.html", "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty", "origin": "https://y.migu.cn", "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site", "subchannel": "014X031", "test": "00", "ua": "Android_migu", "version": "6.8.8",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }
    self.default_search_headers = api_headers
    # independent copy so that the two header sets remain separate dict objects
    self.default_parse_headers = dict(api_headers)
    # headers used when downloading audio from freetyst.nf.migu.cn
    self.default_download_headers = {
        "accept": "*/*", "accept-encoding": "identity;q=1, *;q=0", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "connection": "keep-alive",
        "host": "freetyst.nf.migu.cn", "range": "bytes=0-", "sec-fetch-mode": "no-cors", "sec-fetch-dest": "audio",
        "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-site": "same-site", "referer": "https://y.migu.cn/app/v4/zt/2022/music/index.html",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }
    # the search headers double as the session defaults
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {"text": keyword, 'pageNo': 1, 'pageSize': 20, 'isCopyright': 1, 'sort': 1, 'searchSwitch': {"song": 1, "album": 0, "singer": 0, "tagSong": 1, "mvSong": 0, "bestShow": 1}}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://c.musicapp.migu.cn/v1.0/content/search_all.do?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['pageSize'] = page_size
page_rule['pageNo'] = int(count // page_size) + 1
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = ('flac',), request_overrides: dict = None) -> "SongInfo":
    '''Resolve a Migu search/playlist entry into a SongInfo through the official listen-url api.

    Args:
        search_result: one song entry; must be a dict carrying 'contentId'.
        song_info_flac: optional pre-resolved result that is returned as is when
            lossless_quality_is_sufficient is set and its ext is a lossless one.
        lossless_quality_is_sufficient: whether song_info_flac may short-circuit the lookup.
        lossless_quality_definitions: extensions regarded as lossless.
        request_overrides: extra keyword arguments forwarded to every request.

    Returns:
        A SongInfo; it has no valid download url when nothing could be resolved.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not (content_id := search_result.get('contentId'))): return song_info
    # the copyright id used to be read from 'contentId' as well; read the dedicated key and
    # keep the content id as fallback so entries without 'copyrightId' behave as before
    copyright_id = search_result.get('copyrightId') or content_id
    def safe_obtain_filesize_func(meta):
        # size fields may carry an 'MB' suffix; anything that is not a plain number counts as 0
        if not isinstance(meta, dict): return 0
        size_text = str(meta.get('size') or meta.get('iosSize') or meta.get('androidSize') or meta.get('isize') or meta.get('asize') or '0').removesuffix('MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions):
        song_info = song_info_flac
    else:
        rates = (search_result.get('rateFormats', []) or []) + (search_result.get('newRateFormats', []) or []) + (search_result.get('audioFormats', []) or [])
        # try the biggest files first
        for rate in sorted(rates, key=lambda x: int(safe_obtain_filesize_func(x)), reverse=True):
            if (not isinstance(rate, dict)) or (byte2mb(safe_obtain_filesize_func(rate)) == 'NULL') or (not rate.get('formatType')) or (not rate.get('resourceType')): continue
            if rate['formatType'] in {'Z3D'}: continue # TODO: support decrypt Z3D files in migu music
            try:
                (resp := self.get(f"https://c.musicapp.migu.cn/MIGUM3.0/strategy/listen-url/v2.4?resourceType={rate['resourceType']}&netType=01&scene=&toneFlag={rate['formatType']}&contentId={content_id}&copyrightId={copyright_id}&lowerQualityContentId={content_id}", **request_overrides)).raise_for_status()
            except Exception:
                continue
            download_url = safeextractfromdict((download_result := resp2json(resp=resp)), ['data', 'url'], "") or f"https://app.pd.nf.migu.cn/MIGUM3.0/v1.0/content/sub/listenSong.do?channel=mx&copyrightId={copyright_id}&contentId={content_id}&toneFlag={rate['formatType']}&resourceType={rate['resourceType']}&userId=15548614588710179085069&netType=00"
            if not download_url or not str(download_url).startswith('http'): continue
            download_url = re.sub(r'(?<=/)MP3_128_16_Stero(?=/)', 'MP3_320_16_Stero', download_url)
            duration_in_secs = safeextractfromdict(download_result, ['data', 'song', 'duration'], 0)
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name') or search_result.get('songName')), singers=legalizestring(', '.join([singer.get('name') for singer in (search_result.get('singers') or search_result.get('singerList') or []) if isinstance(singer, dict) and singer.get('name')])),
                album=legalizestring(search_result.get('album') or (', '.join([album.get('name') for album in (search_result.get('albums') or []) if isinstance(album, dict) and album.get('name')]))), ext=MiguMusicClient.MUSIC_QUALITIES.get(rate['formatType']) or 'mp3', file_size_bytes=safe_obtain_filesize_func(rate), file_size=byte2mb(safe_obtain_filesize_func(rate)), identifier=content_id,
                duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=safeextractfromdict(search_result, ['imgItems', -1, 'img'], None) or next((search_result.get(k) for k in ("img3", "img2", "img1") if search_result.get(k)), None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            # prefer the probed extension when it is a known audio type; otherwise keep the
            # format derived one (it used to be overwritten unconditionally, which made the
            # fallback below unreachable and turned every failed probe into 'mp3')
            if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS: song_info.ext = probe_status['ext']
            elif song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS: song_info.ext = 'mp3'
            if song_info.cover_url and not song_info.cover_url.startswith('http'): song_info.cover_url = urljoin('https://d.musicapp.migu.cn', song_info.cover_url)
            if song_info.with_valid_download_url: break
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results
    lyric_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", "Referer": "https://y.migu.cn/"}
    # merge instead of passing duplicates, overrides carrying 'headers' or 'allow_redirects' used to raise TypeError
    lyric_request_kwargs = {'allow_redirects': True, **request_overrides, 'headers': lyric_headers}
    try:
        lyric_url = safeextractfromdict(search_result, ['lyricUrl'], '') or self.get(f"https://app.c.nf.migu.cn/MIGUM3.0/strategy/pc/listen/v1.0?scene=&netType=01&resourceType=2&copyrightId={copyright_id}&contentId={content_id}&toneFlag=PQ", **request_overrides).json()['data']['lrcUrl']
        (resp := requests.get(lyric_url, **lyric_request_kwargs)).raise_for_status()
        resp.encoding = 'utf-8'
        lyric, lyric_result = cleanlrc(resp.text), {'lyric': resp.text}
    except Exception:
        # lyrics are best effort, a failure leaves the song untouched
        lyric_result, lyric = {}, 'NULL'
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one Migu search page and append every song with a valid download url to song_infos.

    Args:
        keyword: search keyword; not read by this method itself.
        search_url: fully built search url that is requested with self.get.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list the results are appended to; a fresh list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: task id inside progress.

    Returns:
        The (possibly extended) song_infos list. Request or parsing errors are
        reported through progress and never raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a literal [] default would be shared between calls, so create the list lazily
    song_infos = [] if song_infos is None else song_infos
    def report(status: str):
        # progress is optional, so skip reporting when no Progress was supplied
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['songResultData']['result']:
            # --parse with official apis
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        report("Success")
    # failure
    except Exception as err:
        report(f"Error: {err}")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve a Migu playlist url into a list of SongInfo objects.

    Args:
        playlist_url: playlist url; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every request.

    Returns:
        De-duplicated list of songs with a valid download url, each assigned a
        shared work_dir. An empty list is returned when the resolved host is
        not one of MIGU_MUSIC_HOSTS.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    song_infos = []
    # the id is read from a playlistId parameter inside the url fragment, otherwise from the last
    # path segment without .html/.htm (explicit checks replace the former assert + bare except)
    try:
        fragment_ids = parse_qs(urlsplit(urlsplit(playlist_url).fragment).query).get('playlistId') or []
    except Exception:
        fragment_ids = []
    if fragment_ids and fragment_ids[0]: playlist_id = fragment_ids[0]
    else: playlist_id = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm')
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, MIGU_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist
    tracks_in_playlist, page, playlist_result_first = [], 1, {}
    while True:
        try:
            (resp := self.get(f"https://app.c.nf.migu.cn/MIGUM3.0/resource/playlist/song/v2.0?pageNo={page}&pageSize=50&playlistId={playlist_id}", **request_overrides)).raise_for_status()
        except Exception:
            break
        if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['data', 'songList'], [])): break
        tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['data', 'songList'], [])); page += 1
        # keep the first page around, the playlist meta info is attached to it below
        if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        if (float(safeextractfromdict(playlist_result, ['data', 'totalCount'], 0)) <= len(tracks_in_playlist)): break
    # drop repeated tracks, keyed by contentId
    tracks_in_playlist = list({d["contentId"]: d for d in tracks_in_playlist}.values())
    # playlist meta info (title) is best effort
    try:
        (resp := self.get(f'https://app.c.nf.migu.cn/resource/playlist/v2.0?playlistId={playlist_id}', **request_overrides)).raise_for_status()
        playlist_result_first['meta_info'] = resp2json(resp=resp)
    except Exception:
        pass
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
        for idx, track_info in enumerate(tracks_in_playlist):
            # the bar is advanced for the previous track here and once more after the loop
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
            try:
                song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
        main_process_context.advance(main_progress_id, 1)
        main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['meta_info', 'data', 'title'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        # NOTE(review): touchdir receives work_dir, not eps_info.work_dir - confirm this is intended
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,449 @@
'''
Function:
Implementation of NeteaseMusicClient: https://music.163.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import json
import copy
import time
import base64
import random
import hashlib
import warnings
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from urllib.parse import urlparse, parse_qs
from ..utils.hosts import NETEASE_MUSIC_HOSTS, hostmatchessuffix, obtainhostname
from ..utils.neteaseutils import EapiCryptoUtils, MUSIC_QUALITIES, DEFAULT_COOKIES
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, extractdurationsecondsfromlrc, touchdir, byte2mb, useparseheaderscookies, cleanlrc, SongInfo, AudioLinkTester
warnings.filterwarnings('ignore')
def remove_suffix(value: str, suffix: str) -> str:
    '''Return value without a trailing suffix; value is returned unchanged when suffix is empty or absent.'''
    ends_with_suffix = bool(suffix) and value.endswith(suffix)
    return value[: len(value) - len(suffix)] if ends_with_suffix else value
'''NeteaseMusicClient'''
class NeteaseMusicClient(BaseMusicClient):
source = 'NeteaseMusicClient'
def __init__(self, **kwargs):
    '''Initialise the base client, install Netease headers and default cookies and open the session.'''
    super(NeteaseMusicClient, self).__init__(**kwargs)
    # searching and parsing send the same browser like headers, each as its own dict
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', 'Referer': 'https://music.163.com/'}
    self.default_search_headers = dict(browser_headers)
    self.default_parse_headers = dict(browser_headers)
    self.default_download_headers = {}
    self.default_headers = self.default_search_headers
    # cookies configured by the base class win, DEFAULT_COOKIES only fills the gaps
    for cookie_attr in ('default_search_cookies', 'default_parse_cookies', 'default_download_cookies'):
        setattr(self, cookie_attr, getattr(self, cookie_attr) or DEFAULT_COOKIES)
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'s': keyword, 'type': 1, 'limit': 10, 'offset': 0}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://music.163.com/api/cloudsearch/pc'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['limit'] = page_size
page_rule['offset'] = int(count // page_size) * page_size
search_urls.append({'url': base_url, 'data': page_rule})
count += page_size
# return
return search_urls
'''_parsewithxiaoqinapi'''
def _parsewithxiaoqinapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a song through the wyapi-eo.toubiec.cn api, trying MUSIC_QUALITIES in order.

    Args:
        search_result: search entry; its 'id' is used as song id.
        request_overrides: extra keyword arguments forwarded to every request.

    Returns:
        The SongInfo of the first quality with a valid download url, otherwise
        the last candidate built (an empty SongInfo when none was built).
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # returned as is when no quality yields a url (the name used to be unbound in that case)
    song_info = SongInfo(source=self.source)
    # merged so that overrides carrying 'timeout' or 'verify' replace the defaults instead of raising TypeError
    post_kwargs = {'timeout': 10, 'verify': False, **request_overrides}
    def to_seconds_func(x):
        # convert 'hh:mm:ss', 'mm:ss', unit suffixed text ('3分25秒', '1h 2m 3s') or a bare number into seconds
        s = str(x).strip().lower()
        if not s: return 0
        # '\uff1a' is the full width colon; the former empty string operands matched every
        # input and inserted ':' between all characters, corrupting each duration
        if (':' in s) or ('\uff1a' in s):
            p = [int(v) for v in re.findall(r'\d+', s.replace('\uff1a', ':'))]
            if len(p) >= 3: return p[-3] * 3600 + p[-2] * 60 + p[-1]
            if len(p) == 2: return p[0] * 60 + p[1]
            return p[0] if p else 0
        def first_int(pattern, group=1):
            mo = re.search(pattern, s)
            return int(mo.group(group)) if mo else None
        hours = first_int(r'(\d+)\s*(?:小时|时|h|hr)') or 0
        minutes = first_int(r'(\d+)\s*(?:分钟|分|m|min)') or 0
        seconds = first_int(r'(\d+)\s*(?:秒|s|sec)')
        # a number directly after the minute unit counts as seconds when no second unit is present
        if seconds is None: seconds = first_int(r'(?:分钟|分|m|min)\s*(\d+)\b') or 0
        total = hours * 3600 + minutes * 60 + seconds
        return total if total > 0 else (first_int(r'\d+', 0) or 0)
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.post('https://wyapi-eo.toubiec.cn/api/getSongUrl', json={'id': song_id, 'level': quality}, **post_kwargs)).raise_for_status()
        except Exception:
            break
        download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['data', 'url'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        # song meta data and lyrics are best effort
        try:
            (resp := self.post('https://wyapi-eo.toubiec.cn/api/getSongInfo', json={'id': song_id}, **post_kwargs)).raise_for_status()
            download_result['song_info'] = resp2json(resp=resp)
        except Exception:
            pass
        try:
            (resp := self.post('https://wyapi-eo.toubiec.cn/api/getSongLyric', json={'id': song_id}, **post_kwargs)).raise_for_status()
            lyric_result = resp2json(resp=resp)
        except Exception:
            lyric_result = {}
        duration_in_secs = to_seconds_func(safeextractfromdict(download_result, ['data', 'duration'], ""))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result, 'quality': quality}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['song_info', 'data', 'name'], None)), singers=legalizestring(safeextractfromdict(download_result, ['song_info', 'data', 'singer'], None)), album=legalizestring(safeextractfromdict(download_result, ['song_info', 'data', 'album'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None,
            identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], "")) or "NULL", cover_url=safeextractfromdict(download_result, ['song_info', 'data', 'picimg'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # trust the probed extension only when the url derived one is not a known audio type
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithcggapi'''
def _parsewithcggapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a song through the api-v2.cenguigui.cn api, trying MUSIC_QUALITIES in order.

    Args:
        search_result: search entry; its 'id' is used as song id.
        request_overrides: extra keyword arguments forwarded to every request.

    Returns:
        The SongInfo of the first quality with a valid download url, otherwise
        the last candidate built (an empty SongInfo when none was built).
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # returned as is when no quality yields a url (the name used to be unbound in that case)
    song_info = SongInfo(source=self.source)
    # merged so that overrides carrying 'timeout' replace the default instead of raising TypeError
    get_kwargs = {'timeout': 10, **request_overrides}
    def safe_obtain_filesize_func(meta):
        # 'size' may carry an 'MB' suffix; anything that is not a plain number counts as 0
        if not isinstance(meta, dict): return 0
        size_text = remove_suffix(str(meta.get('size', '0.00MB')), 'MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0
    def to_seconds_func(x):
        # convert 'hh:mm:ss', 'mm:ss', unit suffixed text ('3分25秒', '1h 2m 3s') or a bare number into seconds
        s = str(x).strip().lower()
        if not s: return 0
        # '\uff1a' is the full width colon; the former empty string operands matched every
        # input and inserted ':' between all characters, corrupting each duration
        if (':' in s) or ('\uff1a' in s):
            p = [int(v) for v in re.findall(r'\d+', s.replace('\uff1a', ':'))]
            if len(p) >= 3: return p[-3] * 3600 + p[-2] * 60 + p[-1]
            if len(p) == 2: return p[0] * 60 + p[1]
            return p[0] if p else 0
        def first_int(pattern, group=1):
            mo = re.search(pattern, s)
            return int(mo.group(group)) if mo else None
        hours = first_int(r'(\d+)\s*(?:小时|时|h|hr)') or 0
        minutes = first_int(r'(\d+)\s*(?:分钟|分|m|min)') or 0
        seconds = first_int(r'(\d+)\s*(?:秒|s|sec)')
        # a number directly after the minute unit counts as seconds when no second unit is present
        if seconds is None: seconds = first_int(r'(?:分钟|分|m|min)\s*(\d+)\b') or 0
        total = hours * 3600 + minutes * 60 + seconds
        return total if total > 0 else (first_int(r'\d+', 0) or 0)
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(url=f'https://api-v2.cenguigui.cn/api/netease/music_v1.php?id={song_id}&type=json&level={quality}', **get_kwargs)).raise_for_status()
        except Exception:
            break
        download_result = resp2json(resp=resp)
        # the payload is parsed once and read defensively; 'data' used to be indexed before its presence was checked
        if (not isinstance(download_result, dict)) or ('data' not in download_result): continue
        # the api reports failures inside the url field, stop trying further qualities then
        if '获取歌曲地址失败,可能是会员到期了' in str(safeextractfromdict(download_result, ['data', 'url'], '') or ''): break
        if safe_obtain_filesize_func(download_result['data']) < 0.01: continue
        if not (download_url := safeextractfromdict(download_result, ['data', 'url'], '')) or not str(download_url).startswith('http'): continue
        duration_in_secs = to_seconds_func(safeextractfromdict(download_result, ['data', 'duration'], ''))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['data', 'name'], None)), singers=legalizestring(safeextractfromdict(download_result, ['data', 'artist'], None)), album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=remove_suffix(str(safeextractfromdict(download_result, ['data', 'size'], '')), 'MB').strip() + ' MB',
            identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lyric'], 'NULL')), cover_url=safeextractfromdict(download_result, ['data', 'pic'], ""), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # trust the probed extension only when the url derived one is not a known audio type
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithtmetuapi'''
def _parsewithtmetuapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a song through the www.tmetu.cn api, trying MUSIC_QUALITIES in order.

    Args:
        search_result: search entry; its 'id' is used as song id.
        request_overrides: extra keyword arguments forwarded to every request.

    Returns:
        The SongInfo of the first quality with a valid download url, otherwise
        the last candidate built (an empty SongInfo when none was built).
    '''
    # NOTE(review): a second method with this exact name is defined further down in the class
    # and replaces this one at class creation time - one of the two presumably needs renaming
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # returned as is when no quality yields a url (the name used to be unbound in that case)
    song_info = SongInfo(source=self.source)
    # merged so that overrides carrying 'timeout' replace the default instead of raising TypeError
    get_kwargs = {'timeout': 10, **request_overrides}
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(url=f'https://www.tmetu.cn/api/music/api.php?miss=songAll&id={song_id}&level={quality}&withLyric=true', **get_kwargs)).raise_for_status()
        except Exception:
            break
        download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['data', 'audioUrl'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        # the reported duration is divided by 1000 (presumably milliseconds - confirm against the api)
        try:
            duration_in_secs = float(safeextractfromdict(download_result, ['data', 'duration'], 0)) / 1000
        except Exception:
            duration_in_secs = 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['data', 'name'], None)), singers=legalizestring(str(safeextractfromdict(download_result, ['data', 'artists'], '') or '').replace('/', ', ')), album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=safeextractfromdict(download_result, ['data', 'size'], None),
            file_size=byte2mb(safeextractfromdict(download_result, ['data', 'size'], 0)), identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lyric'], 'NULL')) or 'NULL', cover_url=safeextractfromdict(download_result, ['data', 'picUrl'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # trust the probed extension only when the url derived one is not a known audio type
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithtmetuapi'''
def _parsewithtmetuapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via the signed music.rrvenn.cn endpoint.

    Qualities are tried in MUSIC_QUALITIES order. The loop stops at the first link for
    which SongInfo.with_valid_download_url is truthy, or as soon as a request fails.

    Args:
        search_result: track entry; its 'id' value is sent as the `url` query parameter.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced an http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merge instead of passing `timeout=10` next to **request_overrides: a caller-supplied
    # timeout (parseplaylist sets one) would otherwise raise "multiple values for keyword argument"
    request_kwargs = {'timeout': 10, **request_overrides}
    # parse
    for quality in MUSIC_QUALITIES:
        # the signature is md5(timestamp + fixed secret), recomputed for every request
        timestamp_str = str(int(time.time()))
        signature = hashlib.md5((timestamp_str + 'kxz_163music_secret_key_2024').encode('utf-8')).hexdigest()
        params = {"action": "music", "url": str(song_id), "level": quality, "type": "json", "timestamp": timestamp_str, "signature": signature}
        try:
            (resp := self.get(url='https://music.rrvenn.cn/api/api.php', params=params, **request_kwargs)).raise_for_status()
        except Exception:
            # a failed request aborts the whole quality walk
            break
        download_result = resp2json(resp=resp)
        download_url: str = safeextractfromdict(download_result, ['url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        artists, lyric_text = download_result.get('ar_name'), download_result.get('lyric')
        # this API returns no duration field here, so it is derived from the lyric
        duration_in_secs = extractdurationsecondsfromlrc(lyric_text)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(download_result.get('name')),
            singers=legalizestring(str(artists).replace('/', ', ') if artists else artists),
            album=legalizestring(download_result.get('al_name')),
            ext=str(download_url).split('?')[0].split('.')[-1],
            file_size=remove_suffix(str(download_result.get('size')), 'MB').strip() + ' MB',
            identifier=str(song_id),
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=cleanlrc(lyric_text or 'NULL') or 'NULL',
            cover_url=download_result.get('pic'),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithxuanluogeapi'''
def _parsewithxuanluogeapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via music.xuanluoge.top.

    For each quality in MUSIC_QUALITIES the method requests the play URL, then (best
    effort) the song detail and the lyric, and builds a SongInfo from the three answers.
    The loop stops at the first link for which with_valid_download_url is truthy, or as
    soon as the play-URL request fails.

    Args:
        search_result: track entry; its 'id' value is placed in the query string.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced an http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(url=f'https://music.xuanluoge.top/api.php?miss=getMusicUrl&id={song_id}&level={quality}', **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_result = resp2json(resp=resp)
        download_url: str = safeextractfromdict(download_result, ['data', 0, 'url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        # song detail is optional metadata: on failure the SongInfo is still built from defaults
        try:
            (resp := self.get(url=f'https://music.xuanluoge.top/api.php?miss=songDetail&id={song_id}', **request_kwargs)).raise_for_status()
            download_result['songDetail'] = resp2json(resp=resp)
        except Exception:
            pass
        # lyric is optional as well
        try:
            (resp := self.get(url=f'https://music.xuanluoge.top/api.php?miss=lyric&id={song_id}', **request_kwargs)).raise_for_status()
            lyric_result = resp2json(resp=resp)
        except Exception:
            lyric_result = dict()
        # 'dt' is divided by 1000 to obtain seconds
        try:
            duration_in_secs = float(safeextractfromdict(download_result, ['songDetail', 'data', 'dt'], 0)) / 1000
        except Exception:
            duration_in_secs = 0
        artist_entries = safeextractfromdict(download_result, ['songDetail', 'data', 'ar'], []) or []
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result, 'quality': quality},
            source=self.source,
            song_name=legalizestring(safeextractfromdict(download_result, ['songDetail', 'data', 'name'], None)),
            singers=legalizestring(', '.join([singer.get('name') for singer in artist_entries if isinstance(singer, dict) and singer.get('name')])),
            album=legalizestring(safeextractfromdict(download_result, ['songDetail', 'data', 'al', 'name'], None)),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=safeextractfromdict(download_result, ['data', 0, 'size'], None),
            file_size=byte2mb(safeextractfromdict(download_result, ['data', 0, 'size'], 0)),
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], 'NULL')) or 'NULL',
            cover_url=safeextractfromdict(download_result, ['songDetail', 'data', 'al', 'picUrl'], None),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithbugpkapi'''
def _parsewithbugpkapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via api.bugpk.com.

    Qualities are tried in MUSIC_QUALITIES order. Answers without a 'url' key or with a
    reported size below 0.01 are skipped. The loop stops at the first link for which
    with_valid_download_url is truthy, or as soon as a request fails.

    Args:
        search_result: track entry; its 'id' value is placed in the query string and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced a usable link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}

    def safe_obtain_filesize_func(meta):
        # numeric part of a size string such as '12.34MB'; 0 for non-dicts and non-numeric text
        if not isinstance(meta, dict):
            return 0
        size_text = remove_suffix(str(meta.get('size', '0.00MB')), 'MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0

    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(f'https://api.bugpk.com/api/163_music?ids={song_id}&level={quality}&type=json', **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_result = resp2json(resp=resp)
        if 'url' not in download_result or (safe_obtain_filesize_func(download_result) < 0.01):
            continue
        download_url = safeextractfromdict(download_result, ['url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        lyric = cleanlrc(safeextractfromdict(download_result, ['lyric'], 'NULL')) or 'NULL'
        download_url_status = self.audio_link_tester.test(download_url, request_overrides)
        artists = download_result.get('ar_name')
        # duration is derived from the lyric
        duration_in_secs = extractdurationsecondsfromlrc(lyric)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(download_result.get('name')),
            singers=legalizestring(str(artists).replace('/', ', ') if artists else artists),
            album=legalizestring(download_result.get('al_name')),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None,
            file_size=remove_suffix(str(safeextractfromdict(download_result, ['size'], '')), 'MB').strip() + ' MB',
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=lyric,
            cover_url=download_result.get('pic'),
            download_url=download_url,
            download_url_status=download_url_status,
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # fall back to the album name carried by the search entry
        if song_info.album == 'NULL':
            song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithyutangxiaowuapi'''
def _parsewithyutangxiaowuapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via yutangxiaowu.cn:4000/Song_V1.

    Qualities are tried in MUSIC_QUALITIES order. Answers without a 'url' key or with a
    reported size below 0.01 are skipped. The loop stops at the first link for which
    with_valid_download_url is truthy, or as soon as a request fails.

    Args:
        search_result: track entry; its 'id' value is placed in the query string and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced a usable link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}

    def safe_obtain_filesize_func(meta):
        # numeric part of a size string such as '12.34MB'; 0 for non-dicts and non-numeric text
        if not isinstance(meta, dict):
            return 0
        size_text = remove_suffix(str(meta.get('size', '0.00MB')), 'MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0

    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(f'https://yutangxiaowu.cn:4000/Song_V1?url={song_id}&level={quality}&type=json', **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_result = resp2json(resp=resp)
        if 'url' not in download_result or (safe_obtain_filesize_func(download_result) < 0.01):
            continue
        download_url = safeextractfromdict(download_result, ['url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        lyric = cleanlrc(safeextractfromdict(download_result, ['lyric'], 'NULL')) or 'NULL'
        download_url_status = self.audio_link_tester.test(download_url, request_overrides)
        artists = download_result.get('ar_name')
        # duration is derived from the lyric
        duration_in_secs = extractdurationsecondsfromlrc(lyric)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(download_result.get('name')),
            singers=legalizestring(str(artists).replace('/', ', ') if artists else artists),
            album=legalizestring(download_result.get('al_name')),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None,
            file_size=remove_suffix(str(safeextractfromdict(download_result, ['size'], "")), 'MB').strip() + ' MB',
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=lyric,
            cover_url=download_result.get('pic'),
            download_url=download_url,
            download_url_status=download_url_status,
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # fall back to the album name carried by the search entry
        if song_info.album == 'NULL':
            song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithnycnmbyfunsapi'''
def _parsewithnycnmbyfunsapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track by combining two services.

    api.nycnm.cn supplies the metadata (name, artists, album, lyric, cover) and
    api.byfuns.top supplies the download link as plain response text. Only the
    qualities MUSIC_QUALITIES[4:] are requested. The loop stops at the first link for
    which with_valid_download_url is truthy, or as soon as either request fails.

    Args:
        search_result: track entry; its 'id' value is placed in both query strings and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced an http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    # the API key is stored base64-encoded and decoded right before use
    decrypt_func, REQUEST_KEYS = lambda t: base64.b64decode(str(t).encode('utf-8')).decode('utf-8'), ['OTJiMWE4ZWQyMjg5ZmI4ZTk4NTAxZWMyYzE2Yzk4MWRmMWI1NzliMjhhM2Y2ZjIyMDFiYmJlNDc2YmI3Njc0MA==']
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}
    # parse
    for quality in MUSIC_QUALITIES[4:]:
        # metadata request
        try:
            (resp := self.get(f'https://api.nycnm.cn/API/163music.php?ids={song_id}&level={quality}&type=json&apikey={decrypt_func(random.choice(REQUEST_KEYS))}', **request_kwargs)).raise_for_status()
            download_result = resp2json(resp=resp)
        except Exception:
            break
        # link request: the response body itself is the URL
        try:
            download_url = self.get(f'https://api.byfuns.top/1/?id={song_id}&level={quality}', **request_kwargs).text.strip()
        except Exception:
            break
        if not str(download_url).startswith('http'):
            continue
        lyric = cleanlrc(safeextractfromdict(download_result, ['lyric'], 'NULL')) or 'NULL'
        download_url_status = self.audio_link_tester.test(download_url, request_overrides)
        artists = download_result.get('ar_name')
        # duration is derived from the lyric
        duration_in_secs = extractdurationsecondsfromlrc(lyric)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(download_result.get('name')),
            singers=legalizestring(str(artists).replace('/', ', ') if artists else artists),
            album=legalizestring(download_result.get('al_name')),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None,
            file_size=None,
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=lyric,
            cover_url=download_result.get('pic'),
            download_url=download_url,
            download_url_status=download_url_status,
        )
        # the probe result supplies the file size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # fall back to the album name carried by the search entry
        if song_info.album == 'NULL':
            song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithcunyuapi'''
def _parsewithcunyuapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via www.cunyuapi.top.

    Qualities are tried in MUSIC_QUALITIES order; the link is read from the
    'song_file_url' field. The loop stops at the first link for which
    with_valid_download_url is truthy, or as soon as a request fails.

    Args:
        search_result: track entry; its 'id' value is placed in the query string and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced an http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(url=f'https://www.cunyuapi.top/163music_play?id={song_id}&quality={quality}', **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_result = resp2json(resp=resp)
        download_url: str = safeextractfromdict(download_result, ['song_file_url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        # duration is derived from the lyric text
        duration_in_secs = extractdurationsecondsfromlrc(str(download_result.get('lyric', 'NULL') or 'NULL'))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(download_result.get('name')),
            singers=legalizestring(str(safeextractfromdict(download_result, ['ar_name'], '') or '').replace('/', ', ')),
            album=legalizestring(download_result.get('al_name', None)),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size=remove_suffix(str(download_result.get('size') or ''), 'MB').strip() + ' MB',
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=cleanlrc(download_result.get('lyric', 'NULL')) or 'NULL',
            cover_url=download_result.get('img'),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # fall back to the album name carried by the search entry
        if song_info.album == 'NULL':
            song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithcyruiapi'''
def _parsewithcyruiapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via blog.cyrui.cn/netease.

    The song detail is fetched once (best effort). The play URL is then requested for
    each quality in MUSIC_QUALITIES and stored under download_result['getMusicUrl'].
    The loop stops at the first link for which with_valid_download_url is truthy, or
    as soon as a play-URL request fails. This API provides no lyric; it is set to 'NULL'.

    Args:
        search_result: track entry; its 'id' value is placed in the query strings and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        The most recently built SongInfo, or a placeholder SongInfo created without a
        download URL when no quality produced an http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result['id']
    # placeholder for the "nothing found" case; previously `song_info` stayed unbound and the return raised
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # merged so that a `timeout` inside request_overrides no longer collides with the 10s default
    request_kwargs = {'timeout': 10, **request_overrides}
    # the detail request carries no default timeout in the original and keeps that behaviour
    try:
        (resp := self.get(f'https://blog.cyrui.cn/netease/api/getSongDetail.php?id={song_id}', **request_overrides)).raise_for_status()
        download_result = resp2json(resp=resp)
    except Exception:
        download_result = dict()
    # parse
    for quality in MUSIC_QUALITIES:
        try:
            (resp := self.get(url=f'https://blog.cyrui.cn/netease/api/getMusicUrl.php?id={song_id}&level={quality}', **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_result['getMusicUrl'] = resp2json(resp=resp)
        download_url = safeextractfromdict(download_result, ['getMusicUrl', 'data', 0, 'url'], '')
        if not download_url or not download_url.startswith('http'):
            continue
        # 'dt' is divided by 1000 to obtain seconds
        try:
            duration_in_secs = float(safeextractfromdict(download_result, ['songs', 0, 'dt'], 0)) / 1000
        except Exception:
            duration_in_secs = 0
        artist_entries = safeextractfromdict(download_result, ['songs', 0, 'ar'], []) or []
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
            source=self.source,
            song_name=legalizestring(safeextractfromdict(download_result, ['songs', 0, 'name'], None)),
            singers=legalizestring(', '.join([singer.get('name') for singer in artist_entries if isinstance(singer, dict) and singer.get('name')])),
            album=legalizestring(safeextractfromdict(download_result, ['songs', 0, 'al', 'name'], None)),
            ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=safeextractfromdict(download_result, ['getMusicUrl', 'data', 0, 'size'], 0),
            file_size=byte2mb(safeextractfromdict(download_result, ['getMusicUrl', 'data', 0, 'size'], 0)),
            identifier=song_id,
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric='NULL',
            cover_url=safeextractfromdict(download_result, ['songs', 0, 'al', 'picUrl'], None),
            download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        # the probe result replaces the API-reported size and may correct the extension
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # fall back to the album name carried by the search entry
        if song_info.album == 'NULL':
            song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithxianyuwapi'''
def _parsewithxianyuwapi(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a downloadable link for one track via apii.xianyuw.cn (single request, br=hires).

    Args:
        search_result: track entry; its 'id' value is placed in the query string and
            ['al', 'name'] is used as album fallback.
        request_overrides: extra keyword arguments for self.get; also handed to the audio link tester.

    Returns:
        A SongInfo for the returned link, or a placeholder SongInfo created without a
        download URL when the answer carries no http(s) link.

    Raises:
        KeyError: if search_result has no 'id' key.
        Exception: request errors and a missing ['data']['url'] entry are not caught here.
    '''
    # init
    # the API keys are stored base64-encoded; one is picked at random and decoded for the request
    encoded_keys = ['c2stOTUwZTc4MTNjMzhjMmUzMWQzOWQ4NzlkMzIwNDg4OTU=', 'c2stNjJjZGIwM2UyMjcwZWIzOTY4Y2NhNzg4MTM5OWY0MTI=']
    request_overrides = request_overrides or {}
    song_id = search_result['id']
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # parse
    api_key = base64.b64decode(str(random.choice(encoded_keys)).encode('utf-8')).decode('utf-8')
    resp = self.get(f'https://apii.xianyuw.cn/api/v1/163-music-search?id={song_id}&key={api_key}&no_url=0&br=hires', **request_overrides)
    resp.raise_for_status()
    download_result = resp2json(resp=resp)
    download_url: str = download_result['data']['url']
    if not (download_url and str(download_url).startswith('http')):
        return song_info
    lyric = cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], 'NULL')) or 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': 'hires'},
        source=self.source,
        song_name=legalizestring(safeextractfromdict(download_result, ['data', 'title'], None)),
        singers=legalizestring(str(safeextractfromdict(download_result, ['data', 'author'], '')).replace('/', ', ')),
        album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size=None,
        identifier=song_id,
        duration_s=extractdurationsecondsfromlrc(lyric),
        duration=seconds2hms(extractdurationsecondsfromlrc(lyric)),
        lyric=lyric,
        cover_url=safeextractfromdict(download_result, ['data', 'cover'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    # the probe result supplies the file size and may correct the extension
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    if song_info.album == 'NULL':
        song_info.album = legalizestring(safeextractfromdict(search_result, ['al', 'name'], None))
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext']
        else:
            song_info.ext = 'mp3'
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    '''Try the third-party resolvers one after another and return the first valid result.

    When cookies other than DEFAULT_COOKIES are configured (on the client or in
    request_overrides) the third-party resolvers are skipped entirely and a
    placeholder is returned.

    Args:
        search_result: track entry forwarded unchanged to every resolver.
        request_overrides: extra request keyword arguments forwarded to every resolver.

    Returns:
        The first SongInfo whose with_valid_download_url is truthy, otherwise a
        placeholder SongInfo created without a download URL.
    '''
    # the default is None; normalise before calling .get() on it (previously an AttributeError)
    request_overrides = request_overrides or {}
    cookies = self.default_cookies or request_overrides.get('cookies')
    if cookies and (cookies != DEFAULT_COOKIES):
        return SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    # NOTE(review): _parsewithtmetuapi appears twice in the original order (third and last);
    # kept as-is because the second entry acts as a retry — confirm this is intended.
    resolvers = [
        self._parsewithcggapi, self._parsewithxuanluogeapi, self._parsewithtmetuapi, self._parsewithbugpkapi, self._parsewithcyruiapi, self._parsewithcunyuapi,
        self._parsewithyutangxiaowuapi, self._parsewithnycnmbyfunsapi, self._parsewithxianyuwapi, self._parsewithxiaoqinapi, self._parsewithtmetuapi,
    ]
    for imp_func in resolvers:
        # Exception instead of a bare except so KeyboardInterrupt/SystemExit are not swallowed;
        # an explicit validity check instead of `assert`, which is removed under `python -O`
        try:
            song_info_flac = imp_func(search_result, request_overrides)
            is_valid = bool(song_info_flac.with_valid_download_url)
        except Exception:
            continue
        if is_valid:
            return song_info_flac
    return SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve a track through the official eapi player endpoint and attach its lyric.

    Args:
        search_result: track entry; needs a truthy 'id'. When 'name' is missing the entry
            is completed in place from the v3 song detail endpoint (best effort).
        song_info_flac: result of the third-party resolvers, used as shortcut and fallback.
        lossless_quality_is_sufficient: when True and song_info_flac is valid with an
            extension listed in lossless_quality_definitions, the official URL lookup is skipped.
        lossless_quality_definitions: extensions regarded as lossless (only read, never mutated).
        request_overrides: extra keyword arguments forwarded to self.post and the audio link tester.

    Returns:
        A SongInfo; callers must check with_valid_download_url, since a placeholder is
        returned when neither the official API nor song_info_flac yields a valid link.
    '''
    # init
    song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    request_overrides = request_overrides or {}
    song_info_flac = song_info_flac or SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
    if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))):
        return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions):
        song_info = song_info_flac
    else:
        if not search_result.get('name', None):
            try:
                (resp := self.post("https://interface3.music.163.com/api/v3/song/detail", data={'c': json.dumps([{"id": song_id, "v": 0}])}, **request_overrides)).raise_for_status()
                search_result.update(resp2json(resp=resp)['songs'][0])
            except Exception:
                pass
        # The player request passes `cookies=` explicitly. Forwarding request_overrides
        # unchanged raised "multiple values for keyword argument 'cookies'" whenever the
        # caller supplied cookies, so those are merged in and removed from the forwarded kwargs.
        try:
            override_cookies = dict(request_overrides.get('cookies') or {})
        except (TypeError, ValueError):
            override_cookies = {}
        post_overrides = {key: value for key, value in request_overrides.items() if key != 'cookies'}
        for quality_idx, quality in enumerate(MUSIC_QUALITIES):
            # qualities at or below the one already obtained from third parties are not requested
            if song_info_flac.with_valid_download_url and quality_idx >= MUSIC_QUALITIES.index(song_info_flac.raw_data.get('quality', MUSIC_QUALITIES[-1])):
                song_info = song_info_flac
                break
            params = {'ids': [song_id], 'level': quality, 'encodeType': 'flac', 'header': json.dumps({"os": "pc", "appver": "", "osver": "", "deviceId": "pyncm!", "requestId": str(random.randrange(20000000, 30000000))})}
            if quality == 'sky':
                params['immerseType'] = 'c51'
            params = EapiCryptoUtils.encryptparams(url='https://interface3.music.163.com/eapi/song/enhance/player/url/v1', payload=params)
            cookies = {"os": "pc", "appver": "", "osver": "", "deviceId": "pyncm!"}
            # `or {}` keeps dict.update from failing when default_cookies is None
            cookies.update(copy.deepcopy(self.default_cookies) or {})
            cookies.update(override_cookies)
            try:
                (resp := self.post('https://interface3.music.163.com/eapi/song/enhance/player/url/v1', data={"params": params}, cookies=cookies, **post_overrides)).raise_for_status()
            except Exception:
                continue
            download_result = resp2json(resp)
            if ('data' not in download_result) or (not download_result['data']):
                continue
            download_url = safeextractfromdict(download_result, ['data', 0, 'url'], '')
            if not download_url or not str(download_url).startswith('http'):
                continue
            # 'dt' is divided by 1000 to obtain seconds
            try:
                duration_in_secs = float(search_result.get('dt', 0)) / 1000
            except Exception:
                duration_in_secs = 0
            artist_entries = safeextractfromdict(search_result, ['ar'], []) or []
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality},
                source=self.source,
                song_name=legalizestring(search_result.get('name')),
                singers=legalizestring(', '.join([singer.get('name') for singer in artist_entries if isinstance(singer, dict) and singer.get('name')])),
                album=legalizestring(safeextractfromdict(search_result, ['al', 'name'], None)),
                ext=download_url.split('?')[0].split('.')[-1],
                file_size_bytes=None,
                file_size=None,
                identifier=song_id,
                duration_s=duration_in_secs,
                duration=seconds2hms(duration_in_secs),
                lyric='NULL',
                cover_url=safeextractfromdict(search_result, ['al', 'picUrl'], None),
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            # the probe result supplies the file size and may correct the extension
            probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
            # prefer the third-party result when it is the larger of the two
            if song_info_flac.with_valid_download_url and song_info_flac.largerthan(song_info):
                song_info = song_info_flac
            if song_info.with_valid_download_url:
                break
    if not song_info.with_valid_download_url:
        song_info = song_info_flac
    if not song_info.with_valid_download_url:
        return song_info
    # supplement lyric results
    data = {'id': song_id, 'cp': 'false', 'tv': '0', 'lv': '0', 'rv': '0', 'kv': '0', 'yv': '0', 'ytv': '0', 'yrv': '0'}
    try:
        (resp := self.post('https://interface3.music.163.com/api/song/lyric', data=data, **request_overrides)).raise_for_status()
        lyric = cleanlrc(safeextractfromdict((lyric_result := resp2json(resp)), ['lrc', 'lyric'], 'NULL')) or 'NULL'
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    # keep what the resolver already stored when the official lyric lookup came back empty
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    if not song_info.duration or song_info.duration == '-:-:-':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every resolvable track to song_infos.

    Args:
        keyword: search keyword (not read by this method; the request is described by search_url).
        search_url: request description; 'url' is popped from a copy and the remaining
            items are forwarded to self.post as keyword arguments.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: task id inside progress.

    Returns:
        The song_infos list.

    Raises:
        KeyError: if search_url carries no 'url' entry.
    '''
    # init
    request_overrides = request_overrides or {}
    # a list literal as default would be shared between calls and keep growing
    song_infos = [] if song_infos is None else song_infos
    search_meta = copy.deepcopy(search_url or {})
    search_url = search_meta.pop('url')
    # with non-default cookies the official API may beat a third-party lossless file,
    # so it is always queried; the check does not depend on the individual result
    cookies = self.default_cookies or request_overrides.get('cookies')
    lossless_quality_is_sufficient = not (cookies and (cookies != DEFAULT_COOKIES))

    def report(outcome: str):
        # progress is optional; skip reporting when the caller passed none
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")

    # successful
    try:
        # --search results
        (resp := self.post(search_url, **search_meta, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['result']['songs']:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source, raw_data={'quality': MUSIC_QUALITIES[-1]})
            # --append to song_infos
            if not song_info.with_valid_download_url:
                song_info = song_info_flac
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        report("Success")
    # failure
    except Exception as err:
        report(f"Error: {err}")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve every track of a playlist into SongInfo objects.

    The URL is followed through redirects with a HEAD request. The playlist id is taken
    from the `id` query parameter inside the URL fragment, or else from the last path
    segment (with a trailing '.html' / '.htm' removed). URLs whose host does not match
    NETEASE_MUSIC_HOSTS yield an empty list.

    Args:
        playlist_url: playlist page URL.
        request_overrides: extra keyword arguments forwarded to every request; a
            (10, 30) timeout is added when none is given.

    Returns:
        De-duplicated list of SongInfo objects with work_dir set to a playlist directory.

    Raises:
        Exception: errors of the HEAD request and of the playlist detail request are not caught.
    '''
    # init
    # work on a copy so the caller's dict is not modified by setdefault
    request_overrides = dict(request_overrides or {})
    request_overrides.setdefault('timeout', (10, 30))
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    song_infos = []
    try:
        playlist_id = parse_qs(urlparse(urlparse(playlist_url).fragment).query, keep_blank_values=True).get('id')[0]
        # explicit check: an `assert` would disappear under `python -O`
        if not playlist_id:
            raise ValueError('empty playlist id in url fragment')
    except Exception:
        playlist_id = remove_suffix(remove_suffix(urlparse(playlist_url).path.strip('/').split('/')[-1], '.html'), '.htm')
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, NETEASE_MUSIC_HOSTS)):
        return song_infos
    # get tracks in playlist
    (resp := self.post('https://music.163.com/api/v6/playlist/detail', data={'id': playlist_id}, **request_overrides)).raise_for_status()
    tracks_in_playlist = (safeextractfromdict((playlist_result := resp2json(resp=resp)), ['playlist', 'trackIds'], []) or [])
    # with non-default cookies the official API is always queried; invariant over the loop
    cookies = self.default_cookies or request_overrides.get('cookies')
    lossless_quality_is_sufficient = not (cookies and (cookies != DEFAULT_COOKIES))
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
        for idx, track_info in enumerate(tracks_in_playlist):
            song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
            try:
                song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception:
                song_info = song_info_flac
            if not song_info.with_valid_download_url:
                song_info = song_info_flac
            if song_info.with_valid_download_url:
                song_infos.append(song_info)
            # advance once per finished track; doing it inside the loop keeps an empty
            # playlist from touching the loop variable after the loop (NameError before)
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result, ['playlist', 'name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos)
    work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir
        episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
            touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,172 @@
'''
Function:
Implementation of QianqianMusicClient: http://music.taihe.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import time
import copy
import hashlib
from .base import BaseMusicClient
from rich.progress import Progress
from pathvalidate import sanitize_filepath
from urllib.parse import urlencode, urlparse
from ..utils.hosts import QIANQIAN_MUSIC_HOSTS
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, byte2mb, resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, cookies2string, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
'''QianqianMusicClient'''
class QianqianMusicClient(BaseMusicClient):
source = 'QianqianMusicClient'
APPID = '16073360'
MUSIC_QUALITIES = ['3000', '320', '128', '64']
def __init__(self, **kwargs):
    """Prepare the default search / parse / download headers and start the session.

    All keyword arguments are forwarded unchanged to ``BaseMusicClient.__init__``.
    """
    super(QianqianMusicClient, self).__init__(**kwargs)
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    # browser-like header template shared by the search and parse profiles
    browser_headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "referer": "https://music.91q.com/player",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
        "sec-fetch-site": "same-origin",
        "sec-fetch-dest": "empty",
        "sec-ch-ua-mobile": "?0",
        "priority": "u=1, i",
        "user-agent": user_agent,
        "from": "web",
        "sec-fetch-mode": "cors",
    }
    # each profile owns its own dict, so the search and parse headers stay independent objects
    self.default_search_headers = dict(browser_headers)
    self.default_parse_headers = dict(browser_headers)
    self.default_download_headers = {"user-agent": user_agent}
    # when a profile has cookies, expose them both as an authorization header and as a cookie header
    header_cookie_pairs = (
        (self.default_search_headers, self.default_search_cookies),
        (self.default_parse_headers, self.default_parse_cookies),
        (self.default_download_headers, self.default_download_cookies),
    )
    for profile_headers, profile_cookies in header_cookie_pairs:
        if not profile_cookies:
            continue
        profile_headers['authorization'] = f"access_token {profile_cookies.get('access_token', '')}"
        profile_headers['cookie'] = cookies2string(profile_cookies)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_addsignandtstoparams'''
def _addsignandtstoparams(self, params: dict):
secret = '0b50b02fd0d73a9c4c8c3a781c30845f'
params['timestamp'] = str(int(time.time()))
keys = sorted(params.keys()); string = "&".join(f"{k}={params[k]}" for k in keys)
params['sign'] = hashlib.md5((string + secret).encode('utf-8')).hexdigest()
return params
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'word': keyword, 'type': '1', 'pageNo': '1', 'pageSize': '10', 'appid': QianqianMusicClient.APPID}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://music.91q.com/v1/search?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['pageSize'] = page_size
page_rule['pageNo'] = str(int(count // page_size) + 1)
page_rule = self._addsignandtstoparams(params=page_rule)
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    """Resolve one track entry into a SongInfo via the 91q.com track-link API.

    Args:
        search_result: Track entry. 'TSID' identifies the song; 'title', 'artist',
            'albumTitle', 'pic' and 'lyric' are read when present.
        song_info_flac: Optional previously resolved result; an empty SongInfo is
            used when omitted.
        lossless_quality_is_sufficient: When true and `song_info_flac` has a valid
            download url whose extension is in `lossless_quality_definitions`, that
            result is used and the track-link API is not queried.
        lossless_quality_definitions: Extensions regarded as lossless (read only).
        request_overrides: Extra keyword arguments forwarded to each request.

    Returns:
        A SongInfo. It has no valid download url when `search_result` is not a
        dict, has no 'TSID', or no entry of MUSIC_QUALITIES produced a working link.
    """
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('TSID'))):
        return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions):
        song_info = song_info_flac
    else:
        for rate in QianqianMusicClient.MUSIC_QUALITIES:
            params = self._addsignandtstoparams(params={'TSID': song_id, 'appid': QianqianMusicClient.APPID, 'rate': rate})
            try:
                (resp := self.get("https://music.91q.com/v1/song/tracklink", params=params, **request_overrides)).raise_for_status()
            except Exception:
                # a failed request for this rate only means "try the next rate"
                continue
            download_result = resp2json(resp)
            # the regular path is preferred, the trail_audio_info path is the fallback
            download_url = safeextractfromdict(download_result, ['data', 'path'], '') or safeextractfromdict(download_result, ['data', 'trail_audio_info', 'path'], '')
            if not download_url or not str(download_url).startswith('http'):
                continue
            file_size_bytes, duration_in_secs = safeextractfromdict(download_result, ['data', 'size'], 0), safeextractfromdict(download_result, ['data', 'duration'], 0)
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')),
                singers=legalizestring(', '.join([singer.get('name') for singer in (search_result.get('artist', []) or []) if isinstance(singer, dict) and singer.get('name')])),
                album=legalizestring(search_result.get('albumTitle', None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=file_size_bytes, file_size=byte2mb(file_size_bytes),
                identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=search_result.get('pic'),
                download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            # prefer the probed extension when the one taken from the url is not a known audio extension
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS):
                song_info.ext = probe_status['ext']
            elif song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = 'mp3'
            if song_info.with_valid_download_url:
                break
    if not song_info.with_valid_download_url:
        return song_info
    # supplement lyric results
    try:
        (resp := self.get(search_result['lyric'], **request_overrides)).raise_for_status()
        resp.encoding = 'utf-8'
        lyric, lyric_result = cleanlrc(resp.text) or 'NULL', dict(lyric=resp.text)
    except Exception:
        # lyrics are optional; a missing key or failed download leaves the song usable
        lyric_result, lyric = dict(), 'NULL'
    has_lyric = bool(lyric) and (lyric not in {'NULL'})
    # fix: the guard now tests the lyric text that was just fetched (the text the regex
    # runs on) rather than the lyric already stored on song_info
    if (song_info.singers == 'NULL') and has_lyric:
        song_info.singers = (m.group(1) if (m := re.search(r'\[ar:(.*?)\]', lyric)) else 'NULL')
    # only overwrite when something was fetched; avoids a KeyError when a reused
    # song_info_flac has no 'lyric' entry in raw_data
    if lyric_result:
        song_info.raw_data['lyric'] = lyric_result
    if has_lyric:
        song_info.lyric = lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    """Fetch one search page and append every downloadable track to `song_infos`.

    Args:
        keyword: Search keyword (not read here; the query is already in `search_url`).
        search_url: Fully built search URL to request.
        request_overrides: Extra keyword arguments forwarded to each request.
        song_infos: List that results are appended to, in place. A fresh list is
            created when omitted, so results never leak between calls.
        progress: Optional progress display; skipped when None.
        progress_id: Task id within `progress`.

    Returns:
        The list holding the accumulated SongInfo objects. Errors are reported
        through `progress` and never raised.
    """
    # init
    request_overrides = request_overrides or {}
    # a mutable default would be shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['data']['typeTrack']:
            # --parse with official apis
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
# Resolve a playlist URL into a list of SongInfo objects, one per downloadable track.
# Returns an empty list when the (redirect-resolved) URL's host is not matched by QIANQIAN_MUSIC_HOSTS.
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
# init
request_overrides = request_overrides or {}
# follow redirects with a HEAD request so the id is taken from the final URL
playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
# playlist id = last path segment with a trailing '.html' / '.htm' removed
playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, QIANQIAN_MUSIC_HOSTS)): return song_infos
# get tracks in playlist
# pages of 50 are requested until a request fails, a page has no 'trackList', or 'trackCount' tracks were collected
tracks_in_playlist, page, playlist_result_first = [], 1, None
while True:
params = {'pageNo': page, 'pageSize': 50, 'appid': QianqianMusicClient.APPID, 'id': playlist_id}
try: (resp := self.get(f"https://music.91q.com/v1/tracklist/info", params=self._addsignandtstoparams(params=params), **request_overrides)).raise_for_status()
except Exception: break
if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['data', 'trackList'], [])): break
tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['data', 'trackList'], [])); page += 1
# the first page's response is kept because the playlist title is read from it later
if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
if (float(safeextractfromdict(playlist_result, ['data', 'trackCount'], 0)) <= len(tracks_in_playlist)): break
# drop duplicate tracks by 'TSID' (a track without that key raises KeyError here)
tracks_in_playlist = list({d["TSID"]: d for d in tracks_in_playlist}.values())
# parse track by track in playlist
with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
for idx, track_info in enumerate(tracks_in_playlist):
if idx > 0: main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
# a track whose parsing raises is treated like a track without a valid download url and skipped
try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
if song_info.with_valid_download_url: song_infos.append(song_info)
# NOTE(review): the original nesting is not visible here. If the next two lines sit inside the loop the bar advances twice per track;
# if they follow the loop, `idx` is undefined for an empty playlist — confirm against the indented source.
main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
# post processing
# the work directory is named after the playlist title, or "playlist-<id>" when no title was returned
playlist_name = safeextractfromdict(playlist_result_first, ['data', 'title'], None)
song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
for song_info in song_infos:
song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
# NOTE(review): touchdir receives work_dir, not the per-episode directory just assigned — confirm this is intended.
for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
# return results
return song_infos
@@ -0,0 +1,266 @@
'''
Function:
Implementation of QobuzMusicClient: https://play.qobuz.com/discover
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import time
import copy
import base64
import hashlib
import requests
from itertools import product
from .base import BaseMusicClient
from collections import OrderedDict
from pathvalidate import sanitize_filepath
from ..utils.hosts import QOBUZ_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse, urljoin, parse_qs
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, resp2json, usesearchheaderscookies, seconds2hms, safeextractfromdict, hostmatchessuffix, obtainhostname, useparseheaderscookies, SongInfo, AudioLinkTester, LyricSearchClient
'''QobuzMusicClient'''
# Music client for Qobuz, built on BaseMusicClient.
class QobuzMusicClient(BaseMusicClient):
source = 'QobuzMusicClient'
# Application id and request-signing secrets; both start as None and are filled in by _setappidandsecrets.
APP_ID = None
SECRETS = None
# Format ids tried in this order; the parsers in this class treat 5 as mp3 and every other id as flac.
MUSIC_QUALITIES = (27, 7, 6, 5)
# Return the first truthy value stored in `cookies` under any of `keys` (checked in the given order), or None when there is none.
get_token_func = lambda cookies, *keys: next((cookies.get(k) for k in keys if cookies.get(k)), None)
def __init__(self, **kwargs):
    """Validate the configured auth tokens, prepare default headers and start the session.

    Keyword arguments are forwarded unchanged to ``BaseMusicClient.__init__``.
    For every cookie profile (search / parse / download) that is non-empty, a
    user auth token must be present under one of the accepted key spellings.
    The token is copied into that profile's 'X-User-Auth-Token' header and the
    cookie dicts are then cleared.

    Raises:
        AssertionError: a non-empty cookie profile carries no user auth token.
    """
    super(QobuzMusicClient, self).__init__(**kwargs)
    token_keys = ("user_auth_token", "X-User-Auth-Token", "x-user-auth-token")
    missing_token_message = '"x-user-auth-token" should be configured, refer to https://musicdl.readthedocs.io/en/latest/Quickstart.html#qobuz-music-download'
    # validate all three profiles before any header is built
    tokens = {}
    for profile_name, profile_cookies in (('search', self.default_search_cookies), ('parse', self.default_parse_cookies), ('download', self.default_download_cookies)):
        if not profile_cookies:
            continue
        token = QobuzMusicClient.get_token_func(profile_cookies, *token_keys)
        # raised explicitly (not via `assert`) so the check still runs under `python -O`;
        # AssertionError is kept so existing handlers keep matching
        if not token:
            raise AssertionError(missing_token_message)
        tokens[profile_name] = token
    # browser-like header template shared by the search and parse profiles
    browser_headers = {
        "accept": "*/*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "origin": "https://play.qobuz.com", "priority": "u=1, i", "referer": "https://play.qobuz.com/", "sec-ch-ua": '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"',
        "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
    }
    self.default_search_headers = dict(browser_headers)
    self.default_parse_headers = dict(browser_headers)
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"}
    if 'search' in tokens:
        self.default_search_headers.update({'X-User-Auth-Token': tokens['search']})
    if 'parse' in tokens:
        self.default_parse_headers.update({'X-User-Auth-Token': tokens['parse']})
    if 'download' in tokens:
        self.default_download_headers.update({'X-User-Auth-Token': tokens['download']})
    self.default_headers = self.default_search_headers
    # the token now travels in the headers, so the cookie dicts are emptied
    self.default_search_cookies = {}
    self.default_parse_cookies = {}
    self.default_download_cookies = {}
    self._initsession()
'''_setappidandsecrets'''
def _setappidandsecrets(self, request_overrides: dict = None) -> tuple[str, list[str]]:
if (QobuzMusicClient.APP_ID is not None) and (QobuzMusicClient.SECRETS is not None): self.default_headers.update({"X-App-Id": QobuzMusicClient.APP_ID}); return
request_overrides = request_overrides or {}
(resp := self.get("https://play.qobuz.com/login", **request_overrides)).raise_for_status()
bundle_url = re.search(r'<script src="(/resources/\d+\.\d+\.\d+-[a-z]\d{3}/bundle\.js)"></script>', resp.text).group(1)
(resp := self.get(urljoin("https://play.qobuz.com", bundle_url), **request_overrides)).raise_for_status()
app_id = str(re.search(r'production:{api:{appId:"(?P<app_id>\d{9})",appSecret:"(\w{32})', resp.text).group("app_id"))
seed_matches, secrets = re.finditer(r'[a-z]\.initialSeed\("(?P<seed>[\w=]+)",window\.utimezone\.(?P<timezone>[a-z]+)\)', resp.text), OrderedDict()
for match in seed_matches: seed, timezone = match.group("seed", "timezone"); secrets[timezone] = [seed]
secrets.move_to_end(list(secrets.items())[1][0], last=False)
info_extras_regex = r'name:"\w+/(?P<timezone>{timezones})",info:"(?P<info>[\w=]+)",extras:"(?P<extras>[\w=]+)"'.format(timezones="|".join(timezone.capitalize() for timezone in secrets))
for match in re.finditer(info_extras_regex, resp.text): timezone, info, extras = match.group("timezone", "info", "extras"); secrets[timezone.lower()] += [info, extras]
for secret_pair in secrets: secrets[secret_pair] = base64.standard_b64decode("".join(secrets[secret_pair])[:-44]).decode("utf-8")
if "" in (vals := list(secrets.values())): vals.remove("")
QobuzMusicClient.APP_ID, QobuzMusicClient.SECRETS = app_id, vals
self.default_headers.update({"X-App-Id": QobuzMusicClient.APP_ID})
return QobuzMusicClient.APP_ID, QobuzMusicClient.SECRETS
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}; self._setappidandsecrets(request_overrides=request_overrides)
# search rules
default_rule = {'query': keyword, 'offset': 0, 'limit': 10}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://www.qobuz.com/api.json/0.2/catalog/search?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['limit'] = page_size
page_rule['offset'] = count
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithdabyeetsuapi'''
def _parsewithdabyeetsuapi(self, search_result: dict, request_overrides: dict = None):
    """Resolve a track through the dab.yeet.su stream API, trying MUSIC_QUALITIES in order.

    Args:
        search_result: Track entry; 'id' is required, 'title', 'performer',
            'album' and 'duration' are read when present.
        request_overrides: Extra request keyword arguments; they take precedence
            over the default headers / timeout used here.

    Returns:
        The SongInfo of the first quality whose link tests as valid; otherwise the
        last candidate built, or an empty SongInfo when none was built. The loop
        stops at the first failed request.

    Raises:
        KeyError: `search_result` has no 'id'.
    """
    # init
    request_overrides, song_id = request_overrides or {}, str(search_result['id'])
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",}
    # merged once so 'headers' / 'timeout' inside request_overrides no longer collide with the defaults
    request_kwargs = {'headers': headers, 'timeout': 10, **request_overrides}
    # fallback result so `return song_info` is always bound
    song_info = SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
    # parse
    for requested_quality in QobuzMusicClient.MUSIC_QUALITIES:
        try:
            (resp := requests.get(f"https://dab.yeet.su/api/stream?trackId={song_id}&quality={requested_quality}", **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        # the url's 'fmt' value is text: convert it to int so it compares equal to the
        # MUSIC_QUALITIES entries, and fall back to the requested quality otherwise
        fmt_values = parse_qs(urlparse(download_url).query, keep_blank_values=True).get('fmt') or []
        try:
            quality = int(fmt_values[0])
        except (IndexError, TypeError, ValueError):
            quality = requested_quality
        if quality not in QobuzMusicClient.MUSIC_QUALITIES:
            quality = requested_quality
        default_ext = 'mp3' if quality == 5 else 'flac'
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['performer', 'name'], None)), album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None)), ext=default_ext,
            file_size_bytes=None, file_size=None, identifier=song_id, duration_s=search_result.get('duration'), duration=seconds2hms(search_result.get('duration')), lyric=None, cover_url=legalizestring(safeextractfromdict(search_result, ['album', 'image', 'large'], None)), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins; the quality-derived default is used only when it is not a known audio extension
        song_info.ext = probe_status['ext']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = default_ext
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithdabmusicapi'''
def _parsewithdabmusicapi(self, search_result: dict, request_overrides: dict = None):
    """Resolve a track through the dabmusic.xyz stream API, trying MUSIC_QUALITIES in order.

    Args:
        search_result: Track entry; 'id' is required, 'title', 'performer',
            'album' and 'duration' are read when present.
        request_overrides: Extra request keyword arguments; they take precedence
            over the default headers / timeout used here.

    Returns:
        The SongInfo of the first quality whose link tests as valid; otherwise the
        last candidate built, or an empty SongInfo when none was built. The loop
        stops at the first failed request.

    Raises:
        KeyError: `search_result` has no 'id'.
    """
    # init
    request_overrides, song_id = request_overrides or {}, str(search_result['id'])
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",}
    # merged once so 'headers' / 'timeout' inside request_overrides no longer collide with the defaults
    request_kwargs = {'headers': headers, 'timeout': 10, **request_overrides}
    # fallback result so `return song_info` is always bound
    song_info = SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
    # parse
    for requested_quality in QobuzMusicClient.MUSIC_QUALITIES:
        try:
            (resp := requests.get(f"https://dabmusic.xyz/api/stream?trackId={song_id}&quality={requested_quality}", **request_kwargs)).raise_for_status()
        except Exception:
            break
        download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['url'], '')
        if not download_url or not str(download_url).startswith('http'):
            continue
        # the url's 'fmt' value is text: convert it to int so it compares equal to the
        # MUSIC_QUALITIES entries, and fall back to the requested quality otherwise
        fmt_values = parse_qs(urlparse(download_url).query, keep_blank_values=True).get('fmt') or []
        try:
            quality = int(fmt_values[0])
        except (IndexError, TypeError, ValueError):
            quality = requested_quality
        if quality not in QobuzMusicClient.MUSIC_QUALITIES:
            quality = requested_quality
        default_ext = 'mp3' if quality == 5 else 'flac'
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['performer', 'name'], None)), album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None)), ext=default_ext,
            file_size_bytes=None, file_size=None, identifier=song_id, duration_s=search_result.get('duration'), duration=seconds2hms(search_result.get('duration')), lyric=None, cover_url=legalizestring(safeextractfromdict(search_result, ['album', 'image', 'large'], None)), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins; the quality-derived default is used only when it is not a known audio extension
        song_info.ext = probe_status['ext']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = default_ext
        if song_info.with_valid_download_url:
            break
    # return
    return song_info
'''_parsewithafkarxyzapi'''
def _parsewithafkarxyzapi(self, search_result: dict, request_overrides: dict = None):
    """Resolve a track through the qbz.afkarxyz.fun track API (single request, no quality choice).

    Args:
        search_result: Track entry; 'id' is required, 'title', 'performer',
            'album' and 'duration' are read when present.
        request_overrides: Extra request keyword arguments; they take precedence
            over the default headers / timeout used here.

    Returns:
        A SongInfo for the returned link, or an empty SongInfo when the response
        carries no http(s) url.

    Raises:
        KeyError: `search_result` has no 'id'.
        Exception: whatever the HTTP request or ``raise_for_status`` raises; this
            method does not catch request errors.
    """
    # init
    request_overrides, song_id = request_overrides or {}, str(search_result['id'])
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",}
    lowest_quality = QobuzMusicClient.MUSIC_QUALITIES[-1]
    # merged once so 'headers' / 'timeout' inside request_overrides no longer collide with the defaults
    request_kwargs = {'headers': headers, 'timeout': 10, **request_overrides}
    # parse
    (resp := requests.get(f"https://qbz.afkarxyz.fun/api/track/{song_id}", **request_kwargs)).raise_for_status()
    download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['url'], '')
    if not download_url or not str(download_url).startswith('http'):
        return SongInfo(source=self.source, raw_data={'quality': lowest_quality})
    # the url's 'fmt' value is text: convert it to int so it compares equal to the
    # MUSIC_QUALITIES entries; anything unusable counts as the lowest quality
    fmt_values = parse_qs(urlparse(download_url).query, keep_blank_values=True).get('fmt') or []
    try:
        quality = int(fmt_values[0])
    except (IndexError, TypeError, ValueError):
        quality = lowest_quality
    if quality not in QobuzMusicClient.MUSIC_QUALITIES:
        quality = lowest_quality
    default_ext = 'mp3' if quality == 5 else 'flac'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['performer', 'name'], None)), album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None)), ext=default_ext,
        file_size_bytes=None, file_size=None, identifier=song_id, duration_s=search_result.get('duration'), duration=seconds2hms(search_result.get('duration')), lyric=None, cover_url=legalizestring(safeextractfromdict(search_result, ['album', 'image', 'large'], None)), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.download_url_status['probe_status'] = probe_status
    song_info.file_size = probe_status['file_size']
    # the probed extension wins; the quality-derived default is used only when it is not a known audio extension
    song_info.ext = probe_status['ext']
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = default_ext
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    """Try the third-party resolvers in turn and return the first valid result.

    When `self.default_headers` carries a user auth token the third-party
    services are skipped and an empty SongInfo is returned. Otherwise
    the dabmusic, dab.yeet.su and afkarxyz resolvers are tried in that order.
    A resolver that raises or yields no valid download url is skipped.

    Returns:
        The first SongInfo with a valid download url, or an empty SongInfo
        (quality set to the lowest entry of MUSIC_QUALITIES) when none succeeds.
    """
    if QobuzMusicClient.get_token_func(self.default_headers, "X-User-Auth-Token", "x-user-auth-token"):
        return SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
    for imp_func in (self._parsewithdabmusicapi, self._parsewithdabyeetsuapi, self._parsewithafkarxyzapi):
        # validity is tested with a plain `if` (an `assert` disappears under `python -O`, which
        # made the first resolver always win), and only Exception is caught so that
        # KeyboardInterrupt / SystemExit still propagate
        try:
            song_info_flac = imp_func(search_result, request_overrides)
            if song_info_flac.with_valid_download_url:
                return song_info_flac
        except Exception:
            continue
    return SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
'''_parsewithofficialapiv1'''
# Resolve one track entry into a SongInfo through the signed track/getFileUrl endpoint.
# `song_info_flac` is an optional result obtained elsewhere (e.g. from _parsewiththirdpartapis). It is returned as-is when
# `lossless_quality_is_sufficient` is true and its ext is in `lossless_quality_definitions`, and it is also the fallback
# when no official link works. Returns a SongInfo without a valid download url when nothing could be resolved.
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# init
# app id / secrets must be loaded before signing; the placeholder SongInfo objects carry the lowest quality id
self._setappidandsecrets(request_overrides=request_overrides); song_info, request_overrides, song_info_flac = SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]}), request_overrides or {}, song_info_flac or SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))): return song_info
# obtain basic song_info
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
# every quality is attempted with every known secret, qualities in MUSIC_QUALITIES order
for (quality, secret) in list(product(QobuzMusicClient.MUSIC_QUALITIES, QobuzMusicClient.SECRETS)):
# stop once the quality about to be requested is positioned at or after song_info_flac's quality in MUSIC_QUALITIES
# NOTE(review): .index raises ValueError if raw_data['quality'] is not a member of MUSIC_QUALITIES (e.g. a string) — confirm producers store ints.
if song_info_flac.with_valid_download_url and QobuzMusicClient.MUSIC_QUALITIES.index(quality) >= QobuzMusicClient.MUSIC_QUALITIES.index(song_info_flac.raw_data.get('quality', QobuzMusicClient.MUSIC_QUALITIES[-1])): song_info = song_info_flac; break
# request signature: md5 of this string; the timestamp bound here is reused below as 'request_ts'
r_sig = f"trackgetFileUrlformat_id{quality}intentstreamtrack_id{song_id}{(unix_ts := time.time())}{secret}"
r_sig_hashed = hashlib.md5(r_sig.encode("utf-8")).hexdigest()
params = {"request_ts": unix_ts, "request_sig": r_sig_hashed, "track_id": song_id, "format_id": quality, "intent": "stream"}
# a failed request just moves on to the next (quality, secret) pair
try: (resp := self.get('https://www.qobuz.com/api.json/0.2/track/getFileUrl', params=params, **request_overrides)).raise_for_status()
except Exception: continue
download_url = safeextractfromdict((download_result := resp2json(resp=resp)), ['url'], None)
if not download_url or not str(download_url).startswith('http'): continue
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'quality': quality}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['performer', 'name'], None)), album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None)), ext='mp3' if quality in {5} else 'flac',
file_size_bytes=None, file_size=None, identifier=song_id, duration_s=download_result.get('duration'), duration=seconds2hms(download_result.get('duration')), lyric=None, cover_url=legalizestring(safeextractfromdict(search_result, ['album', 'image', 'large'], None)), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
# file size and extension are taken from probing the link; the quality-based extension is the fallback
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3' if quality in {5} else 'flac'
# keep song_info_flac when its largerthan() comparison against the official result is true
if song_info_flac.with_valid_download_url and song_info_flac.largerthan(song_info): song_info = song_info_flac
if song_info.with_valid_download_url: break
if not song_info.with_valid_download_url: song_info = song_info_flac
if not song_info.with_valid_download_url: return song_info
# supplement lyric results
# lyrics are looked up by artist and track name through LyricSearchClient
lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    """Fetch one search page and append every downloadable track to `song_infos`.

    Each track is resolved through the third-party resolvers first and then the
    official API; the third-party result is the fallback when the official
    one has no valid download url.

    Args:
        keyword: Search keyword (not read here; the query is already in `search_url`).
        search_url: Fully built search URL to request.
        request_overrides: Extra keyword arguments forwarded to each request.
        song_infos: List that results are appended to, in place. A fresh list is
            created when omitted, so results never leak between calls.
        progress: Optional progress display; skipped when None.
        progress_id: Task id within `progress`.

    Returns:
        The list holding the accumulated SongInfo objects. Errors raised while
        fetching or parsing the page are reported through `progress`, not raised.
    """
    # init
    request_overrides = request_overrides or {}
    # a mutable default would be shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    self._setappidandsecrets(request_overrides=request_overrides)
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['tracks']['items']:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis
            # with a user auth token the official API is always queried; without one a lossless third-party result is enough
            lossless_quality_is_sufficient = not QobuzMusicClient.get_token_func(self.default_headers, "X-User-Auth-Token", "x-user-auth-token")
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source, raw_data={'quality': QobuzMusicClient.MUSIC_QUALITIES[-1]})
            # --append to song_infos
            if not song_info.with_valid_download_url:
                song_info = song_info_flac
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve every track of a Qobuz playlist url into SongInfo objects.
    Args:
        playlist_url: playlist page url; redirects are followed before the playlist id is extracted from the last path segment.
        request_overrides: extra keyword arguments forwarded to every request.
    Returns:
        List of SongInfo objects with a valid download url; empty when the final host is not listed in QOBUZ_MUSIC_HOSTS
        or when no track could be fetched.
    '''
    # init
    request_overrides = request_overrides or {}
    # forward the overrides exactly like _search does, so both entry points initialise the app id / secrets the same way
    self._setappidandsecrets(request_overrides=request_overrides)
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, QOBUZ_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist, page by page, until a request fails, a page is empty or the reported total is reached
    tracks_in_playlist, page, page_size, playlist_result_first = [], 1, 500, {}
    while True:
        try: (resp := self.get("https://www.qobuz.com/api.json/0.2/playlist/get?", params={"playlist_id": playlist_id, "extra": 'tracks', "offset": (page-1)*page_size, 'limit': page_size}, **request_overrides)).raise_for_status()
        except Exception: break
        if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['tracks', 'items'], [])): break
        tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['tracks', 'items'], [])); page += 1
        # the first page is kept because the playlist name is read from it afterwards
        if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        if (float(safeextractfromdict(playlist_result, ['tracks', 'total'], 0)) <= len(tracks_in_playlist)): break
    # drop tracks sharing the same id (the last occurrence wins, first-seen order is kept)
    tracks_in_playlist = list({d["id"]: d for d in tracks_in_playlist}.values())
    num_tracks = len(tracks_in_playlist)
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{num_tracks} songs found in playlist {playlist_id} >>> completed (0/{num_tracks})", total=num_tracks)
        for idx, track_info in enumerate(tracks_in_playlist):
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({idx}/{num_tracks})")
            song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
            lossless_quality_is_sufficient = not QobuzMusicClient.get_token_func(self.default_headers, "X-User-Auth-Token", "x-user-auth-token")
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = song_info_flac
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # account for the last processed track; skipped for an empty playlist, where the loop variable
        # was never bound and the former unconditional f-string raised
        if tracks_in_playlist:
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{num_tracks} songs found in playlist {playlist_id} >>> completed ({num_tracks}/{num_tracks})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos)
    work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir
        episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name))
            touchdir(work_dir)
    # return results
    return song_infos
+312
View File
@@ -0,0 +1,312 @@
'''
Function:
Implementation of QQMusicClient: https://y.qq.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import json
import random
import base64
from .base import BaseMusicClient
from rich.progress import Progress
from ..utils.hosts import QQ_MUSIC_HOSTS
from pathvalidate import sanitize_filepath
from urllib.parse import urlparse, parse_qs
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils.qqutils import QQMusicClientUtils, SearchType, Credential, ThirdPartVKeysAPISongFileType, SongFileType, EncryptedSongFileType
from ..utils import touchdir, resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, extractdurationsecondsfromlrc, useparseheaderscookies, obtainhostname, hostmatchessuffix, optionalimport, cleanlrc, SongInfo, AudioLinkTester
def remove_suffix(value: str, suffix: str) -> str:
    '''Return value without its trailing suffix; value is returned unchanged when suffix is empty or absent.'''
    has_suffix = bool(suffix) and value.endswith(suffix)
    return value[:len(value) - len(suffix)] if has_suffix else value
'''QQMusicClient'''
class QQMusicClient(BaseMusicClient):
source = 'QQMusicClient'
def __init__(self, use_encrypted_endpoint: bool = False, **kwargs):
    '''
    Set up the client: remember which endpoint flavour to use, install the default headers and open the session.
    Args:
        use_encrypted_endpoint: stored on the instance; read by the search / parse methods to pick the endpoint.
        **kwargs: forwarded untouched to the base class constructor.
    '''
    super(QQMusicClient, self).__init__(**kwargs)
    self.use_encrypted_endpoint = use_encrypted_endpoint
    browser_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'
    web_headers = {'User-Agent': browser_user_agent, 'Referer': 'https://y.qq.com/', 'Origin': 'https://y.qq.com/'}
    # search and parse headers carry the same values but stay two independent dict objects
    self.default_search_headers = dict(web_headers)
    self.default_parse_headers = dict(web_headers)
    self.default_download_headers = {'User-Agent': browser_user_agent, 'Referer': 'http://y.qq.com'}
    # the active headers start out as the search headers (same object, not a copy)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build one request description per result page for the given keyword.
    Args:
        keyword: text to search for.
        rule: optional entries overriding the default search rule ('num_per_page' and 'page_num' are always recomputed per page).
        request_overrides: only its 'cookies' entry is read, as a fallback for self.default_cookies.
    Returns:
        List of dicts with 'url' and 'data' (utf-8 encoded compact json), plus 'params' carrying the signature
        when the encrypted endpoint is used.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # search rules, caller supplied entries win over the defaults
    merged_rule = {'searchid': QQMusicClientUtils.randomsearchid(), 'query': keyword, 'search_type': SearchType.SONG.value, 'num_per_page': self.search_size_per_page, 'page_num': 1, 'highlight': 1, 'grp': 1}
    merged_rule.update(rule)
    # construct search urls based on search rules
    endpoint = QQMusicClientUtils.enc_endpoint if self.use_encrypted_endpoint else QQMusicClientUtils.endpoint
    request_metas, per_page, covered = [], self.search_size_per_page, 0
    while covered < self.search_size_per_source:
        current_rule = copy.deepcopy(merged_rule)
        current_rule['num_per_page'] = per_page
        current_rule['page_num'] = int(covered // per_page) + 1
        credential = Credential().fromcookiesdict(self.default_cookies or request_overrides.get('cookies', {}))
        payload = QQMusicClientUtils.buildrequestdata(params=current_rule, module="music.search.SearchCgiService", method="DoSearchForQQMusicMobile", credential=credential)
        request_meta = {'url': endpoint, 'data': json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")}
        # the encrypted endpoint additionally expects the payload signature as a query parameter
        if self.use_encrypted_endpoint:
            request_meta['params'] = {"sign": QQMusicClientUtils.sign(payload)}
        request_metas.append(request_meta)
        covered += per_page
    # return
    return request_metas
'''_parsewithvkeysapi'''
def _parsewithvkeysapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve a song through the third-party api.vkeys.cn service, trying the qualities of
    ThirdPartVKeysAPISongFileType.ID_TO_NAME in reverse order.
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is used as the song id.
        request_overrides: extra keyword arguments forwarded to the requests; a 'timeout' entry replaces the default of 10 seconds.
    Returns:
        The first SongInfo reporting a valid download url, otherwise the last candidate built,
        or an empty SongInfo (only source set) when no quality produced a candidate at all.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result.get('mid') or search_result.get('songmid')
    # defaults are merged into the overrides instead of being passed next to them: passing timeout=10 together with
    # **request_overrides raised "multiple values for keyword argument" whenever the caller supplied its own timeout
    request_kwargs = {'timeout': 10, **request_overrides}
    # bound up-front so the final return cannot hit an unbound name when every quality is skipped
    song_info = SongInfo(source=self.source)
    # written as an escape so the full-width colon cannot get lost in transit; an empty string in its place makes
    # `in` always true and str.replace insert ':' between every character
    fullwidth_colon = '\uff1a'
    def safe_obtain_filesize_func(meta):
        # size in MB parsed from a value such as '3.20MB'; 0 for anything that is not a plain decimal number
        if not isinstance(meta, dict): return 0
        size_text = remove_suffix(str(meta.get('size', '0.00MB')), 'MB').strip()
        return float(size_text) if size_text.replace('.', '', 1).isdigit() else 0
    def to_seconds_func(x):
        # accepts clock-like text ('3:45', '1:02:03'), unit-suffixed text ('3分45秒', '2m10s') or a bare number of seconds
        s = str(x).strip().lower()
        if not s: return 0
        if (':' in s) or (fullwidth_colon in s):
            p = [int(v) for v in re.findall(r'\d+', s.replace(fullwidth_colon, ':'))]
            if len(p) >= 3: return p[-3]*3600 + p[-2]*60 + p[-1]
            if len(p) == 2: return p[0]*60 + p[1]
            return p[0] if len(p) == 1 else 0
        h = int(mo.group(1)) if (mo := re.search(r'(\d+)\s*(?:小时|时|h|hr)', s)) else 0
        m = int(mo.group(1)) if (mo := re.search(r'(\d+)\s*(?:分钟|分|m|min)', s)) else 0
        # seconds are either explicitly suffixed or the bare number following the minute unit
        if (mo := re.search(r'(\d+)\s*(?:秒|s|sec)', s)): sec = int(mo.group(1))
        elif (mo := re.search(r'(?:分钟|分|m|min)\s*(\d+)\b', s)): sec = int(mo.group(1))
        else: sec = 0
        num = int(mo.group(0)) if (mo := re.search(r'\d+', s)) else 0
        tot = h*3600 + m*60 + sec
        # without any recognised unit the first number is taken as plain seconds
        return tot if tot > 0 else num
    # parse
    for quality in list(ThirdPartVKeysAPISongFileType.ID_TO_NAME.value.keys())[::-1]:
        try: (resp := self.get(f"https://api.vkeys.cn/v2/music/tencent/geturl?mid={song_id}&quality={quality}", **request_kwargs)).raise_for_status()
        except Exception: break
        if (not safeextractfromdict((download_result := resp2json(resp=resp)), ['data', 'url'], None)) or (safe_obtain_filesize_func(download_result['data']) < 0.01): continue
        if not (download_url := download_result['data']['url']) or not str(download_url).startswith('http'): continue
        # lyrics are best-effort, a failing lyric request must not discard the download url
        try: (resp := self.get(f"https://api.vkeys.cn/v2/music/tencent/lyric?mid={song_id}", **request_kwargs)).raise_for_status(); lyric_result = resp2json(resp=resp)
        except Exception: lyric_result = {}
        duration_s = to_seconds_func(safeextractfromdict(download_result, ['data', 'interval'], 0))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['data', 'song'], None)),
            singers=legalizestring(str(safeextractfromdict(download_result, ['data', 'singer'], '') or '').replace('/', ', ')), album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)), ext=download_url.split('?')[0].split('.')[-1],
            file_size_bytes=None, file_size=remove_suffix(str(safeextractfromdict(download_result, ['data', 'size'], '0.00')), 'MB').strip() + ' MB', identifier=song_id, duration_s=duration_s, duration=seconds2hms(duration_s),
            lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], 'NULL')) or 'NULL', cover_url=safeextractfromdict(download_result, ['data', 'cover'], None), download_url=download_url,
            download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        probe_status = song_info.download_url_status['probe_status']
        # the probed size replaces the size announced by the api
        song_info.file_size = probe_status['file_size']
        # an extension that is not a known audio one is replaced by the probed one, or by 'mp3' as a last resort
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithlittleyouziapi'''
def _parsewithlittleyouziapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve a song through the third-party www.littleyouzi.com service, trying quality ids 0 to 10 in order.
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is the song id, the remaining metadata is read from it as well.
        request_overrides: extra keyword arguments forwarded to the requests; a 'timeout' entry replaces the default of 10 seconds.
    Returns:
        The first SongInfo reporting a valid download url, otherwise the last candidate built,
        or an empty SongInfo (only source set) when no quality produced a candidate at all.
    '''
    # init
    request_overrides, song_id = request_overrides or {}, search_result.get('mid') or search_result.get('songmid')
    # defaults are merged into the overrides instead of being passed next to them: passing timeout=10 together with
    # **request_overrides raised "multiple values for keyword argument" whenever the caller supplied its own timeout
    request_kwargs = {'timeout': 10, **request_overrides}
    # bound up-front so the final return cannot hit an unbound name when every quality is skipped
    song_info = SongInfo(source=self.source)
    # parse
    for quality in range(0, 11):
        try: (resp := self.get(f"https://www.littleyouzi.com/api/v2/qqmusic?mid={song_id}&quality={quality}", **request_kwargs)).raise_for_status()
        except Exception: break
        download_url: str = safeextractfromdict((download_result := resp2json(resp=resp)), ['data', 'audio'], '')
        if not download_url or not str(download_url).startswith('http'): continue
        # lyrics are best-effort, a failing lyric request must not discard the download url
        try: (resp := self.get(f"https://www.littleyouzi.com/api/v2/qqmusic?mid={song_id}&lyrics=true", **request_kwargs)).raise_for_status(); lyric_result = resp2json(resp=resp)
        except Exception: lyric_result = {}
        singer_names = [singer.get('name', '') for singer in (search_result.get('singer', []) or []) if isinstance(singer, dict) and singer.get('name', None)]
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None) or search_result.get('songname', None)), singers=legalizestring(', '.join(singer_names)),
            album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None) or search_result.get('albumname')), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=song_id,
            duration_s=search_result.get('interval', 0), duration=seconds2hms(search_result.get('interval', 0)), lyric=cleanlrc(lyric_result.get('content') or 'NULL'),
            cover_url=None, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        probe_status = song_info.download_url_status['probe_status']
        song_info.file_size = probe_status['file_size']
        # an extension that is not a known audio one is replaced by the probed one, or by 'mp3' as a last resort
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithnkiapi'''
def _parsewithnkiapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve a song through the third-party api.nki.pw service.
    The request is first attempted with curl_cffi and repeated through self.get when that attempt fails for any reason.
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is used as the song id.
        request_overrides: extra keyword arguments forwarded to the requests; entries such as 'timeout' or 'verify' replace the built-in defaults.
    Returns:
        SongInfo built from the response, or an empty SongInfo (only source set) when no http(s) play url is returned.
    Raises:
        Whatever the fallback request / json decoding raises; the caller is expected to handle it.
    '''
    # init
    decrypt_func, curl_cffi = lambda t: base64.b64decode(str(t).encode('utf-8')).decode('utf-8'), optionalimport('curl_cffi')
    request_overrides, song_id, song_info = request_overrides or {}, search_result.get('mid') or search_result.get('songmid'), SongInfo(source=self.source)
    REQUEST_KEYS = ['MjhmZWNlOTI1NDM5YjA1Mjc5MmE5Nzk4OWM4NzBjZWQzODAzYTcxYzZiNTM0ZjcxZTVhNTMzMzhiMmQzMWVmOA==', 'YzRjNGY1ZmMzNmJhZDRjYWNiOTg4MzllMTRmZWE0MDI3N2IzNWVhMmViMWJhYmRhZDdiYmRlMTI4NDAwZjNiMQ==']
    # every attempt draws its own api key, exactly one random choice per request
    build_url_func = lambda: f'https://api.nki.pw/API/music_open_api.php?mid={song_id}&apikey={decrypt_func(random.choice(REQUEST_KEYS))}'
    # parse; defaults are merged into the overrides instead of being passed next to them, otherwise an override such as
    # 'timeout' collides with the explicit keyword and raises "multiple values for keyword argument"
    try:
        # NOTE(review): optionalimport presumably yields None when curl_cffi is missing; the resulting AttributeError also lands in the fallback
        (resp := curl_cffi.requests.get(build_url_func(), **{'timeout': 10, 'impersonate': "chrome131", 'verify': False, **request_overrides})).raise_for_status()
    except Exception:
        (resp := self.get(build_url_func(), **{'timeout': 10, **request_overrides})).raise_for_status()
    # the first non-empty play url wins, in this order of preference
    download_url: str = (download_result := resp2json(resp=resp)).get('song_play_url_sq') or download_result.get('song_play_url_pq') or download_result.get('song_play_url_accom') or download_result.get('song_play_url_hq') or download_result.get('song_play_url') or download_result.get('song_play_url_standard') or download_result.get('song_play_url_fq')
    if not download_url or not str(download_url).startswith('http'): return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('song_name')), singers=legalizestring(download_result.get('singer_name')), album=legalizestring(download_result.get('album_name')),
        ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=song_id, duration=download_result.get('duration', '-:-:-'), lyric=cleanlrc(download_result.get('song_lyric', 'NULL')) or 'NULL', cover_url=download_result.get('album_pic', None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    # an extension that is not a known audio one is replaced by the probed one, or by 'mp3' as a last resort
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    # return
    return song_info
'''_parsewithtangapi'''
def _parsewithtangapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve a song through the third-party tang.api.s01s.cn service.
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is used as the song id.
        request_overrides: extra keyword arguments forwarded to the requests.
    Returns:
        SongInfo built from the response, or an empty SongInfo (only source set) when no http(s) play url is returned.
    '''
    # init
    request_overrides = request_overrides or {}
    song_id = search_result.get('mid') or search_result.get('songmid')
    song_info = SongInfo(source=self.source)
    # parse
    (resp := self.get(f'https://tang.api.s01s.cn/music_open_api.php?mid={song_id}', **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    # walk the play url fields in order of preference and stop at the first non-empty one
    download_url: str = None
    for url_field in ('song_play_url_sq', 'song_play_url_pq', 'song_play_url_accom', 'song_play_url_hq', 'song_play_url', 'song_play_url_standard', 'song_play_url_fq'):
        download_url = download_result.get(url_field)
        if download_url: break
    if (not download_url) or (not str(download_url).startswith('http')):
        return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(download_result.get('song_name')),
        singers=legalizestring(download_result.get('singer_name')),
        album=legalizestring(download_result.get('album_name')),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size_bytes=None,
        file_size=None,
        identifier=song_id,
        duration=download_result.get('duration', '-:-:-'),
        lyric=cleanlrc(download_result.get('song_lyric', 'NULL')) or 'NULL',
        cover_url=download_result.get('album_pic', None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    # an extension that is not a known audio one is replaced by the probed one, or by 'mp3' as a last resort
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    # return
    return song_info
'''_parsewithxianyuwapi'''
def _parsewithxianyuwapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve a song through the third-party apii.xianyuw.cn service (requested with br=hires).
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is the song id, its album title serves as a fallback.
        request_overrides: extra keyword arguments forwarded to the requests.
    Returns:
        SongInfo built from the response, or an empty SongInfo (only source set) when the returned url is not http(s).
    '''
    # init
    encoded_keys = ['c2stOTUwZTc4MTNjMzhjMmUzMWQzOWQ4NzlkMzIwNDg4OTU=', 'c2stNjJjZGIwM2UyMjcwZWIzOTY4Y2NhNzg4MTM5OWY0MTI=']
    request_overrides = request_overrides or {}
    song_id = search_result.get('mid') or search_result.get('songmid')
    song_info = SongInfo(source=self.source)
    # parse, one of the base64 encoded keys is picked at random and decoded for the request
    api_key = base64.b64decode(str(random.choice(encoded_keys)).encode('utf-8')).decode('utf-8')
    (resp := self.get(f'https://apii.xianyuw.cn/api/v1/qq-music-search?id={song_id}&key={api_key}&no_url=0&br=hires', **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    download_url: str = download_result['data']['url']
    if (not download_url) or (not str(download_url).startswith('http')):
        return song_info
    lyric = cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], 'NULL')) or 'NULL'
    # the duration comes from the lyric itself, via extractdurationsecondsfromlrc
    lyric_duration_s = extractdurationsecondsfromlrc(lyric)
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(safeextractfromdict(download_result, ['data', 'title'], None)),
        singers=legalizestring(str(safeextractfromdict(download_result, ['data', 'author'], '')).replace('/', ', ')),
        album=legalizestring(safeextractfromdict(download_result, ['data', 'album'], None)),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size_bytes=None,
        file_size=None,
        identifier=song_id,
        duration_s=lyric_duration_s,
        duration=seconds2hms(lyric_duration_s),
        lyric=lyric,
        cover_url=safeextractfromdict(download_result, ['data', 'cover'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # fall back to the album title of the search result when the api did not deliver one
    album_missing = (not song_info.album) or (song_info.album in {'NULL'})
    if album_missing:
        song_info.album = legalizestring(safeextractfromdict(search_result, ['album', 'title'], None) or search_result.get('albumname'))
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    '''
    Try the third-party parsers one after another and return the first usable result.
    Args:
        search_result: one song entry, handed unchanged to every parser.
        request_overrides: extra keyword arguments forwarded to the parsers; may be omitted.
    Returns:
        The first SongInfo reporting a valid download url, or an empty SongInfo (only source set) when
        cookies are configured (the third-party parsers are skipped then) or when every parser failed.
    '''
    # the default of None used to reach `.get` below and raise AttributeError
    request_overrides = request_overrides or {}
    song_info_flac = SongInfo(source=self.source)
    # with cookies available the third-party services are not consulted at all
    if self.default_cookies or request_overrides.get('cookies'): return song_info_flac
    for imp_func in [self._parsewithvkeysapi, self._parsewithtangapi, self._parsewithnkiapi, self._parsewithxianyuwapi, self._parsewithlittleyouziapi]:
        # an explicit check replaces the former `assert` (stripped under -O), and `except Exception` replaces the
        # bare except so KeyboardInterrupt / SystemExit are no longer swallowed
        try:
            candidate = imp_func(search_result, request_overrides)
            if candidate.with_valid_download_url:
                song_info_flac = candidate
                break
        except Exception:
            continue
    return song_info_flac
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''
    Resolve a search result into a SongInfo through the official vkey endpoints and attach lyrics to it.
    Args:
        search_result: one song entry; its 'mid' (or 'songmid') is used as the song id.
        song_info_flac: candidate produced beforehand (the callers in this file pass the third-party result); used as shortcut and as fallback.
        lossless_quality_is_sufficient: when True and the candidate is valid with an ext listed in lossless_quality_definitions, the vkey requests are skipped.
        lossless_quality_definitions: extensions accepted for that shortcut.
        request_overrides: extra keyword arguments forwarded to the requests.
    Returns:
        SongInfo; an empty one (only source set) when search_result is not a dict or carries no song id,
        and possibly one without a valid download url when nothing could be resolved.
    '''
    # init
    song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
    if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('mid') or search_result.get('songmid'))): return song_info
    # obtain basic song_info
    if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
    else:
        # --non-vip / vip users using enc_endpoint
        if self.use_encrypted_endpoint:
            # qualities are tried in the order of SORTED_QUALITIES; each entry is indexed as [0] and [1] to build the file name
            for quality in EncryptedSongFileType.SORTED_QUALITIES.value:
                params = {"filename": [f"{quality[0]}{song_id}{song_id}{quality[1]}"], "guid": QQMusicClientUtils.randomguid(), "songmid": [song_id], 'songtype': [0]}
                current_rule = QQMusicClientUtils.buildrequestdata(params=params, module="music.vkey.GetEVkey", method="CgiGetEVkey", credential=Credential().fromcookiesdict(self.default_cookies or request_overrides.get('cookies', {})), common_override={"ct": "19"})
                # a failing request only skips this quality
                try: (resp := self.post(QQMusicClientUtils.enc_endpoint, data=json.dumps(current_rule, ensure_ascii=False, separators=(",", ":")).encode("utf-8"), params={"sign": QQMusicClientUtils.sign(current_rule)}, **request_overrides)).raise_for_status()
                except Exception: continue
                # 'purl' is preferred, 'wifiurl' is the fallback
                download_url = safeextractfromdict((download_result := resp2json(resp)), ['music.vkey.GetEVkey.CgiGetEVkey', 'data', "midurlinfo", 0, "purl"], "") or safeextractfromdict(download_result, ['music.vkey.GetEVkey.CgiGetEVkey', 'data', "midurlinfo", 0, "wifiurl"], "")
                # the ekey is stored in raw_data next to the download result; NOTE(review): presumably needed to decrypt the audio later - confirm
                ekey = safeextractfromdict(download_result, ['music.vkey.GetEVkey.CgiGetEVkey', 'data', "midurlinfo", 0, "ekey"], "")
                if not download_url: continue
                download_url = QQMusicClientUtils.music_domain + download_url
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'ekey': ekey}, source=self.source, song_name=legalizestring(search_result.get('title') or search_result.get('songname')), singers=legalizestring(', '.join([singer.get('name') for singer in (search_result.get('singer', []) or []) if isinstance(singer, dict) and singer.get('name')])),
                    album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None) or search_result.get('albumname', None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=str(song_id), duration_s=search_result.get('interval', 0), duration=seconds2hms(search_result.get('interval', 0)), lyric=None, cover_url=None,
                    download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                )
                # the cover url is derived from the album mid of the search result
                song_info.cover_url = f"https://y.gtimg.cn/music/photo_new/T002R800x800M000{safeextractfromdict(search_result, ['album', 'mid'], '') or search_result.get('albummid')}.jpg"
                song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                # the string literal below is disabled code kept by the author: the extension is deliberately not
                # normalised for encrypted audio; as a bare expression statement it has no effect
                '''
# encrypted audio extension, not conduct this part
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
'''
                if song_info.with_valid_download_url: break
        # --non-vip / vip users using endpoint
        else:
            for quality in SongFileType.SORTED_QUALITIES.value:
                params = {"filename": [f"{quality[0]}{song_id}{song_id}{quality[1]}"], "guid": QQMusicClientUtils.randomguid(), "songmid": [song_id], 'songtype': [0]}
                current_rule = QQMusicClientUtils.buildrequestdata(params=params, module="music.vkey.GetVkey", method="UrlGetVkey", credential=Credential().fromcookiesdict(self.default_cookies or request_overrides.get('cookies', {})), common_override={"ct": "19"})
                # a failing request only skips this quality
                try: (resp := self.post(QQMusicClientUtils.endpoint, data=json.dumps(current_rule, ensure_ascii=False, separators=(",", ":")).encode("utf-8"), **request_overrides)).raise_for_status()
                except Exception: continue
                # 'purl' is preferred, 'wifiurl' is the fallback
                download_url = safeextractfromdict((download_result := resp2json(resp)), ['music.vkey.GetVkey.UrlGetVkey', 'data', "midurlinfo", 0, "purl"], "") or safeextractfromdict(download_result, ['music.vkey.GetVkey.UrlGetVkey', 'data', "midurlinfo", 0, "wifiurl"], "")
                if not download_url: continue
                download_url = QQMusicClientUtils.music_domain + download_url
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title') or search_result.get('songname')), singers=legalizestring(', '.join([singer.get('name') for singer in (search_result.get('singer', []) or []) if isinstance(singer, dict) and singer.get('name')])),
                    album=legalizestring(safeextractfromdict(search_result, ['album', 'title'], None) or search_result.get('albumname', None)), ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=str(song_id), duration_s=search_result.get('interval', 0), duration=seconds2hms(search_result.get('interval', 0)), lyric=None,
                    cover_url=None, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                )
                # the cover url is derived from the album mid of the search result
                song_info.cover_url = f"https://y.gtimg.cn/music/photo_new/T002R800x800M000{safeextractfromdict(search_result, ['album', 'mid'], '') or search_result.get('albummid')}.jpg"
                song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                # an extension that is not a known audio one is replaced by the probed one, or by 'mp3' as a last resort
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                # the pre-computed candidate replaces the official result whenever largerthan() says so
                # NOTE(review): largerthan presumably compares file sizes - confirm against SongInfo
                if song_info_flac.with_valid_download_url and song_info_flac.largerthan(song_info): song_info = song_info_flac
                if song_info.with_valid_download_url: break
    # fall back to the pre-computed candidate, and give up without a lyric lookup when even that one is unusable
    if not song_info.with_valid_download_url: song_info = song_info_flac
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results
    params = {'songmid': str(song_id), 'g_tk': '5381', 'loginUin': '0', 'hostUin': '0', 'format': 'json', 'inCharset': 'utf8', 'outCharset': 'utf-8', 'platform': 'yqq'}
    # caller supplied headers are dropped from the copy because this request passes its own headers= keyword
    lyric_request_overrides = copy.deepcopy(request_overrides); lyric_request_overrides.pop('headers', {})
    # the 'lyric' field is base64 decoded before cleaning; any failure leaves the lyric as "NULL"
    try: (resp := self.get('https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric_new.fcg', headers={'Referer': 'https://y.qq.com/portal/player.html'}, params=params, **lyric_request_overrides)).raise_for_status(); lyric = (lyric_result := resp2json(resp)).get('lyric'); lyric = 'NULL' if not lyric else cleanlrc(base64.b64decode(lyric).decode('utf-8'))
    except Exception: lyric_result, lyric = {}, "NULL"
    # existing lyric data is only overwritten by a non-empty result
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Post one search request and collect every song that resolves to a usable download url.
    Args:
        keyword: search keyword; not used by the body of this method.
        search_url: request description as built by _constructsearchurls: 'url' plus the keyword arguments for self.post.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list the results are appended to; a fresh list is created when omitted.
        progress: optional rich Progress on which the outcome of this request is reported.
        progress_id: id of the task inside progress that gets updated.
    Returns:
        The list holding the collected SongInfo objects (the very list passed as song_infos, if any).
    Raises:
        KeyError: when search_url carries no 'url' entry.
    '''
    # init; None defaults replace the former shared `{}` / `[]` defaults, the latter leaked results between calls
    search_meta, request_overrides = copy.deepcopy(search_url or {}), request_overrides or {}
    song_infos = [] if song_infos is None else song_infos
    search_url = search_meta.pop('url')
    # successful
    try:
        # --search results
        (resp := self.post(search_url, **search_meta, **request_overrides)).raise_for_status()
        for search_result in resp2json(resp)['music.search.SearchCgiService.DoSearchForQQMusicMobile']['data']['body']['item_song']:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis, the third part result is only "sufficient" when no cookies are configured
            lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            # --append to song_infos, falling back to the third part result when the official one is unusable
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (progress is optional, so it is only touched when provided)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
# init
request_overrides = request_overrides or {}
request_overrides.setdefault('timeout', (10, 30))
playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
try: playlist_id, song_infos = parse_qs(urlparse(playlist_url).query, keep_blank_values=False).get('id')[0], []; assert playlist_id
except: playlist_id, song_infos = remove_suffix(remove_suffix(urlparse(playlist_url).path.strip('/').split('/')[-1], '.html'), '.htm'), []
if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, QQ_MUSIC_HOSTS)): return song_infos
# get tracks in playlist
(resp := self.get("https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg", headers={"Referer": f"https://y.qq.com/n/ryqq/playlist/{playlist_id}"}, params={"disstid": str(playlist_id), "type": "1", "json": "1", "utf8": "1", "onlysong": "0", "format": "json"}, **request_overrides)).raise_for_status()
tracks_in_playlist = (safeextractfromdict((playlist_result := resp2json(resp=resp)), ['cdlist', 0, 'songlist'], []) or safeextractfromdict(playlist_result, ['cdlist', 0, 'list'], []) or safeextractfromdict(playlist_result, ['songlist'], []) or [])
# parse track by track in playlist
with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
for idx, track_info in enumerate(tracks_in_playlist):
if idx > 0: main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
lossless_quality_is_sufficient = False if self.default_cookies or request_overrides.get('cookies') else True
try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
except Exception: song_info = song_info_flac
if not song_info.with_valid_download_url: song_info = song_info_flac
if song_info.with_valid_download_url: song_infos.append(song_info)
main_process_context.advance(main_progress_id, 1)
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
# post processing
playlist_name = safeextractfromdict(playlist_result, ['cdlist', 0, 'dissname'], None)
song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
for song_info in song_infos:
song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
# return results
return song_infos
@@ -0,0 +1,173 @@
'''
Function:
Implementation of SodaMusicClient: https://www.douyin.com/qishui/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import os
import copy
import json_repair
from pathlib import Path
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import SODA_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse, parse_qs
from ..utils.sodautils import AudioDecryptor, SodaTimedLyricsParser
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, byte2mb, resp2json, usesearchheaderscookies, safeextractfromdict, seconds2hms, usedownloadheaderscookies, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester, SongInfoUtils
'''SodaMusicClient'''
class SodaMusicClient(BaseMusicClient):
source = 'SodaMusicClient'
def __init__(self, **kwargs):
    '''
    Initialise the base client, install a desktop-Chrome user agent for the search / parse / download stages and open the session.
    All keyword arguments are forwarded unchanged to BaseMusicClient.__init__.
    '''
    super(SodaMusicClient, self).__init__(**kwargs)
    browser_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    # every stage gets its own dict object, so editing one stage's headers never touches another stage
    self.default_search_headers = {"user-agent": browser_ua}
    self.default_parse_headers = dict(self.default_search_headers)
    self.default_download_headers = dict(self.default_search_headers)
    # default_headers is an alias of (not a copy of) the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = None, progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
    '''
    Download a track through the base client, decrypt the saved file into an ``.m4a`` and record the result.
    Args:
        song_info: track to download; ``song_info.raw_data['play_auth']`` is handed to AudioDecryptor.decrypt
        request_overrides: forwarded to the base ``_download``
        downloaded_song_infos: list that receives the finished SongInfo; a new list is created when omitted
        progress, song_progress_id: forwarded to the base ``_download``
        auto_supplement_song: when True the copy appended to the list goes through SongInfoUtils.supplsonginfothensavelyricsthenwritetags
    Returns:
        the ``downloaded_song_infos`` list with one deep-copied SongInfo appended
    '''
    # a fresh list per call: the previous `[]` default was one shared object, so results leaked between calls that omitted the argument
    if downloaded_song_infos is None: downloaded_song_infos = []
    # the base call gets a throw-away list and auto_supplement_song=False; supplementing happens below, on the decrypted file
    super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=[], progress=progress, song_progress_id=song_progress_id, auto_supplement_song=False)
    with open(song_info.save_path, "rb") as fp:
        file_data = bytearray(fp.read())
    saved_path = Path(song_info.save_path)
    output_filepath = saved_path.parent / f'{saved_path.stem}.m4a'
    AudioDecryptor.decrypt(file_data=file_data, play_auth=song_info.raw_data['play_auth'], output_filepath=str(output_filepath))
    # remove the downloaded source file unless the decrypted output was written over it
    if not os.path.samefile(song_info.save_path, str(output_filepath)):
        os.remove(song_info.save_path)
    song_info._save_path = str(output_filepath)
    finished_song_info = copy.deepcopy(song_info)
    if auto_supplement_song:
        finished_song_info = SongInfoUtils.supplsonginfothensavelyricsthenwritetags(finished_song_info, logger_handle=self.logger_handle, disable_print=self.disable_print)
    downloaded_song_infos.append(finished_song_info)
    return downloaded_song_infos
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
self.search_size_per_page = min(self.search_size_per_page, 20)
# search rules
default_rule = {
'aid': '386088', 'app_name': 'luna_pc', 'region': 'cn', 'geo_region': 'cn', 'os_region': 'cn', 'sim_region': '', 'device_id': '1088932190113307', 'cdid': '', 'iid': '2332504177791808', 'version_name': '3.0.0', 'version_code': '30000000', 'channel': 'official', 'build_mode': 'master', 'network_carrier': '', 'ac': 'wifi', 'tz_name': 'Asia/Shanghai',
'resolution': '', 'device_platform': 'windows', 'device_type': 'Windows', 'os_version': 'Windows 11 Home China', 'fp': '1088932190113307', 'q': keyword, 'cursor': 0, 'search_id': '4ee2bc52-db9b-42c3-85cf-cdac2fe02efe', 'search_method': 'input', 'debug_params': '', 'from_search_id': 'aa21093-d49e-4d29-b6c7-548b170d12a0', 'search_scene': '',
}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://api.qishui.com/luna/pc/search/track?'
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['cursor'] = count
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Purpose: build a SongInfo for one track by calling the track_v2 endpoint, following its url_player_info link, and keeping the first candidate stream that reports a valid download url; lyrics are then refreshed from the share page.
# Args:
#   search_result: dict that is read under ['entity']['track'] (id, name, artists, album)
#   song_info_flac: optional already-resolved SongInfo; it is returned-through as the basic result when lossless_quality_is_sufficient is true, it has a valid download url and its ext is in lossless_quality_definitions
#   lossless_quality_definitions: only read with `in`, never mutated here
#   request_overrides: extra keyword arguments forwarded to every self.get call and to the audio link tester
# Returns: a SongInfo; an empty SongInfo(source=self.source) when search_result is not a dict or carries no track id, and a SongInfo without a valid download url when no candidate passed
# Raises: errors from raise_for_status() and from indexing an unexpected payload are not caught in the download part; the callers visible in this file wrap the call in try/except
# init
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
if (not isinstance(search_result, dict)) or (not (song_id := safeextractfromdict(search_result, ['entity', 'track', 'id'], None))): return song_info
# candidates are ordered by (Size, Bitrate) descending, i.e. the largest stream is tried first
rank_audio_func = lambda video_list: sorted(video_list, key=lambda x: (x.get('Size'), x.get('Bitrate')), reverse=True)
# obtain basic song_info
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
(resp := self.get(f'https://api.qishui.com/luna/pc/track_v2?track_id={song_id}&media_type=track&queue_type=', **request_overrides)).raise_for_status()
# second request: the track_v2 payload only carries a link to the player info, which is fetched and stored next to it
(resp := self.get((download_result := resp2json(resp))['track_player']['url_player_info'], **request_overrides)).raise_for_status()
download_result['url_player_info_response'] = resp2json(resp)
audios_sorted: list[dict] = rank_audio_func(safeextractfromdict(download_result, ['url_player_info_response', 'Result', 'Data', 'PlayInfoList'], []) or [])
# candidates without MainPlayUrl and BackupPlayUrl cannot be downloaded and are dropped
audios_sorted: list[dict] = [a for a in audios_sorted if (a.get('MainPlayUrl') or a.get('BackupPlayUrl'))]
for audio_sorted in audios_sorted:
# play_auth is stored in raw_data because _download passes it to AudioDecryptor.decrypt
download_url = audio_sorted.get('MainPlayUrl') or audio_sorted.get('BackupPlayUrl'); play_auth = safeextractfromdict(audio_sorted, ['PlayAuth'], '')
# the initial lyric comes from the track_v2 payload (['lyric']['content']); 'NULL' is the placeholder when it is empty
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}, 'play_auth': play_auth}, source=self.source, song_name=legalizestring(safeextractfromdict(search_result, ['entity', 'track', 'name'], None)), singers=legalizestring(', '.join([singer.get('name') for singer in (safeextractfromdict(search_result, ['entity', 'track', 'artists'], []) or []) if isinstance(singer, dict) and singer.get('name')])), album=legalizestring(safeextractfromdict(search_result, ['entity', 'track', 'album', 'name'], None)), ext=audio_sorted.get('Format', 'm4a'), file_size_bytes=audio_sorted.get('Size', 0), file_size=byte2mb(audio_sorted.get('Size', 0)),
identifier=str(song_id), duration_s=audio_sorted.get('Duration'), duration=seconds2hms(audio_sorted.get('Duration')), lyric=cleanlrc(SodaTimedLyricsParser.tolrclinelevel(SodaTimedLyricsParser.parsetimedlyrics(safeextractfromdict(download_result, ['lyric', 'content'], '')))) or 'NULL', cover_url=str(safeextractfromdict(search_result, ['entity', 'track', 'album', 'url_cover', 'urls', 0], '')) + str(safeextractfromdict(search_result, ['entity', 'track', 'album', 'url_cover', 'uri'], '')) + '~c5_375x375.jpg', download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
# NOTE(review): ext is overwritten with the probed ext on this line, so the first condition of the following `if` compares the same value twice and can only fire if SongInfo.ext transforms what is assigned - confirm whether the unconditional overwrite is intended
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
# nothing downloadable: return before spending a request on lyrics
if not song_info.with_valid_download_url: return song_info
# supplement lyric results
try:
(resp := self.get(f'https://music.douyin.com/qishui/share/track?track_id={song_id}', **request_overrides)).raise_for_status()
# the share page is scanned for a `_ROUTER_DATA = {...};` assignment, whose object literal is parsed with json_repair
lyric_result = json_repair.loads(re.search(r'_ROUTER_DATA\s*=\s*({[\s\S]*?});', resp.text).group(1).strip())
sentences, lrc_list = lyric_result['loaderData']['track_page']['audioWithLyricsOption']['lyrics']['sentences'], []
for sentence in sentences:
if not isinstance(sentence, dict): continue
# a sentence's text is the concatenation of its words' 'text' fields
start_ms = sentence.get('startMs', 0); sentence_text = "".join([w.get('text', '') for w in sentence.get('words', []) if isinstance(w, dict)])
# startMs (milliseconds) is rendered as an LRC time tag [mm:ss.mmm]
minutes, seconds, m_seconds = start_ms // 60000, (start_ms % 60000) // 1000, start_ms % 1000; time_tag = f"[{minutes:02d}:{seconds:02d}.{m_seconds:03d}]"
lrc_list.append(f"{time_tag}{sentence_text}")
lyric = cleanlrc("\n".join(lrc_list)) or 'NULL'
# best effort: any failure while fetching or parsing the share page keeps the lyric obtained above
except Exception: lyric_result, lyric = {}, 'NULL'
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
# the share-page lyric replaces the earlier one only when it is non-empty and not the 'NULL' placeholder
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
for search_result in resp2json(resp)['result_groups'][0]['data']:
# --parse with official apis
try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
# --append to song_infos
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve a playlist URL into the list of downloadable SongInfo objects it contains.
    Args:
        playlist_url: playlist link; redirects are followed with a HEAD request before it is inspected
        request_overrides: extra keyword arguments forwarded to every request
    Returns:
        de-duplicated list of SongInfo sharing one work_dir; an empty list when the final host is not in SODA_MUSIC_HOSTS or the playlist yields no tracks
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    song_infos, parsed_url = [], urlparse(playlist_url)
    # the id is the `playlist_id` query parameter when present, otherwise the last path segment without .html / .htm
    playlist_id = (parse_qs(parsed_url.query, keep_blank_values=False).get('playlist_id') or [''])[0]
    if not playlist_id: playlist_id = parsed_url.path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm')
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, SODA_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist, page by page until a request fails, a page is empty or count_tracks is reached
    tracks_in_playlist, page, page_size, playlist_result_first = [], 1, 20, {}
    while True:
        params = {'playlist_id': playlist_id, 'cursor': str(page_size * (page - 1)), 'cnt': str(page_size), 'aid': '386088', 'device_platform': 'web', 'channel': 'pc_web'}
        try:
            (resp := self.get("https://api.qishui.com/luna/pc/playlist/detail?", params=params, **request_overrides)).raise_for_status()
        except Exception:
            break
        if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['media_resources'], [])): break
        tracks_in_playlist.extend(safeextractfromdict(playlist_result, ['media_resources'], [])); page += 1
        # the first page is kept because the playlist title is read from it later
        if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        if (float(safeextractfromdict(playlist_result, ['playlist', 'count_tracks'], 0)) <= len(tracks_in_playlist)): break
    # de-duplicate by track id, the last occurrence wins
    tracks_in_playlist = list({d["id"]: d for d in tracks_in_playlist}.values())
    # _parsewithofficialapiv1 reads ['entity']['track'], playlist items carry the track under ['entity']['track_wrapper']['track']
    for track in tracks_in_playlist:
        try:
            track['entity']['track'] = safeextractfromdict(track, ['entity', 'track_wrapper', 'track'], {})
        except Exception:
            continue
    # an empty playlist used to crash below: the loop variable idx was never bound when the final progress update read idx+1
    if not tracks_in_playlist: return song_infos
    # parse track by track in playlist
    total = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{total} songs found in playlist {playlist_id} >>> completed (0/{total})", total=total)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the previous track is accounted for at the start of the next iteration
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({idx}/{total})")
            try:
                song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # account for the last track
        main_process_context.advance(main_progress_id, 1)
        main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({total}/{total})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['playlist', 'title'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,179 @@
'''
Function:
Implementation of SoundCloudMusicClient: https://soundcloud.com/discover
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import re
import copy
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from urllib.parse import urlencode, urlparse
from ..utils.hosts import SOUNDCLOUD_MUSIC_HOSTS
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, resp2json, usesearchheaderscookies, seconds2hms, safeextractfromdict, hostmatchessuffix, obtainhostname, useparseheaderscookies, SongInfo, AudioLinkTester, LyricSearchClient
'''SoundCloudMusicClient'''
class SoundCloudMusicClient(BaseMusicClient):
source = 'SoundCloudMusicClient'
CLIENT_ID = None
def __init__(self, **kwargs):
    '''
    Initialise the base client, check that every configured cookie set carries an "oauth_token", install the default headers and open the session.
    All keyword arguments are forwarded unchanged to BaseMusicClient.__init__.
    Raises AssertionError when a non-empty cookie set lacks "oauth_token".
    '''
    super(SoundCloudMusicClient, self).__init__(**kwargs)
    stages = ('search', 'parse', 'download')
    # validate first, in search / parse / download order; empty cookie sets are allowed
    for stage in stages:
        stage_cookies = getattr(self, f'default_{stage}_cookies')
        if stage_cookies: assert ("oauth_token" in stage_cookies), '"oauth_token" should be configured, refer to https://musicdl.readthedocs.io/en/latest/Quickstart.html#soundcloud-music-download'
    # one independent header dict per stage
    browser_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    for stage in stages:
        setattr(self, f'default_{stage}_headers', {"user-agent": browser_ua})
    # default_headers aliases the search headers, so it also receives the Authorization entry added below
    self.default_headers = self.default_search_headers
    # the oauth token of each configured stage is sent as that stage's Authorization header
    for stage in stages:
        stage_cookies = getattr(self, f'default_{stage}_cookies')
        if stage_cookies: getattr(self, f'default_{stage}_headers').update({'Authorization': stage_cookies["oauth_token"]})
    self._initsession()
'''_setclientid'''
def _setclientid(self, request_overrides: dict = None):
    '''
    Make sure the class-level ``SoundCloudMusicClient.CLIENT_ID`` is populated.
    The id is scraped from the scripts referenced by the soundcloud.com home page (last script first); a built-in fallback id is stored when the home page cannot be fetched or no script contains one.
    Args:
        request_overrides: extra keyword arguments forwarded to ``self.session.get``
    Returns:
        the scraped id when one was just found, otherwise None (already set, or fallback stored)
    '''
    if SoundCloudMusicClient.CLIENT_ID: return
    request_overrides = request_overrides or {}
    fallback_client_id = '9jZvetLfDs6An08euQgJ0lYlHkKdGFzV'
    # `except Exception` instead of a bare except, so KeyboardInterrupt / SystemExit are no longer swallowed
    try:
        (resp := self.session.get('https://soundcloud.com/', **request_overrides)).raise_for_status()
    except Exception:
        SoundCloudMusicClient.CLIENT_ID = fallback_client_id; return
    client_id_pattern = re.compile(r'client_id\s*:\s*"([0-9a-zA-Z]{32})"')
    script_urls = re.findall(r'<script[^>]+src="([^"]+)"', resp.text)
    for url in reversed(script_urls):
        # a script that cannot be fetched or read is skipped, non-200 answers are ignored
        try:
            resp = self.session.get(url, **request_overrides); m = client_id_pattern.search(resp.text) if resp.status_code == 200 else None
        except Exception:
            continue
        if m: SoundCloudMusicClient.CLIENT_ID = m.group(1); return SoundCloudMusicClient.CLIENT_ID
    SoundCloudMusicClient.CLIENT_ID = fallback_client_id; return
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build the paged track-search URLs for ``keyword``.
    Args:
        keyword: search text, sent as the ``q`` parameter
        rule: optional query parameters that override / extend the built-in ones
        request_overrides: forwarded to ``_setclientid``, which may have to fetch the client id
    Returns:
        list of URLs, one per page, with ``limit`` set to the page size and ``offset`` advancing by it until ``self.search_size_per_source`` is covered
    '''
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    self._setclientid(request_overrides=request_overrides)
    # search rules
    query_params = {'q': keyword, 'sc_a_id': 'ab15798461680579b387acf67441b40149e528cd', 'facet': 'genre', 'user_id': '704923-225181-486085-807554', 'client_id': SoundCloudMusicClient.CLIENT_ID, 'limit': '20', 'offset': '0', 'linked_partitioning': '1', 'app_version': '1769771069', 'app_locale': 'en'}
    query_params.update(rule)
    # one url per page; limit and offset always win over values supplied through rule
    endpoint = 'https://api-v2.soundcloud.com/search/tracks?'
    urls, offset, step = [], 0, self.search_size_per_page
    while offset < self.search_size_per_source:
        urls.append(endpoint + urlencode({**query_params, 'limit': step, 'offset': offset}))
        offset += step
    # return
    return urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Purpose: build a SongInfo for one track by ranking its media transcodings, resolving each candidate to a stream url and keeping the first one that reports a valid download url; a lyric search is run afterwards.
# Args:
#   search_result: track dict; read for 'id', 'media'/'transcodings', 'title', 'publisher_metadata', 'user', 'duration' and 'artwork_url'
#   song_info_flac: optional already-resolved SongInfo, used as the basic result when lossless_quality_is_sufficient is true, it has a valid download url and its ext is in lossless_quality_definitions
#   lossless_quality_definitions: only read with `in`, never mutated here
#   request_overrides: extra keyword arguments forwarded to every request
# Returns: a SongInfo; an empty SongInfo(source=self.source) when search_result is not a dict or has no 'id'
# NOTE(review): the imports shown for this module do not include `from __future__ import annotations`, so `set | list | tuple` is evaluated when the function is defined, which needs Python 3.10+ - confirm the supported interpreter range
# init
self._setclientid(request_overrides=request_overrides); song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('id'))): return song_info
# classifies a transcoding as opus / aac / mp3 / abr / unknown from its lower-cased preset and mime type
guess_codec_func = lambda t: ((lambda preset, mime: "opus" if ("opus" in preset or "opus" in mime) else "aac" if ("aac" in preset or "mp4a" in mime or "audio/mp4" in mime or "m4a" in mime) else "mp3" if ("mp3" in preset or "audio/mpeg" in mime) else "abr" if ("abr" in preset) else "unknown")((safeextractfromdict(t, ["preset"], "") or "").lower(), (safeextractfromdict(t, ["format", "mime_type"], "") or "").lower()))
# bitrate in kbps: an explicit "<digits>k" in the preset wins, otherwise 128 for mp3_0_1, 64 for opus_0_0, 128 for presets starting with abr, else 0
guess_bitrate_kbps_func = lambda t: (lambda preset: (lambda m: int(m.group(1)) if m else 128 if preset == "mp3_0_1" else 64 if preset == "opus_0_0" else 128 if preset.startswith("abr") else 0)(re.search(r"(\d+)\s*k", preset)))((safeextractfromdict(t, ["preset"], "") or "").lower())
quality_rank_func = lambda t: {"hq": 2, "sq": 1}.get((safeextractfromdict(t, ["quality"], "") or "").lower(), 0)
codec_rank_func = lambda codec: {"opus": 4, "aac": 3, "abr": 2, "mp3": 1, "unknown": 0}.get((codec or "").lower(), 0)
protocol_rank_func = lambda t: {"progressive": 2, "hls": 1}.get((safeextractfromdict(t, ["format", "protocol"], "") or "").lower(), 0)
# sort key is (quality rank, bitrate, codec rank, protocol rank); it is used with reverse=True below, so the highest-ranked transcoding is tried first
sort_key_func = lambda t: (lambda c, br: (quality_rank_func(t), br, codec_rank_func(c), protocol_rank_func(t)))(guess_codec_func(t), guess_bitrate_kbps_func(t))
# supplement incomplete tracks
# a track without transcodings is re-fetched by id; the response status is not checked here
if not safeextractfromdict(search_result, ['media', 'transcodings'], []): search_result = resp2json(self.get(f"https://api-v2.soundcloud.com/tracks/{song_id}", params={"client_id": SoundCloudMusicClient.CLIENT_ID}, **request_overrides))
# obtain basic song_info
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
for transcoding in sorted((safeextractfromdict(search_result, ['media', 'transcodings'], []) or []), key=sort_key_func, reverse=True):
if not isinstance(transcoding, dict): continue
preset, mime_type = transcoding.get('preset', '') or '', safeextractfromdict(transcoding, ['format', 'mime_type'], '') or ''
download_url, protocol = transcoding.get('url', '') or '', safeextractfromdict(transcoding, ['format', 'protocol'], '') or ''
if str(protocol).startswith(('ctr-', 'cbc-')): continue # TODO: Solve DRM issues in SoundCloud
# extension guess: opus, then m4a, otherwise mp3
ext = (('opus' if ('opus' in preset or 'opus' in mime_type) else None) or ('m4a' if ('aac' in preset or 'm4a' in mime_type) else None) or 'mp3')
# a transcoding whose protocol/preset combine to "original_download" is resolved through the track's /download endpoint ('redirectUri'); every other one through its own url, whose JSON answer carries the stream 'url'
if f"{protocol}_{preset}" in {"original_download"}:
try: (resp := self.get(f'https://api-v2.soundcloud.com/tracks/{song_id}/download', params={'client_id': SoundCloudMusicClient.CLIENT_ID}, **request_overrides)).raise_for_status()
except Exception: continue
download_url = (download_result := resp2json(resp=resp)).get('redirectUri')
if not download_url or not str(download_url).startswith('http'): continue
else:
try: (resp := self.get(download_url, params={'client_id': SoundCloudMusicClient.CLIENT_ID}, **request_overrides)).raise_for_status()
except Exception: continue
download_url = (download_result := resp2json(resp=resp)).get('url')
if not download_url or not str(download_url).startswith('http'): continue
# HLS urls are checked by fetching them once; other urls go through the audio link tester
if str(protocol).lower() in {'hls'}:
try: (resp := self.get(download_url, allow_redirects=True, **request_overrides)).raise_for_status()
except Exception: continue
download_url_status = {'ok': True}
else:
download_url_status = self.audio_link_tester.test(download_url, request_overrides)
# 'duration' is divided by 1000 to obtain whole seconds; an unparsable value falls back to 0
try: duration_in_secs = int(float(safeextractfromdict(search_result, ['duration'], 0)) / 1000)
except Exception: duration_in_secs = 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(safeextractfromdict(search_result, ['publisher_metadata', 'artist'], None) or safeextractfromdict(search_result, ['user', 'username'], None)), album=legalizestring(safeextractfromdict(search_result, ['publisher_metadata', 'album_title'], None)),
ext=ext, file_size_bytes=None, file_size=None, identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric='NULL', cover_url=search_result.get('artwork_url'), download_url=download_url, download_url_status=download_url_status
)
# HLS results are only tagged with protocol / file_size 'HLS'; other results are probed for size and extension
if str(protocol).lower() in {'hls'}: song_info.protocol, song_info.file_size = 'HLS', 'HLS'
else:
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
# NOTE(review): ext is overwritten with the probed ext on this line, so the first condition of the following `if` compares the same value twice and can only fire if SongInfo.ext transforms what is assigned - confirm whether the unconditional overwrite is intended
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
# supplement lyric results
# NOTE(review): no early return is visible before this point, so the lyric search appears to run even when no transcoding produced a valid download url - confirm whether a guard is wanted
lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
# the searched lyric replaces the 'NULL' placeholder only when it is non-empty and not 'NULL' itself
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}; self._setclientid(request_overrides=request_overrides)
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
for search_result in resp2json(resp)['collection']:
# --parse with official apis
try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
except Exception: song_info = SongInfo(source=self.source)
# --append to song_infos
if not song_info.with_valid_download_url: continue
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve a playlist URL into the list of downloadable SongInfo objects it contains.
    Args:
        playlist_url: playlist link; redirects are followed with a HEAD request before it is inspected
        request_overrides: extra keyword arguments forwarded to every request, including the client-id lookup
    Returns:
        de-duplicated list of SongInfo sharing one work_dir; an empty list when the final host is not in SOUNDCLOUD_MUSIC_HOSTS or the playlist has no tracks
    Raises:
        HTTP errors from the resolve request and KeyError when the resolved object has no 'tracks' / 'id' propagate
    '''
    # init, the client-id lookup now honours request_overrides like every other caller of _setclientid
    request_overrides = request_overrides or {}; self._setclientid(request_overrides=request_overrides)
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    # provisional id from the last path segment; replaced by the resolved playlist's own id below
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, SOUNDCLOUD_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist
    (resp := self.get("https://api-v2.soundcloud.com/resolve", params={"url": playlist_url, "client_id": SoundCloudMusicClient.CLIENT_ID}, **request_overrides)).raise_for_status()
    playlist_result = resp2json(resp=resp)
    tracks_in_playlist = playlist_result['tracks']; playlist_id = playlist_result['id']
    # an empty playlist used to crash below: the loop variable idx was never bound when the final progress update read idx+1
    if not tracks_in_playlist: return song_infos
    # parse track by track in playlist
    total = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{total} songs found in playlist {playlist_id} >>> completed (0/{total})", total=total)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the previous track is accounted for at the start of the next iteration
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({idx}/{total})")
            try:
                song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # account for the last track
        main_process_context.advance(main_progress_id, 1)
        main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({total}/{total})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result, ['title'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes:
            eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,235 @@
'''
Function:
Implementation of SpotifyMusicClient: https://open.spotify.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import copy
from bs4 import BeautifulSoup
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from urllib.parse import urlparse, parse_qs
from ..utils.hosts import SPOTIFY_MUSIC_HOSTS
from ..utils.spotifyutils import SpotifyMusicClientPlaylistUtils, SpotifyMusicClientSearchUtils
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import byte2mb, touchdir, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, naiveguessextfromaudiobytes, useparseheaderscookies, obtainhostname, hostmatchessuffix, extractdurationsecondsfromlrc, SongInfo, AudioLinkTester, LyricSearchClient
'''SpotifyMusicClient'''
class SpotifyMusicClient(BaseMusicClient):
source = 'SpotifyMusicClient'
def __init__(self, **kwargs):
    '''
    Initialise the base client, install the default headers for the search / parse / download stages and open the session.
    All keyword arguments are forwarded unchanged to BaseMusicClient.__init__.
    '''
    super(SpotifyMusicClient, self).__init__(**kwargs)
    browser_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
    # search and parse send the same JSON-API headers, each stage through its own dict object
    api_headers = {"User-Agent": browser_ua, "Accept": "application/json", "Accept-Language": "en-US,en;q=0.9", "Referer": "https://open.spotify.com/", "Origin": "https://open.spotify.com/"}
    self.default_search_headers = dict(api_headers)
    self.default_parse_headers = dict(api_headers)
    # downloads only carry the user agent
    self.default_download_headers = {"User-Agent": browser_ua}
    # default_headers is an alias of (not a copy of) the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls based on search rules
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
search_urls.append({'api': SpotifyMusicClientSearchUtils.searchbykeyword, 'inputs': {'session': copy.deepcopy(self.session), 'query': keyword, 'limit': page_size, 'offset': count, 'rule': copy.deepcopy(rule), 'request_overrides': request_overrides}})
count += page_size
# return
return search_urls
'''_parsewithspotisaverapi'''
def _parsewithspotisaverapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Resolve one track through spotisaver.net and return a SongInfo that already carries the audio bytes.
    Args:
        search_result: track dict; ``search_result['id']`` is the track id sent to the service
        request_overrides: extra keyword arguments forwarded to ``self.get`` / ``self.post``
    Returns:
        SongInfo with ``downloaded_contents`` set to the response body and ``download_url`` left as None
    Raises:
        KeyError when ``search_result`` has no 'id' or the metadata answer has no 'tracks'; HTTP errors from raise_for_status()
    '''
    # init
    request_overrides, song_id = request_overrides or {}, str(search_result['id'])
    headers = {
        "referer": "https://spotisaver.net/en1", "sec-ch-ua": '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
    }
    # parse: first the track metadata, then the download request that echoes the metadata back
    (resp := self.get(f'https://spotisaver.net/api/get_playlist.php?id={song_id}&type=track&lang=en', headers=headers, **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    payload = {"track": download_result["tracks"][0], "download_dir": "downloads", "filename_tag": "SPOTISAVER", "user_ip": "2601:1e23:dac0:b1d7:39a4:640e:4700:01c7", "is_premium": "true"}
    (resp := self.post('https://spotisaver.net/api/download_track.php', json=payload, headers=headers, **request_overrides)).raise_for_status()
    audio_bytes = resp.content
    # len() is the payload size; __sizeof__() reported the bytes object's memory footprint, which also counts the object header
    audio_size_bytes = len(audio_bytes)
    # duration_ms is converted to seconds; an unparsable value falls back to 0
    try:
        duration_in_secs = float(safeextractfromdict(download_result, ['tracks', 0, 'duration_ms'], 0)) / 1000
    except Exception:
        duration_in_secs = 0
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['tracks', 0, 'name'], None)), singers=legalizestring(', '.join(safeextractfromdict(download_result, ['tracks', 0, 'artists'], []) or [])), album=legalizestring(safeextractfromdict(download_result, ['tracks', 0, 'album'], None)), ext=naiveguessextfromaudiobytes(audio_bytes),
        file_size_bytes=audio_size_bytes, file_size=byte2mb(audio_size_bytes), identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=safeextractfromdict(download_result, ['tracks', 0, 'image', 'url'], None), download_url=None, downloaded_contents=audio_bytes, download_url_status={'ok': True},
    )
    # an extension guess outside the known audio extensions falls back to mp3
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    # return
    return song_info
'''_parsewithspotubedlapi'''
def _parsewithspotubedlapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Build a SongInfo for one Spotify track through the spotubedl.com endpoints.

    The metadata endpoint is queried with the track id, the video id is taken from the
    "v" query parameter of the returned "youtube_url", and that id is exchanged for an
    mp3 link. HTTP errors and missing keys are not handled here and propagate.

    Args:
        search_result: search item; only its 'id' entry is read by this method.
        request_overrides: extra keyword arguments forwarded to every request.
    Returns:
        SongInfo whose download_url_status also carries the 'probe_status' result.
    '''
    # init
    request_overrides = request_overrides or {}
    song_id = str(search_result['id'])
    headers = {
        "referer": "https://spotubedl.com/", "sec-ch-ua": '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
    }
    # track metadata, which names the video backing this track
    metadata_resp = self.get(f'https://spotubedl.com/api/metadata/{song_id}', headers=headers, **request_overrides)
    metadata_resp.raise_for_status()
    download_result = resp2json(resp=metadata_resp)
    youtube_query = urlparse(str(download_result['youtube_url'])).query
    vid = parse_qs(youtube_query, keep_blank_values=True).get('v')[0]
    # exchange the video id for a download link
    link_resp = self.get(f'https://spotubedl.com/api/download/{vid}?engine=v1&format=mp3&quality=320', headers=headers, **request_overrides)
    link_resp.raise_for_status()
    download_url = resp2json(resp=link_resp)['url']
    download_result['youtube_resp'] = resp2json(resp=link_resp)
    # assemble song_info
    song_name = legalizestring(download_result.get('name'))
    singers = legalizestring(', '.join(download_result.get('artists', []) or []))
    album = legalizestring(download_result.get('album_name', None))
    duration_s = download_result.get('duration')
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=song_name, singers=singers,
        album=album, ext='mp3', file_size_bytes=None, file_size=None, identifier=song_id, duration_s=duration_s, duration=seconds2hms(download_result.get('duration')),
        lyric=None, cover_url=download_result.get('cover_url'), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides), default_download_headers=headers,
    )
    # probe the link for size / extension
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    valid_exts = AudioLinkTester.VALID_AUDIO_EXTS
    if song_info.ext not in valid_exts:
        # prefer the probed extension, otherwise fall back to mp3
        song_info.ext = probe_status['ext'] if probe_status['ext'] in valid_exts else 'mp3'
    # return
    return song_info
'''_parsewithspotidownloaderapi'''
def _parsewithspotidownloaderapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Build a SongInfo for one Spotify track through the spotidownloader.com API.

    A bearer token is fetched first, then the download endpoint is queried with the
    track id. The 'linkFlac' candidate is tried before 'link'; the first candidate
    whose SongInfo reports a valid download url is returned.

    Args:
        search_result: search item; 'id' is required, duration is read from its
            'item' / 'itemV2' entries when present.
        request_overrides: extra keyword arguments forwarded to every request.
    Returns:
        SongInfo of the first valid candidate, otherwise the last candidate tried,
        or an empty SongInfo when the API response contains no http link at all.
    Raises:
        Whatever the HTTP layer / JSON access raises (no handling is done here).
    '''
    # init
    request_overrides, song_id = request_overrides or {}, str(search_result['id'])
    # bound up-front so that an empty candidate list yields an (invalid) SongInfo instead of an UnboundLocalError
    song_info = SongInfo(source=self.source)
    # fetch token
    (resp := self.get('https://spdl.afkarxyz.fun/token', headers={"User-Agent": "CharlesPikachu-musicdl"}, **request_overrides)).raise_for_status()
    session_token = (download_result := resp2json(resp=resp))['token']
    headers = {"Authorization": f"Bearer {session_token}", "Content-Type": "application/json", "Origin": "https://spotidownloader.com", "Referer": "https://spotidownloader.com/"}
    # parse
    (resp := self.post('https://api.spotidownloader.com/download', headers=headers, json={"id": song_id}, **request_overrides)).raise_for_status()
    download_result.update(resp2json(resp=resp))
    download_urls: list[str] = [u for u in [download_result.get('linkFlac'), download_result.get('link')] if u and str(u).startswith('http')]
    # duration is best-effort: any problem with the search payload simply yields 0
    try:
        duration_in_secs = float(safeextractfromdict(search_result, ['item', 'data', 'duration', 'totalMilliseconds'], 0) or safeextractfromdict(search_result, ['itemV2', 'data', 'trackDuration', 'totalMilliseconds'], 0)) / 1000
    except Exception:
        duration_in_secs = 0
    for download_url in download_urls:
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(safeextractfromdict(download_result, ['metadata', 'title'], None)), singers=legalizestring(safeextractfromdict(download_result, ['metadata', 'artists'], None)), album=legalizestring(safeextractfromdict(download_result, ['metadata', 'album'], None)),
            ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=None, cover_url=safeextractfromdict(download_result, ['metadata', 'cover'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        if song_info.ext in {'m4s', 'mp4'}: song_info.ext = 'm4a'
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # prefer the probed extension when the url-derived one is unknown, mp3 as last resort
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithspotmateapi'''
def _parsewithspotmateapi(self, search_result: dict, request_overrides: dict = None):
    '''
    Build a SongInfo for one Spotify track through spotmate.online.

    Works on a deep copy of the client session: the landing page is fetched to collect
    cookies and the csrf token, then 'getTrackData' and 'convert' are posted with the
    open.spotify.com track url. Errors are not handled here and propagate.

    Args:
        search_result: search item; 'id' is required, the album name is read from its
            'itemV2' / 'item' entries when present.
        request_overrides: extra keyword arguments forwarded to every request.
    Returns:
        SongInfo whose download_url_status also carries the 'probe_status' result.
    '''
    # init
    request_overrides = request_overrides or {}
    song_id = str(search_result['id'])
    session = copy.deepcopy(self.session)
    session.headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Mobile Safari/537.36'}
    # landing page: provides the cookies and the csrf token
    landing_resp = session.get('https://spotmate.online/en', **request_overrides)
    landing_resp.raise_for_status()
    cookies = "; ".join(f"{cookie.name}={cookie.value}" for cookie in session.cookies)
    meta_tag = BeautifulSoup(landing_resp.text, 'lxml').find('meta', attrs={'name': 'csrf-token'})
    csrf_token = meta_tag.get('content')
    headers = {
        'authority': 'spotmate.online', 'accept': '*/*', 'accept-language': 'id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7', 'origin': 'https://spotmate.online', 'referer': 'https://spotmate.online/en', 'x-csrf-token': csrf_token,
        'sec-ch-ua': '"Not A(Brand";v="8", "Chromium";v="132"', 'sec-ch-ua-mobile': '?1', 'sec-ch-ua-platform': '"Android"', 'sec-fetch-dest': 'empty', 'sec-fetch-site': 'same-origin', 'content-type': 'application/json',
        'user-agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Mobile Safari/537.36', 'cookie': cookies, 'sec-fetch-mode': 'cors',
    }
    # parse
    track_resp = session.post('https://spotmate.online/getTrackData', json={'spotify_url': f'https://open.spotify.com/track/{song_id}'}, headers=headers, **request_overrides)
    track_resp.raise_for_status()
    download_result = resp2json(resp=track_resp)
    convert_resp = session.post('https://spotmate.online/convert', json={'urls': f'https://open.spotify.com/track/{song_id}'}, headers=headers, **request_overrides)
    convert_resp.raise_for_status()
    download_result['convert'] = resp2json(resp=convert_resp)
    download_url = download_result['convert']['url']
    # duration and extension are best-effort values
    try:
        duration_in_secs = float(safeextractfromdict(download_result, ['duration_ms'], 0)) / 1000
    except Exception:
        duration_in_secs = 0
    try:
        ext = parse_qs(urlparse(download_url).query, keep_blank_values=True).get('format')[0]
    except Exception:
        ext = 'mp3'
    # assemble song_info
    song_name = legalizestring(download_result.get('name', ''))
    singer_names = [singer.get('name') for singer in (download_result.get('artists', []) or []) if isinstance(singer, dict) and singer.get('name')]
    singers = legalizestring(', '.join(singer_names))
    album = legalizestring(safeextractfromdict(search_result, ['itemV2', 'data', 'albumOfTrack', 'name'], None) or safeextractfromdict(search_result, ['item', 'data', 'albumOfTrack', 'name'], None))
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=song_name, singers=singers,
        album=album, ext=ext, file_size=None, identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric='NULL',
        cover_url=safeextractfromdict(download_result, ['album', 'images', 0, 'url'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides), default_download_headers=headers,
    )
    # probe the link for size / extension
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    valid_exts = AudioLinkTester.VALID_AUDIO_EXTS
    if song_info.ext not in valid_exts:
        # prefer the probed extension, otherwise fall back to mp3
        song_info.ext = probe_status['ext'] if probe_status['ext'] in valid_exts else 'mp3'
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
    '''
    Try the third-party parsers in a fixed order and return the first usable result.

    A parser counts as failed when it raises or when its SongInfo does not report a
    valid download url. Only Exception subclasses are treated as parser failures, so
    KeyboardInterrupt / SystemExit still reach the caller, and the validity check is
    a plain condition rather than an assert so it also works under "python -O".

    Args:
        search_result: search item handed to each parser unchanged.
        request_overrides: request keyword arguments handed to each parser unchanged.
    Returns:
        The first SongInfo with a valid download url, or an empty SongInfo for this
        source when every parser failed.
    '''
    parsers = (self._parsewithspotisaverapi, self._parsewithspotidownloaderapi, self._parsewithspotmateapi, self._parsewithspotubedlapi)
    for imp_func in parsers:
        try:
            song_info_flac = imp_func(search_result, request_overrides)
            if song_info_flac.with_valid_download_url: return song_info_flac
        except Exception:
            # best-effort: a broken third-party service must not stop the remaining ones
            continue
    return SongInfo(source=self.source)
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''
    Pick the final SongInfo for a track and attach lyrics to it.

    No official download path is implemented yet, so the third-party result
    (song_info_flac) is the only candidate; when it has no valid download url an
    invalid SongInfo is returned. Otherwise lyrics are looked up by singer and song
    name, and a missing duration is derived from the lyric text.

    Args:
        search_result: search item; must be a dict with a truthy 'id'.
        song_info_flac: result of the third-party parsers, may be None.
        lossless_quality_is_sufficient: whether a lossless third-party result is accepted as-is.
        lossless_quality_definitions: extensions regarded as lossless.
        request_overrides: extra keyword arguments forwarded to the lyric search.
    Returns:
        SongInfo, possibly without a valid download url.
    '''
    # init
    song_info = SongInfo(source=self.source)
    request_overrides = request_overrides or {}
    song_info_flac = song_info_flac or SongInfo(source=self.source)
    if not isinstance(search_result, dict): return song_info
    if not search_result.get('id'): return song_info
    # obtain basic song_info
    flac_is_enough = lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions)
    if flac_is_enough:
        song_info = song_info_flac
    # TODO: Solve DRM Issues in Spotify (nothing is fetched from the official side for now)
    if not song_info.with_valid_download_url: song_info = song_info_flac
    if not song_info.with_valid_download_url: return song_info
    # supplement lyric results
    lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
    song_info.raw_data['lyric'] = lyric_result or song_info.raw_data['lyric']
    # keep the existing lyric when the search found nothing or only the 'NULL' placeholder
    song_info.lyric = song_info.lyric if ((not lyric) or (lyric in {'NULL'})) else lyric
    duration_missing = (not song_info.duration) or (song_info.duration == '-:-:-')
    if duration_missing:
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Run one search request and append every downloadable track to song_infos.

    Args:
        keyword: search keyword (not read by this method itself).
        search_url: dict with 'api' (callable performing the search) and 'inputs'
            (keyword arguments for that callable).
        request_overrides: extra keyword arguments forwarded to the parsers.
        song_infos: list the results are appended to; a new list is created when
            omitted (the default is None rather than a shared [] literal).
        progress: progress display whose task description is updated with the outcome.
        progress_id: id of the task to update on progress.
    Returns:
        song_infos with the newly found tracks appended. Errors raised while
        searching are reported on the progress task instead of being raised.
    '''
    # init
    request_overrides = request_overrides or {}
    song_infos = [] if song_infos is None else song_infos
    search_api, search_api_inputs = search_url['api'], search_url['inputs']
    # successful
    try:
        # --a lossless third-party result is only considered sufficient when no cookies are configured
        lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
        # --search results
        for search_result in safeextractfromdict((search_resp := search_api(**search_api_inputs)), ['data', 'searchV2', 'tracksV2', 'items'], []) or safeextractfromdict(search_resp, ['data', 'searchV2', 'tracks', 'items'], []):
            search_result['id'] = safeextractfromdict(search_result, ['item', 'data', 'id'], None)
            if not search_result['id']: search_result['id'] = str(safeextractfromdict(search_result, ['item', 'data', 'uri'], '')).removeprefix('spotify:track:')
            # --without an id no parser can succeed, so skip the item instead of querying every third-party service
            if not search_result['id']: continue
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''
    Resolve every track of a Spotify playlist into a downloadable SongInfo.

    Args:
        playlist_url: playlist link; redirects are followed with a HEAD request first.
        request_overrides: extra keyword arguments forwarded to every request.
    Returns:
        List of SongInfo objects sharing one work_dir named after the playlist.
        The list is empty when the host is not a Spotify host or when the playlist
        contains no tracks.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, SPOTIFY_MUSIC_HOSTS)): return song_infos
    # get tracks in playlist (de-duplicated by track id, first-seen order kept)
    playlist_result_first, tracks_in_playlist = SpotifyMusicClientPlaylistUtils.parse(copy.deepcopy(self.session), playlist_id=playlist_id, request_overrides=request_overrides)
    tracks_in_playlist = list({d["id"]: d for d in tracks_in_playlist}.values())
    # nothing to parse: return before the progress code below, which used the loop index after the loop
    if not tracks_in_playlist: return song_infos
    total = len(tracks_in_playlist)
    lossless_quality_is_sufficient = not (self.default_cookies or request_overrides.get('cookies'))
    # parse track by track in playlist
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{total} songs found in playlist {playlist_id} >>> completed (0/{total})", total=total)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the bar is advanced for the previous track when the next one starts
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({idx}/{total})")
            song_info_flac = self._parsewiththirdpartapis(search_result=track_info, request_overrides=request_overrides)
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=song_info_flac, lossless_quality_is_sufficient=lossless_quality_is_sufficient, request_overrides=request_overrides)
            except Exception: song_info = song_info_flac
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # account for the last track
        main_process_context.advance(main_progress_id, 1)
        main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({total}/{total})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['data', 'playlistV2', 'name'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        # NOTE(review): touchdir receives work_dir, not eps_info.work_dir - confirm this is intended
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,169 @@
'''
Function:
Implementation of StreetVoiceMusicClient: https://www.streetvoice.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import copy
import time
from bs4 import BeautifulSoup
from .base import BaseMusicClient
from rich.progress import Progress
from pathvalidate import sanitize_filepath
from ..utils.hosts import STREETVOICE_MUSIC_HOSTS
from urllib.parse import urlencode, urljoin, urlparse, urlsplit, urlunsplit
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils import touchdir, legalizestring, resp2json, usesearchheaderscookies, seconds2hms, safeextractfromdict, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
'''StreetVoiceMusicClient'''
class StreetVoiceMusicClient(BaseMusicClient):
    '''
    Music client for https://www.streetvoice.cn/.

    Searches are done by scraping the site's search pages, download links come from
    the v5 song API (HLS playlists), and playlists are read page by page from HTML.
    '''
    source = 'StreetVoiceMusicClient'
    def __init__(self, **kwargs):
        '''Forward kwargs to the base client, install the default headers and create the session.'''
        super(StreetVoiceMusicClient, self).__init__(**kwargs)
        base_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0", "Referer": "https://www.streetvoice.cn/", "x-requested-with": "XMLHttpRequest"}
        # three independent copies, so changing one header set never leaks into another
        self.default_search_headers = dict(base_headers)
        self.default_parse_headers = dict(base_headers)
        self.default_download_headers = dict(base_headers)
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Build the list of search page urls for keyword.

        The page size is capped at 10 (this also updates self.search_size_per_page);
        pages are generated until search_size_per_source results are covered.
        Entries of rule override the default query parameters except 'page'.
        '''
        # init
        self.search_size_per_page = min(10, self.search_size_per_page)
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {'page': 1, 'q': keyword, 'type': 'song', '_pjax': '#pjax-container'}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://www.streetvoice.cn/search/?'
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(default_rule)
            page_rule['page'] = int(count // page_size) + 1
            search_urls.append(base_url + urlencode(page_rule))
            count += page_size
        # return
        return search_urls
    '''_parsewithofficialapiv1'''
    def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
        '''
        Resolve one search / playlist item into a SongInfo with an HLS download url.

        Args:
            search_result: item dict; must contain a truthy 'song_id'.
            song_info_flac: optional pre-resolved SongInfo, returned as-is when it is
                valid, lossless and lossless_quality_is_sufficient is set.
            lossless_quality_is_sufficient: see song_info_flac.
            lossless_quality_definitions: extensions regarded as lossless.
            request_overrides: extra keyword arguments forwarded to every request.
        Returns:
            SongInfo; an empty one when the item is unusable, a request fails or the
            returned file link is not an http url.
        '''
        # init
        song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
        if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('song_id'))): return song_info
        # obtain basic song_info
        if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): return song_info_flac
        try: (resp := self.get(f"https://www.streetvoice.cn/api/v5/song/{song_id}/?_={int(time.time() * 1000)}", **request_overrides)).raise_for_status()
        except Exception: return song_info
        try: (hls_resp := self.post(f"https://www.streetvoice.cn/api/v5/song/{song_id}/hls/file/", **request_overrides)).raise_for_status()
        except Exception: return song_info
        # the HLS answer is stored next to the song details under the 'hls/file' key
        (download_result := resp2json(resp=resp))['hls/file'] = resp2json(resp=hls_resp)
        if not (download_url := download_result['hls/file']['file']) or not str(download_url).startswith('http'): return song_info
        # the link only counts as valid when a HEAD request on it succeeds
        try: (resp := self.session.head(download_url, **request_overrides)).raise_for_status(); download_url_status = {'ok': True}
        except Exception: return song_info
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('name')), singers=legalizestring(safeextractfromdict(download_result, ['user', 'profile', 'nickname'], None)),
            album=legalizestring(safeextractfromdict(download_result, ['album', 'name'], None)), ext=download_url.removesuffix('.m3u8').split('?')[0].split('.')[-1], file_size_bytes=None, file_size='HLS', identifier=song_id, duration_s=download_result.get('length'),
            duration=seconds2hms(download_result.get('length')), lyric=cleanlrc(safeextractfromdict(download_result, ['lyrics'], 'NULL')), cover_url=download_result.get('image'), download_url=download_url, download_url_status=download_url_status, protocol='HLS'
        )
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        # return
        return song_info
    '''_extractonesearchpage'''
    def _extractonesearchpage(self, html_text: str, page_url: str):
        '''
        Scrape one search result page.

        Returns a list of dicts with the keys song_id, title, artist, song_url,
        artist_url, cover_url and like_raw; a value is None when its element is
        missing. Relative links are resolved against page_url.
        '''
        soup, search_results = BeautifulSoup(html_text, "lxml"), []
        for li in soup.select("ul.list-group-song li.work-item.item_box"):
            title_a = li.select_one(".work-item-info h4 a"); artist_a = li.select_one(".work-item-info h5 a")
            img = li.select_one(".cover-block img"); play_btn = li.select_one("button.js-search[data-id]")
            like_btn = li.select_one("button.js-like-btn[data-like-count]"); like_raw = like_btn.get("data-like-count") if like_btn else None
            song_href = title_a.get("href") if title_a else None; artist_href = artist_a.get("href") if artist_a else None
            search_results.append({
                "song_id": play_btn.get("data-id") if play_btn else None, "title": title_a.get_text(strip=True) if title_a else None, "artist": artist_a.get_text(strip=True) if artist_a else None, "song_url": urljoin(page_url, song_href) if song_href else None,
                "artist_url": urljoin(page_url, artist_href) if artist_href else None, "cover_url": img.get("src") if img else None, "like_raw": like_raw,
            })
        return search_results
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Fetch one search page and append every downloadable song to song_infos.

        Args:
            keyword: search keyword (not read by this method itself).
            search_url: url of the search page to fetch.
            request_overrides: extra keyword arguments forwarded to every request.
            song_infos: list the results are appended to; a new list is created when
                omitted (the default is None rather than a shared [] literal).
            progress: progress display whose task description is updated with the outcome.
            progress_id: id of the task to update on progress.
        Returns:
            song_infos with the newly found songs appended. Errors raised while
            searching are reported on the progress task instead of being raised.
        '''
        # init
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            for search_result in self._extractonesearchpage(resp.text, "https://www.streetvoice.cn/"):
                # --parse with official apis
                try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
                except Exception: song_info = SongInfo(source=self.source)
                # --append to song_infos
                if not song_info.with_valid_download_url: continue
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
    '''_extractplaylistpagesongs'''
    def _extractplaylistpagesongs(self, html_text, base_url='https://streetvoice.cn'):
        '''
        Scrape the songs listed on one playlist page.

        Returns a list of dicts with the keys index, title, song_url, song_id, artist
        and artist_url. Entries without a "/songs/" link are skipped and each song url
        is reported once per page. index is None when the position label is missing
        or is not an integer.
        '''
        soup, songs, seen = BeautifulSoup(html_text, 'lxml'), [], set()
        for li in soup.select('#item_box_list_1 li.item_box'):
            artist_a = li.select_one('.work-item-info h5 a') or li.select_one('.work-item-info h4 a'); num_el = li.select_one('.work-item-number h4')
            if not (song_a := li.select_one('.work-item-info h4 a[href*="/songs/"]')): continue
            if (url := urljoin(base_url, song_a['href'])) in seen: continue
            # a non-numeric position label must not abort parsing of the whole page
            try: index = int(num_el.get_text(strip=True)) if num_el else None
            except ValueError: index = None
            seen.add(url)
            songs.append({
                'index': index, 'title': ' '.join(song_a.stripped_strings), 'song_url': url, 'song_id': urlparse(url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'),
                'artist': artist_a.get_text(strip=True) if artist_a else None, 'artist_url': urljoin(base_url, artist_a['href']) if artist_a and artist_a.has_attr('href') else None,
            })
        return songs
    '''_extractplaylistname'''
    def _extractplaylistname(self, html_text):
        '''
        Return the playlist name found in html_text, or None.

        The selectors are tried in order and the first non-empty text wins; for the
        <title> element only the part before the first " - " is used.
        '''
        soup = BeautifulSoup(html_text, 'lxml')
        for sel in ['.work-page-header-wrapper h1', '#sticky .work-item-info h4', 'title']:
            if not (node := soup.select_one(sel)): continue
            text = ' '.join(node.stripped_strings)
            if sel == 'title': text = text.split(' - ')[0].strip()
            if text: return text
        return None
    '''parseplaylist'''
    @useparseheaderscookies
    def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
        '''
        Resolve every song of a StreetVoice playlist into a downloadable SongInfo.

        Args:
            playlist_url: playlist link; redirects are followed with a HEAD request
                and the query / fragment parts are dropped before paging.
            request_overrides: extra keyword arguments forwarded to every request.
        Returns:
            List of SongInfo objects sharing one work_dir named after the playlist.
            The list is empty when the host is not a StreetVoice host or when no
            song could be read from the playlist pages.
        '''
        # init
        request_overrides = request_overrides or {}
        playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
        playlist_url = urlunsplit(urlsplit(playlist_url)._replace(query="", fragment=""))
        playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
        if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, STREETVOICE_MUSIC_HOSTS)): return song_infos
        # get tracks in playlist: pages are requested until one fails or lists no songs
        tracks_in_playlist, page, playlist_result_first = [], 1, {}
        while True:
            request_page_url = playlist_url if page == 1 else f"{playlist_url}?page={page}"
            try: (resp := self.get(request_page_url, allow_redirects=True, **request_overrides)).raise_for_status()
            except Exception: break
            (playlist_result := {'name': self._extractplaylistname(resp.text), 'id': playlist_id})['songs'] = self._extractplaylistpagesongs(resp.text, "https://streetvoice.cn")
            if not playlist_result['songs']: break
            tracks_in_playlist.extend(playlist_result['songs']); page += 1
            if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        # nothing to parse: return before the progress code below, which used the loop index after the loop
        if not tracks_in_playlist: return song_infos
        total = len(tracks_in_playlist)
        # parse track by track in playlist
        with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
            main_progress_id = main_process_context.add_task(f"{total} songs found in playlist {playlist_id} >>> completed (0/{total})", total=total)
            for idx, track_info in enumerate(tracks_in_playlist):
                # the bar is advanced for the previous track when the next one starts
                if idx > 0: main_process_context.advance(main_progress_id, 1)
                main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({idx}/{total})")
                try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
                except Exception: song_info = SongInfo(source=self.source)
                if song_info.with_valid_download_url: song_infos.append(song_info)
            # account for the last track
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({total}/{total})")
        # post processing
        playlist_name = safeextractfromdict(playlist_result_first, ['name'], None)
        song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
        for song_info in song_infos:
            song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
            # NOTE(review): touchdir receives work_dir, not eps_info.work_dir - confirm this is intended
            for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
        # return results
        return song_infos
@@ -0,0 +1,198 @@
'''
Function:
Implementation of TIDALMusicClient: https://tidal.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import copy
import aigpy
import base64
import tempfile
from pathlib import Path
from .base import BaseMusicClient
from pathvalidate import sanitize_filepath
from ..utils.hosts import TIDAL_MUSIC_HOSTS
from urllib.parse import urlencode, urlparse
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
from ..utils.tidalutils import TIDALMusicClientUtils, SearchResult, SessionStorage, Track, TidalTvSession, StreamUrl, Artist
from ..utils import legalizestring, resp2json, seconds2hms, touchdir, replacefile, usesearchheaderscookies, usedownloadheaderscookies, safeextractfromdict, useparseheaderscookies, hostmatchessuffix, obtainhostname, cleanlrc, SongInfo, SongInfoUtils
'''TIDALMusicClient'''
class TIDALMusicClient(BaseMusicClient):
source = 'TIDALMusicClient'
def __init__(self, **kwargs):
    '''
    Create the TIDAL client from the configured cookies.

    The first non-empty one of the search / download / parse cookie dicts is passed
    to SessionStorage as keyword arguments and used to set up the TV session; the
    cookie dicts are cleared afterwards and the session credentials are placed in
    the default headers instead.

    Raises:
        AssertionError: when no cookies are configured. It is raised explicitly so
            that the check is not removed when Python runs with -O.
    '''
    super(TIDALMusicClient, self).__init__(**kwargs)
    session_cookies = self.default_search_cookies or self.default_download_cookies or self.default_parse_cookies
    if not session_cookies:
        raise AssertionError('cookies are not configured, so TIDAL is unavailable, refer to https://musicdl.readthedocs.io/en/latest/Quickstart.html#tidal-high-quality-music-download.')
    session_storage = SessionStorage(**session_cookies)
    self.tidal_tv_session = TidalTvSession(session_storage.client_id, session_storage.client_secret)
    self.tidal_tv_session.setstorage(session_storage); TIDALMusicClientUtils.SESSION_STORAGE = session_storage
    base_headers = {"X-Tidal-Token": self.tidal_tv_session.client_id, "Authorization": f"Bearer {self.tidal_tv_session.access_token}", "Connection": "Keep-Alive", "Accept-Encoding": "gzip", "User-Agent": "TIDAL_ANDROID/1039 okhttp/3.14.9"}
    # three independent copies, so changing one header set never leaks into another
    self.default_search_headers = dict(base_headers)
    self.default_parse_headers = dict(base_headers)
    self.default_download_headers = dict(base_headers)
    self.default_headers = self.default_search_headers
    self.default_search_cookies = {}; self.default_parse_cookies = {}; self.default_download_cookies = {}; self.default_cookies = {}
    self._initsession()
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = [], progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
'''
Download one TIDAL track and append its SongInfo to downloaded_song_infos.

When song_info.download_url is a plain string the parent implementation is used.
Otherwise the download url is treated as a StreamUrl: the stream is fetched into a
temporary directory, decrypted, optionally remuxed to FLAC and moved to
song_info.save_path. Any exception is written to the progress task description
instead of being raised.

NOTE(review): the code lines below are kept exactly as found, i.e. without their
original indentation; the nesting has to be restored from the project history.
NOTE(review): downloaded_song_infos defaults to a shared [] literal - confirm that
every caller passes its own list.
'''
# plain string urls are handled by the generic downloader of the parent class
if isinstance(song_info.download_url, str): return super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=downloaded_song_infos, progress=progress, song_progress_id=song_progress_id, auto_supplement_song=auto_supplement_song)
request_overrides = request_overrides or {}
try:
touchdir(song_info.work_dir); stream_url: StreamUrl = song_info.download_url; stream_resp: dict = song_info.raw_data['download']
download_ext, final_ext = TIDALMusicClientUtils.guessstreamextension(stream=stream_url), f'.{song_info.ext}'
remux_required = TIDALMusicClientUtils.shouldremuxflac(download_ext, final_ext, stream_url)
# NOTE(review): this assert is not guarded by remux_required although its message talks about a stream that requires remuxing - confirm whether it should only apply when remux_required is true
assert TIDALMusicClientUtils.flacremuxavailable(), f'FLAC stream for {stream_url.url} requires remuxing but no backend is available.'
progress.update(song_progress_id, total=1, kind='overall'); progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Downloading)")
# all intermediate files live in a temporary directory
with tempfile.TemporaryDirectory(prefix="musicdl-TIDALMusicClient-track-") as tmpdir:
download_part = os.path.join(tmpdir, f"download{download_ext}.part" if download_ext else "download.part")
# "vnd.tidal.bt" manifests: the urls of the stream are downloaded with aigpy's DownloadTool
if "vnd.tidal.bt" in stream_resp['manifestMimeType']:
tool = aigpy.download.DownloadTool(download_part, stream_url.urls); tool.setUserProgress(None); tool.setPartSize(song_info.chunk_size)
check, err = tool.start(showProgress=False)
if not check: raise RuntimeError(err)
# "dash+xml" manifests: the base64 manifest is written to an .mpd file and handed to N_m3u8DL-RE
elif "dash+xml" in stream_resp['manifestMimeType']:
local_file_path, manifest_content = os.path.join(tmpdir, str(song_info.identifier) + '.mpd'), base64.b64decode(stream_resp['manifest'])
with open(local_file_path, "wb") as fp: fp.write(manifest_content)
check = TIDALMusicClientUtils.downloadstreamwithnm3u8dlre(local_file_path, download_part, silent=self.disable_print, random_uuid=str(song_info.identifier))
if not check: raise RuntimeError(f"N_m3u8DL-RE error while dealing with {manifest_content.decode('utf-8')}")
# pick the most recently modified file whose name starts with the part name (None when nothing matches)
# NOTE(review): cannot tell from here whether this line belongs to the dash branch only - confirm
download_part = max(Path(download_part).parent.glob(f"{Path(download_part).name}*"), key=lambda p: p.stat().st_mtime, default=None)
decrypted_target, remux_target = os.path.join(tmpdir, f"decrypted{download_ext}" if download_ext else "decrypted"), os.path.join(tmpdir, "remux.flac")
decrypted_path = TIDALMusicClientUtils.decryptdownloadedaudio(stream_url, download_part, decrypted_target); processed_path = decrypted_path
if remux_required:
processed_path, backend_used = TIDALMusicClientUtils.remuxflacstream(decrypted_path, remux_target)
# drop the decrypted intermediate once a separate remuxed file exists
if processed_path != decrypted_path and os.path.exists(decrypted_path): os.remove(decrypted_path)
else: final_ext = download_ext; processed_path = decrypted_path
# move the finished file to its final location
replacefile(processed_path, song_info.save_path)
# the size of the saved file is reported as both the total and the completed amount
progress.update(song_progress_id, total=os.path.getsize(song_info.save_path), kind='download'); progress.advance(song_progress_id, os.path.getsize(song_info.save_path))
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Success)")
# a deep copy is stored, optionally passed through SongInfoUtils.supplsonginfothensavelyricsthenwritetags first
downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
# failures are only reported through the progress description
except Exception as err:
progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Error: {err})")
return downloaded_song_infos
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the paged TIDAL search urls for `keyword`.

    The TV session is refreshed first and its bearer token is copied into every default
    header set. One url is then produced per page until `search_size_per_source` is covered.
    Entries of `rule` override the default query parameters, except `limit` and `offset`,
    which are always set per page.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    tv_session = self.tidal_tv_session
    tv_session.refresh(request_overrides=request_overrides)
    TIDALMusicClientUtils.SESSION_STORAGE = tv_session.getstorage()
    # propagate the refreshed token to all header sets
    for header_set in (self.default_headers, self.default_search_headers, self.default_parse_headers, self.default_download_headers):
        header_set.update({"Authorization": f"Bearer {tv_session.access_token}"})
    # search rules
    query = {'countryCode': tv_session.country_code, 'limit': 10, 'offset': 0, 'query': keyword, 'types': 'ARTISTS,ALBUMS,TRACKS,VIDEOS,PLAYLISTS'}
    query.update(rule)
    # one url per page, offset advancing by the page size
    endpoint = 'https://api.tidalhifi.com/v1/search?'
    per_page = self.search_size_per_page
    search_urls, offset = [], 0
    while offset < self.search_size_per_source:
        page_query = copy.deepcopy(query)
        page_query.update({'limit': per_page, 'offset': offset})
        search_urls.append(endpoint + urlencode(page_query))
        offset += per_page
    # return
    return search_urls
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: Track, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
    '''Resolve one TIDAL track into a SongInfo carrying a download link and, when available, lyrics.

    Args:
        search_result: `Track` model of the hit; any other type, or a track without an id, yields an empty SongInfo.
        song_info_flac: optional pre-resolved candidate, reused as-is when it is valid, its ext is listed in
            `lossless_quality_definitions` and `lossless_quality_is_sufficient` is set.
        lossless_quality_is_sufficient: whether such a candidate short-circuits the stream lookup.
        lossless_quality_definitions: extensions regarded as lossless.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        SongInfo: the first quality whose link tests valid, otherwise a SongInfo without a valid download url.
    '''
    # init
    song_info = SongInfo(source=self.source)
    request_overrides = request_overrides or {}
    song_info_flac = song_info_flac or SongInfo(source=self.source)
    if not isinstance(search_result, Track):
        return song_info
    song_id = search_result.id
    if not song_id:
        return song_info
    # obtain basic song_info
    reuse_candidate = lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions)
    if reuse_candidate:
        song_info = song_info_flac
    else:
        # walk the quality table in order and stop at the first working link
        for quality in TIDALMusicClientUtils.MUSIC_QUALITIES:
            try:
                download_url, stream_resp = TIDALMusicClientUtils.getstreamurl(
                    song_id, quality=quality[1], apply_thirdpart_apis=(not self.tidal_tv_session.isvipaccount(request_overrides=request_overrides)), request_overrides=request_overrides,
                )
            except Exception:
                continue
            artist_names = [str(singer.name) for singer in (search_result.artists or []) if isinstance(singer, Artist)]
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': stream_resp, 'lyric': {}, 'quality': quality},
                source=self.source,
                song_name=legalizestring(search_result.title),
                singers=legalizestring(', '.join(artist_names)),
                album=legalizestring(search_result.album.title),
                ext=TIDALMusicClientUtils.getexpectedextension(download_url).removeprefix('.'),
                file_size_bytes='HLS',
                file_size='HLS',
                identifier=search_result.id,
                duration_s=search_result.duration,
                duration=seconds2hms(search_result.duration),
                lyric=None,
                cover_url=TIDALMusicClientUtils.getcoverurl(search_result.album.cover),
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url.urls[0], request_overrides),
            )
            if song_info.with_valid_download_url:
                break
    if not song_info.with_valid_download_url:
        return song_info
    # supplement lyric results; any failure falls back to the 'NULL' placeholder
    params = {'countryCode': self.tidal_tv_session.country_code, 'include': 'lyrics'}
    try:
        resp = self.get(f'https://openapi.tidal.com/v2/tracks/{song_id}', params=params, **request_overrides)
        resp.raise_for_status()
        lyric_result = resp2json(resp)
        lyric = cleanlrc(safeextractfromdict(lyric_result, ['included', 0, 'attributes', 'lrcText'], 'NULL'))
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
    song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one TIDAL search request and append every downloadable track to `song_infos`.

    Args:
        keyword: the search keyword (not read here; the query is already encoded in `search_url`).
        search_url: fully built search url.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
        song_infos: list collecting results; a new list is created when omitted.
        progress: rich progress instance whose task description is updated with the outcome.
        progress_id: id of the task to update.
    Returns:
        list: `song_infos`, extended with the valid results of this request.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: the former `[]` default was shared between calls and kept accumulating results
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_model = aigpy.model.dictToModel(resp2json(resp=resp), SearchResult())
        for search_result in search_model.tracks.items:
            # --parse with official apis, a failing track must not abort the whole page
            try:
                song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception:
                song_info = SongInfo(source=self.source)
            # --append to song_infos
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        # --update progress
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
'''parseplaylist'''
@useparseheaderscookies
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
    '''Resolve every track of a TIDAL playlist into SongInfo objects.

    Args:
        playlist_url: playlist link; redirects are followed before the playlist id is taken from the last path segment.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        list: de-duplicated SongInfo objects with a valid download url, each assigned a common work dir.
        Empty when the url does not belong to a TIDAL host.
    '''
    # init
    request_overrides = request_overrides or {}
    playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
    playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
    if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, TIDAL_MUSIC_HOSTS)): return song_infos
    self.tidal_tv_session.refresh(request_overrides=request_overrides); TIDALMusicClientUtils.SESSION_STORAGE = self.tidal_tv_session.getstorage()
    for header_set in (self.default_headers, self.default_search_headers, self.default_parse_headers, self.default_download_headers):
        header_set.update({"Authorization": f"Bearer {self.tidal_tv_session.access_token}"})
    # get tracks in playlist, page by page until an empty/failed page or the advertised total is reached
    raw_items, page, page_size, playlist_result_first = [], 1, 50, {}
    while True:
        params = {'offset': (page - 1) * page_size, 'limit': page_size, 'countryCode': self.tidal_tv_session.country_code, 'locale': 'en_US', 'deviceType': 'BROWSER'}
        try: (resp := self.get(f"https://tidal.com/v1/playlists/{playlist_id}/items", params=params, **request_overrides)).raise_for_status()
        except Exception: break
        if (not safeextractfromdict((playlist_result := resp2json(resp=resp)), ['items'], [])): break
        raw_items.extend(safeextractfromdict(playlist_result, ['items'], [])); page += 1
        if not playlist_result_first: playlist_result_first = copy.deepcopy(playlist_result)
        if (float(safeextractfromdict(playlist_result, ['totalNumberOfItems'], 0)) <= len(raw_items)): break
    # convert raw items to Track models; items that cannot be converted or carry no id are dropped,
    # previously they stayed in the list as plain dicts and the `.id` based de-duplication below raised AttributeError
    tracks_in_playlist = []
    for raw_item in raw_items:
        try:
            track = aigpy.model.dictToModel(raw_item['item'], Track())
            if not track.id: continue
        except Exception:
            continue
        tracks_in_playlist.append(track)
    tracks_in_playlist = list({d.id: d for d in tracks_in_playlist}.values())
    try: playlist_result_first['meta_info'] = resp2json(self.get(f'https://tidal.com/v1/playlists/{playlist_id}?countryCode={self.tidal_tv_session.country_code}&locale=en_US&deviceType=BROWSER', **request_overrides))
    except Exception: pass
    # parse track by track in playlist
    total = len(tracks_in_playlist)
    with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
        main_progress_id = main_process_context.add_task(f"{total} songs found in playlist {playlist_id} >>> completed (0/{total})", total=total)
        for idx, track_info in enumerate(tracks_in_playlist):
            # the previous track is counted as done when the next one starts
            if idx > 0: main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({idx}/{total})")
            try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            if song_info.with_valid_download_url: song_infos.append(song_info)
        # count the last track; skipped for an empty playlist, where the loop variable was never bound
        if tracks_in_playlist:
            main_process_context.advance(main_progress_id, 1)
            main_process_context.update(main_progress_id, description=f"{total} songs found in playlist {playlist_id} >>> completed ({total}/{total})")
    # post processing
    playlist_name = safeextractfromdict(playlist_result_first, ['meta_info', 'title'], None)
    song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
    for song_info in song_infos:
        song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
        for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
    # return results
    return song_infos
@@ -0,0 +1,240 @@
'''
Function:
Implementation of YouTubeMusicClient: https://music.youtube.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
import base64
import random
from ytmusicapi import YTMusic
from .base import BaseMusicClient
from rich.progress import Progress
from ..utils.youtubeutils import YouTube, REPAIDAPI_KEYS
from ..utils import legalizestring, resp2json, usesearchheaderscookies, byte2mb, seconds2hms, usedownloadheaderscookies, touchdir, safeextractfromdict, SongInfo, SongInfoUtils, AudioLinkTester, LyricSearchClient
'''YouTubeMusicClient'''
class YouTubeMusicClient(BaseMusicClient):
source = 'YouTubeMusicClient'
def __init__(self, **kwargs):
    '''Initialise the base client, then install the browser-like default headers and start the session.'''
    super(YouTubeMusicClient, self).__init__(**kwargs)
    chrome_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"
    # search and download headers are separate dict objects with the same content
    self.default_search_headers = {"user-agent": chrome_user_agent}
    self.default_download_headers = {"user-agent": chrome_user_agent}
    # the generic headers alias the search headers (same dict object)
    self.default_headers = self.default_search_headers
    self._initsession()
'''_download'''
@usedownloadheaderscookies
def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = None, progress: Progress = None, song_progress_id: int = 0, auto_supplement_song: bool = True):
    '''Download one song, streaming it chunk by chunk when `download_url` is a stream object.

    A plain string `download_url` is delegated to the base implementation. Otherwise the
    object's `iterchunks` is consumed and written to `song_info.save_path`.

    Args:
        song_info: song to download.
        request_overrides: extra keyword arguments for HTTP calls (only forwarded to the base implementation).
        downloaded_song_infos: list collecting finished downloads; a new list is created when omitted.
        progress: rich progress instance updated while downloading.
        song_progress_id: id of the task to update.
        auto_supplement_song: whether lyrics/tags are supplemented after the download.
    Returns:
        list: `downloaded_song_infos`, extended with a copy of `song_info` on success.
    '''
    # a fresh list per call: the former `[]` default was shared between calls and kept accumulating results
    downloaded_song_infos = [] if downloaded_song_infos is None else downloaded_song_infos
    if isinstance(song_info.download_url, str): return super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=downloaded_song_infos, progress=progress, song_progress_id=song_progress_id, auto_supplement_song=auto_supplement_song)
    request_overrides = request_overrides or {}
    try:
        touchdir(song_info.work_dir)
        total_size, chunk_size, downloaded_size = int(song_info.download_url.filesize), song_info.get('chunk_size', 1024 * 1024), 0
        progress.update(song_progress_id, total=total_size)
        with open(song_info.save_path, "wb") as fp:
            for chunk in song_info.download_url.iterchunks(chunk_size=chunk_size):
                if not chunk: continue
                fp.write(chunk); downloaded_size = downloaded_size + len(chunk)
                if total_size > 0:
                    downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, total_size / 1024 / 1024)
                else:
                    # unknown total: keep the bar's total in step with what has been received so far
                    progress.update(song_progress_id, total=downloaded_size); downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, downloaded_size / 1024 / 1024)
                progress.advance(song_progress_id, len(chunk))
                progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Downloading: {downloading_text})")
        progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Success)")
        downloaded_song_infos.append(SongInfoUtils.supplsonginfothensavelyricsthenwritetags(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print) if auto_supplement_song else copy.deepcopy(song_info))
    except Exception as err:
        progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name[:10] + '...' if len(song_info.song_name) > 13 else song_info.song_name[:13]} (Error: {err})")
    return downloaded_song_infos
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Describe the candidate search backends for `keyword`.

    Instead of url strings, a single entry is returned whose `candidate_apis` list holds, in order,
    a rapidapi request description and a ytmusicapi search callable with its arguments.
    `search_size_per_page` is set to `search_size_per_source` as a side effect.
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # adapt ytmusicapi to conduct music file search
    ytmusic_client = YTMusic(
        auth=rule.get('auth', None), user=rule.get('user', None), requests_session=None, proxies=request_overrides.get('proxies', None) or self._autosetproxies(),
        language=rule.get('language', 'en'), location=rule.get('location', ''), oauth_credentials=rule.get('oauth_credentials', ''),
    )
    ytmusic_candidate = {
        'api': ytmusic_client.search, 'method': 'ytmusicapi',
        'inputs': {'query': keyword, 'filter': rule.get('filter', None), 'scope': rule.get('scope', None), 'limit': self.search_size_per_source, 'ignore_spelling': rule.get('ignore_spelling', False)},
    }
    # adapt rapidapi to conduct music file search, the stored keys are base64 text and one is picked at random
    rapidapi_key = base64.b64decode(str(random.choice(REPAIDAPI_KEYS)).encode('utf-8')).decode('utf-8')
    rapidapi_candidate = {
        'api': self.get, 'method': 'rapidapi',
        'inputs': {
            'headers': {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36", "X-Rapidapi-Host": "youtube-music-api3.p.rapidapi.com", "X-Rapidapi-Key": rapidapi_key,
                "Referer": "https://music-download-lake.vercel.app/", "Origin": "https://music-download-lake.vercel.app", "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
            },
            'params': {'q': keyword, 'type': 'song', 'limit': self.search_size_per_source},
            'url': 'https://youtube-music-api3.p.rapidapi.com/search',
        },
    }
    # construct search urls
    self.search_size_per_page = self.search_size_per_source
    # return
    return [{'candidate_apis': [rapidapi_candidate, ytmusic_candidate]}]
'''_parsewithyt1s'''
def _parsewithyt1s(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to an mp3 through the embed.dlsrv.online converter.

    The two highest bitrates are tried in order. For each one the converter is asked for a link,
    the audio is fetched immediately and kept in `downloaded_contents`.

    Args:
        search_result: search hit; must contain 'videoId'.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        SongInfo: the first bitrate that produced a usable payload, otherwise an empty SongInfo.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result['videoId'], SongInfo(source=self.source)
    MUSIC_QUALITIES = ['320', '256', '128', '96'][:2]
    # left-pads "M:SS" / "H:MM:SS" style durations to "HH:MM:SS"
    transform_search_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(str(d).split(":"))) + list(map(int, str(d).split(":")))))
    # parse
    for quality in MUSIC_QUALITIES:
        try: (resp := self.post('https://embed.dlsrv.online/api/download/mp3', json={"videoId": song_id, "format": "mp3", "quality": quality}, headers={"Content-Type": "application/json", "Origin": "https://embed.dlsrv.online", "Accept": "*/*"}, timeout=10, **request_overrides)).raise_for_status()
        except Exception: continue
        download_url: str = (download_result := resp2json(resp=resp)).get('url')
        if not download_url or not str(download_url).startswith('http'): continue
        try: (resp := self.get(download_url, allow_redirects=True, **request_overrides)).raise_for_status()
        except Exception: continue
        audio_bytes = resp.content
        # real payload length; `__sizeof__()` reported the bytes object's memory footprint instead
        audio_size = len(audio_bytes)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('author') or (', '.join([singer.get('name') for singer in (search_result.get('artists') or []) if isinstance(singer, dict) and singer.get('name')]))), album=legalizestring(search_result.get('album')),
            ext='mp3', file_size_bytes=audio_size, file_size=byte2mb(audio_size), identifier=song_id, duration_s=search_result.get('duration_seconds', 0) or 0, duration=transform_search_duration_func(search_result.get('duration', '0:00') or '0:00'), lyric='NULL', cover_url=search_result.get('thumbnail') or safeextractfromdict(search_result, ['thumbnails', -1, 'url'], None),
            download_url=download_url, download_url_status={'ok': True}, downloaded_contents=audio_bytes, default_download_headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"},
        )
        # payloads this small are treated as a failed conversion
        if song_info.file_size_bytes < 100: song_info.download_url_status = {'ok': False}
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithmp3youtube'''
def _parsvidewithmp3youtube(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to an mp3 through the api.mp3youtube.cc converter.

    A request key is fetched first (failures there propagate to the caller), then the two highest
    bitrates are tried in order. The audio is fetched immediately and kept in `downloaded_contents`.
    The method name is kept as spelled because `_parsewiththirdpartapis` refers to it.

    Args:
        search_result: search hit; must contain 'videoId'.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        SongInfo: the first bitrate that produced a usable payload, otherwise an empty SongInfo.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result['videoId'], SongInfo(source=self.source)
    MUSIC_QUALITIES = ['320', '256', '128', '96'][:2]
    # left-pads "M:SS" / "H:MM:SS" style durations to "HH:MM:SS"
    transform_search_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(str(d).split(":"))) + list(map(int, str(d).split(":")))))
    (resp := self.get('https://api.mp3youtube.cc/v2/sanity/key', headers={"Content-Type": "application/json", "Origin": "https://iframe.y2meta-uk.com", "Accept": "*/*"}, timeout=10, **request_overrides)).raise_for_status()
    mp3youtube_request_key = resp2json(resp)['key']
    # parse
    for quality in MUSIC_QUALITIES:
        audio_payload = {"link": f"https://youtu.be/{song_id}", "format": "mp3", "audioBitrate": quality, "videoQuality": "720", "vCodec": "h264"}
        try: (resp := self.post('https://api.mp3youtube.cc/v2/converter', json=audio_payload, headers={"Content-Type": "application/json", "Origin": "https://iframe.y2meta-uk.com", "Accept": "*/*", "key": mp3youtube_request_key}, timeout=10, **request_overrides)).raise_for_status()
        except Exception: continue
        download_url: str = (download_result := resp2json(resp=resp)).get('url')
        if not download_url or not str(download_url).startswith('http'): continue
        try: (resp := self.get(download_url, allow_redirects=True, **request_overrides)).raise_for_status()
        except Exception: continue
        audio_bytes = resp.content
        # real payload length; `__sizeof__()` reported the bytes object's memory footprint instead
        audio_size = len(audio_bytes)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('author') or (', '.join([singer.get('name') for singer in (search_result.get('artists') or []) if isinstance(singer, dict) and singer.get('name')]))), album=legalizestring(search_result.get('album')),
            ext='mp3', file_size_bytes=audio_size, file_size=byte2mb(audio_size), identifier=song_id, duration_s=search_result.get('duration_seconds', 0) or 0, duration=transform_search_duration_func(search_result.get('duration', '0:00') or '0:00'), lyric='NULL', cover_url=search_result.get('thumbnail') or safeextractfromdict(search_result, ['thumbnails', -1, 'url'], None),
            download_url=download_url, download_url_status={'ok': True}, downloaded_contents=audio_bytes, default_download_headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"},
        )
        # payloads this small are treated as a failed conversion
        if song_info.file_size_bytes < 100: song_info.download_url_status = {'ok': False}
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithclipto'''
def _parsewithclipto(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to an audio link through the clipto.com media downloader.

    Audio entries of the response are tried from largest to smallest `contentLength`.
    Each link is tested and probed, and the probe result decides the final extension and size.

    Args:
        search_result: search hit; must contain 'videoId'.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        SongInfo: the first media whose link tests valid, otherwise the last attempted or an empty SongInfo.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result['videoId'], SongInfo(source=self.source)
    # left-pads "M:SS" / "H:MM:SS" style durations to "HH:MM:SS"
    transform_search_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(str(d).split(":"))) + list(map(int, str(d).split(":")))))
    # parse
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", "content-type": "application/json", "origin": "https://www.clipto.com", "referer": "https://www.clipto.com/media-downloader/"}
    (resp := self.post('https://www.clipto.com/api/youtube', json={"url": f"https://www.youtube.com/watch?v={song_id}"}, headers=headers, **request_overrides)).raise_for_status()
    download_result = resp2json(resp=resp)
    # a missing mimeType is treated as "not audio" instead of raising on `'audio' in None`
    medias = [dr for dr in download_result['medias'] if isinstance(dr, dict) and (dr.get('type') in ('audio',) or 'audio' in (dr.get('mimeType') or ''))]
    medias = sorted(medias, key=lambda x: int(float(x.get('contentLength', 0) or 0)), reverse=True)
    for media in medias:
        download_url: str = media.get('url')
        if not download_url or not str(download_url).startswith('http'): continue
        content_length = int(float(media.get('contentLength', 0) or 0))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('author') or (', '.join([singer.get('name') for singer in (search_result.get('artists') or []) if isinstance(singer, dict) and singer.get('name')]))),
            album=legalizestring(search_result.get('album')), ext=media.get('extension', 'm4a') or 'm4a', file_size_bytes=content_length, file_size=byte2mb(content_length), identifier=song_id, duration_s=download_result.get('duration'), duration=seconds2hms(download_result.get('duration')),
            lyric='NULL', cover_url=search_result.get('thumbnail') or safeextractfromdict(search_result, ['thumbnails', -1, 'url'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']; song_info.ext = probe_status['ext']
        # mp4/weba containers are saved as m4a; anything still unrecognised falls back to the probe ext or mp3
        if song_info.ext in {'mp4', 'm4a', 'weba'}: song_info.ext = 'm4a'
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        # fall back to the duration of the search hit; the computed value used to be discarded
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = transform_search_duration_func(search_result.get('duration', '0:00') or '0:00')
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewithacethinker'''
def _parsewithacethinker(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to an audio link through the acethinker.ai downloader.

    A csrf token is fetched first, then the formats without a video codec are tried from largest to
    smallest `filesize`. When a format's link does not test valid, a secondary acethinker endpoint
    is queried and its `download_url` is tried on the same SongInfo.

    Args:
        search_result: search hit; must contain 'videoId'.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
    Returns:
        SongInfo: the first candidate whose link tests valid, otherwise the last attempted or an empty SongInfo.
    '''
    # init
    request_overrides, song_id, song_info = request_overrides or {}, search_result['videoId'], SongInfo(source=self.source)
    # left-pads "M:SS" / "H:MM:SS" style durations to "HH:MM:SS"
    transform_search_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(str(d).split(":"))) + list(map(int, str(d).split(":")))))
    (resp := self.get('https://www.acethinker.ai/downloader/api/get_csrf_token.php', **request_overrides)).raise_for_status()
    # parse
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", "accept": "application/json, text/plain, */*", "referer": "https://www.acethinker.ai/freemp3finder", "x-csrf-token": resp2json(resp=resp)['token']}
    (resp := self.get(f'https://www.acethinker.ai/downloader/api/dlapinewv2.php?url=https://www.youtube.com/watch?v={song_id}', headers=headers, **request_overrides)).raise_for_status()
    download_result: dict = resp2json(resp=resp)['res_data']
    # audio-only formats report an empty or "none" vcodec
    medias = [a for a in download_result['formats'] if isinstance(a, dict) and str(a.get('vcodec')).lower() in {"", "none"}]
    medias = sorted(medias, key=lambda x: int(float(x.get('filesize', 0) or 0)), reverse=True)
    for media in medias:
        if not (download_url := media.get('url')) or not str(download_url).startswith('http'): continue
        media_size = int(float(media.get('filesize', 0) or 0))
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('author') or (', '.join([singer.get('name') for singer in (search_result.get('artists') or []) if isinstance(singer, dict) and singer.get('name')]))),
            album=legalizestring(search_result.get('album')), ext=media.get('ext', 'm4a') or 'm4a', file_size_bytes=media_size, file_size=byte2mb(media_size), identifier=song_id, duration_s=download_result.get('duration', 0) or 0, duration=seconds2hms(download_result.get('duration', 0) or 0),
            lyric='NULL', cover_url=search_result.get('thumbnail') or safeextractfromdict(search_result, ['thumbnails', -1, 'url'], None), download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']; song_info.ext = probe_status['ext']
        # mp4/weba containers are saved as m4a; anything still unrecognised falls back to the probe ext or mp3
        if song_info.ext in {'mp4', 'm4a', 'weba'}: song_info.ext = 'm4a'
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        # fall back to the duration of the search hit; the computed value used to be discarded
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = transform_search_duration_func(search_result.get('duration', '0:00') or '0:00')
        if song_info.with_valid_download_url: break
        # secondary endpoint, tried when the direct format link is not usable
        try: (resp := self.get(f'https://www.acethinker.ai/downloader/api/newytdlapi/youtube_mp3_audio_video_downloader.php?url=https://www.youtube.com/watch?v={song_id}', headers=headers, **request_overrides)).raise_for_status()
        except Exception: continue
        if not (parsed_in_no_us_area := resp2json(resp=resp)).get('download_url'): continue
        song_info.update(dict(download_url=parsed_in_no_us_area.get('download_url'), download_url_status=self.audio_link_tester.test(parsed_in_no_us_area.get('download_url'), request_overrides)))
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        if song_info.with_valid_download_url: break
    # return
    return song_info
'''_parsewiththirdpartapis'''
def _parsewiththirdpartapis(self, search_result: dict, request_overrides: dict = None):
if self.default_cookies or request_overrides.get('cookies'): return SongInfo(source=self.source)
for imp_func in [self._parsewithyt1s, self._parsvidewithmp3youtube, self._parsewithacethinker, self._parsewithclipto]:
try: song_info_flac = imp_func(search_result, request_overrides); assert song_info_flac.with_valid_download_url; break
except: song_info_flac = SongInfo(source=self.source)
return song_info_flac
'''_parsewithofficialapiv1'''
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
# Builds a SongInfo for one search hit from the audio-only stream exposed by the `YouTube` helper,
# keeps whichever of that result and `song_info_flac` is preferred, then attaches lyrics from LyricSearchClient.
# Exceptions raised by the helper are not handled here and reach the caller.
# init
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
# an empty SongInfo is returned when the hit is not a dict or carries no 'videoId'
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('videoId'))): return song_info
# obtain basic song_info
# song_info_flac is reused directly when lossless is declared sufficient, it is valid and its ext is listed as lossless
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
else:
# here `download_url` is the stream object returned by getaudioonly(), not a url string
download_url = (cli := YouTube(video_id=search_result['videoId'])).streams.getaudioonly()
# durationMs converted to seconds; a zero value falls back to the hit's 'duration_seconds'
duration_in_secs = (float(download_url.durationMs) / 1000) or search_result.get('duration_seconds', 0) or 0
# NOTE(review): ext is fixed to 'mp3' whatever the stream's container is; confirm this is intended
song_info = SongInfo(
raw_data={'search': search_result, 'download': cli.vid_info, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('author') or (', '.join([singer.get('name') for singer in (search_result.get('artists') or []) if isinstance(singer, dict) and singer.get('name')]))), album=legalizestring(search_result.get('album')),
ext='mp3', file_size_bytes=download_url.filesize, file_size=byte2mb(download_url.filesize), identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric='NULL', cover_url=search_result.get('thumbnail') or safeextractfromdict(search_result, ['thumbnails', -1, 'url'], None), download_url=download_url, download_url_status={'ok': True},
)
# a reported size under 100 bytes marks the link as not ok
if song_info.file_size_bytes < 100: song_info.download_url_status = {'ok': False}
# compare and select the best
# song_info_flac wins when it is valid and either this result is invalid or song_info_flac.largerthan(song_info) holds
song_info = song_info_flac if song_info_flac.with_valid_download_url and (not song_info.with_valid_download_url or song_info_flac.largerthan(song_info)) else song_info
# supplement lyric results
# the lookup is keyed on the selected result's singers and song name
lyric_result, lyric = LyricSearchClient().search(artist_name=song_info.singers, track_name=song_info.song_name, request_overrides=request_overrides)
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
# the existing lyric is kept unless a non-empty text other than 'NULL' was found
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: dict = {}, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Query the candidate search backends and append every downloadable hit to `song_infos`.

    Args:
        keyword: the search keyword (not read here; it is already embedded in the candidate inputs).
        search_url: entry produced by `_constructsearchurls`; must contain 'candidate_apis'.
        request_overrides: extra keyword arguments forwarded to the HTTP calls.
        song_infos: list collecting results; a new list is created when omitted.
        progress: rich progress instance whose task description is updated with the outcome.
        progress_id: id of the task to update.
    Returns:
        list: `song_infos`, extended with the valid results.
    Raises:
        KeyError: when `search_url` has no 'candidate_apis' entry (raised before any request is made).
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: the former `[]` default was shared between calls and kept accumulating results
    song_infos = [] if song_infos is None else song_infos
    candidate_apis = copy.deepcopy(search_url)['candidate_apis']
    # successful
    try:
        # --search results, the first backend that answers with a non-empty list wins
        # NOTE(review): the `[1:]` slice skips the first candidate, which _constructsearchurls fills with rapidapi - confirm it is disabled on purpose
        search_results, answered = None, False
        for candidate_api in candidate_apis[1:]:
            try:
                method = candidate_api['method']
                resp = candidate_api['api'](**candidate_api['inputs'])
                if method == 'rapidapi':
                    resp.raise_for_status()
                    search_results = resp2json(resp=resp)['result']
                elif method == 'ytmusicapi':
                    search_results = [s for s in resp if s['resultType'] == 'song']
                else:
                    raise ValueError(f"Unsupported method: {method}")
                answered = True
                # explicit emptiness check (an `assert` would be stripped under `python -O`)
                if len(search_results) > 0: break
            except Exception:
                continue
        # report a readable error instead of a NameError on an unbound variable
        if not answered: raise RuntimeError('no candidate search api returned a usable response')
        for search_result in search_results:
            # --parse with third part apis
            song_info_flac = self._parsewiththirdpartapis(search_result=search_result, request_overrides=request_overrides)
            # --parse with official apis
            try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=song_info_flac, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
            except Exception: song_info = SongInfo(source=self.source)
            # --append to song_infos, falling back to the third-party result when the official one is unusable
            if not song_info.with_valid_download_url: song_info = song_info_flac
            if not song_info.with_valid_download_url: continue
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,15 @@
'''initialize'''
from .mitu import MituMusicClient
from .kkws import KKWSMusicClient
from .jcpoo import JCPOOMusicClient
from .flmp3 import FLMP3MusicClient
from .htqyy import HTQYYMusicClient
from .twot58 import TwoT58MusicClient
from .fangpi import FangpiMusicClient
from .buguyy import BuguyyMusicClient
from .zhuolin import ZhuolinMusicClient
from .gequbao import GequbaoMusicClient
from .gequhai import GequhaiMusicClient
from .livepoo import LivePOOMusicClient
from .fivesong import FiveSongMusicClient
from .yinyuedao import YinyuedaoMusicClient
@@ -0,0 +1,135 @@
'''
Function:
Implementation of BuguyyMusicClient: https://buguyy.top/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import html
import copy
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''BuguyyMusicClient'''
class BuguyyMusicClient(BaseMusicClient):
source = 'BuguyyMusicClient'
def __init__(self, **kwargs):
    '''Initialise the Buguyy client: base-class state first, then request headers and the session.'''
    super(BuguyyMusicClient, self).__init__(**kwargs)
    # the warning text below spells out what a missing Quark cookie means for downloads
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    search_headers = {
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "origin": "https://buguyy.top",
        "priority": "u=1, i",
        "referer": "https://buguyy.top/",
        "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": browser_user_agent,
    }
    self.default_search_headers = search_headers
    self.default_download_headers = {"user-agent": browser_user_agent}
    # the session defaults are the very same dict object as the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'keyword': keyword}
default_rule.update(rule)
# construct search urls based on search rules
base_url = 'https://a.buguyy.top/newapi/search.php?'
page_rule = copy.deepcopy(default_rule)
search_urls = [base_url + urlencode(page_rule)]
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Build a SongInfo for one search hit from the Quark share links it carries.

    Args:
        search_result: one entry of the search response; "id" is required, while "downurl",
            "ktmdownurl", "title", "singer" and "picurl" are read when present.
        request_overrides: extra keyword arguments forwarded to self.get and to the link tester.
    Returns:
        The SongInfo of the first share link that yields a valid download url; otherwise the last
        SongInfo that was built, or an empty one when no link could be parsed.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # metadata (album / lyric / duration) is best-effort: any failure degrades to an empty dict
    try:
        (resp := self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)).raise_for_status()
        lyric_result = resp2json(resp=resp)
    except Exception:
        lyric_result = dict()
    quark_download_urls = [u for u in [search_result.get('downurl', ''), search_result.get('ktmdownurl', '')] if u]
    for quark_download_url in quark_download_urls:
        m = re.search(r"(?i)(?:WAV|FLAC)#(https?://[^#]+)|MP3#(https?://[^#]+)", str(quark_download_url))
        # fix: a field without a "<FORMAT>#<url>" entry has nothing to parse; without this guard
        # m.group() raised AttributeError and the caller dropped every remaining result of the page
        if m is None: continue
        download_result, download_url = QuarkParser.parsefromurl(m.group(1) or m.group(2), **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album=legalizestring(safeextractfromdict(lyric_result, ['data', 'album'], None)),
            ext="wav", file_size_bytes=None, file_size=None, identifier=search_result["id"], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')) or "NULL", cover_url=safeextractfromdict(search_result, ["picurl"], None),
            download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        # size and extension come from the probe; an unrecognised extension falls back to mp3
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        song_info.ext = song_info.download_url_status['probe_status']['ext']
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # unknown duration: fall back to the digits of the metadata "duration" field (right-aligned to h:m:s)
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try: song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', safeextractfromdict(lyric_result, ['data', 'duration'], '')))))[-3:])
        except Exception: song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    # lyric clean-up: drop the site's failure placeholder, turn <br> into newlines, decode html entities
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    song_info.lyric = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE)
    song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Build a SongInfo for one search hit from the site's own play url.

    Args:
        search_result: one entry of the search response ("id", "title", "singer", "picurl" are read).
        request_overrides: extra keyword arguments forwarded to self.get and to the link tester.
    Returns:
        A populated SongInfo, or an empty one when the endpoint gives no http(s) url.
    '''
    request_overrides = request_overrides or {}
    song_info = SongInfo(source=self.source)
    # query the play-url endpoint; any failure simply leaves an empty payload
    try:
        resp = self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)
        resp.raise_for_status()
        play_payload = resp2json(resp=resp)
    except Exception:
        play_payload = dict()
    download_url = safeextractfromdict(play_payload, ['data', 'url'], '')
    if not (download_url and download_url.startswith('http')):
        return song_info
    # assemble the song record; the extension is first guessed from the url path
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': play_payload, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(search_result.get('title')),
        singers=legalizestring(search_result.get('singer')),
        album=legalizestring(safeextractfromdict(play_payload, ["data", "album"], None)),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size_bytes=None,
        file_size=None,
        identifier=search_result.get("id"),
        duration_s=None,
        duration='-:-:-',
        lyric=cleanlrc(safeextractfromdict(play_payload, ['data', 'lrc'], 'NULL')),
        cover_url=safeextractfromdict(search_result, ['picurl'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    # probe the link for size and, when the url-derived extension is not a known audio one, for the extension
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    known_exts = AudioLinkTester.VALID_AUDIO_EXTS
    if song_info.ext not in known_exts:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in known_exts else 'mp3'
    # duration: take the digits of the payload's "duration" field, right-aligned to h:m:s
    if (not song_info.duration) or song_info.duration in ('-:-:-', '00:00:00'):
        try:
            hms_parts = [0, 0, 0] + [int(token) for token in re.findall(r'\d+', safeextractfromdict(play_payload, ['data', 'duration'], ''))]
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*hms_parts[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    # lyric clean-up: failure placeholder -> NULL, <br> -> newline, html entities decoded
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    lyric_with_newlines = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE)
    song_info.lyric = lyric_with_newlines
    song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to song_infos.

    Args:
        keyword: search text; not read here because search_url already encodes it.
        search_url: url produced by _constructsearchurls.
        request_overrides: extra keyword arguments forwarded to self.get.
        song_infos: list the results are appended to in place; a new list is created when omitted.
        progress: optional progress display whose task description is updated with the outcome.
        progress_id: id of the task to update on progress.
    Returns:
        song_infos, extended with the SongInfo objects that have a valid download url.
    '''
    # init
    request_overrides = request_overrides or {}
    # fix: a literal [] default is created once and shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, verify=False, **request_overrides)).raise_for_status()
        search_results = resp2json(resp=resp)['data']['list']
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('id' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----parse from quark links (only attempted when Quark cookies are configured)
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (fix: progress defaults to None, so it is only touched when supplied)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,145 @@
'''
Function:
Implementation of FangpiMusicClient: https://www.fangpi.net/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''FangpiMusicClient'''
class FangpiMusicClient(BaseMusicClient):
source = 'FangpiMusicClient'
def __init__(self, **kwargs):
    '''Initialise the Fangpi client: base-class state first, then request headers and the session.'''
    super(FangpiMusicClient, self).__init__(**kwargs)
    # the warning text below spells out what a missing Quark cookie means for downloads
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {
        "user-agent": browser_user_agent,
        "referer": "https://www.fangpi.net/",
    }
    self.default_download_headers = {"user-agent": browser_user_agent}
    # the session defaults are the very same dict object as the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
search_urls = [f'https://www.fangpi.net/s/{keyword}']
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html, base_url="https://www.fangpi.net"):
    '''Extract the song rows from a Fangpi search results page.

    Args:
        html: markup of the search page.
        base_url: prefix used to turn the relative "/music/..." links into absolute urls.
    Returns:
        A list of dicts with keys "id", "name", "artist", "title" and "url", one per distinct
        song url; empty when the page has no results card.
    '''
    soup, search_results, seen = BeautifulSoup(html, "lxml"), [], set()
    # the results live in the card whose text mentions "搜索结果" and that has an h1.mark heading
    result_card = next((card for card in soup.select("div.card") if "搜索结果" in card.get_text(" ", strip=True) and card.select_one("h1.mark")), None)
    if result_card is None: return []
    for row in result_card.select("div.row"):
        # a usable row needs both the titled detail link and the action button
        detail, action = row.select_one('a[href^="/music/"][title]'), row.select_one('a.btn[href^="/music/"]')
        if not detail or not action: continue
        # de-duplicate on the absolute song url
        if (url := urljoin(base_url, detail["href"])) in seen: continue
        seen.add(url)
        # fix: a row without the artist element used to raise AttributeError, which lost every row of the page
        artist_el = row.select_one("small.text-jade")
        search_results.append({
            "id": detail["href"].rsplit("/", 1)[-1],
            "name": (row.select_one("span.text-primary") or detail).get_text(strip=True),
            "artist": artist_el.get_text(strip=True) if artist_el is not None else "",
            "title": detail.get("title", "").strip(),
            "url": url,
        })
    return search_results
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Build a SongInfo for one song page from the Quark share links listed in its page data.

    Args:
        search_result: row produced by _parsesearchresultsfromhtml ("url" is read as identifier fallback).
        download_result: page data dict; "mp3_extra_urls", "mp3_title", "mp3_author", "mp3_id",
            "mp3_cover" and "mp3_duration" are read, and "quark_parse_result" is written into it.
        soup: parsed song page, used to read the lyric container.
        request_overrides: extra arguments forwarded to the link tester.
    Returns:
        The SongInfo of the first share link that yields a valid download url; otherwise the last
        SongInfo that was built, or an empty one when no link could be parsed.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
        # fix: entries that are not dicts or carry no share link are skipped instead of raising
        if not isinstance(quark_download_url, dict) or not quark_download_url.get('share_link'): continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        # fix: a page without the lyric container yields an empty lyric rather than an AttributeError
        lyric_container = soup.find("div", id="content-lrc")
        lyric_text = lyric_container.get_text("\n", strip=True) if lyric_container is not None else ''
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
            identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(lyric_text),
            cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        # size and extension come from the probe; an unrecognised extension falls back to mp3
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        song_info.ext = song_info.download_url_status['probe_status']['ext']
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # unknown duration: left-pad the page's "mp3_duration" (e.g. "03:45") to h:m:s
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        format_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(d.split(":"))) + list(map(int, d.split(":")))))
        # fix: a non-numeric or non-string duration no longer raises out of the method
        try: song_info.duration = format_duration_func(download_result.get('mp3_duration', '00:00:00') or '00:00:00')
        except (AttributeError, TypeError, ValueError): song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric text
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Build a SongInfo for one song page through the site's play-url API.

    Args:
        search_result: row produced by _parsesearchresultsfromhtml ("url" is read as identifier fallback).
        download_result: page data dict; "play_id", "mp3_title", "mp3_author", "mp3_id", "mp3_cover"
            and "mp3_duration" are read, and the API response is stored under "api/play-url".
        soup: parsed song page, used to read the lyric container.
        request_overrides: extra keyword arguments forwarded to self.post and to the link tester.
    Returns:
        A populated SongInfo, or an empty one when there is no play id or no http(s) url.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    try:
        (resp := self.post('https://www.fangpi.net/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status()
        download_result['api/play-url'] = resp2json(resp=resp)
    except Exception:
        download_result['api/play-url'] = {}
    download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
    if not download_url or not download_url.startswith('http'): return song_info
    # fix: a page without the lyric container yields an empty lyric rather than an AttributeError
    lyric_container = soup.find("div", id="content-lrc")
    lyric_text = lyric_container.get_text("\n", strip=True) if lyric_container is not None else ''
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=cleanlrc(lyric_text), cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # keep the url-derived extension when it is a known audio one, else trust the probe, else mp3
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        # fix: the page data keeps its duration under "mp3_duration" (the key the Quark path reads);
        # the former ['data', 'duration'] lookup is kept only as a fallback
        try:
            raw_duration = download_result.get('mp3_duration') or safeextractfromdict(download_result, ['data', 'duration'], '')
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', str(raw_duration)))))[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric text
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to song_infos.

    Args:
        keyword: search text; not read here because search_url already contains it.
        search_url: url produced by _constructsearchurls.
        request_overrides: extra keyword arguments forwarded to self.get.
        song_infos: list the results are appended to in place; a new list is created when omitted.
        progress: optional progress display whose task description is updated with the outcome.
        progress_id: id of the task to update on progress.
    Returns:
        song_infos, extended with the SongInfo objects that have a valid download url.
    '''
    # init
    request_overrides = request_overrides or {}
    # fix: a literal [] default is created once and shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
            except Exception: continue
            # the page data is embedded as JSON.parse("<string literal>") in the window.appData script
            script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
            if script_tag is None: continue
            js_text: str = script_tag.string
            if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
            # fix: one undecodable page is skipped instead of aborting the remaining results
            try: download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
            except Exception: continue
            if not isinstance(download_result, dict): continue
            # undo the JSON "\/" escaping that is still present in the url fields
            if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
            if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
            for share_link in (download_result.get("mp3_extra_urls", []) or []):
                if isinstance(share_link, dict): share_link['share_link'] = str(share_link.get('share_link', '')).replace("\\/", "/")
            # ----parse from quark links (only attempted when Quark cookies are configured)
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (fix: progress defaults to None, so it is only touched when supplied)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,113 @@
'''
Function:
Implementation of FiveSongMusicClient: https://www.5song.xyz/index.html
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''FiveSongMusicClient'''
class FiveSongMusicClient(BaseMusicClient):
source = 'FiveSongMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 0, "WAV": 1, "FLAC": 2, "APE": 3, "ALAC": 4, "AAC": 5, "MP3": 6, "OGG": 7, "M4A": 8}
def __init__(self, **kwargs):
    '''Initialise the FiveSong client; Quark cookies are mandatory for this source.

    Raises:
        AssertionError: when "quark_parser_config" carries no cookies.
    '''
    super(FiveSongMusicClient, self).__init__(**kwargs)
    # fix: an assert statement is removed under "python -O", which silently disabled this check;
    # raising the same exception type explicitly keeps the behaviour callers already see
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    # the session defaults are the very same dict object as the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 10)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
if int(count // page_size) + 1 == 1: search_urls.append(f'https://www.5song.xyz/search.html?keyword={keyword}')
else: search_urls.append(f'https://www.5song.xyz/search.html?page={int(count // page_size) + 1}&keyword={keyword}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Extract the result entries from a 5song search page.

    Args:
        html_text: markup of the search page.
    Returns:
        A list of dicts with keys "title", "formats", "singer", "date", "num", "detail_url"
        and "cover_url"; text fields are None when their element is absent.
    '''
    site_root = "https://www.5song.xyz"
    document = BeautifulSoup(html_text, "lxml")

    def text_or_none(anchor, selector):
        # stripped text of the first match under the anchor, or None when nothing matches
        node = anchor.select_one(selector)
        return node.get_text(strip=True) if node else None

    entries = []
    for item in document.select("div.list ul > li"):
        link = item.select_one("a[href]")
        if not link:
            continue
        detail_url = urljoin(site_root, link.get("href", "").strip())
        title = text_or_none(link, "div.con div.t h3")
        # format badges: keep only the spans that actually carry text
        badge_texts = (span.get_text(strip=True) for span in link.select("div.con div.t span"))
        formats = [badge for badge in badge_texts if badge]
        singer = text_or_none(link, "div.singerNum div.singer")
        date = text_or_none(link, "div.singerNum div.date")
        num = text_or_none(link, "div.singerNum div.num")
        image = link.select_one("div.pic img")
        if image and image.get("src"):
            cover_url = urljoin(site_root, image.get("src"))
        else:
            cover_url = None
        entries.append({
            "title": title,
            "formats": formats,
            "singer": singer,
            "date": date,
            "num": num,
            "detail_url": detail_url,
            "cover_url": cover_url,
        })
    return entries
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to song_infos.

    Args:
        keyword: search text; not read here because search_url already contains it.
        search_url: url produced by _constructsearchurls.
        request_overrides: extra keyword arguments forwarded to self.get and to the link tester.
        song_infos: list the results are appended to in place; a new list is created when omitted.
        progress: optional progress display whose task description is updated with the outcome.
        progress_id: id of the task to update on progress.
    Returns:
        song_infos, extended with the SongInfo objects that have a valid download url.
    '''
    # init
    request_overrides, base_url = request_overrides or {}, "https://www.5song.xyz"
    # fix: a literal [] default is created once and shared by every call that omits the argument
    song_infos = [] if song_infos is None else song_infos
    # a link label such as "FLAC下载" is mapped to its format name; unknown labels sort last
    guess_format_func = lambda label: (m.group(1) if (m := re.search(r"(DSD|WAV|FLAC|APE|ALAC|AAC|MP3|OGG|M4A)", str(label).upper())) else None)
    sort_by_audio_quality_func = lambda link_list: sorted(link_list, key=lambda x: (FiveSongMusicClient.MUSIC_QUALITY_RANK.get((fmt := guess_format_func(x.get("label", ""))), 999), fmt or ""))
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('detail_url' not in search_result): continue
            song_info, song_id = SongInfo(source=self.source), urlparse(str(search_result['detail_url'])).path.strip('/').split('/')[-1].split('.')[0]
            # ----fetch basic information
            try: (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status()
            except Exception: continue
            soup, quark_links = BeautifulSoup(resp.text, "lxml"), []
            for li in soup.select("div.download ul li[data-url]"):
                if not (quark_url := (li.get("data-url") or "").strip()): continue
                a = li.select_one("a[href]"); label = a.get_text(" ", strip=True) if a else None
                pc_download_href = a.get("href", "").strip() if a else None
                pc_download_url = urljoin(base_url, pc_download_href) if pc_download_href else None
                if "quark" in quark_url: quark_links.append({"label": label, "quark_url": quark_url, "pc_download_url": pc_download_url})
            if not quark_links: continue
            download_result = dict(quark_links=quark_links)
            # fix: a detail page without the lyric container used to raise AttributeError and abort the
            # whole page; it now yields an empty lyric (the text is link-independent, so it is built once)
            lyric_container = soup.select_one("div.viewCon div.text")
            lyric_paragraphs = lyric_container.select("p") if lyric_container is not None else []
            lyric_text = "\n".join([p.get_text(strip=True) for p in lyric_paragraphs if p.get_text(strip=True)])
            # ----parse from quark links, best quality first
            for quark_link in sort_by_audio_quality_func(download_result['quark_links']):
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_link['quark_url'], **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'): continue
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL', ext='mp3', file_size_bytes=None, file_size=None,
                    identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(lyric_text), cover_url=search_result.get('cover_url'),
                    download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                )
                song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                # size and extension come from the probe; an unrecognised extension falls back to mp3
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                song_info.ext = song_info.download_url_status['probe_status']['ext']
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if song_info.with_valid_download_url: break
            if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
            # fix: the other clients in this package also treat '00:00:00' as "unknown" before falling back to the lyric
            if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress (fix: progress defaults to None, so it is only touched when supplied)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,106 @@
'''
Function:
Implementation of FLMP3MusicClient: https://www.flmp3.pro/index.html
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, SongInfo, QuarkParser, AudioLinkTester
'''FLMP3MusicClient'''
class FLMP3MusicClient(BaseMusicClient):
source = 'FLMP3MusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
    '''Initialise the FLMP3 client; Quark cookies are mandatory for this source.

    Raises:
        AssertionError: when "quark_parser_config" carries no cookies.
    '''
    super(FLMP3MusicClient, self).__init__(**kwargs)
    # fix: an assert statement is removed under "python -O", which silently disabled this check;
    # raising the same exception type explicitly keeps the behaviour callers already see
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    # the session defaults are the very same dict object as the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 12)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
if int(count // page_size) + 1 == 1: search_urls.append(f'https://www.flmp3.pro/search.html?keyword={keyword}')
else: search_urls.append(f'https://www.flmp3.pro/search.html?page={int(count // page_size) + 1}&keyword={keyword}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Extract the result entries from an FLMP3 search page.

    Args:
        html_text: markup of the search page.
    Returns:
        A list of dicts with keys "song_url", "title", "artist", "date", "img_url" and "img_alt";
        a field is None when its element (or the link's href) is absent.
    '''
    site_root = "https://flmp3.pro"
    document = BeautifulSoup(html_text, "html.parser")

    def text_or_none(item, selector):
        # stripped text of the first match under the list item, or None when nothing matches
        node = item.select_one(selector)
        return node.get_text(strip=True) if node else None

    entries = []
    for item in document.select("div.list ul.flex.flex-wrap > li"):
        link = item.select_one("a")
        if not link:
            continue
        relative_href = link.get("href", "")
        if relative_href:
            song_url = urljoin(site_root, relative_href)
        else:
            song_url = None
        title = text_or_none(item, "div.con div.t h3")
        artist = text_or_none(item, "div.con div.t p")
        date = text_or_none(item, "div.con div.date")
        image = item.select_one("div.pic img")
        entries.append({
            "song_url": song_url,
            "title": title,
            "artist": artist,
            "date": date,
            "img_url": image.get("src") if image else None,
            "img_alt": image.get("alt") if image else None,
        })
    return entries
'''_parsesongdetailfordownloadpages'''
def _parsesongdetailfordownloadpages(self, html_text: str):
    '''Collect the download-page links of a song detail page, best quality first.

    Args:
        html_text: markup of the song detail page.
    Returns:
        A dict with "links_sorted" (dicts with "text", "quality", "rank" and "url", sorted by
        descending rank) and "song_id" (file stem of the top link's url path, or None when the
        page lists no links).
    '''
    # the first MUSIC_QUALITY_RANK key contained in the upper-cased button text names the quality
    infer_quality_func = lambda text: next((q for q in FLMP3MusicClient.MUSIC_QUALITY_RANK.keys() if q in str(text).upper()), "UNKNOWN")
    soup, base_url, links = BeautifulSoup(html_text, "html.parser"), "https://www.flmp3.pro", []
    for a in soup.select(".btnBox a[href]"):
        text, href = a.get_text(strip=True), a["href"]
        if not href: continue
        # infer once and reuse for both fields; unknown qualities rank 0
        quality = infer_quality_func(text)
        links.append({"text": text, "quality": quality, "rank": FLMP3MusicClient.MUSIC_QUALITY_RANK.get(quality, 0), "url": urljoin(base_url, href)})
    links_sorted = sorted(links, key=lambda x: x["rank"], reverse=True)
    # fix: a page without links used to raise IndexError here; return an empty result instead
    song_id = urlparse(str(links_sorted[0]['url'])).path.strip('/').split('/')[-1].split('.')[0] if links_sorted else None
    return {'links_sorted': links_sorted, 'song_id': song_id}
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('song_url' not in search_result): continue
song_info = SongInfo(source=self.source)
try: (resp := self.get(search_result['song_url'], **request_overrides)).raise_for_status(); download_result = self._parsesongdetailfordownloadpages(resp.text)
except Exception: continue
if not download_result['links_sorted']: continue
for download_page_details in download_result['links_sorted']:
try: (resp := self.get(download_page_details['url'], **request_overrides)).raise_for_status(); soup = BeautifulSoup(resp.text, "lxml"); quark_download_url = soup.select_one("a.linkbtn[href]")['href']
except Exception: continue
if not quark_download_url or not quark_download_url.startswith('http'): continue
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
if not download_url or not str(download_url).startswith('http'): continue
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('artist')), album='NULL',
ext='mp3', file_size_bytes=None, file_size='NULL', identifier=download_result['song_id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric='NULL', cover_url=search_result.get('img_url', None),
download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,142 @@
'''
Function:
Implementation of GequbaoMusicClient: https://www.gequbao.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''GequbaoMusicClient'''
class GequbaoMusicClient(BaseMusicClient):
source = 'GequbaoMusicClient'
def __init__(self, **kwargs):
super(GequbaoMusicClient, self).__init__(**kwargs)
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
search_urls = [f'https://www.gequbao.com/s/{keyword}']
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str, base_url: str = "https://www.gequbao.com"):
soup, search_results = BeautifulSoup(html_text, "html.parser"), []
for row in soup.select("div.row.no-gutters.py-2d5.border-top.align-items-center"):
if not (a := row.select_one('a[href^="/music/"]')): continue
title = row.select_one("span.text-primary"); artist = row.select_one("small.text-jade")
search_results.append({"name": title.get_text(strip=True) if title else None, "artist": artist.get_text(strip=True) if artist else None, "url": urljoin(base_url, a["href"]), "id": a["href"].rstrip("/").split("/")[-1]})
return search_results
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
if not download_url or not str(download_url).startswith('http'): continue
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(soup.find("div", id="content-lrc").get_text("\n", strip=True)),
cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
format_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(d.split(":"))) + list(map(int, d.split(":")))))
song_info.duration = format_duration_func(download_result.get('mp3_duration', '00:00:00') or '00:00:00')
if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
if 'play_id' not in download_result or not download_result['play_id']: return song_info
try: (resp := self.post('https://www.gequbao.com/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status(); download_result['api/play-url'] = resp2json(resp=resp)
except Exception: download_result['api/play-url'] = {}
download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
if not download_url or not download_url.startswith('http'): return song_info
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=cleanlrc(soup.find("div", id="content-lrc").get_text("\n", strip=True)), cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
try: song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', safeextractfromdict(download_result, ['data', 'duration'], '')))))[-3:])
except Exception: song_info.duration = '-:-:-'
if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('url' not in search_result): continue
song_info = SongInfo(source=self.source)
# ----fetch basic information
try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
except Exception: continue
script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
if script_tag is None: continue
js_text: str = script_tag.string
if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
for share_link in (download_result.get("mp3_extra_urls", []) or []): isinstance(share_link, dict) and share_link.__setitem__('share_link', str(share_link.get('share_link', '')).replace("\\/", "/"))
# ----parse from quark links
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
# ----parse from play url
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
# ----filter if invalid
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,153 @@
'''
Function:
Implementation of GequhaiMusicClient: https://www.gequhai.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import base64
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, extractdurationsecondsfromlrc, seconds2hms, searchdictbykey, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''GequhaiMusicClient'''
class GequhaiMusicClient(BaseMusicClient):
source = 'GequhaiMusicClient'
def __init__(self, **kwargs):
super(GequhaiMusicClient, self).__init__(**kwargs)
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 12)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
if int(count // page_size) + 1 == 1: search_urls.append(f'https://www.gequhai.com/s/{keyword}')
else: search_urls.append(f'https://www.gequhai.com/s/{keyword}?page={int(count // page_size) + 1}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
soup, base_url, search_results = BeautifulSoup(html_text, "html.parser"), "https://www.gequhai.com", []
if not (table := soup.select_one("table#myTables")): return []
for tr in table.select("tbody tr"):
if len((tds := tr.find_all("td"))) < 3: continue
idx_text = tds[0].get_text(strip=True); a = tds[1].find("a")
title = a.get_text(strip=True) if a else tds[1].get_text(strip=True)
href: str = a.get("href", "") if a else ""; play_url = urljoin(base_url, href) if href else ""
singer = tds[2].get_text(strip=True); m = re.search(r"/play/(\d+)", href or ""); play_id = m.group(1) if m else None
search_results.append({"index": int(idx_text) if idx_text.isdigit() else idx_text, "title": title, "singer": singer, "href": href, "play_url": play_url, "play_id": play_id})
return search_results
'''_decodequarkurl'''
def _decodequarkurl(self, quark_url: str):
return base64.b64decode(quark_url.replace("#", "H")).decode("utf-8", errors="strict")
'''_extractappdataandwindowvars'''
def _extractappdataandwindowvars(self, js_text: str) -> dict:
out, m = {}, re.search(r"window\.appData\s*=\s*(\{.*?\})\s*;", js_text, flags=re.S)
if m: app = json_repair.loads(m.group(1)); out["appData"] = app; out.update(app)
for k, v in re.findall(r"window\.(\w+)\s*=\s*'([^']*)'\s*;", js_text): out[k] = v
for k, v in re.findall(r'window\.(\w+)\s*=\s*"([^"]*)"\s*;', js_text): out[k] = v
seen = set(out); out.update({k: int(v) if re.fullmatch(r"-?\d+", v) else float(v) for k, v in re.findall(r"window\.(\w+)\s*=\s*(-?\d+(?:\.\d+)?)\s*;", js_text) if not (k in seen or seen.add(k))})
seen = set(out); out.update({k: {"true": True, "false": False, "null": None}[str(v).lower()] for k, v in re.findall(r"window\.(\w+)\s*=\s*(true|false|null)\s*;", js_text, flags=re.I) if not (k in seen or seen.add(k))})
if "mp3_title" in out and "mp3_author" in out: out.setdefault("mp3_name", f"{out['mp3_title']}-{out['mp3_author']}")
if "mp3_extra_url" in out: out["mp3_extra_url_decoded"] = self._decodequarkurl(out["mp3_extra_url"])
return out
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
quark_download_url = download_result.get('mp3_extra_url_decoded', '')
if not quark_download_url or not str(quark_download_url).startswith('http'): return song_info
download_result['quark_parse_result'], download_url = QuarkParser.parsefromdirurl(quark_download_url, **self.quark_parser_config)
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]; duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author', None)), album='NULL', ext='mp3', file_size=None,
identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(soup.find("div", id="content-lrc2").get_text("\n", strip=True)),
cover_url=download_result.get('mp3_cover'), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if not song_info.with_valid_download_url: return SongInfo(source=self.source)
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
if 'play_id' not in download_result or not download_result['play_id']: return song_info
headers = {
"accept": "application/json, text/javascript, */*; q=0.01", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "content-type": "application/x-www-form-urlencoded; charset=UTF-8", "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin",
"sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"", "x-custom-header": "SecretKey", "x-requested-with": "XMLHttpRequest", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty", "origin": "https://www.gequhai.com",
"priority": "u=1, i", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
}
(resp := self.post('https://www.gequhai.com/api/music', data={'id': download_result['play_id'], 'type': '0'}, headers=headers, **request_overrides)).raise_for_status()
download_result['api/music'] = resp2json(resp=resp); download_url = safeextractfromdict(download_result['api/music'], ['data', 'url'], '')
if not download_url or not str(download_url).startswith('http'): return song_info
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1],
file_size=None, identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1], duration='-:-:-', lyric=cleanlrc(soup.find("div", id="content-lrc2").get_text("\n", strip=True)), cover_url=download_result.get('mp3_cover'),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if not song_info.with_valid_download_url: return SongInfo(source=self.source)
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
song_info = SongInfo(source=self.source)
# ----fetch basic information
try: (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status(); download_result = self._extractappdataandwindowvars(resp.text)
except Exception: continue
soup = BeautifulSoup(resp.text, 'lxml')
# ----parse from quark links
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
# ----parse from play url
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
# ----filter if invalid
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,108 @@
'''
Function:
Implementation of HTQYYMusicClient: http://www.htqyy.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
from html import unescape
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, SongInfo, AudioLinkTester
'''HTQYYMusicClient'''
class HTQYYMusicClient(BaseMusicClient):
source = 'HTQYYMusicClient'
def __init__(self, **kwargs):
super(HTQYYMusicClient, self).__init__(**kwargs)
self.default_search_headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-encoding": "gzip, deflate", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "cache-control": "max-age=0", "host": "www.htqyy.com", "proxy-connection": "keep-alive", "referer": "http://www.htqyy.com/", "upgrade-insecure-requests": "1",
}
self.default_download_headers = {"accept-encoding": "identity;q=1, *;q=0", "referer": "http://www.htqyy.com/", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
search_urls = [f'http://www.htqyy.com/home/search?wd={keyword}']
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
base_url, soup = "http://www.htqyy.com", BeautifulSoup(html_text, "html.parser")
items, search_results = soup.select("ul#musicList li.musicItem"), []
for li in items:
chk = li.select_one('input[type="checkbox"][name="checked"]')
song_id = chk["value"].strip() if chk and chk.has_attr("value") else None
a_title = li.select_one("span.title a")
play_url = urljoin(base_url, play_href) if (play_href := a_title["href"].strip() if a_title and a_title.has_attr("href") else None) else None
artist = a_artist.get_text(" ", strip=True) if (a_artist := li.select_one("span.artistName a")) else None; artist_url = urljoin(base_url, a_artist["href"]) if a_artist and a_artist.has_attr("href") else None
album = a_album.get_text(" ", strip=True) if (a_album := li.select_one("span.albumName a")) else None; album_url = urljoin(base_url, a_album["href"]) if a_album and a_album.has_attr("href") else None
search_results.append({"id": song_id, "sid": a_title.get("sid") if a_title else None, "title": a_title.get_text(" ", strip=True) if a_title else None, "title_attr": a_title.get("title") if a_title else None, "artist": artist, "artist_url": artist_url, "album": album, "album_url": album_url, "play_url": play_url})
return search_results
'''_extractplayscriptinfo'''
def _extractplayscriptinfo(self, html_text: str):
unescape_func = lambda x: unescape(x) if isinstance(x, str) else x
grabvar_func = lambda name: (None if (m := re.search(rf'\bvar\s+{re.escape(name)}\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', t)) is None else (int(v) if m.group(3) is not None else v) if (v := (m.group(1) or m.group(2) or m.group(3))) is not None else None)
soup, script_text = BeautifulSoup(html_text, "html.parser"), None
for s in soup.find_all("script"):
if not (txt := s.string or s.get_text()): continue
if ("PageData." in txt or "var PageData" in txt) and ("fileHost" in txt or "var mp3" in txt): script_text = txt; break
if not script_text: return {}
t, pagedata = script_text, {}
for m in re.finditer(r'PageData\.(\w+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', t):
key, val = m.group(1), m.group(2) or m.group(3) or m.group(4)
if m.group(4) is not None: val = int(val)
pagedata[key] = val
file_format, ip = grabvar_func("format") or pagedata.get("format"), grabvar_func("ip")
file_host, mp3_path, bd_text, bd_text2, img_url, mp3_url = grabvar_func("fileHost"), grabvar_func("mp3"), grabvar_func("bdText"), grabvar_func("bdText2"), grabvar_func("imgUrl"), None
if file_host and mp3_path and re.search(r'\bmp3\s*=\s*fileHost\s*\+\s*mp3\s*;', t): mp3_url = file_host + mp3_path
return {"format": unescape_func(file_format), "PageData": {k: unescape_func(v) for k, v in pagedata.items()}, "ip": unescape_func(ip), "fileHost": unescape_func(file_host), "mp3_path": unescape_func(mp3_path), "mp3_url": unescape_func(mp3_url), "bdText": unescape_func(bd_text), "bdText2": unescape_func(bd_text2), "imgUrl": unescape_func(img_url)}
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
song_info = SongInfo(source=self.source)
try: (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status(); download_result = self._extractplayscriptinfo(resp.text)
except Exception: continue
download_url: str = download_result.get('mp3_url')
if not download_url or not download_url.startswith('http'): continue
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('artist')), album=legalizestring(search_result.get('album')),
ext=download_result.get('format', 'mp3') or download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result.get('id') or search_result.get('sid'), duration_s=None, duration='-:-:-', lyric='NULL', cover_url=download_result.get('imgUrl'),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
if not song_info.with_valid_download_url: continue
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,171 @@
'''
Function:
Implementation of JCPOOMusicClient: https://www.jcpoo.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import copy
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''JCPOOMusicClient'''
class JCPOOMusicClient(BaseMusicClient):
source = 'JCPOOMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
super(JCPOOMusicClient, self).__init__(**kwargs)
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'page': 0, 'keyword': keyword}
default_rule.update(rule)
# construct search urls
base_url = 'https://www.jcpoo.cn/search?'
self.search_size_per_page = min(self.search_size_per_source, 30)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['page'] = int(count // page_size)
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.jcpoo.cn/"
for li in soup.select("ul.tuij_song li.song_item2"):
if not (a := li.select_one("a[href]")): continue
href = a["href"].strip(); full_url = urljoin(base_url, href)
title = title_div.get_text(strip=True) if (title_div := a.select_one(".song_info2 > div")) else a.get_text(" ", strip=True)
q = parse_qs(urlparse(href).query); mid = q.get("id", [None])[0]; m = re.compile(r'^(.*?)《(.*?)》$').match(title.strip())
singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title.strip())
search_results.append({"title": song_name, "artist": singer, "url": full_url, "id": mid.removeprefix('MUSIC_')})
return search_results
'''_extractquarklinksfromhtml'''
def _extractquarklinksfromhtml(self, html_text: str):
PAT = re.compile(
r"""(?:const|let|var)\s+
(?P<key>[A-Za-z0-9_]+?)\s*=\s*
(?P<quote>["'])
(?P<url>https?://pan\.quark\.cn/s/[^"']+)
(?P=quote)
""", re.VERBOSE
)
extract_quark_links_from_text_func = lambda text: [{"key": key, "format": fmt, "url": url} for m in PAT.finditer(text) if (url := m.group("url").strip()) and (key := m.group("key")) and ((base := (key[:-4] if key.endswith("_url") else key)) or True) and (((fmt := (([k for k in JCPOOMusicClient.MUSIC_QUALITY_RANK.keys() if k.lower() in base.lower()] or [base])[-1])) or True))]
soup, outs = BeautifulSoup(html_text, "lxml"), []
for s in soup.find_all("script"): "pan.quark.cn/s/" in (js := s.string or s.get_text() or "") and outs.extend(extract_quark_links_from_text_func(js))
seen, uniq = set(), []
for it in outs: (url := it["url"]) not in seen and (seen.add(url) or uniq.append(it))
uniq = sorted(uniq, key=lambda x: JCPOOMusicClient.MUSIC_QUALITY_RANK.get(str(x["format"]).upper(), 0), reverse=True)
return {'quark_links': uniq, 'cover_url': (bytes(m.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/") if (m := re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)) else None)}
'''_extractlrc'''
def _extractlrc(self, js_text: str):
# functions
norm_func = lambda s: re.sub(r"\s+", "", str(s))
pick_func = lambda d, target: next((v for k, v in d.items() if norm_func(k) == target), None)
fmt_lrc_time_func = lambda sec: (f"[{int((t := float(norm_func(sec)))) // 60:02d}:{(t - (int(t // 60) * 60)):05.2f}]")
lrc_list_to_lrc_func = lambda detail: (("\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]",]).strip() + "\n") + "\n".join(f"{ts}{ly}" for ts, ly in sorted([(fmt_lrc_time_func(t), re.sub(r"\s+", " ", str(lyric)).strip()) for it in (detail.get("music_lrclist", []) or []) for t in [pick_func(it, "time")] for lyric in [pick_func(it, "lineLyric")] if t is not None and lyric is not None], key=lambda x: x[0],)))
# match
if not (s := re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)): return {}, 'NULL'
string = s.group(1).replace("\r", "").replace("\n", ""); lyric_result = json_repair.loads(ast.literal_eval(f'"{string}"')); lyric = cleanlrc(lrc_list_to_lrc_func(lyric_result))
# return
return lyric_result, lyric
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
(resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
try: lyric_result, lyric = self._extractlrc(resp.text)
except Exception: lyric_result, lyric = {}, 'NULL'
download_result = self._extractquarklinksfromhtml(resp.text)
for quark_info in download_result['quark_links']:
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
if not download_url or not str(download_url).startswith('http'): continue
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric, cover_url=download_result.get('cover_url'), download_url=download_url,
download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
(resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
try: lyric_result, lyric = self._extractlrc(resp.text)
except Exception: lyric_result, lyric = {}, 'NULL'
download_result = self._extractquarklinksfromhtml(resp.text)
(resp := self.get(f"https://www.jcpoo.cn/audio/play?id={search_result['id']}", **request_overrides)).raise_for_status()
if not (download_url := resp.text.strip()) or not str(download_url).startswith('http'): return song_info
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')),
album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=download_result.get('cover_url'),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('url' not in search_result): continue
song_info = SongInfo(source=self.source)
# ----parse from quark links
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
# ----parse from play url
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
# ----filter if invalid
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,122 @@
'''
Function:
Implementation of KKWSMusicClient: https://www.kkws.cc/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import functools
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, safeextractfromdict, extractdurationsecondsfromlrc, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''KKWSMusicClient'''
class KKWSMusicClient(BaseMusicClient):
source = 'KKWSMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
super(KKWSMusicClient, self).__init__(**kwargs)
assert self.quark_parser_config.get('cookies'), f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.'
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 15)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
search_urls.append(f'https://www.kkws.cc/search.html?key={keyword}&page={int(count // page_size) + 1}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
soup = BeautifulSoup(html_text, "lxml")
search_results, base_url = [], 'https://www.kkws.cc/'
for li in soup.select("ul.listbox > li"):
if not (a := li.select_one("h2 a[href]")): continue
title_attr = (a.get("title") or "").strip(); full_text = a.get_text(" ", strip=True)
name = title_attr.replace("免费下载", "").strip() if title_attr else full_text; name = re.sub(r"\s*\[[^\]]+\]\s*", " ", name).strip(); name = re.sub(r"\s*-\s*\d+(\.\d+)?[KMG]?\s*$", "", name).strip()
m_fmt = re.search(r"\[([^\]]+)\]", full_text); file_format = m_fmt.group(1).strip() if m_fmt else ""
m_size = re.search(r"-\s*([0-9.]+\s*[KMG]?)", full_text, re.IGNORECASE); size = (m_size.group(1).replace(" ", "") if m_size else "").strip()
ems = li.select("small em"); share_time, singer = "", ""
if len(ems) >= 1: share_time = ems[0].get_text(strip=True).replace("分享时间:", "").strip()
if len(ems) >= 2: singer = ems[-1].get_text(strip=True).replace("演唱:", "").strip()
m_id = re.search(r"/detail/(\d+)\.html", (href := urljoin(base_url, a["href"]))); item_id = m_id.group(1) if m_id else ""
search_results.append({"id": item_id, "name": name, "format": file_format, "size": size, "share_time": share_time, "singer": singer, "detail_url": href})
return search_results
'''_extractlyricsandquark'''
    def _extractlyricsandquark(self, html_text: str, song_id: str, request_overrides: dict = None):
        '''
        Extract the lyric text and the Quark share links from a detail page.

        Args:
            html_text: html of the detail page.
            song_id: id sent along to the site's "getdown" endpoint.
            request_overrides: extra keyword arguments forwarded to ``self.get``.
        Returns:
            dict with "lyrics" (text built from the "#textbox" element, "" when absent) and
            "quark_links" (list of {"url", "formats", "names"} dicts, sorted by the
            MUSIC_QUALITY_RANK of each link's alphabetically first format, highest first).
        '''
        request_overrides = request_overrides or {}
        tb = (soup := BeautifulSoup(html_text, "lxml")).select_one("#textbox")
        # "mm:ss(.xx)" or plain seconds -> "MM:SS" (the value is truncated to whole seconds)
        to_mmss_func = lambda t: (lambda s: f"{s//60:02d}:{s%60:02d}")(int(float(t.split(":",1)[0])*60+float(t.split(":",1)[1])) if ":" in t else int(float(t)))
        # non-empty lines of the textbox; lines starting with a "[time]" tag are re-emitted as "[MM:SS] text", other lines are kept as they are
        lyrics = "" if not tb else "\n".join((f"[{to_mmss_func(m.group(1))}] {m.group(2).strip()}" if (m:=re.match(r"^\[(\d+(?:\.\d+)?|\d{1,2}:\d{2}(?:\.\d+)?)\]\s*(.*)$", line)) else f"{line}") for line in (l.strip() for l in tb.get_text("\n").splitlines()) if line)
        # url_map: share url -> {"url", "formats", "names"}, merges buttons that point to the same link
        url_map, rank = {}, KKWSMusicClient.MUSIC_QUALITY_RANK
        for a in soup.select("div.downbox a[onclick]"):
            if not (onclick := (a.get("onclick") or "").strip()): continue
            # every single-quoted argument of the onclick call
            args = re.findall(r"'([^']*)'", onclick); name = fmt = url = None
            # openModel(...) with >= 4 quoted args: (args[1], args[2], args[3]); mbgotourl(...) with >= 3: (args[1], args[2]); anything else is ignored
            if (parsed := ((args[1], args[2], args[3] or None) if onclick.startswith("openModel") and len(args) >= 4 else (args[1], args[2], None) if onclick.startswith("mbgotourl") and len(args) >= 3 else None)) is None: continue
            # args[1] may be "name|format"; an explicit format argument takes precedence over the one after "|"
            name_fmt, url, fmt = parsed; name, fmt = ((lambda n, f2: (n, fmt or f2))(*map(str.strip, name_fmt.split("|", 1))) if "|" in name_fmt else (name_fmt.strip(), fmt))
            # ask the site's getdown endpoint for the "decrypted_url"; on any failure the url taken from the page is kept
            try: url = resp2json(self.get(f'https://www.kkws.cc/getdown?url={url}&j=1&id={song_id}', allow_redirects=True, **request_overrides))['data']['decrypted_url']
            except Exception: url = url
            # only Quark share links are of interest
            if not (url and "pan.quark.cn" in url): continue
            e = url_map.setdefault(url, {"url": url, "formats": set(), "names": set()})
            # replace the free-form label by a MUSIC_QUALITY_RANK key whenever that key occurs in it (case-insensitive)
            if fmt: e["formats"].add(functools.reduce(lambda f, k: k if k.lower() in f.lower() else f, rank, fmt))
            if name: e["names"].add(name)
        # links without any format, or with an unknown one, rank 0
        quark_links = sorted(({"url": e["url"], "formats": sorted(e["formats"]), "names": sorted(e["names"])} for e in url_map.values()), key=lambda x: rank.get(x["formats"][0] if x["formats"] else "UNKNOWN", 0), reverse=True)
        quark_links = [q for q in quark_links if isinstance(q, dict) and q.get('url')]
        return {"lyrics": lyrics, "quark_links": quark_links}
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('detail_url' not in search_result) or ('id' not in search_result): continue
song_info = SongInfo(source=self.source)
try: (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status(); download_result = self._extractlyricsandquark(resp.text, search_result['id'], request_overrides)
except Exception: continue
for quark_info in download_result['quark_links']:
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
if not download_url or not str(download_url).startswith('http'): continue
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(search_result.get('singer')), album='NULL',
ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(download_result, ['lyrics'], '')),
cover_url=None, download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,171 @@
'''
Function:
Implementation of LivePOOMusicClient: https://www.livepoo.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import copy
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''LivePOOMusicClient'''
class LivePOOMusicClient(BaseMusicClient):
source = 'LivePOOMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
super(LivePOOMusicClient, self).__init__(**kwargs)
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
self.default_headers = self.default_search_headers
self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'page': 0, 'keyword': keyword}
default_rule.update(rule)
# construct search urls
base_url = 'https://www.livepoo.cn/search?'
self.search_size_per_page = min(self.search_size_per_source, 30)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['page'] = int(count // page_size)
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.livepoo.cn/"
for li in soup.select("ul.tuij_song li.song_item2"):
if not (a := li.select_one("a[href]")): continue
href = a["href"].strip(); full_url = urljoin(base_url, href)
title = title_div.get_text(strip=True) if (title_div := a.select_one(".song_info2 > div")) else a.get_text(" ", strip=True)
q = parse_qs(urlparse(href).query); mid = q.get("id", [None])[0]; m = re.compile(r'^(.*?)《(.*?)》$').match(title.strip())
singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title.strip())
search_results.append({"title": song_name, "artist": singer, "url": full_url, "id": mid.removeprefix('MUSIC_')})
return search_results
'''_extractquarklinksfromhtml'''
def _extractquarklinksfromhtml(self, html_text: str):
PAT = re.compile(
r"""(?:const|let|var)\s+
(?P<key>[A-Za-z0-9_]+?)\s*=\s*
(?P<quote>["'])
(?P<url>https?://pan\.quark\.cn/s/[^"']+)
(?P=quote)
""", re.VERBOSE
)
extract_quark_links_from_text_func = lambda text: [{"key": key, "format": fmt, "url": url} for m in PAT.finditer(text) if (url := m.group("url").strip()) and (key := m.group("key")) and ((base := (key[:-4] if key.endswith("_url") else key)) or True) and (((fmt := (([k for k in LivePOOMusicClient.MUSIC_QUALITY_RANK.keys() if k.lower() in base.lower()] or [base])[-1])) or True))]
soup, outs = BeautifulSoup(html_text, "lxml"), []
for s in soup.find_all("script"): "pan.quark.cn/s/" in (js := s.string or s.get_text() or "") and outs.extend(extract_quark_links_from_text_func(js))
seen, uniq = set(), []
for it in outs: (url := it["url"]) not in seen and (seen.add(url) or uniq.append(it))
uniq = sorted(uniq, key=lambda x: LivePOOMusicClient.MUSIC_QUALITY_RANK.get(str(x["format"]).upper(), 0), reverse=True)
return {'quark_links': uniq, 'cover_url': (bytes(m.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/") if (m := re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)) else None)}
'''_extractlrc'''
def _extractlrc(self, js_text: str):
# functions
norm_func = lambda s: re.sub(r"\s+", "", str(s))
pick_func = lambda d, target: next((v for k, v in d.items() if norm_func(k) == target), None)
fmt_lrc_time_func = lambda sec: (f"[{int((t := float(norm_func(sec)))) // 60:02d}:{(t - (int(t // 60) * 60)):05.2f}]")
lrc_list_to_lrc_func = lambda detail: (("\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]",]).strip() + "\n") + "\n".join(f"{ts}{ly}" for ts, ly in sorted([(fmt_lrc_time_func(t), re.sub(r"\s+", " ", str(lyric)).strip()) for it in (detail.get("music_lrclist", []) or []) for t in [pick_func(it, "time")] for lyric in [pick_func(it, "lineLyric")] if t is not None and lyric is not None], key=lambda x: x[0],)))
# match
if not (s := re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)): return {}, 'NULL'
string = s.group(1).replace("\r", "").replace("\n", ""); lyric_result = json_repair.loads(ast.literal_eval(f'"{string}"')); lyric = cleanlrc(lrc_list_to_lrc_func(lyric_result))
# return
return lyric_result, lyric
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
(resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
try: lyric_result, lyric = self._extractlrc(resp.text)
except Exception: lyric_result, lyric = {}, 'NULL'
download_result = self._extractquarklinksfromhtml(resp.text)
for quark_info in download_result['quark_links']:
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
if not download_url or not str(download_url).startswith('http'): continue
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
duration_in_secs = duration[0] if duration else 0
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric, cover_url=download_result.get('cover_url'), download_url=download_url,
download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
)
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if song_info.with_valid_download_url: break
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
# init
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
# parse
(resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
try: lyric_result, lyric = self._extractlrc(resp.text)
except Exception: lyric_result, lyric = {}, 'NULL'
download_result = self._extractquarklinksfromhtml(resp.text)
(resp := self.get(f"https://www.jcpoo.cn/audio/play?id={search_result['id']}", **request_overrides)).raise_for_status()
if not (download_url := resp.text.strip()) or not str(download_url).startswith('http'): return song_info
song_info = SongInfo(
raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')),
album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=download_result.get('cover_url'),
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
)
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
# return
return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
# init
request_overrides = request_overrides or {}
# successful
try:
# --search results
(resp := self.get(search_url, **request_overrides)).raise_for_status()
search_results = self._parsesearchresultsfromhtml(resp.text)
for search_result in search_results:
# --download results
if not isinstance(search_result, dict) or ('url' not in search_result): continue
song_info = SongInfo(source=self.source)
# ----parse from quark links
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
# ----parse from play url
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
# ----filter if invalid
if not song_info.with_valid_download_url: continue
# --append to song_infos
song_infos.append(song_info)
# --judgement for search_size
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
# --update progress
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
# failure
except Exception as err:
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
# return
return song_infos
@@ -0,0 +1,127 @@
'''
Function:
Implementation of MituMusicClient: https://www.qqmp3.vip/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import copy
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''MituMusicClient'''
class MituMusicClient(BaseMusicClient):
    """Music client for https://www.qqmp3.vip/ (API host ``api.qqmp3.vip``).

    Every search hit is resolved in two stages: the Quark share links stored
    under the hit's ``downurl`` key are tried first (only when Quark cookies
    are configured), and the ``kw.php`` play URL is used as the fallback.
    """
    source = 'MituMusicClient'
    # Preference score per audio format label; links with a higher score are tried first.
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    def __init__(self, **kwargs):
        """Set the default search/download headers and initialise the session.

        A warning is logged when ``quark_parser_config`` carries no cookies,
        because the Quark stage is skipped in that case.
        """
        super(MituMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {
            "accept": "*/*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "origin": "https://www.qqmp3.vip", "priority": "u=1, i", "referer": "https://www.qqmp3.vip/", "sec-ch-ua": '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
            "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        """Build the search URL list for ``keyword``.

        Args:
            keyword: text placed in the ``keyword`` query parameter.
            rule: optional query parameters that extend/override the defaults.
            request_overrides: accepted for signature parity; unused here.

        Returns:
            A one-element list holding the ``songs.php`` URL.

        Side effects:
            ``self.search_size_per_page`` is set to ``self.search_size_per_source``
            because the endpoint is queried with a single URL.
        """
        rule, request_overrides = rule or {}, request_overrides or {}
        query = {'keyword': keyword, 'type': 'search'}
        query.update(rule)
        search_urls = ['https://api.qqmp3.vip/api/songs.php?' + urlencode(query)]
        self.search_size_per_page = self.search_size_per_source
        return search_urls
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
        """Resolve a search hit through its Quark share links.

        Args:
            search_result: one item of the search response; ``rid`` is read
                unconditionally, ``downurl``/``name``/``artist``/``pic`` optionally.
            request_overrides: extra keyword arguments forwarded to ``self.get``
                and to the link tester.

        Returns:
            The ``SongInfo`` of the first candidate whose download URL tests
            valid; otherwise the last candidate built, or an empty ``SongInfo``
            when no candidate produced an ``http`` URL.
        """
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # Longest labels first so that e.g. "VORBIS" is not shadowed by a shorter label.
        formats_longest_first = sorted(MituMusicClient.MUSIC_QUALITY_RANK, key=len, reverse=True)
        def quality_score(item: str) -> int:
            # Only the text before the first "$$" is inspected for a format label.
            label = str(item.split("$$", 1)[0]).upper()
            fmt = next((f for f in formats_longest_first if f in label), "UNKNOWN")
            return MituMusicClient.MUSIC_QUALITY_RANK.get(fmt, 0)
        # Lyrics are best-effort: any failure just leaves them empty.
        try:
            (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
            lyric_result = resp2json(resp=resp)
        except Exception:
            lyric_result = {}
        quark_download_urls: list[str] = search_result.get('downurl', []) or []
        for quark_download_url in sorted(quark_download_urls, key=quality_score, reverse=True):
            download_result, download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
                ext='mp3', file_size=None, identifier=search_result['rid'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
                download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']; song_info.ext = probe_status['ext']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
            elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
        """Resolve a search hit through the ``kw.php`` play-URL endpoint.

        A failed request, an unparsable body or a missing ``data.url`` yields an
        empty ``SongInfo`` instead of raising, so a single broken track does not
        abort the surrounding search loop.

        Args:
            search_result: one item of the search response (``rid`` required).
            request_overrides: extra keyword arguments forwarded to ``self.get``
                and to the link tester.

        Returns:
            A populated ``SongInfo`` or an empty one when nothing usable was found.
        """
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        try:
            (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
            download_result = resp2json(resp=resp)
        except Exception:
            return song_info
        download_url = safeextractfromdict(download_result, ['data', 'url'], None)
        if not download_url or not str(download_url).startswith('http'): return song_info
        download_url = str(download_url)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
            ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['rid'], duration='-:-:-', lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # Trust the URL suffix when it is a known audio extension, else the probe, else mp3.
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        """Fetch one search URL and append every downloadable hit to ``song_infos``.

        Args:
            keyword: the search text (not read here; the URL already carries it).
            search_url: URL produced by ``_constructsearchurls``.
            request_overrides: extra keyword arguments forwarded to requests.
            song_infos: list that results are appended to in place; a fresh list
                is created when omitted (the default is no longer a shared list).
            progress: progress display whose task description is updated.
            progress_id: id of the task inside ``progress``.

        Returns:
            ``song_infos`` (the same object that was passed in, if any).
        """
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        try:
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = resp2json(resp)['data']
            for search_result in search_results:
                if not isinstance(search_result, dict) or ('rid' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # Quark links first (needs cookies), then the plain play URL.
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
                if not song_info.with_valid_download_url: continue
                song_infos.append(song_info)
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        return song_infos
@@ -0,0 +1,102 @@
'''
Function:
Implementation of TwoT58MusicClient: https://www.2t58.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, RandomIPGenerator, AudioLinkTester
'''TwoT58MusicClient'''
class TwoT58MusicClient(BaseMusicClient):
    """Music client for https://www.2t58.com/.

    Search pages are scraped as HTML; for every hit the ``down.php`` endpoint
    is probed for the qualities ``flac``, ``wav`` and ``320`` in that order,
    then lyrics and the cover image are fetched on a best-effort basis.
    """
    source = 'TwoT58MusicClient'
    def __init__(self, **kwargs):
        """Set the default search/download headers and initialise the session."""
        super(TwoT58MusicClient, self).__init__(**kwargs)
        # NOTE(review): the "cookie" value below is hard-coded; presumably captured from a browser session - confirm it is still needed/valid.
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-user": "?1",
            "cookie": "Hm_tf_hx9umupwu8o=1766942296; Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f=1766942296; cac9054cc9568db7fa51d16ee602cd7b=fd6762f9a63b502fda3befef86ea6460; server_name_session=91a76d925399962c481089ef4a83ce4e; Hm_lvt_hx9umupwu8o=1766942296,1768900847; Hm_lpvt_hx9umupwu8o=1768901202", "referer": "https://www.2t58.com/so/%E5%8F%AF%E6%83%9C.html", "priority": "u=0, i",
            "sec-ch-ua": "\"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"", "sec-ch-ua-mobile": "?0", "upgrade-insecure-requests": "1", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", "sec-fetch-site": "same-origin",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        """Build one search URL per result page.

        Args:
            keyword: search text interpolated into the ``/so/<keyword>`` path.
            rule: accepted for signature parity; unused here.
            request_overrides: accepted for signature parity; unused here.

        Returns:
            List of page URLs; page 1 has no page suffix, later pages use
            ``/so/<keyword>/<page>.html``.

        Side effects:
            ``self.search_size_per_page`` is capped at 68.
        """
        rule, request_overrides = rule or {}, request_overrides or {}
        self.search_size_per_page = min(self.search_size_per_source, 68)
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_number = int(count // page_size) + 1
            if page_number == 1: search_urls.append(f'https://www.2t58.com/so/{keyword}.html')
            else: search_urls.append(f'https://www.2t58.com/so/{keyword}/{page_number}.html')
            count += page_size
        return search_urls
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text: str):
        """Extract search hits from a result page.

        Returns:
            List of dicts with ``title`` (anchor text), ``url`` (absolute link),
            ``path`` (raw ``href``) and ``id`` (last path segment without its
            extension).
        """
        soup = BeautifulSoup(html_text, "lxml")
        search_results, base_url = [], 'https://www.2t58.com/'
        for anchor in soup.select(".play_list ul li .name a"):
            title, href = anchor.get_text(strip=True), anchor.get("href", "")
            absolute_url = urljoin(base_url, href)
            song_id = urlparse(absolute_url).path.strip('/').split('/')[-1].split('.')[0]
            search_results.append({"title": title, "url": absolute_url, "path": href, "id": song_id})
        return search_results
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        """Fetch one search page and append every downloadable hit to ``song_infos``.

        Args:
            keyword: the search text (not read here; the URL already carries it).
            search_url: URL produced by ``_constructsearchurls``.
            request_overrides: extra keyword arguments forwarded to requests.
            song_infos: list that results are appended to in place; a fresh list
                is created when omitted (the default is no longer a shared list).
            progress: progress display whose task description is updated.
            progress_id: id of the task inside ``progress``.

        Returns:
            ``song_infos`` (the same object that was passed in, if any).
        """
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        try:
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                if not isinstance(search_result, dict) or ('url' not in search_result) or ('id' not in search_result): continue
                # The title is parsed once per hit: a trailing "[...]" tag is dropped,
                # the song name is the text inside 《》 and the singer is whatever precedes 《.
                clean_title = re.sub(r"\s*\[[^\]]*\]\s*$", "", str(search_result.get("title") or "NULL"))
                song_name = (m.group(1) if (m := re.search(r"《(.*?)》", clean_title)) else clean_title).strip()
                singers = clean_title.partition("《")[0].strip()
                song_info = SongInfo(source=self.source)
                for quality in ['flac', 'wav', '320']:
                    headers = copy.deepcopy(self.default_download_headers); RandomIPGenerator().addrandomipv4toheaders(headers=headers)
                    # The final URL after redirects is taken as the download link.
                    try: download_url = self.session.head(f"https://www.2t58.com/plug/down.php?ac=music&id={search_result['id']}&k={quality}", allow_redirects=True, headers=headers, **request_overrides).url
                    except Exception: continue
                    song_info = SongInfo(
                        raw_data={'search': search_result, 'download': {}, 'lyric': {}}, source=self.source, song_name=legalizestring(song_name),
                        singers=legalizestring(singers), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'],
                        duration='-:-:-', lyric='NULL', cover_url=None, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                    )
                    if not song_info.with_valid_download_url: continue
                    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                    song_info.download_url_status['probe_status'] = probe_status
                    song_info.file_size = probe_status['file_size']
                    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
                    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                    if song_info.with_valid_download_url: break
                if not song_info.with_valid_download_url: continue
                # Lyrics are best-effort; the site's banner line is stripped before cleaning.
                try:
                    (resp := self.get(f"https://www.2t58.com/plug/down.php?ac=music&lk=lrc&id={search_result['id']}", **request_overrides)).raise_for_status()
                    song_info.lyric = cleanlrc(resp.text.replace('[00:00.00]欢迎来访爱听音乐网 www.2t58.com\r\n', ''))
                    song_info.duration_s = extractdurationsecondsfromlrc(song_info.lyric); song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    song_info.lyric, song_info.duration = 'NULL', '-:-:-'
                # Cover is best-effort; same parser as the search-page scraper.
                try:
                    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
                    cover = BeautifulSoup(resp.text, "lxml").select_one("#mcover")
                    song_info.cover_url = cover["src"] if cover and cover.has_attr("src") else None
                except Exception:
                    song_info.cover_url = None
                song_infos.append(song_info)
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        return song_infos
@@ -0,0 +1,150 @@
'''
Function:
Implementation of YinyuedaoMusicClient: https://1mp3.top/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import base64
from html import unescape
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, safeextractfromdict, seconds2hms, searchdictbykey, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''YinyuedaoMusicClient'''
class YinyuedaoMusicClient(BaseMusicClient):
    """Music client for https://1mp3.top/.

    The search page and each detail page are scraped as HTML. Every hit is
    resolved through the Quark links on its detail page first (only when Quark
    cookies are configured) and through the ``geturl`` endpoint otherwise.
    """
    source = 'YinyuedaoMusicClient'
    # Preference score per audio format label; links with a higher score are tried first.
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    # Matches a bracketed number such as "[12.5]"; used to read a duration out of the lyric text.
    LYRIC_TIMESTAMP_PATTERN = re.compile(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]")
    def __init__(self, **kwargs):
        """Set the default search/download headers and initialise the session.

        A warning is logged when ``quark_parser_config`` carries no cookies,
        because the Quark stage is skipped in that case.
        """
        super(YinyuedaoMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "priority": "u=0, i", "referer": "https://1mp3.top/",
            "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-site": "same-origin", "sec-fetch-user": "?1", "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_lastlyrictimestamp'''
    @staticmethod
    def _lastlyrictimestamp(lyric):
        """Return the bracketed number of the last lyric line that has one, else ``None``.

        Lines are scanned from the end so that trailing lines without a
        bracketed number no longer raise; within a line the first match is used.
        """
        for line in reversed(str(lyric).split('\n')):
            if (matched := YinyuedaoMusicClient.LYRIC_TIMESTAMP_PATTERN.search(line)): return float(matched.group(1))
        return None
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        """Build the search URL list for ``keyword``.

        The keyword is percent-encoded so that characters such as ``&`` or
        ``#`` cannot break out of the ``keyword`` query parameter.

        Returns:
            A one-element list holding the ``search.html`` URL.

        Side effects:
            ``self.search_size_per_page`` is set to ``self.search_size_per_source``.
        """
        from urllib.parse import quote
        rule, request_overrides = rule or {}, request_overrides or {}
        search_urls = [f'https://1mp3.top/search.html?keyword={quote(str(keyword), safe="")}']
        self.search_size_per_page = self.search_size_per_source
        return search_urls
    '''_parsemusicpage'''
    def _parsemusicpage(self, html_text: str, base_url: str = ""):
        """Scrape lyrics, cover and download links from a song detail page.

        Args:
            html_text: HTML of the detail page.
            base_url: base used to absolutise the cover and link URLs.

        Returns:
            Dict with ``lyrics`` (``"NULL"`` when absent), ``cover`` (``""``
            when absent) and ``quark_links``: de-duplicated link dicts
            (``format``, ``score``, ``url``, ``text``) sorted best score first.
        """
        soup, lyrics = BeautifulSoup(html_text, "html.parser"), "NULL"
        if (article := soup.select_one("section#demo article")): lyrics = re.sub(r"\n+", "\n", unescape(article.get_text("\n", strip=True))).strip()
        cover = ""; img = soup.select_one("#album-cover") or soup.select_one(".cover-art img")
        if img and img.get("src"): cover = urljoin(base_url, img["src"].strip())
        links, seen = [], set()
        for a in soup.select("a.download-link[data-url]"):
            fmt = (a.get("data-format") or "").strip().upper(); text = a.get_text(" ", strip=True)
            if not (url := (a.get("data-url") or "").strip()): continue
            # Fall back to a format keyword found in the link text; may stay None.
            fmt = fmt or ((m.group(1).upper()) if (m := re.search(r"\b(DSD|DSF|DFF|WAV|AIFF|FLAC|ALAC|APE|WV|OPUS|AAC|M4A|OGG|VORBIS|MP3|WMA)\b", text, re.I)) else None)
            item = {"format": fmt, "score": YinyuedaoMusicClient.MUSIC_QUALITY_RANK.get(fmt, -1), "url": urljoin(base_url, url), "text": text}
            if (key := (item["format"], item["url"])) not in seen: seen.add(key); links.append(item)
        # "format" can be None; coerce to "" so score ties never compare None with str.
        links.sort(key=lambda x: (-x["score"], x["format"] or "", x["url"]))
        return {"lyrics": lyrics, "cover": cover, "quark_links": links}
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text, base_url="https://www.1mp3.top"):
        """Extract search hits from the search page.

        Returns:
            List of dicts with ``id``, ``title``, ``singer`` and absolute ``url``.
            ``id`` is the part before ``|`` of the base64-decoded last path
            segment, or the raw segment when decoding fails.
        """
        soup, search_results = BeautifulSoup(html_text, "html.parser"), []
        for a in soup.select('a[href^="/mdetail/"]'):
            if len((cols := a.select("div.row > div"))) < 2: continue
            token = (href := a.get("href", "")).rsplit("/", 1)[-1]
            try: music_id = base64.b64decode(token).decode(errors="ignore").split("|", 1)[0]
            except Exception: music_id = token
            search_results.append({"id": music_id, "title": cols[0].get_text(" ", strip=True), "singer": cols[1].get_text(" ", strip=True), "url": urljoin(base_url, href)})
        return search_results
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, request_overrides: dict = None):
        """Resolve a search hit through the Quark links of its detail page.

        Args:
            search_result: hit dict from ``_parsesearchresultsfromhtml``.
            download_result: dict from ``_parsemusicpage``; the key
                ``quark_parse_result`` is written to it as a side effect.
            request_overrides: extra keyword arguments forwarded to the link tester.

        Returns:
            The ``SongInfo`` of the first candidate whose download URL tests
            valid; otherwise the last candidate built, or an empty ``SongInfo``.
            ``mgg`` links are skipped before they can become the result.
        """
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        for quark_download_url in download_result['quark_links']:
            if not isinstance(quark_download_url, dict) or not safeextractfromdict(quark_download_url, ['format'], ''): continue
            ext = str(quark_download_url.get('format', 'mp3')).lower()
            # Reject mgg up front: skipping after building song_info would leak it as the return value.
            if ext in {'mgg'}: continue
            download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['url'], **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result['quark_parse_result'], 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
                ext=ext, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(download_result.get('lyrics')),
                cover_url=download_result.get("cover"), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
            elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
        if (not song_info.duration or song_info.duration == '-:-:-') and ((seconds := self._lastlyrictimestamp(song_info.lyric)) is not None):
            song_info.duration_s = seconds; song_info.duration = seconds2hms(song_info.duration_s)
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, request_overrides: dict = None):
        """Resolve a search hit through the ``geturl`` endpoint.

        Args:
            search_result: hit dict from ``_parsesearchresultsfromhtml``.
            download_result: dict from ``_parsemusicpage``; the key ``geturl``
                is written to it as a side effect.
            request_overrides: extra keyword arguments forwarded to ``self.get``
                and to the link tester.

        Returns:
            A populated ``SongInfo``, or an empty one when the request fails,
            no ``http`` URL is returned, or the file is ``mgg``.
        """
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        encrypted_id = urlparse(str(search_result["url"])).path.strip('/').split('/')[-1]
        try:
            (resp := self.get(f'https://1mp3.top/geturl?id={encrypted_id}&quality=exhigh&type=json', **request_overrides)).raise_for_status()
            download_result['geturl'] = resp2json(resp=resp)
        except Exception:
            return song_info
        download_url = safeextractfromdict(download_result['geturl'], ['data', 'url'], None)
        if not download_url or not str(download_url).startswith('http'): return song_info
        download_url = str(download_url)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
            ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result.get('id'), duration_s=None, duration='-:-:-', lyric=cleanlrc(download_result.get('lyrics')), cover_url=download_result.get("cover"),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        if song_info.ext in {'mgg'}: return SongInfo(source=self.source)
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if (not song_info.duration or song_info.duration == '-:-:-') and ((seconds := self._lastlyrictimestamp(song_info.lyric)) is not None):
            song_info.duration_s = seconds; song_info.duration = seconds2hms(song_info.duration_s)
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        """Fetch the search page and append every downloadable hit to ``song_infos``.

        Args:
            keyword: the search text (not read here; the URL already carries it).
            search_url: URL produced by ``_constructsearchurls``.
            request_overrides: extra keyword arguments forwarded to requests.
            song_infos: list that results are appended to in place; a fresh list
                is created when omitted (the default is no longer a shared list).
            progress: progress display whose task description is updated.
            progress_id: id of the task inside ``progress``.

        Returns:
            ``song_infos`` (the same object that was passed in, if any).
        """
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        try:
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                if not isinstance(search_result, dict) or ('id' not in search_result) or ('url' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # The detail page URL doubles as the base for relative cover/link URLs.
                try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status(); download_result: dict = self._parsemusicpage(resp.text, base_url=search_result['url'])
                except Exception: continue
                # Quark links first (needs cookies), then the plain play URL.
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, request_overrides)
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, request_overrides)
                if not song_info.with_valid_download_url: continue
                song_infos.append(song_info)
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        return song_infos
@@ -0,0 +1,89 @@
'''
Function:
Implementation of ZhuolinMusicClient: https://music.zhuolin.wang/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
from urllib.parse import urlsplit
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, extractdurationsecondsfromlrc, cleanlrc, SongInfo, LanZouYParser, AudioLinkTester
'''ZhuolinMusicClient'''
class ZhuolinMusicClient(BaseMusicClient):
    """Music client for https://music.zhuolin.wang/.

    Search and lyric lookups are POST requests to ``plugns/api.php``; download
    links hosted on ``lanzouy.com`` are resolved through ``LanZouYParser``.
    """
    source = 'ZhuolinMusicClient'
    MUSIC_QUALITIES = {'128', '320', '2000'}
    def __init__(self, **kwargs):
        """Set the default search/download headers and initialise the session."""
        super(ZhuolinMusicClient, self).__init__(**kwargs)
        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        """Build one POST description per result page.

        Args:
            keyword: search text sent as the ``name`` form field.
            rule: optional form fields that extend/override the defaults
                (``count`` and ``pages`` are always recomputed per page).
            request_overrides: accepted for signature parity; unused here.

        Returns:
            List of ``{'url': ..., 'data': ...}`` dicts consumed by ``_search``.
        """
        rule, request_overrides = rule or {}, request_overrides or {}
        default_rule = {"types": "search", 'count': "20", 'source': "freemp3", 'pages': "1", 'name': keyword}
        default_rule.update(rule)
        base_url = 'https://music.zhuolin.wang/plugns/api.php'
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(default_rule)
            page_rule['count'] = page_size
            page_rule['pages'] = int(count // page_size) + 1
            search_urls.append({'url': base_url, 'data': page_rule})
            count += page_size
        return search_urls
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        """POST one search page and append every downloadable hit to ``song_infos``.

        Args:
            keyword: the search text (not read here; the form data already carries it).
            search_url: dict from ``_constructsearchurls`` with ``url`` plus the
                remaining keyword arguments for ``self.post``.
            request_overrides: extra keyword arguments forwarded to requests.
            song_infos: list that results are appended to in place; a fresh list
                is created when omitted (the default is no longer a shared list).
            progress: progress display whose task description is updated.
            progress_id: id of the task inside ``progress``.

        Returns:
            ``song_infos`` (the same object that was passed in, if any).
        """
        request_overrides = request_overrides or {}; search_meta = copy.deepcopy(search_url); search_url = search_meta.pop('url')
        song_infos = [] if song_infos is None else song_infos
        try:
            # NOTE(review): verify=False disables TLS certificate checks for this host - confirm it is still required.
            (resp := self.post(search_url, verify=False, **search_meta, **request_overrides)).raise_for_status()
            for search_result in resp2json(resp=resp):
                if not isinstance(search_result, dict) or ('id' not in search_result): continue
                download_url, download_result = safeextractfromdict(search_result, ['url'], ""), {}
                # hostname is None for empty/relative URLs; fall back to "" so the test cannot raise.
                hostname = urlsplit(str(download_url)).hostname or ''
                if 'lanzouy.com' in hostname: download_result, download_url = LanZouYParser.parsefromurl(download_url)
                if (not download_url) or (not str(download_url).startswith('http')): continue
                download_url = str(download_url)
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(', '.join(safeextractfromdict(search_result, ['artist'], []) or [])),
                    album=legalizestring(safeextractfromdict(search_result, ['album', 'name'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=search_result.get('pic'),
                    download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                )
                probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.download_url_status['probe_status'] = probe_status
                song_info.file_size = probe_status['file_size']
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = probe_status['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if not song_info.with_valid_download_url: continue
                # Lyrics are best-effort; the field is either inline text or a URL to fetch.
                try:
                    (resp := self.post('https://music.zhuolin.wang/plugns/api.php', verify=False, data={'types': 'lyric', 'id': search_result['id'], 'source': 'freemp3'}, **request_overrides)).raise_for_status()
                    lyric_result = resp2json(resp=resp); lyric = safeextractfromdict(lyric_result, ['lyric'], '')
                    if lyric.startswith('http'): lyric = cleanlrc(self.get(lyric, **request_overrides).text)
                    lyric = lyric or 'NULL'; song_info.duration_s = extractdurationsecondsfromlrc(lyric); song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    lyric_result, lyric = {}, 'NULL'
                song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
                song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
                song_infos.append(song_info)
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        return song_infos
@@ -0,0 +1,17 @@
'''initialize'''
from .data import SongInfo
from .hls import HLSDownloader
from .ip import RandomIPGenerator
from .quarkparser import QuarkParser
from .lanzouyparser import LanZouYParser
from .songinfoutils import SongInfoUtils
from .modulebuilder import BaseModuleBuilder
from .hosts import obtainhostname, hostmatchessuffix
from .importutils import optionalimport, optionalimportfrom
from .lyric import WhisperLRC, LyricSearchClient, extractdurationsecondsfromlrc, cleanlrc
from .logger import LoggerHandle, colorize, printtable, printfullline, smarttrunctable, cursorpickintable
from .misc import (
AudioLinkTester, legalizestring, touchdir, seconds2hms, byte2mb, cachecookies, resp2json, isvalidresp, safeextractfromdict, replacefile,
usedownloadheaderscookies, useparseheaderscookies, usesearchheaderscookies, cookies2dict, cookies2string, estimatedurationwithfilesizebr,
estimatedurationwithfilelink, searchdictbykey, shortenpathsinsonginfos, naiveguessextfromaudiobytes,
)
@@ -0,0 +1,849 @@
'''
Function:
Implementation of AppleMusicClient Utils (Refer To https://github.com/glomatico/gamdl)
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import os
import io
import m3u8
import uuid
import json
import base64
import shutil
import datetime
import requests
import subprocess
from enum import Enum
from typing import Any
from pathlib import Path
from xml.dom import minidom
from mutagen.mp4 import MP4
from xml.etree import ElementTree
from dataclasses import dataclass
from platformdirs import user_log_dir
from pywidevine import PSSH, Cdm, Device
from urllib.parse import parse_qs, urlparse
from .misc import safeextractfromdict, resp2json
from pywidevine.license_protocol_pb2 import WidevinePsshData
'''settings'''
# Video codec name -> MP4 sample-entry fourcc; read by MusicVideoCodec.fourcc().
FOURCC_MAP = {"h264": "avc1", "h265": "hvc1"}
# Display names keyed by MediaType value; read by MediaType.__str__().
MEDIA_TYPE_STR_MAP = {1: "Song", 6: "Music Video"}
# SongCodec values treated as legacy; read by SongCodec.islegacy().
LEGACY_SONG_CODECS = {"aac-legacy", "aac-he-legacy"}
# Image format name -> file extension (consumers are outside this part of the file).
IMAGE_FILE_EXTENSION_MAP = {"jpeg": ".jpg", "tiff": ".tif"}
# Display names keyed by MediaRating value; read by MediaRating.__str__().
MEDIA_RATING_STR_MAP = {0: "None", 1: "Explicit", 2: "Clean"}
# NOTE(review): presumably the codecs that need an .mp4 container -- confirm at the usage site.
MP4_FORMAT_CODECS = ["ec-3", "hvc1", "audio-atmos", "audio-ec3"]
# Accepted spellings for each kind of Apple Music resource (singular, plural, library variants).
SONG_MEDIA_TYPE = {"song", "songs", "library-songs"}
ALBUM_MEDIA_TYPE = {"album", "albums", "library-albums"}
MUSIC_VIDEO_MEDIA_TYPE = {"music-video", "music-videos", "library-music-videos"}
ARTIST_MEDIA_TYPE = {"artist", "artists", "library-artists"}
UPLOADED_VIDEO_MEDIA_TYPE = {"post", "uploaded-videos"}
PLAYLIST_MEDIA_TYPE = {"playlist", "playlists", "library-playlists"}
# NOTE(review): name suggests best-to-worst preference order for uploaded-video assets -- confirm.
UPLOADED_VIDEO_QUALITY_RANK = ["1080pHdVideo", "720pHdVideo", "sdVideoWithPlusAudio", "sdVideo", "sd480pVideo", "provisionalUploadVideo"]
# Song codec name -> regular expression; keys mirror the non-legacy SongCodec values.
# NOTE(review): presumably matched against HLS audio group/stream identifiers -- confirm at the usage site.
SONG_CODEC_REGEX_MAP = {
    "aac": r"audio-stereo-\d+", "aac-he": r"audio-HE-stereo-\d+", "aac-binaural": r"audio-stereo-\d+-binaural", "aac-downmix": r"audio-stereo-\d+-downmix", "aac-he-binaural": r"audio-HE-stereo-\d+-binaural",
    "aac-he-downmix": r"audio-HE-stereo-\d+-downmix", "atmos": r"audio-atmos-.*", "ac3": r"audio-ac3-.*", "alac": r"audio-alac-.*",
}
DRM_DEFAULT_KEY_MAPPING = {
"urn:uuid:edef8ba9-79d6-4ace-a3c8-27dcd51d21ed": ("data:text/plain;base64,AAAAOHBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAABgSEAAAAAAAAAAAczEvZTEgICBI88aJmwY="),
"com.microsoft.playready": ("data:text/plain;charset=UTF-16;base64,vgEAAAEAAQC0ATwAVwBSAE0ASABFAEEARABFAFIAIAB4AG0AbABuAHMAPQAiAGgAdAB0AHAAOgAvAC8AcwBjAGgAZQBtAGEAcwAuAG0AaQBjAHIAbwBzAG8AZgB0AC4AYwBvAG0ALwBEAFIATQAvADIAMAAwADcALwAwADMALwBQAGwAYQB5AFIAZQBhAGQAeQBIAGUAYQBkAGUAcgAiACAAdgBlAHIAcwBpAG8AbgA9ACIANAAuADMALgAwAC4AMAAiAD4APABEAEEAVABBAD4APABQAFIATwBUAEUAQwBUAEkATgBGAE8APgA8AEsASQBEAFMAPgA8AEsASQBEACAAQQBMAEcASQBEAD0AIgBBAEUAUwBDAEIAQwAiACAAVgBBAEwAVQBFAD0AIgBBAEEAQQBBAEEAQQBBAEEAQQBBAEIAegBNAFMAOQBsAE0AUwBBAGcASQBBAD0APQAiAD4APAAvAEsASQBEAD4APAAvAEsASQBEAFMAPgA8AC8AUABSAE8AVABFAEMAVABJAE4ARgBPAD4APAAvAEQAQQBUAEEAPgA8AC8AVwBSAE0ASABFAEEARABFAFIAPgA="),
"com.apple.streamingkeydelivery": "skd://itunes.apple.com/P000000000/s1/e1",
}
DEFAULT_SONG_DECRYPTION_KEY = "32b8ade1769e26b1ffb8986352793fc6"
HARDCODED_WVD = """V1ZEAgIDAASoMIIEpAIBAAKCAQEAwnCFAPXy4U1J7p1NohAS+xl040f5FBaE/59bPp301bGz0UGFT9VoEtY3vaeakKh/d319xTNvCSWsEDRaMmp/wSnMiEZUkkl04872jx2uHuR4k6KYuuJoqhsIo1TwUBueFZynHBUJzXQeW8Eb1tYAROGwp8W7r+b0RIjHC89RFnfVXpYlF5I6McktyzJNSOwlQbMqlVihfSUkv3WRd3HFmA0Oxay51CEIkoTlNTHVlzVyhov5eHCDSp7QENRgaaQ03jC/CcgFOoQymhsBtRCM0CQmfuAHjA9e77R6m/GJPy75G9fqoZM1RMzVDHKbKZPd3sFd0c0+77gLzW8cWEaaHwIDAQABAoIBAQCB2pN46MikHvHZIcTPDt0eRQoDH/YArGl2Lf7J+sOgU2U7wv49KtCug9IGHwDiyyUVsAFmycrF2RroV45FTUq0vi2SdSXV7Kjb20Ren/vBNeQw9M37QWmU8Sj7q6YyWb9hv5T69DHvvDTqIjVtbM4RMojAAxYti5hmjNIh2PrWfVYWhXxCQ/WqAjWLtZBM6Oww1byfr5I/wFogAKkgHi8wYXZ4LnIC8V7jLAhujlToOvMMC9qwcBiPKDP2FO+CPSXaqVhH+LPSEgLggnU3EirihgxovbLNAuDEeEbRTyR70B0lW19tLHixso4ZQa7KxlVUwOmrHSZf7nVuWqPpxd+BAoGBAPQLyJ1IeRavmaU8XXxfMdYDoc8+xB7v2WaxkGXb6ToX1IWPkbMz4yyVGdB5PciIP3rLZ6s1+ruuRRV0IZ98i1OuN5TSR56ShCGg3zkd5C4L/xSMAz+NDfYSDBdO8BVvBsw21KqSRUi1ctL7QiIvfedrtGb5XrE4zhH0gjXlU5qZAoGBAMv2segn0Jx6az4rqRa2Y7zRx4iZ77JUqYDBI8WMnFeR54uiioTQ+rOs3zK2fGIWlrn4ohco/STHQSUTB8oCOFLMx1BkOqiR+UyebO28DJY7+V9ZmxB2Guyi7W8VScJcIdpSOPyJFOWZQKXdQFW3YICD2/toUx/pDAJh1sEVQsV3AoGBANyyp1rthmvoo5cVbymhYQ08vaERDwU3PLCtFXu4E0Ow90VNn6Ki4ueXcv/gFOp7pISk2/yuVTBTGjCblCiJ1en4HFWekJwrvgg3Vodtq8Okn6pyMCHRqvWEPqD5hw6rGEensk0K+FMXnF6GULlfn4mgEkYpb+PvDhSYvQSGfkPJAoGAF/bAKFqlM/1eJEvU7go35bNwEiij9Pvlfm8y2L8Qj2lhHxLV240CJ6IkBz1Rl+S3iNohkT8LnwqaKNT3kVB5daEBufxMuAmOlOX4PmZdxDj/r6hDg8ecmjj6VJbXt7JDd/c5ItKoVeGPqu035dpJyE+1xPAY9CLZel4scTsiQTkCgYBt3buRcZMwnc4qqpOOQcXK+DWD6QvpkcJ55ygHYw97iP/lF4euwdHd+I5b+11pJBAao7G0fHX3eSjqOmzReSKboSe5L8ZLB2cAI8AsKTBfKHWmCa8kDtgQuI86fUfirCGdhdA9AVP2QXN2eNCuPnFWi0WHm4fYuUB5be2c18ucxAb9CAESmgsK3QMIAhIQ071yBlsbLoO2CSB9Ds0cmRif6uevBiKOAjCCAQoCggEBAMJwhQD18uFNSe6dTaIQEvsZdONH+RQWhP+fWz6d9NWxs9FBhU/VaBLWN72nmpCof3d9fcUzbwklrBA0WjJqf8EpzIhGVJJJdOPO9o8drh7keJOimLriaKobCKNU8FAbnhWcpxwVCc10HlvBG9bWAEThsKfFu6/m9ESIxwvPURZ31V6WJReSOjHJLcsyTUjsJUGzKpVYoX0lJL91kXdxxZgNDsWsudQhCJKE5TUx1Zc1coaL+Xhwg0qe0BDUYGmkNN4wvwnIBTqEMpobAbUQjNAkJn7gB4wPXu+0epvxiT8u+RvX6qGTNUTM1Qxym
ymT3d7BXdHNPu+4C81vHFhGmh8CAwEAASjwIkgBUqoBCAEQABqBAQQlRbfiBNDb6eU6aKrsH5WJaYszTioXjPLrWN9dqyW0vwfT11kgF0BbCGkAXew2tLJJqIuD95cjJvyGUSN6VyhL6dp44fWEGDSBIPR0mvRq7bMP+m7Y/RLKf83+OyVJu/BpxivQGC5YDL9f1/A8eLhTDNKXs4Ia5DrmTWdPTPBL8SIgyfUtg3ofI+/I9Tf7it7xXpT0AbQBJfNkcNXGpO3JcBMSgAIL5xsXK5of1mMwAl6ygN1Gsj4aZ052otnwN7kXk12SMsXheWTZ/PYh2KRzmt9RPS1T8hyFx/Kp5VkBV2vTAqqWrGw/dh4URqiHATZJUlhO7PN5m2Kq1LVFdXjWSzP5XBF2S83UMe+YruNHpE5GQrSyZcBqHO0QrdPcU35GBT7S7+IJr2AAXvnjqnb8yrtpPWN2ZW/IWUJN2z4vZ7/HV4aj3OZhkxC1DIMNyvsusUKoQQuf8gwKiEe8cFwbwFSicywlFk9la2IPe8oFShcxAzHLCCn/TIYUAvEL3/4LgaZvqWm80qCPYbgIP5HT8hPYkKWJ4WYknEWK+3InbnkzteFfGrQFCq4CCAESEGnj6Ji7LD+4o7MoHYT4jBQYjtW+kQUijgIwggEKAoIBAQDY9um1ifBRIOmkPtDZTqH+CZUBbb0eK0Cn3NHFf8MFUDzPEz+emK/OTub/hNxCJCao//pP5L8tRNUPFDrrvCBMo7Rn+iUb+mA/2yXiJ6ivqcN9Cu9i5qOU1ygon9SWZRsujFFB8nxVreY5Lzeq0283zn1Cg1stcX4tOHT7utPzFG/ReDFQt0O/GLlzVwB0d1sn3SKMO4XLjhZdncrtF9jljpg7xjMIlnWJUqxDo7TQkTytJmUl0kcM7bndBLerAdJFGaXc6oSY4eNy/IGDluLCQR3KZEQsy/mLeV1ggQ44MFr7XOM+rd+4/314q/deQbjHqjWFuVr8iIaKbq+R63ShAgMBAAEo8CISgAMii2Mw6z+Qs1bvvxGStie9tpcgoO2uAt5Zvv0CDXvrFlwnSbo+qR71Ru2IlZWVSbN5XYSIDwcwBzHjY8rNr3fgsXtSJty425djNQtF5+J2jrAhf3Q2m7EI5aohZGpD2E0cr+dVj9o8x0uJR2NWR8FVoVQSXZpad3M/4QzBLNto/tz+UKyZwa7Sc/eTQc2+ZcDS3ZEO3lGRsH864Kf/cEGvJRBBqcpJXKfG+ItqEW1AAPptjuggzmZEzRq5xTGf6or+bXrKjCpBS9G1SOyvCNF1k5z6lG8KsXhgQxL6ADHMoulxvUIihyPY5MpimdXfUdEQ5HA2EqNiNVNIO4qP007jW51yAeThOry4J22xs8RdkIClOGAauLIl0lLA4flMzW+VfQl5xYxP0E5tuhn0h+844DslU8ZF7U1dU2QprIApffXD9wgAACk26Rggy8e96z8i86/+YYyZQkc9hIdCAERrgEYCEbByzONrdRDs1MrS/ch1moV5pJv63BIKvQHGvLkaFwoMY29tcGFueV9uYW1lEgd1bmtub3duGioKCm1vZGVsX25hbWUSHEFuZHJvaWQgU0RLIGJ1aWx0IGZvciB4ODZfNjQaGwoRYXJjaGl0ZWN0dXJlX25hbWUSBng4Nl82NBodCgtkZXZpY2VfbmFtZRIOZ2VuZXJpY194ODZfNjQaIAoMcHJvZHVjdF9uYW1lEhBzZGtfcGhvbmVfeDg2XzY0GmMKCmJ1aWxkX2luZm8SVUFuZHJvaWQvc2RrX3Bob25lX3g4Nl82NC9nZW5lcmljX3g4Nl82NDo5L1BTUjEuMTgwNzIwLjAxMi80OTIzMjE0OnVzZXJkZWJ1Zy90ZXN0LWtleXMaHgoUd2lkZXZpbmVfY2RtX3ZlcnNpb24SBjE0LjAuMBokCh9vZW1fY3J5cHRvX3NlY3VyaXR5X3BhdGNoX2xldmVsEgEwMg4QASAAKA0wA
EAASABQAA=="""
# Cookie domain for Apple Music (not referenced in the visible part of the file).
APPLE_MUSIC_COOKIE_DOMAIN = ".music.apple.com"
# Base URL of the AMP API used by the catalog/library/search getters of AppleMusicClientAPIUtils.
AMP_API_URL = "https://amp-api.music.apple.com"
# Base URL used by AppleMusicClientItunesApiUtils.getitunespage().
ITUNES_PAGE_API_URL = "https://music.apple.com"
# Web player homepage: scraped by gettoken() and sent as origin/referer headers.
APPLE_MUSIC_HOMEPAGE_URL = "https://music.apple.com"
# Endpoint queried by AppleMusicClientItunesApiUtils.getlookupresult().
ITUNES_LOOKUP_API_URL = "https://itunes.apple.com/lookup"
# Endpoint posted to by AppleMusicClientAPIUtils.getwebplayback().
WEBPLAYBACK_API_URL = "https://play.itunes.apple.com/WebObjects/MZPlay.woa/wa/webPlayback"
# Endpoint posted to by AppleMusicClientAPIUtils.getlicenseexchange().
LICENSE_API_URL = "https://play.itunes.apple.com/WebObjects/MZPlay.woa/wa/acquireWebPlaybackLicense"
STOREFRONT_IDS = {
"AE": "143481-2,32", "AG": "143540-2,32", "AI": "143538-2,32", "AL": "143575-2,32", "AM": "143524-2,32", "AO": "143564-2,32", "AR": "143505-28,32", "AT": "143445-4,32",
"AU": "143460-27,32", "AZ": "143568-2,32", "BB": "143541-2,32", "BE": "143446-2,32", "BF": "143578-2,32", "BG": "143526-2,32", "BH": "143559-2,32", "BJ": "143576-2,32",
"BM": "143542-2,32", "BN": "143560-2,32", "BO": "143556-28,32", "BR": "143503-15,32", "BS": "143539-2,32", "BT": "143577-2,32", "BW": "143525-2,32", "BY": "143565-2,32",
"BZ": "143555-2,32", "CA": "143455-6,32", "CG": "143582-2,32", "CH": "143459-57,32", "CM": "143574-2,32", "CL": "143483-28,32", "CN": "143465-19,32", "CO": "143501-28,32",
"CR": "143495-28,32", "CV": "143580-2,32", "CY": "143557-2,32", "CZ": "143489-2,32", "DE": "143443-4,32", "DK": "143458-2,32", "DM": "143545-2,32", "DO": "143508-28,32",
"DZ": "143563-2,32", "EC": "143509-28,32", "EE": "143518-2,32", "EG": "143516-2,32", "ES": "143454-8,32", "FI": "143447-2,32", "FJ": "143583-2,32", "FM": "143591-2,32",
"FR": "143442-3,32", "GB": "143444-2,32", "GD": "143546-2,32", "GH": "143573-2,32", "GM": "143584-2,32", "GR": "143448-2,32", "GT": "143504-28,32", "GW": "143585-2,32",
"GY": "143553-2,32", "HK": "143463-45,32", "HN": "143510-28,32", "HR": "143494-2,32", "HU": "143482-2,32", "ID": "143476-2,32", "IE": "143449-2,32", "IL": "143491-2,32",
"IN": "143467-2,32", "IS": "143558-2,32", "IT": "143450-7,32", "JM": "143511-2,32", "JO": "143528-2,32", "JP": "143462-9,32", "KE": "143529-2,32", "KG": "143586-2,32",
"KH": "143579-2,32", "KN": "143548-2,32", "KR": "143466-13,32", "KW": "143493-2,32", "KY": "143544-2,32", "KZ": "143517-2,32", "LA": "143587-2,32", "LB": "143497-2,32",
"LC": "143549-2,32", "LK": "143486-2,32", "LR": "143588-2,32", "LT": "143520-2,32", "LU": "143451-2,32", "LV": "143519-2,32", "MD": "143523-2,32", "MG": "143531-2,32",
"MK": "143530-2,32", "ML": "143532-2,32", "MN": "143592-2,32", "MO": "143515-45,32", "MR": "143590-2,32", "MS": "143547-2,32", "MT": "143521-2,32", "MU": "143533-2,32",
"MW": "143589-2,32", "MX": "143468-28,32", "MY": "143473-2,32", "MZ": "143593-2,32", "NA": "143594-2,32", "NE": "143534-2,32", "NG": "143561-2,32", "NI": "143512-28,32",
"NL": "143452-10,32", "NO": "143457-2,32", "NP": "143484-2,32", "NZ": "143461-27,32", "OM": "143562-2,32", "PA": "143485-28,32", "PE": "143507-28,32", "PG": "143597-2,32",
"PH": "143474-2,32", "PK": "143477-2,32", "PL": "143478-2,32", "PT": "143453-24,32", "PW": "143595-2,32", "PY": "143513-28,32", "QA": "143498-2,32", "RO": "143487-2,32",
"RU": "143469-16,32", "SA": "143479-2,32", "SB": "143601-2,32", "SC": "143599-2,32", "SE": "143456-17,32", "SG": "143464-19,32", "SI": "143499-2,32", "SK": "143496-2,32",
"SL": "143600-2,32", "SN": "143535-2,32", "SR": "143554-2,32", "ST": "143598-2,32", "SV": "143506-28,32", "SZ": "143602-2,32", "TC": "143552-2,32", "TD": "143581-2,32",
"TH": "143475-2,32", "TJ": "143603-2,32", "TM": "143604-2,32", "TN": "143536-2,32", "TR": "143480-2,32", "TT": "143551-2,32", "TW": "143470-18,32", "TZ": "143572-2,32",
"UA": "143492-2,32", "UG": "143537-2,32", "US": "143441-1,32", "UY": "143514-2,32", "UZ": "143566-2,32", "VC": "143550-2,32", "VE": "143502-28,32", "VG": "143543-2,32",
"VN": "143471-2,32", "YE": "143571-2,32", "ZA": "143472-2,32", "ZW": "143605-2,32",
}
'''CoverFormat'''
class CoverFormat(Enum):
    '''Closed set of cover format identifiers: "jpg", "png" and "raw".'''
    JPG = "jpg"
    PNG = "png"
    # NOTE(review): presumably "keep the artwork as served" -- confirm at the usage site.
    RAW = "raw"
'''RemuxFormatMusicVideo'''
class RemuxFormatMusicVideo(Enum):
    '''Closed set of music-video remux format identifiers: "m4v" and "mp4".'''
    M4V = "m4v"
    MP4 = "mp4"
'''SyncedLyricsFormat'''
class SyncedLyricsFormat(Enum):
    '''Closed set of synced-lyrics format identifiers: "lrc", "srt" and "ttml".'''
    LRC = "lrc"
    SRT = "srt"
    TTML = "ttml"
'''MediaType'''
class MediaType(Enum):
    '''
    Numeric media type of a track.

    ``str()`` gives the display name from MEDIA_TYPE_STR_MAP and ``int()``
    gives the raw member value (written to the MP4 "stik" tag by
    MediaTags.asmp4tags).
    '''
    SONG = 1
    MUSIC_VIDEO = 6

    def __str__(self):
        '''Return the display name registered for this member's value.'''
        label = MEDIA_TYPE_STR_MAP[self.value]
        return label

    def __int__(self):
        '''Return the raw integer value of this member.'''
        return self.value
'''MediaRating'''
class MediaRating(Enum):
    '''
    Numeric content rating of a track.

    ``str()`` gives the display name from MEDIA_RATING_STR_MAP and ``int()``
    gives the raw member value (written to the MP4 "rtng" tag by
    MediaTags.asmp4tags).
    '''
    NONE = 0
    EXPLICIT = 1
    CLEAN = 2

    def __str__(self):
        '''Return the display name registered for this member's value.'''
        label = MEDIA_RATING_STR_MAP[self.value]
        return label

    def __int__(self):
        '''Return the raw integer value of this member.'''
        return self.value
'''MediaFileFormat'''
class MediaFileFormat(Enum):
    '''Closed set of media container identifiers: "mp4", "m4v" and "m4a".'''
    MP4 = "mp4"
    M4V = "m4v"
    M4A = "m4a"
'''DownloadMode'''
class DownloadMode(Enum):
    '''Closed set of download backends; only "nm3u8dlre" is defined.'''
    # NOTE(review): presumably the N_m3u8DL-RE command-line downloader -- confirm at the usage site.
    NM3U8DLRE = "nm3u8dlre"
'''RemuxMode'''
class RemuxMode(Enum):
    '''Closed set of remux backend identifiers: "ffmpeg" and "mp4box".'''
    FFMPEG = "ffmpeg"
    MP4BOX = "mp4box"
'''SongCodec'''
class SongCodec(Enum):
    '''
    Song codec identifiers.

    The two ``*-legacy`` values are the ones listed in LEGACY_SONG_CODECS;
    the remaining concrete codecs share their names with the keys of
    SONG_CODEC_REGEX_MAP. ``ASK`` is a non-codec sentinel value.
    '''
    AAC_LEGACY = "aac-legacy"
    AAC_HE_LEGACY = "aac-he-legacy"
    AAC = "aac"
    AAC_HE = "aac-he"
    AAC_BINAURAL = "aac-binaural"
    AAC_DOWNMIX = "aac-downmix"
    AAC_HE_BINAURAL = "aac-he-binaural"
    AAC_HE_DOWNMIX = "aac-he-downmix"
    ATMOS = "atmos"
    AC3 = "ac3"
    ALAC = "alac"
    ASK = "ask"

    def islegacy(self):
        '''Return True when this codec's value is listed in LEGACY_SONG_CODECS.'''
        codec_name = self.value
        return codec_name in LEGACY_SONG_CODECS
'''MusicVideoCodec'''
class MusicVideoCodec(Enum):
    '''
    Music-video codec identifiers.

    ``ASK`` has no entry in FOURCC_MAP, so calling ``fourcc()`` on it raises
    KeyError.
    '''
    H264 = "h264"
    H265 = "h265"
    ASK = "ask"

    def fourcc(self):
        '''Return the fourcc code that FOURCC_MAP registers for this codec.'''
        code = FOURCC_MAP[self.value]
        return code
'''MusicVideoResolution'''
class MusicVideoResolution(Enum):
    '''
    Music-video resolutions named by their vertical pixel count.

    ``int(member)`` strips the trailing character ("p") from the value and
    converts the rest, e.g. ``int(MusicVideoResolution.R1080P) == 1080``.
    '''
    R240P = "240p"
    R360P = "360p"
    R480P = "480p"
    R540P = "540p"
    R720P = "720p"
    R1080P = "1080p"
    R1440P = "1440p"
    R2160P = "2160p"

    def __int__(self):
        '''Return the numeric part of the value (everything but the last character).'''
        digits = self.value[:-1]
        return int(digits)
'''Lyrics'''
@dataclass
class Lyrics:
    '''Lyrics of one song in two renderings; both fields default to None.'''
    # Time-stamped lyrics text (the concrete format is chosen by the producer, e.g. getlyrics).
    synced: str | None = None
    # Plain lyrics text without timestamps.
    unsynced: str | None = None
'''MediaTags'''
@dataclass
class MediaTags:
    '''
    Metadata tags of one media item; every field is optional (None = absent).

    ``asmp4tags`` converts the populated fields into a dict keyed by MP4 atom
    names.
    '''
    album: str | None = None
    album_artist: str | None = None
    album_id: int | None = None
    album_sort: str | None = None
    artist: str | None = None
    artist_id: int | None = None
    artist_sort: str | None = None
    comment: str | None = None
    compilation: bool | None = None
    composer: str | None = None
    composer_id: int | None = None
    composer_sort: str | None = None
    copyright: str | None = None
    date: datetime.date | str | None = None
    disc: int | None = None
    disc_total: int | None = None
    gapless: bool | None = None
    genre: str | None = None
    genre_id: int | None = None
    lyrics: str | None = None
    media_type: MediaType | None = None
    rating: MediaRating | None = None
    storefront: str | None = None
    title: str | None = None
    title_id: int | None = None
    title_sort: str | None = None
    track: int | None = None
    track_total: int | None = None
    xid: str | None = None

    def asmp4tags(self, date_format: str = None):
        '''
        Build the MP4 tag dict for the populated fields.

        Args:
            date_format: optional ``strftime`` pattern applied when ``date`` is
                a ``datetime.date``; without it the ISO format is used. String
                dates are passed through unchanged.

        Returns:
            dict mapping atom name to value. Fields that are None are left
            out, booleans are stored bare, and every other value is wrapped
            in a one-element list. Track/disc become ``[number, total]`` pairs
            (missing halves are 0) and are dropped when both halves are 0.
        '''
        def _numbered_pair(number, total):
            # A pair of two zeros carries no information, so it is reported as absent.
            pair = [0 if number is None else number, 0 if total is None else total]
            if pair[0] == 0 and pair[1] == 0:
                return None
            return pair

        def _optional(convert, value):
            # Apply the converter only to values that are actually present.
            return None if value is None else convert(value)

        disc_pair = _numbered_pair(self.disc, self.disc_total)
        track_pair = _numbered_pair(self.track, self.track_total)

        if isinstance(self.date, datetime.date):
            date_text = self.date.isoformat() if date_format is None else self.date.strftime(date_format)
        elif isinstance(self.date, str):
            date_text = self.date
        else:
            date_text = None

        # Insertion order is kept identical to the historical layout of this mapping.
        candidates = {
            "\xa9alb": self.album,
            "aART": self.album_artist,
            "plID": self.album_id,
            "soal": self.album_sort,
            "\xa9ART": self.artist,
            "atID": self.artist_id,
            "soar": self.artist_sort,
            "\xa9cmt": self.comment,
            "xid": self.xid,
            "cpil": _optional(bool, self.compilation),
            "\xa9wrt": self.composer,
            "cmID": self.composer_id,
            "soco": self.composer_sort,
            "cprt": self.copyright,
            "\xa9day": date_text,
            "trkn": track_pair,
            "disk": disc_pair,
            "pgap": _optional(bool, self.gapless),
            "\xa9lyr": self.lyrics,
            "geID": self.genre_id,
            "stik": _optional(int, self.media_type),
            "\xa9nam": self.title,
            "\xa9gen": self.genre,
            "rtng": _optional(int, self.rating),
            "sfID": self.storefront,
            "cnID": self.title_id,
            "sonm": self.title_sort,
        }
        tags = {}
        for atom, value in candidates.items():
            if value is None:
                continue
            tags[atom] = value if isinstance(value, bool) else [value]
        return tags
'''PlaylistTags'''
@dataclass
class PlaylistTags:
    '''Playlist-related tags attached to a download; every field defaults to None.'''
    playlist_artist: str | None = None
    playlist_id: int | None = None
    playlist_title: str | None = None
    # NOTE(review): presumably the position of the track inside the playlist -- confirm.
    playlist_track: int | None = None
'''StreamInfo'''
@dataclass
class StreamInfo:
    '''Description of a single media stream; every field defaults to None.'''
    stream_url: str | None = None
    # DRM descriptors, one per key system named in the field (Widevine, PlayReady, FairPlay).
    widevine_pssh: str | None = None
    playready_pssh: str | None = None
    fairplay_key: str | None = None
    codec: str | None = None
    # Frame dimensions; NOTE(review): presumably only set for video streams -- confirm.
    width: int | None = None
    height: int | None = None
'''StreamInfoAv'''
@dataclass
class StreamInfoAv:
    '''Video and audio StreamInfo of one media item plus its container format; all fields default to None.'''
    media_id: str | None = None
    video_track: StreamInfo | None = None
    audio_track: StreamInfo | None = None
    file_format: MediaFileFormat | None = None
'''DecryptionKey'''
@dataclass
class DecryptionKey:
    '''A key id / key pair for one encrypted track; both fields default to None.'''
    # NOTE(review): presumably hex strings, as with DEFAULT_SONG_DECRYPTION_KEY -- confirm.
    kid: str | None = None
    key: str | None = None
'''DecryptionKeyAv'''
@dataclass
class DecryptionKeyAv:
    '''Decryption keys for the video and audio tracks of one media item; both default to None.'''
    video_track: DecryptionKey | None = None
    audio_track: DecryptionKey | None = None
'''DownloadItem'''
@dataclass
class DownloadItem:
    '''
    Aggregate record for one item moving through the download pipeline.

    Every field defaults to None; the steps that fill them in live outside
    this part of the file.
    '''
    # Raw API payloads.
    media_metadata: dict | None = None
    playlist_metadata: dict | None = None
    # NOTE(review): presumably produced by AppleMusicClientDownloadSongUtils.getrandomuuid4 -- confirm.
    random_uuid: str | None = None
    # Parsed lyrics and tag data.
    lyrics: Lyrics | None = None
    media_tags: MediaTags | None = None
    extra_tags: dict | None = None
    playlist_tags: PlaylistTags | None = None
    # Stream description and the keys needed to decrypt it.
    stream_info: StreamInfoAv | None = None
    decryption_key: DecryptionKeyAv | None = None
    # Cover artwork location (template and resolved URL).
    cover_url_template: str | None = None
    cover_url: str | None = None
    # File-system paths associated with the item.
    staged_path: str | None = None
    final_path: str | None = None
    playlist_file_path: str | None = None
    synced_lyrics_path: str | None = None
    cover_path: str | None = None
    flat_filter_result: Any = None
    # NOTE(review): presumably the exception captured when processing the item failed -- confirm.
    error: Exception | None = None
'''UrlInfo'''
@dataclass
class UrlInfo:
    '''Components of an Apple Music URL; every field defaults to None.'''
    # Components of a catalog URL.
    storefront: str | None = None
    type: str | None = None
    slug: str | None = None
    id: str | None = None
    sub_id: str | None = None
    # Components of a library URL.
    library_storefront: str | None = None
    library_type: str | None = None
    library_id: str | None = None
'''AppleMusicClientAPIUtils'''
class AppleMusicClientAPIUtils:
def __init__(self, storefront: str = "us", language: str = "en-US", media_user_token: str | None = None, developer_token: str | None = None) -> None:
self.storefront = storefront
self.language = language
self.media_user_token = media_user_token
self.token = developer_token
@property
def active_subscription(self) -> bool:
    '''
    Look up ``meta.subscription.active`` in the cached account info.

    An empty dict is used when ``account_info`` was never set, and False is
    the default handed to safeextractfromdict.
    '''
    account_info = getattr(self, "account_info", {})
    return safeextractfromdict(account_info, ['meta', 'subscription', 'active'], False)
@property
def account_restrictions(self) -> dict | None:
    '''
    Look up ``data[0].attributes.restrictions`` in the cached account info.

    An empty dict is used when ``account_info`` was never set, and None is
    the default handed to safeextractfromdict.
    '''
    account_info = getattr(self, "account_info", {})
    return safeextractfromdict(account_info, ['data', 0, 'attributes', 'restrictions'], None)
'''createfromnetscapecookies'''
@classmethod
def createfromnetscapecookies(cls, cookies: dict, request_overrides: dict = None, *args, **kwargs) -> "AppleMusicClientAPIUtils":
request_overrides = request_overrides or {}
media_user_token = cookies.get('media-user-token')
if not media_user_token: raise ValueError('"media-user-token" is not configured in cookies.')
return cls.create(storefront=None, media_user_token=media_user_token, developer_token=None, request_overrides=request_overrides, *args, **kwargs)
'''createfromwrapper'''
@classmethod
def createfromwrapper(cls, wrapper_account_url: str = "http://127.0.0.1:30020/", request_overrides: dict = None, *args, **kwargs) -> "AppleMusicClientAPIUtils":
    '''
    Build an initialized client from the tokens served by a wrapper endpoint.

    The endpoint is fetched with a plain ``requests.get`` (``request_overrides``
    is NOT applied to that call) and must return JSON containing "music_token"
    and "dev_token".

    Args:
        wrapper_account_url: URL of the wrapper account endpoint.
        request_overrides: extra keyword arguments forwarded to the Apple Music
            HTTP calls made during initialization.
        *args, **kwargs: passed through to ``create``.

    Raises:
        requests.HTTPError: the wrapper answered with an error status.
        KeyError: one of the two tokens is missing from the payload.
    '''
    overrides = request_overrides or {}
    response = requests.get(wrapper_account_url)
    response.raise_for_status()
    account = response.json()
    music_token = account["music_token"]
    dev_token = account["dev_token"]
    return cls.create(
        storefront=None,
        media_user_token=music_token,
        developer_token=dev_token,
        request_overrides=overrides,
        *args,
        **kwargs,
    )
'''create'''
@classmethod
def create(cls, storefront: str | None = "us", language: str = "en-US", media_user_token: str | None = None, developer_token: str | None = None, request_overrides: dict = None) -> "AppleMusicClientAPIUtils":
request_overrides = request_overrides or {}
api = cls(storefront=storefront, language=language, media_user_token=media_user_token, developer_token=developer_token)
api.initialize(request_overrides=request_overrides)
return api
'''initialize'''
def initialize(self, request_overrides: dict = None) -> None:
request_overrides = request_overrides or {}
self.initializeclient(); self.initializetoken(request_overrides=request_overrides); self.initializeaccountinfo(request_overrides=request_overrides)
'''initializeclient'''
def initializeclient(self) -> None:
    '''Create ``self.client`` as a ``requests.Session`` carrying browser-like default headers.'''
    browser_headers = {
        "accept": "*/*",
        "accept-language": "en-US",
        "origin": APPLE_MUSIC_HOMEPAGE_URL,
        "priority": "u=1, i",
        "referer": APPLE_MUSIC_HOMEPAGE_URL,
        "sec-ch-ua": '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
    }
    self.client = requests.Session()
    self.client.headers.update(browser_headers)
'''gettoken'''
def gettoken(self, request_overrides: dict = None) -> str:
    '''
    Scrape a bearer token from the Apple Music web player.

    The homepage is fetched to find the ``assets/index-legacy...js`` bundle,
    then the bundle is fetched and the first string starting with "eyJh" (up
    to the next double quote) is returned.

    Args:
        request_overrides: extra keyword arguments for both GET requests.

    Raises:
        requests.HTTPError: either request answered with an error status.
        Exception: the bundle URI or the token could not be found.
    '''
    overrides = request_overrides or {}
    homepage = self.client.get(APPLE_MUSIC_HOMEPAGE_URL, params={"l": self.language}, allow_redirects=True, **overrides)
    homepage.raise_for_status()
    bundle_match = re.search(r"/(assets/index-legacy[~-][^/\"]+\.js)", homepage.text)
    if bundle_match is None:
        raise Exception("index.js URI not found in Apple Music homepage")
    bundle_uri = bundle_match.group(1)
    bundle = self.client.get(f"{APPLE_MUSIC_HOMEPAGE_URL}/{bundle_uri}", params={"l": self.language}, allow_redirects=True, **overrides)
    bundle.raise_for_status()
    token_match = re.search('(?=eyJh)(.*?)(?=")', bundle.text)
    if token_match is None:
        raise Exception("Token not found in index.js page")
    return token_match.group(1)
'''initializetoken'''
def initializetoken(self, request_overrides: dict = None) -> None:
request_overrides = request_overrides or {}
self.token = self.token or self.gettoken(request_overrides=request_overrides)
self.client.headers.update({"authorization": f"Bearer {self.token}"})
'''initializeaccountinfo'''
def initializeaccountinfo(self, request_overrides: dict = None) -> None:
request_overrides = request_overrides or {}
if not self.media_user_token: return
self.client.cookies.update({"media-user-token": self.media_user_token})
self.account_info = self.getaccountinfo(request_overrides=request_overrides)
self.storefront = safeextractfromdict(self.account_info, ['meta', 'subscription', 'storefront'], 'us')
'''getaccountinfo'''
def getaccountinfo(self, meta: str | None = "subscription", request_overrides: dict = None) -> dict:
    '''
    Fetch ``/v1/me/account`` from the AMP API.

    Args:
        meta: value of the ``meta`` query parameter; omitted when falsy.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload.

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload lacks "data", or lacks "meta" although ``meta``
            was requested.
    '''
    overrides = request_overrides or {}
    query = {"meta": meta} if meta else {}
    query["l"] = self.language
    resp = self.client.get(f"{AMP_API_URL}/v1/me/account", params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    account_info = resp2json(resp=resp)
    missing_data = "data" not in account_info
    if missing_data or (meta and "meta" not in account_info):
        raise Exception("Error getting account info: ", resp.text)
    return account_info
'''getsong'''
def getsong(self, song_id: str, extend: str = "extendedAssetUrls", include: str = "lyrics,albums", request_overrides: dict = None) -> dict | None:
    '''
    Fetch a catalog song from the current storefront.

    Args:
        song_id: catalog id of the song.
        extend: value of the ``extend`` query parameter.
        include: value of the ``include`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/songs/{song_id}"
    query = {"extend": extend, "include": include, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    song = resp2json(resp=resp)
    if "data" not in song:
        raise Exception("Error getting song: ", resp.text)
    return song
'''getmusicvideo'''
def getmusicvideo(self, music_video_id: str, include: str = "albums", request_overrides: dict = None) -> dict | None:
    '''
    Fetch a catalog music video from the current storefront.

    Args:
        music_video_id: catalog id of the music video.
        include: value of the ``include`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/music-videos/{music_video_id}"
    query = {"include": include, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    music_video = resp2json(resp=resp)
    if "data" not in music_video:
        raise Exception("Error getting music video: ", resp.text)
    return music_video
'''getuploadedvideo'''
def getuploadedvideo(self, post_id: str, request_overrides: dict = None) -> dict | None:
    '''
    Fetch an uploaded video (post) from the current storefront.

    Args:
        post_id: id of the uploaded video.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/uploaded-videos/{post_id}"
    query = {"l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    uploaded_video = resp2json(resp=resp)
    if "data" not in uploaded_video:
        raise Exception("Error getting uploaded video: ", resp.text)
    return uploaded_video
'''getalbum'''
def getalbum(self, album_id: str, extend: str = "extendedAssetUrls", request_overrides: dict = None) -> dict | None:
    '''
    Fetch a catalog album from the current storefront.

    Args:
        album_id: catalog id of the album.
        extend: value of the ``extend`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/albums/{album_id}"
    query = {"extend": extend, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    album = resp2json(resp=resp)
    if "data" not in album:
        raise Exception("Error getting album: ", resp.text)
    return album
'''getplaylist'''
def getplaylist(self, playlist_id: str, limit_tracks: int = 300, extend: str = "extendedAssetUrls", request_overrides: dict = None) -> dict | None:
    '''
    Fetch a catalog playlist from the current storefront.

    Args:
        playlist_id: catalog id of the playlist.
        limit_tracks: value of the ``limit[tracks]`` query parameter.
        extend: value of the ``extend`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/playlists/{playlist_id}"
    query = {"limit[tracks]": limit_tracks, "extend": extend, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    playlist = resp2json(resp=resp)
    if "data" not in playlist:
        raise Exception("Error getting playlist: ", resp.text)
    return playlist
'''getartist'''
def getartist(self, artist_id: str, include: str = "albums,music-videos", limit: int = 100, request_overrides: dict = None) -> dict | None:
    '''
    Fetch a catalog artist from the current storefront.

    Args:
        artist_id: catalog id of the artist.
        include: comma-separated relationships to include; each one also gets
            a ``limit[<relationship>]`` query parameter.
        limit: value used for every ``limit[...]`` parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/artists/{artist_id}"
    query = {"include": include, "l": self.language}
    for relationship in include.split(","):
        query[f"limit[{relationship}]"] = limit
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    artist = resp2json(resp=resp)
    if "data" not in artist:
        raise Exception("Error getting artist:", resp.text)
    return artist
'''getlibraryalbum'''
def getlibraryalbum(self, album_id: str, extend: str = "extendedAssetUrls", request_overrides: dict = None) -> dict | None:
    '''
    Fetch an album from the user's library (``/v1/me/library/albums``).

    Args:
        album_id: library id of the album.
        extend: value of the ``extend`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/me/library/albums/{album_id}"
    query = {"extend": extend, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    album = resp2json(resp=resp)
    if "data" not in album:
        raise Exception("Error getting library album: ", resp.text)
    return album
'''getlibraryplaylist'''
def getlibraryplaylist(self, playlist_id: str, include: str = "tracks", limit: int = 100, extend: str = "extendedAssetUrls", request_overrides: dict = None) -> dict | None:
    '''
    Fetch a playlist from the user's library (``/v1/me/library/playlists``).

    Args:
        playlist_id: library id of the playlist.
        include: comma-separated relationships to include; each one also gets
            a ``limit[<relationship>]`` query parameter.
        limit: value used for every ``limit[...]`` parameter.
        extend: value of the ``extend`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/me/library/playlists/{playlist_id}"
    query = {"include": include}
    for relationship in include.split(","):
        query[f"limit[{relationship}]"] = limit
    query["extend"] = extend
    query["l"] = self.language
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    playlist = resp2json(resp=resp)
    if "data" not in playlist:
        raise Exception("Error getting library playlist: ", resp.text)
    return playlist
'''getsearchresults'''
def getsearchresults(self, term: str, types: str = "songs,music-videos,albums,playlists,artists", limit: int = 50, offset: int = 0, request_overrides: dict = None) -> dict:
    '''
    Search the catalog of the current storefront.

    Args:
        term: search text.
        types: comma-separated resource types to search.
        limit: value of the ``limit`` query parameter.
        offset: value of the ``offset`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "results").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "results" key.
    '''
    overrides = request_overrides or {}
    endpoint = f"{AMP_API_URL}/v1/catalog/{self.storefront}/search"
    query = {"term": term, "types": types, "limit": limit, "offset": offset, "l": self.language}
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    search_results = resp2json(resp=resp)
    if "results" not in search_results:
        raise Exception("Error searching: ", resp.text)
    return search_results
'''extendapidata'''
def extendapidata(self, api_response: dict, extend: str = "extendedAssetUrls", request_overrides: dict = None):
request_overrides = request_overrides or {}
next_uri: str = api_response.get("next")
if not next_uri: return
next_uri_params = parse_qs(urlparse(next_uri).query)
limit = int(next_uri_params["offset"][0])
while next_uri:
extended_api_data = self.getextendedapidata(next_uri, limit, extend, request_overrides)
yield extended_api_data
next_uri = extended_api_data.get("next")
'''getextendedapidata'''
def getextendedapidata(self, next_uri: str, limit: int, extend: str, request_overrides: dict = None) -> dict:
    '''
    Fetch one follow-up page addressed by a "next" URI.

    The request goes to ``AMP_API_URL + next_uri``; the URI's own query values
    are also merged into ``params`` and take precedence over ``limit``,
    ``extend`` and ``l`` on key collisions.

    Args:
        next_uri: path-and-query string taken from a payload's "next" entry.
        limit: value of the ``limit`` query parameter.
        extend: value of the ``extend`` query parameter.
        request_overrides: extra keyword arguments for the GET request.

    Returns:
        The decoded payload (guaranteed to contain "data").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "data" key.
    '''
    overrides = request_overrides or {}
    endpoint = AMP_API_URL + next_uri
    query = {"limit": limit, "extend": extend, "l": self.language}
    query.update(parse_qs(urlparse(next_uri).query))
    resp = self.client.get(endpoint, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    extended_api_data = resp2json(resp=resp)
    if "data" not in extended_api_data:
        raise Exception("Error getting extended API data: ", resp.text)
    return extended_api_data
'''getwebplayback'''
def getwebplayback(self, track_id: str, request_overrides: dict = None) -> dict:
    '''
    Request web-playback information for a track.

    Args:
        track_id: id sent as "salableAdamId" in the JSON body.
        request_overrides: extra keyword arguments for the POST request.

    Returns:
        The decoded payload (guaranteed to contain "songList").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "songList" key.
    '''
    overrides = request_overrides or {}
    payload = {"salableAdamId": track_id, "language": self.language}
    query = {"l": self.language}
    resp = self.client.post(WEBPLAYBACK_API_URL, json=payload, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    webplayback = resp2json(resp=resp)
    if "songList" not in webplayback:
        raise Exception("Error getting webplayback: ", resp.text)
    return webplayback
'''getlicenseexchange'''
def getlicenseexchange(self, track_id: str, track_uri: str, challenge: str, key_system: str = "com.widevine.alpha", request_overrides: dict = None) -> dict:
    '''
    Exchange a DRM license challenge for a license.

    Args:
        track_id: id sent as "adamId".
        track_uri: key URI sent as "uri".
        challenge: license challenge sent as "challenge".
        key_system: DRM key system identifier sent as "key-system".
        request_overrides: extra keyword arguments for the POST request.

    Returns:
        The decoded payload (guaranteed to contain "license").

    Raises:
        requests.HTTPError: the request answered with an error status.
        Exception: the payload has no "license" key.
    '''
    overrides = request_overrides or {}
    payload = {
        "challenge": challenge,
        "key-system": key_system,
        "uri": track_uri,
        "adamId": track_id,
        "isLibrary": False,
        "user-initiated": True,
    }
    query = {"l": self.language}
    resp = self.client.post(LICENSE_API_URL, json=payload, params=query, allow_redirects=True, **overrides)
    resp.raise_for_status()
    license_exchange = resp2json(resp=resp)
    if "license" not in license_exchange:
        raise Exception("Error getting license exchange: ", resp.text)
    return license_exchange
'''AppleMusicClientItunesApiUtils'''
class AppleMusicClientItunesApiUtils:
    '''
    Small client for the iTunes lookup and page endpoints.

    Construction resolves the storefront id from STOREFRONT_IDS and prepares a
    ``requests.Session`` that sends it in the "X-Apple-Store-Front" header.
    '''

    def __init__(self, storefront: str = "us", language: str = "en-US") -> None:
        '''
        Args:
            storefront: two-letter storefront code (matched case-insensitively).
            language: value sent as the ``lang`` query parameter.

        Raises:
            Exception: the storefront has no entry in STOREFRONT_IDS.
        '''
        self.storefront, self.language = storefront, language
        self.initialize()

    def initialize(self) -> None:
        '''Resolve the storefront id, then build the HTTP session that depends on it.'''
        self.initializestorefrontid()
        self.initializeclient()

    def initializestorefrontid(self) -> None:
        '''Set ``self.storefront_id`` from STOREFRONT_IDS, or raise for unknown storefronts.'''
        country_code = self.storefront.upper()
        try:
            storefront_id = STOREFRONT_IDS[country_code]
        except KeyError:
            raise Exception(f"No storefront id for {self.storefront}")
        self.storefront_id = storefront_id

    def initializeclient(self) -> None:
        '''Create ``self.client`` with the storefront header installed.'''
        session = requests.Session()
        session.headers.update({"X-Apple-Store-Front": f"{self.storefront_id} t:music31"})
        self.client = session

    def getlookupresult(self, media_id: str, entity: str = "album", request_overrides: dict = None) -> dict:
        '''
        Query the iTunes lookup endpoint.

        Args:
            media_id: value of the ``id`` query parameter.
            entity: value of the ``entity`` query parameter.
            request_overrides: extra keyword arguments for the GET request.

        Returns:
            The decoded payload (guaranteed to contain "results").

        Raises:
            requests.HTTPError: the request answered with an error status.
            Exception: the payload has no "results" key.
        '''
        overrides = request_overrides or {}
        query = {"id": media_id, "entity": entity, "country": self.storefront, "lang": self.language}
        resp = self.client.get(ITUNES_LOOKUP_API_URL, params=query, **overrides)
        resp.raise_for_status()
        lookup_result = resp2json(resp)
        if "results" not in lookup_result:
            raise Exception("Error getting lookup result: ", resp.text)
        return lookup_result

    def getitunespage(self, media_type: str, media_id: str, request_overrides: dict = None) -> dict:
        '''
        Fetch ``<ITUNES_PAGE_API_URL>/<media_type>/<media_id>`` and decode it.

        Args:
            media_type: first path segment of the page URL.
            media_id: second path segment of the page URL.
            request_overrides: extra keyword arguments for the GET request.

        Returns:
            The decoded payload (guaranteed to contain "storePlatformData").

        Raises:
            requests.HTTPError: the request answered with an error status.
            Exception: the payload has no "storePlatformData" key.
        '''
        overrides = request_overrides or {}
        endpoint = f"{ITUNES_PAGE_API_URL}/{media_type}/{media_id}"
        query = {"country": self.storefront, "lang": self.language}
        resp = self.client.get(endpoint, params=query, **overrides)
        resp.raise_for_status()
        itunes_page = resp2json(resp)
        if "storePlatformData" not in itunes_page:
            raise Exception("Error getting iTunes page: ", resp.text)
        return itunes_page
'''AppleMusicClientDownloadSongUtils'''
class AppleMusicClientDownloadSongUtils:
# Class-level Widevine CDM, built once at class-definition time from the embedded HARDCODED_WVD device blob.
cdm = Cdm.from_device(Device.loads(HARDCODED_WVD))
# Replace the CDM's session cap with infinity.
# NOTE(review): presumably so that many downloads can hold sessions at once -- confirm against pywidevine.
cdm.MAX_NUM_OF_SESSIONS = float("inf")
'''getrandomuuid4'''
@staticmethod
def getrandomuuid4() -> str:
    '''Return a freshly generated random UUID (version 4) as 32 lowercase hex characters.'''
    fresh = uuid.uuid4()
    return fresh.hex
'''parsedate'''
@staticmethod
def parsedate(date: str) -> datetime.datetime:
    '''
    Parse an ISO-8601 timestamp, ignoring everything from the first "Z" on.

    Args:
        date: timestamp text such as "2020-01-02T03:04:05Z".

    Returns:
        The parsed ``datetime.datetime`` (naive unless the text before "Z"
        carries its own offset).

    Raises:
        ValueError: the remaining text is not a valid ISO format.
    '''
    iso_text, *_ = date.split("Z")
    return datetime.datetime.fromisoformat(iso_text)
'''fixkeyid'''
@staticmethod
def fixkeyid(input_path: str):
    '''
    Overwrite, in place, the 16-byte key id that follows every "tenc" marker.

    The n-th marker found (counting from 0) gets the key id
    ``bytes.fromhex(f"{n:032}")``; the key id is written 12 bytes after the
    start of the marker.

    The file is scanned through a memory map instead of fixed 4096-byte reads.
    This fixes three defects of the chunked scan:
      * the start of a short final chunk was computed as ``pos - 4096``, so
        key ids were written at the wrong offset and corrupted unrelated bytes;
      * a marker at index 0 of a chunk was indistinguishable from "not found";
      * a marker straddling a chunk boundary was never seen.

    Args:
        input_path: path of the file to patch.

    Notes:
        A marker so close to the end of the file that the key id would not fit
        is left untouched (the file is never grown). Empty files are a no-op.
    '''
    import mmap  # local import keeps this fix self-contained

    marker = b"tenc"
    kid_offset, kid_size = 12, 16
    count = 0
    with open(input_path, "rb+") as f:
        file_size = f.seek(0, 2)
        if file_size == 0:
            # mmap cannot map an empty file, and there is nothing to patch anyway.
            return
        with mmap.mmap(f.fileno(), 0) as mapped:
            found = mapped.find(marker)
            while found != -1:
                kid = found + kid_offset
                if kid + kid_size > file_size:
                    # Truncated box: writing would run past the end of the file.
                    break
                # Zero-padded decimal counter read as hex, as the key ids were always generated.
                mapped[kid:kid + kid_size] = bytes.fromhex(f"{count:032}")
                count += 1
                # Resume right after the start of the key id, like the chunked scan did.
                found = mapped.find(marker, kid + 1)
            mapped.flush()
'''getmediaidoflibrarymedia'''
@staticmethod
def getmediaidoflibrarymedia(library_media_metadata: dict) -> str:
    '''
    Resolve the catalog id of a library item.

    Args:
        library_media_metadata: item payload; must contain "id" and may carry
            ``attributes.playParams.catalogId``.

    Returns:
        ``playParams["catalogId"]`` when present, otherwise the item's own "id".

    Raises:
        KeyError: the payload has no "id" (it is read even when a catalog id exists).
    '''
    play_params = safeextractfromdict(library_media_metadata, ['attributes', 'playParams'], {})
    own_id = library_media_metadata["id"]
    return play_params.get("catalogId", own_id)
'''getlyrics'''
@staticmethod
def getlyrics(song_metadata: dict, synced_lyrics_format: SyncedLyricsFormat = SyncedLyricsFormat.LRC, apple_music_api: AppleMusicClientAPIUtils = None, request_overrides: dict = None) -> Lyrics | None:
    '''Build a Lyrics object (synced + unsynced text) from the song's TTML lyrics.

    Returns None when attributes.hasLyrics is falsy or no TTML document can be found.
    When the metadata carries no relationships.lyrics entry, the song is fetched
    again through apple_music_api.getsong.
    '''
    def parse_ttml_timestamp(ts):
        # every run of digits in the timestamp becomes one part
        parts = re.findall(r"\d+", ts)
        if len(parts) == 2 and ":" in ts:
            # "MM:SS"
            seconds = int(parts[-2]) * 60 + int(parts[-1])
        elif len(parts) == 1:
            # a single number is treated as milliseconds
            seconds = int(parts[-1]) / 1000
        else:
            # "[MM:]SS.fff"; any part before the minutes is ignored
            seconds = ((int(parts[-3]) * 60) if (len(parts) > 2) else 0) + float(f"{parts[-2]}.{parts[-1]}")
        return datetime.datetime.fromtimestamp(seconds, tz=datetime.timezone.utc)

    def srt_line(index, element):
        begin = parse_ttml_timestamp(element.attrib.get('begin')).strftime('%H:%M:%S,%f')[:-3]
        end = parse_ttml_timestamp(element.attrib.get('end')).strftime('%H:%M:%S,%f')[:-3]
        return f"{index}\n{begin} --> {end}\n{element.text}\n"

    def lrc_line(element):
        ts = parse_ttml_timestamp(element.attrib.get("begin"))
        text = element.text
        ms_new = ts.strftime("%f")[:-3]
        if int(ms_new[-1]) >= 5:
            # round up to the next hundredth of a second: drop the microseconds, add (hundredths + 1) * 10 ms
            ts = ts + datetime.timedelta(milliseconds=((int(ms_new[:2]) + 1) * 10)) - datetime.timedelta(microseconds=ts.microsecond)
        return f"[{ts.strftime('%M:%S.%f')[:-4]}]{text}"

    # no lyrics
    if not safeextractfromdict(song_metadata, ['attributes', 'hasLyrics'], None):
        return None
    # re-fetch lyrics if need
    has_lyrics_relationship = "relationships" in song_metadata and "lyrics" in song_metadata["relationships"]
    if not has_lyrics_relationship:
        song_metadata = (apple_music_api.getsong(AppleMusicClientDownloadSongUtils.getmediaidoflibrarymedia(song_metadata), request_overrides=request_overrides))["data"][0]
    lyrics_ttml = safeextractfromdict(song_metadata, ['relationships', 'lyrics', 'data', 0, 'attributes', 'ttml'], None)
    if not lyrics_ttml:
        return None
    # refactor lyrics
    root = ElementTree.fromstring(lyrics_ttml)
    unsynced_lyrics, synced_lyrics, index = [], [], 1
    for div in root.iter("{http://www.w3.org/ns/ttml}div"):
        stanza = []
        unsynced_lyrics.append(stanza)
        for p in div.iter("{http://www.w3.org/ns/ttml}p"):
            if p.text is not None:
                stanza.append(p.text)
            if not p.attrib.get("begin"):
                continue
            if synced_lyrics_format == SyncedLyricsFormat.LRC:
                synced_lyrics.append(lrc_line(p))
            if synced_lyrics_format == SyncedLyricsFormat.SRT:
                synced_lyrics.append(srt_line(index, p))
            if synced_lyrics_format == SyncedLyricsFormat.TTML:
                # TTML output is the pretty-printed source document, stored exactly once
                if not synced_lyrics:
                    synced_lyrics.append(minidom.parseString(lyrics_ttml).toprettyxml())
                continue
            index += 1
    # return
    synced = "\n".join(synced_lyrics + ["\n"]) if synced_lyrics else None
    unsynced = "\n\n".join(["\n".join(lyric_group) for lyric_group in unsynced_lyrics]) if unsynced_lyrics else None
    return Lyrics(synced=synced, unsynced=unsynced)
'''getmediadate'''
@staticmethod
def getmediadate(media_id: str, itunes_api: AppleMusicClientItunesApiUtils, request_overrides: dict = None) -> datetime.datetime | None:
    '''Look media_id up through itunes_api and return the parsed releaseDate of the first result.

    Returns None when the lookup has no results or the first result carries no releaseDate.
    '''
    found = itunes_api.getlookupresult(media_id, request_overrides=request_overrides)
    if found["results"]:
        released = safeextractfromdict(found, ['results', 0, 'releaseDate'], None)
        if released:
            return AppleMusicClientDownloadSongUtils.parsedate(released)
    return None
'''gettags'''
@staticmethod
def gettags(webplayback: dict, lyrics: str | None = None, use_album_date: bool = False, itunes_api: AppleMusicClientItunesApiUtils = None, request_overrides: dict = None) -> MediaTags:
    '''Translate the metadata of the first asset of the first song in webplayback into MediaTags.

    Keys read with [...] are mandatory (KeyError when absent); keys read with .get are optional.
    The date is the album date looked up through itunes_api when use_album_date is set,
    otherwise the parsed "releaseDate" of the metadata (None when absent).
    '''
    meta = safeextractfromdict(webplayback, ['songList', 0, 'assets', 0, 'metadata'], {})

    def resolve_date():
        if use_album_date:
            return AppleMusicClientDownloadSongUtils.getmediadate(meta["playlistId"], itunes_api, request_overrides)
        if meta.get("releaseDate"):
            return AppleMusicClientDownloadSongUtils.parsedate(meta["releaseDate"])
        return None

    # keyword order is kept identical to the original call so lookups happen in the same sequence
    return MediaTags(
        album=meta["playlistName"],
        album_artist=meta["playlistArtistName"],
        album_id=int(meta["playlistId"]),
        album_sort=meta["sort-album"],
        disc=meta["discNumber"],
        track_total=meta["trackCount"],
        artist=meta["artistName"],
        artist_id=int(meta["artistId"]),
        artist_sort=meta["sort-artist"],
        comment=meta.get("comments"),
        rating=MediaRating(meta["explicit"]),
        lyrics=lyrics or None,
        compilation=meta["compilation"],
        composer=meta.get("composerName"),
        composer_id=(int(meta.get("composerId")) if meta.get("composerId") else None),
        genre=meta.get("genre"),
        media_type=MediaType.SONG,
        composer_sort=meta.get("sort-composer"),
        copyright=meta.get("copyright"),
        disc_total=meta["discCount"],
        gapless=meta["gapless"],
        genre_id=int(meta["genreId"]),
        xid=meta.get("xid"),
        date=resolve_date(),
        track=meta["trackNumber"],
        storefront=meta["s"],
        title=meta["itemName"],
        title_id=int(meta["itemId"]),
        title_sort=meta["sort-name"],
    )
'''getextratags'''
@staticmethod
def getextratags(song_metadata: dict, request_overrides: dict = None) -> dict:
    '''Download the first preview listed under attributes.previews and return its MP4 tags as a dict.

    Returns {} when the song has no previews. request_overrides is forwarded to requests.get.
    '''
    extra_kwargs = request_overrides or {}
    previews = safeextractfromdict(song_metadata, ['attributes', 'previews'], []) or []
    if not previews:
        return {}
    preview_bytes = requests.get(previews[0]["url"], **extra_kwargs).content
    return dict(MP4(io.BytesIO(preview_bytes)).tags)
'''getplaylisttags'''
@staticmethod
def getplaylisttags(playlist_metadata: dict, media_metadata: dict) -> PlaylistTags:
    '''Build PlaylistTags for media_metadata as a member of playlist_metadata.

    The track number is the 1-based position of media_metadata inside
    relationships.tracks.data; the .index() call raises when it is not found there.
    '''
    tracks = safeextractfromdict(playlist_metadata, ['relationships', 'tracks', 'data'], '')
    position = tracks.index(media_metadata) + 1
    curator = safeextractfromdict(playlist_metadata, ['attributes', 'curatorName'], 'Unknown')
    playlist_id = playlist_metadata["attributes"]["playParams"]["id"]
    title = playlist_metadata["attributes"]["name"]
    return PlaylistTags(playlist_artist=curator, playlist_id=playlist_id, playlist_title=title, playlist_track=position)
'''getstreaminfolegacy'''
@staticmethod
def getstreaminfolegacy(webplayback: dict, codec: SongCodec, request_overrides: dict = None) -> StreamInfoAv:
    '''Resolve stream URL and Widevine key URI for a legacy codec from the webplayback payload.

    The asset whose "flavor" matches the codec is selected from the first song
    (StopIteration when there is none); its playlist is downloaded and the URI of
    the first key is stored as widevine_pssh.
    '''
    extra_kwargs = request_overrides or {}
    if codec == SongCodec.AAC_HE_LEGACY:
        flavor = "32:ctrp64"
    else:
        flavor = "28:ctrp256"
    audio = StreamInfo()
    song_entry = webplayback["songList"][0]
    audio.stream_url = next(asset for asset in song_entry["assets"] if asset["flavor"] == flavor)["URL"]
    playlist_text = requests.get(audio.stream_url, **extra_kwargs).text
    audio.widevine_pssh = m3u8.loads(playlist_text).keys[0].uri
    return StreamInfoAv(media_id=song_entry["songId"], audio_track=audio, file_format=MediaFileFormat.M4A)
'''getdecryptionkeylegacy'''
@staticmethod
def getdecryptionkeylegacy(stream_info: StreamInfoAv, cdm: Cdm, apple_music_api: AppleMusicClientAPIUtils = None, request_overrides: dict = None) -> DecryptionKeyAv:
    '''Obtain the content decryption key for a legacy stream through a license exchange.

    A PSSH is built from the key id found after the comma in the stream's
    widevine_pssh URI, a license challenge is generated with cdm, exchanged through
    apple_music_api.getlicenseexchange and the first key of type "CONTENT" is returned.
    The CDM session is always closed, also when an intermediate step raises.
    '''
    stream_info_audio, request_overrides = stream_info.audio_track, request_overrides or {}
    # Open the session BEFORE the try block: if open() itself fails there is nothing to close,
    # and the finally clause must not hide that failure behind an UnboundLocalError.
    cdm_session = cdm.open()
    try:
        widevine_pssh_data = WidevinePsshData()
        widevine_pssh_data.algorithm = 1
        widevine_pssh_data.key_ids.append(base64.b64decode(stream_info_audio.widevine_pssh.split(",")[1]))
        pssh_obj = PSSH(widevine_pssh_data.SerializeToString())
        challenge = base64.b64encode(cdm.get_license_challenge(cdm_session, pssh_obj)).decode()
        license_resp = apple_music_api.getlicenseexchange(stream_info.media_id, stream_info.audio_track.widevine_pssh, challenge, request_overrides=request_overrides)
        cdm.parse_license(cdm_session, license_resp["license"])
        content_key = next(i for i in cdm.get_keys(cdm_session) if i.type == "CONTENT")
    finally:
        cdm.close(cdm_session)
    # kid.hex is read as an attribute while key.hex() is called, exactly as before
    # NOTE(review): presumably kid is a uuid.UUID and key is bytes - confirm
    return DecryptionKeyAv(audio_track=DecryptionKey(kid=content_key.kid.hex, key=content_key.key.hex()))
'''getdecryptionkey'''
@staticmethod
def getdecryptionkey(stream_info: StreamInfoAv, cdm: Cdm, apple_music_api: AppleMusicClientAPIUtils, request_overrides: dict = None) -> DecryptionKeyAv:
    '''Obtain the content decryption key for a non-legacy stream through a license exchange.

    The PSSH is the part after the last comma of the stream's widevine_pssh URI.
    A license challenge is generated with cdm, exchanged through
    apple_music_api.getlicenseexchange and the first key of type "CONTENT" is returned.
    The CDM session is always closed, also when an intermediate step raises.
    '''
    track_uri, track_id = stream_info.audio_track.widevine_pssh, stream_info.media_id
    # Open the session BEFORE the try block: if open() itself fails there is nothing to close,
    # and the finally clause must not hide that failure behind an UnboundLocalError.
    cdm_session = cdm.open()
    try:
        pssh_obj = PSSH(track_uri.split(",")[-1])
        challenge = base64.b64encode(cdm.get_license_challenge(cdm_session, pssh_obj)).decode()
        # named license_resp so the built-in "license" is not shadowed
        license_resp = apple_music_api.getlicenseexchange(track_id, track_uri, challenge, request_overrides=request_overrides)
        cdm.parse_license(cdm_session, license_resp["license"])
        content_key = next(i for i in cdm.get_keys(cdm_session) if i.type == "CONTENT")
    finally:
        cdm.close(cdm_session)
    # kid.hex is read as an attribute while key.hex() is called, exactly as before
    # NOTE(review): presumably kid is a uuid.UUID and key is bytes - confirm
    return DecryptionKeyAv(audio_track=DecryptionKey(key=content_key.key.hex(), kid=content_key.kid.hex))
'''getplaylistfromcodec'''
@staticmethod
def getplaylistfromcodec(m3u8_data: dict, codec: SongCodec) -> dict | None:
    '''Return the playlist with the highest average_bandwidth whose audio group matches the codec.

    A playlist matches when its stream_info.audio fully matches
    SONG_CODEC_REGEX_MAP[codec.value]. Returns None when nothing matches.
    '''
    candidates = []
    for entry in m3u8_data["playlists"]:
        if re.fullmatch(SONG_CODEC_REGEX_MAP[codec.value], entry["stream_info"]["audio"]):
            candidates.append(entry)
    if candidates:
        def bandwidth_of(entry):
            return entry["stream_info"]["average_bandwidth"]
        return max(candidates, key=bandwidth_of)
    return None
'''getm3u8metadata'''
@staticmethod
def getm3u8metadata(m3u8_data: dict, data_id: str):
    '''Return the decoded value of the first session_data entry whose data_id equals data_id.

    The entry's "value" is base64-decoded, read as UTF-8 and parsed as JSON.
    Returns None when no entry matches (or there is no "session_data" list).
    '''
    matching = (entry for entry in m3u8_data.get("session_data", []) if entry["data_id"] == data_id)
    hit = next(matching, None)
    if hit is None:
        return None
    return json.loads(base64.b64decode(hit["value"]).decode("utf-8"))
'''getaudiosessionkeymetadata'''
@staticmethod
def getaudiosessionkeymetadata(m3u8_data: dict):
    '''Return the decoded "com.apple.hls.AudioSessionKeyInfo" session data, or None when absent.'''
    data_id = "com.apple.hls.AudioSessionKeyInfo"
    return AppleMusicClientDownloadSongUtils.getm3u8metadata(m3u8_data, data_id)
'''getassetmetadata'''
@staticmethod
def getassetmetadata(m3u8_data: dict):
    '''Return the decoded "com.apple.hls.audioAssetMetadata" session data, or None when absent.'''
    data_id = "com.apple.hls.audioAssetMetadata"
    return AppleMusicClientDownloadSongUtils.getm3u8metadata(m3u8_data, data_id)
'''getdrmurifromsessionkey'''
@staticmethod
def getdrmurifromsessionkey(drm_infos: dict, drm_ids: list, drm_key: str) -> str | None:
for drm_id in drm_ids:
if drm_id != "1" and drm_key in drm_infos.get(drm_id, {}):
return drm_infos[drm_id][drm_key]["URI"]
return None
'''getdrmurifromm3u8keys'''
@staticmethod
def getdrmurifromm3u8keys(m3u8_obj: m3u8.M3U8, drm_key: str) -> str | None:
    '''Return the URI of the first playlist key whose keyformat is drm_key and whose URI
    differs from DRM_DEFAULT_KEY_MAPPING[drm_key]; None when there is no such key.
    '''
    placeholder_uri = DRM_DEFAULT_KEY_MAPPING[drm_key]
    usable_uris = (entry.uri for entry in m3u8_obj.keys if entry.keyformat == drm_key and entry.uri != placeholder_uri)
    return next(usable_uris, None)
'''getstreaminfo'''
@staticmethod
def getstreaminfo(song_metadata: dict, codec: SongCodec, request_overrides: dict = None) -> StreamInfoAv | None:
    '''Resolve stream URL, codec string and DRM key URIs for a non-legacy codec.

    Returns None when the song has no enhancedHls master playlist or the master
    playlist offers no variant for the codec. DRM URIs come from the master
    playlist's session-key metadata when present, otherwise from the keys of the
    selected media playlist (which is then downloaded as well).
    '''
    extra_kwargs = request_overrides or {}
    utils = AppleMusicClientDownloadSongUtils
    master_url: str = safeextractfromdict(song_metadata, ['attributes', 'extendedAssetUrls', 'enhancedHls'], None)
    if not master_url:
        return None
    master_data = m3u8.loads(requests.get(master_url, **extra_kwargs).text).data
    variant = utils.getplaylistfromcodec(master_data, codec)
    if variant is None:
        return None
    audio = StreamInfo()
    audio.stream_url = (f"{master_url.rpartition('/')[0]}/{variant['uri']}")
    audio.codec = variant["stream_info"]["codecs"]
    is_mp4 = any(audio.codec.startswith(prefix) for prefix in MP4_FORMAT_CODECS)
    session_keys = utils.getaudiosessionkeymetadata(master_data)
    if session_keys:
        asset_metadata = utils.getassetmetadata(master_data)
        drm_ids = asset_metadata[variant["stream_info"]["stable_variant_id"]]["AUDIO-SESSION-KEY-IDS"]
        def resolve(drm_key):
            return utils.getdrmurifromsessionkey(session_keys, drm_ids, drm_key)
    else:
        media_playlist = m3u8.loads(requests.get(audio.stream_url, **extra_kwargs).text)
        def resolve(drm_key):
            return utils.getdrmurifromm3u8keys(media_playlist, drm_key)
    audio.widevine_pssh = resolve("urn:uuid:edef8ba9-79d6-4ace-a3c8-27dcd51d21ed")
    audio.playready_pssh = resolve("com.microsoft.playready")
    audio.fairplay_key = resolve("com.apple.streamingkeydelivery")
    if is_mp4:
        file_format = MediaFileFormat.MP4
    else:
        file_format = MediaFileFormat.M4A
    return StreamInfoAv(audio_track=audio, file_format=file_format)
'''getrawcoverurl'''
@staticmethod
def getrawcoverurl(cover_url_template: str) -> str:
    '''Rewrite a cover URL template: every "is1-ssl" becomes "a1" and every "image/thumb/" is removed.'''
    rehosted = re.sub(r"is1-ssl", "a1", cover_url_template)
    return re.sub(r"image/thumb/", "", rehosted)
'''getcoverurltemplate'''
@staticmethod
def getcoverurltemplate(metadata: dict, cover_format: CoverFormat) -> str:
    '''Return the artwork URL template of metadata (attributes.artwork.url).

    For CoverFormat.RAW the template is rewritten with getrawcoverurl. The previous
    code computed that rewritten template and then unconditionally overwrote it
    with the plain URL, so RAW never took effect.
    '''
    artwork_url = metadata["attributes"]["artwork"]["url"]
    if cover_format == CoverFormat.RAW:
        return AppleMusicClientDownloadSongUtils.getrawcoverurl(artwork_url)
    return artwork_url
'''getcoverurl'''
@staticmethod
def getcoverurl(cover_url_template: str, cover_size: int, cover_format: CoverFormat) -> str:
    '''Fill the "{w}x{h}<two letters>.jpg" placeholder of a cover URL template.

    The placeholder becomes "<size>x<size>bb.<format value>"; for CoverFormat.RAW it is removed.
    '''
    if cover_format != CoverFormat.RAW:
        replacement = f"{cover_size}x{cover_size}bb.{cover_format.value}"
    else:
        replacement = ""
    return re.sub(r"\{w\}x\{h\}([a-z]{2})\.jpg", replacement, cover_url_template)
'''getdownloaditem'''
@staticmethod
def getdownloaditem(song_metadata: dict, playlist_metadata: dict, synced_lyrics_format: SyncedLyricsFormat = SyncedLyricsFormat.LRC, codec: SongCodec = SongCodec.AAC_LEGACY, apple_music_api: AppleMusicClientAPIUtils = None, itunes_api: AppleMusicClientItunesApiUtils = None, use_album_date: bool = False, fetch_extra_tags: bool = False, use_wrapper: bool = False, cover_format: CoverFormat = CoverFormat.JPG, cover_size: int = 1200, request_overrides: dict = None):
    '''Collect everything needed to download one song into a DownloadItem.

    Gathers lyrics, media tags (plus optional extra and playlist tags), stream info,
    the decryption key, cover URLs and a random uuid for temporary file names.
    All path attributes are initialised to None. For non-legacy codecs the
    decryption key is only requested when use_wrapper is off and a Widevine PSSH
    was found; otherwise it stays None.
    '''
    # init
    extra_kwargs = request_overrides or {}
    utils = AppleMusicClientDownloadSongUtils
    item = DownloadItem()
    item.media_metadata = song_metadata
    item.playlist_metadata = playlist_metadata
    # lyrics
    song_id = utils.getmediaidoflibrarymedia(song_metadata)
    item.lyrics = utils.getlyrics(song_metadata, synced_lyrics_format=synced_lyrics_format, apple_music_api=apple_music_api, request_overrides=extra_kwargs)
    # get media tags
    webplayback = apple_music_api.getwebplayback(song_id, request_overrides=extra_kwargs)
    unsynced_text = item.lyrics.unsynced if item.lyrics else None
    item.media_tags = utils.gettags(webplayback, unsynced_text, use_album_date, itunes_api, extra_kwargs)
    if fetch_extra_tags:
        item.extra_tags = utils.getextratags(song_metadata, extra_kwargs)
    if playlist_metadata:
        item.playlist_tags = utils.getplaylisttags(playlist_metadata, song_metadata)
    # None for all paths as default value, auto set after searching
    item.final_path = None
    item.synced_lyrics_path = None
    item.staged_path = None
    item.playlist_file_path = None
    # stream info and decryption key
    if codec.islegacy():
        item.stream_info = utils.getstreaminfolegacy(webplayback, codec, extra_kwargs)
        item.decryption_key = utils.getdecryptionkeylegacy(item.stream_info, utils.cdm, apple_music_api=apple_music_api, request_overrides=extra_kwargs)
    else:
        item.stream_info = utils.getstreaminfo(song_metadata, codec, request_overrides=extra_kwargs)
        if (not use_wrapper and item.stream_info and item.stream_info.audio_track.widevine_pssh):
            item.decryption_key = utils.getdecryptionkey(item.stream_info, utils.cdm, apple_music_api=apple_music_api, request_overrides=extra_kwargs)
        else:
            item.decryption_key = None
    # cover url
    item.cover_url_template = utils.getcoverurltemplate(song_metadata, cover_format)
    item.cover_url = utils.getcoverurl(item.cover_url_template, cover_size, cover_format)
    # uuid for tmp results saving
    item.random_uuid = utils.getrandomuuid4()
    # return
    return item
'''remuxmp4box'''
@staticmethod
def remuxmp4box(input_path: str, output_path: str, silent: bool = False, artist: str = ''):
    '''Remux input_path into output_path with MP4Box, tagging it with the given artist.

    Output of the tool is captured when silent is truthy. Raises
    subprocess.CalledProcessError on a non-zero exit status (check=True).
    '''
    command = ["MP4Box", "-quiet", "-add", input_path, "-itags", f"artist={artist}", "-keep-utc", "-new", output_path]
    completed = subprocess.run(command, check=True, capture_output=bool(silent), text=True, encoding='utf-8', errors='ignore')
    return completed.returncode == 0
'''remuxffmpeg'''
@staticmethod
def remuxffmpeg(input_path: str, output_path: str, decryption_key: str = None, silent: bool = False):
    '''Stream-copy input_path into output_path with ffmpeg (faststart enabled).

    When decryption_key is given it is passed to ffmpeg through -decryption_key.
    Output of the tool is captured when silent is truthy. Raises
    subprocess.CalledProcessError on a non-zero exit status (check=True).
    '''
    command = ['ffmpeg', "-loglevel", "error", "-y"]
    if decryption_key:
        command += ["-decryption_key", decryption_key]
    command += ["-i", input_path, "-c", "copy", "-movflags", "+faststart", output_path]
    completed = subprocess.run(command, check=True, capture_output=bool(silent), text=True, encoding='utf-8', errors='ignore')
    return completed.returncode == 0
'''decryptmp4decrypt'''
@staticmethod
def decryptmp4decrypt(input_path: str, output_path: str, decryption_key: str, legacy: bool, silent: bool = False):
    '''Decrypt input_path into output_path with mp4decrypt.

    Legacy streams use the single key "1:<key>". Otherwise the key ids inside the
    input file are rewritten first (fixkeyid) and two keys are passed: key id
    0...01 with decryption_key and key id 0...00 with DEFAULT_SONG_DECRYPTION_KEY.
    Raises subprocess.CalledProcessError on a non-zero exit status (check=True).
    '''
    if legacy:
        key_args = ["--key", f"1:{decryption_key}"]
    else:
        AppleMusicClientDownloadSongUtils.fixkeyid(input_path)
        key_args = ["--key", f"{'0' * 31}1:{decryption_key}", "--key", f"{'0' * 32}:{DEFAULT_SONG_DECRYPTION_KEY}"]
    command = ["mp4decrypt", *key_args, input_path, output_path]
    completed = subprocess.run(command, check=True, capture_output=bool(silent), text=True, encoding='utf-8', errors='ignore')
    return completed.returncode == 0
'''decryptamdecrypt'''
@staticmethod
def decryptamdecrypt(input_path: str, output_path: str, media_id: str, fairplay_key: str, wrapper_decrypt_ip: str = "127.0.0.1:10020", silent: bool = False):
    '''Decrypt input_path into output_path with the amdecrypt tool.

    The tool receives the wrapper address, the resolved mp4decrypt location
    (shutil.which), the media id and the FairPlay key. Raises
    subprocess.CalledProcessError on a non-zero exit status (check=True).
    '''
    mp4decrypt_path = shutil.which('mp4decrypt')
    command = ['amdecrypt', wrapper_decrypt_ip, mp4decrypt_path, media_id, fairplay_key, input_path, output_path]
    completed = subprocess.run(command, check=True, capture_output=bool(silent), text=True, encoding='utf-8', errors='ignore')
    return completed.returncode == 0
'''stage'''
@staticmethod
def stage(encrypted_path: str, decrypted_path: str, staged_path: str, decryption_key: DecryptionKeyAv, codec: SongCodec, media_id: str, fairplay_key: str, remux_mode: RemuxMode = RemuxMode.MP4BOX, silent: bool = False, wrapper_decrypt_ip: str = "127.0.0.1:10020", artist: str = "", use_wrapper: bool = False):
    '''Turn the encrypted download into the staged (decrypted and remuxed) file.

    Three routes:
      * legacy codec + FFMPEG remux mode: ffmpeg decrypts and remuxes in one pass;
      * legacy codec, or wrapper disabled: mp4decrypt writes decrypted_path, which is
        then remuxed into staged_path by ffmpeg or MP4Box depending on remux_mode;
      * otherwise: amdecrypt decrypts straight into staged_path.

    Returns True when every tool that was run reported success. The previous
    version returned None on all routes although download() hands the result back
    as its success flag. Tool failures still raise subprocess.CalledProcessError
    because the helpers run with check=True.
    '''
    utils = AppleMusicClientDownloadSongUtils
    legacy = codec.islegacy()
    if legacy and remux_mode == RemuxMode.FFMPEG:
        return utils.remuxffmpeg(encrypted_path, staged_path, decryption_key.audio_track.key, silent=silent)
    if legacy or not use_wrapper:
        decrypted_ok = utils.decryptmp4decrypt(encrypted_path, decrypted_path, decryption_key.audio_track.key, legacy, silent)
        if remux_mode == RemuxMode.FFMPEG:
            remuxed_ok = utils.remuxffmpeg(decrypted_path, staged_path, silent=silent)
        else:
            remuxed_ok = utils.remuxmp4box(decrypted_path, staged_path, silent=silent, artist=artist)
        return bool(decrypted_ok and remuxed_ok)
    return utils.decryptamdecrypt(encrypted_path, staged_path, media_id, fairplay_key, wrapper_decrypt_ip=wrapper_decrypt_ip, silent=silent)
'''downloadstreamwithnm3u8dlre'''
@staticmethod
def downloadstreamwithnm3u8dlre(stream_url: str, download_path: str, silent: bool = False, random_uuid: str = ''):
    '''Download stream_url with N_m3u8DL-RE into download_path.

    The parent directory of download_path is created when missing and is used as
    both save and temp directory; the tool's log goes to
    "musicdl_<random_uuid>.log" inside the user log directory. Raises
    subprocess.CalledProcessError on a non-zero exit status (check=True).
    '''
    target = Path(download_path)
    target_dir = target.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    log_file_path = os.path.join(user_log_dir(appname='musicdl', appauthor='zcjin'), f"musicdl_{random_uuid}.log")
    command = ["N_m3u8DL-RE", stream_url, "--binary-merge", "--ffmpeg-binary-path", shutil.which('ffmpeg'), "--save-name", target.stem]
    command += ["--save-dir", target_dir, "--tmp-dir", target_dir, "--log-file-path", log_file_path]
    completed = subprocess.run(command, check=True, capture_output=bool(silent), text=True, encoding='utf-8', errors='ignore')
    return completed.returncode == 0
'''download'''
@staticmethod
def download(download_item: DownloadItem, work_dir: str = './', silent: bool = False, codec: SongCodec = SongCodec.AAC_LEGACY, wrapper_decrypt_ip: str = "127.0.0.1:10020", remux_mode: RemuxMode = RemuxMode.MP4BOX, artist: str = "", use_wrapper: bool = False):
    '''Download the encrypted stream of download_item into work_dir and stage it.

    Temporary files are named after download_item.random_uuid; the staged file path
    is stored on download_item.staged_path. The value returned is whatever stage()
    returns (the result of the download step itself is not used).
    '''
    utils = AppleMusicClientDownloadSongUtils
    ext = download_item.stream_info.file_format.value
    uid = download_item.random_uuid
    audio_track = download_item.stream_info.audio_track
    encrypted_path = os.path.join(work_dir, f"{uid}_encrypted.m4a")
    utils.downloadstreamwithnm3u8dlre(audio_track.stream_url, encrypted_path, silent=silent, random_uuid=uid)
    decrypted_path = os.path.join(work_dir, f"{uid}_decrypted.m4a")
    download_item.staged_path = os.path.join(work_dir, f"{uid}_staged.{ext}")
    return utils.stage(
        encrypted_path=encrypted_path,
        decrypted_path=decrypted_path,
        staged_path=download_item.staged_path,
        decryption_key=download_item.decryption_key,
        codec=codec,
        media_id=download_item.media_metadata["id"],
        fairplay_key=download_item.stream_info.audio_track.fairplay_key,
        remux_mode=remux_mode,
        silent=silent,
        wrapper_decrypt_ip=wrapper_decrypt_ip,
        artist=artist,
        use_wrapper=use_wrapper,
    )
+141
View File
@@ -0,0 +1,141 @@
'''
Function:
Implementation of SongInfo
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
from typing import Any, Dict, Optional
from dataclasses import dataclass, field, fields
from .misc import sanitize_filepath, safeextractfromdict, AudioLinkTester
def remove_prefix(value: str, prefix: str) -> str:
    '''Return value without a leading prefix; value is returned unchanged when prefix is empty or absent.'''
    if not prefix or not value.startswith(prefix):
        return value
    return value[len(prefix):]
def remove_suffix(value: str, suffix: str) -> str:
    '''Return value without a trailing suffix; value is returned unchanged when suffix is empty or absent.'''
    if not suffix or not value.endswith(suffix):
        return value
    cut = len(value) - len(suffix)
    return value[:cut]
'''SongInfo'''
@dataclass
class SongInfo:
    '''Mutable record describing one song (or one FM program with episodes).

    Besides attribute access the object supports dict-style access restricted to
    its dataclass field names: obj[key], key in obj, obj.get(key), obj.update(...).
    '''
    # raw data replied by requested APIs
    raw_data: Dict[str, Any] = field(default_factory=dict)
    # from which music client
    source: Optional[str] = None
    root_source: Optional[str] = None
    # song information
    song_name: Optional[str] = None
    singers: Optional[str] = None
    album: Optional[str] = None
    ext: Optional[str] = None
    file_size_bytes: Optional[int] = None
    file_size: Optional[str] = None
    duration_s: Optional[int] = None
    duration: Optional[str] = None
    bitrate: Optional[int] = None
    codec: Optional[str] = None
    samplerate: Optional[int] = None
    channels: Optional[int] = None
    # lyric
    lyric: Optional[str] = None
    # cover
    cover_url: Optional[str] = None
    # episodes, each item in episodes is SongInfo object, used by FM site like XimalayaMusicClient
    episodes: Optional[list[SongInfo]] = None
    # download url related variables
    download_url: Optional[Any] = None
    download_url_status: Optional[Any] = None
    default_download_headers: Dict[str, Any] = field(default_factory=dict)
    downloaded_contents: Optional[Any] = None
    chunk_size: Optional[int] = 1024 * 1024
    protocol: Optional[str] = 'HTTP' # should be in {'HTTP', 'HLS'}
    # save info
    work_dir: Optional[str] = './'
    _save_path: Optional[str] = None
    # identifier
    identifier: Optional[str] = None

    @property
    def with_valid_download_url(self) -> bool:
        '''True when the song (or every one of its episodes) looks downloadable.'''
        if self.episodes:
            episode_flags = [episode.with_valid_download_url for episode in self.episodes]
            return all(episode_flags)
        url = self.download_url
        if isinstance(url, str):
            url_looks_valid = url and url.startswith('http')
        else:
            url_looks_valid = bool(url)
        has_contents = bool(self.downloaded_contents)
        status = self.download_url_status
        downloadable = isinstance(status, dict) and status.get('ok')
        # a probe that recognised an audio extension also counts as downloadable
        if not downloadable and (safeextractfromdict(status, ['probe_status', 'ext'], None) in AudioLinkTester.VALID_AUDIO_EXTS):
            downloadable = True
        return bool((url_looks_valid or has_contents) and downloadable)

    @property
    def save_path(self) -> str:
        '''Path the song is saved to; computed on first access and cached in _save_path.'''
        if self._save_path is not None:
            return self._save_path
        ext = remove_prefix(str(self.ext or ""), ".")
        candidate = os.path.join(self.work_dir, f"{self.song_name} - {self.identifier}.{ext}")
        suffix_idx = 1
        # append " (n)" until no file of that name exists
        while os.path.exists(candidate):
            candidate = os.path.join(self.work_dir, f"{self.song_name} - {self.identifier} ({suffix_idx}).{ext}")
            suffix_idx += 1
        candidate = sanitize_filepath(candidate)
        self._save_path = candidate
        return candidate

    '''fieldnames'''
    @classmethod
    def fieldnames(cls) -> set[str]:
        '''Return the set of dataclass field names.'''
        return set(f.name for f in fields(cls))

    '''fromdict'''
    @classmethod
    def fromdict(cls, data: Dict[str, Any]) -> "SongInfo":
        '''Build an instance from data, ignoring unknown keys; dict episodes are converted recursively.'''
        known = cls.fieldnames()
        init_kwargs = {key: value for key, value in data.items() if key in known}
        episodes = init_kwargs.get("episodes")
        if episodes and isinstance(episodes, list):
            init_kwargs["episodes"] = [cls.fromdict(item) if isinstance(item, dict) else item for item in episodes]
        return cls(**init_kwargs)

    '''todict'''
    def todict(self) -> Dict[str, Any]:
        '''Return the fields as a dict; a non-empty episodes list is converted recursively.'''
        exported = {}
        for f in fields(self):
            exported[f.name] = getattr(self, f.name)
        if self.episodes and isinstance(self.episodes, list):
            exported['episodes'] = [episode.todict() for episode in self.episodes]
        return exported

    '''update'''
    def update(self, data: Dict[str, Any] = None, **kwargs: Any) -> "SongInfo":
        '''Set every known field found in data / kwargs (kwargs win) and return self.'''
        merged: Dict[str, Any] = {**(data if data is not None else {}), **kwargs}
        known = self.fieldnames()
        for key, value in merged.items():
            if key not in known:
                continue
            setattr(self, key, value)
        return self

    '''getitem'''
    def __getitem__(self, key: str) -> Any:
        '''Return the field named key; KeyError for anything that is not a field.'''
        if key in self.fieldnames():
            return getattr(self, key)
        raise KeyError(key)

    '''setitem'''
    def __setitem__(self, key: str, value: Any) -> None:
        '''Assign the field named key; KeyError for anything that is not a field.'''
        if key not in self.fieldnames():
            raise KeyError(key)
        setattr(self, key, value)

    '''contains'''
    def __contains__(self, key: object) -> bool:
        '''True when key is a string naming a field.'''
        if not isinstance(key, str):
            return False
        return key in self.fieldnames()

    '''get'''
    def get(self, key: str, default: Any = None) -> Any:
        '''Return the field named key, or default when key is not a field.'''
        if key not in self.fieldnames():
            return default
        return getattr(self, key)

    '''largerthan'''
    def largerthan(self, song_info: SongInfo):
        '''True when this song's file_size (text such as "12.3MB") is larger than song_info's.

        A size that cannot be read or parsed counts as 0.0.
        '''
        def size_in_mb(owner) -> float:
            try:
                return float(remove_suffix(str(owner.file_size), 'MB').strip())
            except Exception:
                return 0.0
        return bool(size_in_mb(self) > size_in_mb(song_info))
@@ -0,0 +1,64 @@
'''
Function:
Implementation of DeezerMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import base64
import hashlib
import binascii
import functools
from Cryptodome.Cipher import AES, Blowfish
'''DeezerMusicClientUtils'''
class DeezerMusicClientUtils():
    '''Static helpers for the Deezer client: key derivation, URL building, lyric conversion, file decryption.'''
    BLOWFISH_SECRET = "g4el58wc0zvf9na1"
    MUSIC_QUALITIES = ('FLAC', 'MP3_320', 'MP3_128')
    IS_ENCRYPTED_RPATTERN = re.compile("/m(?:obile|edia)/")
    SHARED_TOKENS = ['ZjI4N2JkNzRjM2Q1NGY5YmJmOTc5OTdjNzhkZWJkMzdiMTU4NjRjZDdhM2MwZjk0MjUxNWNjOWIwNGE1MWM1N2RhYmZiOTQ4YWYyNjM0MDFhOTRkZTUxOGI3MjRlZDdmNDBmMjcyMmNlZGMwMTgxZTEwYmZmNDk5MmVjNzc4NzU3MmU1MDUzZjk0Nzc1NjFiZjhkMjcwNDc0NzRiNzMxMTcxNjUyZWQxNzg0YzlmNTdhMTUxZDMxOTk2NmVjY2Ex']
    # base64-decodes a token (given as anything str() accepts) into text
    token_decrypt_func = lambda t: base64.b64decode(str(t).encode('utf-8')).decode('utf-8')

    '''decryptchunk'''
    @staticmethod
    def decryptchunk(key, data):
        '''Decrypt data with Blowfish in CBC mode using the fixed IV 00 01 02 03 04 05 06 07.'''
        cipher = Blowfish.new(key, Blowfish.MODE_CBC, b"\x00\x01\x02\x03\x04\x05\x06\x07")
        return cipher.decrypt(data)

    '''generateblowfishkey'''
    @staticmethod
    def generateblowfishkey(track_id: str) -> bytes:
        '''Derive the 16-character key: XOR of both halves of md5(track_id) hex digest with BLOWFISH_SECRET.'''
        digest = hashlib.md5(str(track_id).encode()).hexdigest()
        key_chars = []
        for left, right, secret in zip(digest[:16], digest[16:], DeezerMusicClientUtils.BLOWFISH_SECRET):
            key_chars.append(chr(ord(left) ^ ord(right) ^ ord(secret)))
        return "".join(key_chars).encode()

    '''getencryptedfileurl'''
    @staticmethod
    def getencryptedfileurl(meta_id: str, track_hash: str, media_version: str, format_number: int = 1):
        '''Build the CDN URL of the encrypted file for the given track hash, format, id and media version.'''
        sep = b"\xa4"
        url_bytes = sep.join((track_hash.encode(), str(format_number).encode(), str(meta_id).encode(), str(media_version).encode()))
        info_bytes = bytearray(hashlib.md5(url_bytes).hexdigest().encode())
        info_bytes += sep
        info_bytes += url_bytes
        info_bytes += sep
        # pad with "." up to the next multiple of 16 (a full extra block when already aligned)
        info_bytes += b"." * (16 - (len(info_bytes) % 16))
        encrypted = AES.new(b"jo6aey6haid2Teih", AES.MODE_ECB).encrypt(info_bytes)
        path = binascii.hexlify(encrypted).decode("utf-8")
        return f"https://e-cdns-proxy-{track_hash[0]}.dzcdn.net/mobile/1/{path}"

    '''getcoverurl'''
    @staticmethod
    def getcoverurl(pic_id: str):
        '''Return the 1200x1200 cover URL for pic_id, or None when pic_id is empty.'''
        if pic_id:
            return f"https://e-cdns-images.dzcdn.net/images/cover/{pic_id}/1200x1200.jpg"
        return None

    '''covert2lrclyrics'''
    @staticmethod
    def covert2lrclyrics(lyrics_node: dict):
        '''Convert a lyrics node to LRC text; falls back to the plain "text" when there are no synchronized lines.'''
        lrc_lines = []
        if lyrics_node.get("writers"):
            lrc_lines.append(f"[ar:{lyrics_node['writers']}]")
        sync_lines = lyrics_node.get("synchronizedLines")
        if not sync_lines:
            return lyrics_node.get("text")
        for item in sync_lines:
            if item.get("lrcTimestamp", ""):
                lrc_lines.append(f"{item.get('lrcTimestamp', '')}{item.get('line', '')}")
            elif "milliseconds" in item:
                # no ready-made timestamp: build [mm:ss.xx] from the millisecond offset
                lrc_lines.append(f"[{int(item['milliseconds']) // 60000:02d}:{(int(item['milliseconds']) % 60000) / 1000:05.2f}]{item.get('line', '')}")
        return "\n".join(lrc_lines)

    '''decryptdownloadedaudiofile'''
    @staticmethod
    def decryptdownloadedaudiofile(src_path: str, dst_path: str, blowfish_key: str):
        '''Copy src_path to dst_path, decrypting the first 2048 bytes of every 6144-byte chunk.

        A chunk shorter than 2048 bytes is copied unchanged.
        '''
        encrypt_chunk_size = 3 * 2048
        with open(src_path, "rb") as src, open(dst_path, "wb") as dst:
            for data in iter(lambda: src.read(encrypt_chunk_size), b""):
                if len(data) >= 2048:
                    plain = DeezerMusicClientUtils.decryptchunk(blowfish_key, data[:2048]) + data[2048:]
                else:
                    plain = data
                dst.write(plain)
+383
View File
@@ -0,0 +1,383 @@
'''
Function:
Implementation of HLSDownloader
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import re
import copy
import time
import math
import m3u8
import base64
import shutil
import hashlib
import requests
import threading
import concurrent.futures as cf
from pathlib import Path
from .misc import touchdir
from .logger import LoggerHandle
from urllib.parse import urljoin
from dataclasses import dataclass
from rich.progress import Progress
from typing import Optional, Dict, Any, Tuple, List, Union, Callable
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
'''SegmentJob'''
@dataclass(frozen=True)
class SegmentJob:
    # Immutable (frozen) record holding the parameters of one segment download job.
    # NOTE(review): the code that fills these fields is outside this view; the notes
    # below are inferred from the field names only - confirm against the job builder.
    # position of the segment (presumably within the playlist)
    index: int
    # location of the segment
    uri: str
    # optional byte range of the segment
    byterange: Optional[str]
    # key_method / key_uri / key_iv / keyformat: presumably the attributes of the key applying to the segment
    key_method: Optional[str]
    key_uri: Optional[str]
    key_iv: Optional[str]
    keyformat: Optional[str]
    # presumably the playlist media sequence number - TODO confirm
    media_sequence: int
    # presumably location and byte range of the segment's initialization section - TODO confirm
    map_uri: Optional[str]
    map_byterange: Optional[str]
'''HLSDownloader'''
class HLSDownloader:
def __init__(self, output_dir: str = "downloads", proxies: Optional[Dict[str, str]] = None, headers: Optional[Dict[str, str]] = None, cookies: Optional[Dict[str, str]] = None, timeout: Tuple[float, float] = (10.0, 30.0), logger_handle: LoggerHandle = None,
             verify_tls: bool = True, concurrency: int = 16, max_retries: int = 8, backoff_base: float = 0.6, backoff_cap: float = 10.0, chunk_size: int = 1024 * 256, strict_key_length: bool = False, disable_print: bool = False, request_overrides: dict = None):
    '''Store the downloader configuration and make sure output_dir exists.

    proxies / headers / cookies / request_overrides default to empty dicts;
    concurrency and max_retries are clamped to at least 1; numeric options are
    coerced with int() / float().
    '''
    # work dir
    self.output_dir = output_dir
    touchdir(self.output_dir)
    # logger
    self.logger_handle, self.disable_print = logger_handle, disable_print
    # http requests
    self.proxies = proxies if proxies else {}
    self.headers = headers if headers else {}
    self.cookies = cookies if cookies else {}
    self.timeout, self.verify_tls = timeout, verify_tls
    self.chunk_size = int(chunk_size)
    self.backoff_cap = float(backoff_cap)
    self.backoff_base = float(backoff_base)
    self.concurrency = max(1, int(concurrency))
    self.max_retries = max(1, int(max_retries))
    self.strict_key_length = bool(strict_key_length)
    self.request_overrides = request_overrides if request_overrides else {}
    # threading: one thread-local slot (used by _getsession) and a lock-protected key cache
    self._tls = threading.local()
    self._key_cache: Dict[str, bytes] = {}
    self._key_cache_lock = threading.Lock()
'''download'''
def download(self, m3u8_url: str, output_path: str, quality: Union[str, int, Callable[[List[Dict[str, Any]]], int]] = "best", keep_segments: bool = False, temp_subdir: Optional[str] = None, progress: Progress = None, progress_id: int = 0) -> str:
master_or_media = self._loadm3u8(m3u8_url)
if master_or_media.is_variant:
variant_url = self._selectvariant(master_or_media, quality)
self.logger_handle.info(f"Selected variant: {variant_url}", disable_print=self.disable_print)
playlist = self._loadm3u8(variant_url)
else:
playlist = master_or_media
jobs, global_init_map = self._buildjobs(playlist)
temp_folder, global_init_path = os.path.join(self.output_dir, temp_subdir or f".hls_tmp_{self._safenamefromurl(m3u8_url)}"), None
touchdir(temp_folder)
if global_init_map:
global_init_path = os.path.join(temp_folder, "_global_init.bin")
if not self._fileok(global_init_path): self._atomicwrite(global_init_path, self._fetchbytes(global_init_map["uri"], global_init_map.get("byterange")))
seg_paths = self._downloadallsegments(jobs, temp_folder, progress=progress, progress_id=progress_id)
touchdir(os.path.dirname(os.path.abspath(output_path)) or ".")
self._mergefiles(global_init_path, seg_paths, output_path)
if not keep_segments: shutil.rmtree(temp_folder, ignore_errors=True)
return output_path
'''_getsession'''
def _getsession(self) -> requests.Session:
sess = getattr(self._tls, "session", None)
if sess is None:
sess = requests.Session()
sess.headers.update(self.headers)
if self.cookies: sess.cookies.update(self.cookies)
self._tls.session = sess
return sess
'''_request'''
def _request(self, url: str, method: str = "GET", headers: Optional[Dict[str, str]] = None, stream: bool = False, **kwargs) -> requests.Response:
kwargs.update(copy.deepcopy(self.request_overrides))
sess, last_exc = self._getsession(), None
hdrs = dict(self.headers)
if headers: hdrs.update(headers)
for attempt in range(1, self.max_retries + 1):
try:
resp = sess.request(method=method, url=url, headers=hdrs, proxies=self.proxies, timeout=self.timeout, verify=self.verify_tls, stream=stream, **kwargs)
if resp.status_code in (429, 500, 502, 503, 504): resp.close(); raise requests.HTTPError(f"HTTP {resp.status_code} for {url}")
resp.raise_for_status()
return resp
except Exception as e:
last_exc = e
t = min(self.backoff_cap, self.backoff_base * (2 ** (attempt - 1)))
t = t + (0.1 * t * (0.5 - (time.time() % 1)))
time.sleep(max(0.0, t))
raise RuntimeError(f"Request failed after retries: {url}\nLast error: {last_exc}")
'''_gettext'''
def _gettext(self, url: str) -> str:
resp = self._request(url, stream=False)
return resp.text
'''_getbytes'''
def _getbytes(self, url: str, headers: Optional[Dict[str, str]] = None) -> bytes:
resp = self._request(url, headers=headers, stream=True)
chunks = []
for c in resp.iter_content(chunk_size=self.chunk_size):
if c: chunks.append(c)
resp.close()
return b"".join(chunks)
'''_fetchbytes'''
def _fetchbytes(self, url: str, byterange: Optional[str]) -> bytes:
headers = {}
if byterange:
length, offset = self._parsebyterange(byterange)
headers["Range"] = f"bytes={offset}-{offset + length - 1}"
return self._getbytes(url, headers=headers)
'''_loadm3u8'''
def _loadm3u8(self, url: str) -> m3u8.M3U8:
text = self._gettext(url)
return m3u8.loads(text, uri=url)
'''_selectvariant'''
def _selectvariant(self, master: m3u8.M3U8, quality: Union[str, int, Callable[[List[Dict[str, Any]]], int]]) -> str:
variants, bw_func = [], lambda v: int(v.get("average_bandwidth") or v.get("bandwidth") or 0)
for i, p in enumerate(master.playlists or []):
si = getattr(p, "stream_info", None)
variants.append({
"index": i, "absolute_uri": getattr(p, "absolute_uri", None) or urljoin(master.base_uri or master.uri, p.uri), "uri": p.uri, "bandwidth": getattr(si, "bandwidth", None) if si else None,
"average_bandwidth": getattr(si, "average_bandwidth", None) if si else None, "resolution": getattr(si, "resolution", None) if si else None, "codecs": getattr(si, "codecs", None) if si else None,
"frame_rate": getattr(si, "frame_rate", None) if si else None,
})
if not variants: raise ValueError("Master playlist has no variants.")
if callable(quality): idx = int(quality(variants)); idx = max(0, min(idx, len(variants) - 1)); return variants[idx]["absolute_uri"]
if isinstance(quality, str):
q = quality.lower().strip()
if q == "best":
chosen = max(variants, key=bw_func)
elif q == "lowest":
chosen = min(variants, key=bw_func)
else:
m = re.search(r"(\d+)", q)
if m: target = int(m.group(1)); chosen = min(variants, key=lambda v: abs(bw_func(v) - target))
else: chosen = max(variants, key=bw_func)
else:
target = int(quality)
chosen = min(variants, key=lambda v: abs(bw_func(v) - target))
return chosen["absolute_uri"]
'''_buildjobs'''
def _buildjobs(self, playlist: m3u8.M3U8) -> Tuple[List[SegmentJob], Optional[Dict[str, Any]]]:
media_seq = int(getattr(playlist, "media_sequence", 0) or 0)
global_init, seg_map = None, getattr(playlist, "segment_map", None)
if seg_map:
try: sm0 = seg_map[0]; global_init = {"uri": getattr(sm0, "absolute_uri", None) or urljoin(playlist.base_uri, sm0.uri), "byterange": getattr(sm0, "byterange", None)}
except Exception: global_init = None
jobs: List[SegmentJob] = []
session_keys = getattr(playlist, "session_keys", None) or []
fallback_session_key, last_key_obj = session_keys[-1] if session_keys else None, None
for i, seg in enumerate(playlist.segments or []):
seg_uri, key_obj = getattr(seg, "absolute_uri", None) or urljoin(playlist.base_uri, seg.uri), getattr(seg, "key", None) or last_key_obj or fallback_session_key
if getattr(seg, "key", None) is not None: last_key_obj = getattr(seg, "key", None)
key_method, key_uri, key_iv, keyformat = (getattr(key_obj, k, None) for k in ("method", "uri", "iv", "keyformat")) if key_obj else (None, None, None, None)
key_uri_abs = (key_uri if key_uri and (key_uri.startswith("data:") or key_uri.startswith("skd://")) else (urljoin(playlist.base_uri, key_uri) if key_uri else None))
init_section = getattr(seg, "init_section", None)
map_uri, map_byterange = ((getattr(init_section, "absolute_uri", None) or (urljoin(playlist.base_uri, getattr(init_section, "uri", "")) if getattr(init_section, "uri", None) else None)), getattr(init_section, "byterange", None)) if init_section is not None else (None, None)
jobs.append(SegmentJob(index=i, uri=seg_uri, byterange=getattr(seg, "byterange", None), key_method=key_method, key_uri=key_uri_abs, key_iv=key_iv, keyformat=keyformat, media_sequence=media_seq, map_uri=map_uri, map_byterange=map_byterange))
return jobs, global_init
'''_downloadallsegments'''
def _downloadallsegments(self, jobs: List[SegmentJob], temp_folder: str, progress: Progress, progress_id: int) -> List[str]:
progress.update(progress_id, description=f"HLSDownloader._downloadallsegments >>> completed (0/{len(jobs)})", total=len(jobs), kind='hls')
byterange_cursor: Dict[str, int] = {}; seg_paths: List[Optional[str]] = [None] * len(jobs)
init_cache: Dict[str, str] = {}; init_inflight: Dict[str, threading.Event] = {}; init_cache_lock = threading.Lock()
def ensureinitsection_func(map_uri: str, map_byterange: Optional[str]) -> bytes:
key = f"{map_uri}|{map_byterange or ''}"
with init_cache_lock:
cached = init_cache.get(key)
if cached and self._fileok(cached): return Path(cached).read_bytes()
leader = (evt := init_inflight.get(key)) is None; evt = init_inflight[key] = threading.Event() if leader else evt
if not leader:
evt.wait()
with init_cache_lock: cached = init_cache.get(key)
return Path(cached).read_bytes() if cached and self._fileok(cached) else (_ for _ in ()).throw(RuntimeError(f"init_section download failed: {key}"))
try:
data = self._fetchbytes(map_uri, map_byterange)
path = os.path.join(temp_folder, f"_initsec_{abs(hash(key)) & 0xffffffff:08x}.bin")
self._atomicwrite(path, data)
with init_cache_lock: init_cache[key] = path
return data
finally:
with init_cache_lock: (evt := init_inflight.pop(key, None)) and evt.set()
def worker_func(job: SegmentJob) -> Tuple[int, str]:
seg_path = os.path.join(temp_folder, f"seg_{job.index:06d}.bin")
if self._fileok(seg_path): return job.index, seg_path
prepend = ensureinitsection_func(job.map_uri, job.map_byterange) if job.map_uri else b""
eff_byterange = self._normalizebyterange(job.uri, job.byterange, byterange_cursor) if job.byterange else job.byterange
data = self._fetchandmaybedecrypt(job, eff_byterange)
self._atomicwrite(seg_path, prepend + data)
return job.index, seg_path
exceptions: List[Exception] = []
with cf.ThreadPoolExecutor(max_workers=self.concurrency) as ex:
futures = [ex.submit(worker_func, j) for j in jobs]
for fut in cf.as_completed(futures):
try:
idx, path = fut.result()
seg_paths[idx] = path
except Exception as e:
exceptions.append(e)
finally:
progress.advance(progress_id, 1)
num_downloaded_segs = int(progress.tasks[progress_id].completed)
progress.update(progress_id, description=f"HLSDownloader._downloadallsegments >>> completed ({num_downloaded_segs}/{len(jobs)})")
if exceptions: raise exceptions[0]
return [p for p in seg_paths if p is not None]
'''_fetchandmaybedecrypt'''
    def _fetchandmaybedecrypt(self, job: SegmentJob, eff_byterange: Optional[str]) -> bytes:
        """Fetch one segment and, when the job carries an encryption method, decrypt it.

        Args:
            job: segment description (URI, key method / URI / IV, keyformat, sequence data).
            eff_byterange: "length@offset" sub-range of the resource to return, or None for the whole resource.

        Returns:
            The segment bytes; for encrypted byte-range segments exactly `length` bytes cut out of the
            decrypted, block-aligned window.

        Raises:
            NotImplementedError: for a KEYFORMAT other than "identity" or a method classified as DRM / UNSUPPORTED.
            RuntimeError: when an encrypted segment has no key URI.
        """
        method_raw, keyformat = (job.key_method or "").strip(), (job.keyformat or "").strip().lower()
        # no method, or METHOD=NONE: plain download
        if not method_raw or method_raw.upper() == "NONE": return self._fetchbytes(job.uri, eff_byterange)
        if keyformat and keyformat not in ("identity",): raise NotImplementedError(f"Unsupported KEYFORMAT={job.keyformat} (likely DRM).")
        method = method_raw.upper().replace("_", "-")
        dec_mode = self._classifyencryptionmethod(method)
        if dec_mode in ("DRM", "UNSUPPORTED"): raise NotImplementedError(f"Unsupported encryption method: {method_raw}")
        if not job.key_uri: raise RuntimeError(f"Encrypted segment missing key URI at seg {job.index}")
        # without an explicit IV, _deriveiv falls back to the number media_sequence + index
        key, base_iv = self._prepareaeskey(method, self._getkeybytes(job.key_uri)), self._deriveiv(job.key_iv, job.media_sequence + job.index)
        if not eff_byterange: ciphertext = self._fetchbytes(job.uri, None); return self._decryptwhole(ciphertext, dec_mode, key, base_iv)
        # NOTE(review): the range path treats offset/length as positions inside one continuously
        # encrypted resource and decrypts only the 16-byte-aligned window around them - confirm this
        # matches how the served streams are encrypted.
        length, offset = self._parsebyterange(eff_byterange)
        block, end = 16, offset + length
        aligned_start, aligned_end = (offset // block) * block, int(math.ceil(end / block) * block)
        if dec_mode == "CBC":
            # CBC needs the preceding ciphertext block as chaining input: when the window does not start at
            # byte 0, fetch one extra block in front and drop its (meaningless) decryption through `drop`
            fetch_start, drop = ((aligned_start - block, offset - aligned_start + block) if aligned_start > 0 else (aligned_start, offset - aligned_start)); fetch_len = aligned_end - fetch_start; fetch_range = f"{fetch_len}@{fetch_start}"
            ciphertext = self._fetchbytes(job.uri, fetch_range)
            # the real IV only applies when decryption starts at byte 0; otherwise the first block is discarded anyway
            iv = (b"\x00" * 16) if fetch_start > 0 else base_iv
            plaintext = self._aescbcdecrypt(ciphertext, key, iv)
            return plaintext[drop: drop+length]
        else:
            fetch_start, drop, fetch_len, fetch_range = aligned_start, offset - aligned_start, aligned_end - aligned_start, f"{aligned_end - aligned_start}@{aligned_start}"
            ciphertext = self._fetchbytes(job.uri, fetch_range)
            # CTR: advance the 128-bit counter by the number of whole blocks skipped (wrapping at 2**128)
            block_index = fetch_start // block
            iv_int = int.from_bytes(base_iv, "big")
            adj_iv = ((iv_int + block_index) % (1 << 128)).to_bytes(16, "big")
            plaintext = self._aesctrcrypt(ciphertext, key, adj_iv)
            return plaintext[drop: drop+length]
'''_decryptwhole'''
def _decryptwhole(self, ciphertext: bytes, dec_mode: str, key: bytes, iv: bytes) -> bytes:
if dec_mode == "CBC": return self._aescbcdecrypt(ciphertext, key, iv)
if dec_mode == "CTR": return self._aesctrcrypt(ciphertext, key, iv)
raise NotImplementedError(f"decrypt mode {dec_mode} not supported")
'''_classifyencryptionmethod'''
def _classifyencryptionmethod(self, method: str) -> str:
m = method.strip().upper()
if m in ("AES-128", "AES-128-CBC", "AES-CBC", "CBC"): return "CBC"
if m in ("AES-CTR", "AES-128-CTR", "AES-192-CTR", "AES-256-CTR"): return "CTR"
if m.startswith("SAMPLE-AES") or "SKD" in m: return "DRM"
return "UNSUPPORTED"
'''_getkeybytes'''
def _getkeybytes(self, key_uri: str) -> bytes:
if key_uri.startswith("data:"):
if "base64," in key_uri: b64 = key_uri.split("base64,", 1)[1]; return base64.b64decode(b64)
if "," in key_uri: raw = key_uri.split(",", 1)[1]; return raw.encode("utf-8", errors="ignore")
raise ValueError("Unsupported data: key URI")
if key_uri.startswith("skd://"): raise NotImplementedError("skd:// indicates DRM (FairPlay). Not supported.")
with self._key_cache_lock:
if key_uri in self._key_cache: return self._key_cache[key_uri]
b = self._getbytes(key_uri)
with self._key_cache_lock: self._key_cache[key_uri] = b
return b
'''_decodekeyguess'''
def _decodekeyguess(self, key_bytes: bytes) -> bytes:
b = key_bytes.strip()
if b"\x00" in b: return b
b2 = b
if b2.lower().startswith(b"0x"): b2 = b2[2:]
if re.fullmatch(rb"[0-9a-fA-F]+", b2) and len(b2) in (32, 48, 64):
try: return bytes.fromhex(b2.decode("ascii"))
except Exception: pass
if re.fullmatch(rb"[A-Za-z0-9+/=\r\n]+", b) and (len(b) % 4 == 0):
try:
dec = base64.b64decode(b, validate=False)
if len(dec) in (16, 24, 32): return dec
except Exception:
pass
return b
'''_expectedkeylen'''
def _expectedkeylen(self, method: str) -> int:
m = method.upper()
if "256" in m: return 32
if "192" in m: return 24
return 16
'''_prepareaeskey'''
def _prepareaeskey(self, method: str, key_bytes: bytes) -> bytes:
k = self._decodekeyguess(key_bytes)
want = self._expectedkeylen(method)
if len(k) == want: return k
if self.strict_key_length: raise ValueError(f"Bad key length for {method}: got {len(k)} bytes, expected {want}")
self.logger_handle.warning(f"Key length mismatch for {method}: got {len(k)}, expected {want}. Best-effort fix.", disable_print=self.disable_print)
if len(k) > want: return k[:want]
return (k + b"\x00" * want)[:want]
'''_deriveiv'''
def _deriveiv(self, iv_str: Optional[str], seq_num: int) -> bytes:
if not iv_str: return seq_num.to_bytes(16, byteorder="big", signed=False)
s = str(iv_str).strip().lower()
if s.startswith("0x"): s = s[2:]
try: iv = bytes.fromhex(s)
except Exception: iv = s.encode("utf-8", errors="ignore")
if len(iv) < 16: iv = (b"\x00" * (16 - len(iv))) + iv
if len(iv) > 16: iv = iv[-16:]
return iv
'''_aescbcdecrypt'''
def _aescbcdecrypt(self, ciphertext: bytes, key: bytes, iv: bytes) -> bytes:
if len(ciphertext) % 16 != 0: raise ValueError(f"CBC ciphertext length not multiple of 16: {len(ciphertext)} bytes")
cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
dec = cipher.decryptor()
return dec.update(ciphertext) + dec.finalize()
'''_aesctrcrypt'''
def _aesctrcrypt(self, data: bytes, key: bytes, iv: bytes) -> bytes:
cipher = Cipher(algorithms.AES(key), modes.CTR(iv))
dec = cipher.decryptor()
return dec.update(data) + dec.finalize()
'''_parsebyterange'''
def _parsebyterange(self, s: str) -> Tuple[int, int]:
s = s.strip()
if "@" in s: a, b = s.split("@", 1); return int(a), int(b)
raise ValueError(f"BYTERANGE missing offset: {s}")
'''_normalizebyterange'''
def _normalizebyterange(self, uri: str, byterange: str, cursor: Dict[str, int]) -> str:
s = byterange.strip()
if "@" in s: length, offset = s.split("@", 1); length_i, offset_i = int(length), int(offset); cursor[uri] = offset_i + length_i; return f"{length_i}@{offset_i}"
length_i = int(s)
prev = cursor.get(uri, 0)
cursor[uri] = prev + length_i
return f"{length_i}@{prev}"
'''_mergefiles'''
def _mergefiles(self, global_init_path: Optional[str], seg_paths: List[str], output_path: str) -> None:
tmp_out = output_path + ".part"
with open(tmp_out, "wb") as out:
if global_init_path and self._fileok(global_init_path):
with open(global_init_path, "rb") as fp: shutil.copyfileobj(fp, out, length=1024 * 1024)
for p in seg_paths:
with open(p, "rb") as fp: shutil.copyfileobj(fp, out, length=1024 * 1024)
os.replace(tmp_out, output_path)
'''_safenamefromurl'''
def _safenamefromurl(self, url: str, max_len: int = 20) -> str:
return hashlib.sha256(url.encode("utf-8")).hexdigest()[:max_len]
'''_fileok'''
def _fileok(self, path: str) -> bool:
return os.path.exists(path) and os.path.getsize(path) > 0
'''_atomicwrite'''
def _atomicwrite(self, path: str, data: bytes) -> None:
touchdir(os.path.dirname(os.path.abspath(path)) or ".")
pid, tid = os.getpid(), threading.get_ident()
tmp, last = f"{path}.tmp.{pid}.{tid}.{time.time_ns()}", None
with open(tmp, "wb") as fp:
fp.write(data)
try: fp.flush(); os.fsync(fp.fileno())
except Exception: pass
for i in range(12):
try: os.replace(tmp, path); return
except PermissionError as e: last = e; time.sleep(min(0.5, 0.03 * (2 ** i)))
except OSError as e: last = e; time.sleep(min(0.5, 0.03 * (2 ** i)))
try:
if os.path.exists(tmp): os.remove(tmp)
except Exception:
pass
raise last
@@ -0,0 +1,53 @@
'''
Function:
Implementation of URL Domain Related Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
from functools import lru_cache
from urllib.parse import urlsplit
'''settings'''
# Host names grouped per music service, one set per service.
# NOTE(review): presumably these sets are passed as the `suffixes` argument of hostmatchessuffix()
# below, which accepts a host equal to an entry or any subdomain of it - the call sites are not in
# this module, so confirm. Under that matching a bare domain such as "apple.com" or "douyin.com"
# also covers every subdomain.
APPLE_MUSIC_HOSTS = {"music.apple.com", "geo.music.apple.com", "embed.music.apple.com", "itunes.apple.com", "geo.itunes.apple.com", "apple.com"}
DEEZER_MUSIC_HOSTS = {"deezer.com", "www.deezer.com", "deezer.page.link",}
FIVESING_MUSIC_HOSTS = {"5sing.kugou.com",}
JOOX_MUSIC_HOSTS = {"joox.com",}
JAMENDO_MUSIC_HOSTS = {"jamendo.com",}
KUWO_MUSIC_HOSTS = {"kuwo.cn", "www.kuwo.cn", "m.kuwo.cn", "mobile.kuwo.cn",}
KUGOU_MUSIC_HOSTS = {"www.kugou.com", "m.kugou.com", "kugou.com", "h5.kugou.com",}
MIGU_MUSIC_HOSTS = {"music.migu.cn", "m.music.migu.cn", "h5.nf.migu.cn", "c.migu.cn", "migu.cn"}
NETEASE_MUSIC_HOSTS = {"music.163.com", "y.music.163.com", "m.music.163.com", "3g.music.163.com", "163cn.tv",}
QQ_MUSIC_HOSTS = {"y.qq.com", "i.y.qq.com", "m.y.qq.com", "c.y.qq.com", "c6.y.qq.com", "music.qq.com",}
QIANQIAN_MUSIC_HOSTS = {"music.91q.com", "music.taihe.com", "music.baidu.com"}
QOBUZ_MUSIC_HOSTS = {"open.qobuz.com", "play.qobuz.com", "www.qobuz.com", "qobuz.com"}
STREETVOICE_MUSIC_HOSTS = {"streetvoice.cn"}
SOUNDCLOUD_MUSIC_HOSTS = {"soundcloud.com"}
SODA_MUSIC_HOSTS = {"qishui.douyin.com", "music.douyin.com", "www.qishui.com", "www.douyin.com", "z-qishui.douyin.com", "lf-luna-release.qishui.com", "luna-web.douyin.com", "bubble.qishui.com", "qishui.com", "douyin.com"}
SPOTIFY_MUSIC_HOSTS = {"open.spotify.com", "spotify.link", "play.spotify.com", "spotify.com"}
TIDAL_MUSIC_HOSTS = {"tidal.com", "listen.tidal.com", "embed.tidal.com",}
'''obtainhostname'''
@lru_cache(maxsize=200_000)
def obtainhostname(url: str) -> str | None:
    """Return the lower-cased host name of url without leading/trailing dots, or None.

    A url without "://" is parsed as if it started with "https://". None is returned for an empty
    url, a url that cannot be parsed, or one that has no host. Results are memoised by lru_cache.
    """
    if not url: return None
    candidate = url.strip()
    candidate = candidate if "://" in candidate else "https://" + candidate
    try:
        hostname = urlsplit(candidate).hostname
    except Exception:
        return None
    if not hostname: return None
    return hostname.lower().strip(".")
'''hostmatchessuffix'''
def hostmatchessuffix(host: str | None, suffixes: set[str]) -> bool:
    """Return True when host equals one of suffixes or is a subdomain of one.

    Both sides are compared lower-cased with leading/trailing dots removed; an empty or None host
    never matches.
    """
    if not host: return False
    normalized = host.lower().strip(".")
    cleaned = (suffix.lower().strip(".") for suffix in suffixes)
    return any(normalized == suffix or normalized.endswith("." + suffix) for suffix in cleaned)
@@ -0,0 +1,38 @@
'''
Function:
Implementation of Optional Import Related Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import sys
import warnings
import importlib
'''optionalimport'''
def optionalimport(name: str, show_warning: bool = False):
    """Import module name and return it, or return None when it is not installed.

    An entry already present in sys.modules is returned directly. Missing modules are remembered in
    optionalimport._missing, so with show_warning=True the ImportWarning is issued at most once per
    name.
    """
    try:
        return sys.modules[name]
    except KeyError:
        pass
    try:
        return importlib.import_module(name)
    except ModuleNotFoundError:
        seen = getattr(optionalimport, "_missing", set())
        first_failure = name not in seen
        if first_failure and show_warning: warnings.warn(f'Optional dependency "{name}" is not installed; skipping import.', category=ImportWarning, stacklevel=2)
        seen.add(name)
        optionalimport._missing = seen
        return None
'''optionalimportfrom'''
def optionalimportfrom(module: str, attr: str, show_warning: bool = False):
    """Return module.attr, importing "module.attr" as a submodule when it is not an attribute; None on failure.

    Failures (ModuleNotFoundError or AttributeError) are remembered per (module, attr) pair in
    optionalimportfrom._missing, so with show_warning=True the ImportWarning is issued once per pair.
    """
    try:
        loaded = sys.modules.get(module) or importlib.import_module(module)
        if hasattr(loaded, attr): return getattr(loaded, attr)
        return importlib.import_module(f"{module}.{attr}")
    except (ModuleNotFoundError, AttributeError):
        pair = (module, attr)
        seen = getattr(optionalimportfrom, "_missing", set())
        first_failure = pair not in seen
        if first_failure and show_warning: warnings.warn(f"Optional import failed: from {module} import {attr}", ImportWarning, stacklevel=2)
        seen.add(pair)
        optionalimportfrom._missing = seen
        return None
+103
View File
@@ -0,0 +1,103 @@
'''
Function:
Implementation of RandomIPGenerator
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import random
import requests
import ipaddress
from bisect import bisect
from typing import List, Optional, Sequence
'''RandomIPGenerator'''
class RandomIPGenerator:
def __init__(self, default_ipv4_prefixes: Optional[Sequence[str]] = None, default_ipv6_prefixes: Optional[Sequence[str]] = None, max_attempts: int = 10000):
self.max_attempts = max_attempts
self.default_ipv4_prefixes: List[str] = list(default_ipv4_prefixes or [])
self.default_ipv6_prefixes: List[str] = list(default_ipv6_prefixes or [])
'''ipv4'''
def ipv4(self, prefix: Optional[str] = None) -> str:
if prefix is None and self.default_ipv4_prefixes: prefix = random.choice(self.default_ipv4_prefixes)
if prefix is not None: return self._randomipv4inprefix(prefix)
else: return self._randomglobalipv4()
'''ipv6'''
def ipv6(self, prefix: Optional[str] = None) -> str:
if prefix is None and self.default_ipv6_prefixes: prefix = random.choice(self.default_ipv6_prefixes)
if prefix is not None: return self._randomipv6inprefix(prefix)
else: return self._randomglobalipv6()
'''randomipv4scn'''
def randomipv4scn(self, num_samples: int = 1) -> List[str]:
def buildsampler_func(blocks):
cum, s = [], 0
for _, c in blocks: s += c; cum.append(s)
total = s
def sample_func(n=10): out=[]; [out.append(str(ipaddress.IPv4Address((lambda bc: bc[0]+random.randrange(bc[1]))(blocks[bisect(cum, random.randrange(total))])))) for _ in range(n)]; return out
return sample_func
blocks = self._loadcnipv4blocks()
sampler = buildsampler_func(blocks)
return sampler(num_samples)
'''addrandomipv4toheaders'''
def addrandomipv4toheaders(self, headers: dict = None, prefix: Optional[str] = None) -> dict:
assert isinstance(headers, dict), f'input "headers" should be "dict", but get {type(headers)}'
random_ip = self.ipv4(prefix=prefix)
headers.update({"X-Forwarded-For": random_ip, "X-Real-IP": random_ip, "Forwarded": f"for={random_ip};proto=https"})
return headers
'''_loadcnipv4blocks'''
def _loadcnipv4blocks(self):
text = requests.get("https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest", timeout=30).text.splitlines()
blocks = []
for line in text:
if not line or line.startswith("#"): continue
parts = line.strip().split("|")
if len(parts) < 7: continue
_, cc, rtype, start, value, _, status = parts[:7]
if cc != "CN" or rtype != "ipv4": continue
if status not in ("allocated", "assigned"): continue
base = int(ipaddress.IPv4Address(start))
count = int(value)
if count > 0: blocks.append((base, count))
if not blocks: raise RuntimeError("No CN IPv4 blocks found. Check APNIC file format/URL.")
return blocks
'''_randomipv4inprefix'''
def _randomipv4inprefix(self, prefix: str) -> str:
net = ipaddress.IPv4Network(prefix, strict=False)
if net.prefixlen <= 30:
network_int = int(net.network_address)
broadcast_int = int(net.broadcast_address)
if broadcast_int - network_int <= 2: candidate_int = random.randint(network_int, broadcast_int)
else: candidate_int = random.randint(network_int + 1, broadcast_int - 1)
else:
offset = random.randrange(net.num_addresses)
candidate_int = int(net.network_address) + offset
addr = ipaddress.IPv4Address(candidate_int)
return str(addr)
'''_randomglobalipv4'''
def _randomglobalipv4(self) -> str:
attempts = 0
while attempts < self.max_attempts:
attempts += 1
candidate_int = random.getrandbits(32)
addr = ipaddress.IPv4Address(candidate_int)
if addr.is_global: return str(addr)
return str(addr)
'''_randomipv6inprefix'''
def _randomipv6inprefix(self, prefix: str) -> str:
net = ipaddress.IPv6Network(prefix, strict=False)
host_bits = 128 - net.prefixlen
rand_host = random.getrandbits(host_bits)
addr_int = int(net.network_address) + rand_host
addr = ipaddress.IPv6Address(addr_int)
return str(addr)
'''_randomglobalipv6'''
def _randomglobalipv6(self) -> str:
attempts = 0
while attempts < self.max_attempts:
attempts += 1
candidate_int = random.getrandbits(128)
addr = ipaddress.IPv6Address(candidate_int)
if addr.is_global: return str(addr)
return str(addr)
@@ -0,0 +1,181 @@
'''
Function:
Implementation of KugouMusicClient Utils
>>> old api: https://trackercdn.kugou.com/i/?cmd=4&pid=1&forceDown=0&vip=1&hash={file_hash}&key={MD5(file_hash+kgcloud)}
>>> webv2 play: https://trackercdnbj.kugou.com/i/v2/?cmd=23&pid=1&behavior=play&hash={file_hash}&key={MD5(file_hash+kgcloudv2)}
>>> appv2 play: https://trackercdn.kugou.com/i/v2/?appid=1005&pid=2&cmd=25&behavior=play&hash={file_hash}&key={MD5(file_hash+kgcloudv2)}
>>> appv2 download: https://trackercdn.kugou.com/i/v2/?cdnBackup=1&behavior=download&pid=1&cmd=21&appid=1001&hash={file_hash}&key={MD5(file_hash+kgcloudv2)}
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import json
import uuid
import time
import random
import base64
import hashlib
import requests
from Crypto.PublicKey import RSA
from .misc import safeextractfromdict
from typing import Any, Dict, Optional
from Crypto.Cipher import AES, PKCS1_v1_5
'''settings'''
# IS_LITE switches every value below between two credential sets: the first alternative of each
# conditional is used when it is True, the second otherwise.
# NOTE(review): presumably "lite" refers to a lite edition of the Kugou app - confirm.
IS_LITE = True
# sent as the "appid" and "clientver" request parameters by sendrequest
APPID = 3116 if IS_LITE else 1005
CLIENTVER = 11440 if IS_LITE else 20489
# quality identifiers; presumably ordered from best to worst - TODO confirm against the client code
MUSIC_QUALITIES = ('viper_tape', 'viper_clear', 'viper_atmos', 'flac', 'high', '320', '128')
# secret wrapped around the sorted parameter string in signatureweb
SIGNATURE_WEB_SECRET = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
# secret mixed into the MD5 computed by signkey
SIGN_KEY_SECRET = "185672dd44712f60bb1736df5a377e82" if IS_LITE else "57ae12eb6890223e355ccfcb74edf70d"
# secret wrapped around the sorted parameter string in signatureandroid
SIGNATURE_ANDROID_SECRET = "LnT6xpN3khm36zse0QzvmgTZ3waWdRSA" if IS_LITE else "OIlwieks28dk2k092lksi2UIkp"
# PEM public key, default argument of rsaencryptpkcs1 (one key per IS_LITE setting)
PUBLIC_RSA_KEY = """-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDECi0Np2UR87scwrvTr72L6oO01rBbbBPriSDFPxr3Z5syug0O24QyQO8bg27+0+4kBzTBTBOZ/WWU0WryL1JSXRTXLgFVxtzIY41Pe7lPOgsfTCn5kZcvKhYKJesKnnJDNr5/abvTGf+rHG3YRwsCHcQ08/q6ifSioBszvb3QiwIDAQAB
-----END PUBLIC KEY-----""" if IS_LITE else """-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDIAG7QOELSYoIJvTFJhMpe1s/gbjDJX51HBNnEl5HXqTW6lQ7LC8jr9fWZTwusknp+sVGzwd40MwP6U5yDE27M/X1+UR4tvOGOqp94TJtQ1EPnWGWXngpeIW5GxoQGao1rmYWAu6oi1z9XkChrsUdC6DJE5E221wf/4WLFxwAtRQIDAQAB
-----END PUBLIC KEY-----"""
'''KugouMusicClientUtils'''
class KugouMusicClientUtils:
'''md5hex'''
@staticmethod
def md5hex(data: Any) -> str:
if isinstance(data, (dict, list)): data = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
if isinstance(data, str): data = data.encode("utf-8")
return hashlib.md5(data).hexdigest()
'''randomstring'''
@staticmethod
def randomstring(length=16) -> str:
chars = "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ"
return "".join(random.choice(chars) for _ in range(length))
'''calculatemid'''
@staticmethod
def calculatemid(seed: str) -> str:
return str(int(hashlib.md5(seed.encode("utf-8")).hexdigest(), 16))
'''pad'''
@staticmethod
def pad(data: bytes, block_size: int = 16) -> bytes:
pad_len = block_size - len(data) % block_size
return data + bytes([pad_len]) * pad_len
'''unpad'''
@staticmethod
def unpad(data: bytes) -> bytes:
pad_len = data[-1]
return data[:-pad_len]
'''rsaencryptpkcs1'''
@staticmethod
def rsaencryptpkcs1(data: Any, public_key_pem: str = PUBLIC_RSA_KEY) -> str:
if isinstance(data, (dict, list)): data = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
if isinstance(data, str): data = data.encode("utf-8")
rsa_key = RSA.import_key(public_key_pem)
cipher = PKCS1_v1_5.new(rsa_key)
enc = cipher.encrypt(data)
return enc.hex()
'''signatureandroid'''
@staticmethod
def signatureandroid(params: Dict[str, Any], data: str = "") -> str:
params_string = "".join(f"{k}={json.dumps(params[k], separators=(',', ':'), ensure_ascii=False) if isinstance(params[k], (dict, list)) else params[k]}" for k in sorted(params.keys()))
return KugouMusicClientUtils.md5hex(f"{SIGNATURE_ANDROID_SECRET}{params_string}{data}{SIGNATURE_ANDROID_SECRET}")
'''signatureandroidwithsecret'''
@staticmethod
def signatureandroidwithsecret(params: Dict[str, Any], data: str, secret: str = "OIlwieks28dk2k092lksi2UIkp") -> str:
params_string = "".join(f"{k}={json.dumps(params[k], separators=(',', ':'), ensure_ascii=False) if isinstance(params[k], (dict, list)) else params[k]}" for k in sorted(params.keys()))
return KugouMusicClientUtils.md5hex(f"{secret}{params_string}{data}{secret}")
'''signatureweb'''
@staticmethod
def signatureweb(params: Dict[str, Any]) -> str:
params_string = "".join(f"{k}={params[k]}" for k in sorted(params.keys()))
return KugouMusicClientUtils.md5hex(f"{SIGNATURE_WEB_SECRET}{params_string}{SIGNATURE_WEB_SECRET}")
'''signkey'''
@staticmethod
def signkey(hash_value: str, mid: str, userid: str, appid: str) -> str:
return KugouMusicClientUtils.md5hex(f"{hash_value}{SIGN_KEY_SECRET}{appid}{mid}{userid or 0}")
'''initdevice'''
@staticmethod
def initdevice(cookies: dict = None):
cookies = cookies or {}
guid = str(uuid.uuid4())
mid = KugouMusicClientUtils.calculatemid(guid)
cookies["KUGOU_API_GUID"] = guid
cookies["KUGOU_API_MID"] = mid
cookies["KUGOU_API_MAC"] = KugouMusicClientUtils.randomstring(12)
cookies["KUGOU_API_DEV"] = KugouMusicClientUtils.randomstring(16)
return cookies
'''updatecookies'''
@staticmethod
def updatecookies(resp: requests.Response, cookies: dict):
for k, v in resp.cookies.items(): cookies[k] = v
return cookies
'''sendrequest'''
@staticmethod
def sendrequest(session: requests.Session, method: str, url: str, params: Optional[Dict[str, Any]] = None, data: Optional[Any] = None, headers: Optional[Dict[str, str]] = None, encrypt_type: str = "android", base_url: str = "https://gateway.kugou.com", encrypt_key: bool = False, not_sign: bool = False, response_type: Optional[str] = None, cookies: Optional[Dict[str, str]] = None, cookies_override: Optional[Dict[str, str]] = None, request_overrides: dict = None):
# init
clienttime = int(time.time())
params, headers, used_cookies, request_overrides = params or {}, headers or {}, dict(cookies), request_overrides or {}
if cookies_override: used_cookies.update(cookies_override)
token, dfid, userid, mid = used_cookies.get("token", ""), used_cookies.get("dfid", "-"), used_cookies.get("userid", 0), used_cookies.get("KUGOU_API_MID", "-")
# construct params
default_params = {"dfid": dfid, "mid": mid, "uuid": "-", "appid": APPID, "clientver": CLIENTVER, "clienttime": clienttime}
if token: default_params["token"] = token
if userid: default_params["userid"] = userid
params = {**default_params, **params}
# encrypt key
if encrypt_key: params["key"] = KugouMusicClientUtils.signkey(params["hash"], params["mid"], params.get("userid"), params["appid"])
# signature
data_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False) if isinstance(data, (dict, list)) else (data or "")
if not_sign:
if "signature" in params: params.pop("signature", None)
else:
if "signature" not in params: params["signature"] = KugouMusicClientUtils.signatureweb(params) if encrypt_type == "web" else KugouMusicClientUtils.signatureandroid(params, data_str)
# construct headers
base_headers = {"User-Agent": "Android15-1070-11083-46-0-DiscoveryDRADProtocol-wifi", "dfid": dfid, "clienttime": str(params["clienttime"]), "mid": mid, "kg-rc": "1", "kg-thash": "5d816a0", "kg-rec": "1", "kg-rf": "B9EDA08A64250DEFFBCADDEE00F8F25F"}
final_headers = {**base_headers, **headers}
# send request
resp = session.request(method, f"{base_url}{url}", params=params, json=data, headers=final_headers, **request_overrides) if isinstance(data, (dict, list)) else session.request(method, f"{base_url}{url}", params=params, data=data, headers=final_headers, **request_overrides)
resp.raise_for_status()
KugouMusicClientUtils.updatecookies(resp, cookies)
# return
if response_type == "arraybuffer": return resp.content
try: return resp.json()
except Exception: return resp.text
'''registerdevice'''
@staticmethod
def registerdevice(session: requests.Session, cookies: dict, request_overrides: dict = None):
    """Register a synthetic device profile and return the service's reply.

    A fixed device description is serialised to compact JSON, AES-CBC encrypted
    with a key/iv taken from the two halves of ``md5hex`` of a random 6-character
    string, base64 encoded and POSTed to ``/risk/v2/r_register_dev``. The random
    string travels RSA-encrypted in the ``p`` query parameter.

    The reply is first tried as plain UTF-8 JSON; if that yields nothing truthy
    it is AES-decrypted with the same key/iv. When the decrypted reply has
    ``status == 1`` and carries ``data.dfid``, that value is stored into
    ``cookies["dfid"]`` (the caller's dict is mutated).

    Args:
        session: session used to send the request.
        cookies: cookie dict; read for ``KUGOU_API_GUID``, ``userid``, ``token``.
        request_overrides: extra keyword arguments forwarded to ``sendrequest``.

    Returns:
        The parsed JSON reply.
    """
    utils = KugouMusicClientUtils
    guid = cookies.get("KUGOU_API_GUID")
    # device profile; key order is kept stable because it is serialised as-is
    data_map = {
        "availableRamSize": 4983533568, "availableRomSize": 48114719, "availableSDSize": 48114717,
        "basebandVer": "", "batteryLevel": 100, "batteryStatus": 3, "brand": "Redmi", "buildSerial": "unknown",
        "device": "marble", "imei": guid, "imsi": "", "manufacturer": "Xiaomi", "uuid": guid,
    }
    for sensor in ("accelerometer", "gravity", "gyroscope", "light", "magnetic", "orientation", "pressure", "step_counter", "temperature"):
        data_map[sensor] = False
        data_map[f"{sensor}Value"] = ""
    # symmetric key material: first / second 16 hex chars of the md5 of a random seed
    aes_key = utils.randomstring(6).lower()
    digest = utils.md5hex(aes_key)
    key_bytes, iv_bytes = digest[:16].encode("utf-8"), digest[16: 32].encode("utf-8")
    encryptor = AES.new(key_bytes, AES.MODE_CBC, iv_bytes)
    plaintext = json.dumps(data_map, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
    aes_body = base64.b64encode(encryptor.encrypt(utils.pad(plaintext))).decode("utf-8")
    p = utils.rsaencryptpkcs1({"aes": aes_key, "uid": cookies.get("userid", 0), "token": cookies.get("token", "")})
    # send request
    resp_raw: bytes = utils.sendrequest(
        session, "POST", "/risk/v2/r_register_dev", params={"part": 1, "platid": 1, "p": p}, data=aes_body, base_url="https://userservice.kugou.com",
        encrypt_type="android", response_type="arraybuffer", cookies=cookies, request_overrides=request_overrides,
    )
    # plain JSON reply: best effort, any failure falls through to the encrypted path
    try:
        text: str = resp_raw.decode("utf-8")
        plain_result = json.loads(text) if text.startswith("{") else None
    except Exception:
        plain_result = None
    if plain_result:
        return plain_result
    # encrypted reply
    decryptor = AES.new(key_bytes, AES.MODE_CBC, iv_bytes)
    result: dict = json.loads(utils.unpad(decryptor.decrypt(resp_raw)).decode("utf-8"))
    if result.get("status") == 1 and safeextractfromdict(result, ['data', 'dfid'], None):
        cookies["dfid"] = result["data"]["dfid"]
    return result
'''getsongurl'''
@staticmethod
def getsongurl(session: requests.Session, hash_value: str, album_id: int = 0, album_audio_id: int = 0, quality: str = "128", free_part: bool = False, cookies: dict = None, request_overrides: dict = None):
    """Query ``/v5/url`` for the playback information of one track.

    Args:
        session: session used to send the request.
        hash_value: track hash; lower-cased before sending.
        album_id: album id, coerced with ``int``.
        album_audio_id: album-audio id, coerced with ``int``.
        quality: quality value passed through unchanged.
        free_part: sent as ``IsFreePart`` 1/0.
        cookies: cookie dict forwarded to ``sendrequest``.
        request_overrides: extra keyword arguments forwarded to ``sendrequest``.

    Returns:
        Whatever ``KugouMusicClientUtils.sendrequest`` returns for this call.
    """
    # three fields differ between the regular and the lite client flavour
    if IS_LITE:
        page_id, pid, ppage_id = 967177915, 411, "356753938,823673182,967485191"
    else:
        page_id, pid, ppage_id = 151369488, 2, "463467626,350369493,788954147"
    params = {
        "album_id": int(album_id), "area_code": 1, "hash": hash_value.lower(), "ssa_flag": "is_fromtrack", "version": 11436, "page_id": page_id,
        "quality": quality, "album_audio_id": int(album_audio_id), "behavior": "play", "pid": pid, "cmd": 26, "pidversion": 3001,
        "IsFreePart": 1 if free_part else 0, "ppage_id": ppage_id, "cdnBackup": 1, "kcard": 0, "module": "",
    }
    # a fresh random dfid is used for this request only (cookies_override)
    return KugouMusicClientUtils.sendrequest(
        session, "GET", "/v5/url", params=params, headers={"x-router": "trackercdn.kugou.com"}, encrypt_type="android", encrypt_key=True,
        cookies=cookies, cookies_override={'dfid': KugouMusicClientUtils.randomstring(24)}, request_overrides=request_overrides,
    )
@@ -0,0 +1,193 @@
'''
Function:
Implementation of KuwoMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import math
import zlib
import base64
'''settings'''
# all-ones bit masks used to clamp Python's unbounded ints to 32 / 64 bits
MASK32 = 0xFFFFFFFF
MASK64 = 0xFFFFFFFFFFFFFFFF
'''HelperFunctions'''
class HelperFunctions():
    """Small integer / sequence helpers used by the cipher code in this module."""
    @staticmethod
    def u64(x: int) -> int:
        """Keep only the low 64 bits of ``x``."""
        return x & MASK64
    @staticmethod
    def u32(x: int) -> int:
        """Keep only the low 32 bits of ``x``."""
        return x & MASK32
    @staticmethod
    def rangen(n: int):
        """Return ``range(n)``."""
        return range(0, n)
    @staticmethod
    def power2(n: int) -> int:
        """Return ``1 << n``."""
        return 1 << n
    @staticmethod
    def longarray(*arr):
        """Collect the positional arguments into a new list."""
        return [*arr]
'''settings'''
# SECRET_KEY_SONG is the 8-byte key passed to crypt() by encrypt()/decrypt();
# SECRET_KEY_LYRIC is the repeating XOR key used by buildlyricsparams()/decodelyrics().
SECRET_KEY_SONG, SECRET_KEY_LYRIC = b"ylzsxkwm", b'yeelion'
# Per-round index (one entry for each of the 16 rounds) read by subkeys() as r = ARRAYLS[i].
ARRAYLS = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1]
# Bit masks indexed by the ARRAYLS value; subkeys() moves the masked bits left by (28 - r) and the rest right by r.
ARRAYLSMASK = HelperFunctions.longarray(0, 0x100001, 0x300003)
# Bit-selection tables consumed by bittransform(): output bit i is taken from input bit table[i];
# entries of -1 are skipped and leave the output bit clear.
# ARRAYE is applied in des64() to the half block p_source[1] at the start of every round.
ARRAYE = HelperFunctions.longarray(31, 0, 1, 2, 3, 4, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1, 7, 8, 9, 10, 11, 12, -1, -1, 11, 12, 13, 14, 15, 16, -1, -1, 15, 16, 17, 18, 19, 20, -1, -1, 19, 20, 21, 22, 23, 24, -1, -1, 23, 24, 25, 26, 27, 28, -1, -1, 27, 28, 29, 30, 31, 30, -1, -1)
# ARRAYIP1 is the last permutation applied by des64() before it returns.
ARRAYIP1 = HelperFunctions.longarray(39, 7, 47, 15, 55, 23, 63, 31, 38, 6, 46, 14, 54, 22, 62, 30, 37, 5, 45, 13, 53, 21, 61, 29, 36, 4, 44, 12, 52, 20, 60, 28, 35, 3, 43, 11, 51, 19, 59, 27, 34, 2, 42, 10, 50, 18, 58, 26, 33, 1, 41, 9, 49, 17, 57, 25, 32, 0, 40, 8, 48, 16, 56, 24)
# ARRAYIP2 is the first permutation applied by des64() to its input block.
ARRAYIP2 = HelperFunctions.longarray(57, 49, 41, 33, 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3, 61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7, 56, 48, 40, 32, 24, 16, 8, 0, 58, 50, 42, 34, 26, 18, 10, 2, 60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6)
# Single-bit masks: ARRAYMASK[n] == 1 << n for n in 0..62.
ARRAYMASK = [HelperFunctions.power2(n) for n in HelperFunctions.rangen(64)]
# The last mask is negated, i.e. -(1 << 63); bittransform() masks its result back to 64 bits.
# NOTE(review): presumably mirrors a signed 64-bit original — confirm before changing.
ARRAYMASK[-1] = -ARRAYMASK[-1]
# ARRAYP is applied in des64() to the 32-bit value assembled from the S-box outputs.
ARRAYP = HelperFunctions.longarray(15, 6, 19, 20, 28, 11, 27, 16, 0, 14, 22, 25, 4, 17, 30, 9, 1, 7, 23, 13, 31, 26, 2, 8, 18, 12, 29, 5, 21, 10, 3, 24)
# ARRAYPC1 is applied once to the key in subkeys() (56 output bits).
ARRAYPC1 = HelperFunctions.longarray(56, 48, 40, 32, 24, 16, 8, 0, 57, 49, 41, 33, 25, 17, 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 60, 52, 44, 36, 28, 20, 12, 4, 27, 19, 11, 3)
# ARRAYPC2 is applied in subkeys() after each rotation to produce the round key.
ARRAYPC2 = HelperFunctions.longarray(13, 16, 10, 23, 0, 4, -1, -1, 2, 27, 14, 5, 20, 9, -1, -1, 22, 18, 11, 3, 25, 7, -1, -1, 15, 6, 26, 19, 12, 1, -1, -1, 40, 51, 30, 36, 46, 54, -1, -1, 29, 39, 50, 44, 32, 47, -1, -1, 43, 48, 38, 55, 33, 52, -1, -1, 45, 41, 49, 35, 28, 31, -1, -1)
# Eight substitution tables of 64 entries each; des64() indexes them as MATRIXNSBOX[sbi][byte]
# and keeps the low 4 bits of the looked-up value.
MATRIXNSBOX = [
[14,4,3,15,2,13,5,3,13,14,6,9,11,2,0,5,4,1,10,12,15,6,9,10,1,8,12,7,8,11,7,0,0,15,10,5,14,4,9,10,7,8,12,3,13,1,3,6,15,12,6,11,2,9,5,0,4,2,11,14,1,7,8,13],
[15,0,9,5,6,10,12,9,8,7,2,12,3,13,5,2,1,14,7,8,11,4,0,3,14,11,13,6,4,1,10,15,3,13,12,11,15,3,6,0,4,10,1,7,8,4,11,14,13,8,0,6,2,15,9,5,7,1,10,12,14,2,5,9],
[10,13,1,11,6,8,11,5,9,4,12,2,15,3,2,14,0,6,13,1,3,15,4,10,14,9,7,12,5,0,8,7,13,1,2,4,3,6,12,11,0,13,5,14,6,8,15,2,7,10,8,15,4,9,11,5,9,0,14,3,10,7,1,12],
[7,10,1,15,0,12,11,5,14,9,8,3,9,7,4,8,13,6,2,1,6,11,12,2,3,0,5,14,10,13,15,4,13,3,4,9,6,10,1,12,11,0,2,5,0,13,14,2,8,15,7,4,15,1,10,7,5,6,12,11,3,8,9,14],
[2,4,8,15,7,10,13,6,4,1,3,12,11,7,14,0,12,2,5,9,10,13,0,3,1,11,15,5,6,8,9,14,14,11,5,6,4,1,3,10,2,12,15,0,13,2,8,5,11,8,0,15,7,14,9,4,12,7,10,9,1,13,6,3],
[12,9,0,7,9,2,14,1,10,15,3,4,6,12,5,11,1,14,13,0,2,8,7,13,15,5,4,10,8,3,11,6,10,4,6,11,7,9,0,6,4,2,13,1,9,15,3,8,15,3,1,14,12,5,11,0,2,12,14,7,5,10,8,13],
[4,1,3,10,15,12,5,0,2,11,9,6,8,7,6,9,11,4,12,15,0,3,10,5,14,13,7,8,13,14,1,2,13,6,14,9,4,1,2,14,11,13,5,0,1,10,8,3,0,11,3,5,9,4,15,2,7,8,12,15,10,7,6,12],
[13,7,10,0,6,9,5,15,8,4,3,10,11,14,12,5,2,11,9,6,15,12,0,3,4,1,14,13,1,2,7,8,1,2,12,15,10,4,0,3,13,14,6,9,7,8,9,6,15,1,5,12,3,10,14,5,8,7,11,0,4,13,2,11],
]
'''KuwoMusicClientUtils'''
class KuwoMusicClientUtils:
    """Cipher and lyric helpers used by the Kuwo client.

    The block-cipher methods (``bittransform``, ``des64``, ``subkeys``, ``crypt``)
    operate on 64-bit integers built little-endian from 8-byte groups and rely on
    the module-level tables. The remaining methods build and decode lyric payloads
    and convert Kuwo's word-timed lyric lines into plain LRC.
    """
    '''bittransform'''
    @staticmethod
    def bittransform(arr_int, n, l):
        """Select bits of ``l`` according to the table ``arr_int``.

        For every position ``i`` in ``range(n)``, output bit ``i`` is set when
        ``arr_int[i]`` is non-negative and bit ``arr_int[i]`` of ``l`` is set.
        The result is masked to 64 bits.
        """
        l2 = 0
        for i in HelperFunctions.rangen(n):
            idx = arr_int[i]
            if idx < 0: continue
            if (l & ARRAYMASK[idx]) == 0: continue
            l2 |= ARRAYMASK[i]
        return HelperFunctions.u64(l2)
    '''des64'''
    @staticmethod
    def des64(longs, l):
        """Transform one 64-bit block ``l`` with the 16 round keys in ``longs``.

        The block is permuted with ``ARRAYIP2``, split into two 32-bit halves,
        run through 16 rounds (``ARRAYE`` -> XOR round key -> S-boxes -> ``ARRAYP``
        -> XOR with the other half), the halves are swapped, and the result is
        permuted with ``ARRAYIP1``.
        """
        p_r, p_source, out = [0] * 8, [0, 0], KuwoMusicClientUtils.bittransform(ARRAYIP2, 64, l)
        p_source[0], p_source[1] = HelperFunctions.u32(out), HelperFunctions.u32((out & 0xFFFFFFFF00000000) >> 32)
        for i in HelperFunctions.rangen(16):
            s_out, R = 0, KuwoMusicClientUtils.bittransform(ARRAYE, 64, p_source[1])
            R ^= longs[i]
            # split into 8 bytes, each used as an index into its own S-box
            for j in HelperFunctions.rangen(8): p_r[j] = (R >> (j * 8)) & 0xFF
            for sbi in reversed(HelperFunctions.rangen(8)): s_out = (s_out << 4) | (MATRIXNSBOX[sbi][p_r[sbi]] & 0xF)
            R, L = KuwoMusicClientUtils.bittransform(ARRAYP, 32, s_out), p_source[0]
            p_source[0] = p_source[1]
            p_source[1] = HelperFunctions.u32(L ^ R)
        p_source.reverse()
        out = ((p_source[1] << 32) & 0xFFFFFFFF00000000) | (p_source[0] & 0xFFFFFFFF)
        out = KuwoMusicClientUtils.bittransform(ARRAYIP1, 64, out)
        return HelperFunctions.u64(out)
    '''subkeys'''
    @staticmethod
    def subkeys(l, longs, mode):
        """Fill ``longs[0:16]`` in place with the round keys derived from key ``l``.

        When ``mode == 1`` the 16 keys are reversed afterwards, which is what
        ``decrypt`` relies on.
        """
        l2 = KuwoMusicClientUtils.bittransform(ARRAYPC1, 56, l)
        for i in HelperFunctions.rangen(16):
            r = ARRAYLS[i]
            mask = ARRAYLSMASK[r]
            not_mask = HelperFunctions.u64(~mask)
            part1, part2 = HelperFunctions.u64((l2 & mask) << (28 - r)), (l2 & not_mask) >> r
            l2 = HelperFunctions.u64(part1 | part2)
            longs[i] = KuwoMusicClientUtils.bittransform(ARRAYPC2, 64, l2)
        if mode == 1:
            for j in HelperFunctions.rangen(8): longs[j], longs[15 - j] = longs[15 - j], longs[j]
    '''crypt'''
    @staticmethod
    def crypt(msg: bytes, key: bytes, mode: int) -> bytes:
        """Process ``msg`` block by block with ``key`` (first 8 bytes are used).

        ``mode`` 0 is what ``encrypt`` passes, ``mode`` 1 what ``decrypt`` passes.
        The output always has ``8 * (len(msg) // 8 + 1)`` bytes; the trailing
        block is computed from the leftover bytes when there are any or when
        ``mode == 0``, and is left as zeros otherwise.
        """
        l = 0
        for i in HelperFunctions.rangen(8): l |= (key[i] & 0xFF) << (i * 8)
        l, j, arr_long1 = HelperFunctions.u64(l), len(msg) // 8, [0] * 16
        KuwoMusicClientUtils.subkeys(l, arr_long1, mode)
        # full 8-byte groups -> little-endian 64-bit integers
        arr_long2 = [0] * j
        for m in HelperFunctions.rangen(j):
            v = 0
            for n in HelperFunctions.rangen(8): v |= (msg[n + m * 8] & 0xFF) << (n * 8)
            arr_long2[m] = HelperFunctions.u64(v)
        arr_long3 = [0] * ((1 + 8 * (j + 1)) // 8)
        for i1 in HelperFunctions.rangen(j): arr_long3[i1] = KuwoMusicClientUtils.des64(arr_long1, arr_long2[i1])
        # leftover bytes (fewer than 8) form the final block
        arr_byte1, l2 = msg[j * 8:], 0
        for i1 in HelperFunctions.rangen(len(msg) % 8): l2 |= (arr_byte1[i1] & 0xFF) << (i1 * 8)
        l2 = HelperFunctions.u64(l2)
        if len(arr_byte1) != 0 or mode == 0: arr_long3[j] = KuwoMusicClientUtils.des64(arr_long1, l2)
        out_bytes, i4 = bytearray(8 * len(arr_long3)), 0
        for l3 in arr_long3:
            for i6 in HelperFunctions.rangen(8): out_bytes[i4] = (l3 >> (i6 * 8)) & 0xFF; i4 += 1
        return bytes(out_bytes)
    '''encrypt'''
    @staticmethod
    def encrypt(msg: bytes) -> bytes:
        """Run ``crypt`` over ``msg`` with ``SECRET_KEY_SONG`` in mode 0."""
        return KuwoMusicClientUtils.crypt(msg, SECRET_KEY_SONG, 0)
    '''decrypt'''
    @staticmethod
    def decrypt(msg: bytes) -> bytes:
        """Run ``crypt`` over ``msg`` with ``SECRET_KEY_SONG`` in mode 1."""
        return KuwoMusicClientUtils.crypt(msg, SECRET_KEY_SONG, 1)
    '''encryptquery'''
    @staticmethod
    def encryptquery(query: str) -> str:
        """UTF-8 encode ``query``, encrypt it and return the base64 text."""
        return base64.b64encode(KuwoMusicClientUtils.encrypt(query.encode("utf-8"))).decode("ascii")
    '''xorencrypt'''
    @staticmethod
    def xorencrypt(data: bytes, key: bytes) -> bytes:
        """XOR ``data`` with ``key`` repeated cyclically (its own inverse)."""
        key_len = len(key)
        return bytes(byte ^ key[i % key_len] for i, byte in enumerate(data))
    '''buildlyricsparams'''
    @staticmethod
    def buildlyricsparams(music_id, is_get_lyricx: bool = True):
        """Build the obfuscated query value used to request lyrics for ``music_id``.

        The query string is XOR-ed with ``SECRET_KEY_LYRIC`` and base64 encoded;
        ``&lrcx=1`` is appended when ``is_get_lyricx`` is true.
        """
        params_str = f"user=12345,web,web,web&requester=localhost&req=1&rid=MUSIC_{music_id}"
        if is_get_lyricx: params_str += '&lrcx=1'
        encrypted_bytes = KuwoMusicClientUtils.xorencrypt(params_str.encode('utf-8'), SECRET_KEY_LYRIC)
        return base64.b64encode(encrypted_bytes).decode('utf-8')
    '''decodelyrics'''
    @staticmethod
    def decodelyrics(buf: bytes, is_get_lyricx: bool):
        """Decode a lyric response body into text.

        The body must start with ``tp=content``; everything after the first blank
        line (``\\r\\n\\r\\n``) is zlib-compressed. With ``is_get_lyricx`` the
        decompressed data is additionally base64 text XOR-ed with
        ``SECRET_KEY_LYRIC``. The final bytes are decoded as gb18030.

        Returns:
            The lyric text, or ``''`` for any malformed payload.
        """
        if buf[:10] != b'tp=content': return ''
        try: split_index = buf.index(b'\r\n\r\n') + 4; compressed_data = buf[split_index:]
        except ValueError: return ''
        try: lrc_data = zlib.decompress(compressed_data)
        except zlib.error: return ''
        if not is_get_lyricx: return lrc_data.decode('gb18030', errors='ignore')
        # UnicodeDecodeError and binascii.Error are both ValueError subclasses;
        # treat them like the other malformed-payload cases instead of raising.
        try: buf_str = base64.b64decode(lrc_data.decode('utf-8'))
        except ValueError: return ''
        decrypted_buffer = KuwoMusicClientUtils.xorencrypt(buf_str, SECRET_KEY_LYRIC)
        return decrypted_buffer.decode('gb18030', errors='ignore')
    '''formatlyricstime'''
    @staticmethod
    def formatlyricstime(ms):
        """Format a millisecond offset as an LRC tag ``[mm:ss.mmm]``.

        NaN and negative values are treated as 0. The value is rounded to whole
        milliseconds first, so the fraction can never overflow into a 4-digit
        field (e.g. ``1999.7`` becomes ``[00:02.000]``).
        """
        if math.isnan(ms) or ms < 0: ms = 0
        total_ms = int(round(ms))
        minutes, remainder = divmod(total_ms, 60000)
        seconds, milliseconds = divmod(remainder, 1000)
        return f"[{minutes:02}:{seconds:02}.{milliseconds:03}]"
    '''convertrawlrc'''
    @staticmethod
    def convertrawlrc(raw_lrc: str) -> str:
        """Convert word-timed lyric lines into plain LRC text.

        Lines that are not ``[mm:ss.mmm]payload`` pass through unchanged. Per-word
        tags ``<a,b>`` are dropped and the words joined. A timed line whose payload
        starts with ``<0,0>`` and contains CJK characters is treated as a
        translation: it is emitted right after the preceding lyric line, reusing
        that line's timestamp, and skipped when it has no lyric line before it.
        """
        out, i = [], 0
        lines, rx_line, rx_word, rx_zh = re.split(r"\r\n|\r|\n", raw_lrc), re.compile(r"^\[(\d{2}:\d{2}\.\d{3})\](.*)$"), re.compile(r"<(-?\d+),(-?\d+)>([^<]*)"), re.compile(r"[\u4e00-\u9fa5]")
        while i < len(lines):
            line = lines[i]
            m = rx_line.match(line)
            if not m: out.append(line); i += 1; continue
            ts, payload = m.group(1), m.group(2)
            # empty payload, or an orphan translation line: drop it
            if not payload.replace("<0,0>", "").strip(): i += 1; continue
            if payload.startswith("<0,0>") and rx_zh.search(payload): i += 1; continue
            words = list(rx_word.finditer(payload))
            lyric = "".join(w.group(3) for w in words) if words else payload.replace("<0,0>", "").strip(); trans = ""
            # look ahead one line for the matching translation
            if i + 1 < len(lines) and (nm := rx_line.match(lines[i + 1])):
                next_payload = nm.group(2)
                if next_payload.startswith("<0,0>") and rx_zh.search(next_payload): trans = next_payload.replace("<0,0>", "").strip(); i += 1
            out.append(f"[{ts}]{lyric}")
            if trans: out.append(f"[{ts}]{trans}")
            i += 1
        return "\n".join(out)
@@ -0,0 +1,143 @@
'''
Function:
Implementation of LanZouYParser
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import json
import random
import requests
from urllib.parse import urljoin, urlparse
'''LanZouYParser'''
class LanZouYParser():
    """Resolve a LanZou share link to a direct download URL.

    ``parsefromurl`` is the entry point; the underscore-prefixed static methods
    are internal steps (HTTP helpers, page scraping, cookie computation).
    """
    '''parsefromurl'''
    @staticmethod
    def parsefromurl(url: str, passcode: str = '', max_tries: int = 3):
        """Resolve ``url`` with up to ``max_tries`` attempts.

        Each attempt first scrapes the share page itself (``_parsefromurl``); if
        that fails, a third-party resolver API is queried with the file id taken
        from the last path segment of ``url``.

        Args:
            url: share link.
            passcode: extraction code for protected shares.
            max_tries: number of attempts; values <= 0 perform no attempt.

        Returns:
            ``(download_result, download_url)``; ``({}, "")`` when nothing worked.
        """
        # defined up front so the return below is valid even when the loop body never runs
        download_result, download_url = {}, ""
        for _ in range(max_tries):
            try:
                download_result, download_url = LanZouYParser._parsefromurl(url=url, passcode=passcode)
                if not (download_url and str(download_url).startswith('http')): raise ValueError('no usable download url')
                break
            except Exception:
                # best effort: any scraping failure falls through to the resolver API
                download_result, download_url = {}, ""
            file_id = urlparse(url).path.strip('/').split('/')[-1]
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36'}
            try:
                # timeout matches the default of the other HTTP helpers so one stalled call cannot hang forever
                resp = requests.get(f'https://api-v2.cenguigui.cn/api/lanzou/api.php?url=https://cenguigui.lanzouw.com/{file_id}', headers=headers, timeout=10)
                download_result = resp.json()
                download_url = download_result['data']['downurl']
                if not (download_url and str(download_url).startswith('http')): raise ValueError('no usable download url')
                break
            except Exception:
                download_result, download_url = {}, ""
        return download_result, download_url
    '''_randip'''
    @staticmethod
    def _randip() -> str:
        """Return a random dotted IPv4-looking string.

        The first octet is picked from a fixed list; the other three are random
        integers in 60..255.
        """
        ip2 = round(random.randint(600000, 2550000) / 10000)
        ip3 = round(random.randint(600000, 2550000) / 10000)
        ip4 = round(random.randint(600000, 2550000) / 10000)
        arr1 = ["218", "218", "66", "66", "218", "218", "60", "60", "202", "204", "66", "66", "66", "59", "61", "60", "222", "221", "66", "59", "60", "60", "66", "218", "218", "62", "63", "64", "66", "66", "122", "211"]
        ip1 = random.choice(arr1)
        return f"{ip1}.{ip2}.{ip3}.{ip4}"
    '''_httpget'''
    @staticmethod
    def _httpget(url: str, user_agent: str = "", referer: str = "", cookies: dict = None, timeout: int = 10) -> str:
        """GET ``url`` with randomised ``X-FORWARDED-FOR``/``CLIENT-IP`` headers and return the body text.

        Raises whatever ``requests`` raises, including ``HTTPError`` for 4xx/5xx.
        """
        headers = {"X-FORWARDED-FOR": LanZouYParser._randip(), "CLIENT-IP": LanZouYParser._randip()}
        if user_agent: headers["User-Agent"] = user_agent
        if referer: headers["Referer"] = referer
        # NOTE(review): verify=False disables TLS certificate checks — presumably deliberate for the mirror domains; confirm
        resp = requests.get(url, headers=headers, cookies=cookies, timeout=timeout, verify=False, allow_redirects=True)
        resp.raise_for_status()
        resp.encoding = resp.apparent_encoding or "utf-8"
        return resp.text
    '''_httppost'''
    @staticmethod
    def _httppost(data: dict, url: str, referer: str = "", user_agent: str = "", timeout: int = 10) -> str:
        """POST form ``data`` to ``url`` with the same spoofed headers as ``_httpget`` and return the body text."""
        headers = {"X-FORWARDED-FOR": LanZouYParser._randip(), "CLIENT-IP": LanZouYParser._randip()}
        if user_agent: headers["User-Agent"] = user_agent
        if referer: headers["Referer"] = referer
        # NOTE(review): verify=False disables TLS certificate checks — confirm this is intended
        resp = requests.post(url, data=data, headers=headers, timeout=timeout, verify=False, allow_redirects=True)
        resp.raise_for_status()
        resp.encoding = resp.apparent_encoding or "utf-8"
        return resp.text
    '''_httpredirecturl'''
    @staticmethod
    def _httpredirecturl(url: str, referer: str, user_agent: str, cookie_str: str, timeout: int = 10) -> str:
        """Request ``url`` without following redirects and return the absolute ``Location`` target (``""`` if none)."""
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9", "Cache-Control": "no-cache", "Connection": "keep-alive", "Pragma": "no-cache", "Upgrade-Insecure-Requests": "1",
            "User-Agent": user_agent, "Referer": referer, "Cookie": cookie_str,
        }
        resp = requests.get(url, headers=headers, timeout=timeout, verify=False, allow_redirects=False)
        resp.raise_for_status()
        loc = resp.headers.get("Location", "") or resp.headers.get("location", "")
        if not loc: return ""
        return urljoin(url, loc)
    '''_acwscv2simple'''
    @staticmethod
    def _acwscv2simple(arg1: str):
        """Compute the ``acw_sc__v2`` cookie value from the page's ``arg1`` string.

        Characters of ``arg1`` are reordered by a fixed position table, then the
        result is XOR-ed pairwise (as hex bytes) with a fixed hex mask.
        """
        if not arg1: return ""
        mask = "3000176000856006061501533003690027800375"
        pos_list = (15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36)
        arg2 = "".join(arg1[p - 1] for p in pos_list if p <= len(arg1))
        length = min(len(arg2), len(mask))
        return "".join(f"{(int(arg2[i:i+2], 16) ^ int(mask[i:i+2], 16)):02x}" for i in range(0, length, 2))
    '''_normalizeurl'''
    @staticmethod
    def _normalizeurl(u: str) -> str:
        """Rewrite a share link onto the ``https://www.lanzouf.com`` host.

        Anything after the first ``.com/`` is re-rooted there; paths are
        prefixed; other ``http`` URLs are returned unchanged.
        """
        if ".com/" in u: return "https://www.lanzouf.com/" + u.split(".com/", 1)[1].lstrip("/")
        if u.startswith("/"): return "https://www.lanzouf.com" + u
        if u.startswith("http"): return u
        return "https://www.lanzouf.com/" + u.lstrip("/")
    '''_extractfirst'''
    @staticmethod
    def _extractfirst(regex_list, text: str) -> str:
        """Return group 1 of the first pattern in ``regex_list`` that matches ``text`` (DOTALL), else ``""``."""
        for rgx in regex_list:
            m = re.search(rgx, text, flags=re.S)
            if m: return m.group(1)
        return ""
    '''_parsefromurl'''
    @staticmethod
    def _parsefromurl(url: str, passcode: str = ''):
        """Scrape the share page and return ``(download_result, download_url)``.

        Raises:
            RuntimeError: the page says the share was cancelled.
            AssertionError: an expected page element or API field is missing.
                Raised explicitly (not via ``assert``) so the checks survive
                ``python -O``; the type is kept for backward compatibility.
        """
        # init
        download_result, user_agent = {}, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
        # visit home page
        url = LanZouYParser._normalizeurl(url)
        homepage_url_html = LanZouYParser._httpget(url, user_agent=user_agent)
        if "文件取消分享了" in homepage_url_html: raise RuntimeError("the shared file has been cancelled")
        soft_name = LanZouYParser._extractfirst([r'style="font-size: 30px;text-align: center;padding: 56px 0px 20px 0px;">(.*?)</div>', r'<div class="n_box_3fn".*?>(.*?)</div>', r"var filename = '(.*?)';", r'div class="b"><span>(.*?)</span></div>'], homepage_url_html)
        soft_size = LanZouYParser._extractfirst([r'<div class="n_filesize".*?>大小:(.*?)</div>', r'<span class="p7">文件大小:</span>(.*?)<br>'], homepage_url_html)
        # with passcode
        if "function down_p(){" in homepage_url_html:
            segment = re.findall(r"'sign':'(.*?)',", homepage_url_html, flags=re.S)
            ajaxm = re.findall(r"ajaxm\.php\?file=\d+", homepage_url_html, flags=re.S)
            if len(segment) < 2 or len(ajaxm) < 1: raise AssertionError("sign or ajaxm endpoint not found on the passcode page")
            post_data = {"action": "downprocess", "sign": segment[1], "p": passcode, "kd": 1}
            post_url = "https://www.lanzouf.com/" + ajaxm[0]
            parse_result = LanZouYParser._httppost(post_data, post_url, referer=url, user_agent=user_agent)
            parse_result: dict = json.loads(parse_result)
            soft_name = parse_result.get("inf") or soft_name
        # without passcode
        else:
            link = LanZouYParser._extractfirst([r'\n<iframe.*?name="[\s\S]*?"\ssrc="\/(.*?)"', r'<iframe.*?name="[\s\S]*?"\ssrc="\/(.*?)"'], homepage_url_html)
            if not link: raise AssertionError("download iframe not found on the share page")
            ifurl = "https://www.lanzouf.com/" + link.lstrip("/")
            iframe_html = LanZouYParser._httpget(ifurl, user_agent=user_agent)
            wp_sign = re.findall(r"wp_sign = '(.*?)'", iframe_html, flags=re.S)
            ajaxdata = re.findall(r"ajaxdata = '(.*?)'", iframe_html, flags=re.S)
            ajaxm = re.findall(r"ajaxm\.php\?file=\d+", iframe_html, flags=re.S)
            if len(wp_sign) < 1 or len(ajaxdata) < 1 or len(ajaxm) < 2: raise AssertionError("wp_sign, ajaxdata or ajaxm endpoint not found in the iframe page")
            post_data = {"action": "downprocess", "websignkey": ajaxdata[0], "signs": ajaxdata[0], "sign": wp_sign[0], "websign": "", "kd": 1, "ves": 1}
            post_url = "https://www.lanzouf.com/" + ajaxm[1]
            parse_result = LanZouYParser._httppost(post_data, post_url, referer=ifurl, user_agent=user_agent)
            parse_result: dict = json.loads(parse_result)
        # final parse
        if not isinstance(parse_result, dict) or parse_result.get("zt") != 1: raise AssertionError("downprocess did not report success (zt != 1)")
        download_url = f"{parse_result['dom']}/file/{parse_result['url']}"
        download_html = LanZouYParser._httpget(download_url, user_agent=user_agent)
        arg1_list = re.findall(r"arg1='(.*?)'", download_html, flags=re.S)
        if arg1_list:
            # the page asks for the acw_sc__v2 cookie; retry with it and take the redirect target
            decrypted = LanZouYParser._acwscv2simple(arg1_list[0])
            cookie_str = f"down_ip=1; expires=Sat, 16-Nov-2019 11:42:54 GMT; path=/; domain=.baidupan.com; acw_sc__v2={decrypted}"
            redirected_download_url = LanZouYParser._httpredirecturl(download_url, referer="https://developer.lanzoug.com", user_agent=user_agent, cookie_str=cookie_str)
            if "http" in (redirected_download_url or ""): download_url = redirected_download_url
        download_url = re.sub(r"pid=[^&]*&", "", download_url)
        download_result = {"name": soft_name or "", "filesize": soft_size or "", "downUrl": download_url, "parse_result": parse_result}
        # return
        return download_result, download_url
@@ -0,0 +1,326 @@
'''
Function:
Implementation of Logging Related Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import os
import shutil
import logging
import collections.abc
import tabulate as tabmod
from wcwidth import wcswidth
from tabulate import tabulate
from prettytable import PrettyTable
from platformdirs import user_log_dir
from prompt_toolkit.layout import Layout
from prompt_toolkit.application import Application
from prompt_toolkit.key_binding import KeyBindings
from prompt_toolkit.layout.containers import HSplit, Window
from prompt_toolkit.application.current import get_app_or_none
from prompt_toolkit.layout.controls import FormattedTextControl
from prompt_toolkit.formatted_text import ANSI, to_formatted_text
from typing import Any, List, Optional, Sequence, Set, Tuple, Union, Dict
from prompt_toolkit.formatted_text.utils import fragment_list_width, split_lines, get_cwidth
'''settings'''
# switch on tabulate's wide-character mode (module-level side effect on the tabulate package)
tabmod.WIDE_CHARS_MODE = True
# type of the "no_trunc_cols" arguments below: column indices and/or header names, or None
NoTruncSpec = Optional[Sequence[Union[int, str]]]
# matches one ANSI CSI escape sequence: ESC [ parameters, intermediates, final byte
ANSI_CSI_RE = re.compile(r"\x1b\[[0-9;?]*[ -/]*[@-~]")
# Single-character replacements applied by normalizeforconsole() when normalisation is enabled.
# Keys are written as escapes so they cannot be lost to encoding problems; every key must be
# exactly one character because the lookup is done per character.
# NOTE(review): most keys were empty strings in this copy of the file (which collapses the
# literal to three entries that can never match); they are reconstructed here from the
# replacement values — confirm the exact code points against upstream.
AMBIGUOUS_MAP: Dict[str, str] = {
    "\u00b7": ".",    # middle dot
    "\u2022": "*",    # bullet
    "\u2026": "...",  # horizontal ellipsis
    "\u201c": '"',    # left double quotation mark
    "\u201d": '"',    # right double quotation mark
    "\u201e": '"',    # double low-9 quotation mark
    "\u201f": '"',    # double high-reversed-9 quotation mark
    "\u2018": "'",    # left single quotation mark
    "\u2019": "'",    # right single quotation mark
    "\u201a": "'",    # single low-9 quotation mark
    "\u201b": "'",    # single high-reversed-9 quotation mark
    "\u2013": "-",    # en dash
    "\u2014": "-",    # em dash
    "\u2212": "-",    # minus sign
    "\u00a0": " ",    # no-break space
}
# colour / role name -> ANSI SGR escape sequence that switches the colour on
COLORS = {
    'red': '\033[31m',
    'green': '\033[32m',
    'yellow': '\033[33m',
    'blue': '\033[34m',
    'pink': '\033[35m',
    'cyan': '\033[36m',
    'highlight': '\033[93m',
    'number': '\033[96m',
    'singer': '\033[93m',
    'flac': '\033[95m',
    'songname': '\033[91m',
}
'''LoggerHandle'''
class LoggerHandle():
    """Thin wrapper around ``logging`` that also supports file-only messages.

    Constructing an instance creates the per-user log directory and configures
    the root logger (INFO level, file + stream handlers). Each level method
    either logs through ``logging`` or, with ``disable_print=True``, appends the
    raw message to the log file without touching the console.
    """
    appname, appauthor = 'musicdl', 'zcjin'
    def __init__(self):
        # set up log dir
        log_dir = user_log_dir(appname=self.appname, appauthor=self.appauthor)
        os.makedirs(log_dir, exist_ok=True)
        log_file_path = os.path.join(log_dir, "musicdl.log")
        self.log_file_path = log_file_path
        # config logging
        logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.FileHandler(log_file_path, encoding="utf-8"), logging.StreamHandler()])
    '''log'''
    @staticmethod
    def log(level, message):
        """Log ``str(message)`` at ``level`` on the logger named after ``appname``."""
        message = str(message)
        logger = logging.getLogger(LoggerHandle.appname)
        logger.log(level, message)
    '''_appendtologfile'''
    def _appendtologfile(self, message: str) -> None:
        """Append ``message`` plus a newline to the log file, closing the handle afterwards."""
        with open(self.log_file_path, 'a', encoding='utf-8') as fp:
            fp.write(message + '\n')
    '''debug'''
    def debug(self, message, disable_print=False):
        """Log at DEBUG, or append to the log file only when ``disable_print`` is true."""
        message = str(message)
        if disable_print:
            self._appendtologfile(message)
        else:
            LoggerHandle.log(logging.DEBUG, message)
    '''info'''
    def info(self, message, disable_print=False):
        """Log at INFO, or append to the log file only when ``disable_print`` is true."""
        message = str(message)
        if disable_print:
            self._appendtologfile(message)
        else:
            LoggerHandle.log(logging.INFO, message)
    '''warning'''
    def warning(self, message, disable_print=False):
        """Log at WARNING in red, or append the plain message to the log file only."""
        message = str(message)
        if disable_print:
            self._appendtologfile(message)
        else:
            # do not wrap twice when the caller already coloured the message red
            if '\033[31m' not in message: message = colorize(message, 'red')
            LoggerHandle.log(logging.WARNING, message)
    '''error'''
    def error(self, message, disable_print=False):
        """Log at ERROR in red, or append the plain message to the log file only."""
        message = str(message)
        if disable_print:
            self._appendtologfile(message)
        else:
            if '\033[31m' not in message: message = colorize(message, 'red')
            LoggerHandle.log(logging.ERROR, message)
'''colorize'''
def colorize(string, color):
    """Wrap ``str(string)`` in the ANSI sequence registered for ``color`` in ``COLORS``.

    Unknown colour names return the text unchanged; known ones get the colour
    prefix and a trailing reset sequence.
    """
    text = str(string)
    prefix = COLORS.get(color)
    if prefix is None:
        return text
    return prefix + text + '\033[0m'
'''printfullline'''
def printfullline(ch: str = "*", end: str = '\n', terminal_right_space_len: int = 1):
    """Print ``ch`` repeated across the terminal width minus ``terminal_right_space_len``.

    Raises:
        AssertionError: the remaining width is not positive.
    """
    terminal_cols = shutil.get_terminal_size().columns
    cols = terminal_cols - terminal_right_space_len
    assert cols > 0, f'"terminal_right_space_len" should smaller than {shutil.get_terminal_size()}'
    line = ch * cols
    print(line, end=end)
'''printtable'''
def printtable(titles, items, terminal_right_space_len=4):
    """Print ``items`` as a PrettyTable with header ``titles`` and return the table.

    The table's ``max_table_width`` is set to the terminal width minus
    ``terminal_right_space_len``.

    Raises:
        AssertionError: ``titles``/``items`` are not sequences, or the width is not positive.
    """
    both_sequences = isinstance(titles, collections.abc.Sequence) and isinstance(items, collections.abc.Sequence)
    assert both_sequences, 'title and items should be iterable'
    table = PrettyTable(titles)
    for row in items:
        table.add_row(row)
    max_width = shutil.get_terminal_size().columns - terminal_right_space_len
    assert max_width > 0, f'"terminal_right_space_len" should smaller than {shutil.get_terminal_size()}'
    table.max_table_width = max_width
    print(table)
    return table
'''ptsizefallback'''
def ptsizefallback() -> Tuple[int, int]:
    """Return the terminal size as ``(columns, rows)``.

    The size reported by the current prompt_toolkit application's output is
    preferred when it is available and positive; otherwise
    ``shutil.get_terminal_size`` is used with an 80x24 fallback.
    """
    app = get_app_or_none()
    if app is not None and getattr(app, "output", None) is not None:
        cols = rows = 0
        try:
            size = app.output.get_size()
            cols, rows = int(size.columns), int(size.rows)
        except Exception:
            # any failure here simply means "use the shutil fallback"
            cols = rows = 0
        if cols > 0 and rows > 0:
            return cols, rows
    fallback = shutil.get_terminal_size(fallback=(80, 24))
    return int(fallback.columns), int(fallback.lines)
'''stripansi'''
def stripansi(s: str) -> str:
    """Return ``s`` with every sequence matched by ``ANSI_CSI_RE`` removed."""
    return re.sub(ANSI_CSI_RE, "", s)
'''dispwidth'''
def dispwidth(s: Any) -> int:
    """Return the display width of ``str(s)`` with ANSI sequences removed.

    ``None`` has width 0, and a negative result from ``wcswidth`` is clamped to 0.
    """
    if s is None:
        return 0
    width = wcswidth(stripansi(str(s)))
    return width if width > 0 else 0
'''normalizeforconsole'''
def normalizeforconsole(text: Any, *, enable: bool) -> str:
    """Flatten ``text`` to a single line for table display.

    Carriage returns are removed, newlines and tabs become a space. When
    ``enable`` is true each character is additionally mapped through
    ``AMBIGUOUS_MAP``. ``None`` yields ``""``.
    """
    if text is None:
        return ""
    s = str(text)
    if not s:
        return s
    # one pass: drop "\r", turn "\n" and "\t" into a space
    s = s.translate({ord("\r"): None, ord("\n"): " ", ord("\t"): " "})
    if not enable:
        return s
    return "".join(AMBIGUOUS_MAP.get(ch, ch) for ch in s)
'''truncatebydispwidth'''
def truncatebydispwidth(text: Any, max_width: int, ellipsis: str = "...") -> str:
    """Shorten ``text`` so its display width does not exceed ``max_width``.

    Text that already fits is returned unchanged. Otherwise characters are kept
    until the budget is used up and ``ellipsis`` is appended, unless
    ``max_width`` is not larger than the ellipsis itself, in which case the text
    is cut hard. ANSI sequences are copied through without counting towards the
    width, and a reset sequence is appended if any were copied and the kept
    part does not already end with one.
    """
    s = "" if text is None else str(text)
    if max_width <= 0:
        return ""
    if dispwidth(s) <= max_width:
        return s
    ell_w = dispwidth(ellipsis)
    with_ellipsis = max_width > ell_w
    budget = (max_width - ell_w) if with_ellipsis else max_width
    pieces, used, pos, saw_ansi = [], 0, 0, False
    while pos < len(s) and used < budget:
        ch = s[pos]
        if ch == "\x1b":
            seq = ANSI_CSI_RE.match(s, pos)
            if seq:
                pieces.append(seq.group(0))
                saw_ansi = True
                pos = seq.end()
            else:
                # lone ESC that is not a CSI sequence: drop it
                pos += 1
            continue
        ch_w = max(wcswidth(ch), 0)
        if used + ch_w > budget:
            break
        pieces.append(ch)
        used += ch_w
        pos += 1
    if saw_ansi and not (pieces and str(pieces[-1]).endswith("\x1b[0m")):
        pieces.append("\x1b[0m")
    core = "".join(pieces)
    return (core + ellipsis) if with_ellipsis else core
'''truncatefragmentstocols'''
def truncatefragmentstocols(fragments: Sequence[Tuple], cols: int) -> List[Tuple]:
    """Cut a list of ``(style, text, ...)`` fragments down to at most ``cols`` cells.

    Fragments are consumed in order; each keeps its style and any extra tuple
    items. Empty fragments are dropped. Processing stops once the width budget
    is exactly used up.
    """
    if cols <= 0:
        return []
    kept, width = [], 0
    for style, text, *extra in fragments:
        if not text:
            continue
        taken = 0
        for ch in text:
            cell_w = get_cwidth(ch)
            if width + cell_w > cols:
                break
            width += cell_w
            taken += 1
        if taken:
            kept.append((style, text[:taken], *extra))
        if width >= cols:
            break
    return kept
'''truncateandpadline'''
def truncateandpadline(fragments: Sequence[Tuple], cols: int) -> List[Tuple]:
    """Return ``fragments`` cut to ``cols`` cells and right-padded with spaces when shorter."""
    line = truncatefragmentstocols(fragments, cols)
    missing = cols - fragment_list_width(line)
    if missing <= 0:
        return truncatefragmentstocols(line, cols)
    return [*line, ("", " " * missing)]
'''smarttrunctable'''
def smarttrunctable(headers: Sequence[Any], rows: Sequence[Sequence[Any]], *, max_col_width: int = 40, min_col_width: int = 4, terminal_right_space_len: int = 2, no_trunc_cols: NoTruncSpec = None, term_width: Optional[int] = None, tablefmt: str = "grid", max_iterations: int = 2000) -> str:
    """Render a table with ``tabulate``, shrinking columns until it fits the terminal.

    Every non-protected column starts at ``min(natural width, max_col_width)``
    (never below ``min_col_width``). While the rendered table is wider than
    ``term_width - terminal_right_space_len``, the currently widest shrinkable
    column loses one cell of width and the table is rendered again.

    Args:
        headers: column titles; ``None`` becomes ``""``.
        rows: table rows, each with exactly ``len(headers)`` cells; may be empty.
        max_col_width: initial cap per column; a falsy value disables the cap.
        min_col_width: columns are never shrunk below this width.
        terminal_right_space_len: cells kept free at the right edge (negative counts as 0).
        no_trunc_cols: column indices or header names that must never be truncated.
        term_width: width to fit into; defaults to ``ptsizefallback()[0]``.
        tablefmt: ``tabulate`` table format.
        max_iterations: upper bound on shrink steps.

    Returns:
        The rendered table; the last attempt is returned when it cannot be made to fit.

    Raises:
        ValueError: a row's length differs from the number of headers.
    """
    headers_s = ["" if h is None else str(h) for h in headers]
    rows_s = [[("" if c is None else str(c)) for c in r] for r in rows]
    ncols = len(headers_s)
    if any(len(r) != ncols for r in rows_s): raise ValueError("All rows must have the same number of columns as headers")
    if term_width is None: term_width = ptsizefallback()[0]
    target_width = max(1, term_width - max(0, terminal_right_space_len))
    # resolve protected columns (by index or by header text)
    protected: Set[int] = set()
    if no_trunc_cols:
        header_to_idx = {h: i for i, h in enumerate(headers_s)}
        for spec in no_trunc_cols:
            if isinstance(spec, int) and 0 <= spec < ncols: protected.add(spec)
            elif not isinstance(spec, int):
                idx = header_to_idx.get(str(spec))
                if idx is not None: protected.add(idx)
    # natural width per column; max() gets a list so a table without rows works too
    col_natural = [max([dispwidth(h), *(dispwidth(r[j]) for r in rows_s)]) for j, h in enumerate(headers_s)]
    col_limit: List[Optional[int]] = []
    for j in range(ncols):
        if j in protected:
            col_limit.append(None)
            continue
        cap = min(col_natural[j], max_col_width) if max_col_width else col_natural[j]
        col_limit.append(max(min_col_width, cap))
    def clip(j: int, value: str) -> str:
        # apply column j's current width limit (None means "never truncate")
        return value if col_limit[j] is None else truncatebydispwidth(value, col_limit[j])
    def rendercurrent() -> str:
        th = [clip(j, h) for j, h in enumerate(headers_s)]
        tr = [[clip(j, cell) for j, cell in enumerate(r)] for r in rows_s]
        return tabulate(tr, headers=th, tablefmt=tablefmt)
    def tablewidth(table_str: str) -> int:
        return max((dispwidth(line) for line in table_str.splitlines()), default=0)
    last = ""
    for _ in range(max_iterations):
        table_str = rendercurrent()
        last = table_str
        if tablewidth(table_str) <= target_width: return table_str
        # current effective width of every column after truncation
        cur_w = [dispwidth(clip(j, h)) for j, h in enumerate(headers_s)]
        for r in rows_s:
            for j, cell in enumerate(r):
                cur_w[j] = max(cur_w[j], dispwidth(clip(j, cell)))
        shrinkable = [j for j in range(ncols) if col_limit[j] is not None and col_limit[j] > min_col_width]
        if not shrinkable: return last
        j_widest = max(shrinkable, key=lambda j: cur_w[j])
        col_limit[j_widest] = max(min_col_width, int(col_limit[j_widest]) - 1)
    return last
'''cursorpickintable'''
def cursorpickintable(headers: Sequence[Any], rows: Sequence[Sequence[Any]], row_ids: Sequence[Any], *, no_trunc_cols: NoTruncSpec = None, terminal_right_space_len: int = 2, normalize_ambiguous: Optional[bool] = None, tablefmt: Optional[str] = None) -> List[Any]:
    """Show ``rows`` in a full-screen table and let the user pick rows with the keyboard.

    Keys: up/down move the cursor, PgUp/PgDn jump one page, space toggles the
    current row, ``a`` selects all, ``i`` inverts the selection, enter confirms,
    ``q``/Esc cancels.

    Args:
        headers: column titles.
        rows: table rows, each with ``len(headers)`` cells.
        row_ids: one id per row (must be hashable); these are what gets returned.
        no_trunc_cols: columns never truncated, forwarded to ``smarttrunctable``.
        terminal_right_space_len: forwarded to ``smarttrunctable``.
        normalize_ambiguous: map ambiguous characters to ASCII; defaults to true on Windows.
        tablefmt: ``tabulate`` format; defaults to ``grid`` on Windows, ``fancy_grid`` elsewhere.

    Returns:
        The picked ids in ``row_ids`` order; ``[]`` on cancel or when there are no rows.

    Raises:
        ValueError: ``rows``/``row_ids`` lengths differ or a row has the wrong number of cells.
    """
    if len(rows) != len(row_ids): raise ValueError("rows and row_ids length mismatch")
    ncols = len(headers)
    if any(len(r) != ncols for r in rows): raise ValueError("All rows must have same number of columns as headers")
    # nothing to pick: skip the UI, whose key handlers would otherwise index an empty list
    if not rows: return []
    if normalize_ambiguous is None: normalize_ambiguous = (os.name == "nt")
    if tablefmt is None: tablefmt = "grid" if os.name == "nt" else "fancy_grid"
    headers_s = [normalizeforconsole(h, enable=normalize_ambiguous) for h in headers]
    rows_s = [[normalizeforconsole(c, enable=normalize_ambiguous) for c in r] for r in rows]
    kb, current, picked, view_start = KeyBindings(), 0, set(), 0
    # rendered-table geometry: first data line index and rendered lines per data row
    FIRST_DATA_LINE, LINES_PER_ROW = 3, 2
    def termsize() -> Tuple[int, int]: return ptsizefallback()
    def maxvisiblerows(term_lines: int) -> int:
        overhead = 10; usable = max(2, term_lines - overhead)
        return max(1, usable // LINES_PER_ROW)
    def computeview() -> Tuple[int, int]:
        # window of rows to show, keeping the cursor roughly centred
        nonlocal view_start; _, term_lines = termsize()
        page = maxvisiblerows(term_lines)
        start = max(0, min(current - page // 2, len(rows_s) - page))
        end, view_start = min(len(rows_s), start + page), start
        return start, end
    def buildtable() -> str:
        cols, _ = termsize()
        start, end = computeview()
        def marker(i: int) -> str:
            # every marker is two characters wide so the first-column text stays aligned
            at, sel = (i == current), (row_ids[i] in picked)
            if at and sel: return ">*"
            if at: return "> "
            if sel: return "* "
            return "  "
        view_rows: List[List[str]] = []
        for i in range(start, end): row = list(rows_s[i]); row[0] = marker(i) + row[0]; view_rows.append(row)
        view_headers = list(headers_s)
        view_headers[0] = f"{view_headers[0]} ({start+1}-{end}/{len(rows_s)})"
        return smarttrunctable(headers=view_headers, rows=view_rows, no_trunc_cols=no_trunc_cols, terminal_right_space_len=terminal_right_space_len, term_width=cols, tablefmt=tablefmt)
    def render() -> List[Tuple]:
        cols, term_lines = termsize()
        frags = to_formatted_text(ANSI(buildtable()))
        highlight_line = FIRST_DATA_LINE + (current - view_start) * LINES_PER_ROW
        out, line_count = [], 0
        for li, line_frags in enumerate(split_lines(frags)):
            # show the cursor row in reverse video
            if li == highlight_line: line_frags = [(((style + " reverse").strip() if style else "reverse"), text, *rest) for style, text, *rest in line_frags]
            out.extend(truncateandpadline(line_frags, cols)); out.append(("", "\n")); line_count += 1
        help_text = ("\nUse ↑/↓ to move, PgUp/PgDn to jump, <space> toggle, a: all, i: invert, <enter> confirm, q/Esc cancel.\n")
        help_frags = to_formatted_text(ANSI(help_text))
        for line_frags in split_lines(help_frags): out.extend(truncateandpadline(line_frags, cols)); out.append(("", "\n")); line_count += 1
        # fill the rest of the screen with blank lines so stale content is overwritten
        while line_count < term_lines: out.append(("", " " * cols)); out.append(("", "\n")); line_count += 1
        return out
    def invalidate(event) -> None: event.app.invalidate()
    @kb.add("up")
    def _(event):
        nonlocal current; current = max(0, current - 1)
        invalidate(event)
    @kb.add("down")
    def _(event):
        nonlocal current; current = min(len(rows_s) - 1, current + 1)
        invalidate(event)
    @kb.add("pageup")
    def _(event):
        nonlocal current; _, term_lines = termsize()
        current = max(0, current - maxvisiblerows(term_lines))
        invalidate(event)
    @kb.add("pagedown")
    def _(event):
        nonlocal current; _, term_lines = termsize()
        current = min(len(rows_s) - 1, current + maxvisiblerows(term_lines))
        invalidate(event)
    @kb.add(" ")
    def _(event): rid = row_ids[current]; (picked.remove(rid) if rid in picked else picked.add(rid)); invalidate(event)
    @kb.add("a")
    @kb.add("A")
    def _(event): picked.clear(); picked.update(row_ids); invalidate(event)
    @kb.add("i")
    @kb.add("I")
    def _(event): picked.symmetric_difference_update(row_ids); invalidate(event)
    @kb.add("enter")
    def _(event): event.app.exit(result=[rid for rid in row_ids if rid in picked])
    @kb.add("escape")
    @kb.add("q")
    def _(event): event.app.exit(result=[])
    app = Application(layout=Layout(HSplit([Window(FormattedTextControl(render), wrap_lines=False)])), key_bindings=kb, full_screen=True)
    return app.run()
+141
View File
@@ -0,0 +1,141 @@
'''
Function:
Implementation of Lyric Related Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import random
import tempfile
import requests
from typing import Optional
from .misc import resp2json
from urllib.parse import quote
from .importutils import optionalimportfrom
'''cleanlrc'''
def cleanlrc(text):
    '''Normalize lyric text.

    Line endings are unified to "\\n", every line is stripped of BOM / zero-width /
    non-breaking-space / blank characters, and lines that are empty or consist of
    nothing but a single LRC timestamp tag are dropped.
    '''
    invisible_chars = "\ufeff\u200b\u200c\u200d\u2060\u00a0 \t"
    kept_lines = []
    for raw in re.sub(r"\r\n?", "\n", str(text)).split("\n"):
        line = raw.strip(invisible_chars).strip()
        if not line:
            continue
        # a line holding only "[mm:ss.xx]" (or "[hh:mm:ss.xx]") carries no lyric text
        if re.fullmatch(r"\[(\d{2}:)?\d{2}:\d{2}(?:\.\d{1,3})?\]", line):
            continue
        kept_lines.append(line)
    return "\n".join(kept_lines)
'''fractoseconds'''
def fractoseconds(frac: str | None) -> float:
    '''Convert the digits after the decimal point of a timestamp into seconds.

    "5" -> 0.5, "25" -> 0.25, "125" -> 0.125; a missing / empty fraction is 0.0.
    '''
    return int(frac) / (10 ** len(frac)) if frac else 0.0
'''extractdurationsecondsfromlrc'''
def extractdurationsecondsfromlrc(lrc: str) -> Optional[float]:
    '''Return the largest timestamp (in seconds) found in an LRC text.

    Returns None when the text is empty, equals 'NULL', or contains no
    "[mm:ss]" / "[mm:ss.xx]" / "[hh:mm:ss.xx]" tag.
    '''
    if (not lrc) or lrc == 'NULL':
        return None
    stamp_re = re.compile(r"\[(?:(\d{1,2}):)?(\d{1,2}):(\d{2})(?:\.(\d{1,3}))?\]")
    moments = (
        (int(hours) if hours else 0) * 3600 + int(minutes) * 60 + int(secs) + fractoseconds(fraction)
        for hours, minutes, secs, fraction in stamp_re.findall(lrc)
    )
    return max(moments, default=None)
'''WhisperLRC'''
class WhisperLRC:
    '''Produce LRC-style lyrics by transcribing audio with a faster-whisper model.

    The model class is looked up through optionalimportfrom; when that lookup yields a
    falsy value, self.whisper_model is None and the transcription methods raise
    AssertionError.
    '''
    def __init__(self, model_size_or_path="small", device="auto", compute_type="int8", cpu_threads=4, num_workers=1, **kwargs):
        WhisperModel = optionalimportfrom('faster_whisper', 'WhisperModel')
        self.whisper_model = WhisperModel(model_size_or_path, device=device, compute_type=compute_type, cpu_threads=cpu_threads, num_workers=num_workers, **kwargs) if WhisperModel else None
    '''downloadtotmpdir'''
    @staticmethod
    def downloadtotmpdir(url: str, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        '''Stream url into a fresh temporary file and return the file path.

        headers / timeout / cookies are only defaults: a key with the same name inside
        request_overrides wins. The suffix of the temp file is taken from the URL
        (".bin" when none is recognisable). Raises whatever requests raises, including
        HTTPError for a non-2xx status; a partially written temp file is removed first.
        '''
        headers, cookies, request_overrides = headers or {}, cookies or {}, copy.deepcopy(request_overrides or {})
        request_overrides.setdefault('headers', headers)
        request_overrides.setdefault('timeout', timeout)
        request_overrides.setdefault('cookies', cookies)
        m = re.search(r"\.([a-z0-9]{2,5})(?:\?|$)", url, re.I)
        # the context manager releases the streamed connection even when the download fails
        with requests.get(url, stream=True, **request_overrides) as resp:
            resp.raise_for_status()
            fd, path = tempfile.mkstemp(suffix="." + (m.group(1).lower() if m else "bin"))
            try:
                with os.fdopen(fd, "wb") as fp:
                    for ch in resp.iter_content(32768):
                        if ch: fp.write(ch)
            except BaseException:
                # the caller never learns the path on failure, so clean up here
                try: os.remove(path)
                except OSError: pass
                raise
        return path
    '''timestamp'''
    @staticmethod
    def timestamp(t):
        '''Format t (seconds, clamped at 0) as an LRC tag "[mm:ss.xx]".

        The value is rounded to centiseconds before being split into minutes and
        seconds, so 59.999 becomes "[01:00.00]" rather than "[00:60.00]".
        '''
        total_cs = int(round(max(0.0, float(t)) * 100))
        mm, cs = divmod(total_cs, 6000)
        return f"[{mm:02d}:{cs // 100:02d}.{cs % 100:02d}]"
    '''requiremodel'''
    def _requiremodel(self):
        '''Raise AssertionError when no whisper model could be created.'''
        # raised explicitly so that the check also runs under "python -O"
        if self.whisper_model is None:
            raise AssertionError('faster_whisper should be installed via "pip install faster_whisper"')
    '''transcribe'''
    def _transcribe(self, file_path: str, transcribe_overrides: dict = None):
        '''Transcribe a local file and return language, prob, duration and lyric.'''
        settings = {'language': None, 'vad_filter': True, 'vad_parameters': dict(min_silence_duration_ms=300), 'chunk_length': 30, 'beam_size': 5}
        settings.update(transcribe_overrides or {})
        segs, info = self.whisper_model.transcribe(file_path, **settings)
        lrc = "\n".join(f"{self.timestamp(s.start)}{s.text.strip()}" for s in segs)
        return {"language": info.language, "prob": info.language_probability, "duration": getattr(info, "duration", None), 'lyric': lrc}
    '''fromurl'''
    def fromurl(self, url: str, transcribe_overrides: dict = None, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        '''Download url to a temporary file, transcribe it and delete the file again.'''
        self._requiremodel()
        tmp_file_path = ''
        try:
            tmp_file_path = self.downloadtotmpdir(url, headers=headers or {}, timeout=timeout, cookies=cookies or {}, request_overrides=request_overrides or {})
            return self._transcribe(tmp_file_path, transcribe_overrides)
        finally:
            if tmp_file_path and os.path.exists(tmp_file_path): os.remove(tmp_file_path)
    '''fromfilepath'''
    def fromfilepath(self, file_path: str, transcribe_overrides: dict = None):
        '''Transcribe an audio file that already exists on disk.'''
        self._requiremodel()
        return self._transcribe(file_path, transcribe_overrides)
'''LyricSearchClient'''
class LyricSearchClient():
    '''Best-effort lyric lookup against several public lyric web APIs.

    Every searchby* method returns a tuple (raw_api_result, lyric) where lyric is the
    cleaned lyric text or 'NULL' when the API offered none.
    '''
    _BROWSER_HEADERS = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
    '''requestkwargs'''
    @staticmethod
    def _requestkwargs(headers: dict, request_overrides: dict = None, timeout: int = 10) -> dict:
        '''Merge default headers / timeout into request_overrides; the overrides win.

        Passing headers= and timeout= next to **request_overrides would raise
        "got multiple values for keyword argument" as soon as a caller overrides one
        of them, so the defaults are merged in here instead.
        '''
        kwargs = dict(request_overrides or {})
        kwargs.setdefault('headers', headers)
        kwargs.setdefault('timeout', timeout)
        return kwargs
    '''search'''
    @staticmethod
    def search(track_name: str, artist_name: str, allowed_lyric_apis: tuple = ('searchbylrclibapig', 'searchbylrclibapis'), request_overrides: dict = None):
        '''Try each API in order and return the first (lyric_result, lyric) with a usable lyric.

        allowed_lyric_apis may hold callables or names of methods of this class. A
        failing or unknown API is skipped; when none succeeds the outcome of the last
        attempt is returned, i.e. ({}, 'NULL') if that attempt failed.
        '''
        lyric_result, lyric = {}, 'NULL'
        for lyric_api in allowed_lyric_apis:
            if not callable(lyric_api):
                lyric_api = getattr(LyricSearchClient, lyric_api, None) if isinstance(lyric_api, str) else None
            if not callable(lyric_api):
                lyric_result, lyric = {}, 'NULL'
                continue
            # deliberate best-effort: any failure of one backend just moves on to the next
            try: lyric_result, lyric = lyric_api(track_name=track_name, artist_name=artist_name, request_overrides=request_overrides)
            except Exception: lyric_result, lyric = {}, 'NULL'
            if lyric and (lyric not in {'NULL', 'None'}): return lyric_result, lyric
        return lyric_result, lyric
    '''searchbylrclibapig'''
    @staticmethod
    def searchbylrclibapig(track_name: str, artist_name: str, request_overrides: dict = None):
        '''Query lrclib.net/api/get; prefers syncedLyrics over plainLyrics.'''
        kwargs = LyricSearchClient._requestkwargs(dict(LyricSearchClient._BROWSER_HEADERS), request_overrides)
        (resp := requests.get("https://lrclib.net/api/get", params={"artist_name": artist_name, "track_name": track_name}, **kwargs)).raise_for_status()
        lyric = cleanlrc((lyric_result := resp2json(resp=resp)).get('syncedLyrics') or lyric_result.get('plainLyrics') or 'NULL')
        return lyric_result, lyric
    '''searchbylrclibapis'''
    @staticmethod
    def searchbylrclibapis(track_name: str, artist_name: str, request_overrides: dict = None):
        '''Query lrclib.net/api/search and use the first hit; 'NULL' when there is no hit.'''
        kwargs = LyricSearchClient._requestkwargs(dict(LyricSearchClient._BROWSER_HEADERS), request_overrides)
        (resp := requests.get("https://lrclib.net/api/search", params={"q": f"{artist_name} {track_name}"}, **kwargs)).raise_for_status()
        lyric_result = resp2json(resp=resp)
        # an empty or non-list answer has no first hit to index
        first_hit = lyric_result[0] if isinstance(lyric_result, list) and lyric_result and isinstance(lyric_result[0], dict) else {}
        lyric = cleanlrc(first_hit.get('syncedLyrics') or first_hit.get('plainLyrics') or 'NULL')
        return lyric_result, lyric
    '''searchbylyricsovhapiv1'''
    @staticmethod
    def searchbylyricsovhapiv1(track_name: str, artist_name: str, request_overrides: dict = None):
        '''Query api.lyrics.ovh/v1/<artist>/<track>; both path segments are percent-encoded.'''
        kwargs = LyricSearchClient._requestkwargs(dict(LyricSearchClient._BROWSER_HEADERS), request_overrides)
        resp = requests.get(f"https://api.lyrics.ovh/v1/{quote(artist_name, safe='')}/{quote(track_name, safe='')}", **kwargs)
        lyric = cleanlrc((lyric_result := resp2json(resp=resp)).get('lyrics') or 'NULL')
        return lyric_result, lyric
    '''searchbyhappiapiv1'''
    @staticmethod
    def searchbyhappiapiv1(track_name: str, artist_name: str, request_overrides: dict = None):
        '''Query api.happi.dev/v1/lyrics; raises when the answer has no result[0].'''
        # NOTE(review): the API token is hard-coded in source
        headers = {'accept': 'application/json', 'x-happi-token': 'hk254-C1VegxwlJjYdYFPtdUDpg8qiVpmAXVl0aA'}
        kwargs = LyricSearchClient._requestkwargs(headers, request_overrides)
        resp = requests.get('https://api.happi.dev/v1/lyrics', params={'artist': artist_name, 'track': track_name}, **kwargs)
        lyric = cleanlrc((lyric_result := resp2json(resp=resp))['result'][0]['lyrics'] or 'NULL')
        return lyric_result, lyric
    '''searchbymusixmatchapi'''
    @staticmethod
    def searchbymusixmatchapi(track_name: str, artist_name: str, request_overrides: dict = None):
        '''Query the musixmatch matcher.lyrics.get endpoint; raises when the body has no lyrics.'''
        # NOTE(review): the API key is hard-coded in source
        candidate_req_keys = ['3bc1042fde1ac8c1979c400d6f921320']
        kwargs = LyricSearchClient._requestkwargs(dict(LyricSearchClient._BROWSER_HEADERS), request_overrides)
        # sent as params so that "&", "#", spaces etc. in the names are escaped instead of corrupting the query string
        params = {'apikey': random.choice(candidate_req_keys), 'q_track': track_name, 'q_artist': artist_name}
        resp = requests.get("https://api.musixmatch.com/ws/1.1/matcher.lyrics.get", params=params, **kwargs)
        lyric = cleanlrc((lyric_result := resp2json(resp=resp))['message']['body']['lyrics']['lyrics_body'] or 'NULL')
        return lyric_result, lyric
+394
View File
@@ -0,0 +1,394 @@
'''
Function:
Implementation of Common Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import os
import html
import copy
import emoji
import errno
import pickle
import shutil
import bleach
import hashlib
import requests
import functools
import json_repair
import unicodedata
from io import BytesIO
from pathlib import Path
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
from mutagen.asf import ASF
from mutagen.flac import FLAC
from mutagen.aiff import AIFF
from mutagen.wave import WAVE
from bs4 import BeautifulSoup
from http.cookies import SimpleCookie
from .importutils import optionalimport
from mutagen import File as MutagenFile
from mutagen.oggvorbis import OggVorbis
from pathvalidate import sanitize_filepath, sanitize_filename
def remove_suffix(value: str, suffix: str) -> str:
    '''Return value without a trailing suffix; value is returned unchanged when suffix is empty or absent.'''
    if not suffix or not value.endswith(suffix):
        return value
    return value[: len(value) - len(suffix)]
'''estimatedurationwithfilesizebr'''
def estimatedurationwithfilesizebr(file_size_bytes: int, br_kbps: float, return_seconds: bool = False) -> str:
    '''Estimate a duration from a file size in bytes and a bitrate in kbps.

    Returns "HH:MM:SS", or the whole number of seconds when return_seconds is true.
    A missing / zero size or a missing / non-positive bitrate yields "-:-:-"
    (regardless of return_seconds).
    '''
    if (not file_size_bytes) or (not br_kbps) or br_kbps <= 0:
        return "-:-:-"
    duration_seconds = int((file_size_bytes * 8) / (br_kbps * 1000))
    if return_seconds:
        return duration_seconds
    hours, remainder = divmod(duration_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
'''estimatedurationwithfilelink'''
def estimatedurationwithfilelink(filelink: str = '', headers: dict = None, request_overrides: dict = None):
    '''Download filelink and return the audio length in whole seconds as read by mutagen.

    Best effort: any failure (network error, non-2xx status, content mutagen cannot
    read) yields 0. KeyboardInterrupt / SystemExit are no longer swallowed.
    '''
    headers, request_overrides = headers or {}, request_overrides or {}
    try:
        (resp := requests.get(filelink, headers=headers, timeout=10, **request_overrides)).raise_for_status()
        audio = MutagenFile(BytesIO(resp.content))
        # audio is None for unrecognised content; the AttributeError lands in the handler below
        return int(getattr(audio.info, "length", 0))
    except Exception:
        return 0
'''cookies2dict'''
def cookies2dict(cookies: str | dict = None):
    '''Turn a "k=v; k2=v2" cookie string into a dict.

    A dict is returned as is, any falsy value becomes {}, and every other type
    raises TypeError.
    '''
    cookies = cookies or {}
    if isinstance(cookies, dict):
        return cookies
    if not isinstance(cookies, str):
        raise TypeError(f'cookies type is "{type(cookies)}", expect cookies to "str" or "dict" or "None".')
    jar = SimpleCookie()
    jar.load(cookies)
    return {name: morsel.value for name, morsel in jar.items()}
'''cookies2string'''
def cookies2string(cookies: str | dict = None):
    '''Turn a cookie dict into a "k=v; k2=v2" string.

    A string is returned as is, any falsy value becomes "", None values inside the
    dict are written as empty strings, and every other type raises TypeError.
    '''
    cookies = cookies or ""
    if isinstance(cookies, str):
        return cookies
    if not isinstance(cookies, dict):
        raise TypeError(f'cookies type is "{type(cookies)}", expect cookies to "str" or "dict" or "None".')
    jar = SimpleCookie()
    for name, value in cookies.items():
        jar[name] = "" if value is None else str(value)
    return "; ".join(morsel.OutputString() for morsel in jar.values())
'''touchdir'''
def touchdir(directory, exist_ok=True, mode=511, auto_sanitize=True):
    '''Create directory (and its parents) via os.makedirs; the path is first passed through sanitize_filepath unless auto_sanitize is false.'''
    target = sanitize_filepath(directory) if auto_sanitize else directory
    return os.makedirs(target, mode=mode, exist_ok=exist_ok)
'''replacefile'''
def replacefile(src: str, dest: str):
    '''Move src over dest, falling back to a copy-based move across filesystems.

    os.replace is tried first. Only when it fails with EXDEV (source and destination
    on different devices) is an existing destination file removed and shutil.move
    used instead.

    Raises:
        OSError: the original error from os.replace when it is not EXDEV.
        IsADirectoryError: when the cross-device fallback would overwrite a directory.
    '''
    try:
        os.replace(src, dest)
    except OSError as exc:
        # re-raise the real error instead of a bare Exception so callers can see what went wrong
        if exc.errno != errno.EXDEV: raise
        if os.path.exists(dest):
            if os.path.isdir(dest): raise IsADirectoryError(errno.EISDIR, 'destination is a directory', dest) from exc
            os.remove(dest)
        shutil.move(src, dest)
'''legalizestring'''
def legalizestring(string: str, fit_gbk: bool = True, max_len: int = 255, fit_utf8: bool = True, replace_null_string: str = 'NULL'):
    '''Clean an arbitrary value into a single-line string that is safe as a file name.

    Steps, in order: undo common JSON escapes, HTML-unescape (at most twice), strip
    markup, NFC-normalize, remove emoji and non-printable / control characters,
    sanitize as a file name of at most max_len, optionally drop characters that do
    not survive a GBK and/or UTF-8 round trip, and collapse whitespace.

    Returns replace_null_string when the input is falsy or nothing is left.
    '''
    if not string: return replace_null_string
    string = str(string)
    # undo escapes left over from JSON-embedded text: \" , <\/ , \/> and \uXXXX
    string = string.replace(r'\"', '"')
    string = re.sub(r"<\\/", "</", string)
    string = re.sub(r"\\/>", "/>", string)
    string = re.sub(r"\\u([0-9a-fA-F]{4})", lambda m: chr(int(m.group(1), 16)), string)
    # html.unescape, repeated once more to cover doubly escaped entities
    for _ in range(2):
        new_string = html.unescape(string)
        if new_string == string: break
        string = new_string
    # strip markup: BeautifulSoup first, bleach.clean when that fails (e.g. parser unavailable)
    try: string = BeautifulSoup(string, "lxml").get_text(separator="")
    except Exception: string = bleach.clean(string, tags=[], attributes={}, strip=True)
    # unicodedata.normalize
    string = unicodedata.normalize("NFC", string)
    # emoji.replace_emoji
    string = emoji.replace_emoji(string, replace="")
    # isprintable
    string = "".join([ch for ch in string if ch.isprintable() and not unicodedata.category(ch).startswith("C")])
    # sanitize_filename
    string = sanitize_filename(string, max_len=max_len)
    # fix encoding
    if fit_gbk: string = string.encode("gbk", errors="ignore").decode("gbk", errors="ignore")
    if fit_utf8: string = string.encode("utf-8", errors="ignore").decode("utf-8", errors="ignore")
    # return
    string = re.sub(r"\s+", " ", string).strip()
    if not string: string = replace_null_string
    return string
'''shortenpathsinsonginfos'''
def shortenpathsinsonginfos(song_infos: list, max_path: int = 240, keep_ext: bool = True, with_hash_suffix: bool = False):
    '''Shorten the save path of every song info so that it fits into max_path characters.

    For each item with a usable save_path (not empty, not "NULL") the parent directory
    is resolved and created, the file stem is truncated to the remaining budget and
    the result is stored in item._save_path. With with_hash_suffix an md5-based
    "-xxxxxxxx" suffix (8, then 10 hex digits on a clash) is appended to the stem.
    The same list is returned.
    '''
    taken = set()
    for info in song_infos:
        raw_path = (info.save_path or "").strip()
        if (not raw_path) or raw_path.upper() == "NULL":
            continue
        src_path = Path(raw_path)
        output_dir = src_path.parent.resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        ext = src_path.suffix if keep_ext else ""
        stem = src_path.stem
        digest = hashlib.md5(str(src_path).encode("utf-8")).hexdigest()
        # characters already spent on "<dir>/" and the extension
        fixed_len = len(str(output_dir)) + 1 + len(ext)
        # NOTE(review): without with_hash_suffix both rounds build the same path, so a clash is not resolved
        for hash_len in (8, 10):
            hash_suffix = f"-{digest[:hash_len]}" if with_hash_suffix else ""
            max_stem_len = max(1, max_path - (fixed_len + len(hash_suffix)))
            safe_stem = stem[:max_stem_len].rstrip(" .") or "NULL"
            out_path = str(output_dir / f"{safe_stem}{hash_suffix}{ext}")
            if out_path.lower() not in taken:
                break
        taken.add(out_path.lower())
        info._save_path = out_path
    return song_infos
'''seconds2hms'''
def seconds2hms(seconds: int):
    '''Format a number of seconds (int, float or numeric string) as "HH:MM:SS".

    Returns '-:-:-' when the value cannot be converted or amounts to zero seconds.
    '''
    try:
        m, s = divmod(int(float(seconds)), 60)
        h, m = divmod(m, 60)
        hms = '%02d:%02d:%02d' % (h, m, s)
    except Exception:
        # best effort for unparsable input; unlike a bare except this lets KeyboardInterrupt / SystemExit through
        return '-:-:-'
    return '-:-:-' if hms == '00:00:00' else hms
'''byte2mb'''
def byte2mb(size: int):
    '''Format a byte count (int, float or numeric string) as "<x> MB" with two decimals.

    Returns 'NULL' when the value cannot be converted, is zero, or rounds to 0.0 MB.
    '''
    try:
        size = int(float(size))
    except Exception:
        # best effort for unparsable input; unlike a bare except this lets KeyboardInterrupt / SystemExit through
        return 'NULL'
    if size == 0: return 'NULL'
    size = round(size / 1024 / 1024, 2)
    if size == 0.0: return 'NULL'
    return f'{size} MB'
'''_valid_response_types'''
def _valid_response_types():
    '''Return the tuple of response classes accepted by resp2json / isvalidresp.

    Always contains requests.Response; curl_cffi.requests.Response is added when
    optionalimport('curl_cffi') yields a module that exposes it.
    '''
    accepted = [requests.Response]
    curl_cffi = optionalimport('curl_cffi')
    curl_requests = getattr(curl_cffi, 'requests', None) if curl_cffi else None
    curl_response = getattr(curl_requests, 'Response', None) if curl_requests else None
    if curl_response is None:
        return tuple(accepted)
    return tuple(accepted + [curl_response])
'''resp2json'''
def resp2json(resp: requests.Response):
    '''Decode a response body as JSON.

    resp.json() is tried first; when it fails the text is handed to json_repair.loads.
    Returns {} for an object that is not an accepted response type and for any falsy
    decoded value (None, empty list, empty string, ...).
    '''
    if not isinstance(resp, _valid_response_types()): return {}
    try: result = resp.json()
    # only real decoding errors should trigger the repair path, not KeyboardInterrupt / SystemExit
    except Exception: result = json_repair.loads(resp.text)
    return result or dict()
'''isvalidresp'''
def isvalidresp(resp: requests.Response, valid_status_codes: list | tuple | set = {200, 206}):
    '''Tell whether resp is an accepted response object whose status code is in valid_status_codes.'''
    if not isinstance(resp, _valid_response_types()):
        return False
    return resp is not None and resp.status_code in valid_status_codes
'''safeextractfromdict'''
def safeextractfromdict(data, progressive_keys, default_value = None):
    '''Walk data[key1][key2]... along progressive_keys and return the value reached.

    Returns default_value as soon as any step fails (missing key, index out of
    range, value that cannot be subscripted, ...).
    '''
    result = data
    try:
        for key in progressive_keys: result = result[key]
    except Exception:
        # any lookup failure means "not there"; KeyboardInterrupt / SystemExit are no longer swallowed
        return default_value
    return result
'''cachecookies'''
def cachecookies(client_name: str = '', cache_cookie_path: str = '', client_cookies: dict = None):
    '''Store client_cookies under client_name in the pickle file at cache_cookie_path.

    Entries already present in the file are kept; the file is created when missing.
    '''
    cookies = dict()
    # NOTE(review): pickle.load executes whatever the file encodes — only use a cache file this program wrote itself
    if os.path.exists(cache_cookie_path):
        with open(cache_cookie_path, 'rb') as fp:
            cookies = pickle.load(fp)
    with open(cache_cookie_path, 'wb') as fp:
        cookies[client_name] = client_cookies
        pickle.dump(cookies, fp)
'''usedownloadheaderscookies'''
def usedownloadheaderscookies(func):
    '''Decorator for methods that must run with the download headers / cookies.

    Before the call it copies default_download_headers to default_headers, copies
    default_download_cookies and enable_download_curl_cffi to their generic names
    when present, and calls _initsession() when the object has one.
    '''
    optional_pairs = (('default_download_cookies', 'default_cookies'), ('enable_download_curl_cffi', 'enable_curl_cffi'))
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_download_headers
        for source, target in optional_pairs:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''useparseheaderscookies'''
def useparseheaderscookies(func):
    '''Decorator for methods that must run with the parse headers / cookies.

    Before the call it copies default_parse_headers to default_headers, copies
    default_parse_cookies and enable_parse_curl_cffi to their generic names when
    present, and calls _initsession() when the object has one.
    '''
    optional_pairs = (('default_parse_cookies', 'default_cookies'), ('enable_parse_curl_cffi', 'enable_curl_cffi'))
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_parse_headers
        for source, target in optional_pairs:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''usesearchheaderscookies'''
def usesearchheaderscookies(func):
    '''Decorator for methods that must run with the search headers / cookies.

    Before the call it copies default_search_headers to default_headers, copies
    default_search_cookies and enable_search_curl_cffi to their generic names when
    present, and calls _initsession() when the object has one.
    '''
    optional_pairs = (('default_search_cookies', 'default_cookies'), ('enable_search_curl_cffi', 'enable_curl_cffi'))
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_search_headers
        for source, target in optional_pairs:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''searchdictbykey'''
def searchdictbykey(obj, target_key: str):
    '''Collect, depth first, every value stored under target_key anywhere inside nested dicts and lists.

    A matching value is reported and then searched itself; anything that is neither
    a dict nor a list is not descended into.
    '''
    def walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key == target_key:
                    yield value
                yield from walk(value)
        elif isinstance(node, list):
            for element in node:
                yield from walk(element)
    return list(walk(obj))
'''naiveguessextfromaudiobytes'''
def naiveguessextfromaudiobytes(content: bytes):
    '''Guess a file extension from raw audio bytes by the mutagen type that parses them.

    Returns one of "mp3", "flac", "m4a", "ogg", "wav", "aiff", "wma", or None when
    mutagen does not recognise the content or yields some other type.
    '''
    audio = MutagenFile(BytesIO(content))
    if audio is None:
        return None
    # checked in this fixed order; the first matching type wins
    known_types = ((MP3, "mp3"), (FLAC, "flac"), (MP4, "m4a"), (OggVorbis, "ogg"), (WAVE, "wav"), (AIFF, "aiff"), (ASF, "wma"))
    for audio_cls, ext in known_types:
        if isinstance(audio, audio_cls):
            return ext
    return None
'''AudioLinkTester'''
class AudioLinkTester(object):
    '''Check whether a URL looks like a downloadable audio resource.

    probe() collects size / content-type metadata; test() decides whether the link is
    usable, first with a HEAD request and then with a streamed GET.
    '''
    VALID_AUDIO_EXTS = {
        "aac", "aax", "aaxc", "ac3", "adts", "aif", "aifc", "aiff", "alac", "amr", "ape", "au", "avr", "awb", "caf", "cda", "dff", "dfsf", "dsf", "dss", "dts", "dtshd", "ec3", "f32",
        "f64", "flac", "gsm", "hca", "htk", "iff", "ima", "ircam", "kar", "kss", "la", "l16", "m15", "m3u8", "m4a", "m4b", "m4p", "m4r", "mat4", "mat5", "med", "midi", "mid", "mlp",
        "mod", "mo3", "mp1", "mp2", "mp3", "mpa", "mpc", "mp+", "mpp", "mptm", "msv", "mt2", "mtm", "mxmf", "nist", "nsf", "oga", "ogg", "okt", "oma", "ofr", "ofs", "opus", "paf",
        "pcm", "ptm", "pvf", "ra", "ram", "rf64", "rmi", "rmj", "rmm", "rmx", "roq", "raw", "s3m", "sap", "sds", "sd2", "sd2f", "sf", "shn", "sid", "snd", "spc", "spx", "stm", "tak",
        "tta", "thd", "ul", "ult", "umx", "voc", "vgm", "vgz", "wav", "wave", "wax", "w64", "wma", "wve", "wv", "wvx", "xi", "xm", "8svx", "16svx", "669", "amf", "dmf", "far", "gbs",
        "gym", "hes", "it", "mdl", "mpc2k", "nsa", "psf", "psf1", "psf2", "ssf", "miniusf", "usf", "2sf", "gsf", "qsf", "spu", "at3", "aa3", "at9", "3ga", "m4s"
    }
    AUDIO_CT_PREFIX = "audio/"
    AUDIO_CT_EXTRA = {"application/octet-stream", "application/x-flac", "application/flac", "application/x-mpegurl", "video/mp4"}
    MAGIC = [(b"ID3", "mp3"), (b"\xFF\xFB", "mp3"), (b"fLaC", "flac"), (b"RIFF", "wav"), (b"OggS", "ogg"), (b"MThd", "midi"), (b"\x00\x00\x00\x18ftyp", "mp4/m4a")]
    # "audio/x-aac" used to map to "ogg", which looked like a copy/paste slip next to "audio/x-ogg"
    CTYPE_TO_EXT = {"audio/mpeg": "mp3", "audio/mp3": "mp3", "audio/mp4": "m4a", "audio/x-m4a": "m4a", "audio/aac": "aac", "audio/wav": "wav", "video/mp4": "mp4", "audio/x-wav": "wav", "audio/flac": "flac", "audio/x-flac": "flac", "audio/ogg": "ogg", "audio/opus": "opus", "audio/x-aac": "aac", "audio/x-ogg": "ogg", "audio/x-m4p": "m4a"}
    def __init__(self, timeout=(5, 15), headers: dict = None, cookies: dict = None):
        self.session = requests.Session()
        self.timeout = timeout
        self.headers = {'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'}
        self.headers.update(headers or {})
        self.cookies = cookies or {}
    '''isaudioct'''
    @staticmethod
    def isaudioct(ct: str):
        '''Tell whether a Content-Type value (parameters ignored, case-insensitive) is audio-like.'''
        if not ct: return False
        ct = ct.lower().split(";", 1)[0].strip()
        return ct.startswith(AudioLinkTester.AUDIO_CT_PREFIX) or ct in AudioLinkTester.AUDIO_CT_EXTRA
    '''sniffmagic'''
    @staticmethod
    def sniffmagic(b: bytes):
        '''Guess a format name from the first bytes of a file, or return None.'''
        for sig, fmt in AudioLinkTester.MAGIC:
            if b.startswith(sig): return fmt
        # the "ftyp" box type sits at offset 4; the 4 size bytes before it vary between files
        if len(b) >= 8 and b[4:8] == b"ftyp": return "mp4/m4a"
        if len(b) >= 2 and b[0] == 0xFF and (b[1] & 0xF0) == 0xF0: return "aac/adts"
        return None
    '''preparerequest'''
    def _preparerequest(self, url: str, request_overrides: dict = None):
        '''Return (request kwargs with instance defaults filled in, extension guessed from the URL path).'''
        request_kwargs, naive_guess_ext = copy.deepcopy(request_overrides or {}), url.split('?')[0].split('.')[-1]
        request_kwargs.setdefault('headers', self.headers)
        request_kwargs.setdefault('timeout', self.timeout)
        request_kwargs.setdefault('cookies', self.cookies)
        return request_kwargs, naive_guess_ext
    '''probeonce'''
    def _probeonce(self, requester, url: str, naive_guess_ext: str, request_kwargs: dict, **extra_kwargs):
        '''Issue one request and describe the response headers; every field is 'NULL' on any failure.'''
        resp = None
        try:
            resp = requester(url, allow_redirects=True, **extra_kwargs, **request_kwargs)
            resp.raise_for_status()
            resp_headers, final_url = resp.headers, resp.url
            file_size, ctype = byte2mb(resp_headers.get('content-length')), remove_suffix(str(resp_headers.get('content-type')), '; charset=UTF-8')
            # NOTE(review): presumably some hosts serve audio labelled as image/jpg — confirm
            if ctype == 'image/jpg': ctype = 'audio/mpeg'
            if ctype == 'text/plain' and naive_guess_ext == 'm4s': ctype = 'audio/mp4'
            return dict(file_size=file_size, ctype=ctype, ext=self.CTYPE_TO_EXT.get(ctype, 'NULL'), download_url=url, final_url=final_url)
        except Exception:
            return dict(file_size='NULL', ctype='NULL', ext='NULL', download_url=url, final_url='NULL')
        finally:
            # always release the (possibly streamed) connection, also when the status check failed
            if resp is not None: resp.close()
    '''probe'''
    def probe(self, url: str, request_overrides: dict = None):
        '''Return a dict with file_size, ctype, ext, download_url and final_url for url.

        A HEAD request is tried first; when it gives no usable file size a streamed GET
        is issued and its outcome is returned instead. Never raises for request errors.
        '''
        request_overrides, naive_guess_ext = self._preparerequest(url, request_overrides)
        # HEAD probe
        outputs = self._probeonce(self.session.head, url, naive_guess_ext, request_overrides)
        if outputs['file_size'] and outputs['file_size'] not in ('NULL',): return outputs
        # GETSTREAM probe
        return self._probeonce(self.session.get, url, naive_guess_ext, request_overrides, stream=True)
    '''test'''
    def test(self, url: str, request_overrides: dict = None):
        '''Decide whether url is a usable audio link.

        Returns a dict with ok, status, method, final_url, ctype, clen, range, fmt and
        reason. A HEAD request succeeds when the status is 2xx, the content looks like
        audio and a length or byte-range support is advertised; otherwise a streamed GET
        checks the content type and the first bytes of the body.
        '''
        request_overrides, naive_guess_ext = self._preparerequest(url, request_overrides)
        outputs = dict(ok=False, status=0, method="", final_url=None, ctype=None, clen=None, range=None, fmt=None, reason="")
        # HEAD test
        try:
            resp = self.session.head(url, allow_redirects=True, **request_overrides)
            try:
                clen = resp.headers.get("Content-Length")
                clen = int(clen) if clen and clen.isdigit() else None
                outputs.update(dict(status=resp.status_code, method="HEAD", final_url=str(resp.url), ctype=resp.headers.get("Content-Type"), clen=clen, range=(resp.headers.get("Accept-Ranges") or "").lower() == "bytes"))
                if outputs["ctype"] == 'text/plain' and naive_guess_ext == 'm4s': outputs["ctype"] = 'audio/mp4'
                if 200 <= resp.status_code < 300 and ((self.isaudioct(outputs["ctype"]) or (naive_guess_ext in ('m4s',))) and (outputs["clen"] or outputs["range"])): outputs.update(dict(ok=True, reason="HEAD success")); return outputs
            finally:
                resp.close()
        except Exception as err:
            outputs["reason"] = f"HEAD error: {err}"
        # RANGEGET test
        try:
            resp = self.session.get(url, stream=True, allow_redirects=True, **request_overrides)
            try:
                outputs.update(dict(status=resp.status_code, method="RANGEGET", final_url=str(resp.url)))
                if resp.status_code not in (200, 206): outputs["reason"] = f"RANGEGET error: response status {resp.status_code}"; return outputs
                # only the first few bytes are needed for magic sniffing
                chunk = next(iter(resp.iter_content(chunk_size=16)), b"")
            finally:
                # the body is streamed, so the connection has to be released explicitly on every path
                resp.close()
            outputs["ctype"] = outputs["ctype"] or resp.headers.get("Content-Type")
            if outputs["ctype"] == 'text/plain' and naive_guess_ext == 'm4s': outputs["ctype"] = 'audio/mp4'
            outputs["range"] = outputs["range"] or (resp.status_code == 206) or (resp.headers.get("Content-Range") is not None)
            clen = resp.headers.get("Content-Length") or (resp.headers.get("Content-Range") or "").split("/")[-1]
            if clen and clen.isdigit(): outputs["clen"] = int(clen)
            outputs["fmt"] = self.sniffmagic(chunk)
            if self.isaudioct(outputs["ctype"]) or outputs["fmt"] or (naive_guess_ext in ('m4s',)): outputs.update(dict(ok=True, reason="RANGEGET success"))
            else: outputs.update(dict(ok=False, reason="RANGEGET error: Not audio-like (CT/magic)"))
        except Exception as err:
            outputs["reason"] = f"RANGEGET error: {err}"
        # return
        return outputs
@@ -0,0 +1,73 @@
'''
Function:
Implementation of BaseModuleBuilder
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
import collections
'''BaseModuleBuilder'''
class BaseModuleBuilder():
    '''Registry that maps names to callables and builds objects from config dicts.

    REGISTERED_MODULES is a class-level OrderedDict, so it is shared by every
    instance of a class that does not define its own.
    '''
    REGISTERED_MODULES = collections.OrderedDict()
    def __init__(self, requires_register_modules=None, requires_renew_modules=None):
        # anything that is not a dict (including None) is ignored
        if isinstance(requires_register_modules, dict):
            for name, module in requires_register_modules.items():
                self.register(name, module)
        if isinstance(requires_renew_modules, dict):
            for name, module in requires_renew_modules.items():
                self.renew(name, module)
        self.validate()
    '''build'''
    def build(self, module_cfg):
        '''Instantiate the module named by module_cfg['type'] with the remaining keys as keyword arguments; module_cfg itself is not modified.'''
        build_kwargs = copy.deepcopy(module_cfg)
        factory = self.REGISTERED_MODULES[build_kwargs.pop('type')]
        return factory(**build_kwargs)
    '''register'''
    def register(self, name, module):
        '''Add a callable under a name that is not registered yet.'''
        assert callable(module)
        assert name not in self.REGISTERED_MODULES
        self.REGISTERED_MODULES[name] = module
    '''renew'''
    def renew(self, name, module):
        '''Replace the callable of an already registered name.'''
        assert callable(module)
        assert name in self.REGISTERED_MODULES
        self.REGISTERED_MODULES[name] = module
    '''validate'''
    def validate(self):
        '''Assert that every registered module is callable.'''
        for module in self.REGISTERED_MODULES.values():
            assert callable(module)
    '''delete'''
    def delete(self, name):
        '''Remove a registered name.'''
        assert name in self.REGISTERED_MODULES
        self.REGISTERED_MODULES.pop(name)
    '''pop'''
    def pop(self, name):
        '''Remove a registered name and return its module.'''
        assert name in self.REGISTERED_MODULES
        return self.REGISTERED_MODULES.pop(name)
    '''get'''
    def get(self, name):
        '''Return the module registered under name.'''
        assert name in self.REGISTERED_MODULES
        return self.REGISTERED_MODULES[name]
    '''items'''
    def items(self):
        return self.REGISTERED_MODULES.items()
    '''clear'''
    def clear(self):
        return self.REGISTERED_MODULES.clear()
    '''values'''
    def values(self):
        return self.REGISTERED_MODULES.values()
    '''keys'''
    def keys(self):
        return self.REGISTERED_MODULES.keys()
    '''copy'''
    def copy(self):
        return self.REGISTERED_MODULES.copy()
    '''update'''
    def update(self, requires_update_modules):
        return self.REGISTERED_MODULES.update(requires_update_modules)
@@ -0,0 +1,86 @@
'''
Function:
Implementation of NeteaseMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import os
import json
import base64
import urllib
import codecs
import urllib.parse
from hashlib import md5
from Crypto.Cipher import AES
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
'''settings'''
# Quality identifiers; presumably ordered from most to least preferred — TODO confirm against the client that consumes this list.
MUSIC_QUALITIES = ['jymaster', 'dolby', 'sky', 'jyeffect', 'hires', 'lossless', 'exhigh', 'standard']
# Default cookies holding a hard-coded MUSIC_U value.
# NOTE(review): this looks like a session credential committed to source — consider loading it from configuration instead.
DEFAULT_COOKIES = {'MUSIC_U': '1eb9ce22024bb666e99b6743b2222f29ef64a9e88fda0fd5754714b900a5d70d993166e004087dd3b95085f6a85b059f5e9aba41e3f2646e3cebdbec0317df58c119e5'}
'''EapiCryptoUtils'''
class EapiCryptoUtils(object):
    '''Static helpers that build the encrypted "params" value for eapi requests.'''
    '''hexdigest'''
    @staticmethod
    def hexdigest(data: bytes):
        '''Return data as lowercase hex, two digits per byte.'''
        return "".join(f"{byte:02x}" for byte in data)
    '''hashdigest'''
    @staticmethod
    def hashdigest(text: str):
        '''Return the raw MD5 digest of the UTF-8 encoded text.'''
        return md5(text.encode("utf-8")).digest()
    '''hashhexdigest'''
    @staticmethod
    def hashhexdigest(text: str):
        '''Return the MD5 digest of text as lowercase hex.'''
        raw_digest = EapiCryptoUtils.hashdigest(text)
        return EapiCryptoUtils.hexdigest(raw_digest)
    '''encryptparams'''
    @staticmethod
    def encryptparams(url: str, payload: dict, aes_key: bytes = b"e82ckenh8dichen8"):
        '''Encrypt url path + JSON payload + MD5 check value with AES-ECB (PKCS7 padded) and return the ciphertext as hex.'''
        url_path = urllib.parse.urlparse(url).path.replace("/eapi/", "/api/")
        payload_json = json.dumps(payload)
        digest = EapiCryptoUtils.hashhexdigest(f"nobody{url_path}use{payload_json}md5forencrypt")
        params = f"{url_path}-36cd479b6b5-{payload_json}-36cd479b6b5-{digest}"
        aes = algorithms.AES(aes_key)
        padder = padding.PKCS7(aes.block_size).padder()
        padded_data = padder.update(params.encode()) + padder.finalize()
        encryptor = Cipher(algorithms.AES(aes_key), modes.ECB()).encryptor()
        return EapiCryptoUtils.hexdigest(encryptor.update(padded_data) + encryptor.finalize())
'''WeapiCryptoUtils'''
class WeapiCryptoUtils(object):
    '''Static helpers that build the "params" / "encSecKey" form fields for weapi requests.'''
    '''createsecretkey'''
    @staticmethod
    def createsecretkey(size: int):
        '''Return a 16-character hex string derived from os.urandom(size).

        NOTE(review): the key is built from the characters of the repr of the random
        bytes (which starts with b'...), so its leading characters are predictable;
        kept as is because the result format is part of existing behavior.
        '''
        raw_repr = str(os.urandom(size))
        return ''.join(hex(ord(ch))[2:] for ch in raw_repr)[0: 16]
    '''aesencrypt'''
    @staticmethod
    def aesencrypt(string: str, sec_key: str):
        '''AES-CBC encrypt string (str or bytes) with sec_key and return base64 bytes.

        Padding is computed on the UTF-8 encoded bytes, so text containing non-ASCII
        characters is padded to a full block as well.
        '''
        data = string if isinstance(string, bytes) else string.encode('utf-8')
        pad = 16 - len(data) % 16
        # PKCS#7 style: pad bytes each holding the pad length
        data += bytes([pad]) * pad
        encryptor = AES.new(sec_key.encode('utf-8'), AES.MODE_CBC, b'0102030405060708')
        return base64.b64encode(encryptor.encrypt(data))
    '''rsaencrypt'''
    @staticmethod
    def rsaencrypt(string: str, pub_key: str = '010001', modulus: str = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'):
        '''Textbook RSA on the reversed string; returns lowercase hex zero-padded to 256 digits.'''
        message = int(string[::-1].encode('utf-8').hex(), 16)
        # three-argument pow does modular exponentiation without building the huge intermediate power
        rs = pow(message, int(pub_key, 16), int(modulus, 16))
        return format(rs, 'x').zfill(256)
    '''encryptparams'''
    @staticmethod
    def encryptparams(params: dict):
        '''Return {'params': ..., 'encSecKey': ...}: the JSON of params AES-encrypted twice (fixed key, then a fresh key) plus the RSA-encrypted fresh key.'''
        string = json.dumps(params)
        sec_key = WeapiCryptoUtils.createsecretkey(16)
        enc_string = WeapiCryptoUtils.aesencrypt(string=WeapiCryptoUtils.aesencrypt(string=string, sec_key='0CoJUm6Qyw8W8jud'), sec_key=sec_key)
        enc_sec_key = WeapiCryptoUtils.rsaencrypt(string=sec_key)
        post_data = {'params': enc_string, 'encSecKey': enc_sec_key}
        return post_data
@@ -0,0 +1,323 @@
'''
Function:
Implementation of QQMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import time
import orjson
import base64
import random
import string
import hashlib
import requests
import binascii
from enum import Enum
from uuid import uuid4
from datetime import datetime, timedelta
from dataclasses import dataclass, field, asdict
from typing import ClassVar, TypedDict, Any, cast
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
'''settings'''
# PEM-encoded public key; QQMusicClientUtils.rsaencrypt loads it to encrypt the random AES key sent with the qimei request.
PUBLIC_KEY = """-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDEIxgwoutfwoJxcGQeedgP7FG9qaIuS0qzfR8gWkrkTZKM2iWHn2ajQpBRZjMSoSf6+KJGvar2ORhBfpDXyVtZCKpqLQ+FLkpncClKVIrBwv6PHyUvuCb0rIarmgDnzkfQAqVufEtR64iazGDKatvJ9y6B9NMbHddGSAUmRTCrHQIDAQAB
-----END PUBLIC KEY-----"""
# Secret string mixed into the MD5 signature of the qimei request parameters (see QQMusicClientUtils.obtainqimei).
SECRET = "ZdJqM15EeO2zWc08"
# Application key sent as "appKey" in the qimei payload and inside the request's "extra" JSON.
APP_KEY = "0AND0HD6FE4HY80F"
'''SongFileType'''
# Audio file variants; every member's value is a (code, file extension) pair.
# NOTE(review): the codes look like the quality prefixes used in media file names -- confirm against the caller.
class SongFileType(Enum):
MASTER = ("AI00", ".flac")
ATMOS_2 = ("Q000", ".flac")
ATMOS_51 = ("Q001", ".flac")
FLAC = ("F000", ".flac")
OGG_640 = ("O801", ".ogg")
OGG_320 = ("O800", ".ogg")
OGG_192 = ("O600", ".ogg")
OGG_96 = ("O400", ".ogg")
MP3_320 = ("M800", ".mp3")
MP3_128 = ("M500", ".mp3")
ACC_192 = ("C600", ".m4a")
ACC_96 = ("C400", ".m4a")
ACC_48 = ("C200", ".m4a")
# The same (code, extension) pairs as the members above, in the members' declaration order.
# NOTE(review): if this assignment sits inside the Enum body it becomes a member itself (read through `.value`) -- confirm how callers access it.
SORTED_QUALITIES = [
("AI00", ".flac"), ("Q000", ".flac"), ("Q001", ".flac"), ("F000", ".flac"), ("O801", ".ogg"), ("O800", ".ogg"), ("O600", ".ogg"), ("O400", ".ogg"),
("M800", ".mp3"), ("M500", ".mp3"), ("C600", ".m4a"), ("C400", ".m4a"), ("C200", ".m4a")
]
'''EncryptedSongFileType'''
# Counterpart of SongFileType whose members use the ".mflac" / ".mgg" extensions; values are again (code, file extension) pairs.
# NOTE(review): OGG_640 and OGG_320 carry the same codes ("O801", "O800") as in SongFileType -- presumably intentional, confirm.
class EncryptedSongFileType(Enum):
MASTER = ("AIM0", ".mflac")
ATMOS_2 = ("Q0M0", ".mflac")
ATMOS_51 = ("Q0M1", ".mflac")
FLAC = ("F0M0", ".mflac")
OGG_640 = ("O801", ".mgg")
OGG_320 = ("O800", ".mgg")
OGG_192 = ("O6M0", ".mgg")
OGG_96 = ("O4M0", ".mgg")
# The same (code, extension) pairs as the members above, in the members' declaration order.
# NOTE(review): if this assignment sits inside the Enum body it becomes a member itself (read through `.value`) -- confirm how callers access it.
SORTED_QUALITIES = [
("AIM0", ".mflac"), ("Q0M0", ".mflac"), ("Q0M1", ".mflac"), ("F0M0", ".mflac"), ("O801", ".mgg"), ("O800", ".mgg"), ("O6M0", ".mgg"), ("O4M0", ".mgg")
]
'''ThirdPartVKeysAPISongFileType'''
# Quality tiers whose values are tuples of integer quality ids (one tier may own several ids).
# NOTE(review): presumably these ids are the quality numbers accepted by a third-party "vkeys" API -- confirm against the caller.
class ThirdPartVKeysAPISongFileType(Enum):
TRIAL_LISTEN = (0,)
LOSSY_QUALITY = (1, 2, 3)
STANDARD_QUALITY = (4, 5, 6, 7)
HQ_QUALITY = (8,)
HQ_QUALITY_ENHANCED = (9,)
SQ_LOSSLESS_QUALITY = (10,)
HI_RES_QUALITY = (11,)
DOLBY_ATMOS = (12,)
PREMIUM_SPATIAL_AUDIO = (13,)
PREMIUM_MASTER_2_0 = (14,)
AI_ACCOMPANIMENT_MODE_4TRACK = (15,)
AI_5_1_QUALITY_6TRACK = (16,)
# Reverse lookup: integer quality id (0..16) -> name of the member whose tuple contains that id.
# NOTE(review): if this assignment sits inside the Enum body it becomes a member itself (read through `.value`) -- confirm how callers access it.
ID_TO_NAME = {
0: "TRIAL_LISTEN", 1: "LOSSY_QUALITY", 2: "LOSSY_QUALITY", 3: "LOSSY_QUALITY", 4: "STANDARD_QUALITY", 5: "STANDARD_QUALITY", 6: "STANDARD_QUALITY", 7: "STANDARD_QUALITY",
8: "HQ_QUALITY", 9: "HQ_QUALITY_ENHANCED", 10: "SQ_LOSSLESS_QUALITY", 11: "HI_RES_QUALITY", 12: "DOLBY_ATMOS", 13: "PREMIUM_SPATIAL_AUDIO", 14: "PREMIUM_MASTER_2_0",
15: "AI_ACCOMPANIMENT_MODE_4TRACK", 16: "AI_5_1_QUALITY_6TRACK",
}
'''SearchType'''
# Kinds of search results, each mapped to an integer code.
# NOTE(review): presumably the numeric search-type value sent to the search endpoint -- confirm against the caller.
class SearchType(Enum):
SONG = 0
SINGER = 1
ALBUM = 2
SONGLIST = 3
MV = 4
LYRIC = 7
USER = 8
AUDIO_ALBUM = 15
AUDIO = 18
'''QimeiResult'''
# Typed dict returned by QQMusicClientUtils.obtainqimei: the "q16" and "q36" identifier strings taken from the qimei response.
class QimeiResult(TypedDict):
q16: str
q36: str
'''OSVersion'''
# Operating-system version fields of the simulated device; `release` and `sdk` are formatted into the "osVersion" string of the qimei payload.
@dataclass
class OSVersion:
incremental: str = "5891938"
release: str = "10"
codename: str = "REL"
sdk: int = 29
'''Device'''
# Simulated Android device profile. Fixed fields describe one handset model; fields with a
# default_factory are randomised for every new instance.
@dataclass
class Device:
# "QMAPI.<6 random digits>.001"
display: str = field(default_factory=lambda: f"QMAPI.{random.randint(100000, 999999)}.001")
product: str = "iarim"
device: str = "sagit"
board: str = "eomam"
model: str = "MI 6"
# Build fingerprint string with a random 7-digit build number.
fingerprint: str = field(default_factory=lambda: f"xiaomi/iarim/sagit:10/eomam.200122.001/{random.randint(1000000, 9999999)}:user/release-keys")
# Random UUID4 string.
boot_id: str = field(default_factory=lambda: str(uuid4()))
# Kernel version string with an 8-character random alphanumeric suffix.
proc_version: str = field(default_factory=lambda: f"Linux 5.4.0-54-generic-{''.join(random.choices(string.ascii_letters + string.digits, k=8))} (android-build@google.com)")
# 14 random digits plus one check digit: digits at even indexes are doubled and their decimal digits summed,
# the total is multiplied by 9 and taken modulo 10 (a Luhn-style checksum -- NOTE(review): confirm the doubling positions).
imei: str = field(default_factory=lambda: (lambda d: "".join(map(str, d)) + str(sum((x * 2 // 10 + x * 2 % 10) if i % 2 == 0 else x for i, x in enumerate(d)) * 9 % 10))([random.randint(0, 9) for _ in range(14)]))
brand: str = "Xiaomi"
bootloader: str = "U-boot"
base_band: str = ""
version: OSVersion = field(default_factory=OSVersion)
sim_info: str = "T-Mobile"
os_type: str = "android"
mac_address: str = "00:50:56:C0:00:08"
# ClassVar: not a dataclass field, so this one list object is shared by every Device.
ip_address: ClassVar[list[int]] = [10, 0, 1, 3]
wifi_bssid: str = "00:50:56:C0:00:08"
wifi_ssid: str = "<unknown ssid>"
# The 16 bytes of the MD5 digest of 16 random bytes, as a list of ints.
imsi_md5: list[int] = field(default_factory=lambda: list(hashlib.md5(bytes([random.randint(0, 255) for _ in range(16)])).digest()))
# 8 random bytes rendered as 16 hex characters.
android_id: str = field(default_factory=lambda: binascii.hexlify(bytes([random.randint(0, 255) for _ in range(8)])).decode("utf-8"))
apn: str = "wifi"
vendor_name: str = "MIUI"
vendor_os_name: str = "qmapi"
# Set to the "q36" value by QQMusicClientUtils.obtainqimei after a successful request; None until then.
qimei: None | str = None
'''Credential'''
@dataclass
class Credential:
    """Login state sent along with QQ Music requests.

    ``extra_fields`` holds values that have no dedicated attribute; ``asjson``
    merges them over the top-level keys.
    """
    openid: str = ""
    refresh_token: str = ""
    access_token: str = ""
    expired_at: int = 0
    musicid: int = 0
    musickey: str = ""
    unionid: str = ""
    str_musicid: str = ""
    refresh_key: str = ""
    encrypt_uin: str = ""
    login_type: int = 0
    extra_fields: dict[str, Any] = field(default_factory=dict)
    '''postinit'''
    def __post_init__(self):
        """Derive ``login_type`` when it is unset: 1 if ``musickey`` starts with "W_X", otherwise 2."""
        if not self.login_type:
            self.login_type = 1 if self.musickey and self.musickey.startswith("W_X") else 2
    '''todict'''
    def todict(self) -> dict:
        """Return the fields as a dict, with ``login_type`` / ``encrypt_uin`` renamed to ``loginType`` / ``encryptUin``."""
        d = asdict(self)
        d["loginType"], d["encryptUin"] = d.pop("login_type"), d.pop("encrypt_uin")
        return d
    '''asjson'''
    def asjson(self) -> str:
        """Serialise ``todict()`` to JSON text, flattening ``extra_fields`` into the top level."""
        data = self.todict()
        # Extra fields are merged last, so they win over same-named top-level keys.
        data.update(data.pop("extra_fields"))
        return orjson.dumps(data).decode()
    '''fromcookiesdict'''
    @classmethod
    def fromcookiesdict(cls, cookies: dict[str, Any]):
        """Build a credential from a cookie dict, accepting several alias names per field.

        For every field the first alias with a truthy value is used. Missing
        values fall back to the dataclass defaults (``""`` / ``0``) instead of
        ``None``, so the instance always matches its declared field types.
        ``extra_fields`` receives a shallow copy of ``cookies``.

        Raises:
            ValueError: if the ``musicid`` / ``uin`` value is not an integer literal.
        """
        def first(*names: str, default: Any = ""):
            # First truthy cookie value among the aliases, otherwise the default.
            for name in names:
                value = cookies.get(name)
                if value:
                    return value
            return default
        return cls(
            openid=first("openid", "psrf_qqopenid", "wxopenid"),
            refresh_token=first("refresh_token", "psrf_qqrefresh_token", "wxrefresh_token"),
            access_token=first("access_token", "psrf_qqaccess_token", "wxaccess_token"),
            expired_at=first("expired_at", "psrf_access_token_expiresAt", default=0),
            musicid=int(first("musicid", "uin", default=0)),
            musickey=first("musickey", "qqmusic_key"),
            unionid=first("unionid", "psrf_qqunionid", "wxunionid"),
            str_musicid=first("str_musicid", "musicid", "uin"),
            refresh_key=first("refresh_key"),
            encrypt_uin=first("encryptUin"),
            # A falsy login type is resolved from musickey in __post_init__.
            login_type=first("loginType", "tmeLoginType", default=0),
            # Copy so later changes to the caller's dict do not leak into the credential.
            extra_fields=dict(cookies),
        )
'''QQMusicClientUtils'''
# Helper collection for QQ Music requests: crypto and signing helpers, random identifier generators and request-body builders.
class QQMusicClientUtils(object):
# Client version string and numeric version code, the cached qimei result (empty until obtained) and the simulated Device created once at class-definition time.
version, version_code, qimei_result, device = "13.2.5.8", 13020508, {}, Device()
# Request endpoints and media host. NOTE(review): "musics.fcg" is presumably the endpoint for signed/encrypted requests -- confirm against the caller.
endpoint = "https://u.y.qq.com/cgi-bin/musicu.fcg"
enc_endpoint = "https://u.y.qq.com/cgi-bin/musics.fcg"
music_domain = "https://isure.stream.qqmusic.qq.com/"
# Defaults merged into every "comm" section built by buildcommonparams.
COMMON_DEFAULTS: ClassVar[dict[str, str]] = {"ct": "11", "tmeAppID": "qqmusic", "format": "json", "inCharset": "utf-8", "outCharset": "utf-8", "uid": "3931641530"}
@property
def qimei(self) -> QimeiResult:
    """Return the qimei identifiers, obtaining them once and caching them on the class.

    ``buildcommonparams`` reads this property from a freshly created instance
    on every call, so a value stored on the instance would never be seen again
    and each call would repeat the qimei request. Storing it on the class makes
    the cache effective for all instances.
    """
    if self.qimei_result: return self.qimei_result
    QQMusicClientUtils.qimei_result = QQMusicClientUtils.obtainqimei(version=QQMusicClientUtils.version, device=QQMusicClientUtils.device)
    return QQMusicClientUtils.qimei_result
'''rsaencrypt'''
@staticmethod
def rsaencrypt(content: bytes):
    """Encrypt ``content`` with the module-level PEM public key using PKCS#1 v1.5 padding."""
    loaded_key = serialization.load_pem_public_key(PUBLIC_KEY.encode())
    rsa_key = cast(RSAPublicKey, loaded_key)
    return rsa_key.encrypt(content, padding.PKCS1v15())
'''aesencrypt'''
@staticmethod
def aesencrypt(key: bytes, content: bytes):
    """AES-CBC encrypt ``content``; ``key`` doubles as the IV.

    The content is first padded to a multiple of 16 bytes, each padding byte
    holding the padding length.
    """
    pad_len = 16 - len(content) % 16
    padded = content + (chr(pad_len) * pad_len).encode()
    enc = Cipher(algorithms.AES(key), modes.CBC(key)).encryptor()
    return enc.update(padded) + enc.finalize()
'''calcmd5'''
@staticmethod
def calcmd5(*strings: str | bytes):
md5 = hashlib.md5()
for item in strings:
assert isinstance(item, (str, bytes))
if isinstance(item, bytes): md5.update(item)
elif isinstance(item, str): md5.update(item.encode())
return md5.hexdigest()
'''hash33'''
@staticmethod
def hash33(s: str, h: int = 0) -> int:
for c in s: h = (h << 5) + h + ord(c)
return 2147483647 & h
'''sign'''
@staticmethod
def sign(request: dict) -> str:
    """Compute the lowercase ``zzc...`` signature string for a request body.

    The signature is built from the upper-case SHA-1 hex digest of the
    serialised request: selected digest characters, then the base64 text of the
    digest bytes XOR-ed with a fixed mask (with ``\\``, ``/``, ``+`` and ``=``
    removed), then a second selection of digest characters.
    """
    head_positions = (23, 14, 6, 36, 16, 40, 7, 19)
    tail_positions = (16, 1, 32, 12, 19, 27, 8, 5)
    xor_mask = (89, 39, 179, 150, 218, 82, 58, 252, 177, 52, 186, 123, 120, 64, 242, 133, 143, 161, 121, 179)
    digest_hex = hashlib.sha1(orjson.dumps(request)).hexdigest().upper()
    # The digest has 40 hex characters, so positions >= 40 are dropped.
    head = "".join(digest_hex[pos] for pos in head_positions if pos < 40)
    tail = "".join(digest_hex[pos] for pos in tail_positions)
    mixed = bytearray(20)
    for idx, mask_byte in enumerate(xor_mask):
        mixed[idx] = mask_byte ^ int(digest_hex[idx * 2 : idx * 2 + 2], 16)
    encoded = re.sub(rb"[\\/+=]", b"", base64.b64encode(mixed)).decode("utf-8")
    return f"zzc{head}{encoded}{tail}".lower()
'''randombeaconid'''
@staticmethod
def randombeaconid():
beacon_id, time_month, rand1, rand2 = "", datetime.now().strftime("%Y-%m-") + "01", random.randint(100000, 999999), random.randint(100000000, 999999999)
for i in range(1, 41):
if i in [1, 2, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30, 33, 34, 37, 38]: beacon_id += f"k{i}:{time_month}{rand1}.{rand2}"
elif i == 3: beacon_id += "k3:0000000000000000"
elif i == 4: beacon_id += f"k4:{''.join(random.choices('123456789abcdef', k=16))}"
else: beacon_id += f"k{i}:{random.randint(0, 9999)}"
beacon_id += ";"
return beacon_id
'''randompayloadbydevice'''
@staticmethod
def randompayloadbydevice(device: Device, version: str):
    """Build the plain (not yet encrypted) qimei request payload for ``device`` and app ``version``.

    The "reserved" entry is itself a JSON string; its "uptimes" value is the
    current time minus a random offset of up to 14400 seconds.
    """
    uptime_offset = random.randint(0, 14400)
    boot_time = datetime.now() - timedelta(seconds=uptime_offset)
    reserved = {
        "harmony": "0",
        "clone": "0",
        "containe": "",
        "oz": "UhYmelwouA+V2nPWbOvLTgN2/m8jwGB+yUB5v9tysQg=",
        "oo": "Xecjt+9S1+f8Pz2VLSxgpw==",
        "kelong": "0",
        "uptimes": boot_time.strftime("%Y-%m-%d %H:%M:%S"),
        "multiUser": "0",
        "bod": device.brand,
        "dv": device.device,
        "firstLevel": "",
        "manufact": device.brand,
        "name": device.model,
        "host": "se.infra",
        "kernel": device.proc_version,
    }
    payload = {
        "androidId": device.android_id,
        "platformId": 1,
        "appKey": APP_KEY,
        "appVersion": version,
        "beaconIdSrc": QQMusicClientUtils.randombeaconid(),
        "brand": device.brand,
        "channelId": "10003505",
        "cid": "",
        "imei": device.imei,
        "imsi": "",
        "mac": "",
        "model": device.model,
        "networkType": "unknown",
        "oaid": "",
        "osVersion": f"Android {device.version.release},level {device.version.sdk}",
        "qimei": "",
        "qimei36": "",
        "sdkVersion": "1.2.13.6",
        "targetSdkVersion": "33",
        "audit": "",
        "userId": "{}",
        "packageId": "com.tencent.qqmusic",
        "deviceType": "Phone",
        "sdkName": "",
        "reserved": orjson.dumps(reserved).decode(),
    }
    return payload
'''obtainqimei'''
@staticmethod
def obtainqimei(version: str, device: Device):
    """Request qimei identifiers for ``device`` and return them as a ``QimeiResult``.

    The device payload is AES-encrypted with a random key, the key is
    RSA-encrypted, and both are posted together with MD5 signatures. On success
    ``device.qimei`` is set to the returned "q36" value. Any failure (network
    error, timeout, unexpected response shape) yields a fixed fallback result
    instead of raising.
    """
    try:
        payload, ts = QQMusicClientUtils.randompayloadbydevice(device, version), int(time.time())
        crypt_key, nonce = "".join(random.choices("adbcdef1234567890", k=16)), "".join(random.choices("adbcdef1234567890", k=16))
        key = base64.b64encode(QQMusicClientUtils.rsaencrypt(crypt_key.encode())).decode()
        params = base64.b64encode(QQMusicClientUtils.aesencrypt(crypt_key.encode(), orjson.dumps(payload))).decode()
        extra = '{"appKey":"' + APP_KEY + '"}'
        sign = QQMusicClientUtils.calcmd5(key, params, str(ts * 1000), nonce, SECRET, extra)
        resp = requests.post(
            "https://api.tencentmusic.com/tme/trpc/proxy",
            headers={
                "Host": "api.tencentmusic.com", "method": "GetQimei", "service": "trpc.tme_datasvr.qimeiproxy.QimeiProxy", "appid": "qimei_qq_android",
                "sign": QQMusicClientUtils.calcmd5("qimei_qq_androidpzAuCmaFAaFaHrdakPjLIEqKrGnSOOvH", str(ts)), "user-agent": "QQMusic", "timestamp": str(ts),
            },
            json={"app": 0, "os": 1, "qimeiParams": {"key": key, "params": params, "time": str(ts), "nonce": nonce, "sign": sign, "extra": extra}},
            # Without a timeout an unresponsive server would block the caller forever.
            timeout=10,
        )
        # The outer "data" field is a JSON string that carries the real payload.
        data = orjson.loads(orjson.loads(resp.content)["data"])["data"]
        device.qimei = data["q36"]
        return QimeiResult(q16=data["q16"], q36=data["q36"])
    except Exception:
        # Best effort: fall back to a fixed identifier, but let KeyboardInterrupt / SystemExit through.
        return QimeiResult(q16="", q36="6c9d3cd110abca9b16311cee10001e717614")
'''randomguid'''
@staticmethod
def randomguid():
return "".join(random.choices("abcdef1234567890", k=32))
'''randomsearchid'''
@staticmethod
def randomsearchid():
e = random.randint(1, 20)
t = e * 18014398509481984
n = random.randint(0, 4194304) * 4294967296
a = time.time()
r = round(a * 1000) % (24 * 60 * 60 * 1000)
return str(t + n + r)
'''buildcommonparams'''
@staticmethod
def buildcommonparams(credential: Credential = None, common_override: dict = None) -> dict[str, Any]:
    """Build the "comm" section shared by every request.

    Later sources win on key clashes: version codes and QIMEI36 first, then
    ``COMMON_DEFAULTS``, then the login fields (only when the credential has
    both ``musicid`` and ``musickey``), and finally ``common_override``.
    """
    overrides = common_override or {}
    active_credential = credential or Credential()
    q36 = QQMusicClientUtils().qimei['q36']
    common = {
        "cv": QQMusicClientUtils.version_code,
        "v": QQMusicClientUtils.version_code,
        "QIMEI36": q36,
    }
    common.update(QQMusicClientUtils.COMMON_DEFAULTS)
    if active_credential.musicid and active_credential.musickey:
        common["qq"] = str(active_credential.musicid)
        common["authst"] = active_credential.musickey
        common["tmeLoginType"] = str(active_credential.login_type)
    common.update(overrides)
    return common
'''builddata'''
@staticmethod
def builddata(params: dict, module: str, method: str, process_bool: bool = True):
params = {k: int(v) if isinstance(v, bool) else v for k, v in params.items()} if process_bool else params
return {"module": module, "method": method, "param": params}
'''buildrequestdata'''
@staticmethod
def buildrequestdata(params: dict, module: str, method: str, credential: Credential = None, common_override: dict = None, process_bool: bool = True) -> dict[str, Any]:
    """Assemble a full request body: the "comm" section plus one "<module>.<method>" entry."""
    comm_section = QQMusicClientUtils.buildcommonparams(credential, common_override)
    entry_name = f"{module}.{method}"
    entry = QQMusicClientUtils.builddata(params, module, method, process_bool)
    return {"comm": comm_section, entry_name: entry}
@@ -0,0 +1,152 @@
'''
Function:
Implementation of QuarkParser
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import time
import requests
from urllib.parse import urlparse
from .misc import resp2json, cookies2dict
'''QuarkParser'''
# Resolves Quark cloud-drive share links (pan.quark.cn) to a direct download URL by driving the drive-h / drive-pc HTTP APIs.
class QuarkParser():
'''parsefromdirurl'''
@staticmethod
def parsefromdirurl(url: str, passcode: str = '', cookies: str | dict = '', max_tries: int = 3):
for _ in range(max_tries):
try: download_result, download_url = QuarkParser._parsefromdirurl(url=url, passcode=passcode, cookies=cookies); break
except Exception: download_result, download_url = {}, ""
return download_result, download_url
'''_parsefromdirurl'''
@staticmethod
def _parsefromdirurl(url: str, passcode: str = '', cookies: str | dict = ''):
    """Single attempt at resolving a shared directory link to a download URL.

    Steps: fetch the share token, list the share root, look up the fid of the
    account folder "/来自:分享", list the first root entry as a directory, save
    its largest file into that folder, poll the save task and finally request
    the download URL of the saved file.

    Returns:
        ``(download_result, download_url)`` where ``download_result`` maps a
        step name to the raw JSON of that step's response.

    Raises:
        Exception: HTTP errors, unexpected response shapes, or ``RuntimeError``
        when the save task never reports a saved file id.
    """
    # init
    session, download_result = requests.Session(), {}
    pwd_id = urlparse(url).path.strip('/').split('/')[-1]
    cookies = cookies2dict(cookies)
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.225.400 QQBrowser/12.2.5544.400',
        'origin': 'https://pan.quark.cn', 'referer': 'https://pan.quark.cn/', 'accept-language': 'zh-CN,zh;q=0.9',
    }
    # share/sharepage/token
    json_data = {'pwd_id': pwd_id, 'passcode': passcode, 'support_visit_limit_private_share': 'true'}
    params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', '__dt': '597', '__t': f'{str(int(time.time() * 1000))}'}
    (resp := session.post('https://drive-h.quark.cn/1/clouddrive/share/sharepage/token', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    token_data = resp2json(resp=resp)
    stoken = token_data['data']['stoken']
    download_result['token_data'] = token_data
    time.sleep(0.1)
    # share/sharepage/detail-1
    params = {
        'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', 'ver': '2', 'pwd_id': pwd_id, 'stoken': stoken, 'pdir_fid': '0', 'force': '0', '_page': '1', '_size': '50', '_fetch_banner': '1',
        '_fetch_share': '1', 'fetch_relate_conversation': '1', '_fetch_total': '1', '_sort': 'file_type:asc,file_name:asc', '__dt': '951', '__t': f'{int(time.time() * 1000)}',
    }
    (resp := session.get('https://drive-h.quark.cn/1/clouddrive/share/sharepage/detail', params=params, cookies=cookies, headers=headers)).raise_for_status()
    detail_data = resp2json(resp=resp)
    pdir_fid = detail_data["data"]["list"][0]["fid"]
    download_result['detail_data-1'] = detail_data
    time.sleep(0.1)
    # clouddrive/file/info/path_list
    params = {"pr": "ucpro", "fr": "pc", "uc_param_str": "", "__dt": "1266", "__t": f"{int(time.time() * 1000)}"}
    json_data = {"file_path": ["/来自:分享"]}
    (resp := session.post('https://drive-pc.quark.cn/1/clouddrive/file/info/path_list', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    path_list_data = resp2json(resp=resp)
    to_pdir_fid = path_list_data["data"][0]["fid"]
    download_result['path_list_data'] = path_list_data
    time.sleep(0.1)
    # share/sharepage/detail-2
    params = {
        'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', 'ver': '2', 'pwd_id': pwd_id, 'stoken': stoken, 'pdir_fid': pdir_fid,
        'force': '0', '_page': '1', '_size': '50', '_fetch_banner': '0', '_fetch_share': '0', 'fetch_relate_conversation': '0',
        '_fetch_total': '1', '_sort': 'file_type:asc,file_name:asc', '__dt': '1804336', '__t': f'{int(time.time() * 1000)}',
    }
    (resp := session.get('https://drive-h.quark.cn/1/clouddrive/share/sharepage/detail', params=params, cookies=cookies, headers=headers)).raise_for_status()
    detail_data = resp2json(resp=resp)
    file_list: list[dict] = detail_data["data"]["list"]
    # Largest file first: only the biggest entry of the directory is saved and downloaded.
    file_list = sorted(file_list, key=lambda x: x.get("size", 0), reverse=True)
    pdir_fid = file_list[0]['pdir_fid']
    download_result['detail_data-2'] = detail_data
    time.sleep(0.1)
    # share/sharepage/save
    params = {"pr": "ucpro", "fr": "pc", "uc_param_str": "", "__dt": "1233372", "__t": f"{int(time.time() * 1000)}"}
    json_data = {
        'pwd_id': pwd_id, 'stoken': stoken, 'pdir_fid': pdir_fid, 'to_pdir_fid': to_pdir_fid, 'fid_list': [file_list[0]['fid']],
        'fid_token_list': [file_list[0]['share_fid_token']], 'scene': 'link',
    }
    (resp := session.post(url='https://drive-pc.quark.cn/1/clouddrive/share/sharepage/save', params=params, cookies=cookies, json=json_data, headers=headers)).raise_for_status()
    save_data = resp2json(resp=resp)
    task_id = save_data['data']['task_id']
    download_result['save_data'] = save_data
    time.sleep(0.1)
    # clouddrive/task
    for retry_index in range(5):
        try:
            params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', 'task_id': task_id, 'retry_index': str(retry_index), '__dt': '1234221', '__t': f'{str(int(time.time() * 1000))}'}
            (resp := session.get('https://drive-pc.quark.cn/1/clouddrive/task', params=params, cookies=cookies, headers=headers)).raise_for_status()
            task_data = resp2json(resp=resp)
            fid_encrypt = task_data['data']['save_as']['save_as_top_fids'][0]
            download_result['task_data'] = task_data
            break
        except Exception:
            # The task may not be finished yet; wait briefly and poll again.
            time.sleep(0.1)
    else:
        # Fail with a clear error instead of a NameError on the undefined fid below.
        raise RuntimeError('Quark save task did not report a saved file id after 5 polls')
    # clouddrive/file/download
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) quark-cloud-drive/2.5.56 Chrome/100.0.4896.160 Electron/18.3.5.12-a038f7b798 Safari/537.36 Channel/pckk_other_ch",
        "Accept": "application/json, text/plain, */*", "Content-Type": "application/json", "accept-language": "zh-CN", "origin": "https://pan.quark.cn", "referer": "https://pan.quark.cn/",
    }
    params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', '__dt': '1235217', '__t': f'{str(int(time.time() * 1000))}'}
    json_data = {'fids': [fid_encrypt]}
    (resp := session.post('https://drive-pc.quark.cn/1/clouddrive/file/download', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    download_data = resp2json(resp=resp)
    download_url = download_data["data"][0]["download_url"]
    download_result['download_data'] = download_data
    # return
    return download_result, download_url
'''parsefromurl'''
@staticmethod
def parsefromurl(url: str, passcode: str = '', cookies: str | dict = '', max_tries: int = 3):
for _ in range(max_tries):
try: download_result, download_url = QuarkParser._parsefromurl(url=url, passcode=passcode, cookies=cookies); break
except Exception: download_result, download_url = {}, ""
return download_result, download_url
'''_parsefromurl'''
@staticmethod
def _parsefromurl(url: str, passcode: str = '', cookies: str | dict = ''):
    """Single attempt at resolving a shared file link to a download URL.

    Steps: fetch the share token, list the share root and take its first entry,
    look up the fid of the account folder "/来自:分享", save that entry into the
    folder, poll the save task and finally request the download URL of the
    saved file.

    Returns:
        ``(download_result, download_url)`` where ``download_result`` maps a
        step name to the raw JSON of that step's response.

    Raises:
        Exception: HTTP errors, unexpected response shapes, or ``RuntimeError``
        when the save task never reports a saved file id.
    """
    # init
    session, download_result = requests.Session(), {}
    pwd_id = urlparse(url).path.strip('/').split('/')[-1]
    cookies = cookies2dict(cookies)
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.225.400 QQBrowser/12.2.5544.400',
        'origin': 'https://pan.quark.cn', 'referer': 'https://pan.quark.cn/', 'accept-language': 'zh-CN,zh;q=0.9',
    }
    # share/sharepage/token
    json_data = {'pwd_id': pwd_id, 'passcode': passcode}
    params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', '__dt': '596', '__t': f'{str(int(time.time() * 1000))}'}
    (resp := session.post('https://drive-h.quark.cn/1/clouddrive/share/sharepage/token', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    token_data = resp2json(resp=resp)
    stoken = token_data['data']['stoken']
    download_result['token_data'] = token_data
    time.sleep(0.1)
    # share/sharepage/detail
    params = {
        "pr": "ucpro", "fr": "pc", "uc_param_str": "", "ver": "2", "pwd_id": pwd_id, "stoken": stoken, "pdir_fid": "0", "force": "0",
        "_page": "1", "_size": "50", "_fetch_banner": "1", "_fetch_share": "1", "fetch_relate_conversation": "1", "_fetch_total": "1",
        "_sort": "file_type:asc,file_name:asc", "__dt": "1020", "__t": f"{int(time.time() * 1000)}"
    }
    (resp := session.get('https://drive-h.quark.cn/1/clouddrive/share/sharepage/detail', params=params, cookies=cookies, headers=headers)).raise_for_status()
    detail_data = resp2json(resp=resp)
    # Only the first entry of the share is saved and downloaded.
    first_entry = detail_data["data"]["list"][0]
    fid = first_entry["fid"]
    share_fid_token = first_entry["share_fid_token"]
    download_result['detail_data'] = detail_data
    time.sleep(0.1)
    # clouddrive/file/info/path_list
    params = {"pr": "ucpro", "fr": "pc", "uc_param_str": "", "__dt": "1266", "__t": f"{int(time.time() * 1000)}"}
    json_data = {"file_path": ["/来自:分享"]}
    (resp := session.post('https://drive-pc.quark.cn/1/clouddrive/file/info/path_list', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    path_list_data = resp2json(resp=resp)
    to_pdir_fid = path_list_data["data"][0]["fid"]
    download_result['path_list_data'] = path_list_data
    time.sleep(0.1)
    # share/sharepage/save
    params = {"pr": "ucpro", "fr": "pc", "uc_param_str": "", "__dt": "5660", "__t": f"{int(time.time() * 1000)}"}
    json_data = {"pwd_id": pwd_id, "stoken": stoken, "pdir_fid": "0", "to_pdir_fid": to_pdir_fid, "fid_list": [fid], "fid_token_list": [share_fid_token], "scene": "link"}
    (resp := session.post(url='https://drive-pc.quark.cn/1/clouddrive/share/sharepage/save', params=params, cookies=cookies, json=json_data, headers=headers)).raise_for_status()
    save_data = resp2json(resp=resp)
    task_id = save_data['data']['task_id']
    download_result['save_data'] = save_data
    time.sleep(0.1)
    # clouddrive/task
    for retry_index in range(5):
        try:
            params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', 'task_id': task_id, 'retry_index': str(retry_index), '__dt': '6355', '__t': f'{str(int(time.time() * 1000))}'}
            (resp := session.get('https://drive-pc.quark.cn/1/clouddrive/task', params=params, cookies=cookies, headers=headers)).raise_for_status()
            task_data = resp2json(resp=resp)
            fid_encrypt = task_data['data']['save_as']['save_as_top_fids'][0]
            download_result['task_data'] = task_data
            break
        except Exception:
            # The task may not be finished yet; wait briefly and poll again.
            time.sleep(0.1)
    else:
        # Fail with a clear error instead of a NameError on the undefined fid below.
        raise RuntimeError('Quark save task did not report a saved file id after 5 polls')
    # clouddrive/file/download
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) quark-cloud-drive/2.5.56 Chrome/100.0.4896.160 Electron/18.3.5.12-a038f7b798 Safari/537.36 Channel/pckk_other_ch",
        "Accept": "application/json, text/plain, */*", "Content-Type": "application/json", "accept-language": "zh-CN", "origin": "https://pan.quark.cn", "referer": "https://pan.quark.cn/",
    }
    params = {'pr': 'ucpro', 'fr': 'pc', 'uc_param_str': '', '__dt': '6743', '__t': f'{str(int(time.time() * 1000))}'}
    json_data = {'fids': [fid_encrypt]}
    (resp := session.post('https://drive-pc.quark.cn/1/clouddrive/file/download', params=params, json=json_data, cookies=cookies, headers=headers)).raise_for_status()
    download_data = resp2json(resp=resp)
    download_url = download_data["data"][0]["download_url"]
    download_result['download_data'] = download_data
    # return
    return download_result, download_url
@@ -0,0 +1,172 @@
'''
Function:
Implementation of SodaMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import struct
import base64
from typing import Dict, Any, List
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
'''SpadeDecryptor'''
class SpadeDecryptor:
    """Recover the text key hidden in a base64 ``play_auth`` string."""
    '''bitcount'''
    @staticmethod
    def bitcount(n):
        """Return the number of set bits in the low 32 bits of ``n``.

        The previous bit-twiddling version omitted the 32-bit wrap-around after
        its final multiplication, so it returned wrong counts for ``n >= 256``
        (for example 257 for ``n == 256``).
        """
        return bin(n & 0xFFFFFFFF).count("1")
    '''decodebase36'''
    @staticmethod
    def decodebase36(c):
        """Map the character code of ``0-9`` / ``a-z`` to 0..35; any other code yields ``0xFF``."""
        if 48 <= c <= 57: return c - 48
        if 97 <= c <= 122: return c - 97 + 10
        return 0xFF
    '''decryptspadeinner'''
    @staticmethod
    def decryptspadeinner(spade_key_bytes):
        """Undo the byte-wise scrambling of ``spade_key_bytes`` and return a new bytearray.

        Each byte is XOR-ed with the byte two positions earlier (the first two
        use the seed bytes ``0xFA`` and ``0x55``), then reduced by the bit count
        of its index and by 21, wrapping negative values by adding 255.
        """
        result = bytearray(len(spade_key_bytes))
        # Prefixing two seed bytes makes buff[i] the input byte at i - 2.
        buff = bytearray([0xFA, 0x55]) + spade_key_bytes
        for i, current in enumerate(spade_key_bytes):
            v = (current ^ buff[i]) - SpadeDecryptor.bitcount(i) - 21
            while v < 0: v += 255
            result[i] = v
        return result
    '''extractkey'''
    @classmethod
    def extractkey(cls, play_auth_str):
        """Decode ``play_auth_str`` and return the embedded key text, or ``None`` if the input is too short.

        The padding length is derived from the first three bytes; the bytes
        between the first byte and the trailing padding are unscrambled, and the
        first unscrambled byte (a base-36 digit) tells how many trailing bytes
        to drop from the result.
        """
        bytes_data = bytearray(base64.b64decode(play_auth_str))
        if len(bytes_data) < 3: return None
        padding_len = (bytes_data[0] ^ bytes_data[1] ^ bytes_data[2]) - 48
        if len(bytes_data) < padding_len + 2: return None
        inner_input = bytes_data[1: len(bytes_data)-padding_len]
        tmp_buff = cls.decryptspadeinner(inner_input)
        if len(tmp_buff) == 0: return None
        skip_bytes = cls.decodebase36(tmp_buff[0])
        decoded_message_len = len(bytes_data) - padding_len - 2
        end_index = 1 + decoded_message_len - skip_bytes
        final_bytes = tmp_buff[1:end_index]
        return final_bytes.decode('utf-8')
'''AudioDecryptor'''
class AudioDecryptor:
    """Decrypt the AES-CTR encrypted samples of an MP4 audio file in memory and write the result to disk."""
    '''readuint32be'''
    @staticmethod
    def readuint32be(data, offset):
        """Read a big-endian unsigned 32-bit integer from ``data`` at ``offset``."""
        return struct.unpack(">I", data[offset: offset+4])[0]
    '''findbox'''
    @staticmethod
    def findbox(data: bytes, box_type: str, start: int = 0, end: int = None):
        """Scan sibling boxes in ``data[start:end]`` for the first one of type ``box_type``.

        Returns ``{'offset', 'size', 'data'}`` (``data`` being the payload after
        the 8-byte header) or ``None``. Scanning stops at a box whose declared
        size is below 8, since it cannot be stepped over.
        """
        if end is None: end = len(data)
        pos = start
        while pos + 8 <= end:
            size = AudioDecryptor.readuint32be(data, pos)
            if size < 8: break
            current_type = data[pos+4: pos+8].decode('ascii', errors='ignore')
            if current_type == box_type: return {'offset': pos, 'size': size, 'data': data[pos+8: pos+size]}
            pos += size
        return None
    '''decrypt'''
    @staticmethod
    def decrypt(file_data: bytes, play_auth: str, output_filepath: str = "./decrypted.m4a"):
        """Decrypt ``file_data`` with the key derived from ``play_auth`` and write it to ``output_filepath``.

        Sample sizes come from ``stsz`` and per-sample IVs from ``senc``; samples
        are assumed to be stored back to back at the start of ``mdat``. The first
        ``enca`` marker in ``stsd`` is rewritten to ``mp4a``. Returns ``None``;
        nothing is written when the key or a required box is missing.

        ``file_data`` may be any bytes-like object: immutable input is copied
        into a ``bytearray`` (the in-place patching below needs a mutable
        buffer), while a ``bytearray`` argument is patched in place as before.
        """
        if not isinstance(file_data, bytearray): file_data = bytearray(file_data)
        hex_key = SpadeDecryptor.extractkey(play_auth)
        if not hex_key: return
        findbox, readuint32be = AudioDecryptor.findbox, AudioDecryptor.readuint32be
        moov = findbox(file_data, 'moov')
        if not moov: return
        moov_start, moov_end = moov['offset'] + 8, moov['offset'] + moov['size']
        senc = findbox(file_data, 'senc', start=moov_start, end=moov_end)
        # Descend moov > trak > mdia > minf > stbl.
        parent = findbox(file_data, 'trak', start=moov_start, end=moov_end)
        if not parent: return
        for child_type in ('mdia', 'minf', 'stbl'):
            parent = findbox(file_data, child_type, start=parent['offset'] + 8, end=parent['offset'] + parent['size'])
            if not parent: return
        stbl_start, stbl_end = parent['offset'] + 8, parent['offset'] + parent['size']
        stsz = findbox(file_data, 'stsz', start=stbl_start, end=stbl_end)
        if not stsz: return
        stsz_data = stsz['data']
        sample_size_fixed, sample_count = readuint32be(stsz_data, 4), readuint32be(stsz_data, 8)
        if sample_size_fixed != 0: sample_sizes = [sample_size_fixed] * sample_count
        else: sample_sizes = [readuint32be(stsz_data, 12 + i*4) for i in range(sample_count)]
        # senc may live directly under moov or inside stbl.
        if not senc: senc = findbox(file_data, 'senc', start=stbl_start, end=stbl_end)
        if not senc: return
        senc_body = senc['data']
        senc_flags, senc_sample_count, ivs, ptr = readuint32be(senc_body, 0) & 0x00FFFFFF, readuint32be(senc_body, 4), [], 8
        has_subsamples = (senc_flags & 0x02) != 0
        for _ in range(senc_sample_count):
            # 8-byte IV from the box, zero-extended to the 16-byte CTR counter block.
            ivs.append(bytes(senc_body[ptr: ptr+8]) + b'\x00'*8); ptr += 8
            if has_subsamples:
                sub_count = struct.unpack(">H", senc_body[ptr: ptr+2])[0]
                ptr += 2 + (sub_count * 6)
        mdat = findbox(file_data, 'mdat')
        if not mdat: return
        key_bytes, backend, decrypted_mdat, read_ptr = bytes.fromhex(hex_key), default_backend(), bytearray(), mdat['offset'] + 8
        for i, size in enumerate(sample_sizes):
            chunk = file_data[read_ptr: read_ptr + size]
            if i < len(ivs):
                decryptor = Cipher(algorithms.AES(key_bytes), modes.CTR(ivs[i]), backend=backend).decryptor()
                chunk = decryptor.update(chunk) + decryptor.finalize()
            # Samples without an IV are copied through unchanged.
            decrypted_mdat.extend(chunk)
            read_ptr += size
        stsd = findbox(file_data, 'stsd', start=stbl_start, end=stbl_end)
        if stsd:
            offset, length = stsd['offset'], stsd['size']
            file_data[offset: offset+length] = file_data[offset: offset+length].replace(b'enca', b'mp4a', 1)
        # Replace the payload only when the sizes agree, so the file layout stays intact.
        if len(decrypted_mdat) == mdat['size'] - 8: file_data[mdat['offset']+8: mdat['offset']+mdat['size']] = decrypted_mdat
        with open(output_filepath, "wb") as fp: fp.write(file_data)
'''SodaTimedLyricsParser'''
class SodaTimedLyricsParser:
    """Parse word-timed lyrics of the form ``[start,duration]<offset,duration,flag>text...``."""
    LINE_PATTERN_RE = re.compile(r"^\[(\d+),(\d+)\]")
    TOKEN_PATTERN_RE = re.compile(r"<(\d+),(\d+),(\d+)>")
    '''parsetimedlyrics'''
    @staticmethod
    def parsetimedlyrics(text: str) -> List[Dict[str, Any]]:
        """Parse ``text`` into one dict per timed line.

        Each dict holds the line timing in milliseconds, the concatenated
        ``text``, the per-token entries (``offset_ms`` relative to the line,
        ``start_ms`` / ``end_ms`` absolute) and the unparsed remainder ``raw``.
        Lines without a leading ``[start,duration]`` header are skipped; empty
        input or the literal ``'NULL'`` gives ``[]``.
        """
        if not text or text in {'NULL'}: return []
        text = text.replace(r"\u003C", "<").replace(r"\u003E", ">")
        lines_out: List[Dict[str, Any]] = []
        for raw_line in text.splitlines():
            # Match and slice the same left-stripped string: slicing the unstripped line with
            # offsets from a stripped match would misplace the remainder on indented lines.
            line = raw_line.lstrip()
            if not line.strip(): continue
            if not (m := SodaTimedLyricsParser.LINE_PATTERN_RE.match(line)): continue
            line_start, line_dur = int(m.group(1)), int(m.group(2))
            line_end, rest, tokens, pieces = line_start + line_dur, line[m.end():], [], []
            matches = list(SodaTimedLyricsParser.TOKEN_PATTERN_RE.finditer(rest))
            for i, tm in enumerate(matches):
                offset, dur, flag, seg_start = int(tm.group(1)), int(tm.group(2)), int(tm.group(3)), tm.end()
                # A token's text runs up to the next tag, or to the end of the line.
                seg_end = matches[i + 1].start() if i + 1 < len(matches) else len(rest)
                if (token_text := rest[seg_start: seg_end].replace("\r", "")) == "": continue
                abs_start, abs_end = line_start + offset, line_start + offset + dur
                tokens.append({"text": token_text, "offset_ms": offset, "duration_ms": dur, "flag": flag, "start_ms": abs_start, "end_ms": abs_end})
                pieces.append(token_text)
            lines_out.append({"line_start_ms": line_start, "line_duration_ms": line_dur, "line_end_ms": line_end, "text": "".join(pieces), "tokens": tokens, "raw": rest})
        return lines_out
    '''toplaintext'''
    @staticmethod
    def toplaintext(parsed: List[Dict[str, Any]]) -> str:
        """Join the ``text`` of every parsed line with newlines.

        NOTE: returns ``None`` (not ``""``) when ``parsed`` is empty.
        """
        if not parsed: return
        return "\n".join(line["text"] for line in parsed)
    '''tolrclinelevel'''
    @staticmethod
    def tolrclinelevel(parsed: List[Dict[str, Any]], use_centiseconds: bool = True) -> str:
        """Render parsed lines as line-level LRC (``[mm:ss.xx]text`` or ``[mm:ss]text``).

        NOTE: returns ``None`` (not ``""``) when ``parsed`` is empty.
        """
        if not parsed: return
        def fmt(ms: int) -> str:
            mm, ss = ms // 60000, (ms % 60000) // 1000
            if use_centiseconds: return f"{mm:02d}:{ss:02d}.{(ms % 1000) // 10:02d}"
            return f"{mm:02d}:{ss:02d}"
        return "\n".join(f"[{fmt(line['line_start_ms'])}]{line['text']}" for line in parsed)
@@ -0,0 +1,298 @@
'''
Function:
Implementation of SongInfoUtils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import os
import base64
import shutil
import requests
import tempfile
from pathlib import Path
from mutagen import File
from .data import SongInfo
from tinytag import TinyTag
from .lyric import WhisperLRC
from mimetypes import guess_type
from .logger import LoggerHandle
from mutagen.flac import Picture
from mutagen.mp4 import MP4Cover
from .misc import seconds2hms, byte2mb
from mutagen.id3 import ID3, USLT, APIC, TIT2, TALB, TPE1
'''SongInfoUtils'''
class SongInfoUtils:
'''supplsonginfothensavelyricsthenwritetags'''
@staticmethod
def supplsonginfothensavelyricsthenwritetags(song_info: SongInfo, logger_handle: LoggerHandle, disable_print: bool, auto_save_lyrics_then_write_tags: bool = True, enable_whisperlrc: bool = False) -> SongInfo:
    '''Refresh a downloaded song's metadata from the file on disk, optionally generate lyrics and write tags.

    Args:
        song_info: record whose `save_path` points at the downloaded audio file; updated in place.
        logger_handle: logger used to report non-fatal failures.
        disable_print: forwarded to `logger_handle.warning`.
        auto_save_lyrics_then_write_tags: when True, save the .lrc file and embed tags (best effort).
        enable_whisperlrc: when True (or when env ENABLE_WHISPERLRC is "true"), generate lyrics
            with WhisperLRC if the record has none.

    Returns:
        The same `song_info` object.

    Raises:
        OSError: if the file at `save_path` cannot be stat'ed.
    '''
    path = Path(song_info.save_path)
    # correct file size
    size = path.stat().st_size
    song_info.file_size_bytes = size
    song_info.file_size = byte2mb(size=size)
    # tinytag parse (failure is logged, the remaining steps still run)
    try:
        tag = TinyTag.get(str(path))
    except Exception as err:
        logger_handle.warning(f'SongInfoUtils.supplsonginfothensavelyricsthenwritetags >>> {str(path)} (Err: {err})', disable_print=disable_print)
        tag = None
    if tag:
        if tag.duration:
            song_info.duration_s = int(round(tag.duration))
            song_info.duration = seconds2hms(tag.duration)
        if tag.bitrate:
            song_info.bitrate = int(round(tag.bitrate))
        if tag.samplerate:
            song_info.samplerate = int(tag.samplerate)
        if tag.channels:
            song_info.channels = int(tag.channels)
        if getattr(tag, "codec", None):
            song_info.codec = tag.codec
        elif getattr(tag, "extra", None) and isinstance(tag.extra, dict):
            song_info.codec = tag.extra.get("codec") or tag.extra.get("mime-type")
    # lyric
    whisper_enabled = (os.environ.get('ENABLE_WHISPERLRC', 'False').lower() == 'true') or enable_whisperlrc
    if whisper_enabled and ((not song_info.lyric) or (song_info.lyric in {'NULL'})):
        lyric_result = WhisperLRC(model_size_or_path='small').fromfilepath(str(path))
        lyric = lyric_result['lyric']
        song_info.lyric = lyric
        song_info.raw_data['lyric'] = lyric_result
    # write tags to audio file: still best effort, but failures are now logged and
    # KeyboardInterrupt / SystemExit are no longer swallowed
    if auto_save_lyrics_then_write_tags:
        try:
            SongInfoUtils.savelyricsthenwritetagstoaudio(song_info, overwrite=False)
        except Exception as err:
            logger_handle.warning(f'SongInfoUtils.supplsonginfothensavelyricsthenwritetags >>> {str(path)} (Err: {err})', disable_print=disable_print)
    # return
    return song_info
'''savelyricsthenwritetagstoaudio'''
@staticmethod
def savelyricsthenwritetagstoaudio(song_info: SongInfo, overwrite: bool = False, *, timeout: int = 15) -> dict:
    '''Write the .lrc file next to the audio and embed lyrics, basic tags and cover art.

    Args:
        song_info: record providing `lyric`, `song_name`, `album`, `singers`, `cover_url` and `save_path`.
        overwrite: forwarded to every writer; existing data is kept when False.
        timeout: forwarded to the cover embedding step.

    Returns:
        Dict of booleans: "lyrics_embedded", "basic_tags_embedded", "cover_embedded", "lrc_saved".
    '''
    clean = SongInfoUtils.normalizetext
    lyrics_text = clean(getattr(song_info, "lyric", None))
    title = clean(getattr(song_info, "song_name", None))
    album = clean(getattr(song_info, "album", None))
    artists = clean(getattr(song_info, "singers", None))
    cover_source = clean(getattr(song_info, "cover_url", None))
    audio_path = Path(song_info.save_path)
    results = {"lyrics_embedded": False, "basic_tags_embedded": False, "cover_embedded": False, "lrc_saved": False}
    if lyrics_text:
        results["lrc_saved"] = SongInfoUtils.savelrctofile(audio_path, lyrics_text, overwrite=overwrite)
        results["lyrics_embedded"] = SongInfoUtils.safeeditaudio(
            audio_path=audio_path, editor=SongInfoUtils.embedlyrics, overwrite=overwrite, lyrics_text=lyrics_text,
        )
    if title or album or artists:
        results["basic_tags_embedded"] = SongInfoUtils.safeeditaudio(
            audio_path=audio_path, editor=SongInfoUtils.embedbasictags, overwrite=overwrite, title=title, album=album, artists=artists,
        )
    if cover_source and SongInfoUtils.lookslikecoversource(cover_source):
        results["cover_embedded"] = SongInfoUtils.safeeditaudio(
            audio_path=audio_path, editor=SongInfoUtils.embedcover, overwrite=overwrite, cover_source=cover_source, timeout=timeout,
        )
    return results
'''savelrctofile'''
@staticmethod
def savelrctofile(audio_path: Path, lyrics_text: str, *, overwrite: bool = False) -> bool:
lrc_path = audio_path.with_suffix(".lrc")
if lrc_path.exists() and not overwrite: return False
content = (lyrics_text or "").replace("\r\n", "\n").strip()
if not content: return False
if not content.endswith("\n"): content += "\n"
return SongInfoUtils.atomicwritetext(lrc_path, content)
'''safeeditaudio'''
@staticmethod
def safeeditaudio(audio_path: Path, editor, **editor_kwargs) -> bool:
# Run `editor(temp_copy, **editor_kwargs)` on a temporary copy of `audio_path` and swap the
# copy into place only when the edited file is still readable.
# Returns True only when the edited copy replaced the original and the result passed the
# readability check; every failure path returns False (no exception escapes the try block).
# Bail out early when there is nothing usable to edit.
if not audio_path.exists(): return False
if not SongInfoUtils.audioreadable(audio_path): return False
# The temp file comes from maketemppath; the backup is "<name><suffix>.bak" beside the original.
temp_path = SongInfoUtils.maketemppath(audio_path)
backup_path = audio_path.with_suffix(audio_path.suffix + ".bak")
try:
shutil.copy2(audio_path, temp_path)
# The editor reports whether it changed anything; a falsy result leaves the original untouched.
changed = bool(editor(temp_path, **editor_kwargs))
if not changed: return False
# Never swap in a copy that the editor damaged.
if not SongInfoUtils.audioreadable(temp_path): return False
# Drop any stale backup, move the original aside, then move the edited copy into place.
backup_path.unlink(missing_ok=True)
os.replace(audio_path, backup_path)
os.replace(temp_path, audio_path)
# Final sanity check on the swapped-in file; roll back to the backup when it fails.
if not SongInfoUtils.audioreadable(audio_path): os.replace(backup_path, audio_path); return False
backup_path.unlink(missing_ok=True)
return True
except Exception:
# If the failure happened between the two replaces, the original only exists as the backup: restore it.
if (not audio_path.exists()) and backup_path.exists():
try: os.replace(backup_path, audio_path)
except Exception: pass
return False
finally:
# The temp file is removed on every path (it no longer exists after a successful swap).
temp_path.unlink(missing_ok=True)
'''safegeteditabletags'''
@staticmethod
def safegeteditabletags(audio):
if (tags := getattr(audio, "tags", None)) is not None: return tags
try: audio.add_tags()
except Exception: pass
return getattr(audio, "tags", None) or {}
'''embedlyrics'''
@staticmethod
def embedlyrics(audio_path: Path, *, overwrite: bool, lyrics_text: str) -> bool:
    '''Embed lyrics into the audio file at `audio_path`.

    Supported containers (by mutagen class name): MP3 (USLT frame), MP4 ("\\xa9lyr"),
    FLAC/Ogg ("LYRICS") and ASF ("WM/Lyrics").

    Returns:
        True when lyrics were written; False when the file cannot be opened, the lyrics are
        empty, the container is unsupported, or lyrics already exist and `overwrite` is False.
    '''
    audio = File(audio_path)
    if audio is None:
        return False
    container = audio.__class__.__name__
    text = (lyrics_text or "").replace("\r\n", "\n").strip()
    if not text:
        return False
    # MP3: lyrics live in an ID3 USLT frame
    if container == "MP3":
        id3 = SongInfoUtils.loadorcreateid3(audio_path)
        already_present = any(frame_key.startswith("USLT") for frame_key in id3.keys())
        if already_present and not overwrite:
            return False
        if overwrite:
            id3.delall("USLT")
        id3.add(USLT(encoding=3, lang="eng", desc="Lyrics", text=text))
        id3.save(audio_path, v2_version=3)
        return True
    # every other supported container stores the lyrics under a single tag key
    if container == "MP4":
        key = "\xa9lyr"
    elif container in {"FLAC", "OggVorbis", "OggOpus", "OggSpeex", "OggTheora"}:
        key = "LYRICS"
    elif container == "ASF":
        key = "WM/Lyrics"
    else:
        return False
    tags = SongInfoUtils.safegeteditabletags(audio=audio)
    if tags.get(key) and not overwrite:
        return False
    tags[key] = [text]
    audio.tags = tags
    audio.save()
    return True
'''embedbasictags'''
@staticmethod
def embedbasictags(audio_path: Path, *, overwrite: bool, title: str | None, album: str | None, artists: list[str] | None) -> bool:
    '''Embed title / album / artist tags into the audio file at `audio_path`.

    Args:
        audio_path: file to edit in place.
        overwrite: replace existing values when True; otherwise only fill missing ones.
        title: track title, or None to skip.
        album: album name, or None to skip.
        artists: artist names; a single string is accepted and treated as one artist.

    Returns:
        True when at least one tag was written, False otherwise (including unsupported containers).
    '''
    # init
    audio = File(audio_path)
    if audio is None:
        return False
    cls = audio.__class__.__name__
    changed = False
    # a bare string is wrapped so every container receives a list, like title and album
    if isinstance(artists, str):
        artists = [artists]
    # MP3
    if cls == "MP3":
        # the helper is named `loadorcreateid3`; the previous `_load_or_create_id3` does not exist
        id3 = SongInfoUtils.loadorcreateid3(audio_path)
        for frame_id, frame_cls, value in (("TIT2", TIT2, title), ("TALB", TALB, album), ("TPE1", TPE1, artists)):
            if value and (overwrite or not id3.getall(frame_id)):
                id3.setall(frame_id, [frame_cls(encoding=3, text=value)])
                changed = True
        if changed:
            id3.save(audio_path, v2_version=3)
        return changed
    # MP4/M4A, FLAC/OGG/OPUS, ASF/WMA: same procedure, different tag keys
    if cls == "MP4":
        title_key, album_key, artist_key = "\xa9nam", "\xa9alb", "\xa9ART"
    elif cls in {"FLAC", "OggVorbis", "OggOpus", "OggSpeex", "OggTheora"}:
        title_key, album_key, artist_key = "TITLE", "ALBUM", "ARTIST"
    elif cls == "ASF":
        title_key, album_key, artist_key = "Title", "WM/AlbumTitle", "Author"
    else:
        return False
    tags = SongInfoUtils.safegeteditabletags(audio=audio)
    if title and (overwrite or not tags.get(title_key)):
        tags[title_key] = [title]
        changed = True
    if album and (overwrite or not tags.get(album_key)):
        tags[album_key] = [album]
        changed = True
    if artists and (overwrite or not tags.get(artist_key)):
        tags[artist_key] = artists
        changed = True
    if changed:
        audio.tags = tags
        audio.save()
    return changed
'''embedcover'''
@staticmethod
def embedcover(audio_path: Path, *, overwrite: bool, cover_source: str, timeout: int = 15) -> bool:
    '''Embed cover art into the audio file at `audio_path`.

    Args:
        audio_path: file to edit in place.
        overwrite: replace an existing cover when True; otherwise keep it.
        cover_source: URL or local path of the image.
        timeout: timeout forwarded to the image loader.

    Returns:
        True when a cover was written; False when the file cannot be opened, the container is
        unsupported, a cover already exists and `overwrite` is False, or (MP4 only) the image
        is neither JPEG nor PNG.

    Raises:
        Whatever `loadimagebytesandmime` raises for an unreadable or non-image source.
    '''
    audio = File(audio_path)
    if audio is None:
        return False
    cls = audio.__class__.__name__
    ogg_classes = {"OggVorbis", "OggOpus", "OggSpeex", "OggTheora"}
    # do not fetch the image for containers that cannot take a cover
    if cls not in {"MP3", "MP4", "FLAC", "ASF"} | ogg_classes:
        return False
    cover_bytes, mime = SongInfoUtils.loadimagebytesandmime(cover_source, timeout=timeout)
    # MP3
    if cls == "MP3":
        # the helper is named `loadorcreateid3`; the previous `_load_or_create_id3` does not exist
        id3 = SongInfoUtils.loadorcreateid3(audio_path)
        has = any(k.startswith("APIC") for k in id3.keys())
        if has and not overwrite:
            return False
        if overwrite:
            id3.delall("APIC")
        id3.add(APIC(encoding=3, mime=mime, type=3, desc="Cover", data=cover_bytes))
        id3.save(audio_path, v2_version=3)
        return True
    # MP4
    if cls == "MP4":
        if mime not in {"image/jpeg", "image/png"}:
            return False
        tags = SongInfoUtils.safegeteditabletags(audio=audio)
        if tags.get("covr") and not overwrite:
            return False
        image_format = MP4Cover.FORMAT_PNG if mime == "image/png" else MP4Cover.FORMAT_JPEG
        tags["covr"] = [MP4Cover(cover_bytes, imageformat=image_format)]
        audio.tags = tags
        audio.save()
        return True
    # FLAC
    if cls == "FLAC":
        has = bool(getattr(audio, "pictures", []))
        if has and not overwrite:
            return False
        picture = Picture()
        picture.type = 3; picture.mime = mime; picture.desc = "Cover"; picture.data = cover_bytes
        if overwrite:
            audio.clear_pictures()
        audio.add_picture(picture)
        audio.save()
        return True
    # OGG/OPUS: the picture block is stored base64-encoded in a comment field
    if cls in ogg_classes:
        tags = SongInfoUtils.safegeteditabletags(audio=audio)
        if tags.get("METADATA_BLOCK_PICTURE") and not overwrite:
            return False
        picture = Picture()
        picture.type = 3; picture.mime = mime; picture.desc = "Cover"; picture.data = cover_bytes
        tags["METADATA_BLOCK_PICTURE"] = [base64.b64encode(picture.write()).decode("ascii")]
        audio.tags = tags
        audio.save()
        return True
    # ASF/WMA (ASFPicture is imported lazily; when it is unavailable the cover is skipped)
    try:
        from mutagen.asf import ASFPicture
    except Exception:
        return False
    tags = SongInfoUtils.safegeteditabletags(audio=audio)
    if tags.get("WM/Picture") and not overwrite:
        return False
    picture = ASFPicture()
    picture.type = 3; picture.mime_type = mime; picture.description = "Cover"; picture.data = cover_bytes
    tags["WM/Picture"] = [picture]
    audio.tags = tags
    audio.save()
    return True
'''loadimagebytesandmime'''
@staticmethod
def loadimagebytesandmime(cover: str | Path, *, timeout: int = 15) -> tuple[bytes, str]:
cover_str = str(cover).strip()
if not cover_str: raise ValueError("Empty cover")
# local path
if not cover_str.startswith("http"): cover_path = Path(cover_str); data = cover_path.read_bytes(); mime = (guess_type(str(cover_path))[0] or "image/jpeg").split(";", 1)[0].lower(); return data, mime
# url
(resp := requests.get(cover_str, timeout=timeout, headers={"User-Agent": "Mozilla/5.0"}, allow_redirects=True)).raise_for_status()
data = resp.content or b""
content_type = (resp.headers.get("Content-Type") or "").split(";", 1)[0].strip().lower()
mime = (content_type or (guess_type(cover_str)[0] or "image/jpeg")).split(";", 1)[0].lower()
# minimal signature fallback
signature = data[:8]
if signature.startswith(b"\xFF\xD8\xFF"): mime = "image/jpeg"
elif signature.startswith(b"\x89PNG\r\n\x1a\n"): mime = "image/png"
if not mime.startswith("image/"): raise ValueError(f"Not an image (Content-Type={content_type!r})")
return data, mime
'''normalizetext'''
@staticmethod
def normalizetext(value) -> str | None:
if not value or value in {'NULL', 'null', 'None', 'none'}: return None
text = str(value).strip()
return text or None
'''lookslikecoversource'''
@staticmethod
def lookslikecoversource(cover_source: str) -> bool:
return cover_source.startswith("http") or Path(cover_source).exists()
'''audioreadable'''
@staticmethod
def audioreadable(audio_path: Path) -> bool:
try:
if not audio_path.exists() or audio_path.stat().st_size <= 0: return False
audio = File(audio_path)
if audio is None or getattr(audio, "info", None) is None: return False
TinyTag.get(str(audio_path))
return True
except Exception:
return False
'''maketemppath'''
@staticmethod
def maketemppath(audio_path: Path) -> Path:
fd, temp_name = tempfile.mkstemp(prefix=audio_path.stem + ".", suffix=audio_path.suffix, dir=str(audio_path.parent))
os.close(fd)
return Path(temp_name)
'''atomicwritetext'''
@staticmethod
def atomicwritetext(path: Path, text: str) -> bool:
fd, temp_name = tempfile.mkstemp(prefix=path.stem + ".", suffix=path.suffix, dir=str(path.parent))
os.close(fd); temp_path = Path(temp_name)
try: temp_path.write_text(text, encoding="utf-8"); os.replace(temp_path, path); return True
except Exception: return False
finally: temp_path.unlink(missing_ok=True)
'''loadorcreateid3'''
@staticmethod
def loadorcreateid3(audio_path: Path) -> ID3:
    '''Load the ID3 tag of `audio_path`; fall back to a new, empty ID3 object when loading fails.'''
    try:
        tag = ID3(audio_path)
    except Exception:
        tag = ID3()
    return tag
@@ -0,0 +1,169 @@
'''
Function:
Implementation of SpotifyMusicClient Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
import time
import hmac
import base64
import hashlib
import requests
import json_repair
from typing import Dict, List, Tuple
from .misc import resp2json, safeextractfromdict
'''SpotifyMusicClientUtils'''
class SpotifyMusicClientUtils():
BROWSER_VERSION = '145'
COMMON_HEADERS = {'Content-Type': 'application/json', 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{BROWSER_VERSION}.0.0.0 Safari/537.36', 'Sec-Ch-Ua': f'"Chromium";v="{BROWSER_VERSION}", "Not(A:Brand";v="24", "Google Chrome";v="{BROWSER_VERSION}"'}
'''getlatesttotpsecret'''
@staticmethod
def getlatesttotpsecret(version: int = 61) -> dict:
VERSION_TO_SECRET = {
59: [123, 105, 79, 70, 110, 59, 52, 125, 60, 49, 80, 70, 89, 75, 80, 86, 63, 53, 123, 37, 117, 49, 52, 93, 77, 62, 47, 86, 48, 104, 68, 72],
60: [79, 109, 69, 123, 90, 65, 46, 74, 94, 34, 58, 48, 70, 71, 92, 85, 122, 63, 91, 64, 87, 87],
61: [44, 55, 47, 42, 70, 40, 34, 114, 76, 74, 50, 111, 120, 97, 75, 76, 94, 102, 43, 69, 49, 120, 118, 80, 64, 78],
}
return {"version": version, "secret": VERSION_TO_SECRET[version]}
'''generatetotp'''
@staticmethod
def generatetotp(secret: List[int]) -> str:
# Derive a zero-padded 6-digit, time-based code from the integer list `secret`.
# The result changes every 30 seconds because it is keyed on int(time.time() / 30).
# Step 1: XOR every value with ((index % 33) + 9).
transformed = [e ^ ((t % 33) + 9) for t, e in enumerate(secret)]
# Step 2: concatenate the decimal digits of the transformed values and hex-encode their ASCII bytes.
hex_str = ("".join(str(num) for num in transformed)).encode('ascii').hex()
# NOTE: despite the variable name this is Base64 (b64encode), with '=' padding removed.
base32_secret = base64.b64encode(bytes.fromhex(hex_str)).decode('utf-8').replace('=', '')
# Padding is re-added before decoding back to raw bytes, which serve as the HMAC key.
base32_bytes = base64.b64decode(base32_secret + '==')
# 30-second step counter rendered as 16 hex digits, i.e. an 8-byte message.
time_step = int(time.time() / 30); time_hex = format(time_step, '016x')
digest = hmac.new(base32_bytes, bytes.fromhex(time_hex), hashlib.sha1).digest()
# Truncation: the low nibble of the last digest byte selects a 4-byte window, read
# big-endian and masked to 31 bits. NOTE(review): looks like RFC 4226 dynamic truncation — confirm.
offset = digest[19] & 0xf; code = int.from_bytes(digest[offset: offset+4], byteorder='big') & 0x7fffffff
# Keep the last six decimal digits, left-padded with zeros.
return str(code % 1000000).zfill(6)
'''getaccesstoken'''
@staticmethod
def getaccesstoken(session: requests.Session, totp: str, totp_ver: int, request_overrides: dict = None) -> dict:
    '''Request a web-player token from open.spotify.com using a TOTP code.

    Returns:
        {"accessToken": ..., "clientId": ...} taken from the JSON response (values may be None).

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
    '''
    extra = request_overrides or {}
    query = {'reason': 'init', 'productType': 'web-player', 'totp': totp, 'totpVer': str(totp_ver), 'totpServer': totp}
    resp = session.get("https://open.spotify.com/api/token", params=query, headers=SpotifyMusicClientUtils.COMMON_HEADERS, **extra)
    resp.raise_for_status()
    data = resp2json(resp=resp)
    return {"accessToken": data.get('accessToken'), "clientId": data.get('clientId')}
'''getclienttoken'''
@staticmethod
def getclienttoken(session: requests.Session, client_version: str, client_id: str, device_id: str, request_overrides: dict = None) -> str:
    '''Request a client token from clienttoken.spotify.com.

    Returns:
        The value at ["granted_token"]["token"] of the JSON response, or '' when absent.

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
    '''
    extra = request_overrides or {}
    js_sdk_data = {"device_brand": "unknown", "device_model": "unknown", "os": "windows", "os_version": "NT 10.0", "device_id": device_id, "device_type": "computer"}
    payload = {"client_data": {"client_version": client_version, "client_id": client_id, "js_sdk_data": js_sdk_data}}
    # copy the shared headers so the class-level dict is never mutated
    headers = {**SpotifyMusicClientUtils.COMMON_HEADERS, 'Authority': 'clienttoken.spotify.com', 'Accept': 'application/json'}
    resp = session.post('https://clienttoken.spotify.com/v1/clienttoken', headers=headers, json=payload, **extra)
    resp.raise_for_status()
    return safeextractfromdict(resp2json(resp=resp), ['granted_token', 'token'], '')
'''extractjslinks'''
@staticmethod
def extractjslinks(html: str) -> List[str]:
script_tag_regex = re.compile(r'<script[^>]+src="([^"]+\.js)"[^>]*>')
return script_tag_regex.findall(html)
'''getsessiondata'''
@staticmethod
def getsessiondata(session: requests.Session, request_overrides: dict = None) -> dict:
# Fetch https://open.spotify.com and extract three values from the response:
#   "deviceId"      - value of the `sp_t` cookie from the Set-Cookie header ('' when absent)
#   "clientVersion" - "clientVersion" from the appServerConfig script, or from the raw page text
#   "jsPack"        - absolute URL of the web-player JS bundle ('' when no matching script tag exists)
# Raises requests.HTTPError (via raise_for_status) when the page request fails.
request_overrides = request_overrides or {}
(resp := session.get('https://open.spotify.com', headers=SpotifyMusicClientUtils.COMMON_HEADERS, **request_overrides)).raise_for_status()
cookie_match = re.search(r'sp_t=([^;]+)', resp.headers.get('set-cookie', '')); device_id = cookie_match.group(1) if cookie_match else ''
app_server_config_match, client_version = re.search(r'<script id="appServerConfig" type="text/plain">([^<]+)</script>', resp.text), ''
# Preferred source: the base64-encoded JSON inside the appServerConfig script tag.
# When that tag is missing, fall back to a regex over the raw page text.
try: client_version = json_repair.loads(base64.b64decode(app_server_config_match.group(1)).decode("utf-8")).get("clientVersion", "") if app_server_config_match else (m.group(1) if (m := re.search(r'"clientVersion":"([^"]+)"', resp.text)) else "")
# Decoding or parsing failed: use the same regex fallback.
except Exception: client_version = m.group(1) if (m := re.search(r'"clientVersion":"([^"]+)"', resp.text)) else ""
all_js_links, js_pack_relative = SpotifyMusicClientUtils.extractjslinks(resp.text), ''
# First script whose URL contains 'web-player/web-player'; stays '' when none matches.
js_pack_relative = next((link for link in all_js_links if 'web-player/web-player' in link and link.endswith('.js')), js_pack_relative)
# Relative links are resolved against https://open.spotify.com.
if js_pack_relative.startswith('http'): js_pack = js_pack_relative
else: js_pack = f'https://open.spotify.com{js_pack_relative}' if js_pack_relative else ''
return {"deviceId": device_id, "clientVersion": client_version, "jsPack": js_pack}
'''SpotifyMusicClientPlaylistUtils'''
class SpotifyMusicClientPlaylistUtils():
'''extractmappings'''
@staticmethod
def extractmappings(js_code: str) -> Tuple[Dict[str, str], Dict[str, str]]:
matches = re.compile(r'\{\d+:"[^"]+"(?:,\d+:"[^"]+")*\}').findall(js_code)
if not matches or len(matches) < 5: return {}, {}
parse_match_func = lambda match_str: {key.strip(): value.strip().strip('"') for entry in re.split(r',(?=\d+:)', match_str[1:-1]) for key, sep, value in [entry.partition(':')] if sep}
return parse_match_func(matches[3]), parse_match_func(matches[4])
'''combinechunks'''
@staticmethod
def combinechunks(str_mapping: Dict[str, str], hash_mapping: Dict[str, str]) -> List[str]:
chunks = []
for key, string_val in str_mapping.items():
if (hash_val := hash_mapping.get(key)): chunks.append(f"{string_val}.{hash_val}.js")
return chunks
'''getsha256hash'''
@staticmethod
def getsha256hash(session: requests.Session, js_pack: str, request_overrides: dict = None) -> str:
fallback_hash, request_overrides = 'a67612f8c59f4cb4a9723d8e0e0e7b7cb8c5c3d45e3d8c4f5e6f7e8f9a0b1c2d', request_overrides or {}
if not js_pack: return fallback_hash
try:
(resp := session.get(js_pack, headers=SpotifyMusicClientUtils.COMMON_HEADERS, **request_overrides)).raise_for_status()
raw_hashes = resp.text; str_mapping, hash_mapping = SpotifyMusicClientPlaylistUtils.extractmappings(raw_hashes)
chunks = SpotifyMusicClientPlaylistUtils.combinechunks(str_mapping, hash_mapping)
for chunk in chunks:
chunk_url = f"https://open.spotifycdn.com/cdn/build/web-player/{chunk}"
try: raw_hashes += session.get(chunk_url, headers=SpotifyMusicClientUtils.COMMON_HEADERS, **request_overrides).text
except Exception: pass
return (m.group(1) if (m := re.search(r'"fetchPlaylist","(?:query|mutation)","([^"]+)"', raw_hashes)) else fallback_hash)
except Exception: return fallback_hash
'''fetchplaylist'''
@staticmethod
def fetchplaylist(session: requests.Session, access_token: str, client_token: str, client_version: str, playlist_id: str, js_pack: str, offset: int = 0, limit: int = 25, request_overrides: dict = None) -> dict:
    '''Fetch one page of a playlist through the pathfinder "fetchPlaylist" persisted query.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: when the endpoint answers with an error status.
    '''
    extra = request_overrides or {}
    sha256_hash = SpotifyMusicClientPlaylistUtils.getsha256hash(session, js_pack, request_overrides=extra)
    variables = {"uri": f"spotify:playlist:{playlist_id}", "offset": offset, "limit": limit, "enableWatchFeedEntrypoint": False}
    payload = {
        "operationName": "fetchPlaylist",
        "variables": variables,
        "extensions": {"persistedQuery": {"version": 1, "sha256Hash": sha256_hash}},
    }
    browser = SpotifyMusicClientUtils.BROWSER_VERSION
    headers = {
        'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{browser}.0.0.0 Safari/537.36',
        'Sec-Ch-Ua': f'"Chromium";v="{browser}", "Not(A:Brand";v="24", "Google Chrome";v="{browser}"',
        'Authorization': f'Bearer {access_token}',
        'Client-Token': client_token,
        'Spotify-App-Version': client_version,
        'Content-Type': 'application/json;charset=UTF-8',
    }
    resp = session.post('https://api-partner.spotify.com/pathfinder/v2/query', headers=headers, json=payload, **extra)
    resp.raise_for_status()
    return resp2json(resp=resp)
'''getalltracks'''
@staticmethod
def getalltracks(session: requests.Session, access_token: str, client_token: str, client_version: str, playlist_id: str, js_pack: str, request_overrides: dict = None) -> Tuple[List[dict], dict]:
    '''Page through a playlist and collect all of its items.

    Returns:
        `(tracks, playlist_result_first)`: the accumulated `items` of every page and a deep
        copy of the first page's full response (an empty dict when that response was empty).

    Raises:
        Whatever `fetchplaylist` raises for a failed request.
    '''
    tracks, offset, limit, request_overrides, playlist_result_first = [], 0, 343, request_overrides or {}, {}
    while True:
        playlist_result = SpotifyMusicClientPlaylistUtils.fetchplaylist(session, access_token, client_token, client_version, playlist_id, js_pack, offset, limit, request_overrides=request_overrides)
        if not playlist_result_first:
            playlist_result_first = copy.deepcopy(playlist_result)
        content = safeextractfromdict(playlist_result, ['data', 'playlistV2', 'content'], {})
        if not content:
            break
        # `or` guards against explicit nulls in the payload, which would raise TypeError
        tracks.extend(content.get('items') or [])
        total_count = content.get('totalCount') or 0
        if total_count <= offset + limit:
            break
        offset += limit
    return tracks, playlist_result_first
'''parse'''
@staticmethod
def parse(session: requests.Session, playlist_id: str, request_overrides: dict = None) -> dict:
session, request_overrides = session or requests.Session(), request_overrides or {}
try:
session_data = SpotifyMusicClientUtils.getsessiondata(session, request_overrides=request_overrides)
device_id, client_version, js_pack = session_data['deviceId'], session_data['clientVersion'], session_data['jsPack']
secret_data = SpotifyMusicClientUtils.getlatesttotpsecret(); totp = SpotifyMusicClientUtils.generatetotp(secret_data['secret'])
token_data = SpotifyMusicClientUtils.getaccesstoken(session, totp, secret_data['version'], request_overrides=request_overrides)
access_token, client_id = token_data['accessToken'], token_data['clientId']; client_token = SpotifyMusicClientUtils.getclienttoken(session, client_version, client_id, device_id, request_overrides=request_overrides)
tracks, playlist_result_first = SpotifyMusicClientPlaylistUtils.getalltracks(session, access_token, client_token, client_version, playlist_id, js_pack, request_overrides=request_overrides)
for item in tracks: uri: str = safeextractfromdict(item, ['itemV2', 'data', 'uri'], None); item['id'], item['song_link'] = uri.split(':')[2], f"https://open.spotify.com/track/{uri.split(':')[2]}"
return tracks, playlist_result_first
except Exception: return [], {}
'''SpotifyMusicClientSearchUtils'''
class SpotifyMusicClientSearchUtils():
'''query'''
@staticmethod
def query(session: requests.Session, payload: dict, request_overrides: dict = None) -> dict:
    '''Send `payload` to the pathfinder query endpoint after a fresh authentication handshake.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: when any of the requests answers with an error status.
    '''
    session = session or requests.Session()
    extra = request_overrides or {}
    # handshake: page scrape -> TOTP -> access token -> client token
    session_data = SpotifyMusicClientUtils.getsessiondata(session, request_overrides=extra)
    device_id = session_data['deviceId']
    client_version = session_data['clientVersion']
    secret_data = SpotifyMusicClientUtils.getlatesttotpsecret()
    totp = SpotifyMusicClientUtils.generatetotp(secret_data['secret'])
    token_data = SpotifyMusicClientUtils.getaccesstoken(session, totp, secret_data['version'], request_overrides=extra)
    access_token = token_data['accessToken']
    client_id = token_data['clientId']
    client_token = SpotifyMusicClientUtils.getclienttoken(session, client_version, client_id, device_id, request_overrides=extra)
    browser = SpotifyMusicClientUtils.BROWSER_VERSION
    headers = {
        'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{browser}.0.0.0 Safari/537.36',
        'Sec-Ch-Ua': f'"Chromium";v="{browser}", "Not(A:Brand";v="24", "Google Chrome";v="{browser}"',
        'Authorization': f'Bearer {access_token}',
        'Client-Token': client_token,
        'Spotify-App-Version': client_version,
        'Content-Type': 'application/json;charset=UTF-8',
    }
    resp = session.post("https://api-partner.spotify.com/pathfinder/v2/query", json=payload, headers=headers, **extra)
    resp.raise_for_status()
    return resp2json(resp=resp)
'''searchbykeyword'''
@staticmethod
def searchbykeyword(session: requests.Session, query: str, limit: int, offset: int, rule: dict = None, request_overrides: dict = None) -> list:
    '''Run the "searchDesktop" persisted query for the term `query`.

    Args:
        rule: optional top-level overrides merged into the payload with `dict.update`.

    Returns:
        Whatever `SpotifyMusicClientSearchUtils.query` returns for the payload.
    '''
    extra = request_overrides or {}
    overrides = rule or {}
    variables = {"searchTerm": query, "offset": offset, "limit": limit, "numberOfTopResults": 5, "includeAudiobooks": True, "includeArtistHasConcertsField": False, "includePreReleases": True, "includeAuthors": False}
    payload = {
        "variables": variables,
        "operationName": "searchDesktop",
        "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "fcad5a3e0d5af727fb76966f06971c19cfa2275e6ff7671196753e008611873c"}},
    }
    payload.update(overrides)
    return SpotifyMusicClientSearchUtils.query(session, payload, request_overrides=extra)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff