Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,15 @@
'''initialize'''
from .mitu import MituMusicClient
from .kkws import KKWSMusicClient
from .jcpoo import JCPOOMusicClient
from .flmp3 import FLMP3MusicClient
from .htqyy import HTQYYMusicClient
from .twot58 import TwoT58MusicClient
from .fangpi import FangpiMusicClient
from .buguyy import BuguyyMusicClient
from .zhuolin import ZhuolinMusicClient
from .gequbao import GequbaoMusicClient
from .gequhai import GequhaiMusicClient
from .livepoo import LivePOOMusicClient
from .fivesong import FiveSongMusicClient
from .yinyuedao import YinyuedaoMusicClient
@@ -0,0 +1,135 @@
'''
Function:
Implementation of BuguyyMusicClient: https://buguyy.top/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import html
import copy
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''BuguyyMusicClient'''
class BuguyyMusicClient(BaseMusicClient):
source = 'BuguyyMusicClient'
def __init__(self, **kwargs):
    '''Create the client: run the parent setup, install request headers and open the session.

    Args:
        **kwargs: passed through unchanged to the parent constructor.
    '''
    super(BuguyyMusicClient, self).__init__(**kwargs)
    # warn early when no quark cookies are configured (the message states the consequence)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    # the same browser user-agent is used by the search and the download header sets
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "origin": "https://buguyy.top",
        "priority": "u=1, i",
        "referer": "https://buguyy.top/",
        "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": browser_user_agent,
    }
    self.default_download_headers = {"user-agent": browser_user_agent}
    # search headers double as the session-wide default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the search URL list for a keyword.

    Args:
        keyword: text to search for; becomes the "keyword" query parameter.
        rule: optional extra query parameters; entries override the defaults.
        request_overrides: accepted for interface parity; not used to build the URLs.

    Returns:
        list[str]: a single-element list holding the search.php URL.

    Side effects:
        Sets self.search_size_per_page to self.search_size_per_source.
    '''
    # normalise the optional arguments
    rule = rule or {}
    request_overrides = request_overrides or {}
    # defaults first, then whatever the caller supplied on top
    query_params = {'keyword': keyword}
    query_params.update(rule)
    query_params = copy.deepcopy(query_params)
    endpoint = 'https://a.buguyy.top/newapi/search.php?'
    search_urls = [endpoint + urlencode(query_params)]
    # only one request is issued, so a "page" is the whole per-source budget
    self.search_size_per_page = self.search_size_per_source
    return search_urls
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Build a SongInfo for one search hit by resolving its Quark share links.

    Args:
        search_result: one item of the search response list. "id" is required; "downurl",
            "ktmdownurl", "title", "singer" and "picurl" are read when present.
        request_overrides: extra keyword arguments forwarded to self.get and to the
            quark audio link tester.

    Returns:
        The SongInfo built from the most recently resolved share link (iteration stops at
        the first one reporting a valid download url), or the placeholder
        SongInfo(source=self.source) when no share link could be resolved.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # the detail lookup supplies album / lyric / duration; a failed lookup leaves them empty
    try:
        (resp := self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)).raise_for_status()
        lyric_result = resp2json(resp=resp)
    except Exception:
        lyric_result = dict()
    quark_download_urls = [u for u in [search_result.get('downurl', ''), search_result.get('ktmdownurl', '')] if u]
    for quark_download_url in quark_download_urls:
        m = re.search(r"(?i)(?:WAV|FLAC)#(https?://[^#]+)|MP3#(https?://[^#]+)", quark_download_url)
        # a field without a "<FORMAT>#<url>" entry previously raised AttributeError on
        # m.group(...), which the caller turned into dropping the whole result page
        if m is None: continue
        download_result, download_url = QuarkParser.parsefromurl(m.group(1) or m.group(2), **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        # first positive "duration" value found anywhere in the parse result, else 0
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album=legalizestring(safeextractfromdict(lyric_result, ['data', 'album'], None)),
            ext="wav", file_size_bytes=None, file_size=None, identifier=search_result["id"], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')) or "NULL", cover_url=safeextractfromdict(search_result, ["picurl"], None),
            download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
        # fall back to mp3 when the probed extension is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # no usable duration from quark: rebuild HH:MM:SS from the digits of the detail payload's duration
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try: song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', safeextractfromdict(lyric_result, ['data', 'duration'], '')))))[-3:])
        except Exception: song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # lyrics arrive as HTML: turn <br> into newlines and decode entities before the final clean-up
    song_info.lyric = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE); song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Build a SongInfo for one search hit from the url returned by the geturl2 endpoint.

    Args:
        search_result: one item of the search response list; "id" is required, "title",
            "singer" and "picurl" are read when present.
        request_overrides: extra keyword arguments forwarded to self.get and the link tester.

    Returns:
        A populated SongInfo, or the placeholder SongInfo(source=self.source) when the
        endpoint yields no http(s) url.
    '''
    request_overrides = request_overrides or {}
    song_info = SongInfo(source=self.source)
    # any failure while fetching / decoding degrades to an empty payload
    try:
        resp = self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)
        resp.raise_for_status()
        play_payload = resp2json(resp=resp)
    except Exception:
        play_payload = dict()
    download_url = safeextractfromdict(play_payload, ['data', 'url'], '')
    if not (download_url and download_url.startswith('http')):
        return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': play_payload, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(search_result.get('title')),
        singers=legalizestring(search_result.get('singer')),
        album=legalizestring(safeextractfromdict(play_payload, ["data", "album"], None)),
        ext=download_url.split('?')[0].split('.')[-1],
        file_size_bytes=None,
        file_size=None,
        identifier=search_result.get("id"),
        duration_s=None,
        duration='-:-:-',
        lyric=cleanlrc(safeextractfromdict(play_payload, ['data', 'lrc'], 'NULL')),
        cover_url=safeextractfromdict(search_result, ['picurl'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    # probe the link for size / extension and record the outcome next to the test status
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    # url-derived extension wins when valid; otherwise the probed one, otherwise mp3
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext']
        else:
            song_info.ext = 'mp3'
    # rebuild HH:MM:SS from the digits of the payload's duration field
    if not song_info.duration or song_info.duration in ('-:-:-', '00:00:00'):
        try:
            digits = re.findall(r'\d+', safeextractfromdict(play_payload, ['data', 'duration'], ''))
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, digits)))[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    # lyrics arrive as HTML: <br> becomes a newline, entities are decoded, then cleaned again
    lyric_with_newlines = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE)
    song_info.lyric = lyric_with_newlines
    song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every downloadable hit to song_infos.

    Args:
        keyword: search keyword; not read here because search_url already carries it.
        search_url: fully built search URL to request.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that results are appended to; a fresh list is created when
            omitted (the former shared `[]` default leaked results between calls).
        progress: optional progress object; when None, progress reporting is skipped
            (previously the None default raised inside the error handler).
        progress_id: task id passed to progress.update.

    Returns:
        list: song_infos, with the hits found on this page appended.
    '''
    # init
    request_overrides = request_overrides or {}
    if song_infos is None: song_infos = []
    # successful
    try:
        # --search results
        (resp := self.get(search_url, verify=False, **request_overrides)).raise_for_status()
        search_results = resp2json(resp=resp)['data']['list']
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('id' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----parse from quark links (only attempted when quark cookies are configured)
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # ----parse from play url when the quark path gave nothing usable
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure: best-effort, report and return whatever was collected so far
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,145 @@
'''
Function:
Implementation of FangpiMusicClient: https://www.fangpi.net/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''FangpiMusicClient'''
class FangpiMusicClient(BaseMusicClient):
source = 'FangpiMusicClient'
def __init__(self, **kwargs):
    '''Create the client: run the parent setup, install request headers and open the session.

    Args:
        **kwargs: passed through unchanged to the parent constructor.
    '''
    super(FangpiMusicClient, self).__init__(**kwargs)
    # warn early when no quark cookies are configured (the message states the consequence)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    # the same browser user-agent is used for searching and downloading
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {
        "user-agent": browser_user_agent,
        "referer": "https://www.fangpi.net/",
    }
    self.default_download_headers = {"user-agent": browser_user_agent}
    # search headers double as the session-wide default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the search URL list for a keyword.

    Args:
        keyword: text to search for; placed as the last path segment of the search URL.
        rule: accepted for interface parity; not used by this source.
        request_overrides: accepted for interface parity; not used to build the URLs.

    Returns:
        list[str]: a single-element list holding the search URL.

    Side effects:
        Sets self.search_size_per_page to self.search_size_per_source.
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # percent-encode the keyword so characters such as "?", "#", "/" or "%" stay inside
    # the path segment instead of being read as URL structure
    search_urls = [f'https://www.fangpi.net/s/{quote(str(keyword), safe="")}']
    self.search_size_per_page = self.search_size_per_source
    # return
    return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html, base_url="https://www.fangpi.net"):
    '''Extract the result rows from a search page.

    Args:
        html: markup of the search page.
        base_url: origin used to turn relative detail links into absolute URLs.

    Returns:
        list[dict]: one dict per unique detail URL with the keys "id", "name", "artist",
        "title" and "url"; an empty list when no result card is found.
    '''
    soup, search_results, seen = BeautifulSoup(html, "lxml"), [], set()
    # the result card is the div.card whose text contains "搜索结果" and that has an h1.mark
    result_card = next((card for card in soup.select("div.card") if "搜索结果" in card.get_text(" ", strip=True) and card.select_one("h1.mark")), None)
    if result_card is None: return []
    for row in result_card.select("div.row"):
        detail, action = row.select_one('a[href^="/music/"][title]'), row.select_one('a.btn[href^="/music/"]')
        # rows without both the titled detail link and the button link are not results
        if not detail or not action: continue
        if (url := urljoin(base_url, detail["href"])) in seen: continue
        seen.add(url)
        # a row without the artist element used to raise AttributeError and lose every result
        artist_el = row.select_one("small.text-jade")
        artist = artist_el.get_text(strip=True) if artist_el is not None else ""
        search_results.append({"id": detail["href"].rsplit("/", 1)[-1], "name": (row.select_one("span.text-primary") or detail).get_text(strip=True), "artist": artist, "title": detail.get("title", "").strip(), "url": url})
    return search_results
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Build a SongInfo for one detail page by resolving its Quark share links.

    Args:
        search_result: the search-row dict for this song; "url" is read as an identifier fallback.
        download_result: the page's parsed data dict; "mp3_extra_urls", "mp3_title",
            "mp3_author", "mp3_id", "mp3_cover" and "mp3_duration" are read, and
            "quark_parse_result" is written into it.
        soup: parsed detail page, used to read the lyric block (div#content-lrc).
        request_overrides: extra keyword arguments forwarded to the quark link tester.

    Returns:
        The SongInfo built from the most recently resolved share link (iteration stops at
        the first one reporting a valid download url), or the placeholder
        SongInfo(source=self.source) when nothing could be resolved.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # the lyric text is identical for every share link, so read it once; a page without
    # the lyric block used to raise AttributeError, now it simply yields an empty lyric
    lyric_el = soup.find("div", id="content-lrc")
    lyric_text = lyric_el.get_text("\n", strip=True) if lyric_el is not None else ''
    # parse
    for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
        # entries that are not dicts or carry no share link cannot be resolved
        if not isinstance(quark_download_url, dict) or not quark_download_url.get('share_link'): continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        # first positive "duration" value found anywhere in download_result, else 0
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
            identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(lyric_text),
            cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
        # fall back to mp3 when the probed extension is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # no usable duration from quark: left-pad the page's "mp3_duration" to HH:MM:SS
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        raw_duration = str(download_result.get('mp3_duration') or '00:00:00')
        try:
            parts = [int(p) for p in raw_duration.split(":")]
            song_info.duration = "{:02}:{:02}:{:02}".format(*([0] * (3 - len(parts)) + parts))
        except (ValueError, IndexError):
            # a malformed duration string must not abort the caller's whole result page
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric text
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Build a SongInfo for one detail page from the site's play-url API.

    Args:
        search_result: the search-row dict for this song; "url" is read as an identifier fallback.
        download_result: the page's parsed data dict; "play_id", "mp3_title", "mp3_author",
            "mp3_id", "mp3_cover" and "mp3_duration" are read, and the API response is
            stored into it under "api/play-url".
        soup: parsed detail page, used to read the lyric block (div#content-lrc).
        request_overrides: extra keyword arguments forwarded to self.post and the link tester.

    Returns:
        A populated SongInfo, or the placeholder SongInfo(source=self.source) when there is
        no play id or the API yields no http(s) url.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    try:
        (resp := self.post('https://www.fangpi.net/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status()
        download_result['api/play-url'] = resp2json(resp=resp)
    except Exception:
        download_result['api/play-url'] = {}
    download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
    if not download_url or not download_url.startswith('http'): return song_info
    # a page without the lyric block used to raise AttributeError; treat it as an empty lyric
    lyric_el = soup.find("div", id="content-lrc")
    lyric_text = lyric_el.get_text("\n", strip=True) if lyric_el is not None else ''
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=cleanlrc(lyric_text), cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # url-derived extension wins when valid; otherwise the probed one, otherwise mp3
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        # the original ['data', 'duration'] lookup is tried first; "mp3_duration" is the field
        # the quark path of this class reads from the same dict, so it is used as a fallback
        for raw_duration in (safeextractfromdict(download_result, ['data', 'duration'], ''), download_result.get('mp3_duration')):
            if not isinstance(raw_duration, str): continue
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', raw_duration))))[-3:])
            if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
            if song_info.duration != '-:-:-': break
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric text
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page, open each hit's detail page and append every downloadable song.

    Args:
        keyword: search keyword; not read here because search_url already carries it.
        search_url: fully built search URL to request.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that results are appended to; a fresh list is created when
            omitted (the former shared `[]` default leaked results between calls).
        progress: optional progress object; when None, progress reporting is skipped
            (previously the None default raised inside the error handler).
        progress_id: task id passed to progress.update.

    Returns:
        list: song_infos, with the hits found on this page appended.
    '''
    # init
    request_overrides = request_overrides or {}
    if song_infos is None: song_infos = []
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
            except Exception: continue
            # the song data is embedded in a <script> as window.appData = JSON.parse("<string literal>")
            script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
            if script_tag is None: continue
            js_text: str = script_tag.string
            if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
            # a malformed payload skips this song only, like a failed detail request does
            try: download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
            except Exception: continue
            if not isinstance(download_result, dict): continue
            # un-escape "\/" left over from the JSON string in every url-like field
            if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
            if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
            for share_link in (download_result.get("mp3_extra_urls", []) or []):
                if isinstance(share_link, dict): share_link['share_link'] = str(share_link.get('share_link', '')).replace("\\/", "/")
            # ----parse from quark links (only attempted when quark cookies are configured)
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url when the quark path gave nothing usable
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure: best-effort, report and return whatever was collected so far
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,113 @@
'''
Function:
Implementation of FiveSongMusicClient: https://www.5song.xyz/index.html
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''FiveSongMusicClient'''
class FiveSongMusicClient(BaseMusicClient):
source = 'FiveSongMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 0, "WAV": 1, "FLAC": 2, "APE": 3, "ALAC": 4, "AAC": 5, "MP3": 6, "OGG": 7, "M4A": 8}
def __init__(self, **kwargs):
    '''Create the client: run the parent setup, install request headers and open the session.

    Args:
        **kwargs: passed through unchanged to the parent constructor.

    Raises:
        AssertionError: when quark_parser_config carries no cookies.
    '''
    super(FiveSongMusicClient, self).__init__(**kwargs)
    # explicit raise instead of `assert`, so the check is not stripped under `python -O`;
    # AssertionError is kept so existing callers see the same exception type
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    # search headers double as the session-wide default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one search URL per result page needed to cover the per-source budget.

    Args:
        keyword: text to search for; becomes the "keyword" query parameter.
        rule: accepted for interface parity; not used by this source.
        request_overrides: accepted for interface parity; not used to build the URLs.

    Returns:
        list[str]: search URLs in page order; the first page carries no "page" parameter.

    Side effects:
        Sets self.search_size_per_page to min(self.search_size_per_source, 10).
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # percent-encode the keyword so "&", "#", "+" or "%" cannot alter the query string
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls
    self.search_size_per_page = min(self.search_size_per_source, 10)
    search_urls, page_size, count = [], self.search_size_per_page, 0
    while self.search_size_per_source > count:
        page = int(count // page_size) + 1
        if page == 1: search_urls.append(f'https://www.5song.xyz/search.html?keyword={encoded_keyword}')
        else: search_urls.append(f'https://www.5song.xyz/search.html?page={page}&keyword={encoded_keyword}')
        count += page_size
    # return
    return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Extract the result entries from a search page.

    Args:
        html_text: markup of the search page.

    Returns:
        list[dict]: one dict per list item that contains a link, with the keys "title",
        "formats", "singer", "date", "num", "detail_url" and "cover_url"; text fields are
        None when their element is missing.
    '''
    base_url = "https://www.5song.xyz"
    soup = BeautifulSoup(html_text, "lxml")
    # stripped text of an element, or None when the element was not found
    text_or_none = lambda el: el.get_text(strip=True) if el else None
    search_results = []
    for li in soup.select("div.list ul > li"):
        a = li.select_one("a[href]")
        if not a:
            continue
        detail_url = urljoin(base_url, a.get("href", "").strip())
        title = text_or_none(a.select_one("div.con div.t h3"))
        # non-empty texts of the spans that sit next to the title
        span_texts = (s.get_text(strip=True) for s in a.select("div.con div.t span"))
        formats = [txt for txt in span_texts if txt]
        singer = text_or_none(a.select_one("div.singerNum div.singer"))
        date = text_or_none(a.select_one("div.singerNum div.date"))
        num = text_or_none(a.select_one("div.singerNum div.num"))
        img = a.select_one("div.pic img")
        if img and img.get("src"):
            cover_url = urljoin(base_url, img.get("src"))
        else:
            cover_url = None
        search_results.append({
            "title": title,
            "formats": formats,
            "singer": singer,
            "date": date,
            "num": num,
            "detail_url": detail_url,
            "cover_url": cover_url,
        })
    return search_results
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page, open each hit's detail page and append every downloadable song.

    Args:
        keyword: search keyword; not read here because search_url already carries it.
        search_url: fully built search URL to request.
        request_overrides: extra keyword arguments forwarded to every request.
        song_infos: list that results are appended to; a fresh list is created when
            omitted (the former shared `[]` default leaked results between calls).
        progress: optional progress object; when None, progress reporting is skipped
            (previously the None default raised inside the error handler).
        progress_id: task id passed to progress.update.

    Returns:
        list: song_infos, with the hits found on this page appended.
    '''
    # init
    request_overrides, base_url = request_overrides or {}, "https://www.5song.xyz"
    if song_infos is None: song_infos = []
    # first format name found in a link label, or None
    guess_format_func = lambda label: (m.group(1) if (m := re.search(r"(DSD|WAV|FLAC|APE|ALAC|AAC|MP3|OGG|M4A)", str(label).upper())) else None)
    # lowest MUSIC_QUALITY_RANK value first; labels without a known format sort last
    sort_by_audio_quality_func = lambda link_list: sorted(link_list, key=lambda x: (FiveSongMusicClient.MUSIC_QUALITY_RANK.get((fmt := guess_format_func(x.get("label", ""))), 999), fmt or ""))
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('detail_url' not in search_result): continue
            # song id is the detail page's file name without its extension
            song_info, song_id = SongInfo(source=self.source), urlparse(str(search_result['detail_url'])).path.strip('/').split('/')[-1].split('.')[0]
            # ----fetch basic information
            try: (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status()
            except Exception: continue
            soup, quark_links = BeautifulSoup(resp.text, "lxml"), []
            for li in soup.select("div.download ul li[data-url]"):
                if not (quark_url := (li.get("data-url") or "").strip()): continue
                a = li.select_one("a[href]"); label = a.get_text(" ", strip=True) if a else None
                pc_download_href = a.get("href", "").strip() if a else None
                pc_download_url = urljoin(base_url, pc_download_href) if pc_download_href else None
                if "quark" in quark_url: quark_links.append({"label": label, "quark_url": quark_url, "pc_download_url": pc_download_url})
            if not quark_links: continue
            download_result = dict(quark_links=quark_links)
            # the lyric text does not depend on the share link, so read it once per page; a page
            # without the lyric container used to raise AttributeError and abort the whole search
            lyric_container = soup.select_one("div.viewCon div.text")
            lyric_text = "\n".join([p.get_text(strip=True) for p in lyric_container.select("p") if p.get_text(strip=True)]) if lyric_container is not None else ''
            # ----parse from quark links, best quality first
            for quark_link in sort_by_audio_quality_func(download_result['quark_links']):
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_link['quark_url'], **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'): continue
                # first positive "duration" value found anywhere in download_result, else 0
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL', ext='mp3', file_size_bytes=None, file_size=None,
                    identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(lyric_text), cover_url=search_result.get('cover_url'),
                    download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                )
                song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
                # fall back to mp3 when the probed extension is not a known audio extension
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if song_info.with_valid_download_url: break
            if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
            # derive the duration from the lyric text when quark reported none
            if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure: best-effort, report and return whatever was collected so far
    except Exception as err:
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,106 @@
'''
Function:
Implementation of FLMP3MusicClient: https://www.flmp3.pro/index.html
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, SongInfo, QuarkParser, AudioLinkTester
'''FLMP3MusicClient'''
class FLMP3MusicClient(BaseMusicClient):
source = 'FLMP3MusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
    '''Create the client: run the parent setup, install request headers and open the session.

    Args:
        **kwargs: passed through unchanged to the parent constructor.

    Raises:
        AssertionError: when quark_parser_config carries no cookies.
    '''
    super(FLMP3MusicClient, self).__init__(**kwargs)
    # explicit raise instead of `assert`, so the check is not stripped under `python -O`;
    # AssertionError is kept so existing callers see the same exception type
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    # search headers double as the session-wide default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one search URL per result page needed to cover the per-source budget.

    Args:
        keyword: text to search for; becomes the "keyword" query parameter.
        rule: accepted for interface parity; not used by this source.
        request_overrides: accepted for interface parity; not used to build the URLs.

    Returns:
        list[str]: search URLs in page order; the first page carries no "page" parameter.

    Side effects:
        Sets self.search_size_per_page to min(self.search_size_per_source, 12).
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # percent-encode the keyword so "&", "#", "+" or "%" cannot alter the query string
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls
    self.search_size_per_page = min(self.search_size_per_source, 12)
    search_urls, page_size, count = [], self.search_size_per_page, 0
    while self.search_size_per_source > count:
        page = int(count // page_size) + 1
        if page == 1: search_urls.append(f'https://www.flmp3.pro/search.html?keyword={encoded_keyword}')
        else: search_urls.append(f'https://www.flmp3.pro/search.html?page={page}&keyword={encoded_keyword}')
        count += page_size
    # return
    return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Extract the result entries from a search page.

    Args:
        html_text: markup of the search page.

    Returns:
        list[dict]: one dict per list item that contains a link, with the keys "song_url",
        "title", "artist", "date", "img_url" and "img_alt"; a value is None when its
        element (or the link's href) is missing.
    '''
    search_results = []
    base_url = "https://flmp3.pro"
    soup = BeautifulSoup(html_text, "html.parser")
    # stripped text of an element, or None when the element was not found
    text_or_none = lambda el: el.get_text(strip=True) if el else None
    for li in soup.select("div.list ul.flex.flex-wrap > li"):
        a = li.select_one("a")
        if not a:
            continue
        song_href = a.get("href", "")
        if song_href:
            song_url = urljoin(base_url, song_href)
        else:
            song_url = None
        title_el = li.select_one("div.con div.t h3")
        artist_el = li.select_one("div.con div.t p")
        date_el = li.select_one("div.con div.date")
        img_el = li.select_one("div.pic img")
        search_results.append({
            "song_url": song_url,
            "title": text_or_none(title_el),
            "artist": text_or_none(artist_el),
            "date": text_or_none(date_el),
            "img_url": img_el.get("src") if img_el else None,
            "img_alt": img_el.get("alt") if img_el else None,
        })
    return search_results
'''_parsesongdetailfordownloadpages'''
def _parsesongdetailfordownloadpages(self, html_text: str):
    '''Collect a song page's download-page links, best quality first.

    Args:
        html_text: markup of the song detail page.

    Returns:
        dict: {"links_sorted": list of {"text", "quality", "rank", "url"} dicts ordered by
        descending rank, "song_id": file name (without extension) of the top-ranked link's
        url}. When the page has no links, "links_sorted" is empty and "song_id" is None
        (this used to raise IndexError, which made the caller's empty-list check unreachable).
    '''
    soup, base_url, links = BeautifulSoup(html_text, "html.parser"), "https://www.flmp3.pro", []
    for a in soup.select(".btnBox a[href]"):
        text, href = a.get_text(strip=True), a["href"]
        if not href: continue
        # quality is the first MUSIC_QUALITY_RANK key contained in the upper-cased link text
        upper_text = str(text).upper()
        quality = next((q for q in FLMP3MusicClient.MUSIC_QUALITY_RANK if q in upper_text), "UNKNOWN")
        links.append({"text": text, "quality": quality, "rank": FLMP3MusicClient.MUSIC_QUALITY_RANK.get(quality, 0), "url": urljoin(base_url, href)})
    links_sorted = sorted(links, key=lambda x: x["rank"], reverse=True)
    song_id = urlparse(str(links_sorted[0]['url'])).path.strip('/').split('/')[-1].split('.')[0] if links_sorted else None
    return {'links_sorted': links_sorted, 'song_id': song_id}
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page, resolve each hit to a downloadable song and collect the valid ones.
    Args:
        keyword: search keyword (not read inside this method body).
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to self.get; None means none.
        song_infos: list the valid SongInfo objects are appended to; a new list is used when None.
        progress: optional progress object whose description is updated; skipped when None.
        progress_id: task id handed to progress.update.
    Returns:
        The song_infos list. A failure of the page itself is reported through progress, not raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: a "[]" default would be shared between calls
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('song_url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            try:
                (resp := self.get(search_result['song_url'], **request_overrides)).raise_for_status()
                download_result = self._parsesongdetailfordownloadpages(resp.text)
            except Exception:
                continue
            if not download_result['links_sorted']: continue
            # links are sorted best quality first; stop at the first one that yields a valid url
            for download_page_details in download_result['links_sorted']:
                try:
                    (resp := self.get(download_page_details['url'], **request_overrides)).raise_for_status()
                    soup = BeautifulSoup(resp.text, "lxml")
                    quark_download_url = soup.select_one("a.linkbtn[href]")['href']
                except Exception:
                    continue
                if not quark_download_url or not quark_download_url.startswith('http'): continue
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'): continue
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                    source=self.source,
                    song_name=legalizestring(search_result.get('title')),
                    singers=legalizestring(search_result.get('artist')),
                    album='NULL',
                    ext='mp3',
                    file_size_bytes=None,
                    file_size='NULL',
                    identifier=download_result['song_id'],
                    duration_s=duration_in_secs,
                    duration=seconds2hms(duration_in_secs),
                    lyric='NULL',
                    cover_url=search_result.get('img_url', None),
                    download_url=download_url,
                    download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides),
                    default_download_headers=self.quark_default_download_headers,
                )
                song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                song_info.ext = song_info.download_url_status['probe_status']['ext']
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if song_info.with_valid_download_url: break
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # progress may be None; guarding keeps the handler itself from raising
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,142 @@
'''
Function:
Implementation of GequbaoMusicClient: https://www.gequbao.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''GequbaoMusicClient'''
class GequbaoMusicClient(BaseMusicClient):
source = 'GequbaoMusicClient'
def __init__(self, **kwargs):
    '''
    Set up the client: run the base initializer, warn when no quark cookies are
    configured, install the default search/download headers and start the session.
    '''
    super(GequbaoMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    # two separate dict objects, as search and download headers are stored independently
    self.default_search_headers = {"user-agent": browser_agent}
    self.default_download_headers = {"user-agent": browser_agent}
    # the search headers double as the general default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
search_urls = [f'https://www.gequbao.com/s/{keyword}']
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str, base_url: str = "https://www.gequbao.com"):
    '''
    Parse a search-result page into a list of dicts with the keys name, artist, url and id.
    Rows without a link whose href starts with "/music/" are skipped; name and artist are
    None when their element is missing; id is the last path component of the href.
    '''
    document = BeautifulSoup(html_text, "html.parser")
    # helper: stripped text of an optional element
    def textof(node):
        return node.get_text(strip=True) if node else None
    parsed_rows = []
    for block in document.select("div.row.no-gutters.py-2d5.border-top.align-items-center"):
        link = block.select_one('a[href^="/music/"]')
        if not link:
            continue
        href = link["href"]
        parsed_rows.append({
            "name": textof(block.select_one("span.text-primary")),
            "artist": textof(block.select_one("small.text-jade")),
            "url": urljoin(base_url, href),
            "id": href.rstrip("/").split("/")[-1],
        })
    # return
    return parsed_rows
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the quark share links listed under download_result["mp3_extra_urls"].
    Args:
        search_result: parsed search row; its "url" is the identifier fallback.
        download_result: page data of the song; "quark_parse_result" is written into it.
        soup: parsed song page, read for the lyric block (div id "content-lrc").
        request_overrides: extra arguments handed to the link tester; None means none.
    Returns:
        The SongInfo of the first share link with a valid download url, otherwise the last
        candidate built, or an empty SongInfo when no link produced a download url.
    Entries without a usable "share_link", a missing lyric block and an unparsable
    "mp3_duration" are tolerated instead of raising.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    unknown_durations = ('-:-:-', '00:00:00')
    # parse
    for quark_entry in (download_result.get('mp3_extra_urls', []) or []):
        # skip malformed entries instead of raising KeyError/TypeError
        share_link = quark_entry.get('share_link') if isinstance(quark_entry, dict) else None
        if not share_link: continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(share_link, **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        # the lyric block may be absent from the page; fall back to the 'NULL' marker
        lyric_node = soup.find("div", id="content-lrc")
        lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
            source=self.source,
            song_name=legalizestring(download_result.get('mp3_title')),
            singers=legalizestring(download_result.get('mp3_author')),
            album='NULL',
            ext='mp3',
            file_size='NULL',
            identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1],
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=lyric,
            cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
            download_url=download_url,
            download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides),
            default_download_headers=self.quark_default_download_headers,
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        song_info.ext = song_info.download_url_status['probe_status']['ext']
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # duration fallback 1: the "mp3_duration" text of the page, left-padded to hh:mm:ss
    if not song_info.duration or song_info.duration in unknown_durations:
        raw_duration = download_result.get('mp3_duration', '00:00:00') or '00:00:00'
        try:
            parts = [int(p) for p in raw_duration.split(":")]
            song_info.duration = "{:02}:{:02}:{:02}".format(*([0] * (3 - len(parts)) + parts))
        except (AttributeError, TypeError, ValueError):
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # duration fallback 2: derive it from the lyric
    if not song_info.duration or song_info.duration in unknown_durations: song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the site's own play-url api.
    Args:
        search_result: parsed search row; its "url" is the identifier fallback.
        download_result: page data of the song; must hold a non-empty "play_id", and the
            api answer is stored into it under "api/play-url".
        soup: parsed song page, read for the lyric block (div id "content-lrc").
        request_overrides: extra arguments for the request and the link tester; None means none.
    Returns:
        The populated SongInfo, or an empty SongInfo when there is no play id or the api
        yields no http url. A missing lyric block gives the 'NULL' lyric instead of raising.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    try:
        (resp := self.post('https://www.gequbao.com/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status()
        download_result['api/play-url'] = resp2json(resp=resp)
    except Exception:
        download_result['api/play-url'] = {}
    download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
    # a non-string url cannot be used below (startswith / split), so reject it here
    if not isinstance(download_url, str) or not download_url.startswith('http'): return song_info
    # the lyric block may be absent from the page; fall back to the 'NULL' marker
    lyric_node = soup.find("div", id="content-lrc")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(download_result.get('mp3_title')),
        singers=legalizestring(download_result.get('mp3_author')),
        album='NULL',
        ext=download_url.split('?')[0].split('.')[-1],
        file_size='NULL',
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1],
        duration_s=None,
        duration='-:-:-',
        lyric=lyric,
        cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    # duration fallback 1: the last three numbers found in download_result["data"]["duration"]
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try:
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', safeextractfromdict(download_result, ['data', 'duration'], '')))))[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # duration fallback 2: derive it from the lyric
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page, resolve each hit to a downloadable song and collect the valid ones.
    Args:
        keyword: search keyword (not read inside this method body).
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to self.get; None means none.
        song_infos: list the valid SongInfo objects are appended to; a new list is used when None.
        progress: optional progress object whose description is updated; skipped when None.
        progress_id: task id handed to progress.update.
    Returns:
        The song_infos list. A failure of the page itself is reported through progress, not raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: a "[]" default would be shared between calls
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
            except Exception: continue
            script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
            if script_tag is None: continue
            js_text: str = script_tag.string
            if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
            # a payload that cannot be decoded only skips this song, not the whole page
            try: download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
            except Exception: continue
            if not isinstance(download_result, dict): continue
            # undo the "\/" escaping left in url fields
            if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
            if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
            for share_link in (download_result.get("mp3_extra_urls", []) or []):
                if isinstance(share_link, dict): share_link['share_link'] = str(share_link.get('share_link', '')).replace("\\/", "/")
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # progress may be None; guarding keeps the handler itself from raising
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,153 @@
'''
Function:
Implementation of GequhaiMusicClient: https://www.gequhai.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import base64
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, extractdurationsecondsfromlrc, seconds2hms, searchdictbykey, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''GequhaiMusicClient'''
class GequhaiMusicClient(BaseMusicClient):
source = 'GequhaiMusicClient'
def __init__(self, **kwargs):
    '''
    Set up the client: run the base initializer, warn when no quark cookies are
    configured, install the default search/download headers and start the session.
    '''
    super(GequhaiMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    # two separate dict objects, as search and download headers are stored independently
    self.default_search_headers = {"user-agent": browser_agent}
    self.default_download_headers = {"user-agent": browser_agent}
    # the search headers double as the general default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 12)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
if int(count // page_size) + 1 == 1: search_urls.append(f'https://www.gequhai.com/s/{keyword}')
else: search_urls.append(f'https://www.gequhai.com/s/{keyword}?page={int(count // page_size) + 1}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''
    Parse the result table (table id "myTables") of a search page.
    Returns a list of dicts with the keys index, title, singer, href, play_url and play_id,
    one per table row that has at least three cells; an empty list when the table is missing.
    index is an int when the first cell is all digits, otherwise the cell text;
    play_id is the number after "/play/" in the href, or None.
    '''
    site_root = "https://www.gequhai.com"
    document = BeautifulSoup(html_text, "html.parser")
    result_table = document.select_one("table#myTables")
    if not result_table:
        return []
    parsed_rows = []
    for row in result_table.select("tbody tr"):
        cells = row.find_all("td")
        if len(cells) < 3:
            continue
        position = cells[0].get_text(strip=True)
        link = cells[1].find("a")
        # with a link, title and href come from it; otherwise the cell text is the title
        if link:
            title = link.get_text(strip=True)
            href = link.get("href", "")
        else:
            title = cells[1].get_text(strip=True)
            href = ""
        id_match = re.search(r"/play/(\d+)", href or "")
        parsed_rows.append({
            "index": int(position) if position.isdigit() else position,
            "title": title,
            "singer": cells[2].get_text(strip=True),
            "href": href,
            "play_url": urljoin(site_root, href) if href else "",
            "play_id": id_match.group(1) if id_match else None,
        })
    # return
    return parsed_rows
'''_decodequarkurl'''
def _decodequarkurl(self, quark_url: str):
return base64.b64decode(quark_url.replace("#", "H")).decode("utf-8", errors="strict")
'''_extractappdataandwindowvars'''
def _extractappdataandwindowvars(self, js_text: str) -> dict:
out, m = {}, re.search(r"window\.appData\s*=\s*(\{.*?\})\s*;", js_text, flags=re.S)
if m: app = json_repair.loads(m.group(1)); out["appData"] = app; out.update(app)
for k, v in re.findall(r"window\.(\w+)\s*=\s*'([^']*)'\s*;", js_text): out[k] = v
for k, v in re.findall(r'window\.(\w+)\s*=\s*"([^"]*)"\s*;', js_text): out[k] = v
seen = set(out); out.update({k: int(v) if re.fullmatch(r"-?\d+", v) else float(v) for k, v in re.findall(r"window\.(\w+)\s*=\s*(-?\d+(?:\.\d+)?)\s*;", js_text) if not (k in seen or seen.add(k))})
seen = set(out); out.update({k: {"true": True, "false": False, "null": None}[str(v).lower()] for k, v in re.findall(r"window\.(\w+)\s*=\s*(true|false|null)\s*;", js_text, flags=re.I) if not (k in seen or seen.add(k))})
if "mp3_title" in out and "mp3_author" in out: out.setdefault("mp3_name", f"{out['mp3_title']}-{out['mp3_author']}")
if "mp3_extra_url" in out: out["mp3_extra_url_decoded"] = self._decodequarkurl(out["mp3_extra_url"])
return out
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the decoded quark link stored under download_result["mp3_extra_url_decoded"].
    Args:
        search_result: parsed search row; its "play_url" is the identifier fallback.
        download_result: page data of the song; "quark_parse_result" is written into it.
        soup: parsed song page, read for the lyric block (div id "content-lrc2").
        request_overrides: extra arguments handed to the link tester; None means none.
    Returns:
        The populated SongInfo, or an empty SongInfo when there is no http quark link, the
        parser yields no http download url, or the download url does not validate.
        A missing lyric block gives the 'NULL' lyric instead of raising.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    quark_download_url = download_result.get('mp3_extra_url_decoded', '')
    if not quark_download_url or not str(quark_download_url).startswith('http'): return song_info
    download_result['quark_parse_result'], download_url = QuarkParser.parsefromdirurl(quark_download_url, **self.quark_parser_config)
    # nothing downloadable: skip the link test and probe altogether
    if not download_url or not str(download_url).startswith('http'): return song_info
    duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
    duration_in_secs = duration[0] if duration else 0
    # the lyric block may be absent from the page; fall back to the 'NULL' marker
    lyric_node = soup.find("div", id="content-lrc2")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(download_result.get('mp3_title')),
        singers=legalizestring(download_result.get('mp3_author', None)),
        album='NULL',
        ext='mp3',
        file_size=None,
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1],
        duration_s=duration_in_secs,
        duration=seconds2hms(duration_in_secs),
        lyric=lyric,
        cover_url=download_result.get('mp3_cover'),
        download_url=download_url,
        download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides),
        default_download_headers=self.quark_default_download_headers,
    )
    song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    song_info.ext = song_info.download_url_status['probe_status']['ext']
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.with_valid_download_url: return SongInfo(source=self.source)
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # duration fallback: derive it from the lyric
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the site's own music api.
    Args:
        search_result: parsed search row; its "play_url" is the identifier fallback.
        download_result: page data of the song; must hold a non-empty "play_id", and the
            api answer is stored into it under "api/music".
        soup: parsed song page, read for the lyric block (div id "content-lrc2").
        request_overrides: extra arguments for the request and the link tester; None means
            none. A "headers" entry is layered on top of the api headers built here.
    Returns:
        The populated SongInfo, or an empty SongInfo when there is no play id, the api
        request fails or yields no http url, or the download url does not validate.
        A missing lyric block gives the 'NULL' lyric instead of raising.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    headers = {
        "accept": "application/json, text/javascript, */*; q=0.01", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "content-type": "application/x-www-form-urlencoded; charset=UTF-8", "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin",
        "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"", "x-custom-header": "SecretKey", "x-requested-with": "XMLHttpRequest", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty", "origin": "https://www.gequhai.com",
        "priority": "u=1, i", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }
    # passing headers= next to **request_overrides would raise TypeError when the overrides
    # also carry "headers", so merge them on a copy and leave the caller's dict untouched
    request_kwargs = dict(request_overrides)
    headers.update(request_kwargs.pop('headers', None) or {})
    # a failed api call only drops this song, it must not abort the whole search page
    try:
        (resp := self.post('https://www.gequhai.com/api/music', data={'id': download_result['play_id'], 'type': '0'}, headers=headers, **request_kwargs)).raise_for_status()
        download_result['api/music'] = resp2json(resp=resp)
    except Exception:
        download_result['api/music'] = {}
    download_url = safeextractfromdict(download_result['api/music'], ['data', 'url'], '')
    # a non-string url cannot be used below (startswith / split), so reject it here
    if not isinstance(download_url, str) or not download_url.startswith('http'): return song_info
    # the lyric block may be absent from the page; fall back to the 'NULL' marker
    lyric_node = soup.find("div", id="content-lrc2")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(download_result.get('mp3_title')),
        singers=legalizestring(download_result.get('mp3_author')),
        album='NULL',
        ext=download_url.split('?')[0].split('.')[-1],
        file_size=None,
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1],
        duration='-:-:-',
        lyric=lyric,
        cover_url=download_result.get('mp3_cover'),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.with_valid_download_url: return SongInfo(source=self.source)
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # duration fallback: derive it from the lyric
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page, resolve each hit to a downloadable song and collect the valid ones.
    Args:
        keyword: search keyword (not read inside this method body).
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to self.get; None means none.
        song_infos: list the valid SongInfo objects are appended to; a new list is used when None.
        progress: optional progress object whose description is updated; skipped when None.
        progress_id: task id handed to progress.update.
    Returns:
        The song_infos list. A failure of the page itself is reported through progress, not raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: a "[]" default would be shared between calls
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try:
                (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status()
                download_result = self._extractappdataandwindowvars(resp.text)
            except Exception:
                continue
            soup = BeautifulSoup(resp.text, 'lxml')
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # progress may be None; guarding keeps the handler itself from raising
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,108 @@
'''
Function:
Implementation of HTQYYMusicClient: http://www.htqyy.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
from html import unescape
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, SongInfo, AudioLinkTester
'''HTQYYMusicClient'''
class HTQYYMusicClient(BaseMusicClient):
source = 'HTQYYMusicClient'
def __init__(self, **kwargs):
    '''
    Set up the client: run the base initializer, install the default search and
    download headers and start the session.
    '''
    super(HTQYYMusicClient, self).__init__(**kwargs)
    browser_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    site_referer = "http://www.htqyy.com/"
    self.default_search_headers = {
        "user-agent": browser_agent,
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-encoding": "gzip, deflate",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "cache-control": "max-age=0",
        "host": "www.htqyy.com",
        "proxy-connection": "keep-alive",
        "referer": site_referer,
        "upgrade-insecure-requests": "1",
    }
    self.default_download_headers = {
        "accept-encoding": "identity;q=1, *;q=0",
        "referer": site_referer,
        "user-agent": browser_agent,
    }
    # the search headers double as the general default headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
search_urls = [f'http://www.htqyy.com/home/search?wd={keyword}']
self.search_size_per_page = self.search_size_per_source
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''
    Parse the music list (ul id "musicList") of a search page.
    Returns one dict per "li.musicItem" with the keys id, sid, title, title_attr, artist,
    artist_url, album, album_url and play_url; every field is None when its element or
    attribute is missing.
    '''
    site_root = "http://www.htqyy.com"
    document = BeautifulSoup(html_text, "html.parser")
    # helpers for optional elements
    def textof(node):
        return node.get_text(" ", strip=True) if node else None
    def linkof(node):
        return urljoin(site_root, node["href"]) if node and node.has_attr("href") else None
    parsed_items = []
    for item in document.select("ul#musicList li.musicItem"):
        checkbox = item.select_one('input[type="checkbox"][name="checked"]')
        title_link = item.select_one("span.title a")
        artist_link = item.select_one("span.artistName a")
        album_link = item.select_one("span.albumName a")
        # the play href is stripped first; an empty result means no play url
        if title_link and title_link.has_attr("href"):
            play_href = title_link["href"].strip()
        else:
            play_href = None
        parsed_items.append({
            "id": checkbox["value"].strip() if checkbox and checkbox.has_attr("value") else None,
            "sid": title_link.get("sid") if title_link else None,
            "title": textof(title_link),
            "title_attr": title_link.get("title") if title_link else None,
            "artist": textof(artist_link),
            "artist_url": linkof(artist_link),
            "album": textof(album_link),
            "album_url": linkof(album_link),
            "play_url": urljoin(site_root, play_href) if play_href else None,
        })
    # return
    return parsed_items
'''_extractplayscriptinfo'''
def _extractplayscriptinfo(self, html_text: str):
    '''
    Extract the player variables from the inline script of a play page.
    The first script is used that mentions PageData ("PageData." or "var PageData") together
    with "fileHost" or "var mp3". Returns {} when no such script exists, otherwise a dict with
    the keys format, PageData, ip, fileHost, mp3_path, mp3_url, bdText, bdText2 and imgUrl;
    string values are html-unescaped. mp3_url is fileHost + mp3 and is only set when both
    are present and the script contains the "mp3 = fileHost + mp3;" statement.
    '''
    document = BeautifulSoup(html_text, "html.parser")
    # locate the player script
    script_source = None
    for script in document.find_all("script"):
        content = script.string or script.get_text()
        if not content:
            continue
        mentions_pagedata = "PageData." in content or "var PageData" in content
        mentions_media = "fileHost" in content or "var mp3" in content
        if mentions_pagedata and mentions_media:
            script_source = content
            break
    if not script_source:
        return {}
    # helper: value of "var <name> = ...;" (int for a bare number, None when missing or empty)
    def readvar(name):
        found = re.search(rf'\bvar\s+{re.escape(name)}\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', script_source)
        if found is None:
            return None
        value = found.group(1) or found.group(2) or found.group(3)
        if value is None:
            return None
        return int(value) if found.group(3) is not None else value
    # helper: html-unescape strings only
    def plain(value):
        return unescape(value) if isinstance(value, str) else value
    # "PageData.<key> = ...;" assignments
    page_data = {}
    for found in re.finditer(r'PageData\.(\w+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', script_source):
        number = found.group(4)
        if number is not None:
            page_data[found.group(1)] = int(number)
        else:
            page_data[found.group(1)] = found.group(2) or found.group(3) or number
    file_format = readvar("format") or page_data.get("format")
    ip = readvar("ip")
    file_host = readvar("fileHost")
    mp3_path = readvar("mp3")
    bd_text = readvar("bdText")
    bd_text2 = readvar("bdText2")
    img_url = readvar("imgUrl")
    mp3_url = None
    if file_host and mp3_path and re.search(r'\bmp3\s*=\s*fileHost\s*\+\s*mp3\s*;', script_source):
        mp3_url = file_host + mp3_path
    # return
    return {
        "format": plain(file_format),
        "PageData": {key: plain(value) for key, value in page_data.items()},
        "ip": plain(ip),
        "fileHost": plain(file_host),
        "mp3_path": plain(mp3_path),
        "mp3_url": plain(mp3_url),
        "bdText": plain(bd_text),
        "bdText2": plain(bd_text2),
        "imgUrl": plain(img_url),
    }
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page, resolve each hit to a downloadable song and collect the valid ones.
    Args:
        keyword: search keyword (not read inside this method body).
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to self.get; None means none.
        song_infos: list the valid SongInfo objects are appended to; a new list is used when None.
        progress: optional progress object whose description is updated; skipped when None.
        progress_id: task id handed to progress.update.
    Returns:
        The song_infos list. A failure of the page itself is reported through progress, not raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: a "[]" default would be shared between calls
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            try:
                (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status()
                download_result = self._extractplayscriptinfo(resp.text)
            except Exception:
                continue
            download_url: str = download_result.get('mp3_url')
            # a non-string value cannot be a url (and has no startswith), so skip the song
            if not isinstance(download_url, str) or not download_url.startswith('http'): continue
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
                source=self.source,
                song_name=legalizestring(search_result.get('title')),
                singers=legalizestring(search_result.get('artist')),
                album=legalizestring(search_result.get('album')),
                ext=download_result.get('format', 'mp3') or download_url.split('?')[0].split('.')[-1],
                file_size=None,
                identifier=search_result.get('id') or search_result.get('sid'),
                duration_s=None,
                duration='-:-:-',
                lyric='NULL',
                cover_url=download_result.get('imgUrl'),
                download_url=download_url,
                download_url_status=self.audio_link_tester.test(download_url, request_overrides),
            )
            if not song_info.with_valid_download_url: continue
            song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
            elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # progress may be None; guarding keeps the handler itself from raising
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
@@ -0,0 +1,171 @@
'''
Function:
Implementation of JCPOOMusicClient: https://www.jcpoo.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import copy
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''JCPOOMusicClient'''
class JCPOOMusicClient(BaseMusicClient):
source = 'JCPOOMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
    '''Run the base-class initialisation, warn if no Quark cookies are configured, install the default headers and call `_initsession()`.'''
    super(JCPOOMusicClient, self).__init__(**kwargs)
    # the warning text itself explains the consequence: without quark cookies only mp3 downloads are possible
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    # search and download share the same user agent string but are kept as two separate dicts
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {"user-agent": user_agent}
    self.default_download_headers = {"user-agent": user_agent}
    # default_headers is an alias of (not a copy of) the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'page': 0, 'keyword': keyword}
default_rule.update(rule)
# construct search urls
base_url = 'https://www.jcpoo.cn/search?'
self.search_size_per_page = min(self.search_size_per_source, 30)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['page'] = int(count // page_size)
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Parse a search-result page into a list of plain dicts.

    Every `li.song_item2` under `ul.tuij_song` that contains a link yields
    `{"title", "artist", "url", "id"}`. Titles of the form `singer《song》` are split into
    artist and title; any other title is kept whole with `artist` set to None.

    Args:
        html_text: HTML of the search page.

    Returns:
        List of result dicts. `id` is the link's `id` query parameter with a leading
        `MUSIC_` removed, or None when the link carries no such parameter.
    '''
    soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.jcpoo.cn/"
    # compiled once instead of once per list item
    title_pattern = re.compile(r'^(.*?)《(.*?)》$')
    for li in soup.select("ul.tuij_song li.song_item2"):
        if not (a := li.select_one("a[href]")): continue
        href = a["href"].strip()
        title_div = a.select_one(".song_info2 > div")
        title = (title_div.get_text(strip=True) if title_div else a.get_text(" ", strip=True)).strip()
        m = title_pattern.match(title)
        singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title)
        mid = parse_qs(urlparse(href).query).get("id", [None])[0]
        # a link without an `id` parameter leaves mid as None; calling removeprefix on it raised
        # AttributeError and discarded every result of the page, so the prefix is only stripped when present
        song_id = mid.removeprefix('MUSIC_') if mid else None
        search_results.append({"title": song_name, "artist": singer, "url": urljoin(base_url, href), "id": song_id})
    return search_results
'''_extractquarklinksfromhtml'''
def _extractquarklinksfromhtml(self, html_text: str):
    '''Collect the Quark share links assigned to JS variables inside `<script>` tags, plus the cover url.

    Returns:
        `{'quark_links': [...], 'cover_url': str | None}` where each link is
        `{"key", "format", "url"}`; links are de-duplicated by url (first occurrence wins) and
        ordered by `MUSIC_QUALITY_RANK` of their format, best first.
    '''
    # matches `const|let|var <name> = "<quark share url>"`; layout inside the pattern is ignored (re.VERBOSE)
    quark_assignment = re.compile(
        r"""(?:const|let|var)\s+
(?P<key>[A-Za-z0-9_]+?)\s*=\s*
(?P<quote>["'])
(?P<url>https?://pan\.quark\.cn/s/[^"']+)
(?P=quote)
""", re.VERBOSE
    )
    rank = JCPOOMusicClient.MUSIC_QUALITY_RANK
    # url -> link dict; setdefault keeps the first occurrence and dicts preserve insertion order
    links_by_url = {}
    for script in BeautifulSoup(html_text, "lxml").find_all("script"):
        js = script.string or script.get_text() or ""
        if "pan.quark.cn/s/" not in js:
            continue
        for match in quark_assignment.finditer(js):
            url, key = match.group("url").strip(), match.group("key")
            if not url or not key:
                continue
            # variable names look like `<something>_url`; the format is the last rank key contained in the stem
            stem = key[:-4] if key.endswith("_url") else key
            fmt = ([k for k in rank.keys() if k.lower() in stem.lower()] or [stem])[-1]
            links_by_url.setdefault(url, {"key": key, "format": fmt, "url": url})
    quark_links = sorted(links_by_url.values(), key=lambda link: rank.get(str(link["format"]).upper(), 0), reverse=True)
    # the cover url sits in an inline JSON blob; undo its backslash escapes
    cover_url = None
    cover_match = re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)
    if cover_match:
        cover_url = bytes(cover_match.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/")
    return {'quark_links': quark_links, 'cover_url': cover_url}
'''_extractlrc'''
def _extractlrc(self, js_text: str):
    '''Extract the song detail object embedded in a page and render its lyric list as LRC text.

    Searches `js_text` for `const detailJson = '...'; const detail = JSON.parse`, evaluates the
    quoted body as a string literal, parses the result with `json_repair` and builds LRC lines
    from its `music_lrclist` entries.

    Returns:
        `(lyric_result, lyric)`: the parsed detail object and the LRC text after `cleanlrc`,
        or `({}, 'NULL')` when the snippet is not present. Parsing errors are not caught here.
    '''
    # functions
    # norm_func: stringify and remove all whitespace
    norm_func = lambda s: re.sub(r"\s+", "", str(s))
    # pick_func: value of the first key of `d` equal to `target` once whitespace is removed from the key, else None
    pick_func = lambda d, target: next((v for k, v in d.items() if norm_func(k) == target), None)
    # fmt_lrc_time_func: seconds (number or numeric string) -> "[mm:ss.xx]" tag
    fmt_lrc_time_func = lambda sec: (f"[{int((t := float(norm_func(sec)))) // 60:02d}:{(t - (int(t // 60) * 60)):05.2f}]")
    # lrc_list_to_lrc_func: "[ti:]/[ar:]/[al:]" header followed by one "<tag><text>" line per lrclist entry that has
    # both a time and a lineLyric value; lines are sorted by the formatted tag string, not by the numeric time
    lrc_list_to_lrc_func = lambda detail: (("\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]",]).strip() + "\n") + "\n".join(f"{ts}{ly}" for ts, ly in sorted([(fmt_lrc_time_func(t), re.sub(r"\s+", " ", str(lyric)).strip()) for it in (detail.get("music_lrclist", []) or []) for t in [pick_func(it, "time")] for lyric in [pick_func(it, "lineLyric")] if t is not None and lyric is not None], key=lambda x: x[0],)))
    # match
    if not (s := re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)): return {}, 'NULL'
    # drop raw line breaks, resolve backslash escapes by evaluating the body as a double-quoted literal, then parse the JSON
    string = s.group(1).replace("\r", "").replace("\n", ""); lyric_result = json_repair.loads(ast.literal_eval(f'"{string}"')); lyric = cleanlrc(lrc_list_to_lrc_func(lyric_result))
    # return
    return lyric_result, lyric
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to a downloadable song through the Quark share links on its detail page.

    Args:
        search_result: one dict from `_parsesearchresultsfromhtml` (reads `url`, `id`, `title`, `artist`).
        request_overrides: extra keyword arguments forwarded to `self.get`; also handed to the link tester.

    Returns:
        A `SongInfo`. Callers must check `with_valid_download_url`: when no link resolves to a
        working url the returned object is either empty or describes the last failed attempt.

    Note:
        Errors from the detail-page request (`raise_for_status`) are not caught here; only lyric
        extraction is best effort.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
    try: lyric_result, lyric = self._extractlrc(resp.text)
    except Exception: lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(resp.text)
    # links are tried in the order given by _extractquarklinksfromhtml; stop at the first working one
    for quark_info in download_result['quark_links']:
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        # first positive `duration` found anywhere in the download result, 0 when there is none
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
            ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric, cover_url=download_result.get('cover_url'), download_url=download_url,
            download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins when it is a known audio extension, otherwise fall back to mp3
        # (the former two-branch check ran after ext had already been overwritten, so its first branch could never fire)
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # normalise missing / placeholder lyrics and derive a duration from the lyric when none is set
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit through the site's own `/audio/play` endpoint.

    The detail page supplies the lyric (best effort) and the cover url; the body of the play
    endpoint response is used as the audio url.

    Returns:
        A populated `SongInfo`, or an empty one when the endpoint body is not an http(s) url.
    '''
    overrides = request_overrides or {}
    empty_info = SongInfo(source=self.source)
    # detail page: lyric + cover
    detail_resp = self.get(search_result['url'], **overrides)
    detail_resp.raise_for_status()
    try:
        lyric_result, lyric = self._extractlrc(detail_resp.text)
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(detail_resp.text)
    # play endpoint: the response body is the audio url
    play_resp = self.get(f"https://www.jcpoo.cn/audio/play?id={search_result['id']}", **overrides)
    play_resp.raise_for_status()
    download_url = play_resp.text.strip()
    if not download_url or not str(download_url).startswith('http'):
        return empty_info
    # initial extension guess: suffix of the url path
    url_ext = download_url.split('?')[0].split('.')[-1]
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')),
        album='NULL', ext=url_ext, file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=download_result.get('cover_url'),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, overrides),
    )
    probe_status = self.audio_link_tester.probe(song_info.download_url, overrides)
    song_info.download_url_status['probe_status'] = probe_status
    song_info.file_size = probe_status['file_size']
    # keep the url-derived extension when it is a known one; otherwise prefer the probed one, then mp3
    known_exts = AudioLinkTester.VALID_AUDIO_EXTS
    if song_info.ext not in known_exts:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in known_exts else 'mp3'
    # normalise missing / placeholder lyrics, then derive the duration from the lyric when unset
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every playable hit to `song_infos`.

    For each parsed hit the Quark route is tried first (only when quark cookies are configured),
    then the site's own play endpoint; hits without a valid download url are dropped.

    Args:
        keyword: not read by this method; `search_url` is what gets fetched.
        search_url: url of the result page to fetch.
        request_overrides: extra keyword arguments forwarded to `self.get` and the parsers.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: optional progress object; status updates are skipped when it is None.
        progress_id: task id passed to `progress.update`.

    Returns:
        The list that received the results. Any exception is reported through `progress`
        and swallowed, so results collected before the failure are still returned.
    '''
    # init
    request_overrides = request_overrides or {}
    # a `[]` default is created once and shared by every call that omits the argument;
    # `is None` (rather than `or`) keeps appending to an empty list supplied by the caller
    song_infos = [] if song_infos is None else song_infos
    def _report(outcome: str):
        # `progress` defaults to None; without this guard the update raised inside the except handler too
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        _report('Success')
    # failure
    except Exception as err:
        _report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,122 @@
'''
Function:
Implementation of KKWSMusicClient: https://www.kkws.cc/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import functools
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, safeextractfromdict, extractdurationsecondsfromlrc, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''KKWSMusicClient'''
class KKWSMusicClient(BaseMusicClient):
source = 'KKWSMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
    '''Run the base-class initialisation, require Quark cookies, install the default headers and call `_initsession()`.

    Raises:
        AssertionError: when `quark_parser_config` has no `cookies` entry.
    '''
    super(KKWSMusicClient, self).__init__(**kwargs)
    # raised explicitly instead of via `assert`: assert statements are removed under `python -O`,
    # which would silently skip this configuration check; the exception type stays the same for callers
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
    # default_headers is an alias of (not a copy of) the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# construct search urls
self.search_size_per_page = min(self.search_size_per_source, 15)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
search_urls.append(f'https://www.kkws.cc/search.html?key={keyword}&page={int(count // page_size) + 1}')
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Parse a search-result page into a list of plain dicts.

    Every `ul.listbox > li` with an `h2 a[href]` link yields
    `{"id", "name", "format", "size", "share_time", "singer", "detail_url"}`; fields that cannot
    be extracted are empty strings.
    '''
    site_root = 'https://www.kkws.cc/'
    parsed_items = []
    for entry in BeautifulSoup(html_text, "lxml").select("ul.listbox > li"):
        anchor = entry.select_one("h2 a[href]")
        if not anchor:
            continue
        anchor_title = (anchor.get("title") or "").strip()
        anchor_text = anchor.get_text(" ", strip=True)
        # name: prefer the title attribute, then drop "[...]" tags and a trailing "- <size>" suffix
        if anchor_title:
            song_name = anchor_title.replace("免费下载", "").strip()
        else:
            song_name = anchor_text
        song_name = re.sub(r"\s*\[[^\]]+\]\s*", " ", song_name).strip()
        song_name = re.sub(r"\s*-\s*\d+(\.\d+)?[KMG]?\s*$", "", song_name).strip()
        # format: content of the first "[...]" in the link text
        format_match = re.search(r"\[([^\]]+)\]", anchor_text)
        file_format = format_match.group(1).strip() if format_match else ""
        # size: first "- <number>[K|M|G]" in the link text, inner spaces removed
        size_match = re.search(r"-\s*([0-9.]+\s*[KMG]?)", anchor_text, re.IGNORECASE)
        size_text = (size_match.group(1).replace(" ", "") if size_match else "").strip()
        # metadata: first <em> is the share time; the last one is the singer when there are at least two
        meta = entry.select("small em")
        share_time = meta[0].get_text(strip=True).replace("分享时间:", "").strip() if len(meta) >= 1 else ""
        singer = meta[-1].get_text(strip=True).replace("演唱:", "").strip() if len(meta) >= 2 else ""
        # id: numeric part of ".../detail/<id>.html"
        detail_url = urljoin(site_root, anchor["href"])
        id_match = re.search(r"/detail/(\d+)\.html", detail_url)
        parsed_items.append({
            "id": id_match.group(1) if id_match else "", "name": song_name, "format": file_format, "size": size_text,
            "share_time": share_time, "singer": singer, "detail_url": detail_url,
        })
    return parsed_items
'''_extractlyricsandquark'''
def _extractlyricsandquark(self, html_text: str, song_id: str, request_overrides: dict = None):
    '''Extract the lyric text and the Quark share links from a song detail page.

    Args:
        html_text: HTML of the detail page.
        song_id: id of the song, sent to the `getdown` endpoint together with each link token.
        request_overrides: extra keyword arguments forwarded to `self.get`.

    Returns:
        `{"lyrics": str, "quark_links": [{"url", "formats", "names"}, ...]}` with one entry per
        distinct Quark url.
    '''
    request_overrides = request_overrides or {}
    tb = (soup := BeautifulSoup(html_text, "lxml")).select_one("#textbox")
    # to_mmss_func: "m:ss(.fff)" or plain seconds -> "MM:SS" (fractions are truncated)
    to_mmss_func = lambda t: (lambda s: f"{s//60:02d}:{s%60:02d}")(int(float(t.split(":",1)[0])*60+float(t.split(":",1)[1])) if ":" in t else int(float(t)))
    # every non-empty line of #textbox: lines starting with a "[seconds]" or "[m:ss]" tag are rewritten to "[MM:SS] text", others are kept as they are
    lyrics = "" if not tb else "\n".join((f"[{to_mmss_func(m.group(1))}] {m.group(2).strip()}" if (m:=re.match(r"^\[(\d+(?:\.\d+)?|\d{1,2}:\d{2}(?:\.\d+)?)\]\s*(.*)$", line)) else f"{line}") for line in (l.strip() for l in tb.get_text("\n").splitlines()) if line)
    url_map, rank = {}, KKWSMusicClient.MUSIC_QUALITY_RANK
    for a in soup.select("div.downbox a[onclick]"):
        if not (onclick := (a.get("onclick") or "").strip()): continue
        # all single-quoted arguments of the onclick handler call
        args = re.findall(r"'([^']*)'", onclick); name = fmt = url = None
        # openModel(...) with >= 4 args carries (name, url, format); mbgotourl(...) with >= 3 args carries (name, url); anything else is skipped
        if (parsed := ((args[1], args[2], args[3] or None) if onclick.startswith("openModel") and len(args) >= 4 else (args[1], args[2], None) if onclick.startswith("mbgotourl") and len(args) >= 3 else None)) is None: continue
        # a name of the form "name|format" is split; an explicit format argument takes precedence over the part after "|"
        name_fmt, url, fmt = parsed; name, fmt = ((lambda n, f2: (n, fmt or f2))(*map(str.strip, name_fmt.split("|", 1))) if "|" in name_fmt else (name_fmt.strip(), fmt))
        # ask the getdown endpoint for the decrypted url; on any failure the value from the page is kept
        try: url = resp2json(self.get(f'https://www.kkws.cc/getdown?url={url}&j=1&id={song_id}', allow_redirects=True, **request_overrides))['data']['decrypted_url']
        except Exception: url = url
        if not (url and "pan.quark.cn" in url): continue
        # group by url, collecting every format and display name seen for it
        e = url_map.setdefault(url, {"url": url, "formats": set(), "names": set()})
        # replace the format label by the rank key it contains (case-insensitive), if any
        if fmt: e["formats"].add(functools.reduce(lambda f, k: k if k.lower() in f.lower() else f, rank, fmt))
        if name: e["names"].add(name)
    # order by the rank of each link's alphabetically first format, highest first
    quark_links = sorted(({"url": e["url"], "formats": sorted(e["formats"]), "names": sorted(e["names"])} for e in url_map.values()), key=lambda x: rank.get(x["formats"][0] if x["formats"] else "UNKNOWN", 0), reverse=True)
    quark_links = [q for q in quark_links if isinstance(q, dict) and q.get('url')]
    return {"lyrics": lyrics, "quark_links": quark_links}
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every hit with a working Quark download to `song_infos`.

    Args:
        keyword: not read by this method; `search_url` is what gets fetched.
        search_url: url of the result page to fetch.
        request_overrides: extra keyword arguments forwarded to `self.get` and the link tester.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: optional progress object; status updates are skipped when it is None.
        progress_id: task id passed to `progress.update`.

    Returns:
        The list that received the results. Any exception outside the per-hit detail fetch is
        reported through `progress` and swallowed, so earlier results are still returned.
    '''
    # init
    request_overrides = request_overrides or {}
    # a `[]` default is created once and shared by every call that omits the argument;
    # `is None` (rather than `or`) keeps appending to an empty list supplied by the caller
    song_infos = [] if song_infos is None else song_infos
    def _report(outcome: str):
        # `progress` defaults to None; without this guard the update raised inside the except handler too
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('detail_url' not in search_result) or ('id' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # a hit whose detail page cannot be fetched or parsed is skipped
            try: (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status(); download_result = self._extractlyricsandquark(resp.text, search_result['id'], request_overrides)
            except Exception: continue
            # links are tried in the order given by _extractlyricsandquark; stop at the first working one
            for quark_info in download_result['quark_links']:
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'): continue
                # first positive `duration` found anywhere in the download result, 0 when there is none
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(search_result.get('singer')), album='NULL',
                    ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(download_result, ['lyrics'], '')),
                    cover_url=None, download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                )
                probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.download_url_status['probe_status'] = probe_status
                song_info.file_size = probe_status['file_size']
                # the probed extension wins when it is a known audio extension, otherwise fall back to mp3
                # (the former two-branch check ran after ext had already been overwritten, so its first branch could never fire)
                song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
                if song_info.with_valid_download_url: break
            # normalise missing / placeholder lyrics and derive a duration from the lyric when none is set
            if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
            if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        _report('Success')
    # failure
    except Exception as err:
        _report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,171 @@
'''
Function:
Implementation of LivePOOMusicClient: https://www.livepoo.cn/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import ast
import copy
import json_repair
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''LivePOOMusicClient'''
class LivePOOMusicClient(BaseMusicClient):
source = 'LivePOOMusicClient'
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
def __init__(self, **kwargs):
    '''Run the base-class initialisation, warn if no Quark cookies are configured, install the default headers and call `_initsession()`.'''
    super(LivePOOMusicClient, self).__init__(**kwargs)
    # the warning text itself explains the consequence: without quark cookies only mp3 downloads are possible
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    # search and download share the same user agent string but are kept as two separate dicts
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {"user-agent": user_agent}
    self.default_download_headers = {"user-agent": user_agent}
    # default_headers is an alias of (not a copy of) the search headers
    self.default_headers = self.default_search_headers
    self._initsession()
'''_constructsearchurls'''
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
# init
rule, request_overrides = rule or {}, request_overrides or {}
# search rules
default_rule = {'page': 0, 'keyword': keyword}
default_rule.update(rule)
# construct search urls
base_url = 'https://www.livepoo.cn/search?'
self.search_size_per_page = min(self.search_size_per_source, 30)
search_urls, page_size, count = [], self.search_size_per_page, 0
while self.search_size_per_source > count:
page_rule = copy.deepcopy(default_rule)
page_rule['page'] = int(count // page_size)
search_urls.append(base_url + urlencode(page_rule))
count += page_size
# return
return search_urls
'''_parsesearchresultsfromhtml'''
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Parse a search-result page into a list of plain dicts.

    Every `li.song_item2` under `ul.tuij_song` that contains a link yields
    `{"title", "artist", "url", "id"}`. Titles of the form `singer《song》` are split into
    artist and title; any other title is kept whole with `artist` set to None.

    Args:
        html_text: HTML of the search page.

    Returns:
        List of result dicts. `id` is the link's `id` query parameter with a leading
        `MUSIC_` removed, or None when the link carries no such parameter.
    '''
    soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.livepoo.cn/"
    # compiled once instead of once per list item
    title_pattern = re.compile(r'^(.*?)《(.*?)》$')
    for li in soup.select("ul.tuij_song li.song_item2"):
        if not (a := li.select_one("a[href]")): continue
        href = a["href"].strip()
        title_div = a.select_one(".song_info2 > div")
        title = (title_div.get_text(strip=True) if title_div else a.get_text(" ", strip=True)).strip()
        m = title_pattern.match(title)
        singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title)
        mid = parse_qs(urlparse(href).query).get("id", [None])[0]
        # a link without an `id` parameter leaves mid as None; calling removeprefix on it raised
        # AttributeError and discarded every result of the page, so the prefix is only stripped when present
        song_id = mid.removeprefix('MUSIC_') if mid else None
        search_results.append({"title": song_name, "artist": singer, "url": urljoin(base_url, href), "id": song_id})
    return search_results
'''_extractquarklinksfromhtml'''
def _extractquarklinksfromhtml(self, html_text: str):
    '''Collect the Quark share links assigned to JS variables inside `<script>` tags, plus the cover url.

    Returns:
        `{'quark_links': [...], 'cover_url': str | None}` where each link is
        `{"key", "format", "url"}`; links are de-duplicated by url (first occurrence wins) and
        ordered by `MUSIC_QUALITY_RANK` of their format, best first.
    '''
    # matches `const|let|var <name> = "<quark share url>"`; layout inside the pattern is ignored (re.VERBOSE)
    quark_assignment = re.compile(
        r"""(?:const|let|var)\s+
(?P<key>[A-Za-z0-9_]+?)\s*=\s*
(?P<quote>["'])
(?P<url>https?://pan\.quark\.cn/s/[^"']+)
(?P=quote)
""", re.VERBOSE
    )
    rank = LivePOOMusicClient.MUSIC_QUALITY_RANK
    # url -> link dict; setdefault keeps the first occurrence and dicts preserve insertion order
    links_by_url = {}
    for script in BeautifulSoup(html_text, "lxml").find_all("script"):
        js = script.string or script.get_text() or ""
        if "pan.quark.cn/s/" not in js:
            continue
        for match in quark_assignment.finditer(js):
            url, key = match.group("url").strip(), match.group("key")
            if not url or not key:
                continue
            # variable names look like `<something>_url`; the format is the last rank key contained in the stem
            stem = key[:-4] if key.endswith("_url") else key
            fmt = ([k for k in rank.keys() if k.lower() in stem.lower()] or [stem])[-1]
            links_by_url.setdefault(url, {"key": key, "format": fmt, "url": url})
    quark_links = sorted(links_by_url.values(), key=lambda link: rank.get(str(link["format"]).upper(), 0), reverse=True)
    # the cover url sits in an inline JSON blob; undo its backslash escapes
    cover_url = None
    cover_match = re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)
    if cover_match:
        cover_url = bytes(cover_match.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/")
    return {'quark_links': quark_links, 'cover_url': cover_url}
'''_extractlrc'''
def _extractlrc(self, js_text: str):
    '''Extract the song detail object embedded in a page and render its lyric list as LRC text.

    Searches `js_text` for `const detailJson = '...'; const detail = JSON.parse`, evaluates the
    quoted body as a string literal, parses the result with `json_repair` and builds LRC lines
    from its `music_lrclist` entries.

    Returns:
        `(lyric_result, lyric)`: the parsed detail object and the LRC text after `cleanlrc`,
        or `({}, 'NULL')` when the snippet is not present. Parsing errors are not caught here.
    '''
    # functions
    # norm_func: stringify and remove all whitespace
    norm_func = lambda s: re.sub(r"\s+", "", str(s))
    # pick_func: value of the first key of `d` equal to `target` once whitespace is removed from the key, else None
    pick_func = lambda d, target: next((v for k, v in d.items() if norm_func(k) == target), None)
    # fmt_lrc_time_func: seconds (number or numeric string) -> "[mm:ss.xx]" tag
    fmt_lrc_time_func = lambda sec: (f"[{int((t := float(norm_func(sec)))) // 60:02d}:{(t - (int(t // 60) * 60)):05.2f}]")
    # lrc_list_to_lrc_func: "[ti:]/[ar:]/[al:]" header followed by one "<tag><text>" line per lrclist entry that has
    # both a time and a lineLyric value; lines are sorted by the formatted tag string, not by the numeric time
    lrc_list_to_lrc_func = lambda detail: (("\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]",]).strip() + "\n") + "\n".join(f"{ts}{ly}" for ts, ly in sorted([(fmt_lrc_time_func(t), re.sub(r"\s+", " ", str(lyric)).strip()) for it in (detail.get("music_lrclist", []) or []) for t in [pick_func(it, "time")] for lyric in [pick_func(it, "lineLyric")] if t is not None and lyric is not None], key=lambda x: x[0],)))
    # match
    if not (s := re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)): return {}, 'NULL'
    # drop raw line breaks, resolve backslash escapes by evaluating the body as a double-quoted literal, then parse the JSON
    string = s.group(1).replace("\r", "").replace("\n", ""); lyric_result = json_repair.loads(ast.literal_eval(f'"{string}"')); lyric = cleanlrc(lrc_list_to_lrc_func(lyric_result))
    # return
    return lyric_result, lyric
'''_parsesearchresultfromquark'''
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit to a downloadable song through the Quark share links on its detail page.

    Args:
        search_result: one dict from `_parsesearchresultsfromhtml` (reads `url`, `id`, `title`, `artist`).
        request_overrides: extra keyword arguments forwarded to `self.get`; also handed to the link tester.

    Returns:
        A `SongInfo`. Callers must check `with_valid_download_url`: when no link resolves to a
        working url the returned object is either empty or describes the last failed attempt.

    Note:
        Errors from the detail-page request (`raise_for_status`) are not caught here; only lyric
        extraction is best effort.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
    try: lyric_result, lyric = self._extractlrc(resp.text)
    except Exception: lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(resp.text)
    # links are tried in the order given by _extractquarklinksfromhtml; stop at the first working one
    for quark_info in download_result['quark_links']:
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        # first positive `duration` found anywhere in the download result, 0 when there is none
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
            ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric, cover_url=download_result.get('cover_url'), download_url=download_url,
            download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins when it is a known audio extension, otherwise fall back to mp3
        # (the former two-branch check ran after ext had already been overwritten, so its first branch could never fire)
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url: break
    # normalise missing / placeholder lyrics and derive a duration from the lyric when none is set
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_parsesearchresultfromweb'''
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search hit through the site's own `/audio/play` endpoint.

    The detail page supplies the lyric (best effort) and the cover url; the body of the play
    endpoint response is used as the audio url.

    Args:
        search_result: one dict from `_parsesearchresultsfromhtml` (reads `url`, `id`, `title`, `artist`).
        request_overrides: extra keyword arguments forwarded to `self.get`; also handed to the link tester.

    Returns:
        A populated `SongInfo`, or an empty one when the endpoint body is not an http(s) url.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
    try: lyric_result, lyric = self._extractlrc(resp.text)
    except Exception: lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(resp.text)
    # the play endpoint previously pointed at www.jcpoo.cn (left over from the JCPOO client);
    # ids parsed from livepoo.cn pages are resolved on livepoo.cn like every other request of this client
    # NOTE(review): assumes livepoo.cn serves the same /audio/play route as its sibling site - confirm against the live site
    (resp := self.get(f"https://www.livepoo.cn/audio/play?id={search_result['id']}", **request_overrides)).raise_for_status()
    if not (download_url := resp.text.strip()) or not str(download_url).startswith('http'): return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')),
        album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=download_result.get('cover_url'),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # keep the url-derived extension when it is a known one; otherwise prefer the probed one, then mp3
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    # normalise missing / placeholder lyrics, then derive the duration from the lyric when unset
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
'''_search'''
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every playable hit to `song_infos`.

    For each parsed hit the Quark route is tried first (only when quark cookies are configured),
    then the site's own play endpoint; hits without a valid download url are dropped.

    Args:
        keyword: not read by this method; `search_url` is what gets fetched.
        search_url: url of the result page to fetch.
        request_overrides: extra keyword arguments forwarded to `self.get` and the parsers.
        song_infos: list that receives the results; a fresh list is created when omitted.
        progress: optional progress object; status updates are skipped when it is None.
        progress_id: task id passed to `progress.update`.

    Returns:
        The list that received the results. Any exception is reported through `progress`
        and swallowed, so results collected before the failure are still returned.
    '''
    # init
    request_overrides = request_overrides or {}
    # a `[]` default is created once and shared by every call that omits the argument;
    # `is None` (rather than `or`) keeps appending to an empty list supplied by the caller
    song_infos = [] if song_infos is None else song_infos
    def _report(outcome: str):
        # `progress` defaults to None; without this guard the update raised inside the except handler too
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        _report('Success')
    # failure
    except Exception as err:
        _report(f'Error: {err}')
    # return
    return song_infos
@@ -0,0 +1,127 @@
'''
Function:
Implementation of MituMusicClient: https://www.qqmp3.vip/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import copy
from urllib.parse import urlencode
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''MituMusicClient'''
class MituMusicClient(BaseMusicClient):
    '''
    Client for https://www.qqmp3.vip/ (API host: api.qqmp3.vip).
    Every search hit is resolved through its quark share links first (only when quark cookies are configured)
    and through the site's own play url otherwise.
    '''
    source = 'MituMusicClient'
    # format label -> score; quark links are tried from the highest score down
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    def __init__(self, **kwargs):
        '''Configure request headers and initialize the session; warn when no quark cookies are configured.'''
        super(MituMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {
            "accept": "*/*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "origin": "https://www.qqmp3.vip", "priority": "u=1, i", "referer": "https://www.qqmp3.vip/", "sec-ch-ua": '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
            "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Return the list of search urls for keyword (a single url for this source).
        Entries of rule override the default query parameters. Also sets self.search_size_per_page to self.search_size_per_source.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {'keyword': keyword, 'type': 'search'}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://api.qqmp3.vip/api/songs.php?'
        search_urls = [base_url + urlencode(default_rule)]
        self.search_size_per_page = self.search_size_per_source
        # return
        return search_urls
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
        '''
        Build a SongInfo from the quark share links listed under search_result["downurl"].
        Links are tried from the highest to the lowest MUSIC_QUALITY_RANK score; the loop stops at the first one with a valid download url.
        A pristine SongInfo is returned when the search result carries no quark links.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        quark_download_urls = search_result.get('downurl', []) or []
        # nothing to resolve: skip the lyric request, _search falls back to the play url anyway
        if not quark_download_urls: return song_info
        # longest labels first, so a longer label wins over a shorter one found in the same text
        known_formats = sorted(MituMusicClient.MUSIC_QUALITY_RANK, key=len, reverse=True)
        def parse_format_func(label):
            upper_label = str(label).upper()
            return next((fmt for fmt in known_formats if fmt in upper_label), "UNKNOWN")
        def quality_score_func(item):
            # only the text before the first "$$" is inspected for a format label
            return MituMusicClient.MUSIC_QUALITY_RANK.get(parse_format_func(item.split("$$", 1)[0]), 0)
        # parse (the lyric is best-effort: any failure leaves it empty)
        try:
            (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
            lyric_result = resp2json(resp=resp)
        except Exception:
            lyric_result = {}
        for quark_download_url in sorted(quark_download_urls, key=quality_score_func, reverse=True):
            download_result, download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
                ext='mp3', file_size=None, identifier=search_result['rid'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
                download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            # the probed extension replaces the placeholder; anything unknown falls back to mp3
            song_info.file_size, song_info.ext = probe_status['file_size'], probe_status['ext']
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS: song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
        '''
        Build a SongInfo from the play url returned by the site's kw.php endpoint.
        An empty SongInfo is returned when the request fails or no http(s) url is present, so one broken hit does not abort the page.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # parse
        try:
            (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
            download_result = resp2json(resp=resp)
        except Exception:
            return song_info
        download_url = safeextractfromdict(download_result, ['data', 'url'], None)
        if not download_url or not str(download_url).startswith('http'): return song_info
        download_url = str(download_url)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
            ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['rid'], duration='-:-:-', lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # prefer the probed extension when the url suffix is not a known audio extension
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Fetch search_url, resolve every hit to a SongInfo and append the valid ones to song_infos (mutated in place and returned).
        Any exception ends the page and is reported through the progress description.
        '''
        # init (a fresh list per call instead of a default list shared between calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = resp2json(resp)['data']
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('rid' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # ----parse from quark links
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
                # ----parse from play url
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
                # ----filter if invalid
                if not song_info.with_valid_download_url: continue
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
@@ -0,0 +1,102 @@
'''
Function:
Implementation of TwoT58MusicClient: https://www.2t58.com/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import copy
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, RandomIPGenerator, AudioLinkTester
'''TwoT58MusicClient'''
class TwoT58MusicClient(BaseMusicClient):
    '''Client for https://www.2t58.com/: scrapes the html search pages and resolves download, lyric and cover urls per hit.'''
    source = 'TwoT58MusicClient'
    def __init__(self, **kwargs):
        '''Configure request headers and initialize the session.'''
        super(TwoT58MusicClient, self).__init__(**kwargs)
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-user": "?1",
            "cookie": "Hm_tf_hx9umupwu8o=1766942296; Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f=1766942296; cac9054cc9568db7fa51d16ee602cd7b=fd6762f9a63b502fda3befef86ea6460; server_name_session=91a76d925399962c481089ef4a83ce4e; Hm_lvt_hx9umupwu8o=1766942296,1768900847; Hm_lpvt_hx9umupwu8o=1768901202", "referer": "https://www.2t58.com/so/%E5%8F%AF%E6%83%9C.html", "priority": "u=0, i",
            "sec-ch-ua": "\"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"", "sec-ch-ua-mobile": "?0", "upgrade-insecure-requests": "1", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", "sec-fetch-site": "same-origin",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Return one search url per result page until self.search_size_per_source results are covered.
        Sets self.search_size_per_page to min(self.search_size_per_source, 68). The keyword is percent-encoded because it becomes a path segment.
        '''
        from urllib.parse import quote
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # construct search urls
        self.search_size_per_page = min(self.search_size_per_source, 68)
        search_urls, page_size, count = [], self.search_size_per_page, 0
        # characters such as "/", "?" or "#" in the keyword would otherwise change the structure of the url
        encoded_keyword = quote(str(keyword), safe='')
        while self.search_size_per_source > count:
            page_number = count // page_size + 1
            # the first page has no page number in its path
            if page_number == 1: search_urls.append(f'https://www.2t58.com/so/{encoded_keyword}.html')
            else: search_urls.append(f'https://www.2t58.com/so/{encoded_keyword}/{page_number}.html')
            count += page_size
        # return
        return search_urls
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text: str):
        '''Extract {"title", "url", "path", "id"} dicts from the anchors of a search result page; the id is the file stem of the song path.'''
        soup = BeautifulSoup(html_text, "lxml")
        search_results, base_url = [], 'https://www.2t58.com/'
        for a in soup.select(".play_list ul li .name a"):
            title, href = a.get_text(strip=True), a.get("href", "")
            song_url = urljoin(base_url, href)
            song_id = urlparse(song_url).path.strip('/').split('/')[-1].split('.')[0]
            search_results.append({"title": title, "url": song_url, "path": href, "id": song_id})
        return search_results
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Fetch search_url, resolve every hit to a SongInfo and append the valid ones to song_infos (mutated in place and returned).
        Qualities are tried in the order flac, wav, 320; lyric and cover lookups are best-effort.
        Any other exception ends the page and is reported through the progress description.
        '''
        # init (a fresh list per call instead of a default list shared between calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('url' not in search_result) or ('id' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # strip a trailing "[...]" tag from the title, then take the text inside 《》 as the song name
                title = re.sub(r"\s*\[[^\]]*\]\s*$", "", str(search_result.get("title") or "NULL"))
                name_match = re.search(r"《(.*?)》", title)
                song_name = legalizestring((name_match.group(1) if name_match else title).strip())
                # NOTE(review): the separator here used to be an empty string, which makes str.split raise ValueError;
                # "《" is assumed from the title pattern above (text before the song name) - confirm against real titles
                singers = legalizestring(title.split("《", 1)[0].strip())
                for quality in ['flac', 'wav', '320']:
                    headers = copy.deepcopy(self.default_download_headers); RandomIPGenerator().addrandomipv4toheaders(headers=headers)
                    # the final url after redirects is used as the download url
                    try: download_url = self.session.head(f"https://www.2t58.com/plug/down.php?ac=music&id={search_result['id']}&k={quality}", allow_redirects=True, headers=headers, **request_overrides).url
                    except Exception: continue
                    song_info = SongInfo(
                        raw_data={'search': search_result, 'download': {}, 'lyric': {}}, source=self.source, song_name=song_name,
                        singers=singers, album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'],
                        duration='-:-:-', lyric='NULL', cover_url=None, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                    )
                    if not song_info.with_valid_download_url: continue
                    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                    song_info.download_url_status['probe_status'] = probe_status
                    song_info.file_size = probe_status['file_size']
                    # prefer the probed extension when the url suffix is not a known audio extension
                    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
                    if song_info.with_valid_download_url: break
                if not song_info.with_valid_download_url: continue
                # --lyric results
                try:
                    (resp := self.get(f"https://www.2t58.com/plug/down.php?ac=music&lk=lrc&id={search_result['id']}", **request_overrides)).raise_for_status()
                    song_info.lyric = cleanlrc(resp.text.replace('[00:00.00]欢迎来访爱听音乐网 www.2t58.com\r\n', ''))
                    song_info.duration_s = extractdurationsecondsfromlrc(song_info.lyric); song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    song_info.lyric, song_info.duration = 'NULL', '-:-:-'
                # --cover results
                try:
                    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
                    # explicit parser, same one as in _parsesearchresultsfromhtml
                    cover = BeautifulSoup(resp.text, "lxml").select_one("#mcover")
                    song_info.cover_url = cover["src"] if cover and cover.has_attr("src") else None
                except Exception:
                    song_info.cover_url = None
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
@@ -0,0 +1,150 @@
'''
Function:
Implementation of YinyuedaoMusicClient: https://1mp3.top/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import re
import base64
from html import unescape
from bs4 import BeautifulSoup
from rich.progress import Progress
from ..sources import BaseMusicClient
from urllib.parse import urljoin, urlparse
from ..utils import legalizestring, usesearchheaderscookies, safeextractfromdict, seconds2hms, searchdictbykey, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
'''YinyuedaoMusicClient'''
class YinyuedaoMusicClient(BaseMusicClient):
    '''
    Client for https://1mp3.top/: scrapes the html search page and each song's detail page.
    Every hit is resolved through its quark share links first (only when quark cookies are configured)
    and through the site's geturl endpoint otherwise.
    '''
    source = 'YinyuedaoMusicClient'
    # format label -> score; download links of a song page are ordered from the highest score down
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    # a bare number in square brackets, e.g. "[123.4]"
    _BRACKETED_SECONDS_PATTERN = re.compile(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]")
    def __init__(self, **kwargs):
        '''Configure request headers and initialize the session; warn when no quark cookies are configured.'''
        super(YinyuedaoMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "priority": "u=0, i", "referer": "https://1mp3.top/",
            "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-site": "same-origin", "sec-fetch-user": "?1", "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Return the list of search urls for keyword (a single url for this source).
        Also sets self.search_size_per_page to self.search_size_per_source. The keyword is percent-encoded because it becomes a query value.
        '''
        from urllib.parse import quote
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # construct search urls ("&", "#" or "+" in the keyword would otherwise corrupt the query string)
        search_urls = [f'https://1mp3.top/search.html?keyword={quote(str(keyword), safe="")}']
        self.search_size_per_page = self.search_size_per_source
        # return
        return search_urls
    '''_parsemusicpage'''
    def _parsemusicpage(self, html_text: str, base_url: str = ""):
        '''
        Parse a song detail page into {"lyrics", "cover", "quark_links"}.
        quark_links holds unique {"format", "score", "url", "text"} dicts sorted by descending score; format is None when no label could be found.
        Relative urls are resolved against base_url.
        '''
        soup, lyrics = BeautifulSoup(html_text, "html.parser"), "NULL"
        if (article := soup.select_one("section#demo article")): lyrics = re.sub(r"\n+", "\n", unescape(article.get_text("\n", strip=True))).strip()
        cover = ""
        img = soup.select_one("#album-cover") or soup.select_one(".cover-art img")
        if img and img.get("src"): cover = urljoin(base_url, img["src"].strip())
        links, seen = [], set()
        for a in soup.select("a.download-link[data-url]"):
            fmt = (a.get("data-format") or "").strip().upper()
            text = a.get_text(" ", strip=True)
            if not (url := (a.get("data-url") or "").strip()): continue
            # fall back to a format label found in the link text when data-format is empty
            if not fmt:
                label_match = re.search(r"\b(DSD|DSF|DFF|WAV|AIFF|FLAC|ALAC|APE|WV|OPUS|AAC|M4A|OGG|VORBIS|MP3|WMA)\b", text, re.I)
                fmt = label_match.group(1).upper() if label_match else None
            item = {"format": fmt, "score": YinyuedaoMusicClient.MUSIC_QUALITY_RANK.get(fmt, -1), "url": urljoin(base_url, url), "text": text}
            if (key := (item["format"], item["url"])) not in seen: seen.add(key); links.append(item)
        # format may be None: map it to "" so tied scores never compare None with str
        links.sort(key=lambda x: (-x["score"], x["format"] or "", x["url"]))
        return {"lyrics": lyrics, "cover": cover, "quark_links": links}
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text, base_url="https://www.1mp3.top"):
        '''
        Extract {"id", "title", "singer", "url"} dicts from the "/mdetail/" anchors of a search page.
        The id is the part before the first "|" of the base64-decoded last path segment, or the raw segment when decoding fails.
        '''
        soup, search_results = BeautifulSoup(html_text, "html.parser"), []
        for a in soup.select('a[href^="/mdetail/"]'):
            cols = a.select("div.row > div")
            if len(cols) < 2: continue
            href = a.get("href", "")
            token = href.rsplit("/", 1)[-1]
            try: music_id = base64.b64decode(token).decode(errors="ignore").split("|", 1)[0]
            except Exception: music_id = token
            search_results.append({"id": music_id, "title": cols[0].get_text(" ", strip=True), "singer": cols[1].get_text(" ", strip=True), "url": urljoin(base_url, href)})
        return search_results
    '''_fillmissingduration'''
    def _fillmissingduration(self, song_info):
        '''
        Derive duration_s / duration from the lyric when no duration is set yet.
        The last lyric line carrying a "[seconds]" tag is used; song_info is left untouched when no line has one.
        '''
        if song_info.duration and song_info.duration != '-:-:-': return song_info
        for line in reversed(str(song_info.lyric).split('\n')):
            if (tag_match := YinyuedaoMusicClient._BRACKETED_SECONDS_PATTERN.search(line)):
                song_info.duration_s = float(tag_match.group(1)); song_info.duration = seconds2hms(song_info.duration_s)
                break
        return song_info
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, request_overrides: dict = None):
        '''
        Build a SongInfo from download_result["quark_links"], tried in their given order; the loop stops at the first valid download url.
        Links without a format and "mgg" links are skipped before any request is made for them.
        The parse result of the last tried link is stored under download_result["quark_parse_result"].
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # parse
        for quark_download_url in download_result['quark_links']:
            if not isinstance(quark_download_url, dict) or not safeextractfromdict(quark_download_url, ['format'], ''): continue
            ext = str(quark_download_url.get('format', 'mp3')).lower()
            # skip before building a SongInfo, so a trailing mgg link can never be returned as the result
            if ext in {'mgg'}: continue
            download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['url'], **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result['quark_parse_result'], 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
                ext=ext, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(download_result.get('lyrics')),
                cover_url=download_result.get("cover"), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.download_url_status['probe_status'] = probe_status
            song_info.file_size = probe_status['file_size']
            # prefer the probed extension when the link's format label is not a known audio extension
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
            if song_info.with_valid_download_url: break
        self._fillmissingduration(song_info)
        # return
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, request_overrides: dict = None):
        '''
        Build a SongInfo from the url returned by the site's geturl endpoint; the raw response is stored under download_result["geturl"].
        An empty SongInfo is returned when the request fails, no http(s) url is present, or the url points to an "mgg" file.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        encrypted_id = urlparse(str(search_result["url"])).path.strip('/').split('/')[-1]
        # parse
        try: (resp := self.get(f'https://1mp3.top/geturl?id={encrypted_id}&quality=exhigh&type=json', **request_overrides)).raise_for_status()
        except Exception: return song_info
        download_result['geturl'] = resp2json(resp=resp)
        download_url = safeextractfromdict(download_result['geturl'], ['data', 'url'], None)
        if not download_url or not str(download_url).startswith('http'): return song_info
        download_url = str(download_url)
        ext = download_url.split('?')[0].split('.')[-1]
        if ext in {'mgg'}: return song_info
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
            ext=ext, file_size=None, identifier=search_result.get('id'), duration_s=None, duration='-:-:-', lyric=cleanlrc(download_result.get('lyrics')), cover_url=download_result.get("cover"),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # prefer the probed extension when the url suffix is not a known audio extension
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        self._fillmissingduration(song_info)
        # return
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Fetch search_url, resolve every hit to a SongInfo and append the valid ones to song_infos (mutated in place and returned).
        Hits whose detail page cannot be fetched are skipped; any other exception ends the page and is reported through the progress description.
        '''
        # init (a fresh list per call instead of a default list shared between calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('id' not in search_result) or ('url' not in search_result): continue
                song_info = SongInfo(source=self.source)
                try:
                    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
                    # the page url is the base for relative cover / link urls found on that page
                    download_result: dict = self._parsemusicpage(resp.text, base_url=search_result['url'])
                except Exception:
                    continue
                # ----parse from quark links
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, request_overrides)
                # ----parse from play url
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, request_overrides)
                # ----filter if invalid
                if not song_info.with_valid_download_url: continue
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
@@ -0,0 +1,89 @@
'''
Function:
Implementation of ZhuolinMusicClient: https://music.zhuolin.wang/
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
import copy
from urllib.parse import urlsplit
from rich.progress import Progress
from ..sources import BaseMusicClient
from ..utils import resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, extractdurationsecondsfromlrc, cleanlrc, SongInfo, LanZouYParser, AudioLinkTester
'''ZhuolinMusicClient'''
class ZhuolinMusicClient(BaseMusicClient):
    '''Client for https://music.zhuolin.wang/: posts search and lyric queries to the site's plugns/api.php endpoint.'''
    source = 'ZhuolinMusicClient'
    MUSIC_QUALITIES = {'128', '320', '2000'}
    def __init__(self, **kwargs):
        '''Configure request headers and initialize the session.'''
        super(ZhuolinMusicClient, self).__init__(**kwargs)
        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''
        Return one {"url", "data"} request description per result page until self.search_size_per_source results are covered.
        Entries of rule override the default form fields; "count" and "pages" are always set per page.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {"types": "search", 'count': "20", 'source': "freemp3", 'pages': "1", 'name': keyword}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://music.zhuolin.wang/plugns/api.php'
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(default_rule)
            page_rule['count'] = page_size
            page_rule['pages'] = count // page_size + 1
            search_urls.append({'url': base_url, 'data': page_rule})
            count += page_size
        # return
        return search_urls
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''
        Post the request described by search_url (a {"url", "data"} dict), resolve every hit to a SongInfo and
        append the valid ones to song_infos (mutated in place and returned). The lyric lookup is best-effort.
        Any other exception ends the page and is reported through the progress description.
        '''
        # init (a fresh list per call instead of a default list shared between calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        search_meta = copy.deepcopy(search_url); search_url = search_meta.pop('url')
        # successful
        try:
            # --search results
            # NOTE(review): verify=False turns off TLS certificate verification for this host - confirm this is still required
            (resp := self.post(search_url, verify=False, **search_meta, **request_overrides)).raise_for_status()
            for search_result in resp2json(resp=resp):
                # --download results
                if not isinstance(search_result, dict) or ('id' not in search_result): continue
                download_url, download_result = safeextractfromdict(search_result, ['url'], ""), {}
                # hostname is None for empty or relative urls, so default it to '' before the membership test
                if 'lanzouy.com' in (urlsplit(str(download_url)).hostname or ''): download_result, download_url = LanZouYParser.parsefromurl(download_url)
                if (not download_url) or (not str(download_url).startswith('http')): continue
                download_url = str(download_url)
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(', '.join(safeextractfromdict(search_result, ['artist'], []) or [])),
                    album=legalizestring(safeextractfromdict(search_result, ['album', 'name'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=search_result.get('pic'),
                    download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                )
                probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.download_url_status['probe_status'] = probe_status
                song_info.file_size = probe_status['file_size']
                # prefer the probed extension when the url suffix is not a known audio extension
                if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                    song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
                if not song_info.with_valid_download_url: continue
                # --lyric results
                try:
                    (resp := self.post('https://music.zhuolin.wang/plugns/api.php', verify=False, data={'types': 'lyric', 'id': search_result['id'], 'source': 'freemp3'}, **request_overrides)).raise_for_status()
                    lyric_result = resp2json(resp=resp); lyric = safeextractfromdict(lyric_result, ['lyric'], '') or ''
                    # the field holds either the lyric text itself or a url pointing to it
                    if lyric.startswith('http'): lyric = cleanlrc(self.get(lyric, **request_overrides).text)
                    lyric = lyric or 'NULL'; song_info.duration_s = extractdurationsecondsfromlrc(lyric); song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    lyric_result, lyric = {}, 'NULL'
                if lyric_result: song_info.raw_data['lyric'] = lyric_result
                if lyric and (lyric not in {'NULL'}): song_info.lyric = lyric
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos