Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
'''initialize'''
|
||||
from .mitu import MituMusicClient
|
||||
from .kkws import KKWSMusicClient
|
||||
from .jcpoo import JCPOOMusicClient
|
||||
from .flmp3 import FLMP3MusicClient
|
||||
from .htqyy import HTQYYMusicClient
|
||||
from .twot58 import TwoT58MusicClient
|
||||
from .fangpi import FangpiMusicClient
|
||||
from .buguyy import BuguyyMusicClient
|
||||
from .zhuolin import ZhuolinMusicClient
|
||||
from .gequbao import GequbaoMusicClient
|
||||
from .gequhai import GequhaiMusicClient
|
||||
from .livepoo import LivePOOMusicClient
|
||||
from .fivesong import FiveSongMusicClient
|
||||
from .yinyuedao import YinyuedaoMusicClient
|
||||
@@ -0,0 +1,135 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of BuguyyMusicClient: https://buguyy.top/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import html
|
||||
import copy
|
||||
from urllib.parse import urlencode
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''BuguyyMusicClient'''
|
||||
class BuguyyMusicClient(BaseMusicClient):
|
||||
source = 'BuguyyMusicClient'
|
||||
def __init__(self, **kwargs):
    '''Initialise the client: base-class setup, quark warning, default headers, session.

    Every keyword argument is forwarded unchanged to the base-class constructor.
    '''
    super(BuguyyMusicClient, self).__init__(**kwargs)
    # warn (but keep going) when no quark cookies are configured
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    search_headers = {
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "origin": "https://buguyy.top",
        "priority": "u=1, i",
        "referer": "https://buguyy.top/",
        "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": browser_user_agent,
    }
    self.default_search_headers = search_headers
    self.default_download_headers = {"user-agent": browser_user_agent}
    # default_headers is the very same dict object as default_search_headers
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the search URL list for ``keyword``.

    Args:
        keyword: search text; becomes the ``keyword`` query parameter.
        rule: optional extra query parameters; entries override the defaults.
        request_overrides: accepted for interface parity; not used here.

    Returns:
        A one-element list holding the search API URL.

    Side effect: ``self.search_size_per_page`` is set to ``self.search_size_per_source``.
    '''
    extra_params = rule or {}
    # defaults first, caller-supplied rule entries win on key collisions
    query_params = {'keyword': keyword, **extra_params}
    endpoint = 'https://a.buguyy.top/newapi/search.php?'
    query_string = urlencode(copy.deepcopy(query_params))
    urls = [endpoint + query_string]
    self.search_size_per_page = self.search_size_per_source
    return urls
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Resolve one search hit through its Quark share links.

    Args:
        search_result: one entry of the search result list. ``id`` is used for the
            metadata request; ``downurl`` / ``ktmdownurl`` / ``title`` / ``singer`` /
            ``picurl`` are read when present.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and to
            the link tester.

    Returns:
        A ``SongInfo``. When no share link yields an http(s) URL, this is the
        placeholder created at the start (with duration / lyric normalised below).
    '''
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # metadata (album / lrc / duration) lookup is best-effort: any failure leaves an empty dict
    # NOTE(review): verify=False turns off TLS certificate verification - confirm this is intended
    try:
        resp = self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)
        resp.raise_for_status()
        lyric_result = resp2json(resp=resp)
    except Exception:
        lyric_result = dict()
    # group(1) is the WAV/FLAC link, group(2) the MP3 link
    share_link_pattern = re.compile(r"(?i)(?:WAV|FLAC)#(https?://[^#]+)|MP3#(https?://[^#]+)")
    quark_download_urls = [u for u in (search_result.get('downurl', ''), search_result.get('ktmdownurl', '')) if u]
    for quark_download_url in quark_download_urls:
        m = share_link_pattern.search(str(quark_download_url))
        # a field without a recognisable "<FORMAT>#<url>" entry is skipped instead of raising on None
        if m is None:
            continue
        download_result, download_url = QuarkParser.parsefromurl(m.group(1) or m.group(2), **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'):
            continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album=legalizestring(safeextractfromdict(lyric_result, ['data', 'album'], None)),
            ext="wav", file_size_bytes=None, file_size=None, identifier=search_result["id"], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')) or "NULL", cover_url=safeextractfromdict(search_result, ["picurl"], None),
            download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers
        )
        probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins; fall back to mp3 when the probe reports nothing usable
        song_info.ext = probe_status['ext']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = 'mp3'
        if song_info.with_valid_download_url:
            break
    # fall back to the duration text of the metadata response when none is known yet
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try:
            numbers = list(map(int, re.findall(r'\d+', safeextractfromdict(lyric_result, ['data', 'duration'], ''))))
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0, 0, 0] + numbers)[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    # '歌词获取失败' is the site's "lyric fetch failed" marker
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    song_info.lyric = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE)
    song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Resolve one search hit through the site's own play-URL endpoint.

    Args:
        search_result: one entry of the search result list (``id`` drives the request).
        request_overrides: extra keyword arguments forwarded to ``self.get`` and to
            the link tester.

    Returns:
        A populated ``SongInfo``, or a bare placeholder when the endpoint yields
        no http(s) URL.
    '''
    request_overrides = request_overrides or {}
    song_info = SongInfo(source=self.source)
    # any failure of the request (including a missing "id") degrades to an empty result
    try:
        resp = self.get(f'https://a.buguyy.top/newapi/geturl2.php?id={search_result["id"]}', verify=False, **request_overrides)
        resp.raise_for_status()
        download_result = resp2json(resp=resp)
    except Exception:
        download_result = dict()
    download_url = safeextractfromdict(download_result, ['data', 'url'], '')
    if not (download_url and download_url.startswith('http')):
        return song_info
    # first guess for the extension: the suffix of the URL path
    url_suffix = download_url.split('?')[0].split('.')[-1]
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}},
        source=self.source,
        song_name=legalizestring(search_result.get('title')),
        singers=legalizestring(search_result.get('singer')),
        album=legalizestring(safeextractfromdict(download_result, ["data", "album"], None)),
        ext=url_suffix,
        file_size_bytes=None,
        file_size=None,
        identifier=search_result.get("id"),
        duration_s=None,
        duration='-:-:-',
        lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], 'NULL')),
        cover_url=safeextractfromdict(search_result, ['picurl'], None),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    probe_status = song_info.download_url_status['probe_status']
    song_info.file_size = probe_status['file_size']
    # only override the URL-derived extension when it is not a known audio extension
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        probed_ext = probe_status['ext']
        song_info.ext = probed_ext if probed_ext in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    if not song_info.duration or song_info.duration in ('-:-:-', '00:00:00'):
        try:
            raw_duration = safeextractfromdict(download_result, ['data', 'duration'], '')
            numbers = [int(token) for token in re.findall(r'\d+', raw_duration)]
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0, 0, 0] + numbers)[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    # '歌词获取失败' is the site's "lyric fetch failed" marker
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    song_info.lyric = re.sub(r'<br\s*/?>', '\n', song_info.lyric, flags=re.IGNORECASE)
    song_info.lyric = cleanlrc(html.unescape(song_info.lyric))
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to ``song_infos``.

    Args:
        keyword: not read by this method; the query is already part of ``search_url``.
        search_url: fully built search API URL.
        request_overrides: extra keyword arguments forwarded to ``self.get``.
        song_infos: list to append results to; a fresh list is created when omitted
            (previously a single default list was shared between calls).
        progress: optional progress object; updates are skipped when it is None.
        progress_id: task id passed to ``progress.update``.

    Returns:
        ``song_infos`` (the list passed in, or the newly created one).
    '''
    request_overrides = request_overrides or {}
    if song_infos is None:
        song_infos = []

    def report(outcome: str):
        # progress is optional, so never let reporting itself raise
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")

    try:
        # NOTE(review): verify=False turns off TLS certificate verification - confirm this is intended
        resp = self.get(search_url, verify=False, **request_overrides)
        resp.raise_for_status()
        search_results = resp2json(resp=resp)['data']['list']
        for search_result in search_results:
            if not isinstance(search_result, dict) or ('id' not in search_result):
                continue
            song_info = SongInfo(source=self.source)
            # preferred route: quark share links (only possible with cookies configured)
            if self.quark_parser_config.get('cookies'):
                song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # fallback route: the site's own play URL
            if not song_info.with_valid_download_url:
                song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        report('Success')
    except Exception as err:
        report(f'Error: {err}')
    return song_infos
|
||||
@@ -0,0 +1,145 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of FangpiMusicClient: https://www.fangpi.net/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import ast
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''FangpiMusicClient'''
|
||||
class FangpiMusicClient(BaseMusicClient):
|
||||
source = 'FangpiMusicClient'
|
||||
def __init__(self, **kwargs):
    '''Initialise the client: base-class setup, quark warning, default headers, session.

    Every keyword argument is forwarded unchanged to the base-class constructor.
    '''
    super(FangpiMusicClient, self).__init__(**kwargs)
    # warn (but keep going) when no quark cookies are configured
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {
        "user-agent": browser_user_agent,
        "referer": "https://www.fangpi.net/",
    }
    self.default_download_headers = {"user-agent": browser_user_agent}
    # default_headers is the very same dict object as default_search_headers
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the search URL list for ``keyword``.

    Args:
        keyword: search text; placed (percent-encoded) in the URL path.
        rule: accepted for interface parity; not used here.
        request_overrides: accepted for interface parity; not used here.

    Returns:
        A one-element list holding the search page URL.

    Side effect: ``self.search_size_per_page`` is set to ``self.search_size_per_source``.
    '''
    from urllib.parse import quote
    # the keyword is a single path segment: encode "/", "?", "#" ... so they cannot
    # terminate or split the path
    encoded_keyword = quote(str(keyword), safe='')
    search_urls = [f'https://www.fangpi.net/s/{encoded_keyword}']
    self.search_size_per_page = self.search_size_per_source
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html, base_url="https://www.fangpi.net"):
    '''Extract the result rows from a search page.

    Args:
        html: markup of the search page.
        base_url: base used to turn the relative ``/music/...`` links into absolute URLs.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``artist``, ``title`` and
        ``url``, de-duplicated by URL. Empty when no result card is found.
    '''
    soup, search_results, seen = BeautifulSoup(html, "lxml"), [], set()
    # the result card is the one whose text contains "搜索结果" ("search results") and that has an h1.mark
    result_card = next((card for card in soup.select("div.card") if "搜索结果" in card.get_text(" ", strip=True) and card.select_one("h1.mark")), None)
    if result_card is None:
        return []
    for row in result_card.select("div.row"):
        detail = row.select_one('a[href^="/music/"][title]')
        action = row.select_one('a.btn[href^="/music/"]')
        if not detail or not action:
            continue
        url = urljoin(base_url, detail["href"])
        if url in seen:
            continue
        seen.add(url)
        name_el = row.select_one("span.text-primary") or detail
        # a row without an artist element keeps an empty artist instead of raising on None
        artist_el = row.select_one("small.text-jade")
        search_results.append({
            "id": detail["href"].rsplit("/", 1)[-1],
            "name": name_el.get_text(strip=True),
            "artist": artist_el.get_text(strip=True) if artist_el is not None else "",
            "title": detail.get("title", "").strip(),
            "url": url,
        })
    return search_results
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Resolve one detail page through its Quark share links.

    Args:
        search_result: row dict from the search page (``url`` is the identifier fallback).
        download_result: data dict parsed from the detail page; ``mp3_extra_urls``,
            ``mp3_title``, ``mp3_author``, ``mp3_id``, ``mp3_cover`` and
            ``mp3_duration`` are read. The key ``quark_parse_result`` is written to it.
        soup: parsed detail page; the lyric is taken from ``div#content-lrc``.
        request_overrides: extra keyword arguments forwarded to the link tester.

    Returns:
        A ``SongInfo``. When no share link yields an http(s) URL, this is the
        placeholder created at the start (with duration / lyric normalised below).
    '''
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
        # skip malformed entries instead of raising on a missing key / non-dict item
        if not isinstance(quark_download_url, dict) or not quark_download_url.get('share_link'):
            continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'):
            continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        # a page without the lyric container gets the 'NULL' lyric instead of raising on None
        lyric_node = soup.find("div", id="content-lrc")
        lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
            identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric,
            cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        # the probed extension wins; fall back to mp3 when the probe reports nothing usable
        song_info.ext = probe_status['ext']
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = 'mp3'
        if song_info.with_valid_download_url:
            break
    # first fallback: the page's own "mp3_duration" text, left-padded to H:M:S
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try:
            parts = list(map(int, (download_result.get('mp3_duration', '00:00:00') or '00:00:00').split(":")))
            song_info.duration = "{:02}:{:02}:{:02}".format(*([0] * (3 - len(parts)) + parts))
        except (ValueError, AttributeError, TypeError):
            # unparsable duration text is treated as unknown rather than aborting
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    # '歌词获取失败' is the site's "lyric fetch failed" marker
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    # second fallback: derive the duration from the lyric
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''Resolve one detail page through the site's ``/api/play-url`` endpoint.

    Args:
        search_result: row dict from the search page (``url`` is the identifier fallback).
        download_result: data dict parsed from the detail page; ``play_id`` is required.
            The API response is stored in it under the key ``api/play-url``.
        soup: parsed detail page; the lyric is taken from ``div#content-lrc``.
        request_overrides: extra keyword arguments forwarded to ``self.post`` and to
            the link tester.

    Returns:
        A populated ``SongInfo``, or a bare placeholder when there is no ``play_id``
        or the endpoint yields no http(s) URL.
    '''
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    if not download_result.get('play_id'):
        return song_info
    try:
        resp = self.post('https://www.fangpi.net/api/play-url', json={'id': download_result['play_id']}, **request_overrides)
        resp.raise_for_status()
        download_result['api/play-url'] = resp2json(resp=resp)
    except Exception:
        download_result['api/play-url'] = {}
    download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
    if not download_url or not download_url.startswith('http'):
        return song_info
    # a page without the lyric container gets the 'NULL' lyric instead of raising on None
    lyric_node = soup.find("div", id="content-lrc")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.download_url_status['probe_status'] = probe_status
    song_info.file_size = probe_status['file_size']
    # only override the URL-derived extension when it is not a known audio extension
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try:
            # prefer the page's "mp3_duration" text (the field the quark route reads);
            # the former ['data', 'duration'] lookup is kept as a fallback
            # NOTE(review): presumably the page data has no top-level "data" key - confirm
            raw_duration = download_result.get('mp3_duration') or safeextractfromdict(download_result, ['data', 'duration'], '')
            numbers = list(map(int, re.findall(r'\d+', str(raw_duration or ''))))
            song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0, 0, 0] + numbers)[-3:])
        except Exception:
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00':
            song_info.duration = '-:-:-'
    # '歌词获取失败' is the site's "lyric fetch failed" marker
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to ``song_infos``.

    Args:
        keyword: not read by this method; the query is already part of ``search_url``.
        search_url: fully built search page URL.
        request_overrides: extra keyword arguments forwarded to ``self.get``.
        song_infos: list to append results to; a fresh list is created when omitted
            (previously a single default list was shared between calls).
        progress: optional progress object; updates are skipped when it is None.
        progress_id: task id passed to ``progress.update``.

    Returns:
        ``song_infos`` (the list passed in, or the newly created one).
    '''
    request_overrides = request_overrides or {}
    if song_infos is None:
        song_infos = []

    def report(outcome: str):
        # progress is optional, so never let reporting itself raise
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")

    try:
        resp = self.get(search_url, **request_overrides)
        resp.raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            if not isinstance(search_result, dict) or ('url' not in search_result):
                continue
            song_info = SongInfo(source=self.source)
            # fetch the detail page; a failing page only skips this one result
            try:
                resp = self.get(search_result['url'], **request_overrides)
                resp.raise_for_status()
            except Exception:
                continue
            soup = BeautifulSoup(resp.text, "lxml")
            script_tag = soup.find("script", string=re.compile(r"window\.appData"))
            if script_tag is None:
                continue
            js_text: str = script_tag.string
            # the page embeds its data as JSON.parse('<string literal>')
            m = re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)
            if not m:
                continue
            # a malformed payload skips this result instead of aborting the remaining ones
            try:
                download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
            except Exception:
                continue
            if not isinstance(download_result, dict):
                continue
            # undo the JSON "\/" escaping that may remain in URL fields
            if download_result.get("mp3_cover"):
                download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
            if download_result.get("extra_recommend_wap_url"):
                download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
            for share_link in (download_result.get("mp3_extra_urls", []) or []):
                if isinstance(share_link, dict):
                    share_link['share_link'] = str(share_link.get('share_link', '')).replace("\\/", "/")
            # preferred route: quark share links (only possible with cookies configured)
            if self.quark_parser_config.get('cookies'):
                song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # fallback route: the site's own play URL
            if not song_info.with_valid_download_url:
                song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        report('Success')
    except Exception as err:
        report(f'Error: {err}')
    return song_infos
|
||||
@@ -0,0 +1,113 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of FiveSongMusicClient: https://www.5song.xyz/index.html
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''FiveSongMusicClient'''
|
||||
class FiveSongMusicClient(BaseMusicClient):
|
||||
source = 'FiveSongMusicClient'
|
||||
MUSIC_QUALITY_RANK = {"DSD": 0, "WAV": 1, "FLAC": 2, "APE": 3, "ALAC": 4, "AAC": 5, "MP3": 6, "OGG": 7, "M4A": 8}
|
||||
def __init__(self, **kwargs):
    '''Initialise the client: base-class setup, quark check, default headers, session.

    Every keyword argument is forwarded unchanged to the base-class constructor.

    Raises:
        AssertionError: when ``quark_parser_config`` carries no cookies.
    '''
    super(FiveSongMusicClient, self).__init__(**kwargs)
    # explicit raise (same exception type as the former assert) so the check
    # still runs when Python is started with -O
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    self.default_search_headers = {"user-agent": browser_user_agent}
    self.default_download_headers = {"user-agent": browser_user_agent}
    # default_headers is the very same dict object as default_search_headers
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build one search URL per result page for ``keyword``.

    Args:
        keyword: search text; placed (percent-encoded) in the ``keyword`` query parameter.
        rule: accepted for interface parity; not used here.
        request_overrides: accepted for interface parity; not used here.

    Returns:
        A list of page URLs, enough to cover ``self.search_size_per_source`` results
        at ``self.search_size_per_page`` per page. Page 1 carries no ``page`` parameter.

    Side effect: ``self.search_size_per_page`` is set to
    ``min(self.search_size_per_source, 10)``.
    '''
    from urllib.parse import quote
    self.search_size_per_page = min(self.search_size_per_source, 10)
    search_urls, page_size, count = [], self.search_size_per_page, 0
    # encode once: "&", "#", "+" ... in the keyword must not alter the query string
    encoded_keyword = quote(str(keyword), safe='')
    while self.search_size_per_source > count:
        page_number = count // page_size + 1
        if page_number == 1:
            search_urls.append(f'https://www.5song.xyz/search.html?keyword={encoded_keyword}')
        else:
            search_urls.append(f'https://www.5song.xyz/search.html?page={page_number}&keyword={encoded_keyword}')
        count += page_size
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Extract the result entries from a search page.

    Args:
        html_text: markup of the search page.

    Returns:
        A list of dicts with the keys ``title``, ``formats``, ``singer``, ``date``,
        ``num``, ``detail_url`` and ``cover_url``; text fields are None when the
        corresponding element is missing.
    '''
    site_root = "https://www.5song.xyz"
    document = BeautifulSoup(html_text, "lxml")

    def text_or_none(anchor, selector):
        # stripped text of the first match below the anchor, or None without a match
        node = anchor.select_one(selector)
        return node.get_text(strip=True) if node else None

    entries = []
    for item in document.select("div.list ul > li"):
        anchor = item.select_one("a[href]")
        if not anchor:
            continue
        detail_url = urljoin(site_root, anchor.get("href", "").strip())
        title = text_or_none(anchor, "div.con div.t h3")
        span_texts = (span.get_text(strip=True) for span in anchor.select("div.con div.t span"))
        formats = [text for text in span_texts if text]
        singer = text_or_none(anchor, "div.singerNum div.singer")
        date = text_or_none(anchor, "div.singerNum div.date")
        num = text_or_none(anchor, "div.singerNum div.num")
        image = anchor.select_one("div.pic img")
        image_src = image.get("src") if image else None
        cover_url = urljoin(site_root, image_src) if image_src else None
        entries.append({
            "title": title,
            "formats": formats,
            "singer": singer,
            "date": date,
            "num": num,
            "detail_url": detail_url,
            "cover_url": cover_url,
        })
    return entries
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Run one search request and append every downloadable hit to ``song_infos``.

    Args:
        keyword: not read by this method; the query is already part of ``search_url``.
        search_url: fully built search page URL.
        request_overrides: extra keyword arguments forwarded to ``self.get`` and to
            the link tester.
        song_infos: list to append results to; a fresh list is created when omitted
            (previously a single default list was shared between calls).
        progress: optional progress object; updates are skipped when it is None.
        progress_id: task id passed to ``progress.update``.

    Returns:
        ``song_infos`` (the list passed in, or the newly created one).
    '''
    request_overrides, base_url = request_overrides or {}, "https://www.5song.xyz"
    if song_infos is None:
        song_infos = []
    format_pattern = re.compile(r"(DSD|WAV|FLAC|APE|ALAC|AAC|MP3|OGG|M4A)")

    def guess_format(label):
        # first known format name appearing in the (upper-cased) link label, else None
        m = format_pattern.search(str(label).upper())
        return m.group(1) if m else None

    def quality_sort_key(link):
        # lower rank sorts first; labels without a known format go last (999)
        fmt = guess_format(link.get("label", ""))
        return (FiveSongMusicClient.MUSIC_QUALITY_RANK.get(fmt, 999), fmt or "")

    def report(outcome: str):
        # progress is optional, so never let reporting itself raise
        if progress is not None:
            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")

    try:
        resp = self.get(search_url, **request_overrides)
        resp.raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            if not isinstance(search_result, dict) or ('detail_url' not in search_result):
                continue
            song_info = SongInfo(source=self.source)
            # the id is the file name of the detail URL without its extension
            song_id = urlparse(str(search_result['detail_url'])).path.strip('/').split('/')[-1].split('.')[0]
            # fetch the detail page; a failing page only skips this one result
            try:
                resp = self.get(search_result['detail_url'], **request_overrides)
                resp.raise_for_status()
            except Exception:
                continue
            soup, quark_links = BeautifulSoup(resp.text, "lxml"), []
            for li in soup.select("div.download ul li[data-url]"):
                quark_url = (li.get("data-url") or "").strip()
                if not quark_url:
                    continue
                a = li.select_one("a[href]")
                label = a.get_text(" ", strip=True) if a else None
                pc_download_href = a.get("href", "").strip() if a else None
                pc_download_url = urljoin(base_url, pc_download_href) if pc_download_href else None
                if "quark" in quark_url:
                    quark_links.append({"label": label, "quark_url": quark_url, "pc_download_url": pc_download_url})
            if not quark_links:
                continue
            download_result = dict(quark_links=quark_links)
            # the lyric is the same for every link of this page, so extract it once;
            # a page without the lyric container yields an empty lyric instead of raising on None
            lyric_container = soup.select_one("div.viewCon div.text")
            lyric_lines = [p.get_text(strip=True) for p in lyric_container.select("p") if p.get_text(strip=True)] if lyric_container is not None else []
            page_lyric = cleanlrc("\n".join(lyric_lines))
            # try the links in ranked order and keep the first one that resolves
            for quark_link in sorted(download_result['quark_links'], key=quality_sort_key):
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_link['quark_url'], **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'):
                    continue
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL', ext='mp3', file_size_bytes=None, file_size=None,
                    identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=page_lyric, cover_url=search_result.get('cover_url'),
                    download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                )
                probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.download_url_status['probe_status'] = probe_status
                song_info.file_size = probe_status['file_size']
                # the probed extension wins; fall back to mp3 when the probe reports nothing usable
                song_info.ext = probe_status['ext']
                if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
                    song_info.ext = 'mp3'
                if song_info.with_valid_download_url:
                    break
            # '歌词获取失败' is the site's "lyric fetch failed" marker
            if not song_info.lyric or '歌词获取失败' in song_info.lyric:
                song_info.lyric = 'NULL'
            # a zero duration counts as unknown too, as in the sibling clients' checks
            if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
                song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
            if not song_info.with_valid_download_url:
                continue
            song_infos.append(song_info)
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page:
                break
        report('Success')
    except Exception as err:
        report(f'Error: {err}')
    return song_infos
|
||||
@@ -0,0 +1,106 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of FLMP3MusicClient: https://www.flmp3.pro/index.html
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''FLMP3MusicClient'''
|
||||
class FLMP3MusicClient(BaseMusicClient):
|
||||
source = 'FLMP3MusicClient'
|
||||
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
|
||||
def __init__(self, **kwargs):
    '''
    Initialise the FLMP3 client: validate the Quark configuration, install the default headers and open the session.
    Args:
        **kwargs: forwarded unchanged to BaseMusicClient.__init__.
    Raises:
        AssertionError: when "quark_parser_config" carries no cookies (downloads in this client are resolved through QuarkParser).
    '''
    super(FLMP3MusicClient, self).__init__(**kwargs)
    # an explicit raise (instead of an assert statement) keeps the check alive under "python -O"; the exception type is unchanged
    if not self.quark_parser_config.get('cookies'):
        raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    # two separate dict objects on purpose: search and download headers may be modified independently
    self.default_search_headers = {"user-agent": user_agent}
    self.default_download_headers = {"user-agent": user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build the paginated search urls for "keyword".
    Side effect: sets self.search_size_per_page to min(self.search_size_per_source, 12).
    Args:
        keyword: raw search text; it is percent-encoded before being placed in the query string.
        rule: not used by this client, accepted for interface compatibility.
        request_overrides: not used by this client, accepted for interface compatibility.
    Returns:
        list of url strings, one per page required to cover self.search_size_per_source results (empty when that size is not positive).
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # encode the keyword so that characters such as "&", "#", "+" or "%" cannot corrupt the query string
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls
    self.search_size_per_page = min(self.search_size_per_source, 12)
    search_urls, page_size, count = [], self.search_size_per_page, 0
    while self.search_size_per_source > count:
        page_number = int(count // page_size) + 1
        # the first page has no "page" parameter
        if page_number == 1:
            search_urls.append(f'https://www.flmp3.pro/search.html?keyword={encoded_keyword}')
        else:
            search_urls.append(f'https://www.flmp3.pro/search.html?page={page_number}&keyword={encoded_keyword}')
        count += page_size
    # return
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str):
    '''
    Extract the song entries listed on a search result page.
    Args:
        html_text: HTML source of the search page.
    Returns:
        list of dicts with the keys "song_url", "title", "artist", "date", "img_url" and "img_alt"; a value is None when the
        corresponding element (or the link's href) is absent. List items without any <a> element are skipped.
    '''
    # relative links are resolved against the "www" host, the same host _constructsearchurls requests the page from
    # and _parsesongdetailfordownloadpages resolves its links against
    search_results, base_url, soup = [], "https://www.flmp3.pro", BeautifulSoup(html_text, "html.parser")
    text_of = lambda node: node.get_text(strip=True) if node else None
    for li in soup.select("div.list ul.flex.flex-wrap > li"):
        if not (a := li.select_one("a")): continue
        song_href = a.get("href", "")
        img_el = li.select_one("div.pic img")
        search_results.append({
            "song_url": urljoin(base_url, song_href) if song_href else None,
            "title": text_of(li.select_one("div.con div.t h3")),
            "artist": text_of(li.select_one("div.con div.t p")),
            "date": text_of(li.select_one("div.con div.date")),
            "img_url": img_el.get("src") if img_el else None,
            "img_alt": img_el.get("alt") if img_el else None,
        })
    return search_results
|
||||
'''_parsesongdetailfordownloadpages'''
|
||||
def _parsesongdetailfordownloadpages(self, html_text: str):
    '''
    Collect the download-page links of a song detail page, highest ranked quality first.
    Args:
        html_text: HTML source of the song detail page.
    Returns:
        dict with
          "links_sorted": list of {"text", "quality", "rank", "url"} dicts sorted by descending "rank";
          "song_id": last path segment (without extension) of the best link's url, or None when the page has no links.
    '''
    quality_rank = FLMP3MusicClient.MUSIC_QUALITY_RANK
    soup, base_url, links = BeautifulSoup(html_text, "html.parser"), "https://www.flmp3.pro", []
    for a in soup.select(".btnBox a[href]"):
        text, href = a.get_text(strip=True), a["href"]
        if not href: continue
        # first label of MUSIC_QUALITY_RANK (in declaration order) contained in the upper-cased button text
        quality = next((q for q in quality_rank if q in str(text).upper()), "UNKNOWN")
        links.append({"text": text, "quality": quality, "rank": quality_rank.get(quality, 0), "url": urljoin(base_url, href)})
    links_sorted = sorted(links, key=lambda x: x["rank"], reverse=True)
    # a page without download buttons yields an empty result instead of an IndexError, so callers can test "links_sorted"
    song_id = urlparse(str(links_sorted[0]['url'])).path.strip('/').split('/')[-1].split('.')[0] if links_sorted else None
    return {'links_sorted': links_sorted, 'song_id': song_id}
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page and append every song with a usable download link to "song_infos".
    For each hit the detail page is fetched and its download pages are tried from the highest ranked quality down; the first
    Quark share link whose resolved url makes song_info.with_valid_download_url true is kept.
    Args:
        keyword: search keyword (not read in the body; the request goes to "search_url").
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to every self.get call and to the link tester.
        song_infos: list extended in place; a fresh list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: id of the progress task to update.
    Returns:
        the "song_infos" list. Exceptions raised while processing the page are reported through "progress", not re-raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a None default avoids sharing one list between calls that omit "song_infos"
    song_infos = [] if song_infos is None else song_infos
    def report(status: str):
        # "progress" defaults to None, so it must not be dereferenced unconditionally (notably inside the except branch)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('song_url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            try:
                (resp := self.get(search_result['song_url'], **request_overrides)).raise_for_status()
                download_result = self._parsesongdetailfordownloadpages(resp.text)
            except Exception:
                continue
            if not download_result['links_sorted']: continue
            for download_page_details in download_result['links_sorted']:
                try:
                    (resp := self.get(download_page_details['url'], **request_overrides)).raise_for_status()
                    soup = BeautifulSoup(resp.text, "lxml")
                    quark_download_url = soup.select_one("a.linkbtn[href]")['href']
                except Exception:
                    continue
                if not quark_download_url or not quark_download_url.startswith('http'): continue
                download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
                if not download_url or not str(download_url).startswith('http'): continue
                duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                duration_in_secs = duration[0] if duration else 0
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('artist')), album='NULL',
                    ext='mp3', file_size_bytes=None, file_size='NULL', identifier=download_result['song_id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric='NULL', cover_url=search_result.get('img_url', None),
                    download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                )
                song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                song_info.ext = song_info.download_url_status['probe_status']['ext']
                # fall back to "mp3" when the probed extension is not a known audio extension
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if song_info.with_valid_download_url: break
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        report("Success")
    # failure
    except Exception as err:
        report(f"Error: {err}")
    # return
    return song_infos
|
||||
@@ -0,0 +1,142 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of GequbaoMusicClient: https://www.gequbao.com/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import ast
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''GequbaoMusicClient'''
|
||||
class GequbaoMusicClient(BaseMusicClient):
|
||||
source = 'GequbaoMusicClient'
|
||||
def __init__(self, **kwargs):
    '''
    Initialise the Gequbao client: warn when Quark cookies are missing, install the default headers and open the session.
    Args:
        **kwargs: forwarded unchanged to BaseMusicClient.__init__.
    '''
    super(GequbaoMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    # search and download headers are distinct dict objects holding the same user agent
    self.default_search_headers = {"user-agent": browser_user_agent}
    self.default_download_headers = {"user-agent": browser_user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build the single search url for "keyword".
    Side effect: sets self.search_size_per_page to self.search_size_per_source.
    Args:
        keyword: raw search text; it is percent-encoded before being placed in the url path.
        rule: not used by this client, accepted for interface compatibility.
        request_overrides: not used by this client, accepted for interface compatibility.
    Returns:
        list containing exactly one url string.
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # the keyword is a path segment: "/", "?", "#" and "%" would otherwise change the structure of the url
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls
    search_urls = [f'https://www.gequbao.com/s/{encoded_keyword}']
    self.search_size_per_page = self.search_size_per_source
    # return
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str, base_url: str = "https://www.gequbao.com"):
    '''
    Extract the song rows of a search result page.
    Args:
        html_text: HTML source of the search page.
        base_url: base against which the relative "/music/..." links are resolved.
    Returns:
        list of dicts with the keys "name", "artist" (None when the element is absent), "url" (absolute) and "id" (last path
        segment of the link). Rows without a "/music/" link are skipped.
    '''
    document = BeautifulSoup(html_text, "html.parser")
    search_results = []
    for row in document.select("div.row.no-gutters.py-2d5.border-top.align-items-center"):
        link = row.select_one('a[href^="/music/"]')
        if not link:
            continue
        title_node = row.select_one("span.text-primary")
        artist_node = row.select_one("small.text-jade")
        entry = {}
        entry["name"] = title_node.get_text(strip=True) if title_node else None
        entry["artist"] = artist_node.get_text(strip=True) if artist_node else None
        entry["url"] = urljoin(base_url, link["href"])
        entry["id"] = link["href"].rstrip("/").split("/")[-1]
        search_results.append(entry)
    return search_results
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the Quark share links listed in download_result["mp3_extra_urls"].
    The links are tried in order; the first one whose resolved url makes song_info.with_valid_download_url true is kept.
    Args:
        search_result: entry produced by _parsesearchresultsfromhtml (its "url" is the identifier fallback).
        download_result: data parsed from the song page; "quark_parse_result" is written into it.
        soup: parsed song page, read for the lyric container (div#content-lrc).
        request_overrides: extra arguments handed to the quark audio link tester.
    Returns:
        SongInfo; callers must check with_valid_download_url, since an unusable object is returned when no link works.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
        # entries without a usable "share_link" cannot be resolved; skip them instead of raising
        if not isinstance(quark_download_url, dict) or not quark_download_url.get('share_link'): continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        # a song page without the lyric container must not abort the whole search page with an AttributeError
        lyric_node = soup.find("div", id="content-lrc")
        lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
            identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric,
            cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        song_info.ext = song_info.download_url_status['probe_status']['ext']
        # fall back to "mp3" when the probed extension is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # duration fallback 1: the "mp3_duration" field of the page data, left-padded to H:M:S
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        try:
            parts = list(map(int, (download_result.get('mp3_duration', '00:00:00') or '00:00:00').split(":")))
            song_info.duration = "{:02}:{:02}:{:02}".format(*([0] * (3 - len(parts)) + parts))
        except (AttributeError, TypeError, ValueError, IndexError):
            # malformed duration field: treat as unknown (same convention as _parsesearchresultfromweb)
            song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # duration fallback 2: derive it from the lyric via extractdurationsecondsfromlrc
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the site's own play-url API (used when no Quark link produced a valid download).
    Args:
        search_result: entry produced by _parsesearchresultsfromhtml (its "url" is the identifier fallback).
        download_result: data parsed from the song page; needs "play_id", and the API answer is stored under "api/play-url".
        soup: parsed song page, read for the lyric container (div#content-lrc).
        request_overrides: extra arguments forwarded to self.post and to the audio link tester.
    Returns:
        SongInfo; callers must check with_valid_download_url, since an unusable object is returned when no url is obtained.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    try:
        (resp := self.post('https://www.gequbao.com/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status()
        download_result['api/play-url'] = resp2json(resp=resp)
    except Exception:
        download_result['api/play-url'] = {}
    download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
    # str() guards against a non-string value in the API answer
    if not download_url or not str(download_url).startswith('http'): return song_info
    # a song page without the lyric container must not abort the whole search page with an AttributeError
    lyric_node = soup.find("div", id="content-lrc")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # prefer the probed extension when the one taken from the url is not a known audio extension, else fall back to "mp3"
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
        # "data.duration" is tried first; "mp3_duration" (the field _parsesearchresultfromquark reads from the same dict) is the fallback
        raw_duration = safeextractfromdict(download_result, ['data', 'duration'], '') or download_result.get('mp3_duration') or ''
        try: song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0, 0, 0] + list(map(int, re.findall(r'\d+', raw_duration))))[-3:])
        except Exception: song_info.duration = '-:-:-'
        if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # last resort: derive the duration from the lyric via extractdurationsecondsfromlrc
    if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page and append every song with a usable download link to "song_infos".
    For each hit the song page is fetched and the JSON passed to JSON.parse(...) in its "window.appData" script is decoded;
    Quark links are tried first (only when Quark cookies are configured), then the site's play-url API.
    Args:
        keyword: search keyword (not read in the body; the request goes to "search_url").
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to the HTTP calls and link testers.
        song_infos: list extended in place; a fresh list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: id of the progress task to update.
    Returns:
        the "song_infos" list. Exceptions raised while processing the page are reported through "progress", not re-raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a None default avoids sharing one list between calls that omit "song_infos"
    song_infos = [] if song_infos is None else song_infos
    def report(status: str):
        # "progress" defaults to None, so it must not be dereferenced unconditionally (notably inside the except branch)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
            except Exception: continue
            script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
            if script_tag is None: continue
            js_text: str = script_tag.string
            if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
            # one undecodable song page only skips that song instead of aborting the whole search page
            try: download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
            except Exception: continue
            if not isinstance(download_result, dict): continue
            # undo the "\/" escaping left in the url fields
            if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
            if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
            for share_link in (download_result.get("mp3_extra_urls", []) or []):
                if isinstance(share_link, dict): share_link['share_link'] = str(share_link.get('share_link', '')).replace("\\/", "/")
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        report("Success")
    # failure
    except Exception as err:
        report(f"Error: {err}")
    # return
    return song_infos
|
||||
@@ -0,0 +1,153 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of GequhaiMusicClient: https://www.gequhai.com/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import base64
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, extractdurationsecondsfromlrc, seconds2hms, searchdictbykey, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''GequhaiMusicClient'''
|
||||
class GequhaiMusicClient(BaseMusicClient):
|
||||
source = 'GequhaiMusicClient'
|
||||
def __init__(self, **kwargs):
    '''
    Initialise the Gequhai client: warn when Quark cookies are missing, install the default headers and open the session.
    Args:
        **kwargs: forwarded unchanged to BaseMusicClient.__init__.
    '''
    super(GequhaiMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    # search and download headers are distinct dict objects holding the same user agent
    self.default_search_headers = {"user-agent": browser_user_agent}
    self.default_download_headers = {"user-agent": browser_user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''
    Build the paginated search urls for "keyword".
    Side effect: sets self.search_size_per_page to min(self.search_size_per_source, 12).
    Args:
        keyword: raw search text; it is percent-encoded before being placed in the url path.
        rule: not used by this client, accepted for interface compatibility.
        request_overrides: not used by this client, accepted for interface compatibility.
    Returns:
        list of url strings, one per page required to cover self.search_size_per_source results (empty when that size is not positive).
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # the keyword is a path segment: an unescaped "?" or "#" would cut the url short and clash with the "?page=" suffix
    encoded_keyword = quote(str(keyword), safe='')
    # construct search urls
    self.search_size_per_page = min(self.search_size_per_source, 12)
    search_urls, page_size, count = [], self.search_size_per_page, 0
    while self.search_size_per_source > count:
        page_number = int(count // page_size) + 1
        # the first page has no "page" parameter
        if page_number == 1:
            search_urls.append(f'https://www.gequhai.com/s/{encoded_keyword}')
        else:
            search_urls.append(f'https://www.gequhai.com/s/{encoded_keyword}?page={page_number}')
        count += page_size
    # return
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str):
    '''
    Extract the rows of the result table (table#myTables) of a search page.
    Args:
        html_text: HTML source of the search page.
    Returns:
        list of dicts with the keys "index" (int when the first cell is all digits, else its text), "title", "singer",
        "href", "play_url" (absolute, "" without href) and "play_id" (digits after "/play/", else None).
        An empty list is returned when the table is missing; rows with fewer than three cells are skipped.
    '''
    base_url = "https://www.gequhai.com"
    table = BeautifulSoup(html_text, "html.parser").select_one("table#myTables")
    if not table:
        return []
    search_results = []
    for row in table.select("tbody tr"):
        cells = row.find_all("td")
        if len(cells) < 3:
            continue
        index_cell, title_cell, singer_cell = cells[0], cells[1], cells[2]
        idx_text = index_cell.get_text(strip=True)
        link = title_cell.find("a")
        # title and href come from the link when the title cell has one, otherwise from the cell itself
        if link:
            title, href = link.get_text(strip=True), link.get("href", "")
        else:
            title, href = title_cell.get_text(strip=True), ""
        play_url = urljoin(base_url, href) if href else ""
        singer = singer_cell.get_text(strip=True)
        id_match = re.search(r"/play/(\d+)", href or "")
        play_id = id_match.group(1) if id_match else None
        index = int(idx_text) if idx_text.isdigit() else idx_text
        search_results.append({"index": index, "title": title, "singer": singer, "href": href, "play_url": play_url, "play_id": play_id})
    return search_results
|
||||
'''_decodequarkurl'''
|
||||
def _decodequarkurl(self, quark_url: str):
    '''
    Decode an obfuscated share url: every "#" stands for "H" and the result is base64-encoded UTF-8 text.
    Args:
        quark_url: obfuscated string; surrounding whitespace and missing "=" padding are tolerated.
    Returns:
        the decoded text.
    Raises:
        binascii.Error (a ValueError): when the value is not decodable base64.
        UnicodeDecodeError (a ValueError): when the decoded bytes are not valid UTF-8.
    '''
    encoded = quark_url.strip().replace("#", "H")
    # restore stripped "=" padding; correctly padded input is left unchanged
    encoded += "=" * (-len(encoded) % 4)
    return base64.b64decode(encoded).decode("utf-8", errors="strict")
|
||||
'''_extractappdataandwindowvars'''
|
||||
def _extractappdataandwindowvars(self, js_text: str) -> dict:
    '''
    Collect "window.appData = {...};" and the scalar "window.<name> = <literal>;" assignments found in a page.
    Precedence for a name: appData keys and quoted string assignments come first (a double-quoted assignment overrides a
    single-quoted one); numeric and then true/false/null assignments only fill names that are still missing, first occurrence wins.
    Args:
        js_text: text to scan (the caller passes the whole page source).
    Returns:
        dict with the parsed object under "appData" (its keys are also merged in when it is a dict), the window variables,
        "mp3_name" ("<title>-<author>", unless already present) when both parts exist, and "mp3_extra_url_decoded" when
        "mp3_extra_url" exists ("" if that value cannot be decoded).
    '''
    out, m = {}, re.search(r"window\.appData\s*=\s*(\{.*?\})\s*;", js_text, flags=re.S)
    if m:
        app = json_repair.loads(m.group(1))
        out["appData"] = app
        # only a mapping can be merged key by key
        if isinstance(app, dict): out.update(app)
    # string literals
    for k, v in re.findall(r"window\.(\w+)\s*=\s*'([^']*)'\s*;", js_text): out[k] = v
    for k, v in re.findall(r'window\.(\w+)\s*=\s*"([^"]*)"\s*;', js_text): out[k] = v
    # numeric literals: never override an existing name
    for k, v in re.findall(r"window\.(\w+)\s*=\s*(-?\d+(?:\.\d+)?)\s*;", js_text):
        if k in out: continue
        out[k] = int(v) if re.fullmatch(r"-?\d+", v) else float(v)
    # true / false / null literals: never override an existing name
    js_constants = {"true": True, "false": False, "null": None}
    for k, v in re.findall(r"window\.(\w+)\s*=\s*(true|false|null)\s*;", js_text, flags=re.I):
        if k in out: continue
        out[k] = js_constants[str(v).lower()]
    if "mp3_title" in out and "mp3_author" in out: out.setdefault("mp3_name", f"{out['mp3_title']}-{out['mp3_author']}")
    if "mp3_extra_url" in out:
        # an undecodable share link must not discard the page data: the caller can still use the play API
        try: out["mp3_extra_url_decoded"] = self._decodequarkurl(out["mp3_extra_url"])
        except (ValueError, TypeError, AttributeError): out["mp3_extra_url_decoded"] = ''
    return out
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the decoded Quark url stored in download_result["mp3_extra_url_decoded"].
    Args:
        search_result: entry produced by _parsesearchresultsfromhtml (its "play_url" is the identifier fallback).
        download_result: data extracted from the play page; "quark_parse_result" is written into it.
        soup: parsed play page, read for the lyric container (div#content-lrc2).
        request_overrides: extra arguments handed to the quark audio link tester.
    Returns:
        SongInfo; an empty SongInfo(source=self.source) is returned whenever no valid download url is obtained.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    quark_download_url = download_result.get('mp3_extra_url_decoded', '')
    if not quark_download_url or not str(quark_download_url).startswith('http'): return song_info
    download_result['quark_parse_result'], download_url = QuarkParser.parsefromdirurl(quark_download_url, **self.quark_parser_config)
    # nothing to test or probe when the parser produced no http url (same guard as in _parsesearchresultfromweb)
    if not download_url or not str(download_url).startswith('http'): return song_info
    duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
    duration_in_secs = duration[0] if duration else 0
    # a play page without the lyric container must not abort the whole search page with an AttributeError
    lyric_node = soup.find("div", id="content-lrc2")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author', None)), album='NULL', ext='mp3', file_size=None,
        identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric,
        cover_url=download_result.get('mp3_cover'), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
    )
    song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    song_info.ext = song_info.download_url_status['probe_status']['ext']
    # fall back to "mp3" when the probed extension is not a known audio extension
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.with_valid_download_url: return SongInfo(source=self.source)
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # unknown duration: derive it from the lyric via extractdurationsecondsfromlrc
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
    '''
    Build a SongInfo from the site's own music API (used when the Quark link produced no valid download).
    Args:
        search_result: entry produced by _parsesearchresultsfromhtml (its "play_url" is the identifier fallback).
        download_result: data extracted from the play page; needs "play_id", and the API answer is stored under "api/music".
        soup: parsed play page, read for the lyric container (div#content-lrc2).
        request_overrides: extra arguments forwarded to self.post and to the audio link tester.
    Returns:
        SongInfo; an empty SongInfo(source=self.source) is returned whenever no valid download url is obtained.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # parse
    if 'play_id' not in download_result or not download_result['play_id']: return song_info
    headers = {
        "accept": "application/json, text/javascript, */*; q=0.01", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "content-type": "application/x-www-form-urlencoded; charset=UTF-8", "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin",
        "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"", "x-custom-header": "SecretKey", "x-requested-with": "XMLHttpRequest", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty", "origin": "https://www.gequhai.com",
        "priority": "u=1, i", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }
    # a failing API call only invalidates this song; it must not abort the remaining results of the search page
    try:
        (resp := self.post('https://www.gequhai.com/api/music', data={'id': download_result['play_id'], 'type': '0'}, headers=headers, **request_overrides)).raise_for_status()
        download_result['api/music'] = resp2json(resp=resp)
    except Exception:
        download_result['api/music'] = {}
    download_url = safeextractfromdict(download_result['api/music'], ['data', 'url'], '')
    if not download_url or not str(download_url).startswith('http'): return song_info
    # a play page without the lyric container must not raise an AttributeError
    lyric_node = soup.find("div", id="content-lrc2")
    lyric = cleanlrc(lyric_node.get_text("\n", strip=True)) if lyric_node is not None else 'NULL'
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1],
        file_size=None, identifier=download_result.get('mp3_id') or urlparse(str(search_result['play_url'])).path.strip('/').split('/')[-1], duration='-:-:-', lyric=lyric, cover_url=download_result.get('mp3_cover'),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # prefer the probed extension when the one taken from the url is not a known audio extension, else fall back to "mp3"
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    if not song_info.with_valid_download_url: return SongInfo(source=self.source)
    if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
    # unknown duration: derive it from the lyric via extractdurationsecondsfromlrc
    if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''
    Fetch one search page and append every song with a usable download link to "song_infos".
    For each hit the play page is fetched and its window variables are extracted; the Quark link is tried first (only when
    Quark cookies are configured), then the site's music API.
    Args:
        keyword: search keyword (not read in the body; the request goes to "search_url").
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to the HTTP calls and link testers.
        song_infos: list extended in place; a fresh list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: id of the progress task to update.
    Returns:
        the "song_infos" list. Exceptions raised while processing the page are reported through "progress", not re-raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a None default avoids sharing one list between calls that omit "song_infos"
    song_infos = [] if song_infos is None else song_infos
    def report(status: str):
        # "progress" defaults to None, so it must not be dereferenced unconditionally (notably inside the except branch)
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({status})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----fetch basic information
            try:
                (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status()
                download_result = self._extractappdataandwindowvars(resp.text)
            except Exception:
                continue
            soup = BeautifulSoup(resp.text, 'lxml')
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        report("Success")
    # failure
    except Exception as err:
        report(f"Error: {err}")
    # return
    return song_infos
|
||||
@@ -0,0 +1,108 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of HTQYYMusicClient: http://www.htqyy.com/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
from html import unescape
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from ..utils import legalizestring, usesearchheaderscookies, SongInfo, AudioLinkTester
|
||||
|
||||
|
||||
'''HTQYYMusicClient'''
|
||||
class HTQYYMusicClient(BaseMusicClient):
|
||||
source = 'HTQYYMusicClient'
|
||||
def __init__(self, **kwargs):
    '''
    Initialise the HTQYY client: install the default search / download headers and open the session.
    Args:
        **kwargs: forwarded unchanged to BaseMusicClient.__init__.
    '''
    super(HTQYYMusicClient, self).__init__(**kwargs)
    browser_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
    site_referer = "http://www.htqyy.com/"
    # headers sent with search requests (key order kept as declared)
    search_headers = {}
    search_headers["user-agent"] = browser_user_agent
    search_headers["accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    search_headers["accept-encoding"] = "gzip, deflate"
    search_headers["accept-language"] = "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7"
    search_headers["cache-control"] = "max-age=0"
    search_headers["host"] = "www.htqyy.com"
    search_headers["proxy-connection"] = "keep-alive"
    search_headers["referer"] = site_referer
    search_headers["upgrade-insecure-requests"] = "1"
    self.default_search_headers = search_headers
    # headers sent with download requests
    self.default_download_headers = {"accept-encoding": "identity;q=1, *;q=0", "referer": site_referer, "user-agent": browser_user_agent}
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
|
||||
# init
|
||||
rule, request_overrides = rule or {}, request_overrides or {}
|
||||
# construct search urls
|
||||
search_urls = [f'http://www.htqyy.com/home/search?wd={keyword}']
|
||||
self.search_size_per_page = self.search_size_per_source
|
||||
# return
|
||||
return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str):
|
||||
base_url, soup = "http://www.htqyy.com", BeautifulSoup(html_text, "html.parser")
|
||||
items, search_results = soup.select("ul#musicList li.musicItem"), []
|
||||
for li in items:
|
||||
chk = li.select_one('input[type="checkbox"][name="checked"]')
|
||||
song_id = chk["value"].strip() if chk and chk.has_attr("value") else None
|
||||
a_title = li.select_one("span.title a")
|
||||
play_url = urljoin(base_url, play_href) if (play_href := a_title["href"].strip() if a_title and a_title.has_attr("href") else None) else None
|
||||
artist = a_artist.get_text(" ", strip=True) if (a_artist := li.select_one("span.artistName a")) else None; artist_url = urljoin(base_url, a_artist["href"]) if a_artist and a_artist.has_attr("href") else None
|
||||
album = a_album.get_text(" ", strip=True) if (a_album := li.select_one("span.albumName a")) else None; album_url = urljoin(base_url, a_album["href"]) if a_album and a_album.has_attr("href") else None
|
||||
search_results.append({"id": song_id, "sid": a_title.get("sid") if a_title else None, "title": a_title.get_text(" ", strip=True) if a_title else None, "title_attr": a_title.get("title") if a_title else None, "artist": artist, "artist_url": artist_url, "album": album, "album_url": album_url, "play_url": play_url})
|
||||
return search_results
|
||||
'''_extractplayscriptinfo'''
|
||||
def _extractplayscriptinfo(self, html_text: str):
|
||||
unescape_func = lambda x: unescape(x) if isinstance(x, str) else x
|
||||
grabvar_func = lambda name: (None if (m := re.search(rf'\bvar\s+{re.escape(name)}\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', t)) is None else (int(v) if m.group(3) is not None else v) if (v := (m.group(1) or m.group(2) or m.group(3))) is not None else None)
|
||||
soup, script_text = BeautifulSoup(html_text, "html.parser"), None
|
||||
for s in soup.find_all("script"):
|
||||
if not (txt := s.string or s.get_text()): continue
|
||||
if ("PageData." in txt or "var PageData" in txt) and ("fileHost" in txt or "var mp3" in txt): script_text = txt; break
|
||||
if not script_text: return {}
|
||||
t, pagedata = script_text, {}
|
||||
for m in re.finditer(r'PageData\.(\w+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([0-9]+))\s*;', t):
|
||||
key, val = m.group(1), m.group(2) or m.group(3) or m.group(4)
|
||||
if m.group(4) is not None: val = int(val)
|
||||
pagedata[key] = val
|
||||
file_format, ip = grabvar_func("format") or pagedata.get("format"), grabvar_func("ip")
|
||||
file_host, mp3_path, bd_text, bd_text2, img_url, mp3_url = grabvar_func("fileHost"), grabvar_func("mp3"), grabvar_func("bdText"), grabvar_func("bdText2"), grabvar_func("imgUrl"), None
|
||||
if file_host and mp3_path and re.search(r'\bmp3\s*=\s*fileHost\s*\+\s*mp3\s*;', t): mp3_url = file_host + mp3_path
|
||||
return {"format": unescape_func(file_format), "PageData": {k: unescape_func(v) for k, v in pagedata.items()}, "ip": unescape_func(ip), "fileHost": unescape_func(file_host), "mp3_path": unescape_func(mp3_path), "mp3_url": unescape_func(mp3_url), "bdText": unescape_func(bd_text), "bdText2": unescape_func(bd_text2), "imgUrl": unescape_func(img_url)}
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
|
||||
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
|
||||
# init
|
||||
request_overrides = request_overrides or {}
|
||||
# successful
|
||||
try:
|
||||
# --search results
|
||||
(resp := self.get(search_url, **request_overrides)).raise_for_status()
|
||||
search_results = self._parsesearchresultsfromhtml(resp.text)
|
||||
for search_result in search_results:
|
||||
# --download results
|
||||
if not isinstance(search_result, dict) or ('play_url' not in search_result): continue
|
||||
song_info = SongInfo(source=self.source)
|
||||
try: (resp := self.get(search_result['play_url'], **request_overrides)).raise_for_status(); download_result = self._extractplayscriptinfo(resp.text)
|
||||
except Exception: continue
|
||||
download_url: str = download_result.get('mp3_url')
|
||||
if not download_url or not download_url.startswith('http'): continue
|
||||
song_info = SongInfo(
|
||||
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('artist')), album=legalizestring(search_result.get('album')),
|
||||
ext=download_result.get('format', 'mp3') or download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result.get('id') or search_result.get('sid'), duration_s=None, duration='-:-:-', lyric='NULL', cover_url=download_result.get('imgUrl'),
|
||||
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
|
||||
)
|
||||
if not song_info.with_valid_download_url: continue
|
||||
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
|
||||
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
|
||||
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
||||
# --append to song_infos
|
||||
song_infos.append(song_info)
|
||||
# --judgement for search_size
|
||||
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
|
||||
# --update progress
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
|
||||
# failure
|
||||
except Exception as err:
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
|
||||
# return
|
||||
return song_infos
|
||||
@@ -0,0 +1,171 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of JCPOOMusicClient: https://www.jcpoo.cn/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import ast
|
||||
import copy
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
|
||||
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''JCPOOMusicClient'''
|
||||
class JCPOOMusicClient(BaseMusicClient):
    '''Music client for https://www.jcpoo.cn/.

    Songs are resolved through the Quark share links embedded in the detail page when
    quark cookies are configured, and through the site's own play endpoint otherwise.
    '''
    source = 'JCPOOMusicClient'
    # relative preference of audio formats; a larger value is tried first
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    def __init__(self, **kwargs):
        '''Forward kwargs to BaseMusicClient, warn when quark cookies are missing, install headers and call _initsession().'''
        super(JCPOOMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        # the search headers double as the session-wide defaults (same dict object)
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''Return one search url per result page needed to cover self.search_size_per_source hits.

        `rule` entries are merged into the query parameters (the page index is always
        overwritten). Sets self.search_size_per_page to min(search_size_per_source, 30).
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {'page': 0, 'keyword': keyword}
        default_rule.update(rule)
        # construct search urls (page indices are zero-based)
        base_url = 'https://www.jcpoo.cn/search?'
        self.search_size_per_page = min(self.search_size_per_source, 30)
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(default_rule)
            page_rule['page'] = int(count // page_size)
            search_urls.append(base_url + urlencode(page_rule))
            count += page_size
        # return
        return search_urls
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text: str):
        '''Parse the search page into dicts with the keys title, artist, url and id.

        Titles shaped like "singer《song》" are split into artist and title; otherwise
        artist is None. `id` is the link's "id" query parameter without its "MUSIC_"
        prefix, or None when the link carries no such parameter.
        '''
        soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.jcpoo.cn/"
        title_pattern = re.compile(r'^(.*?)《(.*?)》$')
        for li in soup.select("ul.tuij_song li.song_item2"):
            if not (a := li.select_one("a[href]")): continue
            href = a["href"].strip()
            full_url = urljoin(base_url, href)
            title_div = a.select_one(".song_info2 > div")
            title = title_div.get_text(strip=True) if title_div else a.get_text(" ", strip=True)
            mid = parse_qs(urlparse(href).query).get("id", [None])[0]
            m = title_pattern.match(title.strip())
            singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title.strip())
            # a link without an id must not abort parsing of the remaining entries
            song_id = mid.removeprefix('MUSIC_') if mid is not None else None
            search_results.append({"title": song_name, "artist": singer, "url": full_url, "id": song_id})
        return search_results
    '''_extractquarklinksfromhtml'''
    def _extractquarklinksfromhtml(self, html_text: str):
        '''Collect the Quark share links assigned to javascript variables in the page's scripts.

        Returns {'quark_links': [...], 'cover_url': str | None}. Each link is a dict with
        key (variable name), format (guessed from the variable name) and url; links are
        de-duplicated by url and ordered by MUSIC_QUALITY_RANK, best first.
        '''
        PAT = re.compile(
            r"""(?:const|let|var)\s+
            (?P<key>[A-Za-z0-9_]+?)\s*=\s*
            (?P<quote>["'])
            (?P<url>https?://pan\.quark\.cn/s/[^"']+)
            (?P=quote)
            """, re.VERBOSE
        )
        rank = JCPOOMusicClient.MUSIC_QUALITY_RANK

        def extract_quark_links_from_text_func(text):
            links = []
            for m in PAT.finditer(text):
                url, key = m.group("url").strip(), m.group("key")
                if not url or not key: continue
                base = key[:-4] if key.endswith("_url") else key
                # the last known format name contained in the variable name wins; fall back to the name itself
                candidates = [k for k in rank.keys() if k.lower() in base.lower()] or [base]
                links.append({"key": key, "format": candidates[-1], "url": url})
            return links

        soup, outs = BeautifulSoup(html_text, "lxml"), []
        for s in soup.find_all("script"):
            js = s.string or s.get_text() or ""
            if "pan.quark.cn/s/" in js: outs.extend(extract_quark_links_from_text_func(js))
        # drop repeated urls while keeping first-seen order
        seen, uniq = set(), []
        for it in outs:
            if it["url"] in seen: continue
            seen.add(it["url"])
            uniq.append(it)
        uniq = sorted(uniq, key=lambda x: rank.get(str(x["format"]).upper(), 0), reverse=True)
        m = re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)
        cover_url = bytes(m.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/") if m else None
        return {'quark_links': uniq, 'cover_url': cover_url}
    '''_extractlrc'''
    def _extractlrc(self, js_text: str):
        '''Rebuild LRC lyrics from the `detailJson` string embedded in the page script.

        Returns (detail, lyric): the decoded detail object and the cleaned LRC text, or
        ({}, 'NULL') when the page has no `detailJson` assignment. Decoding errors
        propagate to the caller.
        '''
        # functions
        def norm_func(s):
            # remove every whitespace character (keys and times may contain stray spaces)
            return re.sub(r"\s+", "", str(s))

        def pick_func(d, target):
            return next((v for k, v in d.items() if norm_func(k) == target), None)

        def fmt_lrc_time_func(sec):
            t = float(norm_func(sec))
            return f"[{int(t) // 60:02d}:{(t - (int(t // 60) * 60)):05.2f}]"

        def lrc_list_to_lrc_func(detail):
            header = "\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]"]).strip() + "\n"
            rows = []
            for it in (detail.get("music_lrclist", []) or []):
                t, lyric = pick_func(it, "time"), pick_func(it, "lineLyric")
                if t is None or lyric is None: continue
                rows.append((fmt_lrc_time_func(t), re.sub(r"\s+", " ", str(lyric)).strip()))
            rows.sort(key=lambda x: x[0])
            return header + "\n".join(f"{ts}{ly}" for ts, ly in rows)

        # match
        if not (s := re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)): return {}, 'NULL'
        string = s.group(1).replace("\r", "").replace("\n", "")
        # literal_eval undoes the javascript string escaping before the json is parsed
        lyric_result = json_repair.loads(ast.literal_eval(f'"{string}"'))
        lyric = cleanlrc(lrc_list_to_lrc_func(lyric_result))
        # return
        return lyric_result, lyric
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
        '''Resolve one search hit through the Quark links on its detail page.

        Links are tried best-quality first; the first one yielding a valid download url
        wins. Returns a SongInfo, which lacks a valid download url when every link fails.
        Raises when the detail page itself cannot be fetched.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # parse
        (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
        # lyrics are best-effort: a malformed page must not lose the song
        try: lyric_result, lyric = self._extractlrc(resp.text)
        except Exception: lyric_result, lyric = {}, 'NULL'
        download_result = self._extractquarklinksfromhtml(resp.text)
        for quark_info in download_result['quark_links']:
            download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
                ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=lyric, cover_url=download_result.get('cover_url'), download_url=download_url,
                download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']
            song_info.ext = song_info.download_url_status['probe_status']['ext']
            if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
            elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
        '''Resolve one search hit through the site's /audio/play endpoint.

        Returns a SongInfo; it is an empty placeholder when the endpoint does not answer
        with an http(s) url. Raises when either page request fails.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # parse
        (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
        # lyrics are best-effort: a malformed page must not lose the song
        try: lyric_result, lyric = self._extractlrc(resp.text)
        except Exception: lyric_result, lyric = {}, 'NULL'
        download_result = self._extractquarklinksfromhtml(resp.text)
        # the endpoint's response body is used directly as the audio url
        (resp := self.get(f"https://www.jcpoo.cn/audio/play?id={search_result['id']}", **request_overrides)).raise_for_status()
        if not (download_url := resp.text.strip()) or not str(download_url).startswith('http'): return song_info
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('title', None)), singers=legalizestring(search_result.get('artist')),
            album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=None, duration='-:-:-', lyric=lyric, cover_url=download_result.get('cover_url'),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # prefer the probed extension when the url-derived one is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''Fetch `search_url`, resolve every hit to a downloadable song and append it to `song_infos`.

        Args:
            keyword: the search keyword (not read here; the url is already built).
            search_url: url produced by _constructsearchurls.
            request_overrides: extra keyword arguments forwarded to the requests made here.
            song_infos: list receiving the results; a fresh list is created when omitted.
            progress: optional rich Progress whose task description is updated; skipped when None.
            progress_id: task id inside `progress`.
        Returns:
            The `song_infos` list. Any error ends processing of this page and is reported
            through `progress` instead of being raised.
        '''
        # init (a None default avoids sharing one list between unrelated calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('url' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # ----parse from quark links
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
                # ----parse from play url
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
                # ----filter if invalid
                if not song_info.with_valid_download_url: continue
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
|
||||
@@ -0,0 +1,122 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of KKWSMusicClient: https://www.kkws.cc/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import functools
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, safeextractfromdict, extractdurationsecondsfromlrc, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''KKWSMusicClient'''
|
||||
class KKWSMusicClient(BaseMusicClient):
    '''Music client for https://www.kkws.cc/.

    Every song is delivered through a Quark share link, so quark cookies are mandatory.
    '''
    source = 'KKWSMusicClient'
    # relative preference of audio formats; a larger value is tried first
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    def __init__(self, **kwargs):
        '''Forward kwargs to BaseMusicClient, require quark cookies, install headers and call _initsession().

        Raises:
            AssertionError: when quark_parser_config carries no cookies.
        '''
        super(KKWSMusicClient, self).__init__(**kwargs)
        # raised explicitly (not via `assert`) so the check also runs under `python -O`; the exception type is unchanged
        if not self.quark_parser_config.get('cookies'):
            raise AssertionError(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.')
        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        # the search headers double as the session-wide defaults (same dict object)
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''Return one search url per result page needed to cover self.search_size_per_source hits.

        Sets self.search_size_per_page to min(search_size_per_source, 15).
        `rule` and `request_overrides` are accepted for interface parity and are not used here.
        '''
        from urllib.parse import quote
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # construct search urls; percent-encode the keyword so '&', '#', '+' or spaces cannot corrupt the query string
        encoded_keyword = quote(str(keyword), safe='')
        self.search_size_per_page = min(self.search_size_per_source, 15)
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            # page numbers are one-based on this site
            search_urls.append(f'https://www.kkws.cc/search.html?key={encoded_keyword}&page={int(count // page_size) + 1}')
            count += page_size
        # return
        return search_urls
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text: str):
        '''Parse the search page into dicts with the keys id, name, format, size, share_time, singer and detail_url.

        Missing pieces are reported as empty strings.
        '''
        soup = BeautifulSoup(html_text, "lxml")
        search_results, base_url = [], 'https://www.kkws.cc/'
        for li in soup.select("ul.listbox > li"):
            if not (a := li.select_one("h2 a[href]")): continue
            title_attr = (a.get("title") or "").strip()
            full_text = a.get_text(" ", strip=True)
            # song name: title attribute without the download suffix, the "[format]" tag and the trailing "- size"
            name = title_attr.replace("免费下载", "").strip() if title_attr else full_text
            name = re.sub(r"\s*\[[^\]]+\]\s*", " ", name).strip()
            name = re.sub(r"\s*-\s*\d+(\.\d+)?[KMG]?\s*$", "", name).strip()
            m_fmt = re.search(r"\[([^\]]+)\]", full_text)
            file_format = m_fmt.group(1).strip() if m_fmt else ""
            m_size = re.search(r"-\s*([0-9.]+\s*[KMG]?)", full_text, re.IGNORECASE)
            size = (m_size.group(1).replace(" ", "") if m_size else "").strip()
            ems = li.select("small em")
            share_time, singer = "", ""
            if len(ems) >= 1: share_time = ems[0].get_text(strip=True).replace("分享时间:", "").strip()
            if len(ems) >= 2: singer = ems[-1].get_text(strip=True).replace("演唱:", "").strip()
            href = urljoin(base_url, a["href"])
            m_id = re.search(r"/detail/(\d+)\.html", href)
            item_id = m_id.group(1) if m_id else ""
            search_results.append({"id": item_id, "name": name, "format": file_format, "size": size, "share_time": share_time, "singer": singer, "detail_url": href})
        return search_results
    '''_extractlyricsandquark'''
    def _extractlyricsandquark(self, html_text: str, song_id: str, request_overrides: dict = None):
        '''Extract the lyrics and the Quark share links from a detail page.

        Returns {"lyrics": str, "quark_links": [...]}; each link is a dict with url,
        formats (sorted list) and names (sorted list), ordered by the best format it
        offers according to MUSIC_QUALITY_RANK.
        '''
        request_overrides = request_overrides or {}
        soup = BeautifulSoup(html_text, "lxml")
        tb = soup.select_one("#textbox")

        def to_mmss_func(t):
            # accepts plain seconds or "mm:ss[.xx]" and renders "mm:ss" (fractions truncated)
            secs = int(float(t.split(":", 1)[0]) * 60 + float(t.split(":", 1)[1])) if ":" in t else int(float(t))
            return f"{secs//60:02d}:{secs%60:02d}"

        lyric_lines = []
        if tb:
            for line in (l.strip() for l in tb.get_text("\n").splitlines()):
                if not line: continue
                m = re.match(r"^\[(\d+(?:\.\d+)?|\d{1,2}:\d{2}(?:\.\d+)?)\]\s*(.*)$", line)
                lyric_lines.append(f"[{to_mmss_func(m.group(1))}] {m.group(2).strip()}" if m else f"{line}")
        lyrics = "\n".join(lyric_lines)
        url_map, rank = {}, KKWSMusicClient.MUSIC_QUALITY_RANK
        for a in soup.select("div.downbox a[onclick]"):
            if not (onclick := (a.get("onclick") or "").strip()): continue
            # the quoted arguments of the onclick handler carry "name|format", url and (for openModel) the format
            args = re.findall(r"'([^']*)'", onclick)
            if onclick.startswith("openModel") and len(args) >= 4: name_fmt, url, fmt = args[1], args[2], args[3] or None
            elif onclick.startswith("mbgotourl") and len(args) >= 3: name_fmt, url, fmt = args[1], args[2], None
            else: continue
            if "|" in name_fmt:
                name, fmt_from_name = map(str.strip, name_fmt.split("|", 1))
                fmt = fmt or fmt_from_name
            else:
                name = name_fmt.strip()
            # ask the site to decrypt the link; on any failure keep the raw value from the page
            try: url = resp2json(self.get(f'https://www.kkws.cc/getdown?url={url}&j=1&id={song_id}', allow_redirects=True, **request_overrides))['data']['decrypted_url']
            except Exception: pass
            if not (url and "pan.quark.cn" in url): continue
            e = url_map.setdefault(url, {"url": url, "formats": set(), "names": set()})
            # normalise free-form labels to a known format name when one is contained in them
            if fmt: e["formats"].add(functools.reduce(lambda f, k: k if k.lower() in f.lower() else f, rank, fmt))
            if name: e["names"].add(name)
        # rank each link by the best format it offers (not by its alphabetically first format)
        quark_links = sorted(
            ({"url": e["url"], "formats": sorted(e["formats"]), "names": sorted(e["names"])} for e in url_map.values()),
            key=lambda x: max((rank.get(f, 0) for f in x["formats"]), default=0), reverse=True,
        )
        quark_links = [q for q in quark_links if isinstance(q, dict) and q.get('url')]
        return {"lyrics": lyrics, "quark_links": quark_links}
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''Fetch `search_url`, resolve every hit to a downloadable song and append it to `song_infos`.

        Args:
            keyword: the search keyword (not read here; the url is already built).
            search_url: url produced by _constructsearchurls.
            request_overrides: extra keyword arguments forwarded to the requests made here.
            song_infos: list receiving the results; a fresh list is created when omitted.
            progress: optional rich Progress whose task description is updated; skipped when None.
            progress_id: task id inside `progress`.
        Returns:
            The `song_infos` list. Errors are reported through `progress` and never raised.
        '''
        # init (a None default avoids sharing one list between unrelated calls)
        request_overrides = request_overrides or {}
        song_infos = [] if song_infos is None else song_infos
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('detail_url' not in search_result) or ('id' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # a failing detail page only drops this hit, not the whole search
                try:
                    (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status()
                    download_result = self._extractlyricsandquark(resp.text, search_result['id'], request_overrides)
                except Exception:
                    continue
                # links are ordered best-quality first; stop at the first one that works
                for quark_info in download_result['quark_links']:
                    download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
                    if not download_url or not str(download_url).startswith('http'): continue
                    duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
                    duration_in_secs = duration[0] if duration else 0
                    song_info = SongInfo(
                        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(search_result.get('singer')), album='NULL',
                        ext='mp3', file_size_bytes=None, file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(download_result, ['lyrics'], '')),
                        cover_url=None, download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
                    )
                    song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
                    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                    song_info.ext = song_info.download_url_status['probe_status']['ext']
                    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                    if song_info.with_valid_download_url: break
                if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
                if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
                if not song_info.with_valid_download_url: continue
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
|
||||
@@ -0,0 +1,171 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of LivePOOMusicClient: https://www.livepoo.cn/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import ast
|
||||
import copy
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urlencode, urljoin, urlparse, parse_qs
|
||||
from ..utils import legalizestring, usesearchheaderscookies, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''LivePOOMusicClient'''
|
||||
class LivePOOMusicClient(BaseMusicClient):
|
||||
source = 'LivePOOMusicClient'
|
||||
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
|
||||
def __init__(self, **kwargs):
    '''Forward kwargs to the base client, warn when quark cookies are missing, install headers and call _initsession().'''
    super(LivePOOMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    self.default_search_headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
    }
    self.default_download_headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
    }
    # the search headers double as the session-wide defaults (same dict object)
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Return one search url per result page needed to cover self.search_size_per_source hits.

    `rule` entries are merged into the query parameters (the page index is always
    overwritten). Sets self.search_size_per_page to min(search_size_per_source, 30).
    '''
    # init
    rule = rule or {}
    request_overrides = request_overrides or {}
    # search rules: caller supplied entries override the defaults
    query = {'page': 0, 'keyword': keyword}
    query.update(rule)
    # construct search urls (page indices are zero-based)
    endpoint = 'https://www.livepoo.cn/search?'
    wanted = self.search_size_per_source
    self.search_size_per_page = min(wanted, 30)
    per_page = self.search_size_per_page
    search_urls, covered = [], 0
    while covered < wanted:
        page_query = dict(query)
        page_query['page'] = int(covered // per_page)
        search_urls.append(endpoint + urlencode(page_query))
        covered += per_page
    # return
    return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text: str):
    '''Parse the search page into dicts with the keys title, artist, url and id.

    Titles shaped like "singer《song》" are split into artist and title; otherwise
    artist is None. `id` is the link's "id" query parameter without its "MUSIC_"
    prefix, or None when the link carries no such parameter.
    '''
    soup, search_results, base_url = BeautifulSoup(html_text, "lxml"), [], "https://www.livepoo.cn/"
    title_pattern = re.compile(r'^(.*?)《(.*?)》$')
    for li in soup.select("ul.tuij_song li.song_item2"):
        if not (a := li.select_one("a[href]")): continue
        href = a["href"].strip()
        full_url = urljoin(base_url, href)
        title_div = a.select_one(".song_info2 > div")
        title = title_div.get_text(strip=True) if title_div else a.get_text(" ", strip=True)
        mid = parse_qs(urlparse(href).query).get("id", [None])[0]
        m = title_pattern.match(title.strip())
        singer, song_name = (m.group(1).strip(), m.group(2).strip()) if m else (None, title.strip())
        # a link without an id must not abort parsing of the remaining entries
        song_id = mid.removeprefix('MUSIC_') if mid is not None else None
        search_results.append({"title": song_name, "artist": singer, "url": full_url, "id": song_id})
    return search_results
|
||||
'''_extractquarklinksfromhtml'''
|
||||
def _extractquarklinksfromhtml(self, html_text: str):
    '''Collect the Quark share links and the cover url embedded in a song page.

    Args:
        html_text: raw HTML of the song page.

    Returns:
        Dict with "quark_links" (list of {"key", "format", "url"} dicts, de-duplicated by url and
        ordered by LivePOOMusicClient.MUSIC_QUALITY_RANK, best first, unknown formats scoring 0)
        and "cover_url" (the unescaped "music_cover" value found in the page, or None).
    '''
    # matches js declarations such as: const flac_url = "https://pan.quark.cn/s/xxxx"
    link_pattern = re.compile(
        r"""(?:const|let|var)\s+
        (?P<key>[A-Za-z0-9_]+?)\s*=\s*
        (?P<quote>["'])
        (?P<url>https?://pan\.quark\.cn/s/[^"']+)
        (?P=quote)
        """, re.VERBOSE
    )
    def _links_in(text):
        found = []
        for hit in link_pattern.finditer(text):
            url = hit.group("url").strip()
            if not url: continue
            key = hit.group("key")
            if not key: continue
            # the variable name minus a trailing "_url" hints at the audio format
            base = key[:-4] if key.endswith("_url") else key
            known = [name for name in LivePOOMusicClient.MUSIC_QUALITY_RANK.keys() if name.lower() in base.lower()]
            found.append({"key": key, "format": (known or [base])[-1], "url": url})
        return found
    # only scripts that mention a quark share link are scanned
    collected = []
    for script in BeautifulSoup(html_text, "lxml").find_all("script"):
        js = script.string or script.get_text() or ""
        if "pan.quark.cn/s/" in js:
            collected.extend(_links_in(js))
    # keep the first occurrence of each url
    unique, seen_urls = [], set()
    for item in collected:
        if item["url"] in seen_urls: continue
        seen_urls.add(item["url"])
        unique.append(item)
    unique.sort(key=lambda item: LivePOOMusicClient.MUSIC_QUALITY_RANK.get(str(item["format"]).upper(), 0), reverse=True)
    # cover
    cover_url, cover_match = None, re.search(r'"music_cover"\s*:\s*"(.*?)"', html_text)
    if cover_match:
        cover_url = bytes(cover_match.group(1), "utf-8").decode("unicode_escape").replace(r"\/", "/")
    return {'quark_links': unique, 'cover_url': cover_url}
|
||||
'''_extractlrc'''
|
||||
def _extractlrc(self, js_text: str):
    '''Rebuild an LRC lyric from the "detailJson" payload embedded in a song page.

    Args:
        js_text: text of the page, searched for: const detailJson = '...'; const detail = JSON.parse

    Returns:
        Tuple (lyric_result, lyric): the decoded detail payload and the cleaned LRC text, or
        ({}, 'NULL') when the page holds no detailJson declaration.
    '''
    def _squash(value):
        # drop every whitespace character
        return re.sub(r"\s+", "", str(value))
    def _lookup(entry, wanted):
        # first value whose key equals `wanted` once whitespace is removed
        for raw_key, value in entry.items():
            if _squash(raw_key) == wanted: return value
        return None
    def _timestamp(seconds):
        # seconds -> "[mm:ss.xx]"
        secs = float(_squash(seconds))
        return f"[{int(secs) // 60:02d}:{(secs - (int(secs // 60) * 60)):05.2f}]"
    def _to_lrc(detail):
        header = "\n".join([f"[ti:{detail.get('music_name','')}]", f"[ar:{detail.get('music_artist','')}]", f"[al:{detail.get('music_album','')}]",]).strip() + "\n"
        rows = []
        for entry in (detail.get("music_lrclist", []) or []):
            stamp = _lookup(entry, "time")
            text = _lookup(entry, "lineLyric")
            if stamp is None or text is None: continue
            rows.append((_timestamp(stamp), re.sub(r"\s+", " ", str(text)).strip()))
        # order lines by their formatted timestamp
        rows.sort(key=lambda row: row[0])
        return header + "\n".join(f"{ts}{ly}" for ts, ly in rows)
    # match
    found = re.search(r"const\s+detailJson\s*=\s*'(.+?)';\s*const\s+detail\s*=\s*JSON\.parse", js_text, re.S)
    if not found:
        return {}, 'NULL'
    # the payload is a js string literal: undo its escapes first, then parse the json
    escaped = found.group(1).replace("\r", "").replace("\n", "")
    lyric_result = json_repair.loads(ast.literal_eval(f'"{escaped}"'))
    lyric = cleanlrc(_to_lrc(lyric_result))
    return lyric_result, lyric
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search result into a SongInfo through the Quark share links of its song page.

    Args:
        search_result: entry holding at least "url" and "id"; "title" and "artist" are read if present.
        request_overrides: extra keyword arguments forwarded to self.get and to the link tester.

    Returns:
        The SongInfo built from the first Quark link that yields a valid download url, otherwise the
        last candidate built, or an empty SongInfo when no link produced an http url.

    Raises:
        Whatever raise_for_status raises when the song page request fails.
    '''
    # init
    request_overrides = request_overrides or {}
    song_info = SongInfo(source=self.source)
    # the song page carries both the lyric payload and the quark links
    page_resp = self.get(search_result['url'], **request_overrides)
    page_resp.raise_for_status()
    try:
        lyric_result, lyric = self._extractlrc(page_resp.text)
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(page_resp.text)
    # links arrive best quality first; stop at the first one that works
    for quark_info in download_result['quark_links']:
        parse_result, download_url = QuarkParser.parsefromurl(quark_info['url'], **self.quark_parser_config)
        download_result['quark_parse_result'] = parse_result
        if not (download_url and str(download_url).startswith('http')):
            continue
        positive_durations = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
        duration_in_secs = positive_durations[0] if positive_durations else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result},
            source=self.source,
            song_name=legalizestring(search_result.get('title', None)),
            singers=legalizestring(search_result.get('artist')),
            album='NULL',
            ext='mp3',
            file_size_bytes=None,
            file_size=None,
            identifier=search_result['id'],
            duration_s=duration_in_secs,
            duration=seconds2hms(duration_in_secs),
            lyric=lyric,
            cover_url=download_result.get('cover_url'),
            download_url=download_url,
            download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides),
            default_download_headers=self.quark_default_download_headers,
        )
        probe_status = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.download_url_status['probe_status'] = probe_status
        song_info.file_size = probe_status['file_size']
        song_info.ext = probe_status['ext']
        # fall back to mp3 when the probed extension is not a known audio extension
        if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
            song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
        if song_info.with_valid_download_url:
            break
    # normalise missing lyric / duration
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
    '''Resolve a search result into a SongInfo through the site's play endpoint.

    Args:
        search_result: entry holding at least "url" and "id"; "title" and "artist" are read if present.
        request_overrides: extra keyword arguments forwarded to self.get and to the link tester.

    Returns:
        A populated SongInfo, or an empty one when the play endpoint does not answer with an http url.

    Raises:
        Whatever raise_for_status raises when the song page or play request fails.
    '''
    # init
    request_overrides = request_overrides or {}
    song_info = SongInfo(source=self.source)
    # song page: lyric payload and cover
    page_resp = self.get(search_result['url'], **request_overrides)
    page_resp.raise_for_status()
    try:
        lyric_result, lyric = self._extractlrc(page_resp.text)
    except Exception:
        lyric_result, lyric = {}, 'NULL'
    download_result = self._extractquarklinksfromhtml(page_resp.text)
    # play endpoint: the response body is the download url itself
    # NOTE(review): this endpoint lives on www.jcpoo.cn while the search pages are on www.livepoo.cn; confirm this is intentional
    play_resp = self.get(f"https://www.jcpoo.cn/audio/play?id={search_result['id']}", **request_overrides)
    play_resp.raise_for_status()
    download_url = play_resp.text.strip()
    if not (download_url and str(download_url).startswith('http')):
        return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result},
        source=self.source,
        song_name=legalizestring(search_result.get('title', None)),
        singers=legalizestring(search_result.get('artist')),
        album='NULL',
        ext=download_url.split('?')[0].split('.')[-1],
        file_size_bytes=None,
        file_size=None,
        identifier=search_result['id'],
        duration_s=None,
        duration='-:-:-',
        lyric=lyric,
        cover_url=download_result.get('cover_url'),
        download_url=download_url,
        download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    probe_status = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.download_url_status['probe_status'] = probe_status
    song_info.file_size = probe_status['file_size']
    # prefer the probed extension when the one taken from the url is not a known audio extension
    if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS:
        song_info.ext = probe_status['ext'] if probe_status['ext'] in AudioLinkTester.VALID_AUDIO_EXTS else 'mp3'
    # normalise missing lyric / duration
    if not song_info.lyric or '歌词获取失败' in song_info.lyric:
        song_info.lyric = 'NULL'
    if not song_info.duration or song_info.duration == '-:-:-':
        song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
    # return
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append every downloadable song to song_infos.

    Args:
        keyword: search phrase (not read here; search_url already encodes it).
        search_url: url of the search page to fetch.
        request_overrides: extra keyword arguments forwarded to self.get.
        song_infos: list that receives the results; a new list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: task id inside progress.

    Returns:
        The list that received the results. Failures are reported through progress, never raised.
    '''
    # init
    request_overrides = request_overrides or {}
    # a literal [] default would be shared by every call that omits song_infos
    song_infos = [] if song_infos is None else song_infos
    def _report(outcome):
        # progress defaults to None; without this guard the except branch itself raised AttributeError
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        _report('Success')
    # failure
    except Exception as err:
        _report(f'Error: {err}')
    # return
    return song_infos
|
||||
@@ -0,0 +1,127 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of MituMusicClient: https://www.qqmp3.vip/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import copy
|
||||
from urllib.parse import urlencode
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, seconds2hms, searchdictbykey, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''MituMusicClient'''
|
||||
class MituMusicClient(BaseMusicClient):
    '''Client for https://www.qqmp3.vip/: searches through api.qqmp3.vip and resolves each hit either through its Quark share links or through the kw.php play url.'''
    source = 'MituMusicClient'
    # score per audio format; Quark links are tried in descending score order
    MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
    def __init__(self, **kwargs):
        '''Forward kwargs to BaseMusicClient, install the default request headers and initialise the session.'''
        super(MituMusicClient, self).__init__(**kwargs)
        if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
        self.default_search_headers = {
            "accept": "*/*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "origin": "https://www.qqmp3.vip", "priority": "u=1, i", "referer": "https://www.qqmp3.vip/", "sec-ch-ua": '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
            "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''Build the single search url for keyword.

        Args:
            keyword: search phrase, sent as the "keyword" query parameter.
            rule: optional extra query parameters merged over the defaults.
            request_overrides: accepted for interface compatibility, not used here.

        Returns:
            A one-element list with the search url. Also sets self.search_size_per_page to
            self.search_size_per_source, because only one request is issued.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {'keyword': keyword, 'type': 'search'}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://api.qqmp3.vip/api/songs.php?'
        page_rule = copy.deepcopy(default_rule)
        search_urls = [base_url + urlencode(page_rule)]
        self.search_size_per_page = self.search_size_per_source
        # return
        return search_urls
    '''_parsesearchresultfromquark'''
    def _parsesearchresultfromquark(self, search_result: dict, request_overrides: dict = None):
        '''Resolve a search hit into a SongInfo through its Quark share links.

        Args:
            search_result: api entry; "rid" is required, "downurl", "name", "artist" and "pic" are read if present.
            request_overrides: extra keyword arguments forwarded to self.get and to the link tester.

        Returns:
            The SongInfo built from the first link that yields a valid download url, otherwise the
            last candidate built, or an empty SongInfo when no link produced an http url.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # longest format names are checked first so that e.g. "AIFF" is not shadowed by a shorter name
        parse_format_func = lambda label: next((fmt for fmt in sorted(MituMusicClient.MUSIC_QUALITY_RANK, key=len, reverse=True) if re.search(rf"\b{re.escape(fmt)}\b", (s := str(label).upper())) or fmt in s), "UNKNOWN")
        # each entry looks like "<label>$$<rest>"; only the label part is used for ranking
        quality_score_func = lambda item: MituMusicClient.MUSIC_QUALITY_RANK.get(parse_format_func(item.split("$$", 1)[0]), 0)
        # parse
        try:
            (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
            lyric_result = resp2json(resp=resp)
        except Exception:
            # the lyric is optional: continue without it
            lyric_result = {}
        quark_download_urls: list[str] = search_result.get('downurl', []) or []
        for quark_download_url in sorted(quark_download_urls, key=quality_score_func, reverse=True):
            download_result, download_url = QuarkParser.parsefromurl(quark_download_url, **self.quark_parser_config)
            if not download_url or not str(download_url).startswith('http'): continue
            duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
            duration_in_secs = duration[0] if duration else 0
            song_info = SongInfo(
                raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
                ext='mp3', file_size=None, identifier=search_result['rid'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(safeextractfromdict(lyric_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
                download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
            )
            song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
            song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
            # fall back to mp3 when the probed extension is not a known audio extension
            if song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS: song_info.ext = 'mp3'
            if song_info.with_valid_download_url: break
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_parsesearchresultfromweb'''
    def _parsesearchresultfromweb(self, search_result: dict, request_overrides: dict = None):
        '''Resolve a search hit into a SongInfo through the kw.php play url.

        Args:
            search_result: api entry; "rid" is required, "name", "artist" and "pic" are read if present.
            request_overrides: extra keyword arguments forwarded to self.get and to the link tester.

        Returns:
            A populated SongInfo, or an empty one when the payload carries no http download url.

        Raises:
            Whatever raise_for_status raises when the kw.php request fails.
        '''
        # init
        request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
        # parse
        (resp := self.get(f'https://api.qqmp3.vip/api/kw.php?rid={search_result["rid"]}&type=json&level=exhigh&lrc=true', **request_overrides)).raise_for_status()
        download_result = resp2json(resp=resp)
        # a payload without data/url used to raise here and abort the remaining results of the page; treat it as "no download url" instead
        download_url = safeextractfromdict(download_result, ['data', 'url'], '')
        if not download_url or not str(download_url).startswith('http'): return song_info
        download_url = str(download_url)
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name', None)), singers=legalizestring(search_result.get('artist')), album='NULL',
            ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['rid'], duration='-:-:-', lyric=cleanlrc(safeextractfromdict(download_result, ['data', 'lrc'], '')), cover_url=search_result.get('pic'),
            download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
        )
        song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # prefer the probed extension when the one taken from the url is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
        if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
        # return
        return song_info
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''Fetch one search page and append every downloadable song to song_infos.

        Args:
            keyword: search phrase (not read here; search_url already encodes it).
            search_url: url of the search request.
            request_overrides: extra keyword arguments forwarded to self.get.
            song_infos: list that receives the results; a new list is created when omitted.
            progress: optional rich Progress whose task description is updated with the outcome.
            progress_id: task id inside progress.

        Returns:
            The list that received the results. Failures are reported through progress, never raised.
        '''
        # init
        request_overrides = request_overrides or {}
        # a literal [] default would be shared by every call that omits song_infos
        song_infos = [] if song_infos is None else song_infos
        def _report(outcome):
            # progress defaults to None; without this guard the except branch itself raised AttributeError
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = resp2json(resp)['data']
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('rid' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # ----parse from quark links
                if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, request_overrides)
                # ----parse from play url
                if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, request_overrides)
                # ----filter if invalid
                if not song_info.with_valid_download_url: continue
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            _report('Success')
        # failure
        except Exception as err:
            _report(f'Error: {err}')
        # return
        return song_infos
|
||||
@@ -0,0 +1,102 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of TwoT58MusicClient: https://www.2t58.com/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import copy
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, extractdurationsecondsfromlrc, seconds2hms, cleanlrc, SongInfo, RandomIPGenerator, AudioLinkTester
|
||||
|
||||
|
||||
'''TwoT58MusicClient'''
|
||||
class TwoT58MusicClient(BaseMusicClient):
    '''Client for https://www.2t58.com/: scrapes the html search pages and resolves download links through plug/down.php.'''
    source = 'TwoT58MusicClient'
    def __init__(self, **kwargs):
        '''Forward kwargs to BaseMusicClient, install the default request headers and initialise the session.'''
        super(TwoT58MusicClient, self).__init__(**kwargs)
        # NOTE(review): the "cookie" header is a hard-coded browser session and will presumably expire; confirm whether it is required
        self.default_search_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-user": "?1",
            "cookie": "Hm_tf_hx9umupwu8o=1766942296; Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f=1766942296; cac9054cc9568db7fa51d16ee602cd7b=fd6762f9a63b502fda3befef86ea6460; server_name_session=91a76d925399962c481089ef4a83ce4e; Hm_lvt_hx9umupwu8o=1766942296,1768900847; Hm_lpvt_hx9umupwu8o=1768901202", "referer": "https://www.2t58.com/so/%E5%8F%AF%E6%83%9C.html", "priority": "u=0, i",
            "sec-ch-ua": "\"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"", "sec-ch-ua-mobile": "?0", "upgrade-insecure-requests": "1", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36", "sec-fetch-site": "same-origin",
        }
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''Build one search url per result page for keyword.

        Args:
            keyword: search phrase, placed in the url path.
            rule: accepted for interface compatibility, not used here.
            request_overrides: accepted for interface compatibility, not used here.

        Returns:
            List of search urls; the first page has no page number, later pages are numbered from 2.
            Also sets self.search_size_per_page to min(self.search_size_per_source, 68).
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # construct search urls
        self.search_size_per_page = min(self.search_size_per_source, 68)
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_no = int(count // page_size) + 1
            if page_no == 1: search_urls.append(f'https://www.2t58.com/so/{keyword}.html')
            else: search_urls.append(f'https://www.2t58.com/so/{keyword}/{page_no}.html')
            count += page_size
        # return
        return search_urls
    '''_parsesearchresultsfromhtml'''
    def _parsesearchresultsfromhtml(self, html_text: str):
        '''Extract the song entries of a search page.

        Args:
            html_text: raw html of the search page.

        Returns:
            List of dicts with keys "title", "url" (absolute), "path" (href as found) and "id"
            (last path segment of the url without its extension).
        '''
        soup = BeautifulSoup(html_text, "lxml")
        search_results, base_url = [], 'https://www.2t58.com/'
        for a in soup.select(".play_list ul li .name a"):
            title, href = a.get_text(strip=True), a.get("href", "")
            full_url = urljoin(base_url, href)
            song_id = urlparse(full_url).path.strip('/').split('/')[-1].split('.')[0]
            search_results.append({"title": title, "url": full_url, "path": href, "id": song_id})
        return search_results
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''Fetch one search page and append every downloadable song to song_infos.

        Args:
            keyword: search phrase (not read here; search_url already encodes it).
            search_url: url of the search page to fetch.
            request_overrides: extra keyword arguments forwarded to the http calls.
            song_infos: list that receives the results; a new list is created when omitted.
            progress: optional rich Progress whose task description is updated with the outcome.
            progress_id: task id inside progress.

        Returns:
            The list that received the results. Failures are reported through progress, never raised.
        '''
        # init
        request_overrides = request_overrides or {}
        # a literal [] default would be shared by every call that omits song_infos
        song_infos = [] if song_infos is None else song_infos
        def _report(outcome):
            # progress defaults to None; without this guard the except branch itself raised AttributeError
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} ({outcome})")
        # successful
        try:
            # --search results
            (resp := self.get(search_url, **request_overrides)).raise_for_status()
            search_results = self._parsesearchresultsfromhtml(resp.text)
            for search_result in search_results:
                # --download results
                if not isinstance(search_result, dict) or ('url' not in search_result) or ('id' not in search_result): continue
                song_info = SongInfo(source=self.source)
                # titles look like "singer《song》 [tag]": drop the trailing [tag], then split on the 《》 pair
                bare_title = re.sub(r"\s*\[[^\]]*\]\s*$", "", str(search_result.get("title") or "NULL"))
                title_match = re.search(r"《(.*?)》", bare_title)
                song_name = legalizestring((title_match.group(1) if title_match else bare_title).strip())
                singers = legalizestring(bare_title.split("《", 1)[0].strip())
                # try the qualities from best to worst and keep the first valid link
                for quality in ['flac', 'wav', '320']:
                    headers = copy.deepcopy(self.default_download_headers); RandomIPGenerator().addrandomipv4toheaders(headers=headers)
                    # the final url after redirects is the download url
                    try: download_url = self.session.head(f"https://www.2t58.com/plug/down.php?ac=music&id={search_result['id']}&k={quality}", allow_redirects=True, headers=headers, **request_overrides).url
                    except Exception: continue
                    song_info = SongInfo(
                        raw_data={'search': search_result, 'download': {}, 'lyric': {}}, source=self.source, song_name=song_name, singers=singers, album='NULL',
                        ext=download_url.split('?')[0].split('.')[-1], file_size_bytes=None, file_size=None, identifier=search_result['id'],
                        duration='-:-:-', lyric='NULL', cover_url=None, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                    )
                    if not song_info.with_valid_download_url: continue
                    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                    if song_info.with_valid_download_url: break
                if not song_info.with_valid_download_url: continue
                # --lyric results (best effort)
                try:
                    (resp := self.get(f"https://www.2t58.com/plug/down.php?ac=music&lk=lrc&id={search_result['id']}", **request_overrides)).raise_for_status()
                    song_info.lyric = cleanlrc(resp.text.replace('[00:00.00]欢迎来访爱听音乐网 www.2t58.com\r\n', ''))
                    song_info.duration_s = extractdurationsecondsfromlrc(song_info.lyric); song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    # was a bare except, which also swallowed KeyboardInterrupt/SystemExit
                    song_info.lyric, song_info.duration = 'NULL', '-:-:-'
                # --cover results (best effort)
                try:
                    (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
                    # parser named explicitly, matching _parsesearchresultsfromhtml and avoiding bs4's guessed-parser warning
                    cover = BeautifulSoup(resp.text, "lxml").select_one("#mcover")
                    song_info.cover_url = cover["src"] if cover and cover.has_attr("src") else None
                except Exception:
                    song_info.cover_url = None
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            _report('Success')
        # failure
        except Exception as err:
            _report(f'Error: {err}')
        # return
        return song_infos
|
||||
@@ -0,0 +1,150 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of YinyuedaoMusicClient: https://1mp3.top/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import base64
|
||||
from html import unescape
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, safeextractfromdict, seconds2hms, searchdictbykey, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''YinyuedaoMusicClient'''
|
||||
class YinyuedaoMusicClient(BaseMusicClient):
|
||||
source = 'YinyuedaoMusicClient'
|
||||
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
|
||||
def __init__(self, **kwargs):
    '''Forward kwargs to BaseMusicClient, install the default request headers and initialise the session.

    Logs a warning when self.quark_parser_config has no "cookies" entry.
    '''
    super(YinyuedaoMusicClient, self).__init__(**kwargs)
    quark_cookies = self.quark_parser_config.get('cookies')
    if not quark_cookies:
        self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
    # the same browser identity is used for searching and for downloading
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"
    search_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "priority": "u=0, i",
        "referer": "https://1mp3.top/",
        "sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": user_agent,
    }
    self.default_search_headers = search_headers
    self.default_download_headers = {"user-agent": user_agent}
    # default_headers is the very same dict object as default_search_headers
    self.default_headers = self.default_search_headers
    self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
    '''Build the single search url for keyword.

    Args:
        keyword: search phrase; percent-encoded before it is placed in the query string.
        rule: accepted for interface compatibility, not used here.
        request_overrides: accepted for interface compatibility, not used here.

    Returns:
        A one-element list with the search url. Also sets self.search_size_per_page to
        self.search_size_per_source, because only one request is issued.
    '''
    from urllib.parse import quote
    # init
    rule, request_overrides = rule or {}, request_overrides or {}
    # construct search urls; an unescaped "&", "#", "+" or "%" in the keyword would otherwise truncate or corrupt the query
    search_urls = [f"https://1mp3.top/search.html?keyword={quote(str(keyword), safe='')}"]
    self.search_size_per_page = self.search_size_per_source
    # return
    return search_urls
|
||||
'''_parsemusicpage'''
|
||||
def _parsemusicpage(self, html_text: str, base_url: str = ""):
    '''Extract lyric, cover and download links from a song detail page.

    Args:
        html_text: raw html of the detail page.
        base_url: base used to make the cover and link urls absolute.

    Returns:
        Dict with "lyrics" (text of "section#demo article" or "NULL"), "cover" (absolute url or "")
        and "quark_links": a list of {"format", "score", "url", "text"} dicts, unique per
        (format, url) and ordered by score (best first), then format, then url.
    '''
    # compiled once instead of once per link
    format_pattern = re.compile(r"\b(DSD|DSF|DFF|WAV|AIFF|FLAC|ALAC|APE|WV|OPUS|AAC|M4A|OGG|VORBIS|MP3|WMA)\b", re.I)
    soup, lyrics = BeautifulSoup(html_text, "html.parser"), "NULL"
    if (article := soup.select_one("section#demo article")): lyrics = re.sub(r"\n+", "\n", unescape(article.get_text("\n", strip=True))).strip()
    cover = ""; img = soup.select_one("#album-cover") or soup.select_one(".cover-art img")
    if img and img.get("src"): cover = urljoin(base_url, img["src"].strip())
    links, seen = [], set()
    for a in soup.select("a.download-link[data-url]"):
        fmt = (a.get("data-format") or "").strip().upper(); text = a.get_text(" ", strip=True)
        if not (url := (a.get("data-url") or "").strip()): continue
        # without a data-format attribute, fall back to a format name mentioned in the link text
        fmt = fmt or ((m.group(1).upper()) if (m := format_pattern.search(text)) else None)
        item = {"format": fmt, "score": YinyuedaoMusicClient.MUSIC_QUALITY_RANK.get(fmt, -1), "url": urljoin(base_url, url), "text": text}
        if (key := (item["format"], item["url"])) not in seen: seen.add(key); links.append(item)
    # format may be None; comparing None with an unknown format string (both score -1) raised TypeError, so None is ordered as ""
    links.sort(key=lambda x: (-x["score"], x["format"] or "", x["url"]))
    return {"lyrics": lyrics, "cover": cover, "quark_links": links}
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text, base_url="https://www.1mp3.top"):
    '''Extract the song entries of a search page.

    Args:
        html_text: raw html of the search page.
        base_url: base used to make the detail links absolute.

    Returns:
        List of dicts with keys "id", "title", "singer" and "url". "id" is the part before the
        first "|" of the base64-decoded last path segment, or that segment itself when decoding fails.
    '''
    entries = []
    for anchor in BeautifulSoup(html_text, "html.parser").select('a[href^="/mdetail/"]'):
        cells = anchor.select("div.row > div")
        # an entry needs at least a title cell and a singer cell
        if len(cells) < 2:
            continue
        link = anchor.get("href", "")
        token = link.rsplit("/", 1)[-1]
        try:
            decoded = base64.b64decode(token).decode(errors="ignore")
            music_id = decoded.split("|", 1)[0]
        except Exception:
            music_id = token
        entries.append({
            "id": music_id,
            "title": cells[0].get_text(" ", strip=True),
            "singer": cells[1].get_text(" ", strip=True),
            "url": urljoin(base_url, link),
        })
    return entries
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, request_overrides: dict = None):
    '''Build a SongInfo from the quark links collected for one search result.

    Args:
        search_result: dict describing the search hit; "title", "singer" and "id" are read.
        download_result: parsed detail page; "quark_links", "lyrics" and "cover" are read and
            "quark_parse_result" is written for every link that is tried.
        request_overrides: extra request options forwarded to the audio link tester.

    Returns:
        The first candidate whose with_valid_download_url is truthy. Otherwise the last
        candidate that was built, or an empty SongInfo when no link produced one or the last
        one was an "mgg" file.
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    bracketed_number = re.compile(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]")
    # parse
    for quark_download_url in download_result['quark_links']:
        if not isinstance(quark_download_url, dict) or not safeextractfromdict(quark_download_url, ['format'], ''): continue
        download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['url'], **self.quark_parser_config)
        if not download_url or not str(download_url).startswith('http'): continue
        duration = [int(float(d)) for d in searchdictbykey(download_result['quark_parse_result'], 'duration') if int(float(d)) > 0]
        duration_in_secs = duration[0] if duration else 0
        song_info = SongInfo(
            raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
            ext=str(quark_download_url.get('format', 'mp3')).lower(), file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(download_result.get('lyrics')),
            cover_url=download_result.get("cover"), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
        )
        # mgg files are rejected; reset the candidate so a skipped mgg entry cannot be
        # returned when it happens to be the last link in the list
        if song_info.ext in {'mgg'}:
            song_info = SongInfo(source=self.source)
            continue
        song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
        song_info.file_size = song_info.download_url_status['probe_status']['file_size']
        # prefer the probed extension when the declared one is not a known audio extension
        if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
        elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
        if song_info.with_valid_download_url: break
    # duration fallback: take the bracketed number of the last lyric line that carries one
    # (the last line itself may not contain any, so scan backwards instead of assuming it does)
    if not song_info.duration or song_info.duration == '-:-:-':
        for lyric_line in reversed(str(song_info.lyric).split('\n')):
            if (matched := bracketed_number.search(lyric_line)):
                song_info.duration_s = float(matched.group(1))
                song_info.duration = seconds2hms(song_info.duration_s)
                break
    # return
    return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, request_overrides: dict = None):
    '''Build a SongInfo from the site's own "geturl" endpoint for one search result.

    Args:
        search_result: dict describing the search hit; "url", "title", "singer" and "id" are read.
        download_result: parsed detail page; "lyrics" and "cover" are read and the decoded
            endpoint response is stored under "geturl".
        request_overrides: extra request options forwarded to the request and the audio link tester.

    Returns:
        A populated SongInfo, or an empty SongInfo when the request fails, no http(s) download
        URL is returned, or the file extension is "mgg".
    '''
    # init
    request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
    # the last path segment of the detail page URL is the id expected by the geturl endpoint
    encrypted_id = urlparse(str(search_result["url"])).path.strip('/').split('/')[-1]
    bracketed_number = re.compile(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]")
    # parse
    try: (resp := self.get(f'https://1mp3.top/geturl?id={encrypted_id}&quality=exhigh&type=json', **request_overrides)).raise_for_status()
    except Exception: return song_info
    download_result['geturl'] = resp2json(resp=resp)
    download_url = safeextractfromdict(download_result['geturl'], ['data', 'url'], None)
    if not download_url or not str(download_url).startswith('http'): return song_info
    song_info = SongInfo(
        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
        ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result.get('id'), duration_s=None, duration='-:-:-', lyric=cleanlrc(download_result.get('lyrics')), cover_url=download_result.get("cover"),
        download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
    )
    if song_info.ext in {'mgg'}: return SongInfo(source=self.source)
    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
    song_info.file_size = song_info.download_url_status['probe_status']['file_size']
    # prefer the probed extension when the one taken from the URL is not a known audio extension
    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
    # duration fallback: take the bracketed number of the last lyric line that carries one
    # (the last line itself may not contain any, so scan backwards instead of assuming it does)
    if not song_info.duration or song_info.duration == '-:-:-':
        for lyric_line in reversed(str(song_info.lyric).split('\n')):
            if (matched := bracketed_number.search(lyric_line)):
                song_info.duration_s = float(matched.group(1))
                song_info.duration = seconds2hms(song_info.duration_s)
                break
    # return
    return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
    '''Fetch one search page and append a SongInfo for every playable result.

    Args:
        keyword: search keyword (not read here; the query is already encoded in search_url).
        search_url: URL of the search result page to fetch.
        request_overrides: extra request options forwarded to every request.
        song_infos: list that collected results are appended to; a new list is created when omitted.
        progress: optional rich Progress whose task description is updated with the outcome.
        progress_id: id of the task to update on progress.

    Returns:
        The song_infos list, extended in place.
    '''
    # init
    request_overrides = request_overrides or {}
    # a fresh list per call: a mutable default would be shared between unrelated searches
    song_infos = [] if song_infos is None else song_infos
    # successful
    try:
        # --search results
        (resp := self.get(search_url, **request_overrides)).raise_for_status()
        search_results = self._parsesearchresultsfromhtml(resp.text)
        for search_result in search_results:
            # --download results
            if not isinstance(search_result, dict) or ('id' not in search_result) or ('url' not in search_result): continue
            song_info = SongInfo(source=self.source)
            try:
                (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
                # resolve relative cover / download links against the detail page they came from
                download_result: dict = self._parsemusicpage(resp.text, base_url=search_result['url'])
            except Exception:
                continue
            # ----parse from quark links
            if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, request_overrides)
            # ----parse from play url
            if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, request_overrides)
            # ----filter if invalid
            if not song_info.with_valid_download_url: continue
            # --append to song_infos
            song_infos.append(song_info)
            # --judgement for search_size
            if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
        # --update progress
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
    # failure
    except Exception as err:
        # progress is optional; without the guard a missing progress would raise from inside this handler
        if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
    # return
    return song_infos
|
||||
@@ -0,0 +1,89 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of ZhuolinMusicClient: https://music.zhuolin.wang/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import copy
|
||||
from urllib.parse import urlsplit
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from ..utils import resp2json, seconds2hms, legalizestring, safeextractfromdict, usesearchheaderscookies, extractdurationsecondsfromlrc, cleanlrc, SongInfo, LanZouYParser, AudioLinkTester
|
||||
|
||||
|
||||
'''ZhuolinMusicClient'''
|
||||
class ZhuolinMusicClient(BaseMusicClient):
    '''Music client that searches https://music.zhuolin.wang/ through its plugns/api.php endpoint.'''
    source = 'ZhuolinMusicClient'
    MUSIC_QUALITIES = {'128', '320', '2000'}
    def __init__(self, **kwargs):
        super(ZhuolinMusicClient, self).__init__(**kwargs)
        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
        self.default_headers = self.default_search_headers
        self._initsession()
    '''_constructsearchurls'''
    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
        '''Build one POST request description per result page.

        Args:
            keyword: search keyword, sent as the "name" form field.
            rule: form fields that override the default search rule.
            request_overrides: accepted for interface parity; not used when building the URLs.

        Returns:
            list of {"url": ..., "data": ...} dicts, one per page, until search_size_per_source
            results are covered with search_size_per_page results per page.
        '''
        # init
        rule, request_overrides = rule or {}, request_overrides or {}
        # search rules
        default_rule = {"types": "search", 'count': "20", 'source': "freemp3", 'pages': "1", 'name': keyword}
        default_rule.update(rule)
        # construct search urls based on search rules
        base_url = 'https://music.zhuolin.wang/plugns/api.php'
        search_urls, page_size, count = [], self.search_size_per_page, 0
        while self.search_size_per_source > count:
            page_rule = copy.deepcopy(default_rule)
            page_rule['count'] = page_size
            page_rule['pages'] = int(count // page_size) + 1
            search_urls.append({'url': base_url, 'data': page_rule})
            count += page_size
        # return
        return search_urls
    '''_search'''
    @usesearchheaderscookies
    def _search(self, keyword: str = '', search_url: dict = None, request_overrides: dict = None, song_infos: list = None, progress: Progress = None, progress_id: int = 0):
        '''Run one search request and append a SongInfo for every downloadable result.

        Args:
            keyword: search keyword (not read here; the query is carried by search_url["data"]).
            search_url: dict produced by _constructsearchurls; "url" is the endpoint and the
                remaining items are forwarded to self.post as keyword arguments.
            request_overrides: extra request options forwarded to the requests.
            song_infos: list that collected results are appended to; a new list is created when omitted.
            progress: optional rich Progress whose task description is updated with the outcome.
            progress_id: id of the task to update on progress.

        Returns:
            The song_infos list, extended in place.
        '''
        # init
        request_overrides = request_overrides or {}
        # a fresh list per call: a mutable default would be shared between unrelated searches
        song_infos = [] if song_infos is None else song_infos
        search_meta = copy.deepcopy(search_url)
        search_url = search_meta.pop('url')
        # successful
        try:
            # --search results
            (resp := self.post(search_url, verify=False, **search_meta, **request_overrides)).raise_for_status()
            for search_result in resp2json(resp=resp):
                # --download results
                if not isinstance(search_result, dict) or ('id' not in search_result): continue
                download_url, download_result = safeextractfromdict(search_result, ['url'], ""), {}
                # hostname is None for empty or relative URLs; without the fallback the membership
                # test raises TypeError and the whole page of results is dropped
                hostname = urlsplit(str(download_url)).hostname or ''
                if 'lanzouy.com' in hostname: download_result, download_url = LanZouYParser.parsefromurl(download_url)
                if (not download_url) or (not download_url.startswith('http')): continue
                song_info = SongInfo(
                    raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('name')), singers=legalizestring(', '.join(safeextractfromdict(search_result, ['artist'], []) or [])),
                    album=legalizestring(safeextractfromdict(search_result, ['album', 'name'], None)), ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result['id'], duration='-:-:-', lyric=None, cover_url=search_result.get('pic'),
                    download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
                )
                song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
                song_info.file_size = song_info.download_url_status['probe_status']['file_size']
                # prefer the probed extension when the one taken from the URL is not a known audio extension
                if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
                elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
                if not song_info.with_valid_download_url: continue
                # --lyric results (best effort: any failure leaves the song without lyric)
                try:
                    (resp := self.post('https://music.zhuolin.wang/plugns/api.php', verify=False, data={'types': 'lyric', 'id': search_result['id'], 'source': 'freemp3'})).raise_for_status()
                    lyric_result = resp2json(resp=resp)
                    lyric = safeextractfromdict(lyric_result, ['lyric'], '')
                    # the API may return a link to the lyric file instead of the lyric text
                    if lyric.startswith('http'): lyric = cleanlrc(self.get(lyric, **request_overrides).text)
                    lyric = lyric or 'NULL'
                    song_info.duration_s = extractdurationsecondsfromlrc(lyric)
                    song_info.duration = seconds2hms(song_info.duration_s)
                except Exception:
                    lyric_result, lyric = {}, 'NULL'
                song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
                song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
                # --append to song_infos
                song_infos.append(song_info)
                # --judgement for search_size
                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
            # --update progress
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
        # failure
        except Exception as err:
            # progress is optional; without the guard a missing progress would raise from inside this handler
            if progress is not None: progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
        # return
        return song_infos
|
||||
Reference in New Issue
Block a user