Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,145 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of FangpiMusicClient: https://www.fangpi.net/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import ast
|
||||
import json_repair
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, resp2json, safeextractfromdict, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''FangpiMusicClient'''
|
||||
class FangpiMusicClient(BaseMusicClient):
|
||||
source = 'FangpiMusicClient'
|
||||
def __init__(self, **kwargs):
|
||||
super(FangpiMusicClient, self).__init__(**kwargs)
|
||||
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
|
||||
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36", "referer": "https://www.fangpi.net/"}
|
||||
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
|
||||
self.default_headers = self.default_search_headers
|
||||
self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
|
||||
# init
|
||||
rule, request_overrides = rule or {}, request_overrides or {}
|
||||
# construct search urls
|
||||
search_urls = [f'https://www.fangpi.net/s/{keyword}']
|
||||
self.search_size_per_page = self.search_size_per_source
|
||||
# return
|
||||
return search_urls
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html, base_url="https://www.fangpi.net"):
|
||||
soup, search_results, seen = BeautifulSoup(html, "lxml"), [], set()
|
||||
result_card = next((card for card in soup.select("div.card") if "搜索结果" in card.get_text(" ", strip=True) and card.select_one("h1.mark")), None)
|
||||
if result_card is None: return []
|
||||
for row in result_card.select("div.row"):
|
||||
detail, action = row.select_one('a[href^="/music/"][title]'), row.select_one('a.btn[href^="/music/"]')
|
||||
if not detail or not action: continue
|
||||
if (url := urljoin(base_url, detail["href"])) in seen: continue
|
||||
seen.add(url); search_results.append({"id": detail["href"].rsplit("/", 1)[-1], "name": (row.select_one("span.text-primary") or detail).get_text(strip=True), "artist": row.select_one("small.text-jade").get_text(strip=True), "title": detail.get("title", "").strip(), "url": url})
|
||||
return search_results
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
|
||||
# init
|
||||
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
|
||||
# parse
|
||||
for quark_download_url in (download_result.get('mp3_extra_urls', []) or []):
|
||||
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['share_link'], **self.quark_parser_config)
|
||||
if not download_url or not str(download_url).startswith('http'): continue
|
||||
duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
|
||||
duration_in_secs = duration[0] if duration else 0
|
||||
song_info = SongInfo(
|
||||
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext='mp3', file_size='NULL',
|
||||
identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(soup.find("div", id="content-lrc").get_text("\n", strip=True)),
|
||||
cover_url=safeextractfromdict(download_result, ['mp3_cover'], None), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
|
||||
)
|
||||
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
|
||||
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
||||
if song_info.with_valid_download_url: break
|
||||
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
|
||||
format_duration_func = lambda d: "{:02}:{:02}:{:02}".format(*([0] * (3 - len(d.split(":"))) + list(map(int, d.split(":")))))
|
||||
song_info.duration = format_duration_func(download_result.get('mp3_duration', '00:00:00') or '00:00:00')
|
||||
if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
|
||||
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
|
||||
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
|
||||
# return
|
||||
return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, soup: BeautifulSoup, request_overrides: dict = None):
|
||||
# init
|
||||
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
|
||||
# parse
|
||||
if 'play_id' not in download_result or not download_result['play_id']: return song_info
|
||||
try: (resp := self.post('https://www.fangpi.net/api/play-url', json={'id': download_result['play_id']}, **request_overrides)).raise_for_status(); download_result['api/play-url'] = resp2json(resp=resp)
|
||||
except Exception: download_result['api/play-url'] = {}
|
||||
download_url = safeextractfromdict(download_result['api/play-url'], ['data', 'url'], '')
|
||||
if not download_url or not download_url.startswith('http'): return song_info
|
||||
song_info = SongInfo(
|
||||
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(download_result.get('mp3_title')), singers=legalizestring(download_result.get('mp3_author')), album='NULL', ext=download_url.split('?')[0].split('.')[-1], file_size='NULL',
|
||||
identifier=download_result.get('mp3_id') or urlparse(str(search_result['url'])).path.strip('/').split('/')[-1], duration_s=None, duration='-:-:-', lyric=cleanlrc(soup.find("div", id="content-lrc").get_text("\n", strip=True)), cover_url=safeextractfromdict(download_result, ['mp3_cover'], None),
|
||||
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
|
||||
)
|
||||
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
|
||||
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
|
||||
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
||||
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00':
|
||||
try: song_info.duration = '{:02d}:{:02d}:{:02d}'.format(*([0,0,0] + list(map(int, re.findall(r'\d+', safeextractfromdict(download_result, ['data', 'duration'], '')))))[-3:])
|
||||
except Exception: song_info.duration = '-:-:-'
|
||||
if song_info.duration == '00:00:00': song_info.duration = '-:-:-'
|
||||
if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
|
||||
if not song_info.duration or song_info.duration == '-:-:-' or song_info.duration == '00:00:00': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
|
||||
# return
|
||||
return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
|
||||
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
|
||||
# init
|
||||
request_overrides = request_overrides or {}
|
||||
# successful
|
||||
try:
|
||||
# --search results
|
||||
(resp := self.get(search_url, **request_overrides)).raise_for_status()
|
||||
search_results = self._parsesearchresultsfromhtml(resp.text)
|
||||
for search_result in search_results:
|
||||
# --download results
|
||||
if not isinstance(search_result, dict) or ('url' not in search_result): continue
|
||||
song_info = SongInfo(source=self.source)
|
||||
# ----fetch basic information
|
||||
try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status()
|
||||
except Exception: continue
|
||||
script_tag = (soup := BeautifulSoup(resp.text, "lxml")).find("script", string=re.compile(r"window\.appData"))
|
||||
if script_tag is None: continue
|
||||
js_text: str = script_tag.string
|
||||
if not (m := re.search(r'JSON\.parse\(\s*(?P<lit>(["\'])(?:\\.|(?!\2).)*?\2)\s*\)', js_text, re.S)): continue
|
||||
download_result = json_repair.loads(ast.literal_eval(m.group('lit')))
|
||||
if download_result.get("mp3_cover"): download_result["mp3_cover"] = str(download_result["mp3_cover"]).replace("\\/", "/")
|
||||
if download_result.get("extra_recommend_wap_url"): download_result["extra_recommend_wap_url"] = str(download_result["extra_recommend_wap_url"]).replace("\\/", "/")
|
||||
for share_link in (download_result.get("mp3_extra_urls", []) or []): isinstance(share_link, dict) and share_link.__setitem__('share_link', str(share_link.get('share_link', '')).replace("\\/", "/"))
|
||||
# ----parse from quark links
|
||||
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, soup, request_overrides)
|
||||
# ----parse from play url
|
||||
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, soup, request_overrides)
|
||||
# ----filter if invalid
|
||||
if not song_info.with_valid_download_url: continue
|
||||
# --append to song_infos
|
||||
song_infos.append(song_info)
|
||||
# --judgement for search_size
|
||||
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
|
||||
# --update progress
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
|
||||
# failure
|
||||
except Exception as err:
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
|
||||
# return
|
||||
return song_infos
|
||||
Reference in New Issue
Block a user