Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,150 @@
|
||||
'''
|
||||
Function:
|
||||
Implementation of YinyuedaoMusicClient: https://1mp3.top/
|
||||
Author:
|
||||
Zhenchao Jin
|
||||
WeChat Official Account (微信公众号):
|
||||
Charles的皮卡丘
|
||||
'''
|
||||
import re
|
||||
import base64
|
||||
from html import unescape
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.progress import Progress
|
||||
from ..sources import BaseMusicClient
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from ..utils import legalizestring, usesearchheaderscookies, safeextractfromdict, seconds2hms, searchdictbykey, resp2json, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
|
||||
|
||||
|
||||
'''YinyuedaoMusicClient'''
|
||||
class YinyuedaoMusicClient(BaseMusicClient):
|
||||
source = 'YinyuedaoMusicClient'
|
||||
MUSIC_QUALITY_RANK = {"DSD": 100, "DSF": 100, "DFF": 100, "WAV": 95, "AIFF": 95, "FLAC": 90, "ALAC": 90, "APE": 88, "WV": 88, "OPUS": 70, "AAC": 65, "M4A": 65, "OGG": 60, "VORBIS": 60, "MP3": 50, "WMA": 45}
|
||||
def __init__(self, **kwargs):
|
||||
super(YinyuedaoMusicClient, self).__init__(**kwargs)
|
||||
if not self.quark_parser_config.get('cookies'): self.logger_handle.warning(f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so song downloads are restricted and only mp3 files can be downloaded.')
|
||||
self.default_search_headers = {
|
||||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7", "priority": "u=0, i", "referer": "https://1mp3.top/",
|
||||
"sec-ch-ua": "\"Chromium\";v=\"142\", \"Google Chrome\";v=\"142\", \"Not_A Brand\";v=\"99\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-site": "same-origin", "sec-fetch-user": "?1", "upgrade-insecure-requests": "1",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
|
||||
}
|
||||
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
|
||||
self.default_headers = self.default_search_headers
|
||||
self._initsession()
|
||||
'''_constructsearchurls'''
|
||||
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
|
||||
# init
|
||||
rule, request_overrides = rule or {}, request_overrides or {}
|
||||
# construct search urls
|
||||
search_urls = [f'https://1mp3.top/search.html?keyword={keyword}']
|
||||
self.search_size_per_page = self.search_size_per_source
|
||||
# return
|
||||
return search_urls
|
||||
'''_parsemusicpage'''
|
||||
def _parsemusicpage(self, html_text: str, base_url: str = ""):
|
||||
soup, lyrics = BeautifulSoup(html_text, "html.parser"), "NULL"
|
||||
if (article := soup.select_one("section#demo article")): lyrics = re.sub(r"\n+", "\n", unescape(article.get_text("\n", strip=True))).strip()
|
||||
cover = ""; img = soup.select_one("#album-cover") or soup.select_one(".cover-art img")
|
||||
if img and img.get("src"): cover = urljoin(base_url, img["src"].strip())
|
||||
links, seen = [], set()
|
||||
for a in soup.select("a.download-link[data-url]"):
|
||||
fmt = (a.get("data-format") or "").strip().upper(); text = a.get_text(" ", strip=True)
|
||||
if not (url := (a.get("data-url") or "").strip()): continue
|
||||
fmt = fmt or ((m.group(1).upper()) if (m := re.search(r"\b(DSD|DSF|DFF|WAV|AIFF|FLAC|ALAC|APE|WV|OPUS|AAC|M4A|OGG|VORBIS|MP3|WMA)\b", text, re.I)) else None)
|
||||
item = {"format": fmt, "score": YinyuedaoMusicClient.MUSIC_QUALITY_RANK.get(fmt, -1), "url": urljoin(base_url, url), "text": text}
|
||||
if (key := (item["format"], item["url"])) not in seen: seen.add(key); links.append(item)
|
||||
links.sort(key=lambda x: (-x["score"], x["format"], x["url"]))
|
||||
return {"lyrics": lyrics, "cover": cover, "quark_links": links}
|
||||
'''_parsesearchresultsfromhtml'''
|
||||
def _parsesearchresultsfromhtml(self, html_text, base_url="https://www.1mp3.top"):
|
||||
soup, search_results = BeautifulSoup(html_text, "html.parser"), []
|
||||
for a in soup.select('a[href^="/mdetail/"]'):
|
||||
if len((cols := a.select("div.row > div"))) < 2: continue
|
||||
token = (href := a.get("href", "")).rsplit("/", 1)[-1]
|
||||
try: music_id = base64.b64decode(token).decode(errors="ignore").split("|", 1)[0]
|
||||
except Exception: music_id = token
|
||||
search_results.append({"id": music_id, "title": cols[0].get_text(" ", strip=True), "singer": cols[1].get_text(" ", strip=True), "url": urljoin(base_url, href)})
|
||||
return search_results
|
||||
'''_parsesearchresultfromquark'''
|
||||
def _parsesearchresultfromquark(self, search_result: dict, download_result: dict, request_overrides: dict = None):
|
||||
# init
|
||||
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
|
||||
extract_duration_func = lambda s: float(re.search(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]", s).group(1))
|
||||
# parse
|
||||
for quark_download_url in download_result['quark_links']:
|
||||
if not isinstance(quark_download_url, dict) or not safeextractfromdict(quark_download_url, ['format'], ''): continue
|
||||
download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_download_url['url'], **self.quark_parser_config)
|
||||
if not download_url or not str(download_url).startswith('http'): continue
|
||||
duration = [int(float(d)) for d in searchdictbykey(download_result['quark_parse_result'], 'duration') if int(float(d)) > 0]
|
||||
duration_in_secs = duration[0] if duration else 0
|
||||
song_info = SongInfo(
|
||||
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
|
||||
ext=str(quark_download_url.get('format', 'mp3')).lower(), file_size=None, identifier=search_result['id'], duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc(download_result.get('lyrics')),
|
||||
cover_url=download_result.get("cover"), download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
|
||||
)
|
||||
if song_info.ext in {'mgg'}: continue
|
||||
song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
|
||||
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
|
||||
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
||||
if song_info.with_valid_download_url: break
|
||||
if (not song_info.duration or song_info.duration == '-:-:-') and (re.search(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]", str(song_info.lyric))): song_info.duration_s = extract_duration_func(song_info.lyric.split('\n')[-1]); song_info.duration = seconds2hms(song_info.duration_s)
|
||||
# return
|
||||
return song_info
|
||||
'''_parsesearchresultfromweb'''
|
||||
def _parsesearchresultfromweb(self, search_result: dict, download_result: dict, request_overrides: dict = None):
|
||||
# init
|
||||
request_overrides, song_info = request_overrides or {}, SongInfo(source=self.source)
|
||||
encrypted_id = urlparse(str(search_result["url"])).path.strip('/').split('/')[-1]
|
||||
extract_duration_func = lambda s: float(re.search(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]", s).group(1))
|
||||
# parse
|
||||
try: (resp := self.get(f'https://1mp3.top/geturl?id={encrypted_id}&quality=exhigh&type=json', **request_overrides)).raise_for_status()
|
||||
except Exception: return song_info
|
||||
download_result['geturl'] = resp2json(resp=resp); download_url = safeextractfromdict(download_result['geturl'], ['data', 'url'], None)
|
||||
if not download_url or not str(download_url).startswith('http'): return song_info
|
||||
song_info = SongInfo(
|
||||
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL',
|
||||
ext=download_url.split('?')[0].split('.')[-1], file_size=None, identifier=search_result.get('id'), duration_s=None, duration='-:-:-', lyric=cleanlrc(download_result.get('lyrics')), cover_url=download_result.get("cover"),
|
||||
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
|
||||
)
|
||||
if song_info.ext in {'mgg'}: return SongInfo(source=self.source)
|
||||
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
|
||||
song_info.file_size = song_info.download_url_status['probe_status']['file_size']
|
||||
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
||||
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
||||
if (not song_info.duration or song_info.duration == '-:-:-') and (re.search(r"\[\s*([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*\]", str(song_info.lyric))): song_info.duration_s = extract_duration_func(song_info.lyric.split('\n')[-1]); song_info.duration = seconds2hms(song_info.duration_s)
|
||||
# return
|
||||
return song_info
|
||||
'''_search'''
|
||||
@usesearchheaderscookies
|
||||
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
|
||||
# init
|
||||
request_overrides = request_overrides or {}
|
||||
# successful
|
||||
try:
|
||||
# --search results
|
||||
(resp := self.get(search_url, **request_overrides)).raise_for_status()
|
||||
search_results = self._parsesearchresultsfromhtml(resp.text)
|
||||
for search_result in search_results:
|
||||
# --download results
|
||||
if not isinstance(search_result, dict) or ('id' not in search_result) or ('url' not in search_result): continue
|
||||
song_info = SongInfo(source=self.source)
|
||||
try: (resp := self.get(search_result['url'], **request_overrides)).raise_for_status(); download_result: dict = self._parsemusicpage(resp.text)
|
||||
except Exception: continue
|
||||
# ----parse from quark links
|
||||
if self.quark_parser_config.get('cookies'): song_info = self._parsesearchresultfromquark(search_result, download_result, request_overrides)
|
||||
# ----parse from play url
|
||||
if not song_info.with_valid_download_url: song_info = self._parsesearchresultfromweb(search_result, download_result, request_overrides)
|
||||
# ----filter if invalid
|
||||
if not song_info.with_valid_download_url: continue
|
||||
# --append to song_infos
|
||||
song_infos.append(song_info)
|
||||
# --judgement for search_size
|
||||
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
|
||||
# --update progress
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
|
||||
# failure
|
||||
except Exception as err:
|
||||
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
|
||||
# return
|
||||
return song_infos
|
||||
Reference in New Issue
Block a user