147 lines
12 KiB
Python
147 lines
12 KiB
Python
'''
|
|
Function:
|
|
Implementation of FiveSingMusicClient: https://5sing.kugou.com/index.html
|
|
Author:
|
|
Zhenchao Jin
|
|
WeChat Official Account (微信公众号):
|
|
Charles的皮卡丘
|
|
'''
|
|
import os
|
|
import re
|
|
import copy
|
|
from bs4 import BeautifulSoup
|
|
from .base import BaseMusicClient
|
|
from pathvalidate import sanitize_filepath
|
|
from ..utils.hosts import FIVESING_MUSIC_HOSTS
|
|
from urllib.parse import urlencode, urlparse, urljoin
|
|
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
|
|
from ..utils import touchdir, legalizestring, byte2mb, resp2json, usesearchheaderscookies, safeextractfromdict, extractdurationsecondsfromlrc, seconds2hms, useparseheaderscookies, obtainhostname, hostmatchessuffix, cleanlrc, SongInfo, AudioLinkTester
|
|
|
|
|
|
'''FiveSingMusicClient'''
|
|
class FiveSingMusicClient(BaseMusicClient):
|
|
source = 'FiveSingMusicClient'
|
|
MUSIC_QUALITIES = ['sq', 'hq', 'lq']
|
|
def __init__(self, **kwargs):
|
|
super(FiveSingMusicClient, self).__init__(**kwargs)
|
|
self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", "Referer": "https://5sing.kugou.com/"}
|
|
self.default_parse_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", "Referer": "https://5sing.kugou.com/"}
|
|
self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"}
|
|
self.default_headers = self.default_search_headers
|
|
self._initsession()
|
|
'''_constructsearchurls'''
|
|
def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
|
|
# init
|
|
rule, request_overrides = rule or {}, request_overrides or {}
|
|
# search rules
|
|
default_rule = {'keyword': keyword, 'sort': 1, 'page': 1, 'filter': 0, 'type': 0}
|
|
default_rule.update(rule)
|
|
# construct search urls based on search rules
|
|
base_url = 'http://search.5sing.kugou.com/home/json?'
|
|
search_urls, page_size, count = [], self.search_size_per_page, 0
|
|
while self.search_size_per_source > count:
|
|
page_rule = copy.deepcopy(default_rule)
|
|
page_rule['page'] = int(count // page_size) + 1
|
|
search_urls.append(base_url + urlencode(page_rule))
|
|
count += page_size
|
|
# return
|
|
return search_urls
|
|
'''_parsewithofficialapiv1'''
|
|
def _parsewithofficialapiv1(self, search_result: dict, song_info_flac: SongInfo = None, lossless_quality_is_sufficient: bool = True, lossless_quality_definitions: set | list | tuple = {'flac'}, request_overrides: dict = None) -> "SongInfo":
|
|
# init
|
|
song_info, request_overrides, song_info_flac = SongInfo(source=self.source), request_overrides or {}, song_info_flac or SongInfo(source=self.source)
|
|
if (not isinstance(search_result, dict)) or (not (song_id := search_result.get('songId'))) or (not (song_type := search_result.get('typeEname'))): return song_info
|
|
# obtain basic song_info
|
|
if lossless_quality_is_sufficient and song_info_flac.with_valid_download_url and (song_info_flac.ext in lossless_quality_definitions): song_info = song_info_flac
|
|
else:
|
|
(resp := self.get('http://mobileapi.5sing.kugou.com/song/getSongUrl', params={'songid': str(song_id), 'songtype': song_type}, **request_overrides)).raise_for_status()
|
|
download_result: dict = resp2json(resp)
|
|
for quality in FiveSingMusicClient.MUSIC_QUALITIES:
|
|
download_url = safeextractfromdict(download_result, ['data', f'{quality}url'], '') or safeextractfromdict(download_result, ['data', f'{quality}url_backup'], '')
|
|
if not download_url or not (str(download_url).startswith('http')): continue
|
|
song_info = SongInfo(
|
|
raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('songName')), singers=legalizestring(search_result.get('singer')), album='NULL', ext=safeextractfromdict(download_result, ['data', f'{quality}ext'], 'mp3'),
|
|
file_size_bytes=safeextractfromdict(download_result, ['data', f'{quality}size'], 0), file_size=byte2mb(safeextractfromdict(download_result, ['data', f'{quality}size'], 0)), identifier=song_id, duration='-:-:-', lyric=None, cover_url=safeextractfromdict(download_result, ['data', 'user', 'I'], None),
|
|
download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
|
|
)
|
|
song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
|
|
song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
|
|
if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
|
|
elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
|
|
if song_info.with_valid_download_url: break
|
|
if not song_info.with_valid_download_url: return song_info
|
|
# supplement lyric results
|
|
params = {'songid': str(song_id), 'songtype': song_type, 'songfields': '', 'userfields': ''}
|
|
try: (resp := self.get('http://mobileapi.5sing.kugou.com/song/newget', params=params, **request_overrides)).raise_for_status(); lyric = cleanlrc(safeextractfromdict((lyric_result := resp2json(resp)), ['data', 'dynamicWords'], '')) or 'NULL'
|
|
except Exception: lyric_result, lyric = dict(), 'NULL'
|
|
song_info.raw_data['lyric'] = lyric_result if lyric_result else song_info.raw_data['lyric']
|
|
song_info.lyric = lyric if (lyric and (lyric not in {'NULL'})) else song_info.lyric
|
|
song_info.album = legalizestring(safeextractfromdict(lyric_result, ['data', 'albumName'], None))
|
|
song_info.cover_url = safeextractfromdict(lyric_result, ['data', 'user', 'I'], None)
|
|
if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
|
|
# return
|
|
return song_info
|
|
'''_search'''
|
|
@usesearchheaderscookies
|
|
def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
|
|
# init
|
|
request_overrides = request_overrides or {}
|
|
# successful
|
|
try:
|
|
# --search results
|
|
(resp := self.get(search_url, **request_overrides)).raise_for_status()
|
|
for search_result in resp2json(resp)['list']:
|
|
# --parse with official apis
|
|
try: song_info = self._parsewithofficialapiv1(search_result=search_result, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
|
|
except Exception: song_info = SongInfo(source=self.source)
|
|
# --append to song_infos
|
|
if not song_info.with_valid_download_url: continue
|
|
song_infos.append(song_info)
|
|
# --judgement for search_size
|
|
if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
|
|
# --update progress
|
|
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
|
|
# failure
|
|
except Exception as err:
|
|
progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
|
|
# return
|
|
return song_infos
|
|
'''parseplaylist'''
|
|
@useparseheaderscookies
|
|
def parseplaylist(self, playlist_url: str, request_overrides: dict = None):
|
|
# init
|
|
request_overrides = request_overrides or {}
|
|
playlist_url = self.session.head(playlist_url, allow_redirects=True, **request_overrides).url
|
|
playlist_id, song_infos = urlparse(playlist_url).path.strip('/').split('/')[-1].removesuffix('.html').removesuffix('.htm'), []
|
|
if (not (hostname := obtainhostname(url=playlist_url))) or (not hostmatchessuffix(hostname, FIVESING_MUSIC_HOSTS)): return song_infos
|
|
# get tracks in playlist
|
|
try: playlist_result = resp2json(self.get(f'http://mobileapi.5sing.kugou.com/song/getsonglist?id={playlist_id}&songfields=ID,user', **request_overrides))
|
|
except Exception: playlist_result = dict()
|
|
soup, playlist_result['song_list'] = BeautifulSoup(self.get(playlist_url, **request_overrides).text, "lxml"), []
|
|
for li in soup.select("ul.dj_songitems > li"):
|
|
title_a, singer_a = li.select_one("span.s_title a.songlist_hits"), li.select_one("span.s_soner a")
|
|
info_node = li.select_one("a.paly_btn[songinfo]") or li.select_one("a.add_btn[songinfo]")
|
|
kind, song_id = ((m.group(1), m.group(2)) if (m := re.match(r"([a-z]+)\$(\d+)", str(info_node["songinfo"]))) else (None, None)) if info_node and info_node.has_attr("songinfo") else (None, None)
|
|
coll_btn = li.select_one("a.coll_btn[songid]")
|
|
song_id, kind = (coll_btn.get("songid"), {"1": "yc", "2": "fc", "3": "bz"}.get(coll_btn.get("songkind"))) if (not song_id) and coll_btn else (song_id, kind)
|
|
playlist_result['song_list'].append({"songName": title_a.get_text(strip=True) if title_a else None, "songId": song_id, "typeEname": kind, "song_url": urljoin("http://5sing.kugou.com", title_a["href"]) if title_a and title_a.has_attr("href") else None, "singer": singer_a.get_text(strip=True) if singer_a else None, "singer_url": urljoin("http://5sing.kugou.com", singer_a["href"]) if singer_a and singer_a.has_attr("href") else None})
|
|
tracks_in_playlist = playlist_result['song_list']
|
|
# parse track by track in playlist
|
|
with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn(), refresh_per_second=10) as main_process_context:
|
|
main_progress_id = main_process_context.add_task(f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed (0/{len(tracks_in_playlist)})", total=len(tracks_in_playlist))
|
|
for idx, track_info in enumerate(tracks_in_playlist):
|
|
if idx > 0: main_process_context.advance(main_progress_id, 1)
|
|
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx}/{len(tracks_in_playlist)})")
|
|
try: song_info = self._parsewithofficialapiv1(search_result=track_info, song_info_flac=None, lossless_quality_is_sufficient=False, request_overrides=request_overrides)
|
|
except Exception: song_info = SongInfo(source=self.source)
|
|
if song_info.with_valid_download_url: song_infos.append(song_info)
|
|
main_process_context.advance(main_progress_id, 1)
|
|
main_process_context.update(main_progress_id, description=f"{len(tracks_in_playlist)} songs found in playlist {playlist_id} >>> completed ({idx+1}/{len(tracks_in_playlist)})")
|
|
# post processing
|
|
playlist_name = safeextractfromdict(playlist_result, ['data', 'T'], None)
|
|
song_infos = self._removeduplicates(song_infos=song_infos); work_dir = self._constructuniqueworkdir(keyword=legalizestring(playlist_name or f"playlist-{playlist_id}"))
|
|
for song_info in song_infos:
|
|
song_info.work_dir = work_dir; episodes = song_info.episodes if isinstance(song_info.episodes, list) else []
|
|
for eps_info in episodes: eps_info.work_dir = sanitize_filepath(os.path.join(work_dir, song_info.song_name)); touchdir(work_dir)
|
|
# return results
|
|
return song_infos |