Initial import: Music_Server, MusicFree, catalog-sync

2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
@@ -0,0 +1,113 @@
+'''
+Function:
+    Implementation of FiveSongMusicClient: https://www.5song.xyz/index.html
+Author:
+    Zhenchao Jin
+WeChat Official Account (微信公众号):
+    Charles的皮卡丘
+'''
+import re
+from bs4 import BeautifulSoup
+from rich.progress import Progress
+from ..sources import BaseMusicClient
+from urllib.parse import urljoin, urlparse
+from ..utils import legalizestring, usesearchheaderscookies, searchdictbykey, seconds2hms, extractdurationsecondsfromlrc, cleanlrc, SongInfo, QuarkParser, AudioLinkTester
+
+
+'''FiveSongMusicClient'''
+class FiveSongMusicClient(BaseMusicClient):
+    source = 'FiveSongMusicClient'
+    MUSIC_QUALITY_RANK = {"DSD": 0, "WAV": 1, "FLAC": 2, "APE": 3, "ALAC": 4, "AAC": 5, "MP3": 6, "OGG": 7, "M4A": 8}
+    def __init__(self, **kwargs):
+        super(FiveSongMusicClient, self).__init__(**kwargs)
+        assert self.quark_parser_config.get('cookies'), f'{self.source}.__init__ >>> "quark_parser_config" is not configured, so the songs cannot be downloaded.'
+        self.default_search_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
+        self.default_download_headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"}
+        self.default_headers = self.default_search_headers
+        self._initsession()
+    '''_constructsearchurls'''
+    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
+        # init
+        rule, request_overrides = rule or {}, request_overrides or {}
+        # construct search urls
+        self.search_size_per_page = min(self.search_size_per_source, 10)
+        search_urls, page_size, count = [], self.search_size_per_page, 0
+        while self.search_size_per_source > count:
+            if int(count // page_size) + 1 == 1: search_urls.append(f'https://www.5song.xyz/search.html?keyword={keyword}')
+            else: search_urls.append(f'https://www.5song.xyz/search.html?page={int(count // page_size) + 1}&keyword={keyword}')
+            count += page_size
+        # return
+        return search_urls
+    '''_parsesearchresultsfromhtml'''
+    def _parsesearchresultsfromhtml(self, html_text: str):
+        soup, base_url, search_results = BeautifulSoup(html_text, "lxml"), "https://www.5song.xyz", []
+        for li in soup.select("div.list ul > li"):
+            if not (a := li.select_one("a[href]")): continue
+            href = a.get("href", "").strip(); detail_url = urljoin(base_url, href)
+            title_el = a.select_one("div.con div.t h3"); title = title_el.get_text(strip=True) if title_el else None
+            formats = [s.get_text(strip=True) for s in a.select("div.con div.t span") if s.get_text(strip=True)]
+            singer_el = a.select_one("div.singerNum div.singer"); date_el = a.select_one("div.singerNum div.date"); num_el = a.select_one("div.singerNum div.num")
+            singer = singer_el.get_text(strip=True) if singer_el else None; date = date_el.get_text(strip=True) if date_el else None
+            num = num_el.get_text(strip=True) if num_el else None; img = a.select_one("div.pic img")
+            cover_url = urljoin(base_url, img.get("src")) if img and img.get("src") else None
+            search_results.append({"title": title, "formats": formats, "singer": singer, "date": date, "num": num, "detail_url": detail_url, "cover_url": cover_url})
+        return search_results
+    '''_search'''
+    @usesearchheaderscookies
+    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
+        # init
+        request_overrides, base_url = request_overrides or {}, "https://www.5song.xyz"
+        guess_format_func = lambda label: (m.group(1) if (m := re.search(r"(DSD|WAV|FLAC|APE|ALAC|AAC|MP3|OGG|M4A)", str(label).upper())) else None)
+        sort_by_audio_quality_func = lambda link_list: sorted(link_list, key=lambda x: (FiveSongMusicClient.MUSIC_QUALITY_RANK.get((fmt := guess_format_func(x.get("label", ""))), 999), fmt or ""))
+        # successful
+        try:
+            # --search results
+            (resp := self.get(search_url, **request_overrides)).raise_for_status()
+            search_results = self._parsesearchresultsfromhtml(resp.text)
+            for search_result in search_results:
+                # --download results
+                if not isinstance(search_result, dict) or ('detail_url' not in search_result): continue
+                song_info, song_id = SongInfo(source=self.source), urlparse(str(search_result['detail_url'])).path.strip('/').split('/')[-1].split('.')[0]
+                # ----fetch basic information
+                try: (resp := self.get(search_result['detail_url'], **request_overrides)).raise_for_status()
+                except Exception: continue
+                soup, quark_links = BeautifulSoup(resp.text, "lxml"), []
+                for li in soup.select("div.download ul li[data-url]"):
+                    if not (quark_url := (li.get("data-url") or "").strip()): continue
+                    a = li.select_one("a[href]"); label = a.get_text(" ", strip=True) if a else None
+                    pc_download_href = a.get("href", "").strip() if a else None
+                    pc_download_url = urljoin(base_url, pc_download_href) if pc_download_href else None
+                    if "quark" in quark_url: quark_links.append({"label": label, "quark_url": quark_url, "pc_download_url": pc_download_url})
+                if not quark_links: continue
+                download_result = dict(quark_links=quark_links)
+                # ----parse from quark links
+                for quark_link in sort_by_audio_quality_func(download_result['quark_links']):
+                    download_result['quark_parse_result'], download_url = QuarkParser.parsefromurl(quark_link['quark_url'], **self.quark_parser_config)
+                    if not download_url or not str(download_url).startswith('http'): continue
+                    duration = [int(float(d)) for d in searchdictbykey(download_result, 'duration') if int(float(d)) > 0]
+                    duration_in_secs = duration[0] if duration else 0
+                    song_info = SongInfo(
+                        raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, source=self.source, song_name=legalizestring(search_result.get('title')), singers=legalizestring(search_result.get('singer')), album='NULL', ext='mp3', file_size_bytes=None, file_size=None, 
+                        identifier=song_id, duration_s=duration_in_secs, duration=seconds2hms(duration_in_secs), lyric=cleanlrc("\n".join([p.get_text(strip=True) for p in soup.select_one("div.viewCon div.text").select("p") if p.get_text(strip=True)])), cover_url=search_result.get('cover_url'),
+                        download_url=download_url, download_url_status=self.quark_audio_link_tester.test(download_url, request_overrides), default_download_headers=self.quark_default_download_headers,
+                    )
+                    song_info.download_url_status['probe_status'] = self.quark_audio_link_tester.probe(song_info.download_url, request_overrides)
+                    song_info.file_size = song_info.download_url_status['probe_status']['file_size']; song_info.ext = song_info.download_url_status['probe_status']['ext']
+                    if (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS) and (song_info.download_url_status['probe_status']['ext'] in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = song_info.download_url_status['probe_status']['ext']
+                    elif (song_info.ext not in AudioLinkTester.VALID_AUDIO_EXTS): song_info.ext = 'mp3'
+                    if song_info.with_valid_download_url: break
+                if not song_info.lyric or '歌词获取失败' in song_info.lyric: song_info.lyric = 'NULL'
+                if not song_info.duration or song_info.duration == '-:-:-': song_info.duration = seconds2hms(extractdurationsecondsfromlrc(song_info.lyric))
+                # ----filter if invalid
+                if not song_info.with_valid_download_url: continue
+                # --append to song_infos
+                song_infos.append(song_info)
+                # --judgement for search_size
+                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
+            # --update progress
+            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
+        # failure
+        except Exception as err:
+            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
+        # return
+        return song_infos