Initial import: Music_Server, MusicFree, catalog-sync

This commit is contained in:
2026-05-23 16:51:14 +08:00
commit 069af30dba
847 changed files with 179878 additions and 0 deletions
+394
View File
@@ -0,0 +1,394 @@
'''
Function:
Implementation of Common Utils
Author:
Zhenchao Jin
WeChat Official Account (微信公众号):
Charles的皮卡丘
'''
from __future__ import annotations
import re
import os
import html
import copy
import emoji
import errno
import pickle
import shutil
import bleach
import hashlib
import requests
import functools
import json_repair
import unicodedata
from io import BytesIO
from pathlib import Path
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
from mutagen.asf import ASF
from mutagen.flac import FLAC
from mutagen.aiff import AIFF
from mutagen.wave import WAVE
from bs4 import BeautifulSoup
from http.cookies import SimpleCookie
from .importutils import optionalimport
from mutagen import File as MutagenFile
from mutagen.oggvorbis import OggVorbis
from pathvalidate import sanitize_filepath, sanitize_filename
def remove_suffix(value: str, suffix: str) -> str:
    """Return ``value`` with one trailing ``suffix`` removed.

    ``value`` is returned unchanged when ``suffix`` is empty/falsy or when
    ``value`` does not end with it.
    """
    has_suffix = bool(suffix) and value.endswith(suffix)
    return value[: len(value) - len(suffix)] if has_suffix else value
'''estimatedurationwithfilesizebr'''
def estimatedurationwithfilesizebr(file_size_bytes: int, br_kbps: float, return_seconds: bool = False) -> str:
    """Estimate a playback duration from a file size and a bitrate.

    Args:
        file_size_bytes: size of the audio file in bytes.
        br_kbps: bitrate in kilobits per second (1 kbit = 1000 bits here).
        return_seconds: when true, return the truncated duration as an ``int``
            number of seconds instead of a formatted string.

    Returns:
        ``"HH:MM:SS"`` (or the integer seconds when ``return_seconds`` is set).
        The placeholder ``"-:-:-"`` is returned whenever either input is
        missing/zero or the bitrate is not positive, regardless of
        ``return_seconds``.
    """
    if not (file_size_bytes and br_kbps) or br_kbps <= 0:
        return "-:-:-"
    duration_seconds = int(file_size_bytes * 8 / (br_kbps * 1000))
    if return_seconds:
        return duration_seconds
    minutes, seconds = divmod(duration_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
'''estimatedurationwithfilelink'''
def estimatedurationwithfilelink(filelink: str = '', headers: dict = None, request_overrides: dict = None):
    """Download ``filelink`` and return the audio length in whole seconds.

    The complete response body is fetched with ``requests.get`` (10 s timeout
    unless overridden through ``request_overrides``) and handed to mutagen.

    Args:
        filelink: URL of the audio file.
        headers: optional request headers.
        request_overrides: extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The length reported by mutagen truncated to ``int``, or ``0`` when the
        download fails, the data is not recognised, or no length is available.
    """
    headers, request_overrides = headers or {}, request_overrides or {}
    try:
        resp = requests.get(filelink, headers=headers, timeout=10, **request_overrides)
        resp.raise_for_status()
        audio = MutagenFile(BytesIO(resp.content))
        # mutagen returns None when it cannot identify the container format
        if audio is None:
            return 0
        return int(getattr(audio.info, "length", 0))
    except Exception:
        # best-effort helper: any network/parsing problem means "unknown duration",
        # but KeyboardInterrupt/SystemExit are no longer swallowed
        return 0
'''cookies2dict'''
def cookies2dict(cookies: str | dict = None):
    """Normalise cookies to a ``dict``.

    Args:
        cookies: a ``Cookie`` header style string, a dict, or a falsy value.

    Returns:
        The dict itself when a non-empty dict is given, an empty dict for falsy
        input, or ``{name: value}`` parsed from the string via ``SimpleCookie``.

    Raises:
        TypeError: for any other (truthy) input type.
    """
    cookies = cookies or {}
    if isinstance(cookies, dict):
        return cookies
    if not isinstance(cookies, str):
        raise TypeError(f'cookies type is "{type(cookies)}", expect cookies to "str" or "dict" or "None".')
    jar = SimpleCookie()
    jar.load(cookies)
    return {name: morsel.value for name, morsel in jar.items()}
'''cookies2string'''
def cookies2string(cookies: str | dict = None):
    """Normalise cookies to a ``Cookie`` header style string.

    Args:
        cookies: a cookie string, a dict of name -> value, or a falsy value.

    Returns:
        The string itself when a string is given, ``""`` for falsy input, or
        the dict entries encoded through ``SimpleCookie`` and joined with
        ``"; "``. ``None`` values are stored as empty strings and every other
        value is converted with ``str``.

    Raises:
        TypeError: for any other (truthy) input type.
    """
    cookies = cookies or ""
    if isinstance(cookies, str):
        return cookies
    if not isinstance(cookies, dict):
        raise TypeError(f'cookies type is "{type(cookies)}", expect cookies to "str" or "dict" or "None".')
    jar = SimpleCookie()
    for name, value in cookies.items():
        jar[name] = "" if value is None else str(value)
    return "; ".join(morsel.OutputString() for morsel in jar.values())
'''touchdir'''
def touchdir(directory, exist_ok=True, mode=511, auto_sanitize=True):
    """Create ``directory`` (and missing parents) via ``os.makedirs``.

    Args:
        directory: path of the directory to create.
        exist_ok: forwarded to ``os.makedirs``.
        mode: forwarded to ``os.makedirs``; the default 511 equals ``0o777``.
        auto_sanitize: when true, the path is first passed through
            ``sanitize_filepath``.

    Returns:
        Whatever ``os.makedirs`` returns (``None``).
    """
    target = sanitize_filepath(directory) if auto_sanitize else directory
    return os.makedirs(target, mode=mode, exist_ok=exist_ok)
'''replacefile'''
def replacefile(src: str, dest: str):
    """Move ``src`` over ``dest``, falling back to a copy across filesystems.

    ``os.replace`` is tried first. Only when it fails with ``EXDEV`` (source
    and destination on different devices) is the fallback used: an existing
    destination file is removed and ``shutil.move`` performs the transfer.

    Args:
        src: path of the file to move.
        dest: destination path; an existing file there is overwritten.

    Raises:
        OSError: the original error from ``os.replace`` for anything other
            than a cross-device move (e.g. ``FileNotFoundError``).
        IsADirectoryError: in the cross-device fallback when ``dest`` is an
            existing directory.
    """
    try:
        os.replace(src, dest)
        return
    except OSError as exc:
        # re-raise the real error instead of an anonymous Exception so callers
        # keep errno, filename and traceback
        if exc.errno != errno.EXDEV:
            raise
    if os.path.isdir(dest):
        raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), dest)
    if os.path.exists(dest):
        os.remove(dest)
    shutil.move(src, dest)
'''legalizestring'''
def legalizestring(string: str, fit_gbk: bool = True, max_len: int = 255, fit_utf8: bool = True, replace_null_string: str = 'NULL'):
    """Clean arbitrary text so that it can be used as a file name.

    The steps run in a fixed order: undo common backslash escapes, unescape
    HTML entities, strip markup, NFC-normalise, drop emoji and non-printable /
    control characters, sanitise as a file name, optionally round-trip through
    GBK and UTF-8 dropping unencodable characters, and finally collapse
    whitespace.

    Args:
        string: text to clean; converted with ``str`` when truthy.
        fit_gbk: drop characters that cannot be encoded as GBK.
        max_len: forwarded to ``sanitize_filename`` as ``max_len``.
        fit_utf8: drop characters that cannot be encoded as UTF-8.
        replace_null_string: value returned when the input is falsy or nothing
            is left after cleaning.

    Returns:
        The cleaned string, or ``replace_null_string``.
    """
    if not string: return replace_null_string
    string = str(string)
    # undo escaping left over from JSON/HTML payloads: \" , <\/ , \/> and \uXXXX
    string = string.replace(r'\"', '"')
    string = re.sub(r"<\\/", "</", string)
    string = re.sub(r"\\/>", "/>", string)
    string = re.sub(r"\\u([0-9a-fA-F]{4})", lambda m: chr(int(m.group(1), 16)), string)
    # html.unescape, at most twice so doubly-escaped entities are resolved as well
    for _ in range(2):
        new_string = html.unescape(string)
        if new_string == string: break
        string = new_string
    # strip markup: BeautifulSoup first, bleach.clean as the fallback when parsing fails
    try:
        string = BeautifulSoup(string, "lxml").get_text(separator="")
    except Exception:
        # was a bare ``except:``, which also swallowed KeyboardInterrupt/SystemExit
        string = bleach.clean(string, tags=[], attributes={}, strip=True)
    # unicodedata.normalize
    string = unicodedata.normalize("NFC", string)
    # emoji.replace_emoji
    string = emoji.replace_emoji(string, replace="")
    # keep printable characters only; Unicode categories starting with "C" are dropped
    string = "".join([ch for ch in string if ch.isprintable() and not unicodedata.category(ch).startswith("C")])
    # sanitize_filename
    string = sanitize_filename(string, max_len=max_len)
    # fix encoding: characters that do not survive the round-trip are silently removed
    if fit_gbk: string = string.encode("gbk", errors="ignore").decode("gbk", errors="ignore")
    if fit_utf8: string = string.encode("utf-8", errors="ignore").decode("utf-8", errors="ignore")
    # collapse whitespace runs and fall back to the placeholder when nothing is left
    string = re.sub(r"\s+", " ", string).strip()
    if not string: string = replace_null_string
    return string
'''shortenpathsinsonginfos'''
def shortenpathsinsonginfos(song_infos: list, max_path: int = 240, keep_ext: bool = True, with_hash_suffix: bool = False):
    """Assign each song info a save path no longer than ``max_path`` characters.

    For every item whose ``save_path`` is set (and is not the ``"NULL"``
    placeholder) the parent directory is resolved and created, the file stem is
    truncated to fit, and the result is written to ``info._save_path``.

    Args:
        song_infos: objects exposing a ``save_path`` attribute.
        max_path: maximum length of the full output path.
        keep_ext: keep the original file extension.
        with_hash_suffix: append ``-<md5 prefix>`` of the source path to the
            stem. Two prefix lengths (8, then 10) are tried to avoid clashing
            with a path already handed out in this call (compared
            case-insensitively). Without the hash suffix both attempts yield
            the same path, so a clash is not resolved.

    Returns:
        The same ``song_infos`` list, with items updated in place.
    """
    seen = set()
    for info in song_infos:
        raw_path = (info.save_path or "").strip()
        if (not raw_path) or raw_path.upper() == "NULL":
            continue
        src_path = Path(raw_path)
        output_dir = src_path.parent.resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        ext = src_path.suffix if keep_ext else ""
        digest = hashlib.md5(str(src_path).encode("utf-8")).hexdigest()
        # directory plus one path separator
        dir_len = len(str(output_dir)) + 1
        candidate = None
        for hash_len in (8, 10):
            hash_suffix = f"-{digest[:hash_len]}" if with_hash_suffix else ""
            budget = max(1, max_path - (dir_len + len(hash_suffix) + len(ext)))
            # trailing spaces/dots are stripped after truncation
            safe_stem = src_path.stem[:budget].rstrip(" .") or "NULL"
            candidate = str(output_dir / f"{safe_stem}{hash_suffix}{ext}")
            if candidate.lower() not in seen:
                break
        seen.add(candidate.lower())
        info._save_path = candidate
    return song_infos
'''seconds2hms'''
def seconds2hms(seconds: int):
    """Format a duration in seconds as ``HH:MM:SS``.

    Args:
        seconds: anything accepted by ``float`` (number or numeric string);
            fractions are truncated.

    Returns:
        The formatted duration, or the placeholder ``'-:-:-'`` when the value
        cannot be converted, truncates to zero, or is negative.
    """
    placeholder = '-:-:-'
    try:
        total = int(float(seconds))
    except (TypeError, ValueError, OverflowError):
        # None / non-numeric text / NaN / infinity: unknown duration
        return placeholder
    # zero means "unknown"; negative values used to render garbage such as '-1:59:55'
    if total <= 0:
        return placeholder
    m, s = divmod(total, 60)
    h, m = divmod(m, 60)
    return '%02d:%02d:%02d' % (h, m, s)
'''byte2mb'''
def byte2mb(size: int):
    """Format a byte count as mebibytes, e.g. ``'3.5 MB'``.

    Args:
        size: anything accepted by ``float`` (number or numeric string);
            fractions of a byte are truncated.

    Returns:
        ``'<value> MB'`` with the value rounded to two decimals, or ``'NULL'``
        when the input cannot be converted, is not positive, or rounds to zero.
    """
    try:
        size_mb = round(int(float(size)) / 1024 / 1024, 2)
    except (TypeError, ValueError, OverflowError):
        # None / non-numeric text / NaN / infinity: size unknown
        return 'NULL'
    # sizes that round to 0.00 MB, and invalid negative sizes, count as unknown
    if size_mb <= 0:
        return 'NULL'
    return f'{size_mb} MB'
'''_valid_response_types'''
def _valid_response_types():
    """Return the tuple of response classes accepted by the response helpers.

    ``requests.Response`` is always included. ``curl_cffi.requests.Response``
    is added when ``optionalimport('curl_cffi')`` yields a truthy object that
    exposes it (presumably the module itself when installed; confirm in
    ``importutils``).
    """
    curl_cffi = optionalimport('curl_cffi')
    curl_requests = getattr(curl_cffi, 'requests', None) if curl_cffi else None
    curl_response = getattr(curl_requests, 'Response', None) if curl_requests else None
    if curl_response is None:
        return (requests.Response,)
    return (requests.Response, curl_response)
'''resp2json'''
def resp2json(resp: requests.Response):
    """Decode the JSON body of a response, repairing malformed JSON if needed.

    Args:
        resp: a ``requests`` (or ``curl_cffi``) response object.

    Returns:
        ``{}`` when ``resp`` is not one of the types from
        ``_valid_response_types()``; otherwise the result of ``resp.json()``
        or, when that raises, of ``json_repair.loads(resp.text)``. Falsy
        results are replaced by an empty dict.
    """
    if not isinstance(resp, _valid_response_types()):
        return {}
    try:
        result = resp.json()
    except Exception:
        # was a bare ``except:``; decoding failures still fall back to json_repair,
        # but KeyboardInterrupt/SystemExit now propagate
        result = json_repair.loads(resp.text)
    return result or dict()
'''isvalidresp'''
def isvalidresp(resp: requests.Response, valid_status_codes: list | tuple | set = {200, 206}):
    """Tell whether ``resp`` is a response object with an accepted status code.

    Args:
        resp: object to check; anything that is not one of the types from
            ``_valid_response_types()`` (including ``None``) is rejected.
        valid_status_codes: accepted HTTP status codes. The default set is
            only read, never modified.

    Returns:
        ``True`` when both conditions hold, otherwise ``False``.
    """
    accepted_types = _valid_response_types()
    return isinstance(resp, accepted_types) and resp.status_code in valid_status_codes
'''safeextractfromdict'''
def safeextractfromdict(data, progressive_keys, default_value = None):
    """Follow a chain of keys/indices into nested containers.

    Args:
        data: the outermost container (dict, list, ...).
        progressive_keys: iterable of keys or indices applied one after another
            with ``result[key]``.
        default_value: returned when any lookup step fails.

    Returns:
        The value reached after applying every key, ``data`` itself for an
        empty key chain, or ``default_value`` on failure.
    """
    try:
        result = data
        for key in progressive_keys:
            result = result[key]
    except Exception:
        # deliberately broad: missing keys, bad indices, None in the middle of the
        # chain, ... all mean "not there". Unlike the former bare ``except:`` this
        # lets KeyboardInterrupt/SystemExit through.
        result = default_value
    return result
'''cachecookies'''
def cachecookies(client_name: str = '', cache_cookie_path: str = '', client_cookies: dict = None):
    """Store ``client_cookies`` under ``client_name`` in a pickle cache file.

    An existing cache file is loaded first so that the entries of other
    clients are kept; the updated mapping is then written back.

    Args:
        client_name: key under which the cookies are stored.
        cache_cookie_path: path of the pickle file (created when missing).
        client_cookies: cookies to store.
    """
    cookies = dict()
    if os.path.exists(cache_cookie_path):
        # NOTE(security): pickle.load can execute arbitrary code; the cache file
        # must only ever be one written by this program.
        with open(cache_cookie_path, 'rb') as fp:
            cookies = pickle.load(fp)
    cookies[client_name] = client_cookies
    # Serialise BEFORE opening the file for writing: opening with 'wb' truncates
    # it, so a pickling failure used to wipe the cached cookies of every client.
    payload = pickle.dumps(cookies)
    with open(cache_cookie_path, 'wb') as fp:
        fp.write(payload)
'''usedownloadheaderscookies'''
def usedownloadheaderscookies(func):
    """Decorator switching a client instance to its download settings.

    Before the wrapped method runs, ``self.default_headers`` is set to
    ``self.default_download_headers``; ``default_cookies`` and
    ``enable_curl_cffi`` are taken from their ``*_download_*`` counterparts
    when those attributes exist, and ``self._initsession()`` is called when
    the instance defines it.
    """
    optional_settings = (
        ('default_cookies', 'default_download_cookies'),
        ('enable_curl_cffi', 'enable_download_curl_cffi'),
    )
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_download_headers
        for target, source in optional_settings:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''useparseheaderscookies'''
def useparseheaderscookies(func):
    """Decorator switching a client instance to its parse settings.

    Before the wrapped method runs, ``self.default_headers`` is set to
    ``self.default_parse_headers``; ``default_cookies`` and
    ``enable_curl_cffi`` are taken from their ``*_parse_*`` counterparts when
    those attributes exist, and ``self._initsession()`` is called when the
    instance defines it.
    """
    optional_settings = (
        ('default_cookies', 'default_parse_cookies'),
        ('enable_curl_cffi', 'enable_parse_curl_cffi'),
    )
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_parse_headers
        for target, source in optional_settings:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''usesearchheaderscookies'''
def usesearchheaderscookies(func):
    """Decorator switching a client instance to its search settings.

    Before the wrapped method runs, ``self.default_headers`` is set to
    ``self.default_search_headers``; ``default_cookies`` and
    ``enable_curl_cffi`` are taken from their ``*_search_*`` counterparts when
    those attributes exist, and ``self._initsession()`` is called when the
    instance defines it.
    """
    optional_settings = (
        ('default_cookies', 'default_search_cookies'),
        ('enable_curl_cffi', 'enable_search_curl_cffi'),
    )
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        self.default_headers = self.default_search_headers
        for target, source in optional_settings:
            if hasattr(self, source):
                setattr(self, target, getattr(self, source))
        if hasattr(self, '_initsession'):
            self._initsession()
        return func(self, *args, **kwargs)
    return wrapper
'''searchdictbykey'''
def searchdictbykey(obj, target_key: str):
    """Collect every value stored under ``target_key`` in nested dicts/lists.

    The structure is walked depth-first in insertion order. A matching value
    is recorded and then searched itself, so nested matches are found too.
    Only ``dict`` and ``list`` containers are traversed.

    Returns:
        A list of the matching values; empty when nothing matches or ``obj``
        is neither a dict nor a list.
    """
    if isinstance(obj, dict):
        matches = []
        for key, value in obj.items():
            if key == target_key:
                matches.append(value)
            matches += searchdictbykey(value, target_key)
        return matches
    if isinstance(obj, list):
        return [hit for element in obj for hit in searchdictbykey(element, target_key)]
    return []
'''naiveguessextfromaudiobytes'''
def naiveguessextfromaudiobytes(content: bytes):
    """Guess a file extension from raw audio bytes using mutagen.

    Args:
        content: the audio data.

    Returns:
        One of ``"mp3"``, ``"flac"``, ``"m4a"``, ``"ogg"``, ``"wav"``,
        ``"aiff"`` or ``"wma"`` depending on the mutagen type detected, or
        ``None`` when mutagen does not recognise the data or reports a type
        not listed here.
    """
    audio = MutagenFile(BytesIO(content))
    if audio is None:
        return None
    # checked in this order; the first matching mutagen class wins
    ext_by_type = (
        (MP3, "mp3"),
        (FLAC, "flac"),
        (MP4, "m4a"),
        (OggVorbis, "ogg"),
        (WAVE, "wav"),
        (AIFF, "aiff"),
        (ASF, "wma"),
    )
    return next((ext for kind, ext in ext_by_type if isinstance(audio, kind)), None)
'''AudioLinkTester'''
class AudioLinkTester(object):
    """Check whether a URL looks like a downloadable audio resource.

    ``probe`` reports size / content type / extension taken from the response
    headers, while ``test`` decides whether the link is audio-like using the
    headers and, if needed, the first bytes of the body.
    """
    VALID_AUDIO_EXTS = {
        "aac", "aax", "aaxc", "ac3", "adts", "aif", "aifc", "aiff", "alac", "amr", "ape", "au", "avr", "awb", "caf", "cda", "dff", "dfsf", "dsf", "dss", "dts", "dtshd", "ec3", "f32",
        "f64", "flac", "gsm", "hca", "htk", "iff", "ima", "ircam", "kar", "kss", "la", "l16", "m15", "m3u8", "m4a", "m4b", "m4p", "m4r", "mat4", "mat5", "med", "midi", "mid", "mlp",
        "mod", "mo3", "mp1", "mp2", "mp3", "mpa", "mpc", "mp+", "mpp", "mptm", "msv", "mt2", "mtm", "mxmf", "nist", "nsf", "oga", "ogg", "okt", "oma", "ofr", "ofs", "opus", "paf",
        "pcm", "ptm", "pvf", "ra", "ram", "rf64", "rmi", "rmj", "rmm", "rmx", "roq", "raw", "s3m", "sap", "sds", "sd2", "sd2f", "sf", "shn", "sid", "snd", "spc", "spx", "stm", "tak",
        "tta", "thd", "ul", "ult", "umx", "voc", "vgm", "vgz", "wav", "wave", "wax", "w64", "wma", "wve", "wv", "wvx", "xi", "xm", "8svx", "16svx", "669", "amf", "dmf", "far", "gbs",
        "gym", "hes", "it", "mdl", "mpc2k", "nsa", "psf", "psf1", "psf2", "ssf", "miniusf", "usf", "2sf", "gsf", "qsf", "spu", "at3", "aa3", "at9", "3ga", "m4s"
    }
    AUDIO_CT_PREFIX = "audio/"
    # non-"audio/*" content types that are still accepted as audio
    AUDIO_CT_EXTRA = {"application/octet-stream", "application/x-flac", "application/flac", "application/x-mpegurl", "video/mp4"}
    # (leading bytes, format label) pairs checked in order by sniffmagic
    MAGIC = [(b"ID3", "mp3"), (b"\xFF\xFB", "mp3"), (b"fLaC", "flac"), (b"RIFF", "wav"), (b"OggS", "ogg"), (b"MThd", "midi"), (b"\x00\x00\x00\x18ftyp", "mp4/m4a")]
    # NOTE(review): "audio/x-aac" maps to "ogg" here, which looks like a typo for "aac" - confirm before changing
    CTYPE_TO_EXT = {"audio/mpeg": "mp3", "audio/mp3": "mp3", "audio/mp4": "m4a", "audio/x-m4a": "m4a", "audio/aac": "aac", "audio/wav": "wav", "video/mp4": "mp4", "audio/x-wav": "wav", "audio/flac": "flac", "audio/x-flac": "flac", "audio/ogg": "ogg", "audio/opus": "opus", "audio/x-aac": "ogg", "audio/x-ogg": "ogg", "audio/x-m4p": "m4a"}
    def __init__(self, timeout=(5, 15), headers: dict = None, cookies: dict = None):
        """Create the tester with its own ``requests.Session``.

        Args:
            timeout: default timeout passed to every request.
            headers: extra headers merged over the built-in Accept/User-Agent.
            cookies: default cookies passed to every request.
        """
        self.session = requests.Session()
        self.timeout = timeout
        self.headers = {'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'}
        self.headers.update(headers or {})
        self.cookies = cookies or {}
    '''isaudioct'''
    @staticmethod
    def isaudioct(ct: str):
        """Return True when the content type ``ct`` is audio-like.

        Parameters after ``;`` are ignored and the comparison is
        case-insensitive. Empty or ``None`` input yields False.
        """
        if not ct: return False
        ct = ct.lower().split(";", 1)[0].strip()
        return ct.startswith(AudioLinkTester.AUDIO_CT_PREFIX) or ct in AudioLinkTester.AUDIO_CT_EXTRA
    '''sniffmagic'''
    @staticmethod
    def sniffmagic(b: bytes):
        """Guess an audio format label from the first bytes of a file.

        Args:
            b: leading bytes of the payload (``test`` passes up to 16 bytes).

        Returns:
            The label of the first matching ``MAGIC`` signature,
            ``"mp4/m4a"`` for any ``ftyp`` box, ``"aac/adts"`` for an
            ``0xFF 0xFx`` sync word, or ``None`` when nothing matches.
        """
        if not b: return None
        for sig, fmt in AudioLinkTester.MAGIC:
            if b.startswith(sig): return fmt
        # the "ftyp" tag sits at offset 4, after a 4-byte box size that varies between
        # files; the MAGIC entry alone only matched a box size of exactly 0x18
        if len(b) >= 8 and b[4:8] == b"ftyp": return "mp4/m4a"
        if len(b) >= 2 and b[0] == 0xFF and (b[1] & 0xF0) == 0xF0: return "aac/adts"
        return None
    '''_preparerequest'''
    def _preparerequest(self, url: str, request_overrides: dict = None):
        """Return ``(request kwargs, naive extension)`` for ``url``.

        The overrides are deep-copied and completed with the instance's default
        headers, timeout and cookies. The naive extension is the text after the
        last ``.`` of the URL with the query string removed.
        """
        request_overrides = copy.deepcopy(request_overrides or {})
        request_overrides.setdefault('headers', self.headers)
        request_overrides.setdefault('timeout', self.timeout)
        request_overrides.setdefault('cookies', self.cookies)
        return request_overrides, url.split('?')[0].split('.')[-1]
    '''_closequietly'''
    @staticmethod
    def _closequietly(resp):
        """Close ``resp`` if there is one, ignoring errors raised by ``close``."""
        if resp is None: return
        try: resp.close()
        except Exception: pass
    '''_probeonce'''
    def _probeonce(self, method: str, url: str, naive_guess_ext: str, request_overrides: dict):
        """Run one HEAD (``method='head'``) or streamed GET probe and summarise its headers.

        Returns the ``probe`` output dict; every field except ``download_url``
        is ``'NULL'`` when the request fails or returns an error status.
        """
        resp = None
        try:
            if method == 'head': resp = self.session.head(url, allow_redirects=True, **request_overrides)
            else: resp = self.session.get(url, allow_redirects=True, stream=True, **request_overrides)
            resp.raise_for_status()
            resp_headers, final_url = resp.headers, resp.url
            file_size, ctype = byte2mb(resp_headers.get('content-length')), remove_suffix(str(resp_headers.get('content-type')), '; charset=UTF-8')
            # an image/jpg content type is treated as MP3 audio here
            # NOTE(review): presumably a workaround for mislabelling servers - confirm
            if ctype == 'image/jpg; charset=UTF-8' or ctype == 'image/jpg': ctype = 'audio/mpeg'
            if ctype == 'text/plain' and naive_guess_ext == 'm4s': ctype = 'audio/mp4'
            ext = self.CTYPE_TO_EXT.get(ctype, 'NULL')
            return dict(file_size=file_size, ctype=ctype, ext=ext, download_url=url, final_url=final_url)
        except Exception:
            # best effort: any failure is reported as an all-'NULL' result
            return dict(file_size='NULL', ctype='NULL', ext='NULL', download_url=url, final_url='NULL')
        finally:
            # release the connection on every path, including error statuses
            self._closequietly(resp)
    '''probe'''
    def probe(self, url: str, request_overrides: dict = None):
        """Collect size, content type and extension information for ``url``.

        A HEAD request is tried first; when it does not yield a usable file
        size, a streamed GET request is issued and its headers are used.

        Args:
            url: link to inspect.
            request_overrides: keyword arguments for the session calls;
                ``headers``, ``timeout`` and ``cookies`` default to the
                instance settings.

        Returns:
            A dict with ``file_size`` (e.g. ``'3.5 MB'``), ``ctype``, ``ext``,
            ``download_url`` and ``final_url``; unknown values are ``'NULL'``.
        """
        request_overrides, naive_guess_ext = self._preparerequest(url, request_overrides)
        # HEAD probe
        outputs = self._probeonce('head', url, naive_guess_ext, request_overrides)
        if outputs['file_size'] and outputs['file_size'] not in ('NULL',): return outputs
        # GETSTREAM probe
        return self._probeonce('get', url, naive_guess_ext, request_overrides)
    '''test'''
    def test(self, url: str, request_overrides: dict = None):
        """Decide whether ``url`` serves audio-like content.

        The HEAD response is accepted when the status is 2xx, the content type
        is audio-like (or the URL ends in ``m4s``) and a length or byte-range
        support is advertised. Otherwise a streamed GET is issued and the
        content type plus the first 16 body bytes are inspected.

        Args:
            url: link to test.
            request_overrides: keyword arguments for the session calls;
                ``headers``, ``timeout`` and ``cookies`` default to the
                instance settings.

        Returns:
            A dict with ``ok``, ``status``, ``method``, ``final_url``,
            ``ctype``, ``clen``, ``range``, ``fmt`` and ``reason``.
        """
        request_overrides, naive_guess_ext = self._preparerequest(url, request_overrides)
        is_m4s = naive_guess_ext in ('m4s',)
        outputs = dict(ok=False, status=0, method="", final_url=None, ctype=None, clen=None, range=None, fmt=None, reason="")
        # HEAD test
        resp = None
        try:
            resp = self.session.head(url, allow_redirects=True, **request_overrides)
            clen = resp.headers.get("Content-Length")
            clen = int(clen) if clen and clen.isdigit() else None
            outputs.update(dict(status=resp.status_code, method="HEAD", final_url=str(resp.url), ctype=resp.headers.get("Content-Type"), clen=clen, range=(resp.headers.get("Accept-Ranges") or "").lower() == "bytes"))
            if outputs["ctype"] == 'text/plain' and is_m4s: outputs["ctype"] = 'audio/mp4'
            if 200 <= resp.status_code < 300 and ((self.isaudioct(outputs["ctype"]) or is_m4s) and (outputs["clen"] or outputs["range"])):
                outputs.update(dict(ok=True, reason="HEAD success"))
                return outputs
        except Exception as err:
            outputs["reason"] = f"HEAD error: {err}"
        finally:
            self._closequietly(resp)
        # RANGEGET test
        resp = None
        try:
            resp = self.session.get(url, stream=True, allow_redirects=True, **request_overrides)
            outputs.update(dict(status=resp.status_code, method="RANGEGET", final_url=str(resp.url)))
            if resp.status_code not in (200, 206):
                outputs["reason"] = f"RANGEGET error: response status {resp.status_code}"
                return outputs
            # only the first chunk (up to 16 bytes) is needed for magic sniffing
            chunk = next(resp.iter_content(chunk_size=16), b"")
            outputs["ctype"] = outputs["ctype"] or resp.headers.get("Content-Type")
            if outputs["ctype"] == 'text/plain' and is_m4s: outputs["ctype"] = 'audio/mp4'
            outputs["range"] = outputs["range"] or (resp.status_code == 206) or (resp.headers.get("Content-Range") is not None)
            # fall back to the total size after "/" in Content-Range when Content-Length is absent
            clen = resp.headers.get("Content-Length") or (resp.headers.get("Content-Range") or "").split("/")[-1]
            if clen and clen.isdigit(): outputs["clen"] = int(clen)
            outputs["fmt"] = self.sniffmagic(chunk)
            if self.isaudioct(outputs["ctype"]) or outputs["fmt"] or is_m4s: outputs.update(dict(ok=True, reason="RANGEGET success"))
            else: outputs.update(dict(ok=False, reason="RANGEGET error: Not audio-like (CT/magic)"))
        except Exception as err:
            outputs["reason"] = f"RANGEGET error: {err}"
        finally:
            # the streamed response is closed on every path, including the early status return
            self._closequietly(resp)
        # return
        return outputs