'''
Function:
    Implementation of Lyric Related Utils
Author:
    Zhenchao Jin
WeChat Official Account (微信公众号):
    Charles的皮卡丘
'''
from __future__ import annotations
import os
import re
import copy
import random
import tempfile
import requests
from typing import Optional
from .misc import resp2json
from urllib.parse import quote
from .importutils import optionalimportfrom


# Characters stripped from both ends of every lyric line: BOM, zero-width
# characters, word joiner, non-breaking space, plain space and tab.
_LRC_EDGE_CHARS = "\ufeff\u200b\u200c\u200d\u2060\u00a0 \t"
# A line that consists of nothing but one "[mm:ss]" / "[hh:mm:ss.xxx]" tag.
_LRC_BARE_TAG_RE = re.compile(r"\[(\d{2}:)?\d{2}:\d{2}(?:\.\d{1,3})?\]")
# Any time tag inside LRC text; groups are (hours, minutes, seconds, fraction).
_LRC_TIME_TAG_RE = re.compile(r"\[(?:(\d{1,2}):)?(\d{1,2}):(\d{2})(?:\.(\d{1,3}))?\]")


'''cleanlrc'''
def cleanlrc(text) -> str:
    """Normalize lyric text.

    Line endings are unified to ``\\n``, invisible characters and whitespace
    are stripped from both ends of every line, and lines that are empty or
    contain only a time tag (no lyric text) are dropped.

    Args:
        text: Lyric text; it is converted with ``str()`` first.

    Returns:
        The remaining lines joined with ``\\n``.
    """
    kept_lines = []
    for raw_line in re.sub(r"\r\n?", "\n", str(text)).split("\n"):
        line = raw_line.strip(_LRC_EDGE_CHARS).strip()
        if line and not _LRC_BARE_TAG_RE.fullmatch(line):
            kept_lines.append(line)
    return "\n".join(kept_lines)


'''fractoseconds'''
def fractoseconds(frac: str | None) -> float:
    """Convert the digits after the decimal point into seconds.

    Args:
        frac: Digit string such as ``"5"``, ``"50"`` or ``"500"``; may be
            empty or ``None``.

    Returns:
        ``int(frac) / 10 ** len(frac)``, or ``0.0`` when ``frac`` is empty.
    """
    if not frac:
        return 0.0
    return int(frac) / (10 ** len(frac))


'''extractdurationsecondsfromlrc'''
def extractdurationsecondsfromlrc(lrc: str) -> Optional[float]:
    """Return the largest time tag found in LRC text, in seconds.

    Args:
        lrc: LRC text; an empty value or the sentinel ``'NULL'`` means
            "no lyric".

    Returns:
        The latest timestamp in seconds, or ``None`` when there is no lyric
        or no time tag could be found.
    """
    if not lrc or (lrc == 'NULL'):
        return None
    return max(
        (
            # findall yields '' for an absent hours group, hence "or 0".
            int(hours or 0) * 3600 + int(minutes) * 60 + int(seconds) + fractoseconds(frac)
            for hours, minutes, seconds, frac in _LRC_TIME_TAG_RE.findall(lrc)
        ),
        default=None,
    )


'''WhisperLRC'''
class WhisperLRC:
    """Generate LRC-style lyrics from audio with a faster_whisper model."""

    def __init__(self, model_size_or_path="small", device="auto", compute_type="int8", cpu_threads=4, num_workers=1, **kwargs):
        """Build the underlying ``WhisperModel``.

        ``self.whisper_model`` is set to ``None`` when ``optionalimportfrom``
        returns a falsy value (presumably because faster_whisper is not
        installed -- confirm against ``importutils``).

        Args:
            model_size_or_path, device, compute_type, cpu_threads,
            num_workers, **kwargs: Forwarded unchanged to ``WhisperModel``.
        """
        WhisperModel = optionalimportfrom('faster_whisper', 'WhisperModel')
        self.whisper_model = WhisperModel(
            model_size_or_path, device=device, compute_type=compute_type, cpu_threads=cpu_threads, num_workers=num_workers, **kwargs
        ) if WhisperModel else None

    '''downloadtotmpdir'''
    @staticmethod
    def downloadtotmpdir(url: str, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        """Stream ``url`` into a new temporary file and return its path.

        Args:
            url: Resource to download.
            headers, timeout, cookies: Used only when the same key is not
                already present in ``request_overrides``.
            request_overrides: Extra keyword arguments for ``requests.get``;
                it is deep-copied, so the caller's dict is never mutated.

        Returns:
            Path of the temporary file. The caller is responsible for
            deleting it.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        request_kwargs = copy.deepcopy(request_overrides or {})
        request_kwargs.setdefault('headers', headers or {})
        request_kwargs.setdefault('timeout', timeout)
        request_kwargs.setdefault('cookies', cookies or {})
        # Streaming is mandatory here; assigning (rather than passing a second
        # keyword) also avoids a TypeError when overrides contain 'stream'.
        request_kwargs['stream'] = True
        # Reuse the extension found in the URL, falling back to ".bin".
        ext_match = re.search(r"\.([a-z0-9]{2,5})(?:\?|$)", url, re.I)
        suffix = "." + (ext_match.group(1).lower() if ext_match else "bin")
        # The context manager closes the streamed response even on errors.
        with requests.get(url, **request_kwargs) as resp:
            resp.raise_for_status()
            fd, path = tempfile.mkstemp(suffix=suffix)
            try:
                with os.fdopen(fd, "wb") as fp:
                    for chunk in resp.iter_content(32768):
                        if chunk:
                            fp.write(chunk)
            except BaseException:
                # Do not leave a partially written file behind.
                try:
                    os.remove(path)
                except OSError:
                    pass
                raise
        return path

    '''timestamp'''
    @staticmethod
    def timestamp(t):
        """Format seconds as an LRC time tag ``[mm:ss.xx]``.

        Negative values are clamped to zero. The value is rounded to
        centiseconds before it is split into minutes and seconds, so a value
        such as ``59.999`` yields ``[01:00.00]`` instead of ``[00:60.00]``.
        """
        t = round(max(0.0, float(t)), 2)
        minutes = int(t // 60)
        seconds = t - minutes * 60
        return f"[{minutes:02d}:{seconds:05.2f}]"

    '''_requiremodel'''
    def _requiremodel(self):
        """Raise if the whisper model is unavailable.

        ``AssertionError`` is raised explicitly (instead of using ``assert``)
        so the check keeps the same exception type but still runs under
        ``python -O``.
        """
        if self.whisper_model is None:
            raise AssertionError('faster_whisper should be installed via "pip install faster_whisper"')

    '''_transcribe'''
    def _transcribe(self, file_path: str, transcribe_overrides: dict = None):
        """Transcribe ``file_path`` and build the result dict.

        The lyric string is fully built before returning, so the caller may
        delete ``file_path`` as soon as this method returns.
        """
        transcribe_settings = {
            'language': None, 'vad_filter': True, 'vad_parameters': dict(min_silence_duration_ms=300), 'chunk_length': 30, 'beam_size': 5,
        }
        transcribe_settings.update(transcribe_overrides or {})
        segs, info = self.whisper_model.transcribe(file_path, **transcribe_settings)
        lrc = "\n".join(f"{self.timestamp(s.start)}{s.text.strip()}" for s in segs)
        return {"language": info.language, "prob": info.language_probability, "duration": getattr(info, "duration", None), 'lyric': lrc}

    '''fromurl'''
    def fromurl(self, url: str, transcribe_overrides: dict = None, headers: dict = None, timeout: int = 300, cookies: dict = None, request_overrides: dict = None):
        """Download audio from ``url`` and transcribe it to LRC text.

        Args:
            url: Audio resource to download.
            transcribe_overrides: Settings that override the default
                arguments passed to ``whisper_model.transcribe``.
            headers, timeout, cookies, request_overrides: Forwarded to
                ``downloadtotmpdir``.

        Returns:
            Dict with keys ``language``, ``prob``, ``duration`` and ``lyric``.

        Raises:
            AssertionError: If the whisper model is not available.
        """
        self._requiremodel()
        tmp_file_path = self.downloadtotmpdir(
            url, headers=headers or {}, timeout=timeout, cookies=cookies or {}, request_overrides=request_overrides or {},
        )
        try:
            return self._transcribe(tmp_file_path, transcribe_overrides)
        finally:
            # The download is only a scratch copy; always clean it up.
            if os.path.exists(tmp_file_path):
                os.remove(tmp_file_path)

    '''fromfilepath'''
    def fromfilepath(self, file_path: str, transcribe_overrides: dict = None):
        """Transcribe a local audio file to LRC text.

        Args:
            file_path: Path of the audio file.
            transcribe_overrides: Settings that override the default
                arguments passed to ``whisper_model.transcribe``.

        Returns:
            Dict with keys ``language``, ``prob``, ``duration`` and ``lyric``.

        Raises:
            AssertionError: If the whisper model is not available.
        """
        self._requiremodel()
        return self._transcribe(file_path, transcribe_overrides)


'''LyricSearchClient'''
class LyricSearchClient():
    """Look up lyrics through several public web APIs.

    Every ``searchby*`` method returns ``(lyric_result, lyric)`` where
    ``lyric_result`` is the decoded API response and ``lyric`` is the cleaned
    lyric text, or the sentinel ``'NULL'`` when nothing was found.
    """
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"

    '''_get'''
    @staticmethod
    def _get(url: str, request_overrides: dict = None, headers: dict = None, params: dict = None):
        """Issue a GET request with defaults merged into caller overrides.

        Caller-supplied headers win over the API defaults on conflicts and
        ``timeout`` defaults to 10 seconds. ``params`` built by the API method
        always take effect because they carry the actual query. Merging
        (instead of passing duplicate keywords) avoids the ``TypeError`` that
        occurs when ``request_overrides`` contains ``headers`` or ``timeout``.
        """
        request_kwargs = dict(request_overrides or {})
        request_kwargs['headers'] = {**(headers or {}), **(request_kwargs.get('headers') or {})}
        request_kwargs.setdefault('timeout', 10)
        if params is not None:
            request_kwargs['params'] = params
        return requests.get(url, **request_kwargs)

    '''search'''
    @staticmethod
    def search(track_name: str, artist_name: str, allowed_lyric_apis: tuple = ('searchbylrclibapig', 'searchbylrclibapis'), request_overrides: dict = None):
        """Try each lyric API in order and return the first real hit.

        Args:
            track_name: Song title.
            artist_name: Artist name.
            allowed_lyric_apis: Callables or names of ``LyricSearchClient``
                methods, tried in the given order.
            request_overrides: Extra ``requests.get`` keyword arguments passed
                to every API.

        Returns:
            ``(lyric_result, lyric)`` of the first API that yields a lyric, or
            the values of the last attempt (``lyric`` is then empty or
            ``'NULL'``/``'None'``).
        """
        lyric_result, lyric = {}, 'NULL'
        for lyric_api in allowed_lyric_apis:
            if not callable(lyric_api):
                lyric_api = getattr(LyricSearchClient, lyric_api, None) if isinstance(lyric_api, str) else None
            if lyric_api is None:
                # Unknown API: counts as a failed attempt.
                lyric_result, lyric = {}, 'NULL'
                continue
            try:
                lyric_result, lyric = lyric_api(track_name=track_name, artist_name=artist_name, request_overrides=request_overrides)
            except Exception:
                # Deliberate best effort: any failure of one provider simply
                # moves on to the next one.
                lyric_result, lyric = {}, 'NULL'
            if lyric and (lyric not in {'NULL', 'None'}):
                return lyric_result, lyric
        return lyric_result, lyric

    '''searchbylrclibapig'''
    @staticmethod
    def searchbylrclibapig(track_name: str, artist_name: str, request_overrides: dict = None):
        """Query the lrclib.net ``/api/get`` endpoint (exact match lookup).

        Synced lyrics are preferred over plain lyrics.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        resp = LyricSearchClient._get(
            "https://lrclib.net/api/get", request_overrides, headers={"user-agent": LyricSearchClient.USER_AGENT},
            params={"artist_name": artist_name, "track_name": track_name},
        )
        resp.raise_for_status()
        lyric_result = resp2json(resp=resp)
        lyric = cleanlrc(lyric_result.get('syncedLyrics') or lyric_result.get('plainLyrics') or 'NULL')
        return lyric_result, lyric

    '''searchbylrclibapis'''
    @staticmethod
    def searchbylrclibapis(track_name: str, artist_name: str, request_overrides: dict = None):
        """Query the lrclib.net ``/api/search`` endpoint and use the first hit.

        Synced lyrics are preferred over plain lyrics. An empty (or non-list)
        search result yields ``'NULL'`` instead of raising.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        resp = LyricSearchClient._get(
            "https://lrclib.net/api/search", request_overrides, headers={"user-agent": LyricSearchClient.USER_AGENT},
            params={"q": f"{artist_name} {track_name}"},
        )
        resp.raise_for_status()
        lyric_result = resp2json(resp=resp)
        first_hit = lyric_result[0] if isinstance(lyric_result, (list, tuple)) and lyric_result else {}
        lyric = cleanlrc(first_hit.get('syncedLyrics') or first_hit.get('plainLyrics') or 'NULL')
        return lyric_result, lyric

    '''searchbylyricsovhapiv1'''
    @staticmethod
    def searchbylyricsovhapiv1(track_name: str, artist_name: str, request_overrides: dict = None):
        """Query api.lyrics.ovh; artist and track are percent-encoded into the path.

        The HTTP status is intentionally not checked; a response without a
        ``lyrics`` field yields ``'NULL'``.
        """
        resp = LyricSearchClient._get(
            f"https://api.lyrics.ovh/v1/{quote(artist_name, safe='')}/{quote(track_name, safe='')}", request_overrides,
            headers={"user-agent": LyricSearchClient.USER_AGENT},
        )
        lyric_result = resp2json(resp=resp)
        lyric = cleanlrc(lyric_result.get('lyrics') or 'NULL')
        return lyric_result, lyric

    '''searchbyhappiapiv1'''
    @staticmethod
    def searchbyhappiapiv1(track_name: str, artist_name: str, request_overrides: dict = None):
        """Query api.happi.dev and use the lyrics of the first result.

        Raises ``KeyError``/``IndexError`` when the response lacks the
        ``result[0].lyrics`` structure.
        """
        # NOTE(review): the API token is hard-coded in source; consider loading it from configuration.
        headers = {'accept': 'application/json', 'x-happi-token': 'hk254-C1VegxwlJjYdYFPtdUDpg8qiVpmAXVl0aA'}
        resp = LyricSearchClient._get(
            'https://api.happi.dev/v1/lyrics', request_overrides, headers=headers, params={'artist': artist_name, 'track': track_name},
        )
        lyric_result = resp2json(resp=resp)
        lyric = cleanlrc(lyric_result['result'][0]['lyrics'] or 'NULL')
        return lyric_result, lyric

    '''searchbymusixmatchapi'''
    @staticmethod
    def searchbymusixmatchapi(track_name: str, artist_name: str, request_overrides: dict = None):
        """Query the musixmatch ``matcher.lyrics.get`` endpoint.

        Query values are passed through ``params`` so that characters such as
        ``&`` or ``#`` in a title are URL-encoded instead of corrupting the
        query string. Raises ``KeyError`` when the response lacks the
        ``message.body.lyrics.lyrics_body`` structure.
        """
        # NOTE(review): API keys are hard-coded in source; consider loading them from configuration.
        candidate_req_keys = ['3bc1042fde1ac8c1979c400d6f921320']
        resp = LyricSearchClient._get(
            "https://api.musixmatch.com/ws/1.1/matcher.lyrics.get", request_overrides, headers={"user-agent": LyricSearchClient.USER_AGENT},
            params={"apikey": random.choice(candidate_req_keys), "q_track": track_name, "q_artist": artist_name},
        )
        lyric_result = resp2json(resp=resp)
        lyric = cleanlrc(lyric_result['message']['body']['lyrics']['lyrics_body'] or 'NULL')
        return lyric_result, lyric