'''
Function:
    Implementation of LanZouYParser
Author:
    Zhenchao Jin
WeChat Official Account (微信公众号):
    Charles的皮卡丘
'''
from __future__ import annotations

import re
import json
import random
import requests
from urllib.parse import urljoin, urlparse


'''LanZouYParser'''
class LanZouYParser():
    '''
    Resolve a LanZou share link into a direct download URL.

    Every method is static, so the class only serves as a namespace.
    `parsefromurl` is the public entry point; the underscore-prefixed
    methods are the building blocks it relies on.
    '''
    # host that every share link is rewritten onto before it is scraped
    _BASE_URL = "https://www.lanzouf.com"
    '''parsefromurl'''
    @staticmethod
    def parsefromurl(url: str, passcode: str = '', max_tries: int = 3):
        '''
        Resolve `url` to a download link, retrying up to `max_tries` times.

        Each attempt first scrapes the share page via `_parsefromurl`; when that
        fails, the api-v2.cenguigui.cn resolver is queried with the last path
        segment of `url` as the file id.

        Args:
            url: share link (or bare path / file id) to resolve.
            passcode: extraction code for password-protected shares.
            max_tries: number of attempts; values <= 0 make no request at all.

        Returns:
            `(download_result, download_url)`. On success `download_url` starts
            with "http" and `download_result` is the dict produced by whichever
            resolver succeeded. When every attempt fails the pair is `({}, "")`.
        '''
        def is_http(candidate) -> bool:
            return bool(candidate) and str(candidate).startswith('http')
        # bound up front so that max_tries <= 0 returns the empty pair instead of raising UnboundLocalError
        download_result, download_url = {}, ""
        for _ in range(max_tries):
            # primary route: scrape the share page ourselves
            try:
                download_result, download_url = LanZouYParser._parsefromurl(url=url, passcode=passcode)
                if is_http(download_url): break
            except Exception:
                # best effort: any scraping failure just means "try the fallback"
                pass
            download_result, download_url = {}, ""
            # fallback route: third-party resolver keyed by the file id
            file_id = urlparse(url).path.strip('/').split('/')[-1]
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36'}
            try:
                # explicit timeout: requests waits forever by default
                resp = requests.get(f'https://api-v2.cenguigui.cn/api/lanzou/api.php?url=https://cenguigui.lanzouw.com/{file_id}', headers=headers, timeout=10)
                download_result = resp.json()
                download_url = download_result['data']['downurl']
                if is_http(download_url): break
            except Exception:
                pass
            download_result, download_url = {}, ""
        return download_result, download_url
    '''_randip'''
    @staticmethod
    def _randip() -> str:
        '''Return a random dotted-quad string used to fill the spoofed client-IP headers.'''
        # each of the last three octets ends up as an integer in 60..255
        ip2, ip3, ip4 = (round(random.randint(600000, 2550000) / 10000) for _ in range(3))
        # candidate first octets; repeated entries make those values more likely to be picked
        arr1 = ["218", "218", "66", "66", "218", "218", "60", "60", "202", "204", "66", "66", "66", "59", "61", "60", "222", "221", "66", "59", "60", "60", "66", "218", "218", "62", "63", "64", "66", "66", "122", "211"]
        ip1 = random.choice(arr1)
        return f"{ip1}.{ip2}.{ip3}.{ip4}"
    '''_spoofheaders'''
    @staticmethod
    def _spoofheaders(user_agent: str = "", referer: str = "") -> dict:
        '''Build request headers carrying random client-IP values plus the optional User-Agent / Referer.'''
        headers = {"X-FORWARDED-FOR": LanZouYParser._randip(), "CLIENT-IP": LanZouYParser._randip()}
        if user_agent: headers["User-Agent"] = user_agent
        if referer: headers["Referer"] = referer
        return headers
    '''_httpget'''
    @staticmethod
    def _httpget(url: str, user_agent: str = "", referer: str = "", cookies: dict = None, timeout: int = 10) -> str:
        '''
        GET `url` (following redirects) and return the decoded response body.

        Raises:
            requests.HTTPError: on a 4xx / 5xx response (via `raise_for_status`).
        '''
        headers = LanZouYParser._spoofheaders(user_agent=user_agent, referer=referer)
        # NOTE: certificate verification is disabled here, as in the other request helpers
        resp = requests.get(url, headers=headers, cookies=cookies, timeout=timeout, verify=False, allow_redirects=True)
        resp.raise_for_status()
        # decode with the detected charset, falling back to utf-8 when detection yields nothing
        resp.encoding = resp.apparent_encoding or "utf-8"
        return resp.text
    '''_httppost'''
    @staticmethod
    def _httppost(data: dict, url: str, referer: str = "", user_agent: str = "", timeout: int = 10) -> str:
        '''
        POST form `data` to `url` (following redirects) and return the decoded response body.

        Raises:
            requests.HTTPError: on a 4xx / 5xx response (via `raise_for_status`).
        '''
        headers = LanZouYParser._spoofheaders(user_agent=user_agent, referer=referer)
        resp = requests.post(url, data=data, headers=headers, timeout=timeout, verify=False, allow_redirects=True)
        resp.raise_for_status()
        resp.encoding = resp.apparent_encoding or "utf-8"
        return resp.text
    '''_httpredirecturl'''
    @staticmethod
    def _httpredirecturl(url: str, referer: str, user_agent: str, cookie_str: str, timeout: int = 10) -> str:
        '''
        GET `url` without following redirects and return the redirect target.

        Returns:
            The `Location` header resolved against `url`, or "" when the
            response carries no `Location` header.
        '''
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9", "Cache-Control": "no-cache", "Connection": "keep-alive", "Pragma": "no-cache", "Upgrade-Insecure-Requests": "1",
            "User-Agent": user_agent, "Referer": referer, "Cookie": cookie_str,
        }
        resp = requests.get(url, headers=headers, timeout=timeout, verify=False, allow_redirects=False)
        resp.raise_for_status()
        # response headers are case-insensitive in requests, so a single lookup is enough
        loc = resp.headers.get("Location", "")
        return urljoin(url, loc) if loc else ""
    '''_acwscv2simple'''
    @staticmethod
    def _acwscv2simple(arg1: str) -> str:
        '''
        Derive the `acw_sc__v2` cookie value from the `arg1` challenge string.

        The characters of `arg1` are reordered according to a fixed position
        table, then the result is XOR-ed byte by byte (two hex digits at a
        time) with a fixed hex mask and re-encoded as lowercase hex.

        Returns:
            The hex string, or "" when `arg1` is empty.

        Raises:
            ValueError: when `arg1` contains non-hexadecimal characters.
        '''
        if not arg1: return ""
        mask = "3000176000856006061501533003690027800375"
        # 1-based source positions: output character k is taken from arg1[pos_list[k] - 1]
        pos_list = (15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36)
        # positions beyond the end of a short arg1 are skipped rather than raising IndexError
        arg2 = "".join(arg1[p - 1] for p in pos_list if p <= len(arg1))
        length = min(len(arg2), len(mask))
        return "".join(f"{(int(arg2[i:i+2], 16) ^ int(mask[i:i+2], 16)):02x}" for i in range(0, length, 2))
    '''_normalizeurl'''
    @staticmethod
    def _normalizeurl(url: str) -> str:
        '''
        Rewrite a share link onto `_BASE_URL`.

        Anything after the first ".com/" is kept as the path; a bare path or
        file id is prefixed with the base URL; an http(s) URL without ".com/"
        is returned unchanged.
        '''
        base_url = LanZouYParser._BASE_URL
        if ".com/" in url: return f"{base_url}/" + url.split(".com/", 1)[1].lstrip("/")
        if url.startswith("/"): return base_url + url
        if url.startswith("http"): return url
        return f"{base_url}/" + url.lstrip("/")
    '''_extractfirst'''
    @staticmethod
    def _extractfirst(regex_list: list, text: str) -> str:
        '''Return group 1 of the first pattern in `regex_list` that matches `text` (DOTALL), or "" if none match.'''
        for rgx in regex_list:
            matched = re.search(rgx, text, flags=re.S)
            if matched: return matched.group(1)
        return ""
    '''_parsefromurl'''
    @staticmethod
    def _parsefromurl(url: str, passcode: str = ''):
        '''
        Scrape the share page behind `url` and resolve its download link.

        Args:
            url: share link; it is normalized with `_normalizeurl` first.
            passcode: extraction code, only sent when the page asks for one.

        Returns:
            `(download_result, download_url)` where `download_result` has the
            keys "name", "filesize", "downUrl" and "parse_result".

        Raises:
            RuntimeError: the page reports that the share was cancelled.
            ValueError: an expected token is missing from the page, or the
                ajax endpoint did not answer with `zt == 1`.
            requests.RequestException / KeyError / json.JSONDecodeError: on
                network or response-format failures.
        '''
        # init
        base_url = LanZouYParser._BASE_URL
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
        # visit home page
        url = LanZouYParser._normalizeurl(url)
        homepage_url_html = LanZouYParser._httpget(url, user_agent=user_agent)
        if "文件取消分享了" in homepage_url_html: raise RuntimeError(f"share link has been cancelled: {url}")
        soft_name = LanZouYParser._extractfirst([r'style="font-size: 30px;text-align: center;padding: 56px 0px 20px 0px;">(.*?)</div>', r'<div class="n_box_3fn".*?>(.*?)</div>', r"var filename = '(.*?)';", r'div class="b"><span>(.*?)</span></div>'], homepage_url_html)
        soft_size = LanZouYParser._extractfirst([r'<div class="n_filesize".*?>大小：(.*?)</div>', r'<span class="p7">文件大小：</span>(.*?)<br>'], homepage_url_html)
        # with passcode
        if "function down_p(){" in homepage_url_html:
            segment = re.findall(r"'sign':'(.*?)',", homepage_url_html, flags=re.S)
            ajaxm = re.findall(r"ajaxm\.php\?file=\d+", homepage_url_html, flags=re.S)
            if len(segment) < 2 or len(ajaxm) < 1: raise ValueError("sign / ajaxm tokens not found on the passcode page")
            # the second 'sign' occurrence and the first ajaxm endpoint are the ones posted
            post_data = {"action": "downprocess", "sign": segment[1], "p": passcode, "kd": 1}
            post_url = f"{base_url}/" + ajaxm[0]
            parse_result = LanZouYParser._httppost(post_data, post_url, referer=url, user_agent=user_agent)
            parse_result: dict = json.loads(parse_result)
            # prefer the file name reported by the endpoint over the scraped one
            soft_name = parse_result.get("inf") or soft_name
        # without passcode
        else:
            link = LanZouYParser._extractfirst([r'\n<iframe.*?name="[\s\S]*?"\ssrc="\/(.*?)"', r'<iframe.*?name="[\s\S]*?"\ssrc="\/(.*?)"'], homepage_url_html)
            if not link: raise ValueError("download iframe not found on the share page")
            ifurl = f"{base_url}/" + link.lstrip("/")
            iframe_html = LanZouYParser._httpget(ifurl, user_agent=user_agent)
            wp_sign = re.findall(r"wp_sign = '(.*?)'", iframe_html, flags=re.S)
            ajaxdata = re.findall(r"ajaxdata = '(.*?)'", iframe_html, flags=re.S)
            ajaxm = re.findall(r"ajaxm\.php\?file=\d+", iframe_html, flags=re.S)
            if len(wp_sign) < 1 or len(ajaxdata) < 1 or len(ajaxm) < 2: raise ValueError("wp_sign / ajaxdata / ajaxm tokens not found in the iframe page")
            # here the second ajaxm endpoint is the one posted
            post_data = {"action": "downprocess", "websignkey": ajaxdata[0], "signs": ajaxdata[0], "sign": wp_sign[0], "websign": "", "kd": 1, "ves": 1}
            post_url = f"{base_url}/" + ajaxm[1]
            parse_result = LanZouYParser._httppost(post_data, post_url, referer=ifurl, user_agent=user_agent)
            parse_result: dict = json.loads(parse_result)
        # final parse
        if not isinstance(parse_result, dict) or parse_result.get("zt") != 1: raise ValueError(f"ajax endpoint rejected the request: {parse_result!r}")
        download_url = f"{parse_result['dom']}/file/{parse_result['url']}"
        download_html = LanZouYParser._httpget(download_url, user_agent=user_agent)
        arg1_list = re.findall(r"arg1='(.*?)'", download_html, flags=re.S)
        if arg1_list:
            # the page embedded an arg1 challenge: answer it through the acw_sc__v2 cookie and take the redirect target
            decrypted = LanZouYParser._acwscv2simple(arg1_list[0])
            cookie_str = f"down_ip=1; expires=Sat, 16-Nov-2019 11:42:54 GMT; path=/; domain=.baidupan.com; acw_sc__v2={decrypted}"
            redirected_download_url = LanZouYParser._httpredirecturl(download_url, referer="https://developer.lanzoug.com", user_agent=user_agent, cookie_str=cookie_str)
            if "http" in (redirected_download_url or ""): download_url = redirected_download_url
        # drop the pid query parameter from the final link
        download_url = re.sub(r"pid=[^&]*&", "", download_url)
        download_result = {"name": soft_name or "", "filesize": soft_size or "", "downUrl": download_url, "parse_result": parse_result}
        # return
        return download_result, download_url