|
1 | 1 | from base64 import b64decode |
2 | | -from re import compile as re_compile |
3 | | -from ..utils import json_loads |
| 2 | +from re import compile as re_compile, escape as re_escape |
| 3 | + |
| 4 | +from bs4 import BeautifulSoup |
4 | 5 |
|
| 6 | +from ..utils import json_loads |
5 | 7 | from ..networking import AsyncClient |
6 | 8 | from .common import DirectLink, Hoster |
7 | 9 |
|
# Matches an absolute http(s) URL, running up to (but not including) the next
# single quote, double quote or angle bracket.
REDIRECT_PATTERN = re_compile(r"""https?://[^'"<>]+""")
9 | 11 |
|
10 | | -EXTRACT_VEO_HLS_PATTERN = re_compile(r"'hls': '(?P<hls>.*)'") |
11 | | -HIDDEN_JSON_PATTERN = re_compile(r"var a168c='(?P<hidden_json>[^']+)'") |
| 12 | +# Credit |
| 13 | +# https://github.com/wolfswolke/aniworld_scraper/blob/41bd0f23cbc02352481dd92e6d986d1fe30c76bf/src/logic/search_for_links.py#L23 |
| 14 | + |
# Translation table that rotates the ASCII letters A-Z and a-z by 13 places
# (ROT13). Characters without an entry are left untouched by str.translate.
_ROT13_TABLE = str.maketrans(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
    "NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm",
)


def deb_func1(input_string: str) -> str:
    """Return ``input_string`` with ROT13 applied to its ASCII letters.

    Upper- and lower-case ASCII letters are rotated by 13 positions within
    their own alphabet; every other character (digits, punctuation,
    non-ASCII text) is copied through unchanged. Applying the function twice
    yields the original string.

    Args:
        input_string: Text to transform.

    Returns:
        The transformed text, the same length as the input.
    """
    # A single C-level pass instead of building the result with repeated
    # string concatenation.
    return input_string.translate(_ROT13_TABLE)
| 25 | + |
# Compiled literal patterns for the two-character marker sequences that
# regex_func replaces. The order is significant: substitutions run one after
# another, so an earlier marker wins where two markers overlap.
PATTERNS = [
    re_compile(re_escape(marker))
    for marker in ('@$', '^^', '~@', '%?', '*~', '!!', '#&')
]


def regex_func(input_string):
    """Replace every occurrence of each marker in ``PATTERNS`` with ``'_'``.

    The patterns are applied sequentially, in list order, each one operating
    on the output of the previous substitution.

    Args:
        input_string: Text to process.

    Returns:
        The text with all marker sequences replaced by underscores.
    """
    replaced = input_string
    for marker_pattern in PATTERNS:
        replaced = marker_pattern.sub('_', replaced)
    return replaced
| 40 | + |
def deb_func3(input_string, shift):
    """Shift every character of ``input_string`` down by ``shift`` code points.

    Args:
        input_string: Text whose characters are to be shifted.
        shift: Integer subtracted from each character's code point.

    Returns:
        A new string of the same length with each character replaced by
        ``chr(ord(char) - shift)``.

    Raises:
        ValueError: If a shifted code point falls outside the range accepted
            by ``chr``.
    """
    return ''.join(chr(ord(char) - shift) for char in input_string)
| 46 | + |
def deb_func(input_var):
    """Decode an obfuscated payload string and parse it as JSON.

    The steps run in this order:

    1. ``deb_func1`` is applied to the input.
    2. ``regex_func`` replaces the marker sequences with ``'_'`` and every
       ``'_'`` is then removed.
    3. The result is base64-decoded and read as UTF-8.
    4. ``deb_func3`` shifts each character down by 3.
    5. The string is reversed, base64-decoded again and read as UTF-8.
    6. The final text is parsed with ``json_loads``.

    Args:
        input_var: The obfuscated string.

    Returns:
        Whatever ``json_loads`` produces for the decoded text.
    """
    without_markers = regex_func(deb_func1(input_var)).replace('_', '')
    first_layer = b64decode(without_markers).decode('utf-8')
    second_layer_b64 = deb_func3(first_layer, 3)[::-1]
    json_text = b64decode(second_layer_b64).decode('utf-8')
    return json_loads(json_text)
| 56 | + |
def find_script_element(raw_html):
    """Extract the ``"source"`` entry from the obfuscated script in a page.

    The HTML is parsed, the first ``<script>`` element whose markup contains
    ``KGMAaM=`` is selected, and the text between ``MKGMa="`` and the next
    double quote is passed to ``deb_func``.

    Args:
        raw_html: HTML text of the page.

    Returns:
        The ``"source"`` value of the decoded payload, or ``None`` when no
        matching script element exists or the selected script does not
        contain the ``MKGMa="`` prefix.
    """
    soup = BeautifulSoup(raw_html, features="html.parser")
    script_text = next(
        (
            markup
            for markup in map(str, soup.find_all("script"))
            if "KGMAaM=" in markup
        ),
        None,
    )
    if script_text is None:
        return None

    segments = script_text.split('MKGMa="')
    if len(segments) < 2:
        # The prefix is missing: report "not found" the same way as a missing
        # script instead of failing with a bare IndexError.
        return None

    obfuscated_string = segments[1].split('"')[0]
    return deb_func(obfuscated_string)["source"]
| 72 | + |
12 | 73 |
|
class VOEHoster(Hoster):
    """Hoster whose direct link is read from an obfuscated page script."""

    async def get_direct_link(self) -> DirectLink:
        """Resolve ``self.url`` to a ``DirectLink``.

        Fetches ``self.url``, takes the first URL that ``REDIRECT_PATTERN``
        finds in the response text, fetches that URL, and wraps the value
        returned by ``find_script_element`` for the second response in a
        ``DirectLink``.
        """
        async with AsyncClient(verify=False) as client:
            landing_page = await client.get(self.url)
            # The first page only points at the real one; pull the target
            # URL out of its body.
            target_url = REDIRECT_PATTERN.search(landing_page.text).group()
            player_page = await client.get(target_url)
            return DirectLink(find_script_element(player_page.text))
0 commit comments