Skip to content

Commit 0510046

Browse files
Add Filemoon support
1 parent 9de674c commit 0510046

File tree

5 files changed

+162
-8
lines changed

5 files changed

+162
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ List of supported video hoster.
127127
- [x] Streamtape (Removed from AniWorld & SerienStream)
128128
- [x] Luluvdo
129129
- [x] LoadX
130-
- [ ] Filemoon
130+
- [x] Filemoon
131131

132132
## Player
133133

src/gucken/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
import warnings
22
warnings.filterwarnings('ignore', message='Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
33

4-
__version__ = "0.3.3"
4+
__version__ = "0.3.4"

src/gucken/hoster/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ def has_headers(self) -> bool:
2929
return False
3030

3131

32+
@dataclass
class EmptyDirectLink(DirectLink):
    """Placeholder ``DirectLink`` that carries no URL.

    It can be constructed without arguments and always reports itself as
    not working.
    """

    # The default is None, so the field is optional rather than a plain str.
    # The annotation is quoted so it is never evaluated at class-creation
    # time, which keeps it valid on interpreters without `X | Y` support.
    url: "str | None" = None

    async def check_is_working(self) -> bool:
        """Return ``False``; there is no URL that could be checked."""
        return False
38+
3239
@dataclass
3340
class Hoster:
3441
url: str

src/gucken/hoster/filemoon.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,38 @@
1+
import logging
12
from re import compile as re_compile
23

4+
from .common import DirectLink, Hoster, EmptyDirectLink
35
from ..networking import AsyncClient
6+
from ..packer import unpack
47

5-
from .common import DirectLink, Hoster
8+
# Finds an <iframe> tag and captures its src attribute: group 1 holds a
# single-quoted value, group 2 a double-quoted one.
REDIRECT_REGEX = re_compile(
    r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>'
)

# Captures the body of each <script> tag that carries data-cfasync=false.
SCRIPT_REGEX = re_compile(
    r'(?s)<script\s+[^>]*?data-cfasync=["\']?false["\']?[^>]*>(.+?)</script>'
)

# Captures a double-quoted URL containing ".m3u8" that follows a `file:` key.
VIDEO_URL_REGEX = re_compile(
    r'file:\s*"([^"]+\.m3u8[^"]*)"'
)
611

7-
FILEMOON_PATTERN = re_compile("")
8-
9-
# TODO: WIP !!!
1012
class FilemoonHoster(Hoster):
    """Hoster that resolves a Filemoon page to a direct ``.m3u8`` link."""

    async def get_direct_link(self) -> DirectLink:
        """Fetch the hoster page and extract the video URL from a packed script.

        The page at ``self.url`` is downloaded; if it embeds an ``<iframe>``,
        the iframe target is downloaded instead. Every matching ``<script>``
        whose body starts with ``eval(`` is unpacked and searched for a
        ``file: "....m3u8"`` entry.

        Returns:
            A ``DirectLink`` for the first URL found, otherwise an
            ``EmptyDirectLink`` (a warning is logged in that case).
        """
        # Local import: unpack() signals failure by raising, so the exception
        # type is needed here to skip undecodable scripts.
        from ..packer import UnpackingError

        async with AsyncClient(verify=False) as client:
            response = await client.get(self.url)
            source = response.text

            match = REDIRECT_REGEX.search(source)
            if match:
                # Group 1 is the single-quoted src, group 2 the double-quoted one.
                redirect_url = match.group(1) or match.group(2)
                response = await client.get(redirect_url, headers={"Sec-Fetch-Dest": "iframe"})
                source = response.text

        # No further requests are made, so parsing happens after the client
        # has been closed.
        for script_match in SCRIPT_REGEX.finditer(source):
            script_content = script_match.group(1).strip()
            if not script_content.startswith("eval("):
                continue

            try:
                unpacked = unpack(script_content)
            except (UnpackingError, IndexError, KeyError, ValueError) as error:
                # One undecodable script must not abort the whole lookup;
                # try the remaining scripts instead.
                logging.debug("Filemoon: skipping script that could not be unpacked: %s", error)
                continue

            if not unpacked:
                continue

            video_match = VIDEO_URL_REGEX.search(unpacked)
            if video_match:
                return DirectLink(video_match.group(1))

        logging.warning("Filemoon: failed to retrieve video URL from: \"%s\"", self.url)
        return EmptyDirectLink()

src/gucken/packer.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Based on https://github.com/beautifier/js-beautify/blob/main/python/jsbeautifier/unpackers/packer.py
2+
from re import compile, DOTALL, ASCII
3+
4+
class UnpackingError(Exception):
    """Raised when P.A.C.K.E.R. packed source cannot be decoded."""
6+
7+
# Matches the opening of a packed script: `eval(function(p,a,c,k,e,`
# with optional spaces between the tokens.
DETECT_PATTERN = compile(
    r"eval[ ]*\([ ]*function[ ]*\([ ]*p[ ]*,[ ]*a[ ]*,[ ]*c[ ]*,[ ]*k[ ]*,[ ]*e[ ]*,[ ]*"
)

# Argument extractors, tried in order. Both capture payload, radix, count and
# the '|'-joined symbol table; the first also captures two trailing arguments.
FILTERARGS_PATTERNS = [
    compile(
        r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)",
        DOTALL,
    ),
    compile(
        r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)",
        DOTALL,
    ),
]

# A single ASCII word inside the payload.
WORD_PATTERN = compile(r"\b\w+\b", ASCII)

# A string lookup table declaration such as: var _name=["a","b"];
REPLACESTRINGS_PATTERN = compile(r'var *(_\w+)\=\["(.*?)"\];', DOTALL)
16+
17+
def detect(source: str) -> tuple[bool, str, str]:
    """Report whether `source` contains P.A.C.K.E.R. packed code.

    Returns:
        A tuple ``(found, beginstr, endstr)``. ``beginstr`` is the text before
        the packed call; ``endstr`` is the text after the first ``')))``
        terminator, or after the first ``}))`` when the former is absent.
        Both strings are empty when nothing is detected.
    """
    found = DETECT_PATTERN.search(source)
    if found is None:
        return False, "", ""

    offset = found.start()
    prefix = source[:offset]
    packed = source[offset:]

    # Prefer the quoted terminator; fall back to the brace form. partition()
    # yields an empty tail when the separator is missing.
    _, separator, tail = packed.partition("')))")
    if separator:
        suffix = tail
    else:
        suffix = packed.partition("}))")[2]

    return True, prefix, suffix
36+
37+
def unpack(source: str, beginstr: str = "", endstr: str = "") -> str:
    """Unpack P.A.C.K.E.R. packed JavaScript code.

    Args:
        source: Text containing the packed call and its arguments.
        beginstr: Text to prepend to the unpacked result.
        endstr: Text to append to the unpacked result.

    Returns:
        The unpacked source.

    Raises:
        UnpackingError: If the arguments cannot be extracted, the symbol
            table length does not match the declared count, or the radix
            is not supported.
    """
    payload, symtab, radix, count = _filterargs(source)

    if count != len(symtab):
        raise UnpackingError("Malformed p.a.c.k.e.r. symtab.")

    try:
        unbase = Unbaser(radix)
    except TypeError as err:
        raise UnpackingError("Unknown p.a.c.k.e.r. encoding.") from err

    def lookup(match) -> str:
        """Look up symbols in the synthetic symtab."""
        word = match.group(0)
        try:
            # An empty table entry means the word stands for itself.
            return symtab[unbase(word)] or word
        except (IndexError, KeyError, ValueError):
            # The word is not a valid index into the table (wrong digits for
            # the radix, or out of range), so it is left untouched, as the
            # original JavaScript decoder does.
            return word

    # Undo the escaping applied to the single-quoted payload literal.
    payload = payload.replace("\\\\", "\\").replace("\\'", "'")
    source = WORD_PATTERN.sub(lookup, payload)
    return _replacestrings(source, beginstr, endstr)
57+
58+
def _filterargs(source: str) -> tuple[str, list[str], int, int]:
    """Extract the four decoder arguments from packed source.

    Returns:
        ``(payload, symtab, radix, count)``; a radix written as ``[]`` is
        treated as 62.

    Raises:
        UnpackingError: If no known argument layout matches, or the numeric
            arguments cannot be converted.
    """
    for pattern in FILTERARGS_PATTERNS:
        found = pattern.search(source)
        if found is None:
            continue

        payload, radix, count, words = found.groups()[:4]
        if radix == "[]":
            radix = 62

        try:
            return payload, words.split("|"), int(radix), int(count)
        except ValueError:
            raise UnpackingError("Corrupted p.a.c.k.e.r. data.")

    raise UnpackingError("Could not make sense of p.a.c.k.e.r data (unexpected code structure)")
73+
74+
def _replacestrings(source: str, beginstr: str = "", endstr: str = "") -> str:
    """Strip the string lookup table and inline its values into `source`.

    If `source` declares a table such as ``var _x=["a","b"];``, the
    declaration is removed and every ``_x[i]`` reference is replaced by the
    quoted i-th string. Otherwise `source` is left as is.

    Args:
        source: Unpacked JavaScript source.
        beginstr: Text to prepend to the result.
        endstr: Text to append to the result.

    Returns:
        ``beginstr + processed source + endstr``.
    """
    match = REPLACESTRINGS_PATTERN.search(source)
    if not match:
        return beginstr + source + endstr

    varname, strings = match.groups()
    lookup = strings.split('","')

    # Remove the declaration by its real position; it is not guaranteed to
    # sit at the very start of the source.
    body = source[:match.start()] + source[match.end():]

    for index, value in enumerate(lookup):
        body = body.replace("%s[%d]" % (varname, index), '"%s"' % value)

    # The surrounding text is kept in this branch too, consistent with the
    # no-table case.
    return beginstr + body + endstr
86+
87+
class Unbaser:
    """Callable that converts strings in a given base to natural numbers.

    Bases 2-36 are delegated to ``int(string, base)``. Bases 37-62 use a
    prefix of the 62-character alphabet, and base 95 uses its own alphabet.
    Any other base is rejected with ``TypeError``.
    """

    ALPHABET = {
        62: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
        95: (
            " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
        ),
    }

    def __init__(self, base: int):
        """Prepare a decoder for `base`.

        Raises:
            TypeError: If `base` is not supported.
        """
        self.base = base

        if 2 <= base <= 36:
            self.unbase = lambda string: int(string, base)
            return

        alphabet = self.ALPHABET.get(base)
        if alphabet is None and 36 < base < 62:
            # Derive the alphabet locally instead of writing it back into the
            # class-level table, which is shared by every instance.
            alphabet = self.ALPHABET[62][:base]
        if alphabet is None:
            raise TypeError("Unsupported base encoding.")

        self.dictionary = {cipher: index for index, cipher in enumerate(alphabet)}
        self.unbase = self._dictunbaser

    def __call__(self, string: str) -> int:
        """Decode `string` using the decoder chosen in ``__init__``."""
        return self.unbase(string)

    def _dictunbaser(self, string: str) -> int:
        """Decode a value to an integer using the alphabet dictionary.

        Raises:
            KeyError: If `string` contains a character outside the alphabet.
        """
        ret = 0
        # Most significant digit first: multiply up, then add the next digit.
        for cipher in string:
            ret = ret * self.base + self.dictionary[cipher]
        return ret

0 commit comments

Comments
 (0)