Skip to content

Commit 6713de4

Browse files
committed
Bump v3.0.9
1 parent b8e28a3 commit 6713de4

File tree

4 files changed

+106
-11
lines changed

4 files changed

+106
-11
lines changed

.github/.domain/domain_update.py

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# 20.04.2024
2-
31
import re
42
import os
53
import json
@@ -47,6 +45,90 @@ def get_new_tld(full_url):
4745

4846
return None
4947

48+
def get_enhanced_headers():
    """Build browser-like request headers for domain probing.

    Generates a desktop-Chrome user-agent header set via ``ua_generator``
    and layers a fixed batch of common browser headers on top (Accept,
    Accept-Language, Referer, ...), so requests look less like a bot.

    Returns:
        dict: the merged header mapping, ready to pass to an HTTP client.
    """
    generated = ua_generator.generate(device='desktop', browser='chrome')
    merged = generated.headers.get()

    # Static browser-style headers; these overwrite any same-named keys
    # coming from the generated user-agent set.
    merged.update({
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Referer': 'https://www.google.com/',
    })
    return merged
def extract_redirect_from_403(response, original_url):
    """Try to recover a redirect target from an HTTP 403 response.

    Looks, in order, at: redirect-ish response headers, JavaScript
    redirects in the body, ``<meta http-equiv=refresh>`` tags, plain-text
    redirect notices, and finally redirect-style ``<a>``/``<link>``/``<base>``
    tags. Any exception while reading the body is swallowed (best-effort).

    Args:
        response: HTTP response object exposing ``.headers`` and ``.text``.
        original_url: the URL that was requested (currently unused; kept
            for interface symmetry with the other extractors).

    Returns:
        str | None: the first redirect URL found, or ``None``.
    """
    # Header names that sometimes carry a redirect target even on a 403.
    for key in ('location', 'refresh', 'x-redirect-to', 'x-location', 'redirect'):
        if key in response.headers:
            return response.headers[key]

    try:
        body = response.text

        # 1) JavaScript-driven redirects (matched case-insensitively).
        for rx in (
            r'window\.location\.href\s*=\s*["\']([^"\']+)["\']',
            r'window\.location\s*=\s*["\']([^"\']+)["\']',
            r'location\.href\s*=\s*["\']([^"\']+)["\']',
            r'document\.location\s*=\s*["\']([^"\']+)["\']',
            r'top\.location\.href\s*=\s*["\']([^"\']+)["\']',
            r'parent\.location\s*=\s*["\']([^"\']+)["\']',
        ):
            hit = re.search(rx, body, re.IGNORECASE)
            if hit:
                return hit.group(1)

        # 2) <meta http-equiv="refresh"> tags, in either attribute order.
        for rx in (
            r'<meta[^>]*http-equiv=["\']?refresh["\']?[^>]*content=["\'][^"\']*url=([^"\'>\s]+)',
            r'<meta[^>]*content=["\'][^"\']*url=([^"\'>\s]+)[^>]*http-equiv=["\']?refresh["\']?',
        ):
            hit = re.search(rx, body, re.IGNORECASE)
            if hit:
                return hit.group(1)

        # 3) Plain-text redirect notices. The last pattern is a bare URL
        #    with no capture group, hence the group(1)/group(0) switch.
        #    Only absolute(-ish) URLs are accepted here.
        for rx in (
            r'[Rr]edirect(?:ed)?\s+to:?\s*([^\s<>"\']+)',
            r'[Nn]ew\s+[Uu][Rr][Ll]:?\s*([^\s<>"\']+)',
            r'[Mm]oved\s+to:?\s*([^\s<>"\']+)',
            r'[Ff]ound\s+at:?\s*([^\s<>"\']+)',
            r'[Gg]o\s+to:?\s*([^\s<>"\']+)',
            r'[Vv]isit:?\s*([^\s<>"\']+)',
            r'https?://[^\s<>"\']+\.[a-z]{2,}[^\s<>"\']*',
        ):
            hit = re.search(rx, body)
            if hit:
                candidate = hit.group(1) if '(' in rx else hit.group(0)
                if candidate.startswith(('http://', 'https://', '//')):
                    return candidate

        # 4) Redirect-style anchors, canonical links, and <base> tags.
        for rx in (
            r'<a[^>]*href=["\']([^"\']+)["\'][^>]*>(?:click here|continue|proceed|go here)',
            r'<link[^>]*rel=["\']?canonical["\']?[^>]*href=["\']([^"\']+)["\']',
            r'<base[^>]*href=["\']([^"\']+)["\']',
        ):
            hit = re.search(rx, body, re.IGNORECASE)
            if hit:
                return hit.group(1)

    except Exception:
        # Best-effort extraction: an unreadable body just means "not found".
        pass

    return None
50132
def extract_domain_from_response(response, original_url):
51133
if 'location' in response.headers:
52134
return response.headers['location']
@@ -108,7 +190,10 @@ def extract_domain_from_response(response, original_url):
108190

109191
return None
110192

111-
def try_url(url_to_try, headers, timeout=15):
193+
def try_url(url_to_try, headers=None, timeout=15):
194+
if headers is None:
195+
headers = get_enhanced_headers()
196+
112197
try:
113198
with httpx.Client(headers=headers, timeout=timeout, follow_redirects=False) as client:
114199
response = client.get(url_to_try)
@@ -136,7 +221,20 @@ def try_url(url_to_try, headers, timeout=15):
136221
request=response.request
137222
)
138223

139-
elif response.status_code in [403, 409, 429, 503]:
224+
elif response.status_code == 403:
225+
print(f" [!] HTTP 403 - attempting enhanced extraction")
226+
227+
redirect_url = extract_redirect_from_403(response, url_to_try)
228+
if redirect_url:
229+
print(f" [+] Found redirect URL in 403 response: {redirect_url}")
230+
return httpx.Response(
231+
status_code=200,
232+
headers={"location": redirect_url},
233+
content=b"",
234+
request=response.request
235+
)
236+
237+
elif response.status_code in [409, 429, 503]:
140238
print(f" [!] HTTP {response.status_code} - attempting to extract redirect info")
141239

142240
location = response.headers.get('location')
@@ -194,15 +292,12 @@ def update_domain_entries(data):
194292
print(f" [!] 'full_url' missing. Skipped.")
195293
continue
196294

197-
ua = ua_generator.generate(device=('desktop', 'mobile'), browser=('chrome', 'edge', 'firefox', 'safari'))
198-
current_headers = ua.headers.get()
199-
200295
print(f" [] Stored URL: {original_full_url}")
201296
if original_domain_in_entry:
202297
print(f" [] Stored Domain (TLD): {original_domain_in_entry}")
203298

204299
print(f" [] Testing URL: {original_full_url}")
205-
response = try_url(original_full_url, current_headers)
300+
response = try_url(original_full_url)
206301

207302
if response:
208303
final_url_from_request = str(response.url)

.github/workflows/update_domain.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Update domains
22

33
on:
44
schedule:
5-
- cron: "0 */2 * * *"
5+
- cron: "0 */3 * * *"
66
workflow_dispatch:
77

88
jobs:
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
__title__ = 'StreamingCommunity'
2-
__version__ = '3.0.8'
2+
__version__ = '3.0.9'
33
__author__ = 'Arrowar'
44
__description__ = 'A command-line program to download film'
55
__copyright__ = 'Copyright 2024'

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def read_readme():
1010

1111
setup(
1212
name="StreamingCommunity",
13-
version="3.0.8",
13+
version="3.0.9",
1414
long_description=read_readme(),
1515
long_description_content_type="text/markdown",
1616
author="Lovi-0",

0 commit comments

Comments
 (0)