Skip to content

Commit dbd6f4a

Browse files
committed
x1337 scraper Jan 2025 update
1 parent 2e5c48a commit dbd6f4a

File tree

1 file changed

+60
-39
lines changed

1 file changed

+60
-39
lines changed

scraper/services/x1337.py

Lines changed: 60 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,78 @@
1-
#import modules
2-
from base import *
1+
import urllib.request
2+
import urllib.parse
33
from ui.ui_print import *
44
import releases
5+
import re
56

67
# Scraper identity and endpoints.
name = "1337x"                      # service name referenced in the active-scrapers config
base_url = "https://1337x.to"       # canonical site root used to build search/detail URLs
# One urllib opener reused for every request so all calls share a handler chain.
session = urllib.request.build_opener()
811

912
def setup(cls, new=False):
1013
from scraper.services import setup
11-
setup(cls,new)
14+
setup(cls, new)
15+
1216

1317
def scrape(query, altquery):
1418
from scraper.services import active
19+
20+
mediatype = 'TV' if re.search(r'(\bseries\b|\bS\d+\b)', altquery) else 'Movies'
1521
scraped_releases = []
1622
if '1337x' in active:
23+
q = query.replace('.?', '').replace("'", "").replace("’", "").replace('.', ' ').strip(".").strip(" ")
24+
ui_print("[1337x] using extended query: " + q, ui_settings.debug)
1725
headers = {
1826
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}
19-
url = 'http://1337x.to/search/' + str(query) + '/1/'
27+
url = base_url + '/sort-category-search/' + urllib.parse.quote(q, safe=':/') + '/' + mediatype + '/seeders/desc/1/'
2028
response = None
2129
try:
22-
response = session.get(url, headers=headers)
23-
soup = BeautifulSoup(response.content, 'html.parser')
24-
torrentList = soup.select('a[href*="/torrent/"]')
25-
sizeList = soup.select('td.coll-4')
26-
seederList = soup.select('td.coll-2')
27-
if torrentList:
28-
for count, torrent in enumerate(torrentList):
29-
title = torrent.getText().strip()
30-
title = title.replace(" ", '.')
31-
title = regex.sub(r'\.+', ".", title)
32-
if regex.match(r'(' + altquery.replace('.', '\.').replace("\.*", ".*") + ')', title,
33-
regex.I):
34-
link = torrent['href']
35-
response = session.get('http://1337x.to' + link, headers=headers)
36-
soup = BeautifulSoup(response.content, 'html.parser')
37-
download = soup.select('a[href^="magnet"]')[0]['href']
38-
size = sizeList[count].contents[0]
39-
seeders = seederList[count].contents[0]
40-
if regex.search(r'([0-9]*?\.[0-9])(?= MB)', size, regex.I):
41-
size = regex.search(r'([0-9]*?\.[0-9])(?= MB)', size, regex.I).group()
42-
size = float(float(size) / 1000)
43-
elif regex.search(r'([0-9]*?\.[0-9])(?= GB)', size, regex.I):
44-
size = regex.search(r'([0-9]*?\.[0-9])(?= GB)', size, regex.I).group()
45-
size = float(size)
46-
else:
47-
size = float(size)
48-
scraped_releases += [
49-
releases.release('[1337x]', 'torrent', title, [], size, [download], seeders=int(seeders))]
30+
ui_print("[1337x] Sending GET request to URL: " + url, ui_settings.debug)
31+
request = urllib.request.Request(url, headers=headers)
32+
response = session.open(request)
33+
status_code = response.getcode()
34+
35+
if status_code == 200:
36+
content = response.read().decode('utf-8', errors='ignore')
37+
soup = BeautifulSoup(content, 'html.parser')
38+
torrentList = soup.select('a[href*="/torrent/"]')
39+
sizeList = soup.select('td.coll-4')
40+
seederList = soup.select('td.coll-2')
41+
if torrentList:
42+
ui_print(f"[1337x] Found {len(torrentList)} torrent(s)", ui_settings.debug)
43+
for count, torrent in enumerate(torrentList):
44+
title = torrent.getText().strip()
45+
title = re.sub(r'[^\w\s\.\-]', '', title)
46+
title = title.replace(" ", '.')
47+
title = re.sub(r'\.+', ".", title)
48+
if re.match(r'(' + altquery.replace('.', '\.').replace("\.*", ".*") + ')', title, re.I):
49+
link = torrent['href']
50+
request = urllib.request.Request(base_url + link, headers=headers)
51+
response = session.open(request)
52+
content = response.read().decode('utf-8')
53+
soup = BeautifulSoup(content, 'html.parser')
54+
download = soup.select('a[href^="magnet"]')[0]['href']
55+
size = sizeList[count].contents[0]
56+
seeders = seederList[count].contents[0]
57+
if re.search(r'([0-9]*?\.[0-9])(?= MB)', size, re.I):
58+
size = re.search(r'([0-9]*?\.[0-9])(?= MB)', size, re.I).group()
59+
size = float(float(size) / 1000)
60+
elif re.search(r'([0-9]*?\.[0-9])(?= GB)', size, re.I):
61+
size = re.search(r'([0-9]*?\.[0-9])(?= GB)', size, re.I).group()
62+
size = float(size)
63+
else:
64+
size = float(size)
65+
66+
scraped_releases += [releases.release('[1337x]', 'torrent', title, [], size, [download], seeders=int(seeders))]
67+
ui_print(f"[1337x] Scraped release: title={title}, size={size} GB, seeders={seeders}", ui_settings.debug)
68+
else:
69+
ui_print("[1337x] No torrents found", ui_settings.debug)
70+
else:
71+
ui_print("[1337x] Failed to retrieve the page. Status code: " + str(status_code), ui_settings.debug)
5072
except Exception as e:
51-
if hasattr(response,"status_code") and not str(response.status_code).startswith("2"):
52-
ui_print('1337x error '+str(response.status_code)+': 1337x is temporarily not reachable')
73+
if hasattr(response, "status_code") and not str(response.status_code).startswith("2"):
74+
ui_print('[1337x] error ' + str(response.status_code) + ': 1337x is temporarily not reachable')
5375
else:
54-
ui_print('1337x error: unknown error')
55-
response = None
56-
ui_print('1337x error: exception: ' + str(e),ui_settings.debug)
57-
return scraped_releases
76+
ui_print('[1337x] error: unknown error')
77+
ui_print('[1337x] error: exception: ' + str(e), ui_settings.debug)
78+
return scraped_releases

0 commit comments

Comments
 (0)