Skip to content

Commit 7d924a4

Browse files
authored
Merge pull request #30 from sirstudly/eztv_scraper
2 parents 24ce3e6 + a736299 commit 7d924a4

File tree

2 files changed

+78
-1
lines changed

2 files changed

+78
-1
lines changed

scraper/services/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
from scraper.services import torbox
1212
from scraper.services import mediafusion
1313
from scraper.services import comet
14+
from scraper.services import eztv
1415

1516
#define subclass method
1617
def __subclasses__():
    """Return every scraper service module that is available for querying."""
    return [
        rarbg, x1337, jackett, prowlarr, orionoid, nyaa,
        torrentio, zilean, torbox, mediafusion, comet, eztv,
    ]
1819

1920
active = ['torrentio']
2021
overwrite = []

scraper/services/eztv.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import urllib.request
2+
import urllib.parse
3+
from ui.ui_print import *
4+
import releases
5+
6+
# Identifier used by scraper.services to enable/disable this scraper.
name = "eztv"
base_url = "https://eztvx.to" # if host is DNS blocked, add it manually to your /etc/hosts file
# Shared opener reused for every request issued by this module.
session = urllib.request.build_opener()
9+
10+
11+
def setup(cls, new=False):
    """Delegate configuration of this scraper to the shared services setup routine."""
    # Imported lazily to avoid a circular import at module load time.
    from scraper.services import setup as shared_setup
    shared_setup(cls, new)
14+
15+
16+
def scrape(query, altquery):
    """Scrape EZTV search results and return matching torrent releases.

    Fetches the EZTV search page for *query*, filters result titles against
    the *altquery* pattern, resolves each match's detail page to find its
    magnet link, and returns a list of releases.release objects. Returns an
    empty list when 'eztv' is not in the active scraper list or on any error.
    """
    # NOTE(review): BeautifulSoup, regex, ui_print and ui_settings are assumed
    # to come from the star import of ui.ui_print at module top — confirm.
    from scraper.services import active
    scraped_releases = []
    if 'eztv' in active:
        ui_print("[eztv] using extended query: " + query.replace('.?', '').replace("'", "").replace("’", "").strip(".").strip(" "), ui_settings.debug)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}
        # Build the search URL; dots become spaces and apostrophes are dropped
        # so the query matches EZTV's search behavior.
        url = base_url + '/search/' + urllib.parse.quote(
            query.replace('.?', '').replace("'", "").replace("’", "").replace('.', ' ').strip(".").strip(" "),
            safe=':/')
        try:
            ui_print("[eztv] Sending GET request to URL: " + url, ui_settings.debug)
            request = urllib.request.Request(url, headers=headers)
            response = session.open(request)
            status_code = response.getcode()
            ui_print("[eztv] Received response for search query with status code: " + str(status_code), ui_settings.debug)

            if status_code == 200:
                content = response.read().decode('utf-8')
                soup = BeautifulSoup(content, 'html.parser')
                # One <a class="epinfo"> per result row; size and seeder cells
                # are selected positionally to line up with those rows.
                torrentList = soup.select('a.epinfo')
                sizeList = soup.select('td.forum_thread_post')[4::4]  # Every 4th element starting from the 4th one
                seederList = soup.select('td.forum_thread_post_end')
                if torrentList:
                    ui_print(f"[eztv] Found {len(torrentList)} torrent(s)", ui_settings.debug)
                    for count, torrent in enumerate(torrentList):
                        # Normalize the display title to dot-separated form
                        # (spaces -> dots, runs of dots collapsed).
                        title = torrent.getText().strip()
                        title = title.replace(" ", '.')
                        title = regex.sub(r'\.+', ".", title)
                        ui_print("[eztv] Processing torrent: " + title, ui_settings.debug)
                        # altquery is a dot-separated pattern; literal dots are
                        # escaped while any existing ".*" wildcards are kept.
                        if regex.match(r'(' + altquery.replace('.', '\.').replace("\.*", ".*") + ')', title, regex.I):
                            link = torrent['href']
                            ui_print("[eztv] Sending GET request for torrent details: " + link, ui_settings.debug)
                            # Fetch the detail page to extract the magnet link.
                            request = urllib.request.Request(base_url + link, headers=headers)
                            response = session.open(request)
                            content = response.read().decode('utf-8')
                            soup = BeautifulSoup(content, 'html.parser')
                            download = soup.select('a[href^="magnet"]')[0]['href']
                            size = sizeList[count].getText().strip()
                            ui_print(f"[eztv] Found size: {size}", ui_settings.debug)
                            # '-' means no seeders; thousands separators removed
                            # so int() below succeeds.
                            seeders = seederList[count].getText().strip().replace('-', '0').replace(',', '')
                            ui_print("[eztv] Found download link: " + download, ui_settings.debug)

                            # Convert the human-readable size to GB (float).
                            # NOTE(review): the final else calls float() on the
                            # raw cell text and will raise for units other than
                            # MB/GB (e.g. KB); the outer except then aborts the
                            # whole result loop — confirm intended.
                            if regex.search(r'([0-9]*?[0-9])(?= MB)', size, regex.I):
                                size = regex.search(r'([0-9]*?[0-9])(?= MB)', size, regex.I).group()
                                size = float(float(size) / 1000)
                            elif regex.search(r'([0-9]*?\.[0-9]*?)(?= GB)', size, regex.I):
                                size = regex.search(r'([0-9]*?\.[0-9]*?)(?= GB)', size, regex.I).group()
                                size = float(size)
                            else:
                                size = float(size)

                            scraped_releases += [releases.release('[eztv]', 'torrent', title, [], size, [download], seeders=int(seeders))]
                            ui_print(f"[eztv] Scraped release: title={title}, size={size} GB, seeders={seeders}", ui_settings.debug)
                else:
                    ui_print("[eztv] No torrents found", ui_settings.debug)
            else:
                ui_print("[eztv] Failed to retrieve the page. Status code: " + str(status_code), ui_settings.debug)
        except Exception as e:
            ui_print('eztv error: exception: ' + str(e), ui_settings.debug)
    return scraped_releases

0 commit comments

Comments
 (0)