Skip to content

Commit 81dedbc

Browse files
authored
Merge pull request #36 from sirstudly/torrentgalaxy_scraper_support
adds torrentgalaxy scraper support
2 parents 98c217c + c099d88 commit 81dedbc

File tree

2 files changed

+74
-1
lines changed

2 files changed

+74
-1
lines changed

scraper/services/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#define subclass method
1818
def __subclasses__():
19-
return [rarbg,x1337,jackett,prowlarr,orionoid,nyaa,torrentio,zilean,torbox,mediafusion,comet,eztv,thepiratebay]
19+
return [rarbg,x1337,jackett,prowlarr,orionoid,nyaa,torrentio,zilean,torbox,mediafusion,comet,eztv,thepiratebay,torrentgalaxy]
2020

2121
active = ['torrentio']
2222
overwrite = []

scraper/services/torrentgalaxy.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import urllib.request
2+
import urllib.parse
3+
from ui.ui_print import *
4+
import releases
5+
import re
6+
7+
# Service identifier; scrape() checks for this same string in scraper.services.active.
name = "torrentgalaxy"
# Site root; scrape() appends the search path and query string to it.
base_url = "https://torrentgalaxy.to"
# Module-level urllib opener that scrape() uses to send its GET request.
session = urllib.request.build_opener()
10+
11+
12+
def setup(cls, new=False):
    """Forward to scraper.services.setup, passing cls and new through unchanged."""
    # Imported inside the function, as in the original, so the package is only
    # resolved when setup is actually called.
    import scraper.services as services
    services.setup(cls, new)
15+
16+
17+
def _parse_size_gb(size_text):
    """Convert a size label such as "1.4 GB" or "1,024.5 MB" to gigabytes.

    Decimal units are used (1 GB = 1000 MB). A bare number is taken to be
    gigabytes already. Text that cannot be interpreted yields 0.0 instead of
    raising, so one odd row cannot abort a whole scrape.
    """
    match = re.search(r'([0-9][0-9,]*(?:\.[0-9]+)?|\.[0-9]+)\s*([KMGT])i?B', size_text, re.I)
    if match is None:
        try:
            return float(size_text)
        except ValueError:
            return 0.0
    value = float(match.group(1).replace(',', ''))
    unit = match.group(2).upper()
    if unit == 'K':
        return value / 1_000_000
    if unit == 'M':
        return value / 1000
    if unit == 'T':
        return value * 1000
    return value


def _parse_seeders(cell_text):
    """Return the seeder count from a "[seeders/leechers]" cell, or 0 if unreadable."""
    leading = cell_text.strip().replace(',', '').replace('[', '').split('/')[0]
    try:
        return int(leading)
    except ValueError:
        return 0


def scrape(query, altquery):
    """Search torrentgalaxy for *query* and return the releases matching *altquery*.

    Args:
        query: search expression; regex remnants ('.?', dots, apostrophes) are
            stripped before it is sent to the site.
        altquery: regex source that a normalised title must match at its start
            (case-insensitive); '.' is escaped except in '.*' sequences.

    Returns:
        A list of releases.release objects. The list is empty when the service
        is not in scraper.services.active, nothing matched, or the request
        failed; failures are reported through ui_print, never raised.
    """
    from urllib.error import HTTPError
    from scraper.services import active

    scraped_releases = []
    if 'torrentgalaxy' not in active:
        return scraped_releases

    q = query.replace('.?', '').replace("'", "").replace("’", "").replace('.', ' ').strip(".").strip(" ")
    ui_print("[torrentgalaxy] using extended query: " + q, ui_settings.debug)

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}
    url = f'{base_url}/torrents.php?search={urllib.parse.quote(q)}&sort=seeders&order=desc'
    try:
        ui_print("[torrentgalaxy] Sending GET request to URL: " + url, ui_settings.debug)
        request = urllib.request.Request(url, headers=headers)
        # The context manager closes the connection even if reading fails.
        with session.open(request) as response:
            status_code = response.getcode()
            content = response.read().decode('utf-8', errors='ignore') if status_code == 200 else None

        if content is None:
            ui_print("[torrentgalaxy] Failed to retrieve the page. Status code: " + str(status_code), ui_settings.debug)
            return scraped_releases

        # NOTE(review): BeautifulSoup is not imported in the visible part of this
        # module; presumably it arrives via `from ui.ui_print import *` - confirm.
        soup = BeautifulSoup(content, 'html.parser')
        rows = soup.select('div.tgxtablerow')
        if not rows:
            ui_print("[torrentgalaxy] No torrents found", ui_settings.debug)
            return scraped_releases

        ui_print(f"[torrentgalaxy] Found {len(rows)} torrent(s)", ui_settings.debug)
        # Built once per scrape; raw strings avoid the invalid '\.' escape.
        title_filter = re.compile(r'(' + altquery.replace('.', r'\.').replace(r'\.*', '.*') + ')', re.I)
        for row in rows:
            title_element = row.select_one('div.tgxtablecell.clickable-row a.txlight')
            title = title_element.getText().strip() if title_element else 'Unknown Title'
            title = re.sub(r'[^\w\s\.\-]', '', title)  # keep only word chars, whitespace, dots and hyphens
            title = title.replace(" ", '.')
            title = re.sub(r'\.+', ".", title)
            if not title_filter.match(title):
                continue

            magnet_element = row.select_one('a[href^="magnet"]')
            download = magnet_element['href'] if magnet_element else '#'
            size_element = row.select_one('div.tgxtablecell[style*="right"] span.badge')
            size = _parse_size_gb(size_element.getText().strip() if size_element else '0 GB')
            seeders_element = row.select_one('div.tgxtablecell span[title="Seeders/Leechers"]')
            seeders = _parse_seeders(seeders_element.getText()) if seeders_element else 0

            scraped_releases += [releases.release('[torrentgalaxy]', 'torrent', title, [], size, [download], seeders=seeders)]
            ui_print(f"[torrentgalaxy] Scraped release: title={title}, size={size:.2f} GB, seeders={seeders}", ui_settings.debug)
    except HTTPError as e:
        # urllib raises HTTPError for non-2xx replies; the status is on the
        # exception (e.code), not on a response object.
        ui_print('[torrentgalaxy] error ' + str(e.code) + ': torrentgalaxy is temporarily not reachable')
        ui_print('[torrentgalaxy] error: exception: ' + str(e), ui_settings.debug)
    except Exception as e:
        # Best-effort scraper: report and return whatever was collected so far.
        ui_print('[torrentgalaxy] error: unknown error')
        ui_print('[torrentgalaxy] error: exception: ' + str(e), ui_settings.debug)
    return scraped_releases

0 commit comments

Comments
 (0)