import urllib.request
import urllib.parse
import urllib.error
import regex
from bs4 import BeautifulSoup
from ui.ui_print import *
import releases

name = "rarbgv2"
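# a single urllib opener is built once at import time and reused for every
# request this scraper makes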
session = urllib.request.build_opener()


def setup(cls, new=False):
    from scraper.services import setup
    setup(cls, new)


def scrape(query, altquery):
    from scraper.services import active
    scraped_releases = []
    if 'rarbgv2' in active:
        ui_print("[rarbg] using extended query: " + query, ui_settings.debug)
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}
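        # therarbg exposes search as a path segment (/get-posts/keywords:<terms>/),
        # so dots in the query are turned back into spaces before percent-encoding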
        url = 'http://therarbg.com/get-posts/keywords:' + urllib.parse.quote(query.replace('.', ' ').replace('?', ''), safe=':/') + '/'
        response = None
        try:
            ui_print("[rarbg] Sending GET request to URL: " + url, ui_settings.debug)
            request = urllib.request.Request(url, headers=headers)
            response = session.open(request)
            status_code = response.getcode()

            if status_code == 200:
                content = response.read().decode('utf-8', errors='ignore')
                soup = BeautifulSoup(content, 'html.parser')
                torrentList = soup.select('a[href*="/post-detail/"]')
                sizeList = soup.select('td[style*="left"]')
                seederList = soup.select('td[style*="color: green"]')
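                # the three selects are assumed to return parallel lists, so an
                # index into torrentList also lines up with its size/seeder cells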
                if torrentList:
                    ui_print(f"[rarbg] Found {len(torrentList)} torrent(s)", ui_settings.debug)
                    for count, torrent in enumerate(torrentList):
                        title = torrent.getText().strip()
                        title = regex.sub(r'[^\w\s\.\-]', '', title)
                        title = title.replace(" ", '.')
                        title = regex.sub(r'\.+', ".", title)
                        ui_print("[rarbg] Processing torrent: " + title, ui_settings.debug)
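                        # altquery treats '.*' as a wildcard: escape literal dots
                        # first, then restore the wildcard before matching the title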
                        if regex.match(r'(' + altquery.replace('.', r'\.').replace(r"\.*", ".*") + ')', title, regex.I):
                            link = torrent['href']
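                            # every match costs one extra request: the post-detail
                            # page is fetched just to pull its magnet link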
                            request = urllib.request.Request(escape_url('http://therarbg.com' + link), headers=headers)
                            response = session.open(request)
                            content = response.read().decode('utf-8', errors='ignore')
                            soup = BeautifulSoup(content, 'html.parser')
                            download = soup.select('a[href^="magnet"]')[0]['href']
                            seeders = seederList[count].contents[0]
                            size = sizeList[count].contents[0].replace('\xa0', ' ')  # collapse non-breaking spaces
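                            # normalize the human-readable size string to GB so the
                            # release size below is always a number of gigabytes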
                            size_match = regex.search(r'([0-9]*\.?[0-9]+)\s*(KB|MB|GB)', size, regex.I)

                            if size_match:
                                size_value = float(size_match.group(1))
                                size_unit = size_match.group(2).upper()

                                if size_unit == 'KB':
                                    size = size_value / (1024 * 1024)  # convert KB to GB
                                elif size_unit == 'MB':
                                    size = size_value / 1024  # convert MB to GB
                                elif size_unit == 'GB':
                                    size = size_value
                                else:
                                    size = float(size_value)

                            scraped_releases += [releases.release('[rarbg]', 'torrent', title, [], size, [download], seeders=int(seeders))]
                            ui_print(f"[rarbg] Scraped release: title={title}, size={size}GB, seeders={seeders}", ui_settings.debug)
                else:
                    ui_print("[rarbg] No torrents found", ui_settings.debug)
            else:
                ui_print("[rarbg] Failed to retrieve the page. Status code: " + str(status_code), ui_settings.debug)

        except Exception as e:
            # a non-2xx response surfaces as urllib.error.HTTPError
            if isinstance(e, urllib.error.HTTPError):
                ui_print('[rarbg] error ' + str(e.code) + ': rarbg is temporarily not reachable')
            else:
                ui_print('[rarbg] error: unknown error. turn on debug printing for more information.')
            response = None
            ui_print('[rarbg] error: exception: ' + str(e), ui_settings.debug)
    return scraped_releases


# properly escape any non-ASCII characters in a URL's path and query
def escape_url(url):
    parts = urllib.parse.urlsplit(url)
    path = urllib.parse.quote(parts.path)
    query = urllib.parse.quote(parts.query, safe="=&?")  # adjust safe characters as needed
    return urllib.parse.urlunsplit((parts.scheme, parts.netloc, path, query, parts.fragment))
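

# Example usage (a minimal sketch, not part of the module: assumes 'rarbgv2'
# has been enabled in scraper.services.active and that ui_settings is set up
# by the surrounding application; the r.title/r.size/r.seeders attribute names
# are assumptions based on the releases.release(...) call above):
#
#   results = scrape('The.Matrix.1999', 'The.Matrix.1999')
#   for r in results:
#       print(r.title, r.size, r.seeders)
#
# escape_url keeps scheme and host intact and percent-encodes the rest, e.g.
# (hypothetical path): escape_url('http://therarbg.com/post-detail/abc déf/')
# returns 'http://therarbg.com/post-detail/abc%20d%C3%A9f/'.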