Skip to content

Commit ae49251

Browse files
committed
Fixes non-ASCII (e.g. Cyrillic) characters in rarbg (v2) scraper URLs
1 parent af73de8 commit ae49251

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

scraper/services/rarbgv2.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def scrape(query, altquery):
4343
ui_print("[rarbg] Processing torrent: " + title, ui_settings.debug)
4444
if regex.match(r'(' + altquery.replace('.', r'\.').replace(r"\.*", ".*") + ')', title, regex.I):
4545
link = torrent['href']
46-
request = urllib.request.Request('http://therarbg.com' + link, headers=headers)
46+
request = urllib.request.Request(escape_url('http://therarbg.com' + link), headers=headers)
4747
response = session.open(request)
48-
content = response.read().decode('utf-8')
48+
content = response.read().decode('utf-8', errors='ignore')
4949
soup = BeautifulSoup(content, 'html.parser')
5050
download = soup.select('a[href^="magnet"]')[0]['href']
5151
seeders = seederList[count].contents[0]
@@ -79,4 +79,12 @@ def scrape(query, altquery):
7979
ui_print('[rarbg] error: unknown error. turn on debug printing for more information.')
8080
response = None
8181
ui_print('[rarbg] error: exception: ' + str(e), ui_settings.debug)
82-
return scraped_releases
82+
return scraped_releases
83+
84+
85+
# properly escapes any non-ascii characters in url
86+
def escape_url(url):
    """Return *url* with non-ASCII and unsafe characters percent-encoded.

    Only the path and query components are escaped; the scheme, network
    location and fragment are passed through unchanged.  Characters are
    encoded as UTF-8 (the ``urllib.parse.quote`` default).

    Existing percent-escapes are preserved, so the function is idempotent:
    calling it on an already-escaped URL returns that URL unchanged rather
    than double-encoding ``%`` into ``%25``.

    Args:
        url: The absolute or relative URL to escape.

    Returns:
        The escaped URL as a string.
    """
    parts = urllib.parse.urlsplit(url)
    # '%' is kept safe so already percent-encoded sequences (e.g. '%20')
    # are not escaped a second time.
    path = urllib.parse.quote(parts.path, safe="/%")
    # '=', '&' and '?' keep the key/value structure intact; '+' is the
    # form-encoded space and must not be rewritten to '%2B'.
    query = urllib.parse.quote(parts.query, safe="=&?%+")
    return urllib.parse.urlunsplit(
        (parts.scheme, parts.netloc, path, query, parts.fragment)
    )

0 commit comments

Comments
 (0)