Skip to content

Commit e977449

Browse files
committed
fix downloading and scraping trcustoms download from trle.net
1 parent a58b8f2 commit e977449

File tree

6 files changed

+317
-106
lines changed

6 files changed

+317
-106
lines changed

database/https.py

Lines changed: 146 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def head(self, url):
9393
curl.setopt(pycurl.HEADERFUNCTION, buffer.write)
9494
curl.setopt(pycurl.NOBODY, True)
9595
curl.setopt(pycurl.URL, url)
96-
curl.setopt(pycurl.FOLLOWLOCATION, True)
96+
curl.setopt(pycurl.FOLLOWLOCATION, False)
9797

9898
if self.misconfigured_server:
9999
if not self.leaf_cert:
@@ -321,9 +321,9 @@ def progress_callback(self, total_to_download, downloaded, total_to_upload, uplo
321321
self.progress_bar.total = total_to_download
322322
return 0 # Returning 0 means to continue
323323

324-
def download_file(self, url):
324+
def download_file_trle(self, url):
325325
"""
326-
Download a file from the specified URL and stores its contents in a buffer.
326+
Download a file from the trle URL and stores its contents in a buffer.
327327
328328
This method utilizes the `pycurl` library to perform the download, providing
329329
a progress bar for user feedback. It handles server misconfigurations,
@@ -422,14 +422,156 @@ def download_file(self, url):
422422

423423
finally:
424424
if self.progress_bar:
425-
self.progress_bar.close() # Close the progress bar
425+
self.progress_bar.close()
426426
curl.close()
427427
if temp_cert_path:
428428
if os.path.exists(temp_cert_path):
429429
os.remove(temp_cert_path)
430430

431431
return zip_file # Always return the zip_file dictionary
432432

433+
def download_file_trcustoms(self, url):
    """
    Download the newest file of a trcustoms.org level into the buffer.

    The level ID is taken from the level page URL, the level record is
    requested from the trcustoms JSON API, and the last entry of its 'files'
    list is downloaded with `pycurl` (following redirects) while a `tqdm`
    progress bar is shown. Afterwards the MD5 checksum of the buffered
    content is calculated.

    Parameters:
    ----------
    url : str
        Level page URL starting with "https://trcustoms.org/levels/".

    Raises:
    -------
    SystemExit
        Exits the program if the API response is not a dictionary.

    Exceptions:
    ------------
    pycurl.error
        Caught internally if an error occurs during the download; the
        method then sets `self.status` to 1 and returns an empty dict.

    Returns:
    --------
    dict
        On success, a dictionary containing details of the downloaded file:
        - 'size': Size of the file in MiB (mebibytes), as reported by the API.
        - 'url': The file URL reported by the API.
        - 'name': A file name built from the level ID and file version.
        - 'md5': The MD5 checksum of the downloaded content.
        An empty dictionary is returned when the level lists no usable file,
        the HTTP response code is not 200, or `pycurl` reports an error.

    Notes:
    ------
    - `REQUEST_HANDLER.misconfigured_server` is switched off for the duration
      of the call and set back to True in the `finally` clause.
    - `self.status` is 0 after a successful download and 1 after a failure.
    """
    curl = pycurl.Curl()
    zip_file = data_factory.make_zip_file()  # Initialize the zip_file dictionary

    try:
        # Everything that can fail lives inside the try block so the finally
        # clause always closes the curl handle and restores the flag.
        REQUEST_HANDLER.misconfigured_server = False

        # Drop surrounding whitespace and slashes so the API path below does
        # not end up with a doubled slash.
        level_id = url.split("https://trcustoms.org/levels/")[1].strip().strip("/")
        api_url = f'https://trcustoms.org/api/levels/{level_id}/'
        print(api_url)
        level_json = get(api_url, 'application/json')
        if not isinstance(level_json, dict):
            sys.exit(1)

        level_files = level_json.get('files') or []
        if not level_files:
            self.status = 1
            print(f"Error: no files listed for level {level_id}")
            return {}  # Return an empty dict on error

        # The last entry of the list is the one that gets downloaded.
        level_last_file = level_files[-1]
        file_url = level_last_file.get('url', '')
        total_size = level_last_file.get('size')
        if not file_url or not isinstance(total_size, (int, float)):
            self.status = 1
            print(f"Error: incomplete file record for level {level_id}")
            return {}  # Return an empty dict on error

        # Get header info
        zip_file['size'] = round(total_size / (1024 * 1024), 2)  # Size in MiB
        zip_file['url'] = file_url
        level_version = level_last_file.get('version', '')
        zip_file['name'] = f"{level_id}-title-V{level_version}-authors.zip"

        # Set up for download
        curl.setopt(pycurl.URL, file_url)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.setopt(pycurl.WRITEFUNCTION, self.write_callback)
        curl.setopt(pycurl.WRITEDATA, self.buffer)

        # Enable progress meter
        self.progress_bar = tqdm(total=total_size,
                                 unit='B',
                                 unit_scale=True,
                                 unit_divisor=1024,
                                 desc="Downloading")

        curl.setopt(pycurl.NOPROGRESS, False)
        curl.setopt(pycurl.XFERINFOFUNCTION, self.progress_callback)

        # Perform the download
        curl.perform()

        # Check for errors
        http_code = curl.getinfo(pycurl.RESPONSE_CODE)
        if http_code != 200:
            self.status = 1
            print(f"Error: HTTP response code {http_code}")
            return {}  # Return an empty dict on error

        self.status = 0

        # Finalize MD5 checksum
        md5_hash = hashlib.md5(usedforsecurity=False)
        self.buffer.seek(0)  # Reset buffer pointer
        md5_hash.update(self.buffer.getvalue())
        zip_file['md5'] = md5_hash.hexdigest()

    except pycurl.error as e:
        self.status = 1
        print(f"Error: {e}")
        return {}  # Return an empty dict on error

    finally:
        # Compare against None explicitly: a tqdm bar can evaluate as falsy
        # (for example with a total of 0) and would then be left open.
        if self.progress_bar is not None:
            self.progress_bar.close()
        curl.close()
        REQUEST_HANDLER.misconfigured_server = True

    return zip_file
538+
539+
def download_file(self, url):
    """
    Branch the download from a trle.net redirect link to trcustoms or trle.

    This method sends a HEAD request for `url` through `REQUEST_HANDLER` and
    inspects the `Location` header of the response to see which domain the
    redirection leads to.

    Parameters:
    ----------
    url : str
        The URL of the file to download. Must be a valid URL.

    Returns:
    --------
    dict
        The result of `download_file_trcustoms` (called with the redirect
        target) or `download_file_trle` (called with the original `url`).
        When the redirect target matches neither site, or no `Location`
        header is present, a fresh dictionary from
        `data_factory.make_zip_file()` is returned instead.
    """
    head_string = REQUEST_HANDLER.head(url)

    # HTTP header names are case-insensitive (HTTP/2 delivers them in lower
    # case), so match the name without regard to case instead of searching
    # for the literal "Location: ".
    redirect_url = ""
    for header_line in (head_string or "").splitlines():
        name, separator, value = header_line.partition(":")
        if separator and name.strip().lower() == "location":
            redirect_url = value.strip()
            break

    if redirect_url.startswith("https://trcustoms.org/levels/"):
        print("trcustoms.org")
        return self.download_file_trcustoms(redirect_url)

    # Both host spellings of trle.net are handled by the same downloader,
    # which is given the original link rather than the redirect target.
    trle_prefixes = ("https://www.trle.net/levels/", "https://trle.net/levels/")
    if redirect_url.startswith(trle_prefixes):
        print("trle.net")
        return self.download_file_trle(url)

    return data_factory.make_zip_file()
574+
433575

434576
# AcquireLock instance bound to a constant-style name; presumably a
# module-level singleton shared by importers -- TODO confirm against callers.
ACQUIRE_LOCK = AcquireLock()
435577
# RequestHandler instance used by the download methods in this file: they call
# REQUEST_HANDLER.head() and toggle REQUEST_HANDLER.misconfigured_server.
REQUEST_HANDLER = RequestHandler()

0 commit comments

Comments
 (0)