Skip to content

Commit 6ca9a45

Browse files
authored
Refactor config.py and minor code fixes
- Reorganized config.py into clearer sections (Paths, API, Regex, Download, HTTP, Data Classes)
- Replaced HTTP status constants with IntEnum for better readability
- Abbreviated and clarified comments for conciseness
- Added type hints where beneficial
- Applied minor fixes and improvements in related parts of the code
1 parent 987cb31 commit 6ca9a45

File tree

8 files changed

+87
-64
lines changed

8 files changed

+87
-64
lines changed

helpers/bunkr_utils.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,15 @@ def fetch_page(url: str) -> BeautifulSoup | None:
1616
try:
1717
response = requests.get(url, headers=HEADERS, timeout=10)
1818
response.raise_for_status()
19-
return BeautifulSoup(response.text, "html.parser")
2019

2120
except requests.RequestException:
2221
logging.exception("Error occurred while making the request.")
2322
return None
2423

24+
return BeautifulSoup(response.text, "html.parser")
2525

26-
def get_bunkr_status() -> dict[str, str]:
26+
27+
def get_bunkr_status() -> dict[str, str] | None:
2728
"""Fetch the status of servers from the status page and returns a dictionary."""
2829
soup = fetch_page(STATUS_PAGE)
2930
bunkr_status = {}
@@ -46,7 +47,7 @@ def get_bunkr_status() -> dict[str, str]:
4647
except AttributeError as attr_err:
4748
log_message = f"Error extracting server data: {attr_err}"
4849
logging.exception(log_message)
49-
return {}
50+
return None
5051

5152
return bunkr_status
5253

helpers/config.py

Lines changed: 52 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -7,42 +7,50 @@
77
from __future__ import annotations
88

99
from dataclasses import dataclass
10+
from enum import IntEnum
1011
from typing import TYPE_CHECKING
1112

1213
if TYPE_CHECKING:
1314
from argparse import Namespace
1415

15-
STATUS_PAGE = "https://status.bunkr.ru/" # The URL of the status page for checking
16-
# service availability.
17-
BUNKR_API = "https://bunkr.cr/api/vs" # The API for retrieving encryption data.
1816

19-
DOWNLOAD_FOLDER = "Downloads" # The folder where downloaded files will be
20-
# stored.
21-
URLS_FILE = "URLs.txt" # The name of the file containing the list
22-
# of URLs to process.
23-
SESSION_LOG = "session_log.txt" # The file used to log errors.
24-
MIN_DISK_SPACE_GB = 3 # Minimum free disk space (in GB) required.
25-
26-
MAX_FILENAME_LEN = 120 # The maximum length for a file name.
27-
MAX_WORKERS = 3 # The maximum number of threads for
28-
# concurrent downloads.
29-
30-
# Maps URL type identifiers to a boolean indicating whether the URL points to an album
31-
# (True) or a single file (False). For example, URLs containing '/a/' are considered
32-
# albums, while '/f/' or '/v/' are single files.
17+
# ============================
18+
# Paths and Files
19+
# ============================
20+
DOWNLOAD_FOLDER = "Downloads" # The folder where downloaded files will be stored.
21+
URLS_FILE = "URLs.txt" # The name of the file containing the list of URLs to
22+
# process.
23+
SESSION_LOG = "session_log.txt" # The file used to log errors.
24+
MIN_DISK_SPACE_GB = 3 # Minimum free disk space (in GB) required.
25+
26+
# ============================
27+
# API / Status Endpoints
28+
# ============================
29+
STATUS_PAGE = "https://status.bunkr.ru/" # The URL of the status page for checking
30+
# service availability.
31+
BUNKR_API = "https://bunkr.cr/api/vs" # The API for retrieving encryption data.
32+
33+
# ============================
34+
# Regex
35+
# ============================
36+
MEDIA_SLUG_REGEX = r'const\s+slug\s*=\s*"([a-zA-Z0-9_-]+)"' # Extract media slug.
37+
VALID_SLUG_REGEX = r"^[a-zA-Z0-9_-]+$" # Validate media slug.
38+
39+
# ============================
40+
# Download Settings
41+
# ============================
42+
MAX_FILENAME_LEN = 120 # The maximum length for a file name.
43+
MAX_WORKERS = 3 # The maximum number of threads for concurrent downloads.
44+
45+
# Mapping of URL identifiers to a boolean for album (True) vs single file (False).
3346
URL_TYPE_MAPPING = {"a": True, "f": False, "v": False}
3447

35-
# Regex used to extract and validate the media slug.
36-
VALID_SLUG_REGEX = r"^[a-zA-Z0-9_-]+$"
37-
MEDIA_SLUG_REGEX = r'const\s+slug\s*=\s*"([a-zA-Z0-9_-]+)"'
38-
3948
# Constants for file sizes, expressed in bytes.
4049
KB = 1024
4150
MB = 1024 * KB
4251
GB = 1024 * MB
4352

4453
# Thresholds for file sizes and corresponding chunk sizes used during download.
45-
# Each tuple represents: (file size threshold, chunk size to download in that range).
4654
THRESHOLDS = [
4755
(1 * MB, 32 * KB), # Less than 1 MB
4856
(10 * MB, 128 * KB), # 1 MB to 10 MB
@@ -56,11 +64,24 @@
5664
# Default chunk size for files larger than the largest threshold.
5765
LARGE_FILE_CHUNK_SIZE = 16 * MB
5866

59-
# HTTP status codes.
60-
HTTP_STATUS_OK = 200
61-
HTTP_STATUS_FORBIDDEN = 403
62-
HTTP_STATUS_BAD_GATEWAY = 502
63-
HTTP_STATUS_SERVER_DOWN = 521
67+
# ============================
68+
# HTTP / Network
69+
# ============================
70+
class HTTPStatus(IntEnum):
71+
"""Enumeration of common HTTP status codes used in the project."""
72+
73+
OK = 200
74+
FORBIDDEN = 403
75+
INTERNAL_ERROR = 500
76+
BAD_GATEWAY = 502
77+
SERVER_DOWN = 521
78+
79+
# Mapping of HTTP error codes to human-readable fetch error messages.
80+
FETCH_ERROR_MESSAGES: dict[HTTPStatus, str] = {
81+
HTTPStatus.FORBIDDEN: "DDoSGuard blocked the request to {url}",
82+
HTTPStatus.INTERNAL_ERROR: "Internal server error when fetching {url}",
83+
HTTPStatus.BAD_GATEWAY: "Bad gateway for {url}, probably offline",
84+
}
6485

6586
# Headers used for general HTTP requests.
6687
HEADERS = {
@@ -76,6 +97,9 @@
7697
"Referer": "https://get.bunkrr.su/",
7798
}
7899

100+
# ============================
101+
# Data Classes
102+
# ============================
79103
@dataclass
80104
class DownloadInfo:
81105
"""Represent the information related to a download task."""
@@ -94,7 +118,7 @@ class SessionInfo:
94118

95119
@dataclass
96120
class AlbumInfo:
97-
"""Store the informations about an album and its associated item pages."""
121+
"""Store the information about an album and its associated item pages."""
98122

99123
album_id: str
100124
item_pages: list[str]

helpers/crawlers/crawler_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,5 +85,4 @@ async def get_download_info(item_url: str, item_soup: BeautifulSoup) -> tuple:
8585
get_url_based_filename(item_download_link) if item_download_link else None
8686
)
8787
formatted_item_filename = format_item_filename(item_filename, url_based_filename)
88-
8988
return item_download_link, formatted_item_filename

helpers/downloaders/download_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ def save_file_with_progress(
3636

3737
# Create a temporary download path with the .temp extension
3838
temp_download_path = Path(download_path).with_suffix(".temp")
39-
4039
chunk_size = get_chunk_size(file_size)
4140
total_downloaded = 0
4241

helpers/downloaders/media_downloader.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,8 @@
1717
from helpers.bunkr_utils import mark_subdomain_as_offline, subdomain_is_offline
1818
from helpers.config import (
1919
DOWNLOAD_HEADERS,
20-
HTTP_STATUS_BAD_GATEWAY,
21-
HTTP_STATUS_SERVER_DOWN,
2220
DownloadInfo,
21+
HTTPStatus,
2322
SessionInfo,
2423
)
2524
from helpers.file_utils import write_on_session_log
@@ -143,7 +142,7 @@ def handle_request_exception(self, req_err: RequestException, attempt: int) -> b
143142
"""Handle exceptions during the request and manages retries."""
144143
is_server_down = (
145144
req_err.response is None
146-
or req_err.response.status_code == HTTP_STATUS_SERVER_DOWN
145+
or req_err.response.status_code == HTTPStatus.SERVER_DOWN
147146
)
148147
if is_server_down:
149148
# Mark the subdomain as offline and exit the loop
@@ -169,7 +168,7 @@ def handle_request_exception(self, req_err: RequestException, attempt: int) -> b
169168
time.sleep(delay)
170169
return True
171170

172-
if req_err.response.status_code == HTTP_STATUS_BAD_GATEWAY:
171+
if req_err.response.status_code == HTTPStatus.BAD_GATEWAY:
173172
self.live_manager.update_log(
174173
"Server error",
175174
f"Bad gateway for {self.download_info.filename}.",

helpers/file_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ def check_disk_space(live_manager: LiveManager) -> None:
6363
"""Check if the available disk space is greater than or equal to `min_space` GB."""
6464
root_path = get_root_path()
6565
_, _, free_space = shutil.disk_usage(root_path)
66-
free_space_gb = free_space / (1024**3)
66+
free_space_gb = free_space / (1024 ** 3)
6767

6868
if free_space_gb < MIN_DISK_SPACE_GB:
6969
live_manager.update_log(

helpers/general_utils.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@
2323
from .config import (
2424
DOWNLOAD_FOLDER,
2525
DOWNLOAD_HEADERS,
26-
HTTP_STATUS_FORBIDDEN,
27-
HTTP_STATUS_SERVER_DOWN,
26+
FETCH_ERROR_MESSAGES,
2827
MAX_FILENAME_LEN,
28+
HTTPStatus,
2929
)
3030
from .file_utils import write_on_session_log
3131
from .url_utils import change_domain_to_cr
@@ -39,22 +39,17 @@ def validate_download_link(download_link: str) -> bool:
3939
except requests.RequestException:
4040
return False
4141

42-
return response.status_code != HTTP_STATUS_SERVER_DOWN
42+
return response.status_code != HTTPStatus.SERVER_DOWN
4343

4444

4545
async def fetch_page(url: str, retries: int = 5) -> BeautifulSoup | None:
4646
"""Fetch the HTML content of a page at the given URL, with retry logic."""
4747
tried_cr = False
48-
error_messages = {
49-
500: f"Internal server error when fetching {url}",
50-
502: f"Bad gateway for {url}, probably offline",
51-
403: f"DDoSGuard blocked the request to {url}",
52-
}
5348

5449
def handle_response(response: Response) -> BeautifulSoup | None:
5550
"""Process the HTTP response and handles specific status codes."""
56-
if response.status_code in error_messages:
57-
log_message = f"{error_messages[response.status_code]}, check the log file"
51+
if response.status_code in FETCH_ERROR_MESSAGES:
52+
log_message = FETCH_ERROR_MESSAGES[response.status_code].format(url=url)
5853
logging.exception(log_message)
5954
write_on_session_log(url)
6055
return None
@@ -64,21 +59,23 @@ def handle_response(response: Response) -> BeautifulSoup | None:
6459
for attempt in range(retries):
6560
try:
6661
response = requests.Session().get(url, timeout=40)
67-
if response.status_code == HTTP_STATUS_FORBIDDEN and not tried_cr:
62+
if response.status_code == HTTPStatus.FORBIDDEN and not tried_cr:
6863
tried_cr = True
6964
url = change_domain_to_cr(url)
70-
continue # retry immediately with .cr
65+
continue # Retry immediately with .cr
7166

7267
response.raise_for_status()
7368
return handle_response(response)
7469

70+
# Connection dropped unexpectedly by the server
7571
except RemoteDisconnected:
7672
logging.exception("Remote end closed connection without response.")
7773
if attempt < retries - 1:
7874
# Add jitter to avoid a retry storm
7975
delay = 2 ** (attempt + 1) + random.uniform(1, 2) # noqa: S311
80-
asyncio.sleep(delay)
76+
await asyncio.sleep(delay)
8177

78+
# Catch-all for request-related errors
8279
except requests.RequestException as req_err:
8380
log_message = f"Request error for {url}: {req_err}"
8481
logging.exception(log_message)
@@ -121,12 +118,15 @@ def create_download_directory(directory_name: str) -> str:
121118

122119
try:
123120
download_path.mkdir(parents=True, exist_ok=True)
124-
return str(download_path)
125121

126-
except OSError:
127-
logging.exception("Error creating 'Downloads' directory.")
122+
except OSError as os_err:
123+
log_message = f"Error creating 'Downloads' directory: {os_err}"
124+
logging.exception(log_message)
128125
sys.exit(1)
129126

127+
return str(download_path)
128+
129+
130130
def remove_invalid_characters(text: str) -> str:
131131
"""Remove invalid characters from the input string.
132132

helpers/url_utils.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,10 @@
2121

2222
from .config import (
2323
BUNKR_API,
24-
HTTP_STATUS_OK,
2524
MEDIA_SLUG_REGEX,
2625
URL_TYPE_MAPPING,
2726
VALID_SLUG_REGEX,
27+
HTTPStatus,
2828
)
2929

3030
if TYPE_CHECKING:
@@ -137,8 +137,8 @@ def get_album_name(soup: BeautifulSoup) -> str | None:
137137
raw_album_name = name_container.find("h1").get_text(strip=True)
138138
unescaped_album_name = html.unescape(raw_album_name)
139139

140-
# Attempt to fix mojibake (UTF-8 bytes mis-decoded as Latin-1)
141-
# If encoding/decoding fails, keep the decoded version
140+
# Attempt to fix mojibake (UTF-8 bytes mis-decoded as Latin-1). If encoding/decoding
141+
# fails, keep the decoded version
142142
with contextlib.suppress(UnicodeEncodeError, UnicodeDecodeError):
143143
fixed_album_name = unescaped_album_name.encode("latin1").decode("utf-8")
144144

@@ -152,7 +152,7 @@ def get_album_name(soup: BeautifulSoup) -> str | None:
152152
def get_item_type(item_page: str) -> str | None:
153153
"""Extract the type of item (album or single file) from the item page URL."""
154154
try:
155-
return item_page.split("/")[-2]
155+
return item_page.rstrip("/").split("/")[-2]
156156

157157
except AttributeError:
158158
log_message = f"Error extracting the item type from {item_page}"
@@ -165,7 +165,7 @@ def get_url_based_filename(item_download_link: str) -> str:
165165
"""Extract the filename from a download link by removing any directory structure."""
166166
parsed_url = urlparse(item_download_link)
167167
# The download link path contains the filename, preceded by a '/'
168-
return parsed_url.path.split("/")[-1]
168+
return parsed_url.path.rstrip("/").split("/")[-1]
169169

170170

171171
def get_api_response(
@@ -178,20 +178,21 @@ def get_api_response(
178178
try:
179179
with requests.Session() as session:
180180
response = session.post(BUNKR_API, json={"slug": slug})
181-
if response.status_code != HTTP_STATUS_OK:
181+
182+
if response.status_code != HTTPStatus.OK:
182183
log_message = f"Failed to fetch encryption data for slug '{slug}'"
183184
logging.warning(log_message)
184185
return None
185186

186-
return response.json()
187-
188187
except requests.RequestException as req_err:
189188
log_message = f"Error while requesting encryption data for '{slug}': {req_err}"
190189
logging.exception(log_message)
191190
return None
192191

192+
return response.json()
193193

194-
def decrypt_url(api_response: dict[str, bool | str | int]) -> str:
194+
195+
def decrypt_url(api_response: dict[str, bool | str | int]) -> str | None:
195196
"""Decrypt an encrypted URL using a time-based secret key."""
196197
try:
197198
timestamp = api_response["timestamp"]
@@ -200,7 +201,7 @@ def decrypt_url(api_response: dict[str, bool | str | int]) -> str:
200201
except KeyError as key_err:
201202
log_message = f"Missing required encryption data field: {key_err}"
202203
logging.exception(log_message)
203-
return ""
204+
return None
204205

205206
# Generate the secret key based on the timestamp
206207
time_key = floor(timestamp / 3600)

0 commit comments

Comments (0)