Skip to content

Commit 6ca9a45

Browse files
authored
Refactor config.py and minor code fixes
- Reorganized config.py into clearer sections (Paths, API, Regex, Download, HTTP, Data Classes)
- Replaced HTTP status constants with IntEnum for better readability
- Abbreviated and clarified comments for conciseness
- Added type hints where beneficial
- Applied minor fixes and improvements in related parts of the code
1 parent 987cb31 commit 6ca9a45

File tree

8 files changed

+87
-64
lines changed

8 files changed

+87
-64
lines changed

helpers/bunkr_utils.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,15 @@ def fetch_page(url: str) -> BeautifulSoup | None:
1616
try:
1717
response = requests.get(url, headers=HEADERS, timeout=10)
1818
response.raise_for_status()
19-
return BeautifulSoup(response.text, "html.parser")
2019

2120
except requests.RequestException:
2221
logging.exception("Error occurred while making the request.")
2322
return None
2423

24+
return BeautifulSoup(response.text, "html.parser")
2525

26-
def get_bunkr_status() -> dict[str, str]:
26+
27+
def get_bunkr_status() -> dict[str, str] | None:
2728
"""Fetch the status of servers from the status page and returns a dictionary."""
2829
soup = fetch_page(STATUS_PAGE)
2930
bunkr_status = {}
@@ -46,7 +47,7 @@ def get_bunkr_status() -> dict[str, str]:
4647
except AttributeError as attr_err:
4748
log_message = f"Error extracting server data: {attr_err}"
4849
logging.exception(log_message)
49-
return {}
50+
return None
5051

5152
return bunkr_status
5253

helpers/config.py

Lines changed: 52 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -7,42 +7,50 @@
77
from __future__ import annotations
88

99
from dataclasses import dataclass
10+
from enum import IntEnum
1011
from typing import TYPE_CHECKING
1112

1213
if TYPE_CHECKING:
1314
from argparse import Namespace
1415

15-
STATUS_PAGE = "https://status.bunkr.ru/" # The URL of the status page for checking
16-
# service availability.
17-
BUNKR_API = "https://bunkr.cr/api/vs" # The API for retrieving encryption data.
1816

19-
DOWNLOAD_FOLDER = "Downloads" # The folder where downloaded files will be
20-
# stored.
21-
URLS_FILE = "URLs.txt" # The name of the file containing the list
22-
# of URLs to process.
23-
SESSION_LOG = "session_log.txt" # The file used to log errors.
24-
MIN_DISK_SPACE_GB = 3 # Minimum free disk space (in GB) required.
25-
26-
MAX_FILENAME_LEN = 120 # The maximum length for a file name.
27-
MAX_WORKERS = 3 # The maximum number of threads for
28-
# concurrent downloads.
29-
30-
# Maps URL type identifiers to a boolean indicating whether the URL points to an album
31-
# (True) or a single file (False). For example, URLs containing '/a/' are considered
32-
# albums, while '/f/' or '/v/' are single files.
17+
# ============================
18+
# Paths and Files
19+
# ============================
20+
DOWNLOAD_FOLDER = "Downloads" # The folder where downloaded files will be stored.
21+
URLS_FILE = "URLs.txt" # The name of the file containing the list of URLs to
22+
# process.
23+
SESSION_LOG = "session_log.txt" # The file used to log errors.
24+
MIN_DISK_SPACE_GB = 3 # Minimum free disk space (in GB) required.
25+
26+
# ============================
27+
# API / Status Endpoints
28+
# ============================
29+
STATUS_PAGE = "https://status.bunkr.ru/" # The URL of the status page for checking
30+
# service availability.
31+
BUNKR_API = "https://bunkr.cr/api/vs" # The API for retrieving encryption data.
32+
33+
# ============================
34+
# Regex
35+
# ============================
36+
MEDIA_SLUG_REGEX = r'const\s+slug\s*=\s*"([a-zA-Z0-9_-]+)"' # Extract media slug.
37+
VALID_SLUG_REGEX = r"^[a-zA-Z0-9_-]+$" # Validate media slug.
38+
39+
# ============================
40+
# Download Settings
41+
# ============================
42+
MAX_FILENAME_LEN = 120 # The maximum length for a file name.
43+
MAX_WORKERS = 3 # The maximum number of threads for concurrent downloads.
44+
45+
# Mapping of URL identifiers to a boolean for album (True) vs single file (False).
3346
URL_TYPE_MAPPING = {"a": True, "f": False, "v": False}
3447

35-
# Regex used to extract and validate the media slug.
36-
VALID_SLUG_REGEX = r"^[a-zA-Z0-9_-]+$"
37-
MEDIA_SLUG_REGEX = r'const\s+slug\s*=\s*"([a-zA-Z0-9_-]+)"'
38-
3948
# Constants for file sizes, expressed in bytes.
4049
KB = 1024
4150
MB = 1024 * KB
4251
GB = 1024 * MB
4352

4453
# Thresholds for file sizes and corresponding chunk sizes used during download.
45-
# Each tuple represents: (file size threshold, chunk size to download in that range).
4654
THRESHOLDS = [
4755
(1 * MB, 32 * KB), # Less than 1 MB
4856
(10 * MB, 128 * KB), # 1 MB to 10 MB
@@ -56,11 +64,24 @@
5664
# Default chunk size for files larger than the largest threshold.
5765
LARGE_FILE_CHUNK_SIZE = 16 * MB
5866

59-
# HTTP status codes.
60-
HTTP_STATUS_OK = 200
61-
HTTP_STATUS_FORBIDDEN = 403
62-
HTTP_STATUS_BAD_GATEWAY = 502
63-
HTTP_STATUS_SERVER_DOWN = 521
67+
# ============================
68+
# HTTP / Network
69+
# ============================
70+
class HTTPStatus(IntEnum):
71+
"""Enumeration of common HTTP status codes used in the project."""
72+
73+
OK = 200
74+
FORBIDDEN = 403
75+
INTERNAL_ERROR = 500
76+
BAD_GATEWAY = 502
77+
SERVER_DOWN = 521
78+
79+
# Mapping of HTTP error codes to human-readable fetch error messages.
80+
FETCH_ERROR_MESSAGES: dict[HTTPStatus, str] = {
81+
HTTPStatus.FORBIDDEN: "DDoSGuard blocked the request to {url}",
82+
HTTPStatus.INTERNAL_ERROR: "Internal server error when fetching {url}",
83+
HTTPStatus.BAD_GATEWAY: "Bad gateway for {url}, probably offline",
84+
}
6485

6586
# Headers used for general HTTP requests.
6687
HEADERS = {
@@ -76,6 +97,9 @@
7697
"Referer": "https://get.bunkrr.su/",
7798
}
7899

100+
# ============================
101+
# Data Classes
102+
# ============================
79103
@dataclass
80104
class DownloadInfo:
81105
"""Represent the information related to a download task."""
@@ -94,7 +118,7 @@ class SessionInfo:
94118

95119
@dataclass
96120
class AlbumInfo:
97-
"""Store the informations about an album and its associated item pages."""
121+
"""Store the information about an album and its associated item pages."""
98122

99123
album_id: str
100124
item_pages: list[str]

helpers/crawlers/crawler_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,5 +85,4 @@ async def get_download_info(item_url: str, item_soup: BeautifulSoup) -> tuple:
8585
get_url_based_filename(item_download_link) if item_download_link else None
8686
)
8787
formatted_item_filename = format_item_filename(item_filename, url_based_filename)
88-
8988
return item_download_link, formatted_item_filename

helpers/downloaders/download_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ def save_file_with_progress(
3636

3737
# Create a temporary download path with the .temp extension
3838
temp_download_path = Path(download_path).with_suffix(".temp")
39-
4039
chunk_size = get_chunk_size(file_size)
4140
total_downloaded = 0
4241

helpers/downloaders/media_downloader.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,8 @@
1717
from helpers.bunkr_utils import mark_subdomain_as_offline, subdomain_is_offline
1818
from helpers.config import (
1919
DOWNLOAD_HEADERS,
20-
HTTP_STATUS_BAD_GATEWAY,
21-
HTTP_STATUS_SERVER_DOWN,
2220
DownloadInfo,
21+
HTTPStatus,
2322
SessionInfo,
2423
)
2524
from helpers.file_utils import write_on_session_log
@@ -143,7 +142,7 @@ def handle_request_exception(self, req_err: RequestException, attempt: int) -> b
143142
"""Handle exceptions during the request and manages retries."""
144143
is_server_down = (
145144
req_err.response is None
146-
or req_err.response.status_code == HTTP_STATUS_SERVER_DOWN
145+
or req_err.response.status_code == HTTPStatus.SERVER_DOWN
147146
)
148147
if is_server_down:
149148
# Mark the subdomain as offline and exit the loop
@@ -169,7 +168,7 @@ def handle_request_exception(self, req_err: RequestException, attempt: int) -> b
169168
time.sleep(delay)
170169
return True
171170

172-
if req_err.response.status_code == HTTP_STATUS_BAD_GATEWAY:
171+
if req_err.response.status_code == HTTPStatus.BAD_GATEWAY:
173172
self.live_manager.update_log(
174173
"Server error",
175174
f"Bad gateway for {self.download_info.filename}.",

helpers/file_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ def check_disk_space(live_manager: LiveManager) -> None:
6363
"""Check if the available disk space is greater than or equal to `min_space` GB."""
6464
root_path = get_root_path()
6565
_, _, free_space = shutil.disk_usage(root_path)
66-
free_space_gb = free_space / (1024**3)
66+
free_space_gb = free_space / (1024 ** 3)
6767

6868
if free_space_gb < MIN_DISK_SPACE_GB:
6969
live_manager.update_log(

helpers/general_utils.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@
2323
from .config import (
2424
DOWNLOAD_FOLDER,
2525
DOWNLOAD_HEADERS,
26-
HTTP_STATUS_FORBIDDEN,
27-
HTTP_STATUS_SERVER_DOWN,
26+
FETCH_ERROR_MESSAGES,
2827
MAX_FILENAME_LEN,
28+
HTTPStatus,
2929
)
3030
from .file_utils import write_on_session_log
3131
from .url_utils import change_domain_to_cr
@@ -39,22 +39,17 @@ def validate_download_link(download_link: str) -> bool:
3939
except requests.RequestException:
4040
return False
4141

42-
return response.status_code != HTTP_STATUS_SERVER_DOWN
42+
return response.status_code != HTTPStatus.SERVER_DOWN
4343

4444

4545
async def fetch_page(url: str, retries: int = 5) -> BeautifulSoup | None:
4646
"""Fetch the HTML content of a page at the given URL, with retry logic."""
4747
tried_cr = False
48-
error_messages = {
49-
500: f"Internal server error when fetching {url}",
50-
502: f"Bad gateway for {url}, probably offline",
51-
403: f"DDoSGuard blocked the request to {url}",
52-
}
5348

5449
def handle_response(response: Response) -> BeautifulSoup | None:
5550
"""Process the HTTP response and handles specific status codes."""
56-
if response.status_code in error_messages:
57-
log_message = f"{error_messages[response.status_code]}, check the log file"
51+
if response.status_code in FETCH_ERROR_MESSAGES:
52+
log_message = FETCH_ERROR_MESSAGES[response.status_code].format(url=url)
5853
logging.exception(log_message)
5954
write_on_session_log(url)
6055
return None
@@ -64,21 +59,23 @@ def handle_response(response: Response) -> BeautifulSoup | None:
6459
for attempt in range(retries):
6560
try:
6661
response = requests.Session().get(url, timeout=40)
67-
if response.status_code == HTTP_STATUS_FORBIDDEN and not tried_cr:
62+
if response.status_code == HTTPStatus.FORBIDDEN and not tried_cr:
6863
tried_cr = True
6964
url = change_domain_to_cr(url)
70-
continue # retry immediately with .cr
65+
continue # Retry immediately with .cr
7166

7267
response.raise_for_status()
7368
return handle_response(response)
7469

70+
# Connection dropped unexpectedly by the server
7571
except RemoteDisconnected:
7672
logging.exception("Remote end closed connection without response.")
7773
if attempt < retries - 1:
7874
# Add jitter to avoid a retry storm
7975
delay = 2 ** (attempt + 1) + random.uniform(1, 2) # noqa: S311
80-
asyncio.sleep(delay)
76+
await asyncio.sleep(delay)
8177

78+
# Catch-all for request-related errors
8279
except requests.RequestException as req_err:
8380
log_message = f"Request error for {url}: {req_err}"
8481
logging.exception(log_message)
@@ -121,12 +118,15 @@ def create_download_directory(directory_name: str) -> str:
121118

122119
try:
123120
download_path.mkdir(parents=True, exist_ok=True)
124-
return str(download_path)
125121

126-
except OSError:
127-
logging.exception("Error creating 'Downloads' directory.")
122+
except OSError as os_err:
123+
log_message = f"Error creating 'Downloads' directory: {os_err}"
124+
logging.exception(log_message)
128125
sys.exit(1)
129126

127+
return str(download_path)
128+
129+
130130
def remove_invalid_characters(text: str) -> str:
131131
"""Remove invalid characters from the input string.
132132

helpers/url_utils.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,10 @@
2121

2222
from .config import (
2323
BUNKR_API,
24-
HTTP_STATUS_OK,
2524
MEDIA_SLUG_REGEX,
2625
URL_TYPE_MAPPING,
2726
VALID_SLUG_REGEX,
27+
HTTPStatus,
2828
)
2929

3030
if TYPE_CHECKING:
@@ -137,8 +137,8 @@ def get_album_name(soup: BeautifulSoup) -> str | None:
137137
raw_album_name = name_container.find("h1").get_text(strip=True)
138138
unescaped_album_name = html.unescape(raw_album_name)
139139

140-
# Attempt to fix mojibake (UTF-8 bytes mis-decoded as Latin-1)
141-
# If encoding/decoding fails, keep the decoded version
140+
# Attempt to fix mojibake (UTF-8 bytes mis-decoded as Latin-1). If encoding/decoding
141+
# fails, keep the decoded version
142142
with contextlib.suppress(UnicodeEncodeError, UnicodeDecodeError):
143143
fixed_album_name = unescaped_album_name.encode("latin1").decode("utf-8")
144144

@@ -152,7 +152,7 @@ def get_album_name(soup: BeautifulSoup) -> str | None:
152152
def get_item_type(item_page: str) -> str | None:
153153
"""Extract the type of item (album or single file) from the item page URL."""
154154
try:
155-
return item_page.split("/")[-2]
155+
return item_page.rstrip("/").split("/")[-2]
156156

157157
except AttributeError:
158158
log_message = f"Error extracting the item type from {item_page}"
@@ -165,7 +165,7 @@ def get_url_based_filename(item_download_link: str) -> str:
165165
"""Extract the filename from a download link by removing any directory structure."""
166166
parsed_url = urlparse(item_download_link)
167167
# The download link path contains the filename, preceded by a '/'
168-
return parsed_url.path.split("/")[-1]
168+
return parsed_url.path.rstrip("/").split("/")[-1]
169169

170170

171171
def get_api_response(
@@ -178,20 +178,21 @@ def get_api_response(
178178
try:
179179
with requests.Session() as session:
180180
response = session.post(BUNKR_API, json={"slug": slug})
181-
if response.status_code != HTTP_STATUS_OK:
181+
182+
if response.status_code != HTTPStatus.OK:
182183
log_message = f"Failed to fetch encryption data for slug '{slug}'"
183184
logging.warning(log_message)
184185
return None
185186

186-
return response.json()
187-
188187
except requests.RequestException as req_err:
189188
log_message = f"Error while requesting encryption data for '{slug}': {req_err}"
190189
logging.exception(log_message)
191190
return None
192191

192+
return response.json()
193193

194-
def decrypt_url(api_response: dict[str, bool | str | int]) -> str:
194+
195+
def decrypt_url(api_response: dict[str, bool | str | int]) -> str | None:
195196
"""Decrypt an encrypted URL using a time-based secret key."""
196197
try:
197198
timestamp = api_response["timestamp"]
@@ -200,7 +201,7 @@ def decrypt_url(api_response: dict[str, bool | str | int]) -> str:
200201
except KeyError as key_err:
201202
log_message = f"Missing required encryption data field: {key_err}"
202203
logging.exception(log_message)
203-
return ""
204+
return None
204205

205206
# Generate the secret key based on the timestamp
206207
time_key = floor(timestamp / 3600)

0 commit comments

Comments (0)