Skip to content

Commit 382e256

Browse files
committed
refactor: Store download state in a separate class
The Downloader code is hard to follow as it passes a bunch of variables across a variety of functions and methods. To deal with this mess, encapsulate the state needed to download a single link into a separate _FileDownload class and use that instead.
1 parent c5e89fe commit 382e256

File tree

2 files changed

+72
-81
lines changed

2 files changed

+72
-81
lines changed

src/pip/_internal/exceptions.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pip._vendor.requests.models import Request, Response
3131

3232
from pip._internal.metadata import BaseDistribution
33-
from pip._internal.models.link import Link
33+
from pip._internal.network.download import _FileDownload
3434
from pip._internal.req.req_install import InstallRequirement
3535

3636
logger = logging.getLogger(__name__)
@@ -819,17 +819,19 @@ class IncompleteDownloadError(DiagnosticPipError):
819819

820820
reference = "incomplete-download"
821821

822-
def __init__(
823-
self, link: Link, received: int, expected: int, *, retries: int
824-
) -> None:
822+
def __init__(self, download: _FileDownload) -> None:
825823
# Dodge circular import.
826824
from pip._internal.utils.misc import format_size
827825

828-
download_status = f"{format_size(received)}/{format_size(expected)}"
829-
if retries:
830-
retry_status = f"after {retries} attempts "
826+
assert download.size is not None
827+
download_status = (
828+
f"{format_size(download.bytes_received)}/{format_size(download.size)}"
829+
)
830+
if download.reattempts:
831+
retry_status = f"after {download.reattempts + 1} attempts "
831832
hint = "Use --resume-retries to configure resume attempt limit."
832833
else:
834+
# Download retrying is not enabled.
833835
retry_status = ""
834836
hint = "Consider using --resume-retries to enable download resumption."
835837
message = Text(
@@ -839,7 +841,7 @@ def __init__(
839841

840842
super().__init__(
841843
message=message,
842-
context=f"URL: {link.redacted_url}",
844+
context=f"URL: {download.link.redacted_url}",
843845
hint_stmt=hint,
844846
note_stmt="This is an issue with network connectivity, not pip.",
845847
)

src/pip/_internal/network/download.py

Lines changed: 62 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,30 @@ def _http_get_download(
158158
return resp
159159

160160

161+
@dataclass
162+
class _FileDownload:
163+
"""Stores the state of a single file download."""
164+
165+
link: Link
166+
output_file: BinaryIO
167+
size: Optional[int]
168+
bytes_received: int = 0
169+
reattempts: int = 0
170+
171+
def is_incomplete(self) -> bool:
172+
return bool(self.size is not None and self.bytes_received < self.size)
173+
174+
def write_chunk(self, data: bytes) -> None:
175+
self.bytes_received += len(data)
176+
self.output_file.write(data)
177+
178+
def reset_download(self) -> None:
179+
"""Delete any saved data and reset progress to zero."""
180+
self.output_file.seek(0)
181+
self.output_file.truncate()
182+
self.bytes_received = 0
183+
184+
161185
class Downloader:
162186
def __init__(
163187
self,
@@ -183,90 +207,63 @@ def batch(
183207
def __call__(self, link: Link, location: str) -> tuple[str, str]:
184208
"""Download the file given by link into location."""
185209
resp = _http_get_download(self._session, link)
186-
# NOTE: The original download size needs to be passed down everywhere
187-
# so if the download is resumed (with a HTTP Range request) the progress
188-
# bar will report the right size.
189-
total_length = _get_http_response_size(resp)
190-
content_type = resp.headers.get("Content-Type", "")
191-
192-
filename = _get_http_response_filename(resp, link)
193-
filepath = os.path.join(location, filename)
210+
download_size = _get_http_response_size(resp)
194211

212+
filepath = os.path.join(location, _get_http_response_filename(resp, link))
195213
with open(filepath, "wb") as content_file:
196-
bytes_received = self._process_response(
197-
resp, link, content_file, 0, total_length
198-
)
199-
# If possible, check for an incomplete download and attempt resuming.
200-
if total_length and bytes_received < total_length:
201-
self._attempt_resume(
202-
resp, link, content_file, total_length, bytes_received
203-
)
214+
download = _FileDownload(link, content_file, download_size)
215+
self._process_response(download, resp)
216+
if download.is_incomplete():
217+
self._attempt_resume(download, resp)
204218

219+
content_type = resp.headers.get("Content-Type", "")
205220
return filepath, content_type
206221

207-
def _process_response(
208-
self,
209-
resp: Response,
210-
link: Link,
211-
content_file: BinaryIO,
212-
bytes_received: int,
213-
total_length: int | None,
214-
) -> int:
222+
def _process_response(self, download: _FileDownload, resp: Response) -> None:
215223
"""Process the response and write the chunks to the file."""
216224
chunks = _prepare_download(
217-
resp, link, self._progress_bar, total_length, range_start=bytes_received
218-
)
219-
return self._write_chunks_to_file(
220-
chunks, content_file, allow_partial=bool(total_length)
225+
resp,
226+
download.link,
227+
self._progress_bar,
228+
download.size,
229+
range_start=download.bytes_received,
221230
)
231+
self._write_chunks_to_file(download, chunks)
222232

223233
def _write_chunks_to_file(
224-
self, chunks: Iterable[bytes], content_file: BinaryIO, *, allow_partial: bool
225-
) -> int:
234+
self, download: _FileDownload, chunks: Iterable[bytes]
235+
) -> None:
226236
"""Write the chunks to the file and return the number of bytes received."""
227-
bytes_received = 0
228237
try:
229238
for chunk in chunks:
230-
bytes_received += len(chunk)
231-
content_file.write(chunk)
239+
download.write_chunk(chunk)
232240
except ReadTimeoutError as e:
233-
# If partial downloads are OK (the download will be retried), don't bail.
234-
if not allow_partial:
241+
# If the download size is not known, then give up downloading the file.
242+
if download.size is None:
235243
raise e
236244

237-
# Ensuring bytes_received is returned to attempt resume
238245
logger.warning("Connection timed out while downloading.")
239246

240-
return bytes_received
241-
242-
def _attempt_resume(
243-
self,
244-
resp: Response,
245-
link: Link,
246-
content_file: BinaryIO,
247-
total_length: int | None,
248-
bytes_received: int,
249-
) -> None:
247+
def _attempt_resume(self, download: _FileDownload, resp: Response) -> None:
250248
"""Attempt to resume the download if connection was dropped."""
251249
etag_or_last_modified = _get_http_response_etag_or_last_modified(resp)
252250

253-
attempts_left = self._resume_retries
254-
while total_length and attempts_left and bytes_received < total_length:
255-
attempts_left -= 1
256-
251+
while download.reattempts < self._resume_retries and download.is_incomplete():
252+
assert download.size is not None
253+
download.reattempts += 1
257254
logger.warning(
258255
"Attempting to resume incomplete download (%s/%s, attempt %d)",
259-
format_size(bytes_received),
260-
format_size(total_length),
261-
(self._resume_retries - attempts_left),
256+
format_size(download.bytes_received),
257+
format_size(download.size),
258+
download.reattempts,
262259
)
263260

264261
try:
265262
# Try to resume the download using a HTTP range request.
266263
resume_resp = _http_get_download(
267264
self._session,
268-
link,
269-
range_start=bytes_received,
265+
download.link,
266+
range_start=download.bytes_received,
270267
if_range=etag_or_last_modified,
271268
)
272269

@@ -275,33 +272,25 @@ def _attempt_resume(
275272
# other unexpected status, restart the download from the beginning.
276273
must_restart = resume_resp.status_code != HTTPStatus.PARTIAL_CONTENT
277274
if must_restart:
278-
bytes_received, total_length, etag_or_last_modified = (
279-
self._reset_download_state(resume_resp, content_file)
275+
download.size, etag_or_last_modified = self._reset_download_state(
276+
download, resume_resp
280277
)
281278

282-
bytes_received += self._process_response(
283-
resume_resp, link, content_file, bytes_received, total_length
284-
)
279+
self._process_response(download, resume_resp)
285280
except (ConnectionError, ReadTimeoutError, OSError):
286281
continue
287282

288283
# No more resume attempts. Raise an error if the download is still incomplete.
289-
if total_length and bytes_received < total_length:
290-
os.remove(content_file.name)
291-
raise IncompleteDownloadError(
292-
link, bytes_received, total_length, retries=self._resume_retries
293-
)
284+
if download.is_incomplete():
285+
os.remove(download.output_file.name)
286+
raise IncompleteDownloadError(download)
294287

295288
def _reset_download_state(
296-
self,
297-
resp: Response,
298-
content_file: BinaryIO,
299-
) -> tuple[int, int | None, str | None]:
289+
self, download: _FileDownload, resp: Response
290+
) -> tuple[int | None, str | None]:
300291
"""Reset the download state to restart downloading from the beginning."""
301-
content_file.seek(0)
302-
content_file.truncate()
303-
bytes_received = 0
292+
download.reset_download()
304293
total_length = _get_http_response_size(resp)
305294
etag_or_last_modified = _get_http_response_etag_or_last_modified(resp)
306295

307-
return bytes_received, total_length, etag_or_last_modified
296+
return total_length, etag_or_last_modified

0 commit comments

Comments
 (0)