Skip to content

Commit 028821a

Browse files
authored
Merge branch 'main' into pre-commit-ci-update-config
2 parents 1514182 + c643213 commit 028821a

File tree

7 files changed

+145
-196
lines changed

7 files changed

+145
-196
lines changed

build-project/build-requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pyproject-hooks==1.2.0 \
1818
# via build
1919

2020
# The following packages are considered to be unsafe in a requirements file:
21-
setuptools==80.8.0 \
22-
--hash=sha256:49f7af965996f26d43c8ae34539c8d99c5042fbff34302ea151eaa9c207cd257 \
23-
--hash=sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0
21+
setuptools==80.9.0 \
22+
--hash=sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922 \
23+
--hash=sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c
2424
# via -r build-requirements.in

src/pip/_internal/exceptions.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pip._vendor.requests.models import Request, Response
3131

3232
from pip._internal.metadata import BaseDistribution
33-
from pip._internal.models.link import Link
33+
from pip._internal.network.download import _FileDownload
3434
from pip._internal.req.req_install import InstallRequirement
3535

3636
logger = logging.getLogger(__name__)
@@ -819,17 +819,19 @@ class IncompleteDownloadError(DiagnosticPipError):
819819

820820
reference = "incomplete-download"
821821

822-
def __init__(
823-
self, link: Link, received: int, expected: int, *, retries: int
824-
) -> None:
822+
def __init__(self, download: _FileDownload) -> None:
825823
# Dodge circular import.
826824
from pip._internal.utils.misc import format_size
827825

828-
download_status = f"{format_size(received)}/{format_size(expected)}"
829-
if retries:
830-
retry_status = f"after {retries} attempts "
826+
assert download.size is not None
827+
download_status = (
828+
f"{format_size(download.bytes_received)}/{format_size(download.size)}"
829+
)
830+
if download.reattempts:
831+
retry_status = f"after {download.reattempts + 1} attempts "
831832
hint = "Use --resume-retries to configure resume attempt limit."
832833
else:
834+
# Download retrying is not enabled.
833835
retry_status = ""
834836
hint = "Consider using --resume-retries to enable download resumption."
835837
message = Text(
@@ -839,7 +841,7 @@ def __init__(
839841

840842
super().__init__(
841843
message=message,
842-
context=f"URL: {link.redacted_url}",
844+
context=f"URL: {download.link.redacted_url}",
843845
hint_stmt=hint,
844846
note_stmt="This is an issue with network connectivity, not pip.",
845847
)

src/pip/_internal/network/auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def __init__(
230230
) -> None:
231231
self.prompting = prompting
232232
self.index_urls = index_urls
233-
self.keyring_provider = keyring_provider # type: ignore[assignment]
233+
self.keyring_provider = keyring_provider
234234
self.passwords: dict[str, AuthInfo] = {}
235235
# When the user is prompted to enter credentials and keyring is
236236
# available, we will offer to save them. If the user accepts,

src/pip/_internal/network/download.py

Lines changed: 96 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import logging
77
import mimetypes
88
import os
9-
from collections.abc import Iterable
9+
from collections.abc import Iterable, Mapping
10+
from dataclasses import dataclass
1011
from http import HTTPStatus
1112
from typing import BinaryIO
1213

@@ -40,7 +41,7 @@ def _get_http_response_etag_or_last_modified(resp: Response) -> str | None:
4041
return resp.headers.get("etag", resp.headers.get("last-modified"))
4142

4243

43-
def _prepare_download(
44+
def _log_download(
4445
resp: Response,
4546
link: Link,
4647
progress_bar: str,
@@ -134,28 +135,28 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
134135
return filename
135136

136137

137-
def _http_get_download(
138-
session: PipSession,
139-
link: Link,
140-
range_start: int | None = 0,
141-
if_range: str | None = None,
142-
) -> Response:
143-
target_url = link.url.split("#", 1)[0]
144-
headers = HEADERS.copy()
145-
# request a partial download
146-
if range_start:
147-
headers["Range"] = f"bytes={range_start}-"
148-
# make sure the file hasn't changed
149-
if if_range:
150-
headers["If-Range"] = if_range
151-
try:
152-
resp = session.get(target_url, headers=headers, stream=True)
153-
raise_for_status(resp)
154-
except NetworkConnectionError as e:
155-
assert e.response is not None
156-
logger.critical("HTTP error %s while getting %s", e.response.status_code, link)
157-
raise
158-
return resp
138+
@dataclass
139+
class _FileDownload:
140+
"""Stores the state of a single link download."""
141+
142+
link: Link
143+
output_file: BinaryIO
144+
size: int | None
145+
bytes_received: int = 0
146+
reattempts: int = 0
147+
148+
def is_incomplete(self) -> bool:
149+
return bool(self.size is not None and self.bytes_received < self.size)
150+
151+
def write_chunk(self, data: bytes) -> None:
152+
self.bytes_received += len(data)
153+
self.output_file.write(data)
154+
155+
def reset_file(self) -> None:
156+
"""Delete any saved data and reset progress to zero."""
157+
self.output_file.seek(0)
158+
self.output_file.truncate()
159+
self.bytes_received = 0
159160

160161

161162
class Downloader:
@@ -172,146 +173,106 @@ def __init__(
172173
self._progress_bar = progress_bar
173174
self._resume_retries = resume_retries
174175

175-
def __call__(self, link: Link, location: str) -> tuple[str, str]:
176-
"""Download the file given by link into location."""
177-
resp = _http_get_download(self._session, link)
178-
# NOTE: The original download size needs to be passed down everywhere
179-
# so if the download is resumed (with a HTTP Range request) the progress
180-
# bar will report the right size.
181-
total_length = _get_http_response_size(resp)
182-
content_type = resp.headers.get("Content-Type", "")
176+
def batch(
177+
self, links: Iterable[Link], location: str
178+
) -> Iterable[tuple[Link, tuple[str, str]]]:
179+
"""Convenience method to download multiple links."""
180+
for link in links:
181+
filepath, content_type = self(link, location)
182+
yield link, (filepath, content_type)
183183

184-
filename = _get_http_response_filename(resp, link)
185-
filepath = os.path.join(location, filename)
184+
def __call__(self, link: Link, location: str) -> tuple[str, str]:
185+
"""Download a link and save it under location."""
186+
resp = self._http_get(link)
187+
download_size = _get_http_response_size(resp)
186188

189+
filepath = os.path.join(location, _get_http_response_filename(resp, link))
187190
with open(filepath, "wb") as content_file:
188-
bytes_received = self._process_response(
189-
resp, link, content_file, 0, total_length
190-
)
191-
# If possible, check for an incomplete download and attempt resuming.
192-
if total_length and bytes_received < total_length:
193-
self._attempt_resume(
194-
resp, link, content_file, total_length, bytes_received
195-
)
191+
download = _FileDownload(link, content_file, download_size)
192+
self._process_response(download, resp)
193+
if download.is_incomplete():
194+
self._attempt_resumes_or_redownloads(download, resp)
196195

196+
content_type = resp.headers.get("Content-Type", "")
197197
return filepath, content_type
198198

199-
def _process_response(
200-
self,
201-
resp: Response,
202-
link: Link,
203-
content_file: BinaryIO,
204-
bytes_received: int,
205-
total_length: int | None,
206-
) -> int:
207-
"""Process the response and write the chunks to the file."""
208-
chunks = _prepare_download(
209-
resp, link, self._progress_bar, total_length, range_start=bytes_received
210-
)
211-
return self._write_chunks_to_file(
212-
chunks, content_file, allow_partial=bool(total_length)
199+
def _process_response(self, download: _FileDownload, resp: Response) -> None:
200+
"""Download and save chunks from a response."""
201+
chunks = _log_download(
202+
resp,
203+
download.link,
204+
self._progress_bar,
205+
download.size,
206+
range_start=download.bytes_received,
213207
)
214-
215-
def _write_chunks_to_file(
216-
self, chunks: Iterable[bytes], content_file: BinaryIO, *, allow_partial: bool
217-
) -> int:
218-
"""Write the chunks to the file and return the number of bytes received."""
219-
bytes_received = 0
220208
try:
221209
for chunk in chunks:
222-
bytes_received += len(chunk)
223-
content_file.write(chunk)
210+
download.write_chunk(chunk)
224211
except ReadTimeoutError as e:
225-
# If partial downloads are OK (the download will be retried), don't bail.
226-
if not allow_partial:
212+
# If the download size is not known, then give up downloading the file.
213+
if download.size is None:
227214
raise e
228215

229-
# Ensuring bytes_received is returned to attempt resume
230216
logger.warning("Connection timed out while downloading.")
231217

232-
return bytes_received
233-
234-
def _attempt_resume(
235-
self,
236-
resp: Response,
237-
link: Link,
238-
content_file: BinaryIO,
239-
total_length: int | None,
240-
bytes_received: int,
218+
def _attempt_resumes_or_redownloads(
219+
self, download: _FileDownload, first_resp: Response
241220
) -> None:
242-
"""Attempt to resume the download if connection was dropped."""
243-
etag_or_last_modified = _get_http_response_etag_or_last_modified(resp)
244-
245-
attempts_left = self._resume_retries
246-
while total_length and attempts_left and bytes_received < total_length:
247-
attempts_left -= 1
221+
"""Attempt to resume/restart the download if connection was dropped."""
248222

223+
while download.reattempts < self._resume_retries and download.is_incomplete():
224+
assert download.size is not None
225+
download.reattempts += 1
249226
logger.warning(
250227
"Attempting to resume incomplete download (%s/%s, attempt %d)",
251-
format_size(bytes_received),
252-
format_size(total_length),
253-
(self._resume_retries - attempts_left),
228+
format_size(download.bytes_received),
229+
format_size(download.size),
230+
download.reattempts,
254231
)
255232

256233
try:
257-
# Try to resume the download using a HTTP range request.
258-
resume_resp = _http_get_download(
259-
self._session,
260-
link,
261-
range_start=bytes_received,
262-
if_range=etag_or_last_modified,
263-
)
264-
234+
resume_resp = self._http_get_resume(download, should_match=first_resp)
265235
# Fallback: if the server responded with 200 (i.e., the file has
266236
# since been modified or range requests are unsupported) or any
267237
# other unexpected status, restart the download from the beginning.
268238
must_restart = resume_resp.status_code != HTTPStatus.PARTIAL_CONTENT
269239
if must_restart:
270-
bytes_received, total_length, etag_or_last_modified = (
271-
self._reset_download_state(resume_resp, content_file)
272-
)
240+
download.reset_file()
241+
download.size = _get_http_response_size(resume_resp)
242+
first_resp = resume_resp
273243

274-
bytes_received += self._process_response(
275-
resume_resp, link, content_file, bytes_received, total_length
276-
)
244+
self._process_response(download, resume_resp)
277245
except (ConnectionError, ReadTimeoutError, OSError):
278246
continue
279247

280248
# No more resume attempts. Raise an error if the download is still incomplete.
281-
if total_length and bytes_received < total_length:
282-
os.remove(content_file.name)
283-
raise IncompleteDownloadError(
284-
link, bytes_received, total_length, retries=self._resume_retries
249+
if download.is_incomplete():
250+
os.remove(download.output_file.name)
251+
raise IncompleteDownloadError(download)
252+
253+
def _http_get_resume(
254+
self, download: _FileDownload, should_match: Response
255+
) -> Response:
256+
"""Issue a HTTP range request to resume the download."""
257+
# To better understand the download resumption logic, see the mdn web docs:
258+
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Range_requests
259+
headers = HEADERS.copy()
260+
headers["Range"] = f"bytes={download.bytes_received}-"
261+
# If possible, use a conditional range request to avoid corrupted
262+
# downloads caused by the remote file changing in-between.
263+
if identifier := _get_http_response_etag_or_last_modified(should_match):
264+
headers["If-Range"] = identifier
265+
return self._http_get(download.link, headers)
266+
267+
def _http_get(self, link: Link, headers: Mapping[str, str] = HEADERS) -> Response:
268+
target_url = link.url_without_fragment
269+
try:
270+
resp = self._session.get(target_url, headers=headers, stream=True)
271+
raise_for_status(resp)
272+
except NetworkConnectionError as e:
273+
assert e.response is not None
274+
logger.critical(
275+
"HTTP error %s while getting %s", e.response.status_code, link
285276
)
286-
287-
def _reset_download_state(
288-
self,
289-
resp: Response,
290-
content_file: BinaryIO,
291-
) -> tuple[int, int | None, str | None]:
292-
"""Reset the download state to restart downloading from the beginning."""
293-
content_file.seek(0)
294-
content_file.truncate()
295-
bytes_received = 0
296-
total_length = _get_http_response_size(resp)
297-
etag_or_last_modified = _get_http_response_etag_or_last_modified(resp)
298-
299-
return bytes_received, total_length, etag_or_last_modified
300-
301-
302-
class BatchDownloader:
303-
def __init__(
304-
self,
305-
session: PipSession,
306-
progress_bar: str,
307-
resume_retries: int,
308-
) -> None:
309-
self._downloader = Downloader(session, progress_bar, resume_retries)
310-
311-
def __call__(
312-
self, links: Iterable[Link], location: str
313-
) -> Iterable[tuple[Link, tuple[str, str]]]:
314-
"""Download the files given by links into location."""
315-
for link in links:
316-
filepath, content_type = self._downloader(link, location)
317-
yield link, (filepath, content_type)
277+
raise
278+
return resp

src/pip/_internal/operations/prepare.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pip._internal.models.direct_url import ArchiveInfo
3030
from pip._internal.models.link import Link
3131
from pip._internal.models.wheel import Wheel
32-
from pip._internal.network.download import BatchDownloader, Downloader
32+
from pip._internal.network.download import Downloader
3333
from pip._internal.network.lazy_wheel import (
3434
HTTPRangeRequestUnsupported,
3535
dist_from_wheel_url,
@@ -245,7 +245,6 @@ def __init__(
245245
self.build_tracker = build_tracker
246246
self._session = session
247247
self._download = Downloader(session, progress_bar, resume_retries)
248-
self._batch_download = BatchDownloader(session, progress_bar, resume_retries)
249248
self.finder = finder
250249

251250
# Where still-packed archives should be written to. If None, they are
@@ -468,10 +467,7 @@ def _complete_partial_requirements(
468467
assert req.link
469468
links_to_fully_download[req.link] = req
470469

471-
batch_download = self._batch_download(
472-
links_to_fully_download.keys(),
473-
temp_dir,
474-
)
470+
batch_download = self._download.batch(links_to_fully_download.keys(), temp_dir)
475471
for link, (filepath, _) in batch_download:
476472
logger.debug("Downloading link %s to %s", link, filepath)
477473
req = links_to_fully_download[link]

tests/lib/venv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def sitecustomize(self) -> str | None:
226226
return self._sitecustomize
227227

228228
@sitecustomize.setter
229-
def sitecustomize(self, value: str) -> None:
229+
def sitecustomize(self, value: str | None) -> None:
230230
self._sitecustomize = value
231231
self._customize_site()
232232

0 commit comments

Comments
 (0)