8
8
import os
9
9
from collections .abc import Iterable
10
10
from http import HTTPStatus
11
- from typing import BinaryIO
11
+ from typing import BinaryIO , Mapping
12
12
13
13
from pip ._vendor .requests .models import Response
14
14
from pip ._vendor .urllib3 .exceptions import ReadTimeoutError
@@ -134,33 +134,9 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
134
134
return filename
135
135
136
136
137
def _http_get_download(
    session: PipSession,
    link: Link,
    range_start: int | None = 0,
    if_range: str | None = None,
) -> Response:
    """Issue a streaming GET for ``link`` and return the response.

    The URL fragment (everything from the first ``#``) is dropped before the
    request is sent. The request headers start as a copy of ``HEADERS``.

    :param session: Session used to perform the request.
    :param link: Link whose URL is fetched.
    :param range_start: When truthy, a ``Range`` header asking for the bytes
        from this offset onwards is added. ``0`` and ``None`` add no header.
    :param if_range: When truthy, sent verbatim as the ``If-Range`` header.
    :raises NetworkConnectionError: Re-raised after being logged at critical
        level when the request or ``raise_for_status`` fails.
    """
    target_url, _, _ = link.url.partition("#")
    headers = HEADERS.copy()
    # request a partial download
    if range_start:
        # The byte range-spec is "<first>-" with no embedded whitespace; a
        # malformed value may be ignored by the server.
        headers["Range"] = f"bytes={range_start}-"
    # make sure the file hasn't changed
    if if_range:
        headers["If-Range"] = if_range
    try:
        resp = session.get(target_url, headers=headers, stream=True)
        raise_for_status(resp)
    except NetworkConnectionError as e:
        # Narrow the optional attribute before reading its status code.
        assert e.response is not None
        logger.critical("HTTP error %s while getting %s", e.response.status_code, link)
        raise
    return resp
159
-
160
-
161
137
@dataclass
162
138
class _FileDownload :
163
- """Stores the state of a single file download."""
139
+ """Stores the state of a single link download."""
164
140
165
141
link : Link
166
142
output_file : BinaryIO
@@ -175,7 +151,7 @@ def write_chunk(self, data: bytes) -> None:
175
151
self .bytes_received += len (data )
176
152
self .output_file .write (data )
177
153
178
- def reset_download (self ) -> None :
154
+ def reset_file (self ) -> None :
179
155
"""Delete any saved data and reset progress to zero."""
180
156
self .output_file .seek (0 )
181
157
self .output_file .truncate ()
@@ -206,7 +182,7 @@ def batch(
206
182
207
183
def __call__ (self , link : Link , location : str ) -> tuple [str , str ]:
208
184
"""Download the file given by link into location."""
209
- resp = _http_get_download ( self ._session , link )
185
+ resp = self ._http_get ( link )
210
186
download_size = _get_http_response_size (resp )
211
187
212
188
filepath = os .path .join (location , _get_http_response_filename (resp , link ))
@@ -228,12 +204,6 @@ def _process_response(self, download: _FileDownload, resp: Response) -> None:
228
204
download .size ,
229
205
range_start = download .bytes_received ,
230
206
)
231
- self ._write_chunks_to_file (download , chunks )
232
-
233
- def _write_chunks_to_file (
234
- self , download : _FileDownload , chunks : Iterable [bytes ]
235
- ) -> None :
236
- """Write the chunks to the file and return the number of bytes received."""
237
207
try :
238
208
for chunk in chunks :
239
209
download .write_chunk (chunk )
@@ -246,7 +216,6 @@ def _write_chunks_to_file(
246
216
247
217
def _attempt_resume (self , download : _FileDownload , resp : Response ) -> None :
248
218
"""Attempt to resume the download if connection was dropped."""
249
- etag_or_last_modified = _get_http_response_etag_or_last_modified (resp )
250
219
251
220
while download .reattempts < self ._resume_retries and download .is_incomplete ():
252
221
assert download .size is not None
@@ -259,22 +228,14 @@ def _attempt_resume(self, download: _FileDownload, resp: Response) -> None:
259
228
)
260
229
261
230
try :
262
- # Try to resume the download using a HTTP range request.
263
- resume_resp = _http_get_download (
264
- self ._session ,
265
- download .link ,
266
- range_start = download .bytes_received ,
267
- if_range = etag_or_last_modified ,
268
- )
269
-
231
+ resume_resp = self ._http_get_resume (download , should_match = resp )
270
232
# Fallback: if the server responded with 200 (i.e., the file has
271
233
# since been modified or range requests are unsupported) or any
272
234
# other unexpected status, restart the download from the beginning.
273
235
must_restart = resume_resp .status_code != HTTPStatus .PARTIAL_CONTENT
274
236
if must_restart :
275
- download .size , etag_or_last_modified = self ._reset_download_state (
276
- download , resume_resp
277
- )
237
+ download .reset_file ()
238
+ download .size = _get_http_response_size (resume_resp )
278
239
279
240
self ._process_response (download , resume_resp )
280
241
except (ConnectionError , ReadTimeoutError , OSError ):
@@ -285,12 +246,27 @@ def _attempt_resume(self, download: _FileDownload, resp: Response) -> None:
285
246
os .remove (download .output_file .name )
286
247
raise IncompleteDownloadError (download )
287
248
288
def _reset_download_state(
    self, download: _FileDownload, resp: Response
) -> tuple[int | None, str | None]:
    """Restart ``download`` from scratch and re-read its metadata from ``resp``.

    Calls ``download.reset_download()`` first, then returns a pair made of
    the result of ``_get_http_response_size(resp)`` and the result of
    ``_get_http_response_etag_or_last_modified(resp)``, in that order.
    """
    download.reset_download()
    # Tuple elements are evaluated left to right: size first, then validator.
    return (
        _get_http_response_size(resp),
        _get_http_response_etag_or_last_modified(resp),
    )
249
def _http_get_resume(
    self, download: _FileDownload, should_match: Response
) -> Response:
    """Issue a HTTP range request to resume the download.

    The request asks for every byte from ``download.bytes_received`` onwards
    and is sent through ``self._http_get`` for ``download.link``.

    :param download: Download whose ``bytes_received`` and ``link`` are used.
    :param should_match: Earlier response; when
        ``_get_http_response_etag_or_last_modified`` returns a truthy value
        for it, that value is sent as the ``If-Range`` header.
    :returns: The response returned by ``self._http_get``.
    """
    headers = HEADERS.copy()
    # The byte range-spec is "<first>-" with no embedded whitespace; a
    # malformed value may be ignored by the server.
    headers["Range"] = f"bytes={download.bytes_received}-"
    # If possible, use a conditional range request to avoid corrupted
    # downloads caused by the remote file changing in-between.
    if identifier := _get_http_response_etag_or_last_modified(should_match):
        headers["If-Range"] = identifier
    return self._http_get(download.link, headers)
260
+
261
def _http_get(self, link: Link, headers: Mapping[str, str] = HEADERS) -> Response:
    """Perform a streaming GET on ``link.url_without_fragment``.

    :param link: Link to fetch; also used in the error log message.
    :param headers: Request headers; defaults to the module-level ``HEADERS``.
    :returns: The response, once ``raise_for_status`` has accepted it.
    :raises NetworkConnectionError: Re-raised after being logged at critical
        level together with the response status code.
    """
    url = link.url_without_fragment
    try:
        response = self._session.get(url, headers=headers, stream=True)
        raise_for_status(response)
    except NetworkConnectionError as exc:
        # Narrow the optional attribute before reading its status code.
        assert exc.response is not None
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link
        )
        raise
    else:
        return response
0 commit comments