@@ -158,6 +158,30 @@ def _http_get_download(
158158 return resp
159159
160160
161+ @dataclass
162+ class _FileDownload :
163+ """Stores the state of a single file download."""
164+
165+ link : Link
166+ output_file : BinaryIO
167+ size : Optional [int ]
168+ bytes_received : int = 0
169+ reattempts : int = 0
170+
171+ def is_incomplete (self ) -> bool :
172+ return bool (self .size is not None and self .bytes_received < self .size )
173+
174+ def write_chunk (self , data : bytes ) -> None :
175+ self .bytes_received += len (data )
176+ self .output_file .write (data )
177+
178+ def reset_download (self ) -> None :
179+ """Delete any saved data and reset progress to zero."""
180+ self .output_file .seek (0 )
181+ self .output_file .truncate ()
182+ self .bytes_received = 0
183+
184+
161185class Downloader :
162186 def __init__ (
163187 self ,
@@ -183,90 +207,63 @@ def batch(
183207 def __call__ (self , link : Link , location : str ) -> tuple [str , str ]:
184208 """Download the file given by link into location."""
185209 resp = _http_get_download (self ._session , link )
186- # NOTE: The original download size needs to be passed down everywhere
187- # so if the download is resumed (with a HTTP Range request) the progress
188- # bar will report the right size.
189- total_length = _get_http_response_size (resp )
190- content_type = resp .headers .get ("Content-Type" , "" )
191-
192- filename = _get_http_response_filename (resp , link )
193- filepath = os .path .join (location , filename )
210+ download_size = _get_http_response_size (resp )
194211
212+ filepath = os .path .join (location , _get_http_response_filename (resp , link ))
195213 with open (filepath , "wb" ) as content_file :
196- bytes_received = self ._process_response (
197- resp , link , content_file , 0 , total_length
198- )
199- # If possible, check for an incomplete download and attempt resuming.
200- if total_length and bytes_received < total_length :
201- self ._attempt_resume (
202- resp , link , content_file , total_length , bytes_received
203- )
214+ download = _FileDownload (link , content_file , download_size )
215+ self ._process_response (download , resp )
216+ if download .is_incomplete ():
217+ self ._attempt_resume (download , resp )
204218
219+ content_type = resp .headers .get ("Content-Type" , "" )
205220 return filepath , content_type
206221
207- def _process_response (
208- self ,
209- resp : Response ,
210- link : Link ,
211- content_file : BinaryIO ,
212- bytes_received : int ,
213- total_length : int | None ,
214- ) -> int :
222+ def _process_response (self , download : _FileDownload , resp : Response ) -> None :
215223 """Process the response and write the chunks to the file."""
216224 chunks = _prepare_download (
217- resp , link , self ._progress_bar , total_length , range_start = bytes_received
218- )
219- return self ._write_chunks_to_file (
220- chunks , content_file , allow_partial = bool (total_length )
225+ resp ,
226+ download .link ,
227+ self ._progress_bar ,
228+ download .size ,
229+ range_start = download .bytes_received ,
221230 )
231+ self ._write_chunks_to_file (download , chunks )
222232
223233 def _write_chunks_to_file (
224- self , chunks : Iterable [bytes ], content_file : BinaryIO , * , allow_partial : bool
225- ) -> int :
234+ self , download : _FileDownload , chunks : Iterable [bytes ]
235+ ) -> None :
226236 """Write the chunks to the file and return the number of bytes received."""
227- bytes_received = 0
228237 try :
229238 for chunk in chunks :
230- bytes_received += len (chunk )
231- content_file .write (chunk )
239+ download .write_chunk (chunk )
232240 except ReadTimeoutError as e :
233- # If partial downloads are OK (the download will be retried), don't bail .
234- if not allow_partial :
241+ # If the download size is not known, then give up downloading the file .
242+ if download . size is None :
235243 raise e
236244
237- # Ensuring bytes_received is returned to attempt resume
238245 logger .warning ("Connection timed out while downloading." )
239246
240- return bytes_received
241-
242- def _attempt_resume (
243- self ,
244- resp : Response ,
245- link : Link ,
246- content_file : BinaryIO ,
247- total_length : int | None ,
248- bytes_received : int ,
249- ) -> None :
247+ def _attempt_resume (self , download : _FileDownload , resp : Response ) -> None :
250248 """Attempt to resume the download if connection was dropped."""
251249 etag_or_last_modified = _get_http_response_etag_or_last_modified (resp )
252250
253- attempts_left = self ._resume_retries
254- while total_length and attempts_left and bytes_received < total_length :
255- attempts_left -= 1
256-
251+ while download .reattempts < self ._resume_retries and download .is_incomplete ():
252+ assert download .size is not None
253+ download .reattempts += 1
257254 logger .warning (
258255 "Attempting to resume incomplete download (%s/%s, attempt %d)" ,
259- format_size (bytes_received ),
260- format_size (total_length ),
261- ( self . _resume_retries - attempts_left ) ,
256+ format_size (download . bytes_received ),
257+ format_size (download . size ),
258+ download . reattempts ,
262259 )
263260
264261 try :
265262 # Try to resume the download using a HTTP range request.
266263 resume_resp = _http_get_download (
267264 self ._session ,
268- link ,
269- range_start = bytes_received ,
265+ download . link ,
266+ range_start = download . bytes_received ,
270267 if_range = etag_or_last_modified ,
271268 )
272269
@@ -275,33 +272,25 @@ def _attempt_resume(
275272 # other unexpected status, restart the download from the beginning.
276273 must_restart = resume_resp .status_code != HTTPStatus .PARTIAL_CONTENT
277274 if must_restart :
278- bytes_received , total_length , etag_or_last_modified = (
279- self . _reset_download_state ( resume_resp , content_file )
275+ download . size , etag_or_last_modified = self . _reset_download_state (
276+ download , resume_resp
280277 )
281278
282- bytes_received += self ._process_response (
283- resume_resp , link , content_file , bytes_received , total_length
284- )
279+ self ._process_response (download , resume_resp )
285280 except (ConnectionError , ReadTimeoutError , OSError ):
286281 continue
287282
288283 # No more resume attempts. Raise an error if the download is still incomplete.
289- if total_length and bytes_received < total_length :
290- os .remove (content_file .name )
291- raise IncompleteDownloadError (
292- link , bytes_received , total_length , retries = self ._resume_retries
293- )
284+ if download .is_incomplete ():
285+ os .remove (download .output_file .name )
286+ raise IncompleteDownloadError (download )
294287
295288 def _reset_download_state (
296- self ,
297- resp : Response ,
298- content_file : BinaryIO ,
299- ) -> tuple [int , int | None , str | None ]:
289+ self , download : _FileDownload , resp : Response
290+ ) -> tuple [int | None , str | None ]:
300291 """Reset the download state to restart downloading from the beginning."""
301- content_file .seek (0 )
302- content_file .truncate ()
303- bytes_received = 0
292+ download .reset_download ()
304293 total_length = _get_http_response_size (resp )
305294 etag_or_last_modified = _get_http_response_etag_or_last_modified (resp )
306295
307- return bytes_received , total_length , etag_or_last_modified
296+ return total_length , etag_or_last_modified
0 commit comments