1414import zipfile
1515from typing import Dict , List , Optional , Tuple
1616
17+ import requests
18+ from requests .adapters import HTTPAdapter , Retry
1719
1820logger = logging .getLogger (__name__ )
1921logger .addHandler (logging .NullHandler ())
@@ -115,12 +117,36 @@ def _atomic_download(url: str, dest: pathlib.Path):
115117
116118
117119def _download_archive (url : str , archive_path : pathlib .Path ) -> bool :
118- """Download archive from URL with progress reporting."""
120+ """Robust streaming download with retries."""
121+
119122 logger .debug ("Archive will be saved to: %s" , archive_path )
120123
124+ session = requests .Session ()
125+ retries = Retry (
126+ total = 5 ,
127+ backoff_factor = 1.0 ,
128+ status_forcelist = [429 , 500 , 502 , 503 , 504 ],
129+ allowed_methods = ["GET" ],
130+ )
131+ session .mount ("https://" , HTTPAdapter (max_retries = retries ))
132+
121133 try :
122- urllib .request .urlretrieve (url , archive_path , _make_report_progress ())
134+ with session .get (url , stream = True ) as r :
135+ r .raise_for_status ()
136+
137+ total = int (r .headers .get ("content-length" , 0 ))
138+ downloaded = 0
139+ chunk_size = 1024 * 1024 # 1MB
140+
141+ with open (archive_path , "wb" ) as f :
142+ for chunk in r .iter_content (chunk_size ):
143+ if chunk :
144+ f .write (chunk )
145+ downloaded += len (chunk )
146+ _make_report_progress ()(downloaded , downloaded , total )
147+
123148 logger .info ("Download completed!" )
149+
124150 except Exception as e :
125151 logger .exception ("Error during download: %s" , e )
126152 return False
@@ -131,6 +157,7 @@ def _download_archive(url: str, archive_path: pathlib.Path) -> bool:
131157 elif not archive_path .exists ():
132158 logger .error ("File was not downloaded!" )
133159 return False
160+
134161 return True
135162
136163
0 commit comments