@@ -34,32 +34,50 @@ def _download_metalinksdb(verbose=True):
3434 else :
3535 return db_path
3636
37- # Download the database
38- _logg ("Downloading database..." , verbose = verbose )
39- try :
40- # Figshare requires a browser-like User-Agent to bypass WAF
41- headers = {
42- 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
43- }
44- response = requests .get (METALINKS_URL , headers = headers , stream = True , allow_redirects = True )
45- response .raise_for_status () # Raise an error for bad status codes
46-
47- with open (db_path , 'wb' ) as f :
48- for chunk in response .iter_content (chunk_size = 8192 ):
49- f .write (chunk )
50-
51- # Validate the downloaded file
52- file_size = os .path .getsize (db_path )
53- if file_size == 0 :
54- os .remove (db_path )
55- raise RuntimeError ("Downloaded file is empty. Please check the URL and try again." )
37+ # Download the database with retry logic
38+ import time
39+ max_retries = 3
40+ retry_delay = 2 # seconds
41+
42+ for attempt in range (max_retries ):
43+ _logg (f"Downloading database (attempt { attempt + 1 } /{ max_retries } )..." , verbose = verbose )
44+ try :
45+ # Figshare requires browser-like headers to bypass WAF
46+ headers = {
47+ 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' ,
48+ 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' ,
49+ 'Accept-Language' : 'en-US,en;q=0.5' ,
50+ 'Accept-Encoding' : 'gzip, deflate' ,
51+ 'Connection' : 'keep-alive' ,
52+ 'Upgrade-Insecure-Requests' : '1'
53+ }
54+ response = requests .get (METALINKS_URL , headers = headers , stream = True , allow_redirects = True , timeout = 60 )
55+ response .raise_for_status () # Raise an error for bad status codes
56+
57+ with open (db_path , 'wb' ) as f :
58+ for chunk in response .iter_content (chunk_size = 8192 ):
59+ f .write (chunk )
60+
61+ # Validate the downloaded file
62+ file_size = os .path .getsize (db_path )
63+ if file_size == 0 :
64+ os .remove (db_path )
65+ raise RuntimeError ("Downloaded file is empty. Please check the URL and try again." )
66+
67+ _logg (f"Database downloaded and saved to { db_path } ({ file_size } bytes)." , verbose = verbose )
68+ return db_path
5669
57- _logg (f"Database downloaded and saved to { db_path } ({ file_size } bytes)." , verbose = verbose )
58- except (requests .exceptions .RequestException , OSError , RuntimeError ) as e :
59- # Clean up failed download
60- if os .path .exists (db_path ):
61- os .remove (db_path )
62- raise RuntimeError (f"Failed to download database: { e } " ) from e
70+ except (requests .exceptions .RequestException , OSError , RuntimeError ) as e :
71+ # Clean up failed download
72+ if os .path .exists (db_path ):
73+ os .remove (db_path )
74+
75+ if attempt < max_retries - 1 :
76+ _logg (f"Download failed: { e } . Retrying in { retry_delay } seconds..." , verbose = verbose )
77+ time .sleep (retry_delay )
78+ retry_delay *= 2 # Exponential backoff
79+ else :
80+ raise RuntimeError (f"Failed to download database after { max_retries } attempts: { e } " ) from e
6381
6482 return db_path
6583
0 commit comments