Skip to content

Commit 85e84b3

Browse files
committed
add more headers
1 parent 25ff730 commit 85e84b3

File tree

1 file changed

+43
-25
lines changed

1 file changed

+43
-25
lines changed

src/liana/resource/get_metalinks.py

Lines changed: 43 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -34,32 +34,50 @@ def _download_metalinksdb(verbose=True):
34 34
else:
35 35
return db_path
36 36

37-
# Download the database
38-
_logg("Downloading database...", verbose=verbose)
39-
try:
40-
# Figshare requires a browser-like User-Agent to bypass WAF
41-
headers = {
42-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
43-
}
44-
response = requests.get(METALINKS_URL, headers=headers, stream=True, allow_redirects=True)
45-
response.raise_for_status() # Raise an error for bad status codes
46-
47-
with open(db_path, 'wb') as f:
48-
for chunk in response.iter_content(chunk_size=8192):
49-
f.write(chunk)
50-
51-
# Validate the downloaded file
52-
file_size = os.path.getsize(db_path)
53-
if file_size == 0:
54-
os.remove(db_path)
55-
raise RuntimeError("Downloaded file is empty. Please check the URL and try again.")
37+
# Download the database with retry logic
38+
import time
39+
max_retries = 3
40+
retry_delay = 2 # seconds
41+
42+
for attempt in range(max_retries):
43+
_logg(f"Downloading database (attempt {attempt + 1}/{max_retries})...", verbose=verbose)
44+
try:
45+
# Figshare requires browser-like headers to bypass WAF
46+
headers = {
47+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
48+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
49+
'Accept-Language': 'en-US,en;q=0.5',
50+
'Accept-Encoding': 'gzip, deflate',
51+
'Connection': 'keep-alive',
52+
'Upgrade-Insecure-Requests': '1'
53+
}
54+
response = requests.get(METALINKS_URL, headers=headers, stream=True, allow_redirects=True, timeout=60)
55+
response.raise_for_status() # Raise an error for bad status codes
56+
57+
with open(db_path, 'wb') as f:
58+
for chunk in response.iter_content(chunk_size=8192):
59+
f.write(chunk)
60+
61+
# Validate the downloaded file
62+
file_size = os.path.getsize(db_path)
63+
if file_size == 0:
64+
os.remove(db_path)
65+
raise RuntimeError("Downloaded file is empty. Please check the URL and try again.")
66+
67+
_logg(f"Database downloaded and saved to {db_path} ({file_size} bytes).", verbose=verbose)
68+
return db_path
56 69

57-
_logg(f"Database downloaded and saved to {db_path} ({file_size} bytes).", verbose=verbose)
58-
except (requests.exceptions.RequestException, OSError, RuntimeError) as e:
59-
# Clean up failed download
60-
if os.path.exists(db_path):
61-
os.remove(db_path)
62-
raise RuntimeError(f"Failed to download database: {e}") from e
70+
except (requests.exceptions.RequestException, OSError, RuntimeError) as e:
71+
# Clean up failed download
72+
if os.path.exists(db_path):
73+
os.remove(db_path)
74+
75+
if attempt < max_retries - 1:
76+
_logg(f"Download failed: {e}. Retrying in {retry_delay} seconds...", verbose=verbose)
77+
time.sleep(retry_delay)
78+
retry_delay *= 2 # Exponential backoff
79+
else:
80+
raise RuntimeError(f"Failed to download database after {max_retries} attempts: {e}") from e
63 81

64 82
return db_path
65 83

0 commit comments

Comments
 (0)