Skip to content

Commit 0f60de2

Browse files
authored
fix: add retries for zenodo requests with tenacity (#1861)
1 parent 620fe79 commit 0f60de2

File tree

4 files changed

+60
-8
lines changed

4 files changed

+60
-8
lines changed

doc/release_notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ Release Notes
99
Upcoming Release
1010
================
1111

12+
* Added automatic retry for some (Zenodo) HTTP requests to handle transient errors
13+
like rate limiting and server errors.
14+
1215
* Fixed `ValueError` in `prepare_sector_network.py` in function `add_storage_and_grids`
1316
when running with few nodes such that they are all already connected by existing gas
1417
lines. (https://github.com/PyPSA/pypsa-eur/pull/1780)

envs/environment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dependencies:
4646
- country_converter
4747
- geopy
4848
- tqdm
49+
- tenacity
4950
- pytz
5051
- jpype1
5152
- pyxlsb

rules/common.smk

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ import copy
66
from functools import partial, lru_cache
77

88
import os, sys, glob
9+
import requests
10+
from tenacity import (
11+
retry as tenacity_retry,
12+
stop_after_attempt,
13+
wait_exponential,
14+
retry_if_exception_type,
15+
)
916

1017
path = workflow.source_path("../scripts/_helpers.py")
1118
sys.path.insert(0, os.path.dirname(path))
@@ -113,6 +120,13 @@ def input_custom_extra_functionality(w):
113120
return []
114121

115122

123+
@tenacity_retry(
124+
stop=stop_after_attempt(3),
125+
wait=wait_exponential(multiplier=1, min=4, max=10),
126+
retry=retry_if_exception_type(
127+
(requests.HTTPError, requests.ConnectionError, requests.Timeout)
128+
),
129+
)
116130
def has_internet_access(url: str = "https://www.zenodo.org", timeout: int = 5) -> bool:
117131
"""
118132
Checks if internet connection is available by sending a HEAD request
@@ -125,14 +139,14 @@ def has_internet_access(url: str = "https://www.zenodo.org", timeout: int = 5) -
125139
Returns:
126140
- bool: True if the internet is available, otherwise False.
127141
"""
128-
try:
129-
# Send a HEAD request to avoid fetching full response
130-
response = requests.head(url, timeout=timeout, allow_redirects=True)
131-
return response.status_code == 200
132-
except requests.ConnectionError: # (e.g., no internet, DNS issues)
133-
return False
134-
except requests.Timeout: # (e.g., slow or no network)
135-
return False
142+
# Send a HEAD request to avoid fetching full response
143+
response = requests.head(url, timeout=timeout, allow_redirects=True)
144+
# Raise HTTPError for transient errors
145+
# 429: Too Many Requests (rate limiting)
146+
# 500, 502, 503, 504: Server errors
147+
if response.status_code in (429, 500, 502, 503, 504):
148+
response.raise_for_status()
149+
return response.status_code == 200
136150

137151

138152
def solved_previous_horizon(w):

scripts/_helpers.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@
2323
import xarray as xr
2424
import yaml
2525
from snakemake.utils import update_config
26+
from tenacity import (
27+
retry as tenacity_retry,
28+
)
29+
from tenacity import (
30+
retry_if_exception_type,
31+
stop_after_attempt,
32+
wait_exponential,
33+
)
2634
from tqdm import tqdm
2735

2836
logger = logging.getLogger(__name__)
@@ -407,17 +415,31 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
407415
return costs
408416

409417

418+
@tenacity_retry(
419+
stop=stop_after_attempt(3),
420+
wait=wait_exponential(multiplier=1, min=4, max=10),
421+
retry=retry_if_exception_type(
422+
(requests.HTTPError, requests.ConnectionError, requests.Timeout)
423+
),
424+
)
410425
def progress_retrieve(url, file, disable=False):
411426
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
412427
# Hotfix - Bug, tqdm not working with disable=False
413428
disable = True
414429

430+
# Raise HTTPError for transient errors
431+
# 429: Too Many Requests (rate limiting)
432+
# 500, 502, 503, 504: Server errors
415433
if disable:
416434
response = requests.get(url, headers=headers, stream=True)
435+
if response.status_code in (429, 500, 502, 503, 504):
436+
response.raise_for_status()
417437
with open(file, "wb") as f:
418438
f.write(response.content)
419439
else:
420440
response = requests.get(url, headers=headers, stream=True)
441+
if response.status_code in (429, 500, 502, 503, 504):
442+
response.raise_for_status()
421443
total_size = int(response.headers.get("content-length", 0))
422444
chunk_size = 1024
423445

@@ -834,12 +856,24 @@ def update_config_from_wildcards(config, w, inplace=True):
834856
return config
835857

836858

859+
@tenacity_retry(
860+
stop=stop_after_attempt(3),
861+
wait=wait_exponential(multiplier=1, min=4, max=10),
862+
retry=retry_if_exception_type(
863+
(requests.HTTPError, requests.ConnectionError, requests.Timeout)
864+
),
865+
)
837866
def get_checksum_from_zenodo(file_url):
838867
parts = file_url.split("/")
839868
record_id = parts[parts.index("records") + 1]
840869
filename = parts[-1]
841870

842871
response = requests.get(f"https://zenodo.org/api/records/{record_id}", timeout=30)
872+
# Raise HTTPError for transient errors
873+
# 429: Too Many Requests (rate limiting)
874+
# 500, 502, 503, 504: Server errors
875+
if response.status_code in (429, 500, 502, 503, 504):
876+
response.raise_for_status()
843877
response.raise_for_status()
844878
data = response.json()
845879

0 commit comments

Comments
 (0)