Skip to content

Commit 2017e41

Browse files
authored
Merge pull request #28 from Aquaveo/fix-herbie-download
Add exception handling and retry functionality to herbie downloads
2 parents 6d342ca + 860e9e7 commit 2017e41

File tree

4 files changed

+242
-141
lines changed

4 files changed

+242
-141
lines changed

loone_data_prep/forecast_scripts/create_forecast_LOWs.py

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
from herbie import FastHerbie
33
from datetime import datetime
44
import pandas as pd
5-
from retry_requests import retry
5+
from retry_requests import retry as retry_requests
6+
from retry import retry
67
import warnings
8+
from typing import Tuple
9+
from loone_data_prep.herbie_utils import get_fast_herbie_object
710

811

912
def generate_wind_forecasts(output_dir):
@@ -26,7 +29,8 @@ def generate_wind_forecasts(output_dir):
2629
}
2730

2831
today_str = datetime.today().strftime('%Y-%m-%d 00:00')
29-
FH = FastHerbie([today_str], model="ifs", fxx=range(0, 360, 3))
32+
FH = get_fast_herbie_object(today_str)
33+
print("FastHerbie initialized.")
3034
dfs = []
3135

3236
variables = {
@@ -45,37 +49,20 @@ def generate_wind_forecasts(output_dir):
4549
"latitude": [point.latitude]
4650
})
4751

52+
# Loop through variables for current point and extract data
4853
for var_key, var_name in variables.items():
54+
# Get the current variable data at the current point
4955
print(f" Variable: {var_key}")
56+
try:
57+
df, var_name_actual = _download_herbie_variable(FH, var_key, var_name, point_df)
58+
except Exception as e:
59+
print(f"Error processing {var_key} for Point {index + 1} ({point.latitude}, {point.longitude}): {e}")
60+
print(f'Skipping {var_key}')
61+
continue
5062

51-
# Download and load dataset
52-
FH.download(f":{var_key}")
53-
ds = FH.xarray(f":{var_key}", backend_kwargs={"decode_timedelta": True})
54-
55-
# Extract point data
56-
dsi = ds.herbie.pick_points(point_df, method="nearest")
57-
58-
# Get actual variable name
59-
if var_name == "10u":
60-
var_name_actual = "u10" # Map 10u to u10
61-
elif var_name == "10v":
62-
var_name_actual = "v10" # Map 10v to v10
63-
elif var_name == "2t":
64-
var_name_actual = "t2m" #TODO: check that this is correct
65-
66-
# Convert to DataFrame
67-
time_series = dsi[var_name_actual].squeeze()
68-
df = time_series.to_dataframe().reset_index()
69-
70-
# Handle datetime columns
71-
if "valid_time" in df.columns:
72-
df = df.rename(columns={"valid_time": "datetime"})
73-
elif "step" in df.columns and "time" in dsi.coords:
74-
df["datetime"] = dsi.time.values[0] + df["step"]
75-
76-
# Retain necessary columns
77-
df = df[["datetime", var_name_actual]].drop_duplicates()
78-
dfs.append((index, var_name_actual, df))
63+
# Append the DataFrame and variable name to the list
64+
if not df.empty:
65+
dfs.append((index, var_name_actual, df))
7966

8067
# Merge and process data per point
8168
results = {}
@@ -125,3 +112,51 @@ def generate_wind_forecasts(output_dir):
125112
filepath = os.path.join(output_dir, airt_file_map[key])
126113
df_airt.to_csv(filepath, index=False)
127114

115+
116+
# Requested Herbie variable names mapped to the names used to index the
# point-extracted dataset (``dsi[...]``) when building the time series.
_HERBIE_VARIABLE_NAMES = {
    "10u": "u10",  # Map 10u to u10
    "10v": "v10",  # Map 10v to v10
    "2t": "t2m",  # TODO: check that this is correct
}


def _download_herbie_variable(
    fast_herbie_object: FastHerbie,
    variable_key: str,
    variable_name: str,
    point_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, str]:
    """
    Download a specific variable from the Herbie API.

    The variable name is validated before any download is attempted, so an
    unsupported name fails immediately instead of being retried (with delays
    and repeated downloads) by the retry logic that guards the download.

    Args:
        fast_herbie_object: An instance of the FastHerbie class.
        variable_key: The key of the variable to download. It is passed to
            ``download``/``xarray`` as the search string ``f":{variable_key}"``.
        variable_name: The name of the variable to download. Must be one of
            the keys of ``_HERBIE_VARIABLE_NAMES`` ("10u", "10v", "2t").
        point_df: A DataFrame containing the point of interest (longitude and latitude).

    Returns:
        A tuple ``(df, var_name_actual)`` where ``df`` is a DataFrame with the
        columns ``"datetime"`` and ``var_name_actual`` (duplicate rows removed)
        and ``var_name_actual`` is the dataset variable name that
        ``variable_name`` was mapped to.

    Raises:
        ValueError: If ``variable_name`` is not a supported variable name.
        Exception: Whatever the download/extraction raised on its final
            attempt, once all retries are exhausted.

    Example:
        point_df = pd.DataFrame({"longitude": [-80.7934], "latitude": [27.1389]})
        FH = FastHerbie(['2020-05-16 00:00'], model='ifs', fxx=range(0, 360, 3))
        df, var_name_actual = _download_herbie_variable(FH, '10u', '10u', point_df)
    """
    # Resolve the dataset variable name first: this is a programming error,
    # not a transient failure, so it must not go through the retry loop.
    try:
        var_name_actual = _HERBIE_VARIABLE_NAMES[variable_name]
    except KeyError:
        supported = ", ".join(sorted(_HERBIE_VARIABLE_NAMES))
        raise ValueError(
            f"Unsupported variable name {variable_name!r}; expected one of: {supported}"
        ) from None

    df = _fetch_herbie_point_series(fast_herbie_object, variable_key, var_name_actual, point_df)
    return df, var_name_actual


# Same retry policy the entry point used before: up to 5 attempts, starting
# with a 15 second wait that doubles after each failure, capped at 60 seconds.
@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
def _fetch_herbie_point_series(
    fast_herbie_object: FastHerbie,
    variable_key: str,
    var_name_actual: str,
    point_df: pd.DataFrame,
) -> pd.DataFrame:
    """Download one variable and return its time series at the point as a DataFrame."""
    # Download and load dataset
    search = f":{variable_key}"
    fast_herbie_object.download(search)
    ds = fast_herbie_object.xarray(search, backend_kwargs={"decode_timedelta": True})

    # Extract point data
    dsi = ds.herbie.pick_points(point_df, method="nearest")

    # Convert to DataFrame
    time_series = dsi[var_name_actual].squeeze()
    df = time_series.to_dataframe().reset_index()

    # Handle datetime columns: prefer an existing valid_time column, otherwise
    # build the timestamps from the first time value plus each step offset.
    if "valid_time" in df.columns:
        df = df.rename(columns={"valid_time": "datetime"})
    elif "step" in df.columns and "time" in dsi.coords:
        df["datetime"] = dsi.time.values[0] + df["step"]

    # Retain necessary columns
    return df[["datetime", var_name_actual]].drop_duplicates()

0 commit comments

Comments
 (0)