Skip to content

Commit 296e58e

Browse files
committed
Handle faulty turbine library data
1 parent d274ce6 commit 296e58e

File tree

2 files changed

+77
-34
lines changed

2 files changed

+77
-34
lines changed

tests/test_data_handling.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import filecmp
7+
import logging
78
import os
89
from shutil import copyfile
910

@@ -85,14 +86,18 @@ def test_get_turbine_types(self, capsys):
8586
with pytest.raises(ValueError, match=msg):
8687
get_turbine_types("wrong")
8788

88-
def test_store_turbine_data_from_oedb(self):
89+
def test_store_turbine_data_from_oedb(self, caplog):
8990
"""Test `store_turbine_data_from_oedb` function."""
9091
t = {}
9192
for fn in os.listdir(self.orig_path):
9293
t[fn] = os.path.getmtime(os.path.join(self.orig_path, fn))
93-
store_turbine_data_from_oedb()
94+
with caplog.at_level(logging.WARNING):
95+
store_turbine_data_from_oedb()
9496
for fn in os.listdir(self.orig_path):
9597
assert t[fn] < os.path.getmtime(os.path.join(self.orig_path, fn))
98+
assert "The turbine library data contains too many faulty" not in caplog.text
99+
assert "No cp-curve but has_cp_curve=True" not in caplog.text
100+
assert "No power curve but has_power_curve=True" not in caplog.text
96101

97102
def test_wrong_url_load_turbine_data(self):
98103
"""Load turbine data from oedb with a wrong schema."""

windpowerlib/data.py

Lines changed: 70 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -187,46 +187,84 @@ def store_turbine_data_from_oedb(
187187

188188
# get all power (coefficient) curves and save them to file
189189
for curve_type in ["power_curve", "power_coefficient_curve"]:
190+
broken_turbine_data = []
190191
curves_df = pd.DataFrame(columns=["wind_speed"])
191192
for index in turbine_data.index:
192193
if (
193194
turbine_data["{}_wind_speeds".format(curve_type)][index]
194195
and turbine_data["{}_values".format(curve_type)][index]
195196
):
196-
df = (
197-
pd.DataFrame(
198-
data=[
199-
eval(
200-
turbine_data[
201-
"{}_wind_speeds".format(curve_type)
202-
][index]
203-
),
204-
eval(
205-
turbine_data["{}_values".format(curve_type)][
206-
index
207-
]
208-
),
209-
]
210-
)
211-
.transpose()
212-
.rename(
213-
columns={
214-
0: "wind_speed",
215-
1: turbine_data["turbine_type"][index],
216-
}
197+
try:
198+
df = (
199+
pd.DataFrame(
200+
data=[
201+
eval(
202+
turbine_data[
203+
"{}_wind_speeds".format(curve_type)
204+
][index]
205+
),
206+
eval(
207+
turbine_data["{}_values".format(curve_type)][
208+
index
209+
]
210+
),
211+
]
212+
)
213+
.transpose()
214+
.rename(
215+
columns={
216+
0: "wind_speed",
217+
1: turbine_data["turbine_type"][index],
218+
}
219+
)
217220
)
221+
if not df.wind_speed.duplicated().any():
222+
curves_df = pd.merge(
223+
left=curves_df, right=df, how="outer", on="wind_speed"
224+
)
225+
except:
226+
broken_turbine_data.append(turbine_data.loc[index, "turbine_type"])
227+
228+
# warning in case of broken turbine data
229+
if len(broken_turbine_data) > 0:
230+
issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
231+
"/issues/28")
232+
# in case only some data is faulty, only give out warning
233+
if len(broken_turbine_data) < 0.2 * len(turbine_data):
234+
logging.warning(
235+
f"The turbine library data contains faulty {curve_type}s. The "
236+
f"{curve_type} data can therefore not be loaded for the following "
237+
f"turbines: {broken_turbine_data}. "
238+
f"Please report this in the following issue, in case it hasn't "
239+
f"already been reported: {issue_link}"
218240
)
219-
if not df.wind_speed.duplicated().any():
220-
curves_df = pd.merge(
221-
left=curves_df, right=df, how="outer", on="wind_speed"
222-
)
223-
curves_df = curves_df.set_index("wind_speed").sort_index().transpose()
224-
# power curve values in W
225-
if curve_type == "power_curve":
226-
curves_df *= 1000
227-
curves_df.index.name = "turbine_type"
228-
curves_df.sort_index(inplace=True)
229-
curves_df.to_csv(filename.format("{}s".format(curve_type)))
241+
save_turbine_data = True
242+
# set has_power_(coefficient)_curve to False for faulty turbines
243+
for turb in broken_turbine_data:
244+
ind = turbine_data[turbine_data.turbine_type == turb].index[0]
245+
col = ("has_power_curve" if curve_type == "power_curve"
246+
else "has_cp_curve")
247+
turbine_data.at[ind, col] = False
248+
# in case most data is faulty, do not store downloaded data
249+
else:
250+
logging.warning(
251+
f"The turbine library data contains too many faulty {curve_type}s,"
252+
f"wherefore {curve_type} data is not loaded from the oedb. "
253+
f"Please report this in the following issue, in case it hasn't "
254+
f"already been reported: {issue_link}"
255+
)
256+
save_turbine_data = False
257+
else:
258+
save_turbine_data = True
259+
260+
if save_turbine_data:
261+
curves_df = curves_df.set_index("wind_speed").sort_index().transpose()
262+
# power curve values in W
263+
if curve_type == "power_curve":
264+
curves_df *= 1000
265+
curves_df.index.name = "turbine_type"
266+
curves_df.sort_index(inplace=True)
267+
curves_df.to_csv(filename.format("{}s".format(curve_type)))
230268

231269
# get turbine data and save to file (excl. curves)
232270
turbine_data_df = turbine_data.drop(

0 commit comments

Comments
 (0)