@@ -158,21 +158,34 @@ def load_turbine_data_from_oedb(schema="supply", table="wind_turbine_library"):
158158
159159
160160def store_turbine_data_from_oedb (
161- schema = "supply" , table = "wind_turbine_library"
161+ schema = "supply" , table = "wind_turbine_library" , threshold = 0.2
162162):
163163 r"""
164164 Loads turbine library from the OpenEnergy database (oedb).
165165
166166 Turbine data is saved to csv files ('oedb_power_curves.csv',
167167 'oedb_power_coefficient_curves.csv' and 'oedb_nominal_power') for offline
168168 usage of the windpowerlib. If the files already exist they are overwritten.
169+ In case the turbine library on the oedb contains too many faulty turbines,
170+ the already existing files are not overwritten. The accepted percentage of faulty
171+ turbines can be set through the parameter `threshold`.
169172
170173 Parameters
171174 ----------
172175 schema : str
173176 Database schema of the turbine library.
174177 table : str
175178 Table name of the turbine library.
179+ threshold : float
180+ In case there are turbines in the turbine library with faulty data (e.g.
181+ duplicate wind speed entries in the power (coefficient) curve data), the
182+ threshold defines the share of accepted faulty turbine data up to which the
183+ existing turbine data is overwritten by the newly downloaded data.
184+ For example, a threshold of 0.1 means that at least 10% of the
185+ turbines would need to have invalid data in order to discard the downloaded
186+ data. This is to make sure that in the rare case the oedb data is too buggy,
187+ the turbine data that is by default provided with the windpowerlib is not
188+ overwritten by poor data.
176189
177190 Returns
178191 -------
@@ -182,11 +195,40 @@ def store_turbine_data_from_oedb(
182195
183196 """
184197 turbine_data = fetch_turbine_data_from_oedb (schema = schema , table = table )
185- # standard file name for saving data
186- filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
198+ turbine_data = _process_and_save_oedb_data (
199+ turbine_data , threshold = threshold
200+ )
201+ check_turbine_data (
202+ filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
203+ )
204+ return turbine_data
205+
206+
207+ def _process_and_save_oedb_data (turbine_data , threshold = 0.2 ):
208+ """
209+ Helper function to extract power (coefficient) curve data from the turbine library.
210+
211+ Parameters
212+ -----------
213+ turbine_data : :pandas:`pandas.DataFrame<frame>`
214+ Raw turbine data downloaded from the oedb with
215+ :func:`fetch_turbine_data_from_oedb`.
216+ threshold : float
217+ See parameter `threshold` in :func:`store_turbine_data_from_oedb`
218+ for more information.
219+
220+ Returns
221+ --------
222+ :pandas:`pandas.DataFrame<frame>`
223+ Turbine data of different turbines such as 'manufacturer',
224+ 'turbine_type', 'nominal_power'.
187225
188- # get all power (coefficient) curves and save them to file
189- for curve_type in ["power_curve" , "power_coefficient_curve" ]:
226+ """
227+ curve_types = ["power_curve" , "power_coefficient_curve" ]
228+ # get all power (coefficient) curves
229+ curve_dict = {}
230+ broken_turbines_dict = {}
231+ for curve_type in curve_types :
190232 broken_turbine_data = []
191233 curves_df = pd .DataFrame (columns = ["wind_speed" ])
192234 for index in turbine_data .index :
@@ -222,67 +264,82 @@ def store_turbine_data_from_oedb(
222264 curves_df = pd .merge (
223265 left = curves_df , right = df , how = "outer" , on = "wind_speed"
224266 )
267+ else :
268+ broken_turbine_data .append (
269+ turbine_data .loc [index , "turbine_type" ])
225270 except :
226271 broken_turbine_data .append (turbine_data .loc [index , "turbine_type" ])
227-
228- # warning in case of broken turbine data
229- if len (broken_turbine_data ) > 0 :
230- issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
231- "/issues/28" )
232- # in case only some data is faulty, only give out warning
233- if len (broken_turbine_data ) < 0.2 * len (turbine_data ):
234- logging .warning (
235- f"The turbine library data contains faulty { curve_type } s. The "
236- f"{ curve_type } data can therefore not be loaded for the following "
237- f"turbines: { broken_turbine_data } . "
238- f"Please report this in the following issue, in case it hasn't "
239- f"already been reported: { issue_link } "
240- )
241- save_turbine_data = True
272+ curve_dict [curve_type ] = curves_df
273+ broken_turbines_dict [curve_type ] = broken_turbine_data
274+
275+ # check if there are faulty turbines and if so, raise warning
276+ # if there are too many, don't save downloaded data to disk but keep existing data
277+ if any (len (_ ) > 0 for _ in broken_turbines_dict .values ()):
278+ issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
279+ "/issues/28" )
280+ # in case only some data is faulty, only give out warning
281+ if all (len (_ ) < threshold * len (turbine_data )
282+ for _ in broken_turbines_dict .values ()):
283+ save_turbine_data = True
284+ for curve_type in curve_types :
285+ if len (broken_turbines_dict [curve_type ]) > 0 :
286+ logging .warning (
287+ f"The turbine library data contains faulty { curve_type } s. The "
288+ f"{ curve_type } data can therefore not be loaded for the "
289+ f"following turbines: { broken_turbine_data } . "
290+ f"Please report this in the following issue, in case it hasn't "
291+ f"already been reported: { issue_link } "
292+ )
242293 # set has_power_(coefficient)_curve to False for faulty turbines
243- for turb in broken_turbine_data :
294+ for turb in broken_turbines_dict [ curve_type ] :
244295 ind = turbine_data [turbine_data .turbine_type == turb ].index [0 ]
245296 col = ("has_power_curve" if curve_type == "power_curve"
246297 else "has_cp_curve" )
247298 turbine_data .at [ind , col ] = False
248- # in case most data is faulty, do not store downloaded data
249- else :
250- logging .warning (
251- f"The turbine library data contains too many faulty { curve_type } s,"
252- f"wherefore { curve_type } data is not loaded from the oedb. "
253- f"Please report this in the following issue, in case it hasn't "
254- f"already been reported: { issue_link } "
255- )
256- save_turbine_data = False
299+ # in case most data is faulty, do not store downloaded data
257300 else :
258- save_turbine_data = True
259-
260- if save_turbine_data :
261- curves_df = curves_df .set_index ("wind_speed" ).sort_index ().transpose ()
301+ logging .warning (
302+ f"The turbine library data contains too many faulty turbine datasets "
303+ f"wherefore it is not loaded from the oedb. "
304+ f"In case you want to circumvent this behaviour, you can specify a "
305+ f"higher tolerance through the parameter 'threshold'."
306+ f"Please report this in the following issue, in case it hasn't "
307+ f"already been reported: { issue_link } "
308+ )
309+ save_turbine_data = False
310+ else :
311+ save_turbine_data = True
312+
313+ if save_turbine_data :
314+ # standard file name for saving data
315+ filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
316+ # save curve data to csv
317+ for curve_type in curve_types :
318+ curves_df = curve_dict [curve_type ].set_index (
319+ "wind_speed" ).sort_index ().transpose ()
262320 # power curve values in W
263321 if curve_type == "power_curve" :
264322 curves_df *= 1000
265323 curves_df .index .name = "turbine_type"
266324 curves_df .sort_index (inplace = True )
267325 curves_df .to_csv (filename .format ("{}s" .format (curve_type )))
268326
269- # get turbine data and save to file (excl. curves)
270- turbine_data_df = turbine_data .drop (
271- [
272- "power_curve_wind_speeds" ,
273- "power_curve_values" ,
274- "power_coefficient_curve_wind_speeds" ,
275- "power_coefficient_curve_values" ,
276- "thrust_coefficient_curve_wind_speeds" ,
277- "thrust_coefficient_curve_values" ,
278- ],
279- axis = 1 ,
280- ).set_index ("turbine_type" )
281- # nominal power in W
282- turbine_data_df ["nominal_power" ] *= 1000
283- turbine_data_df .sort_index (inplace = True )
284- turbine_data_df .to_csv (filename .format ("turbine_data" ))
285- check_turbine_data (filename )
327+ # save turbine data to file (excl. curves)
328+ turbine_data_df = turbine_data .drop (
329+ [
330+ "power_curve_wind_speeds" ,
331+ "power_curve_values" ,
332+ "power_coefficient_curve_wind_speeds" ,
333+ "power_coefficient_curve_values" ,
334+ "thrust_coefficient_curve_wind_speeds" ,
335+ "thrust_coefficient_curve_values" ,
336+ ],
337+ axis = 1 ,
338+ ).set_index ("turbine_type" )
339+ # nominal power in W
340+ turbine_data_df ["nominal_power" ] *= 1000
341+ turbine_data_df .sort_index (inplace = True )
342+ turbine_data_df .to_csv (filename .format ("turbine_data" ))
286343 return turbine_data
287344
288345
0 commit comments