@@ -187,46 +187,84 @@ def store_turbine_data_from_oedb(
187187
188188 # get all power (coefficient) curves and save them to file
189189 for curve_type in ["power_curve" , "power_coefficient_curve" ]:
190+ broken_turbine_data = []
190191 curves_df = pd .DataFrame (columns = ["wind_speed" ])
191192 for index in turbine_data .index :
192193 if (
193194 turbine_data ["{}_wind_speeds" .format (curve_type )][index ]
194195 and turbine_data ["{}_values" .format (curve_type )][index ]
195196 ):
196- df = (
197- pd .DataFrame (
198- data = [
199- eval (
200- turbine_data [
201- "{}_wind_speeds" .format (curve_type )
202- ][index ]
203- ),
204- eval (
205- turbine_data ["{}_values" .format (curve_type )][
206- index
207- ]
208- ),
209- ]
210- )
211- .transpose ()
212- .rename (
213- columns = {
214- 0 : "wind_speed" ,
215- 1 : turbine_data ["turbine_type" ][index ],
216- }
197+ try :
198+ df = (
199+ pd .DataFrame (
200+ data = [
201+ eval (
202+ turbine_data [
203+ "{}_wind_speeds" .format (curve_type )
204+ ][index ]
205+ ),
206+ eval (
207+ turbine_data ["{}_values" .format (curve_type )][
208+ index
209+ ]
210+ ),
211+ ]
212+ )
213+ .transpose ()
214+ .rename (
215+ columns = {
216+ 0 : "wind_speed" ,
217+ 1 : turbine_data ["turbine_type" ][index ],
218+ }
219+ )
217220 )
221+ if not df .wind_speed .duplicated ().any ():
222+ curves_df = pd .merge (
223+ left = curves_df , right = df , how = "outer" , on = "wind_speed"
224+ )
225+ except :
226+ broken_turbine_data .append (turbine_data .loc [index , "turbine_type" ])
227+
228+ # warning in case of broken turbine data
229+ if len (broken_turbine_data ) > 0 :
230+ issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
231+ "/issues/28" )
232+ # in case only some data is faulty, only give out warning
233+ if len (broken_turbine_data ) < 0.2 * len (turbine_data ):
234+ logging .warning (
235+ f"The turbine library data contains faulty { curve_type } s. The "
236+ f"{ curve_type } data can therefore not be loaded for the following "
237+ f"turbines: { broken_turbine_data } . "
238+ f"Please report this in the following issue, in case it hasn't "
239+ f"already been reported: { issue_link } "
218240 )
219- if not df .wind_speed .duplicated ().any ():
220- curves_df = pd .merge (
221- left = curves_df , right = df , how = "outer" , on = "wind_speed"
222- )
223- curves_df = curves_df .set_index ("wind_speed" ).sort_index ().transpose ()
224- # power curve values in W
225- if curve_type == "power_curve" :
226- curves_df *= 1000
227- curves_df .index .name = "turbine_type"
228- curves_df .sort_index (inplace = True )
229- curves_df .to_csv (filename .format ("{}s" .format (curve_type )))
241+ save_turbine_data = True
242+ # set has_power_(coefficient)_curve to False for faulty turbines
243+ for turb in broken_turbine_data :
244+ ind = turbine_data [turbine_data .turbine_type == turb ].index [0 ]
245+ col = ("has_power_curve" if curve_type == "power_curve"
246+ else "has_cp_curve" )
247+ turbine_data .at [ind , col ] = False
248+ # in case most data is faulty, do not store downloaded data
249+ else :
250+ logging .warning (
251+ f"The turbine library data contains too many faulty { curve_type } s,"
252+ f"wherefore { curve_type } data is not loaded from the oedb. "
253+ f"Please report this in the following issue, in case it hasn't "
254+ f"already been reported: { issue_link } "
255+ )
256+ save_turbine_data = False
257+ else :
258+ save_turbine_data = True
259+
260+ if save_turbine_data :
261+ curves_df = curves_df .set_index ("wind_speed" ).sort_index ().transpose ()
262+ # power curve values in W
263+ if curve_type == "power_curve" :
264+ curves_df *= 1000
265+ curves_df .index .name = "turbine_type"
266+ curves_df .sort_index (inplace = True )
267+ curves_df .to_csv (filename .format ("{}s" .format (curve_type )))
230268
231269 # get turbine data and save to file (excl. curves)
232270 turbine_data_df = turbine_data .drop (
0 commit comments