Skip to content

Commit 6427133

Browse files
committed
Save turbine data to csv instead of hdf5
This was changed because problems occur when installing hdf5 with PyTables; see PR #45.
1 parent aeaf4d3 commit 6427133

File tree

2 files changed

+59
-43
lines changed

2 files changed

+59
-43
lines changed

doc/whatsnew/v0-1-2.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Bug fixes
1919

2020
Other changes
2121
#############
22-
* Make windpowerlib work offline: turbine data from oedb is stored in a hdf5 file for offline usage
22+
* Make windpowerlib work offline: turbine data from oedb is stored in csv files for offline usage
2323
* Make :py:func:`~windpowerlib.wind_turbine.get_turbine_types` also accessible as `get_turbine_types()` at package level, i.e. via `from windpowerlib import get_turbine_types`
2424

2525

windpowerlib/wind_turbine.py

Lines changed: 58 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,17 @@ def isfloat(x):
246246
df = pd.read_csv(file_, index_col=0)
247247
except FileNotFoundError:
248248
raise FileNotFoundError("The file '{}' was not found.".format(file_))
249-
wpp_df = df[df.turbine_id == turbine_type]
249+
# todo: note: this try except statement will be removed in 0.2.0 and only
250+
# the exception will stay. The example power (coefficient) curve files
251+
# will then be adapted
252+
try:
253+
wpp_df = df[df['turbine_id'] == turbine_type]
254+
except KeyError:
255+
wpp_df = df[df.index == turbine_type]
250256
# if turbine not in data file
251257
if wpp_df.shape[0] == 0:
252258
pd.set_option('display.max_rows', len(df))
253-
logging.info('Possible types: \n{0}'.format(df.turbine_id))
259+
logging.info('Possible types: \n{0}'.format(df['turbine_id']))
254260
pd.reset_option('display.max_rows')
255261
sys.exit('Cannot find the wind converter type: {0}'.format(
256262
turbine_type))
@@ -261,27 +267,32 @@ def isfloat(x):
261267
df = curve_data.transpose().reset_index()
262268
df.columns = ['wind_speed', 'value']
263269
df['wind_speed'] = df['wind_speed'].apply(lambda x: float(x))
264-
nominal_power = wpp_df['p_nom'].iloc[0]
270+
# todo: note: this try except statement will be removed in 0.2.0 and only
271+
# the exception will stay. The example power (coefficient) curve files
272+
# will then be adapted
273+
try:
274+
nominal_power = wpp_df['p_nom'].iloc[0]
275+
except KeyError:
276+
nominal_power = float(wpp_df['installed_capacity'].iloc[0])
265277
return df, nominal_power
266278

267279

268280
def get_turbine_data_from_oedb(turbine_type, fetch_curve, overwrite=False):
269281
r"""
270-
Fetches data for one wind turbine type from the OpenEnergy Database (oedb).
282+
Fetches wind turbine data from the OpenEnergy Database (oedb).
271283
272284
If turbine data exists in local repository it is loaded from this file. The
273285
file is created when turbine data was loaded from oedb in
274286
:py:func:`~.load_turbine_data_from_oedb`. Use this function with
275287
`overwrite=True` to overwrite your file with newly fetched data.
276-
Use :py:func:`~.check_local_turbine_data` to check
277-
weather your local file is up to date.
278288
279289
Parameters
280290
----------
281291
turbine_type : string
282292
Specifies the turbine type data is fetched for.
283293
Use :py:func:`~.get_turbine_types` to see a table of all wind turbines
284-
for which power (coefficient) curve data is provided.
294+
in oedb containing information about whether power (coefficient) curve
295+
data is provided.
285296
fetch_curve : string
286297
Parameter to specify whether a power or power coefficient curve
287298
should be retrieved from the provided turbine data. Valid options are
@@ -299,35 +310,18 @@ def get_turbine_data_from_oedb(turbine_type, fetch_curve, overwrite=False):
299310
power curve values in W with the corresponding wind speeds in m/s.
300311
301312
"""
302-
# hdf5 filename
303-
filename = os.path.join(os.path.dirname(__file__), 'data',
304-
'turbine_data_oedb.h5')
305-
if os.path.isfile(filename) and not overwrite:
306-
logging.debug("Turbine data is fetched from {}".format(filename))
307-
with pd.HDFStore(filename) as hdf_store:
308-
turbine_data = hdf_store.get('turbine_data')
309-
else:
310-
turbine_data = load_turbine_data_from_oedb()
311-
turbine_data.set_index('turbine_type', inplace=True)
312313
# Set `curve` depending on `fetch_curve` to match names in oedb
313314
curve = ('cp_curve' if fetch_curve == 'power_coefficient_curve'
314-
else fetch_curve)
315-
# Select curve and nominal power of turbine type
316-
try:
317-
df = turbine_data.loc[turbine_type]
318-
except KeyError:
319-
raise KeyError("Turbine type '{}' not in database. ".format(
320-
turbine_type) + "Use 'get_turbine_types()' to see a table of " +
321-
"possible wind turbine types.")
322-
if df[curve] is not None:
323-
df = pd.DataFrame(df[curve])
315+
else fetch_curve) # todo not needed anymore after OEP name changing
316+
filename = os.path.join(os.path.dirname(__file__), 'data',
317+
'oedb_{}s.csv'.format(curve)) # todo fetch_curve after the above was removed
318+
if not os.path.isfile(filename) or overwrite: # todo remove overwrite in 0.2.0
319+
# Load data from oedb and save to csv file
320+
load_turbine_data_from_oedb()
324321
else:
325-
sys.exit("{} of {} not available in ".format(curve, turbine_type) +
326-
"oedb. Use 'get_turbine_types()' to see for which turbine " +
327-
"types power coefficient curves are available.")
328-
nominal_power = turbine_data.loc[turbine_type][
329-
'installed_capacity_kw'] * 1000
330-
df.columns = ['wind_speed', 'value']
322+
logging.debug("Turbine data is fetched from {}".format(filename))
323+
# nominal power and power curve values in W
324+
nominal_power = nominal_power * 1000
331325
if fetch_curve == 'power_curve':
332326
# power in W
333327
df['value'] = df['value'] * 1000
@@ -338,7 +332,8 @@ def load_turbine_data_from_oedb():
338332
r"""
339333
Loads turbine data from the OpenEnergy Database (oedb).
340334
341-
Turbine data is saved to `filename` for offline usage of windpowerlib.
335+
Turbine data is saved to a csv file for offline usage of windpowerlib. If
336+
the file already exists it is overwritten.
342337
343338
Returns
344339
-------
@@ -361,15 +356,36 @@ def load_turbine_data_from_oedb():
361356
"Response: [{}]".format(result.status_code))
362357
# extract data to data frame
363358
turbine_data = pd.DataFrame(result.json())
364-
# store data as hdf5
359+
# standard file name for saving data
365360
filename = os.path.join(os.path.dirname(__file__), 'data',
366-
'turbine_data_oedb.h5')
367-
with pd.HDFStore(filename) as hdf_store:
368-
hdf_store.put('turbine_data', turbine_data)
369-
logging.debug("Turbine data is fetched from oedb and saved "
370-
"to {}".format(filename))
371-
return turbine_data
372-
361+
'oedb_{}.csv')
362+
# get all power (coefficient) curves and save to file
363+
# for curve_type in ['power_curve', 'power_coefficient_curve']:
364+
for curve_type in ['power', 'cp']: #todo change after renaming
365+
curves_df = pd.DataFrame(columns=['wind_speed'])
366+
for index in turbine_data.index:
367+
if (turbine_data['wind_speed_{}_value'.format(curve_type)][index] and
368+
turbine_data['{}_value'.format(curve_type)][index]):
369+
df = pd.DataFrame(data=[
370+
eval(turbine_data['wind_speed_{}_value'.format(curve_type)][index]),
371+
eval(turbine_data['{}_value'.format(curve_type)][
372+
index])]).transpose().rename(
373+
columns={0: 'wind_speed',
374+
1: turbine_data['turbine_type'][index]})
375+
if turbine_data['turbine_type'][index] not in [
376+
'S104/3400', 'S126/6150', 'V164/8000', 'MM92/2050']: # todo delete after fixed in OEP
377+
curves_df = pd.merge(left=curves_df, right=df, how='outer',
378+
on='wind_speed')
379+
curves_df = curves_df.set_index('wind_speed').sort_index().transpose()
380+
curves_df['turbine_type'] = curves_df.index
381+
# add nominal power to power (coefficient) data frame
382+
curves_df = pd.merge(left=curves_df,
383+
right=turbine_data[['turbine_type',
384+
'installed_capacity']],
385+
on='turbine_type').set_index('turbine_type')
386+
curves_df.to_csv(filename.format('{}_curves'.format(curve_type)))
387+
388+
return turbine_data
373389

374390
def get_turbine_types(print_out=True, filter_=True):
375391
r"""

0 commit comments

Comments
 (0)