
Commit 2ba8be9

Remove attribute-style accesses to (Geo)DataFrames columns and xarray Datasets variables and attributes (#939)
* Remove attribute-style access, first draft
* Remove attribute-style accesses from engine
* Further removal of attribute-style accesses
* Remove attribute-style accesses in hazard module
* Remove attribute-style accesses in util
* Remove attribute-style accesses in storm_europe and correct mistake
* Mention policy in coding conventions
* Fix linter issues
* Remove files that were mistakenly committed
* Remove some more missed instances of attribute-style access
* Remove attr-style accesses in tests
* Remove some missed accesses to dataset attrs

---------

Co-authored-by: luseverin <[email protected]>
Co-authored-by: emanuel-schmid <[email protected]>
1 parent 36f4735 · commit 2ba8be9

38 files changed: +959 -937 lines changed
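Why the change matters, sketched with a minimal example (illustrative only, not taken from the commit): attribute-style access breaks whenever a column name collides with an existing DataFrame attribute or method, or is not a valid Python identifier, while bracket-style access always resolves to the column.

import pandas as pd

df = pd.DataFrame({"value": [1.0, 2.0], "mean": [3.0, 4.0]})

# Attribute-style access silently resolves to the built-in method,
# not the column: df.mean is the DataFrame.mean method here.
print(callable(df.mean))    # True -- a method, not the data

# Bracket-style access is unambiguous and always returns the column.
print(df["mean"].tolist())  # [3.0, 4.0]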

climada/engine/forecast.py

Lines changed: 1 addition & 1 deletion
@@ -186,7 +186,7 @@ def __init__(
         if exposure_name is None:
             try:
                 self.exposure_name = u_coord.country_to_iso(
-                    exposure.gdf.region_id.unique()[0], "name"
+                    exposure.gdf["region_id"].unique()[0], "name"
                 )
             except (KeyError, AttributeError):
                 self.exposure_name = "custom"
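A side effect of this particular change, shown with a toy frame (an illustration, not from the commit): a missing column raises KeyError under bracket access but AttributeError under attribute access, which is why the surrounding except clause catches both.

import pandas as pd

gdf = pd.DataFrame({"value": [1.0]})  # no 'region_id' column

for accessor in (lambda: gdf["region_id"], lambda: gdf.region_id):
    try:
        accessor()
    except (KeyError, AttributeError) as err:
        # bracket access -> KeyError, attribute access -> AttributeError
        print(type(err).__name__, "-> exposure_name falls back to 'custom'")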

climada/engine/impact.py

Lines changed: 31 additions & 31 deletions
@@ -243,8 +243,8 @@ def from_eih(cls, exposures, hazard, at_event, eai_exp, aai_agg, imp_mat=None):
             date = hazard.date,
             frequency = hazard.frequency,
             frequency_unit = hazard.frequency_unit,
-            coord_exp = np.stack([exposures.gdf.latitude.values,
-                                  exposures.gdf.longitude.values],
+            coord_exp = np.stack([exposures.gdf['latitude'].values,
+                                  exposures.gdf['longitude'].values],
                                  axis=1),
             crs = exposures.crs,
             unit = exposures.value_unit,
@@ -1081,25 +1081,25 @@ def from_csv(cls, file_name):
         # pylint: disable=no-member
         LOGGER.info('Reading %s', file_name)
         imp_df = pd.read_csv(file_name)
-        imp = cls(haz_type=imp_df.haz_type[0])
-        imp.unit = imp_df.unit[0]
-        imp.tot_value = imp_df.tot_value[0]
-        imp.aai_agg = imp_df.aai_agg[0]
-        imp.event_id = imp_df.event_id[~np.isnan(imp_df.event_id)].values
+        imp = cls(haz_type=imp_df['haz_type'][0])
+        imp.unit = imp_df['unit'][0]
+        imp.tot_value = imp_df['tot_value'][0]
+        imp.aai_agg = imp_df['aai_agg'][0]
+        imp.event_id = imp_df['event_id'][~np.isnan(imp_df['event_id'])].values
         num_ev = imp.event_id.size
-        imp.event_name = imp_df.event_name[:num_ev].values.tolist()
-        imp.date = imp_df.event_date[:num_ev].values
-        imp.at_event = imp_df.at_event[:num_ev].values
-        imp.frequency = imp_df.event_frequency[:num_ev].values
-        imp.frequency_unit = imp_df.frequency_unit[0] if 'frequency_unit' in imp_df \
+        imp.event_name = imp_df['event_name'][:num_ev].values.tolist()
+        imp.date = imp_df['event_date'][:num_ev].values
+        imp.at_event = imp_df['at_event'][:num_ev].values
+        imp.frequency = imp_df['event_frequency'][:num_ev].values
+        imp.frequency_unit = imp_df['frequency_unit'][0] if 'frequency_unit' in imp_df \
             else DEF_FREQ_UNIT
-        imp.eai_exp = imp_df.eai_exp[~np.isnan(imp_df.eai_exp)].values
+        imp.eai_exp = imp_df['eai_exp'][~np.isnan(imp_df['eai_exp'])].values
         num_exp = imp.eai_exp.size
         imp.coord_exp = np.zeros((num_exp, 2))
-        imp.coord_exp[:, 0] = imp_df.exp_lat[:num_exp]
-        imp.coord_exp[:, 1] = imp_df.exp_lon[:num_exp]
+        imp.coord_exp[:, 0] = imp_df['exp_lat'][:num_exp]
+        imp.coord_exp[:, 1] = imp_df['exp_lon'][:num_exp]
         try:
-            imp.crs = u_coord.to_crs_user_input(imp_df.exp_crs.values[0])
+            imp.crs = u_coord.to_crs_user_input(imp_df['exp_crs'].values[0])
         except AttributeError:
             imp.crs = DEF_CRS
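The from_csv pattern imp_df['event_id'][~np.isnan(imp_df['event_id'])].values recovers variable-length arrays from one rectangular CSV table, where shorter columns are NaN-padded; a sketch with hypothetical data:

import numpy as np
import pandas as pd

# Hypothetical round-trip: columns of different lengths are padded
# with NaN when written to one rectangular CSV table.
imp_df = pd.DataFrame({
    "event_id": [1.0, 2.0, 3.0, np.nan],   # 3 events
    "eai_exp":  [0.5, 0.7, 0.2, 0.9],      # 4 exposure points
})

# Bracket access plus a NaN mask recovers the original, shorter array.
event_id = imp_df["event_id"][~np.isnan(imp_df["event_id"])].values
print(event_id)  # [1. 2. 3.]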

@@ -1129,23 +1129,23 @@ def from_excel(cls, file_name):
         dfr = pd.read_excel(file_name)
         imp = cls(haz_type=str(dfr['haz_type'][0]))

-        imp.unit = dfr.unit[0]
-        imp.tot_value = dfr.tot_value[0]
-        imp.aai_agg = dfr.aai_agg[0]
+        imp.unit = dfr['unit'][0]
+        imp.tot_value = dfr['tot_value'][0]
+        imp.aai_agg = dfr['aai_agg'][0]

-        imp.event_id = dfr.event_id[~np.isnan(dfr.event_id.values)].values
-        imp.event_name = dfr.event_name[:imp.event_id.size].values
-        imp.date = dfr.event_date[:imp.event_id.size].values
-        imp.frequency = dfr.event_frequency[:imp.event_id.size].values
-        imp.frequency_unit = dfr.frequency_unit[0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
-        imp.at_event = dfr.at_event[:imp.event_id.size].values
+        imp.event_id = dfr['event_id'][~np.isnan(dfr['event_id'].values)].values
+        imp.event_name = dfr['event_name'][:imp.event_id.size].values
+        imp.date = dfr['event_date'][:imp.event_id.size].values
+        imp.frequency = dfr['event_frequency'][:imp.event_id.size].values
+        imp.frequency_unit = dfr['frequency_unit'][0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
+        imp.at_event = dfr['at_event'][:imp.event_id.size].values

-        imp.eai_exp = dfr.eai_exp[~np.isnan(dfr.eai_exp.values)].values
+        imp.eai_exp = dfr['eai_exp'][~np.isnan(dfr['eai_exp'].values)].values
         imp.coord_exp = np.zeros((imp.eai_exp.size, 2))
-        imp.coord_exp[:, 0] = dfr.exp_lat.values[:imp.eai_exp.size]
-        imp.coord_exp[:, 1] = dfr.exp_lon.values[:imp.eai_exp.size]
+        imp.coord_exp[:, 0] = dfr['exp_lat'].values[:imp.eai_exp.size]
+        imp.coord_exp[:, 1] = dfr['exp_lon'].values[:imp.eai_exp.size]
         try:
-            imp.crs = u_coord.to_csr_user_input(dfr.exp_crs.values[0])
+            imp.crs = u_coord.to_csr_user_input(dfr['exp_crs'].values[0])
         except AttributeError:
             imp.crs = DEF_CRS

@@ -1324,14 +1324,14 @@ def video_direct_impact(exp, impf_set, haz_list, file_name='',
                     np.array([haz.intensity.max() for haz in haz_list]).max()]

         if 'vmin' not in args_exp:
-            args_exp['vmin'] = exp.gdf.value.values.min()
+            args_exp['vmin'] = exp.gdf['value'].values.min()

         if 'vmin' not in args_imp:
             args_imp['vmin'] = np.array([imp.eai_exp.min() for imp in imp_list
                                          if imp.eai_exp.size]).min()

         if 'vmax' not in args_exp:
-            args_exp['vmax'] = exp.gdf.value.values.max()
+            args_exp['vmax'] = exp.gdf['value'].values.max()

         if 'vmax' not in args_imp:
             args_imp['vmax'] = np.array([imp.eai_exp.max() for imp in imp_list
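The 'vmin'/'vmax' guards fill plotting defaults only when the caller did not pass them; dict.setdefault is an equivalent idiom (a sketch with made-up values, not a proposed change to the code):

import numpy as np

values = np.array([10.0, 250.0, 40.0])
args_exp = {}

# Equivalent to the 'vmin'/'vmax' guards above: setdefault only fills
# keys the caller did not already supply.
args_exp.setdefault("vmin", values.min())
args_exp.setdefault("vmax", values.max())
print(args_exp)  # {'vmin': 10.0, 'vmax': 250.0}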

climada/engine/impact_calc.py

Lines changed: 7 additions & 7 deletions
@@ -154,8 +154,8 @@ def impact(self, save_mat=True, assign_centroids=True,
                     exp_gdf.size, self.n_events)
         imp_mat_gen = self.imp_mat_gen(exp_gdf, impf_col)

-        insured = ('cover' in exp_gdf and exp_gdf.cover.max() >= 0) \
-            or ('deductible' in exp_gdf and exp_gdf.deductible.max() > 0)
+        insured = ('cover' in exp_gdf and exp_gdf['cover'].max() >= 0) \
+            or ('deductible' in exp_gdf and exp_gdf['deductible'].max() > 0)
         if insured:
             LOGGER.info("cover and/or deductible columns detected,"
                         " going to calculate insured impact")
@@ -253,8 +253,8 @@ def minimal_exp_gdf(self, impf_col, assign_centroids, ignore_cover, ignore_deduc
                 " Run 'exposures.assign_centroids()' beforehand or set"
                 " 'assign_centroids' to 'True'")
         mask = (
-            (self.exposures.gdf.value.values == self.exposures.gdf.value.values)  # value != NaN
-            & (self.exposures.gdf.value.values != 0)  # value != 0
+            (self.exposures.gdf['value'].values == self.exposures.gdf['value'].values)# value != NaN
+            & (self.exposures.gdf['value'].values != 0)  # value != 0
             & (self.exposures.gdf[self.hazard.centr_exp_col].values >= 0)  # centroid assigned
         )
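The mask relies on NaN being the only value unequal to itself, so values == values is a vectorised not-NaN test, as the "value != NaN" comment indicates; a minimal sketch:

import numpy as np

values = np.array([1.0, np.nan, 0.0, 2.5])

# NaN != NaN, so (values == values) is False exactly at NaN entries.
mask = (values == values) & (values != 0)
print(mask)  # [ True False False  True]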

@@ -320,7 +320,7 @@ def _chunk_exp_idx(haz_size, idx_exp_impf):
             )
             idx_exp_impf = (exp_gdf[impf_col].values == impf_id).nonzero()[0]
             for exp_idx in _chunk_exp_idx(self.hazard.size, idx_exp_impf):
-                exp_values = exp_gdf.value.values[exp_idx]
+                exp_values = exp_gdf['value'].values[exp_idx]
                 cent_idx = exp_gdf[self.hazard.centr_exp_col].values[exp_idx]
                 yield (
                     self.impact_matrix(exp_values, cent_idx, impf),
@@ -363,10 +363,10 @@ def insured_mat_gen(self, imp_mat_gen, exp_gdf, impf_col):
                 haz_type=self.hazard.haz_type,
                 fun_id=impf_id)
             if 'deductible' in exp_gdf:
-                deductible = exp_gdf.deductible.values[exp_idx]
+                deductible = exp_gdf['deductible'].values[exp_idx]
                 mat = self.apply_deductible_to_mat(mat, deductible, self.hazard, cent_idx, impf)
             if 'cover' in exp_gdf:
-                cover = exp_gdf.cover.values[exp_idx]
+                cover = exp_gdf['cover'].values[exp_idx]
                 mat = self.apply_cover_to_mat(mat, cover)
             yield (mat, exp_idx)
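Both the old and the new spelling rely on 'cover' in exp_gdf testing column labels, not cell values, so the insured branch is unaffected by the access-style change; a toy example:

import pandas as pd

exp_gdf = pd.DataFrame({"value": [100.0], "cover": [80.0]})

# `in` on a (Geo)DataFrame checks the column index, so the insured
# branch triggers only when the column exists.
print("cover" in exp_gdf)       # True
print("deductible" in exp_gdf)  # False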

climada/engine/impact_data.py

Lines changed: 46 additions & 43 deletions
@@ -355,23 +355,23 @@ def create_lookup(emdat_data, start, end, disaster_subtype='Tropical cyclone'):
                                    'Date_start_EM_ordinal', 'Disaster_name',
                                    'EM_ID', 'ibtracsID', 'allocation_level',
                                    'possible_track', 'possible_track_all'])
-    lookup.hit_country = data.ISO
-    lookup.Date_start_EM = data.Date_start_clean
-    lookup.Disaster_name = data.Disaster_name
-    lookup.EM_ID = data.Disaster_No
+    lookup['hit_country'] = data['ISO']
+    lookup['Date_start_EM'] = data['Date_start_clean']
+    lookup['Disaster_name'] = data['Disaster_name']
+    lookup['EM_ID'] = data['Disaster_No']
     lookup = lookup.reset_index(drop=True)
     # create ordinals
-    for i in range(0, len(data.Date_start_clean.values)):
-        lookup.Date_start_EM_ordinal[i] = datetime.toordinal(
-            datetime.strptime(lookup.Date_start_EM.values[i], '%Y-%m-%d'))
+    for i in range(0, len(data['Date_start_clean'].values)):
+        lookup['Date_start_EM_ordinal'][i] = datetime.toordinal(
+            datetime.strptime(lookup['Date_start_EM'].values[i], '%Y-%m-%d'))
     # ordinals to numeric
-    lookup.Date_start_EM_ordinal = pd.to_numeric(lookup.Date_start_EM_ordinal)
+    lookup['Date_start_EM_ordinal'] = pd.to_numeric(lookup['Date_start_EM_ordinal'])
     # select time
     emdat_start = datetime.toordinal(datetime.strptime(start, '%Y-%m-%d'))
     emdat_end = datetime.toordinal(datetime.strptime(end, '%Y-%m-%d'))

-    lookup = lookup[lookup.Date_start_EM_ordinal.values > emdat_start]
-    lookup = lookup[lookup.Date_start_EM_ordinal.values < emdat_end]
+    lookup = lookup[lookup['Date_start_EM_ordinal'].values > emdat_start]
+    lookup = lookup[lookup['Date_start_EM_ordinal'].values < emdat_end]

     return lookup
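Bracket access removes the name-collision hazard, but lookup['Date_start_EM_ordinal'][i] = ... is still chained assignment, which pandas may apply to a temporary copy; a sketch of the .loc form the pandas docs recommend (an editorial aside, not part of the commit):

from datetime import datetime

import pandas as pd

lookup = pd.DataFrame({"Date_start_EM": ["2017-09-05"],
                       "Date_start_EM_ordinal": [float("nan")]})

# .loc addresses row and column in one call, so the write cannot land
# on a temporary copy (no SettingWithCopyWarning).
lookup.loc[0, "Date_start_EM_ordinal"] = datetime.toordinal(
    datetime.strptime(lookup["Date_start_EM"].values[0], "%Y-%m-%d"))
print(lookup)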

@@ -397,15 +397,16 @@ def emdat_possible_hit(lookup, hit_countries, delta_t):
     # tracks: processed IBtracks with info which track hit which country
     # delta_t: time difference of start of EMdat and IBrtacks
     possible_hit_all = []
-    for i in range(0, len(lookup.EM_ID.values)):
+    for i in range(0, len(lookup['EM_ID'].values)):
         possible_hit = []
         country_tracks = hit_countries[
-            hit_countries['hit_country'] == lookup.hit_country.values[i]]
-        for j in range(0, len(country_tracks.Date_start.values)):
-            if (lookup.Date_start_EM_ordinal.values[i] - country_tracks.Date_start.values[j]) < \
-                    delta_t and (lookup.Date_start_EM_ordinal.values[i] -
-                                 country_tracks.Date_start.values[j]) >= 0:
-                possible_hit.append(country_tracks.ibtracsID.values[j])
+            hit_countries['hit_country'] == lookup['hit_country'].values[i]]
+        for j in range(0, len(country_tracks['Date_start'].values)):
+            if (lookup['Date_start_EM_ordinal'].values[i] -
+                    country_tracks['Date_start'].values[j]) < \
+                    delta_t and (lookup['Date_start_EM_ordinal'].values[i] -
+                                 country_tracks['Date_start'].values[j]) >= 0:
+                possible_hit.append(country_tracks['ibtracsID'].values[j])
         possible_hit_all.append(possible_hit)

     return possible_hit_all
@@ -428,14 +429,14 @@ def match_em_id(lookup, poss_hit):
         with all possible hits per EMdat ID
     """
     possible_hit_all = []
-    for i in range(0, len(lookup.EM_ID.values)):
+    for i in range(0, len(lookup['EM_ID'].values)):
         possible_hit = []
         # lookup without line i
         #lookup_match = lookup.drop(i)
         lookup_match = lookup
         # Loop over check if EM dat ID is the same
-        for i_match in range(0, len(lookup_match.EM_ID.values)):
-            if lookup.EM_ID.values[i] == lookup_match.EM_ID.values[i_match]:
+        for i_match in range(0, len(lookup_match['EM_ID'].values)):
+            if lookup['EM_ID'].values[i] == lookup_match['EM_ID'].values[i_match]:
                 possible_hit.append(poss_hit[i])
         possible_hit_all.append(possible_hit)
     return possible_hit_all
@@ -467,7 +468,7 @@ def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
     """

     for i, _ in enumerate(possible_tracks_1):
-        if np.isnan(lookup.allocation_level.values[i]):
+        if np.isnan(lookup['allocation_level'].values[i]):
             number_emdat_id = len(possible_tracks_1[i])
             # print(number_emdat_id)
             for j in range(0, number_emdat_id):
@@ -479,14 +480,15 @@ def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
                 if all(possible_tracks_1[i][0] == possible_tracks_1[i][k]
                        for k in range(0, len(possible_tracks_1[i]))):
                     # check that track ID has not been assigned to that country already
-                    ctry_lookup = lookup[lookup['hit_country'] == lookup.hit_country.values[i]]
-                    if possible_tracks_1[i][0][0] not in ctry_lookup.ibtracsID.values:
-                        lookup.ibtracsID.values[i] = possible_tracks_1[i][0][0]
-                        lookup.allocation_level.values[i] = level
+                    ctry_lookup = lookup[lookup['hit_country']
+                                         == lookup['hit_country'].values[i]]
+                    if possible_tracks_1[i][0][0] not in ctry_lookup['ibtracsID'].values:
+                        lookup['ibtracsID'].values[i] = possible_tracks_1[i][0][0]
+                        lookup['allocation_level'].values[i] = level
                 elif possible_tracks_1[i][j] != []:
-                    lookup.possible_track.values[i] = possible_tracks_1[i]
+                    lookup['possible_track'].values[i] = possible_tracks_1[i]
                 else:
-                    lookup.possible_track_all.values[i] = possible_tracks_1[i]
+                    lookup['possible_track_all'].values[i] = possible_tracks_1[i]
     return lookup

@@ -507,13 +509,13 @@ def check_assigned_track(lookup, checkset):
     # merge checkset and lookup
     check = pd.merge(checkset, lookup[['hit_country', 'EM_ID', 'ibtracsID']],
                      on=['hit_country', 'EM_ID'])
-    check_size = len(check.ibtracsID.values)
-    # not assigned values
-    not_assigned = check.ibtracsID.isnull().sum(axis=0)
+    check_size = len(check['ibtracsID'].values)
+    # not assigned values]
+    not_assigned = check['ibtracsID'].isnull().sum(axis=0)
     # correct assigned values
-    correct = sum(check.ibtracsID.values == check.IBtracsID_checked.values)
+    correct = sum(check['ibtracsID'].values == check['IBtracsID_checked'].values)
     # wrongly assigned values
-    wrong = len(check.ibtracsID.values) - not_assigned - correct
+    wrong = len(check['ibtracsID'].values) - not_assigned - correct
     print('%.1f%% tracks assigned correctly, %.1f%% wrongly, %.1f%% not assigned'
           % (correct / check_size * 100,
              wrong / check_size * 100,
@@ -707,7 +709,7 @@ def emdat_countries_by_hazard(emdat_file_csv, hazard=None, year_range=None):
         List of names of countries impacted by the disaster (sub-)types
     """
     df_data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range)
-    countries_iso3a = list(df_data.ISO.unique())
+    countries_iso3a = list(df_data['ISO'].unique())
     countries_names = list()
     for iso3a in countries_iso3a:
         try:
@@ -800,26 +802,27 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
                              year_range=year_range, target_version=version)

     df_data[imp_str + " scaled"] = scale_impact2refyear(df_data[imp_str].values,
-                                                        df_data.Year.values, df_data.ISO.values,
+                                                        df_data['Year'].values,
+                                                        df_data['ISO'].values,
                                                         reference_year=reference_year)

     def country_df(df_data):
-        for data_iso in df_data.ISO.unique():
+        for data_iso in df_data['ISO'].unique():
             country = u_coord.country_to_iso(data_iso, "alpha3")

-            df_country = df_data.loc[df_data.ISO == country]
+            df_country = df_data.loc[df_data['ISO'] == country]
             if not df_country.size:
                 continue

             # Retrieve impact data for all years
-            all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
+            all_years = np.arange(min(df_data['Year']), max(df_data['Year']) + 1)
             data_out = pd.DataFrame.from_records(
                 [
                     (
                         year,
-                        np.nansum(df_country[df_country.Year.isin([year])][imp_str]),
+                        np.nansum(df_country[df_country['Year'].isin([year])][imp_str]),
                         np.nansum(
-                            df_country[df_country.Year.isin([year])][
+                            df_country[df_country['Year'].isin([year])][
                                 imp_str + " scaled"
                             ]
                         ),
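The per-year aggregation selects one year at a time with an isin mask and sums NaN-safely; a sketch with a hypothetical damages column (EM-DAT-style column names contain spaces and punctuation, which is itself a reason attribute access cannot work here):

import numpy as np
import pandas as pd

imp_str = "Total Damages ('000 US$)"  # hypothetical EM-DAT-style column
df_country = pd.DataFrame({"Year": [2000, 2000, 2001],
                           imp_str: [1.5, np.nan, 3.0]})

# One total per year: nansum ignores missing entries within a year.
for year in (2000, 2001):
    total = np.nansum(df_country[df_country["Year"].isin([year])][imp_str])
    print(year, total)  # 2000 1.5 / 2001 3.0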
@@ -894,13 +897,13 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
     df_data['year'] = df_data['Year']
     df_data['reference_year'] = reference_year
     df_data['impact'] = df_data[imp_str]
-    df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data.Year.values,
-                                                    df_data.ISO.values,
+    df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data['Year'].values,
+                                                    df_data['ISO'].values,
                                                     reference_year=reference_year)
     df_data['region_id'] = np.nan
-    for country in df_data.ISO.unique():
+    for country in df_data['ISO'].unique():
         try:
-            df_data.loc[df_data.ISO == country, 'region_id'] = \
+            df_data.loc[df_data['ISO'] == country, 'region_id'] = \
                 u_coord.country_to_iso(country, "numeric")
         except LookupError:
             LOGGER.warning('ISO3alpha code not found in iso_country: %s', country)
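The region_id loop writes one country at a time through a boolean .loc mask; a self-contained sketch with a hypothetical two-entry ISO map standing in for u_coord.country_to_iso:

import numpy as np
import pandas as pd

df_data = pd.DataFrame({"ISO": ["CHE", "USA", "CHE"]})
df_data["region_id"] = np.nan

# Boolean .loc assignment fills all rows of one country at once.
iso_numeric = {"CHE": 756, "USA": 840}  # hypothetical subset
for country in df_data["ISO"].unique():
    df_data.loc[df_data["ISO"] == country, "region_id"] = iso_numeric[country]
print(df_data)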

climada/engine/test/test_impact.py

Lines changed: 3 additions & 3 deletions
@@ -111,7 +111,7 @@ def test_from_eih_pass(self):
         np.testing.assert_array_almost_equal(imp.at_event, fake_at_event)
         np.testing.assert_array_almost_equal(
             imp.coord_exp,
-            np.stack([exp.gdf.latitude.values, exp.gdf.longitude.values], axis=1)
+            np.stack([exp.gdf['latitude'].values, exp.gdf['longitude'].values], axis=1)
         )

     def test_pyproj_crs(self):
@@ -513,7 +513,7 @@ def test_local_exceedance_imp_pass(self):
         impact_rp = impact.local_exceedance_imp(return_periods=(10, 40))

         self.assertIsInstance(impact_rp, np.ndarray)
-        self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf.value.size)
+        self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf['value'].size)
         self.assertAlmostEqual(np.max(impact_rp), 2916964966.388219, places=5)
         self.assertAlmostEqual(np.min(impact_rp), 444457580.131494, places=5)

@@ -941,7 +941,7 @@ def test_match_centroids(self):
         fake_aai_agg = np.sum(fake_eai_exp)
         imp = Impact.from_eih(exp, HAZ, fake_at_event, fake_eai_exp, fake_aai_agg)
         imp_centr = imp.match_centroids(HAZ)
-        np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)
+        np.testing.assert_array_equal(imp_centr, exp.gdf['centr_TC'])


 class TestImpactH5IO(unittest.TestCase):
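The updated assertion passes a pandas column straight to numpy's test helper; this works because assert_array_equal coerces its arguments to arrays, so only the values (not the Series index or name) are compared (a toy check, not from the test suite):

import numpy as np
import pandas as pd

centr = pd.Series([3, 1, 4], name="centr_TC", index=[10, 11, 12])

# The Series is converted to an array internally, so the custom index
# and name play no part in the comparison.
np.testing.assert_array_equal(np.array([3, 1, 4]), centr)
print("assertion passed")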
