Skip to content

Commit 36953fc

Browse files
Fix empty data frame concatenation in emdat_impact_yearlysum (#783)
* engine.impact_data.emdat_impact_yearlysum: fix concatenation of empty datasets
* changelog
* undo previous commit
* Use DataFrame.from_records for initializing

  This avoids creating an empty dataframe.

---------

Co-authored-by: Lukas Riedel <[email protected]>
1 parent e9d8873 commit 36953fc

File tree

2 files changed

+40
-26
lines changed

2 files changed

+40
-26
lines changed

climada/engine/impact_data.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -802,30 +802,46 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
802802
df_data[imp_str + " scaled"] = scale_impact2refyear(df_data[imp_str].values,
803803
df_data.Year.values, df_data.ISO.values,
804804
reference_year=reference_year)
805-
out = pd.DataFrame(columns=['ISO', 'region_id', 'year', 'impact',
806-
'impact_scaled', 'reference_year'])
807-
for country in df_data.ISO.unique():
808-
country = u_coord.country_to_iso(country, "alpha3")
809-
if not df_data.loc[df_data.ISO == country].size:
810-
continue
811-
all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
812-
data_out = pd.DataFrame(index=np.arange(0, len(all_years)),
813-
columns=out.columns)
814-
df_country = df_data.loc[df_data.ISO == country]
815-
for cnt, year in enumerate(all_years):
816-
data_out.loc[cnt, 'year'] = year
817-
data_out.loc[cnt, 'reference_year'] = reference_year
818-
data_out.loc[cnt, 'ISO'] = country
819-
data_out.loc[cnt, 'region_id'] = u_coord.country_to_iso(country, "numeric")
820-
data_out.loc[cnt, 'impact'] = \
821-
np.nansum(df_country[df_country.Year.isin([year])][imp_str])
822-
data_out.loc[cnt, 'impact_scaled'] = \
823-
np.nansum(df_country[df_country.Year.isin([year])][imp_str + " scaled"])
824-
if '000 US' in imp_str: # EM-DAT damages provided in '000 USD
825-
data_out.loc[cnt, 'impact'] = data_out.loc[cnt, 'impact'] * 1e3
826-
data_out.loc[cnt, 'impact_scaled'] = data_out.loc[cnt, 'impact_scaled'] * 1e3
827-
out = pd.concat([out, data_out])
828-
out = out.reset_index(drop=True)
805+
806+
def country_df(df_data):
807+
for data_iso in df_data.ISO.unique():
808+
country = u_coord.country_to_iso(data_iso, "alpha3")
809+
810+
df_country = df_data.loc[df_data.ISO == country]
811+
if not df_country.size:
812+
continue
813+
814+
# Retrieve impact data for all years
815+
all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
816+
data_out = pd.DataFrame.from_records(
817+
[
818+
(
819+
year,
820+
np.nansum(df_country[df_country.Year.isin([year])][imp_str]),
821+
np.nansum(
822+
df_country[df_country.Year.isin([year])][
823+
imp_str + " scaled"
824+
]
825+
),
826+
)
827+
for year in all_years
828+
],
829+
columns=["year", "impact", "impact_scaled"]
830+
)
831+
832+
# Add static data
833+
data_out["reference_year"] = reference_year
834+
data_out["ISO"] = country
835+
data_out["region_id"] = u_coord.country_to_iso(country, "numeric")
836+
837+
# EMDAT provides damage data in 1000 USD
838+
if "000 US" in imp_str:
839+
data_out["impact"] = data_out["impact"] * 1e3
840+
data_out["impact_scaled"] = data_out["impact_scaled"] * 1e3
841+
842+
yield data_out
843+
844+
out = pd.concat(list(country_df(df_data)), ignore_index=True)
829845
return out
830846

831847

climada/engine/test/test_impact_data.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,6 @@ def test_emdat_impact_yearlysum_no_futurewarning(self):
156156
reference_year=None,
157157
imp_str="Total Affected",
158158
)
159-
# TODO: pandas 2.1 will eventually raise a FutureWarning here,
160-
# but about array concatenation of empty entries. fix it!
161159

162160
def test_emdat_affected_yearlysum(self):
163161
"""test emdat_impact_yearlysum yearly impact data extraction"""

0 commit comments

Comments
 (0)