Skip to content

Commit dfb1a92

Browse files
Update EMDAT VARNAMES for version 2023 (#701)
* Update EMDAT VARNAMES for version 2023: update the EMDAT variable names to match the data structure of the year 2023 * impact_data: use the newest version of EM-DAT files instead of fixing it to 2020 * clean_emdat_df: add inline comments for 2018 case --------- Co-authored-by: emanuel-schmid <[email protected]>
1 parent 11601e0 commit dfb1a92

File tree

2 files changed

+83
-18
lines changed

2 files changed

+83
-18
lines changed

climada/engine/calibration_opt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from climada.engine import ImpactCalc
3333
from climada.entity import ImpactFuncSet, ImpfTropCyclone, impact_funcs
34-
from climada.engine.impact_data import emdat_impact_yearlysum, emdat_impact_event
34+
from climada.engine.impact_data import emdat_impact_yearlysum #, emdat_impact_event
3535

3636
LOGGER = logging.getLogger(__name__)
3737

@@ -261,7 +261,7 @@ def init_impact_data(hazard_type,
261261
reference_year=reference_year)
262262
else:
263263
raise ValueError('init_impact_data not yet implemented for yearly_impact = False.')
264-
em_data = emdat_impact_event(source_file)
264+
#em_data = emdat_impact_event(source_file)
265265
else:
266266
raise ValueError('init_impact_data not yet implemented for other impact_data_sources '
267267
'than emdat.')

climada/engine/impact_data.py

Lines changed: 81 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,57 @@
114114
"Reconstruction Costs ('000 US$)": "Reconstruction Costs ('000 US$)",
115115
"Insured Damages ('000 US$)": "Insured Damages ('000 US$)",
116116
"Total Damages ('000 US$)": "Total Damages ('000 US$)",
117-
'CPI': 'CPI'}}
117+
'CPI': 'CPI'},
118+
2023: {'Dis No': 'Dis No',
119+
'Year': 'Year',
120+
'Seq': 'Seq',
121+
'Glide': 'Glide',
122+
'Disaster Group': 'Disaster Group',
123+
'Disaster Subgroup': 'Disaster Subgroup',
124+
'Disaster Type': 'Disaster Type',
125+
'Disaster Subtype': 'Disaster Subtype',
126+
'Disaster Subsubtype': 'Disaster Subsubtype',
127+
'Event Name': 'Event Name',
128+
'Country': 'Country',
129+
'ISO': 'ISO',
130+
'Region': 'Region',
131+
'Continent': 'Continent',
132+
'Location': 'Location',
133+
'Origin': 'Origin',
134+
'Associated Dis': 'Associated Dis',
135+
'Associated Dis2': 'Associated Dis2',
136+
'OFDA Response': 'OFDA Response',
137+
'Appeal': 'Appeal',
138+
'Declaration': 'Declaration',
139+
"AID Contribution ('000 US$)": "AID Contribution ('000 US$)",
140+
'Dis Mag Value': 'Dis Mag Value',
141+
'Dis Mag Scale': 'Dis Mag Scale',
142+
'Latitude': 'Latitude',
143+
'Longitude': 'Longitude',
144+
'Local Time': 'Local Time',
145+
'River Basin': 'River Basin',
146+
'Start Year': 'Start Year',
147+
'Start Month': 'Start Month',
148+
'Start Day': 'Start Day',
149+
'End Year': 'End Year',
150+
'End Month': 'End Month',
151+
'End Day': 'End Day',
152+
'Total Deaths': 'Total Deaths',
153+
'No Injured': 'No Injured',
154+
'No Affected': 'No Affected',
155+
'No Homeless': 'No Homeless',
156+
'Total Affected': 'Total Affected',
157+
"Reconstruction Costs ('000 US$)": "Reconstruction Costs ('000 US$)",
158+
"Reconstruction Costs, Adjusted ('000 US$)": "Reconstruction Costs, Adjusted ('000 US$)",
159+
"Insured Damages ('000 US$)": "Insured Damages ('000 US$)",
160+
"Insured Damages, Adjusted ('000 US$)": "Insured Damages, Adjusted ('000 US$)",
161+
"Total Damages ('000 US$)": "Total Damages ('000 US$)",
162+
"Total Damages, Adjusted ('000 US$)": "Total Damages, Adjusted ('000 US$)",
163+
'CPI': 'CPI',
164+
'Adm Level': 'Adm Level',
165+
'Admin1 Code': 'Admin1 Code',
166+
'Admin2 Code': 'Admin2 Code',
167+
'Geo Locations': 'Geo Locations'}}
118168

119169

120170
def assign_hazard_to_emdat(certainty_level, intensity_path_haz, names_path_haz,
@@ -473,7 +523,7 @@ def check_assigned_track(lookup, checkset):
473523

474524

475525
def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
476-
target_version=2020):
526+
target_version=None):
477527
"""
478528
Get a clean and standardized DataFrame from EM-DAT-CSV-file
479529
(1) load EM-DAT data from CSV to DataFrame and remove header/footer,
@@ -501,7 +551,8 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
501551
(only min and max are considered)
502552
target_version : int
503553
required EM-DAT data format version (i.e. year of download),
504-
changes naming of columns/variables (default: 2020)
554+
changes naming of columns/variables,
555+
default: newest available version in ``VARNAMES_EMDAT`` that matches the given emdat_file
505556
506557
Returns
507558
-------
@@ -527,12 +578,16 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
527578

528579
# (2) handle version, clean up, and add columns:
529580
# (2.1) identify underlying EMDAT version of csv:
530-
version = 2020
531-
for vers in list(VARNAMES_EMDAT.keys()):
581+
version = None
582+
for vers in sorted(VARNAMES_EMDAT.keys()):
532583
if len(df_emdat.columns) >= len(VARNAMES_EMDAT[vers]) and \
533584
all(item in list(df_emdat.columns) for item in VARNAMES_EMDAT[vers].values()):
534585
version = vers
586+
if not version:
587+
raise ValueError("the given emdat_file contains unexpected columns and cannot be"
588+
" associated with any known EM-DAT file structure")
535589
# (2.2) create new DataFrame df_data with column names as target version
590+
target_version = target_version or version
536591
df_data = pd.DataFrame(index=df_emdat.index.values,
537592
columns=VARNAMES_EMDAT[target_version].values())
538593
if 'Year' not in df_data.columns: # make sure column "Year" exists
@@ -551,6 +606,9 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
551606
years_list.append(np.nan)
552607
df_data[col] = years_list
553608
if version <= 2018 and target_version >= 2020:
609+
# create 'Start Year', 'Start Month' and 'Start Day' from 'Start date'
610+
# ignore 'End date'
611+
# replace NaN with None in 'Disaster Subtype', 'Disaster Type' and 'Country'
554612
date_list = list()
555613
year_list = list()
556614
month_list = list()
@@ -705,7 +763,7 @@ def scale_impact2refyear(impact_values, year_values, iso3a_values, reference_yea
705763

706764
def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_range=None,
707765
reference_year=None, imp_str="Total Damages ('000 US$)",
708-
version=2020):
766+
version=None):
709767
"""function to load EM-DAT data and sum impact per year
710768
711769
Parameters
@@ -727,16 +785,18 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
727785
year_range : list or tuple
728786
Year range to be extracted, e.g. (2000, 2015);
729787
(only min and max are considered)
730-
version : int
788+
version : int, optional
731789
required EM-DAT data format version (i.e. year of download),
732-
changes naming of columns/variables (default: 2020)
790+
changes naming of columns/variables,
791+
default: newest available version in ``VARNAMES_EMDAT``
733792
734793
Returns
735794
-------
736795
out : pd.DataFrame
737796
DataFrame with summed impact and scaled impact per
738797
year and country.
739798
"""
799+
version = version or max(VARNAMES_EMDAT.keys())
740800
imp_str = VARNAMES_EMDAT[version][imp_str]
741801
df_data = clean_emdat_df(emdat_file_csv, countries=countries, hazard=hazard,
742802
year_range=year_range, target_version=version)
@@ -773,7 +833,7 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
773833

774834
def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=None,
775835
reference_year=None, imp_str="Total Damages ('000 US$)",
776-
version=2020):
836+
version=None):
777837
"""function to load EM-DAT data return impact per event
778838
779839
Parameters
@@ -801,8 +861,9 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
801861
imp_str : str
802862
Column name of impact metric in EMDAT CSV,
803863
default = "Total Damages ('000 US$)"
804-
version : int
805-
EM-DAT version to take variable/column names from (defaul: 2020)
864+
version : int, optional
865+
EM-DAT version to take variable/column names from,
866+
default: newest available version in ``VARNAMES_EMDAT``
806867
807868
Returns
808869
-------
@@ -812,6 +873,7 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
812873
same unit as chosen impact, but multiplied by 1000 if impact is given
813874
as 1000 US$ (e.g. imp_str="Total Damages ('000 US$) scaled").
814875
"""
876+
version = version or max(VARNAMES_EMDAT.keys())
815877
imp_str = VARNAMES_EMDAT[version][imp_str]
816878
df_data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range,
817879
countries=countries, target_version=version)
@@ -883,7 +945,11 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
883945
imp_str = "Insured Damages ('000 US$)"
884946
elif "Reconstruction Costs" in imp_str:
885947
imp_str = "Reconstruction Costs ('000 US$)"
886-
imp_str = VARNAMES_EMDAT[max(VARNAMES_EMDAT.keys())][imp_str]
948+
949+
# use the newest version of EMDAT varnames
950+
version = max(VARNAMES_EMDAT.keys())
951+
952+
imp_str = VARNAMES_EMDAT[version][imp_str]
887953
if not hazard_type_emdat:
888954
hazard_type_emdat = [hazard_type_climada]
889955
if reference_year == 0:
@@ -903,7 +969,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
903969
# Load EM-DAT impact data by event:
904970
em_data = emdat_impact_event(emdat_file_csv, countries=countries, hazard=hazard_type_emdat,
905971
year_range=year_range, reference_year=reference_year,
906-
imp_str=imp_str, version=max(VARNAMES_EMDAT.keys()))
972+
imp_str=imp_str, version=version)
907973

908974
if isinstance(countries, str):
909975
countries = [countries]
@@ -915,7 +981,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
915981
return impact_instance, countries
916982
impact_instance.event_id = np.array(em_data.index, int)
917983
impact_instance.event_name = list(
918-
em_data[VARNAMES_EMDAT[max(VARNAMES_EMDAT.keys())]['Dis No']])
984+
em_data[VARNAMES_EMDAT[version]['Dis No']])
919985

920986
date_list = list()
921987
for year in list(em_data['Year']):
@@ -982,8 +1048,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
9821048
countries_reg_id.append(u_coord.country_to_iso(cntry, "numeric"))
9831049
except LookupError:
9841050
countries_reg_id.append(0)
985-
df_tmp = em_data[em_data[VARNAMES_EMDAT[
986-
max(VARNAMES_EMDAT.keys())]['ISO']].str.contains(cntry)]
1051+
df_tmp = em_data[em_data[VARNAMES_EMDAT[version]['ISO']].str.contains(cntry)]
9871052
if not reference_year:
9881053
impact_instance.eai_exp[idx] = sum(np.array(df_tmp["impact"]) *
9891054
impact_instance.frequency[0])

0 commit comments

Comments
 (0)