114114 "Reconstruction Costs ('000 US$)" : "Reconstruction Costs ('000 US$)" ,
115115 "Insured Damages ('000 US$)" : "Insured Damages ('000 US$)" ,
116116 "Total Damages ('000 US$)" : "Total Damages ('000 US$)" ,
117- 'CPI' : 'CPI' }}
117+ 'CPI' : 'CPI' },
118+ 2023 : {'Dis No' : 'Dis No' ,
119+ 'Year' : 'Year' ,
120+ 'Seq' : 'Seq' ,
121+ 'Glide' : 'Glide' ,
122+ 'Disaster Group' : 'Disaster Group' ,
123+ 'Disaster Subgroup' : 'Disaster Subgroup' ,
124+ 'Disaster Type' : 'Disaster Type' ,
125+ 'Disaster Subtype' : 'Disaster Subtype' ,
126+ 'Disaster Subsubtype' : 'Disaster Subsubtype' ,
127+ 'Event Name' : 'Event Name' ,
128+ 'Country' : 'Country' ,
129+ 'ISO' : 'ISO' ,
130+ 'Region' : 'Region' ,
131+ 'Continent' : 'Continent' ,
132+ 'Location' : 'Location' ,
133+ 'Origin' : 'Origin' ,
134+ 'Associated Dis' : 'Associated Dis' ,
135+ 'Associated Dis2' : 'Associated Dis2' ,
136+ 'OFDA Response' : 'OFDA Response' ,
137+ 'Appeal' : 'Appeal' ,
138+ 'Declaration' : 'Declaration' ,
139+ "AID Contribution ('000 US$)" : "AID Contribution ('000 US$)" ,
140+ 'Dis Mag Value' : 'Dis Mag Value' ,
141+ 'Dis Mag Scale' : 'Dis Mag Scale' ,
142+ 'Latitude' : 'Latitude' ,
143+ 'Longitude' : 'Longitude' ,
144+ 'Local Time' : 'Local Time' ,
145+ 'River Basin' : 'River Basin' ,
146+ 'Start Year' : 'Start Year' ,
147+ 'Start Month' : 'Start Month' ,
148+ 'Start Day' : 'Start Day' ,
149+ 'End Year' : 'End Year' ,
150+ 'End Month' : 'End Month' ,
151+ 'End Day' : 'End Day' ,
152+ 'Total Deaths' : 'Total Deaths' ,
153+ 'No Injured' : 'No Injured' ,
154+ 'No Affected' : 'No Affected' ,
155+ 'No Homeless' : 'No Homeless' ,
156+ 'Total Affected' : 'Total Affected' ,
157+ "Reconstruction Costs ('000 US$)" : "Reconstruction Costs ('000 US$)" ,
158+ "Reconstruction Costs, Adjusted ('000 US$)" : "Reconstruction Costs, Adjusted ('000 US$)" ,
159+ "Insured Damages ('000 US$)" : "Insured Damages ('000 US$)" ,
160+ "Insured Damages, Adjusted ('000 US$)" : "Insured Damages, Adjusted ('000 US$)" ,
161+ "Total Damages ('000 US$)" : "Total Damages ('000 US$)" ,
162+ "Total Damages, Adjusted ('000 US$)" : "Total Damages, Adjusted ('000 US$)" ,
163+ 'CPI' : 'CPI' ,
164+ 'Adm Level' : 'Adm Level' ,
165+ 'Admin1 Code' : 'Admin1 Code' ,
166+ 'Admin2 Code' : 'Admin2 Code' ,
167+ 'Geo Locations' : 'Geo Locations' }}
118168
119169
120170def assign_hazard_to_emdat (certainty_level , intensity_path_haz , names_path_haz ,
@@ -473,7 +523,7 @@ def check_assigned_track(lookup, checkset):
473523
474524
475525def clean_emdat_df (emdat_file , countries = None , hazard = None , year_range = None ,
476- target_version = 2020 ):
526+ target_version = None ):
477527 """
478528 Get a clean and standardized DataFrame from EM-DAT-CSV-file
479529 (1) load EM-DAT data from CSV to DataFrame and remove header/footer,
@@ -501,7 +551,8 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
501551 (only min and max are considered)
502552 target_version : int
503553 required EM-DAT data format version (i.e. year of download),
504- changes naming of columns/variables (default: 2020)
554+ changes naming of columns/variables,
555+ default: newest available version in ``VARNAMES_EMDAT`` that matches the given emdat_file
505556
506557 Returns
507558 -------
@@ -527,12 +578,16 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
527578
528579 # (2) handle version, clean up, and add columns:
529580 # (2.1) identify underlying EMDAT version of csv:
530- version = 2020
531- for vers in list (VARNAMES_EMDAT .keys ()):
581+ version = None
582+ for vers in sorted (VARNAMES_EMDAT .keys ()):
532583 if len (df_emdat .columns ) >= len (VARNAMES_EMDAT [vers ]) and \
533584 all (item in list (df_emdat .columns ) for item in VARNAMES_EMDAT [vers ].values ()):
534585 version = vers
586+ if not version :
587+ raise ValueError ("the given emdat_file contains unexpected columns and cannot be"
588+ " associated with any known EM-DAT file structure" )
535589 # (2.2) create new DataFrame df_data with column names as target version
590+ target_version = target_version or version
536591 df_data = pd .DataFrame (index = df_emdat .index .values ,
537592 columns = VARNAMES_EMDAT [target_version ].values ())
538593 if 'Year' not in df_data .columns : # make sure column "Year" exists
@@ -551,6 +606,9 @@ def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
551606 years_list .append (np .nan )
552607 df_data [col ] = years_list
553608 if version <= 2018 and target_version >= 2020 :
609+ # create 'Start Year', 'Start Month' and 'Start Day' from 'Start date'
610+ # ignore 'End date'
611+ # replace NaN with None in 'Disaster Subtype', 'Disaster Type' and 'Country'
554612 date_list = list ()
555613 year_list = list ()
556614 month_list = list ()
@@ -705,7 +763,7 @@ def scale_impact2refyear(impact_values, year_values, iso3a_values, reference_yea
705763
706764def emdat_impact_yearlysum (emdat_file_csv , countries = None , hazard = None , year_range = None ,
707765 reference_year = None , imp_str = "Total Damages ('000 US$)" ,
708- version = 2020 ):
766+ version = None ):
709767 """function to load EM-DAT data and sum impact per year
710768
711769 Parameters
@@ -727,16 +785,18 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
727785 year_range : list or tuple
728786 Year range to be extracted, e.g. (2000, 2015);
729787 (only min and max are considered)
730- version : int
788+ version : int, optional
731789 required EM-DAT data format version (i.e. year of download),
732- changes naming of columns/variables (default: 2020)
790+ changes naming of columns/variables,
791+ default: newest available version in ``VARNAMES_EMDAT``
733792
734793 Returns
735794 -------
736795 out : pd.DataFrame
737796 DataFrame with summed impact and scaled impact per
738797 year and country.
739798 """
799+ version = version or max (VARNAMES_EMDAT .keys ())
740800 imp_str = VARNAMES_EMDAT [version ][imp_str ]
741801 df_data = clean_emdat_df (emdat_file_csv , countries = countries , hazard = hazard ,
742802 year_range = year_range , target_version = version )
@@ -773,7 +833,7 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
773833
774834def emdat_impact_event (emdat_file_csv , countries = None , hazard = None , year_range = None ,
775835 reference_year = None , imp_str = "Total Damages ('000 US$)" ,
776- version = 2020 ):
836+ version = None ):
777837 """function to load EM-DAT data and return impact per event
778838
779839 Parameters
@@ -801,8 +861,9 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
801861 imp_str : str
802862 Column name of impact metric in EMDAT CSV,
803863 default = "Total Damages ('000 US$)"
804- version : int
805- EM-DAT version to take variable/column names from (defaul: 2020)
864+ version : int, optional
865+ EM-DAT version to take variable/column names from,
866+ default: newest available version in ``VARNAMES_EMDAT``
806867
807868 Returns
808869 -------
@@ -812,6 +873,7 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
812873 same unit as chosen impact, but multiplied by 1000 if impact is given
813874 as 1000 US$ (e.g. imp_str="Total Damages ('000 US$) scaled").
814875 """
876+ version = version or max (VARNAMES_EMDAT .keys ())
815877 imp_str = VARNAMES_EMDAT [version ][imp_str ]
816878 df_data = clean_emdat_df (emdat_file_csv , hazard = hazard , year_range = year_range ,
817879 countries = countries , target_version = version )
@@ -883,7 +945,11 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
883945 imp_str = "Insured Damages ('000 US$)"
884946 elif "Reconstruction Costs" in imp_str :
885947 imp_str = "Reconstruction Costs ('000 US$)"
886- imp_str = VARNAMES_EMDAT [max (VARNAMES_EMDAT .keys ())][imp_str ]
948+
949+ # use the newest version of EMDAT varnames
950+ version = max (VARNAMES_EMDAT .keys ())
951+
952+ imp_str = VARNAMES_EMDAT [version ][imp_str ]
887953 if not hazard_type_emdat :
888954 hazard_type_emdat = [hazard_type_climada ]
889955 if reference_year == 0 :
@@ -903,7 +969,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
903969 # Load EM-DAT impact data by event:
904970 em_data = emdat_impact_event (emdat_file_csv , countries = countries , hazard = hazard_type_emdat ,
905971 year_range = year_range , reference_year = reference_year ,
906- imp_str = imp_str , version = max ( VARNAMES_EMDAT . keys ()) )
972+ imp_str = imp_str , version = version )
907973
908974 if isinstance (countries , str ):
909975 countries = [countries ]
@@ -915,7 +981,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
915981 return impact_instance , countries
916982 impact_instance .event_id = np .array (em_data .index , int )
917983 impact_instance .event_name = list (
918- em_data [VARNAMES_EMDAT [max ( VARNAMES_EMDAT . keys ()) ]['Dis No' ]])
984+ em_data [VARNAMES_EMDAT [version ]['Dis No' ]])
919985
920986 date_list = list ()
921987 for year in list (em_data ['Year' ]):
@@ -982,8 +1048,7 @@ def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None, countr
9821048 countries_reg_id .append (u_coord .country_to_iso (cntry , "numeric" ))
9831049 except LookupError :
9841050 countries_reg_id .append (0 )
985- df_tmp = em_data [em_data [VARNAMES_EMDAT [
986- max (VARNAMES_EMDAT .keys ())]['ISO' ]].str .contains (cntry )]
1051+ df_tmp = em_data [em_data [VARNAMES_EMDAT [version ]['ISO' ]].str .contains (cntry )]
9871052 if not reference_year :
9881053 impact_instance .eai_exp [idx ] = sum (np .array (df_tmp ["impact" ]) *
9891054 impact_instance .frequency [0 ])
0 commit comments