11import os
2+ import shutil
23import urllib .request
34import zipfile
45import pandas as pd
# Root directory under which downloaded datasets are stored.
data_path = ".data"

# `wgms_zip_file` (the archive file name) is defined outside this excerpt.
wgms_source_data_link = f"https://wgms.ch/downloads/{wgms_zip_file}"
# The downloaded archive is kept in a dedicated WGMS sub-directory.
local_path_wgms = f"{data_path}/WGMS/{wgms_zip_file}"

# Extraction folder: the archive name without its extension. splitext only
# strips the trailing extension, whereas str.replace('.zip', '') would also
# remove a '.zip' occurring in the middle of the file name.
wgms_folder = f"{data_path}/WGMS/{os.path.splitext(wgms_zip_file)[0]}"
14+
15+
def _clean_extracted_wgms():
    """Delete the extracted WGMS directory tree, if it is present.

    Nothing happens when ``wgms_folder`` is not an existing directory.
    """
    if not os.path.isdir(wgms_folder):
        return
    shutil.rmtree(wgms_folder)
1319
1420
def check_and_download_wgms():
    """Make sure the WGMS archive is downloaded and extracted locally.

    The directory that holds the archive is created if needed. When
    ``wgms_folder`` already exists nothing else is done. Otherwise the
    archive is fetched from ``wgms_source_data_link`` (unless
    ``local_path_wgms`` is already on disk) and extracted into
    ``wgms_folder``.

    Both steps are staged next to their final location and renamed into
    place only once they have completed, so an interrupted download or
    extraction is never mistaken for a finished one on the next call.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` or
        ``zipfile`` is propagated unchanged after the staging artefacts
        have been removed.
    """
    # Derive the directory from the archive path so the two cannot drift apart.
    os.makedirs(os.path.dirname(local_path_wgms) or ".", exist_ok=True)
    if os.path.isdir(wgms_folder):
        return

    if not os.path.isfile(local_path_wgms):
        print("Downloading data from WGMS website")
        partial_download = f"{local_path_wgms}.part"
        try:
            urllib.request.urlretrieve(wgms_source_data_link, partial_download)
            os.replace(partial_download, local_path_wgms)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_download):
                os.remove(partial_download)

    print("Unzipping WGMS archive")
    partial_extract = f"{wgms_folder}.partial"
    # Discard leftovers from a previously interrupted extraction.
    shutil.rmtree(partial_extract, ignore_errors=True)
    try:
        with zipfile.ZipFile(local_path_wgms, "r") as zip_ref:
            os.makedirs(partial_extract, exist_ok=True)
            zip_ref.extractall(partial_extract)
        os.replace(partial_extract, wgms_folder)
    finally:
        # No-op after a successful rename; cleans up after a failure.
        shutil.rmtree(partial_extract, ignore_errors=True)
2430
@@ -72,7 +78,6 @@ def parse_wgms_format(data_mb):
7278 "density" ,
7379 "density_unc" ,
7480 "method" ,
75- "balance_code" ,
7681 "remarks" ,
7782 ]
7883 )
@@ -86,9 +91,33 @@ def parse_wgms_format(data_mb):
8691 "elevation" : "POINT_ELEVATION" ,
8792 "begin_date" : "FROM_DATE" ,
8893 "end_date" : "TO_DATE" ,
94+ "balance_code" : "PERIOD" ,
8995 },
9096 )
97+ assert new_df .ID .nunique () == new_df .shape [0 ], "It seems that ID are not unique"
98+
9199 new_df ["FROM_DATE" ] = pd .to_datetime (new_df ["FROM_DATE" ]).dt .strftime ("%Y%m%d" )
92- new_df ["TO_DATE" ] = pd .to_datetime (new_df ["FROM_DATE " ]).dt .strftime ("%Y%m%d" )
100+ new_df ["TO_DATE" ] = pd .to_datetime (new_df ["TO_DATE " ]).dt .strftime ("%Y%m%d" )
93101
94102 return new_df
103+
104+
def filter_dates(df, threshold_date_uncertainty=5):
    """Drop rows whose begin or end date is too uncertain.

    Args:
        df: DataFrame with ``begin_date_unc`` and ``end_date_unc`` columns.
        threshold_date_uncertainty: Largest accepted value for both
            uncertainty columns (inclusive). Defaults to 5; the unit is that
            of the uncertainty columns (presumably days -- to be confirmed
            against the WGMS documentation).

    Returns:
        A new DataFrame holding only the rows where both uncertainties are at
        or below the threshold. Rows with a missing uncertainty are dropped
        because a comparison with NaN is False. ``df`` is left untouched.
    """
    end_ok = df["end_date_unc"] <= threshold_date_uncertainty
    begin_ok = df["begin_date_unc"] <= threshold_date_uncertainty
    return df[end_ok & begin_ok]
114+
115+
def load_processed_wgms(rgi_region=None):
    """Download (if needed), load, filter and reformat the WGMS data.

    The steps run in this order: ``check_and_download_wgms``,
    ``load_wgms_data``, ``filter_dates`` and ``parse_wgms_format``.

    Args:
        rgi_region: When not None, only rows whose ``rgi_region`` column
            equals this value are returned.

    Returns:
        The processed DataFrame, optionally restricted to one region.
    """
    check_and_download_wgms()
    processed = parse_wgms_format(filter_dates(load_wgms_data()))
    if rgi_region is None:
        return processed
    in_region = processed.rgi_region == rgi_region
    return processed.loc[in_region]
0 commit comments