Skip to content

Commit 3aba34f

Browse files
committed
add support of WGMS data in the geodetic scripts
1 parent 7e3966d commit 3aba34f

File tree

12 files changed

+302
-39
lines changed

12 files changed

+302
-39
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import os, sys
2+
import glob
3+
import pandas as pd
4+
import geopandas as gpd
5+
6+
from data_processing.Dataset import Dataset
7+
from data_processing.utils import get_rgi
8+
from data_processing.wgms import wgms_folder
9+
from data_processing.glacier_utils import get_region_name
10+
11+
processed_stakes_folder = os.path.join(wgms_folder, "processed")
12+
13+
14+
def processed_features_stakes_path(rgi_region):
15+
if rgi_region is None:
16+
return os.path.join(processed_stakes_folder, "all.csv")
17+
else:
18+
assert isinstance(rgi_region, int)
19+
return os.path.join(processed_stakes_folder, f"region_{rgi_region}.csv")
20+
21+
22+
def build_monthly_data(data, cfg, rgi_region=None):
23+
24+
assert (
25+
rgi_region is not None
26+
), "For the moment only one single RGI region can be used at a time with the WGMS data."
27+
28+
data = get_rgi(data=data, region=rgi_region)
29+
30+
# Drop measurements where no RGIId was found
31+
data = data[data.RGIId.notnull()]
32+
33+
region_name = get_region_name(rgi_region)
34+
dataset = Dataset(cfg, data=data, region_name=region_name, region_id=rgi_region)
35+
36+
voi_topographical = ["aspect", "slope", "svf"]
37+
38+
# Retrieve the topographical features for each stake measurement based on the latitude and longitude of the stake and add them to the dataset
39+
dataset.get_topo_features(vois=voi_topographical)
40+
41+
df = dataset.data
42+
df["MONTH_START"] = [str(date)[4:6] for date in df.FROM_DATE]
43+
df["MONTH_END"] = [str(date)[4:6] for date in df.TO_DATE]
44+
# df.MONTH_START.unique(), df.MONTH_END.unique()
45+
46+
dataset.get_climate_features()
47+
48+
# Specify the short names of the climate variables available in the dataset
49+
vois_climate = ["t2m", "tp", "slhf", "sshf", "ssrd", "fal", "str"]
50+
51+
# For each record, convert to a monthly time resolution
52+
dataset.convert_to_monthly(
53+
vois_climate=vois_climate, vois_topographical=voi_topographical
54+
)
55+
56+
os.makedirs(processed_stakes_folder, exist_ok=True)
57+
dataset.data.to_csv(processed_features_stakes_path(rgi_region), index=False)

massbalancemachine/data_processing/glacier_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import oggm
99
import venv
1010
import subprocess
11+
from typing import Union
1112

1213
import config
1314
from data_processing.product_utils import mbm_path
@@ -107,6 +108,15 @@ def get_region_shape_file(region: str):
107108
return shp_path
108109

109110

111+
def get_region_name(region: Union[str, int]):
112+
if isinstance(region, int):
113+
region = f"{region:02d}"
114+
rgi_version = "62"
115+
fp = oggm.utils.get_rgi_region_file(region, version=rgi_version)
116+
rgi_name = os.path.basename(fp).split("_", 1)[1].replace(".shp", "").split("_")[1]
117+
return rgi_name
118+
119+
110120
def get_region_area_bounds(region):
111121
if not isinstance(region, str):
112122
region = f"{region:02d}"

massbalancemachine/dataloader/SourceManager.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from data_processing.Product import Product
2727
from dataloader.DataLoader import DataLoader, set_dataloader_splits
2828
import data_preprocessing.iceland
29+
import data_preprocessing.wgms
30+
import data_processing.wgms
2931
import config
3032

3133
###
@@ -107,7 +109,12 @@
107109
)
108110

109111
_default_test_glaciers_iceland = []
110-
_default_train_glaciers_iceland = ["RGI60-06.00228", "RGI60-06.00232"]
112+
_default_train_glaciers_iceland = [
113+
"RGI60-06.00228",
114+
"RGI60-06.00232",
115+
"RGI60-06.00236",
116+
"RGI60-06.00238",
117+
]
111118

112119
_default_test_glaciers_norway = [
113120
"RGI60-08.01258",
@@ -178,6 +185,8 @@ def _default_input(sourceData):
178185
return []
179186
elif sourceData == "norway":
180187
return []
188+
elif "wgms" in sourceData:
189+
return []
181190
else:
182191
raise ValueError(f"source_data={sourceData} is unknown")
183192

@@ -202,8 +211,8 @@ def __init__(self, cfg, params, test_split_on="GLACIER"):
202211
self.glaciers_list = []
203212
self.mean_stakes_elevation = {}
204213

205-
def train_test_glaciers(self):
206-
return self.train_glaciers, self.test_glaciers
214+
# def train_test_glaciers(self):
215+
# return self.train_glaciers, self.test_glaciers
207216

208217
def load_stakes_data(self):
209218
raise NotImplemented(
@@ -236,12 +245,34 @@ def train_test_sets(self):
236245
meta_data_columns=self.cfg.metaData,
237246
)
238247

239-
# Ensure all test glaciers exist in the dataset
240248
existing_glaciers = set(
241249
dataloader.data.GLACIER.unique()
242250
if self.glacierNamesProvided
243251
else dataloader.data.RGIId.unique()
244252
)
253+
254+
if self.test_glaciers is None:
255+
assert (
256+
self.train_glaciers is not None
257+
), "If test_glaciers is not defined, train_glaciers should be defined to automatically determine the test glaciers based on available data."
258+
self.test_glaciers = list(
259+
set(existing_glaciers).difference(self.train_glaciers)
260+
)
261+
print(
262+
f"Determining test glaciers based on available data: {self.test_glaciers}"
263+
)
264+
if self.train_glaciers is None:
265+
assert (
266+
self.test_glaciers is not None
267+
), "If train_glaciers is not defined, test_glaciers should be defined to automatically determine the train glaciers based on available data."
268+
self.train_glaciers = list(
269+
set(existing_glaciers).difference(self.test_glaciers)
270+
)
271+
print(
272+
f"Determining train glaciers based on available data: {self.train_glaciers}"
273+
)
274+
275+
# Ensure all test glaciers exist in the dataset
245276
missing_glaciers = [g for g in self.test_glaciers if g not in existing_glaciers]
246277

247278
if missing_glaciers:
@@ -568,3 +599,47 @@ def load_stakes_data(self):
568599
data_monthly = dataloader.data
569600

570601
return data_monthly
602+
603+
604+
class SourceManagerWGMS(SourceManager):
605+
def __init__(self, cfg, params, *args, rgi_region=None, **kwargs):
606+
self.train_glaciers = params["training"].get("train_glaciers")
607+
self.test_glaciers = params["training"].get("test_glaciers")
608+
self.rgi_region = rgi_region
609+
super().__init__(cfg, params, *args, **kwargs)
610+
611+
def load_stakes_data(self):
612+
# TODO: for the moment the arguments are the RGIId but we should manage this properly in the future
613+
614+
path_preprocessed = data_preprocessing.wgms.processed_features_stakes_path(
615+
self.rgi_region
616+
)
617+
p = Product(path_preprocessed)
618+
if not p.is_up_to_date():
619+
data = data_processing.wgms.load_processed_wgms(rgi_region=self.rgi_region)
620+
data_preprocessing.wgms.build_monthly_data(data, self.cfg, self.rgi_region)
621+
p.gen_chk()
622+
data = pd.read_csv(path_preprocessed)
623+
_format_data_credit(
624+
"WGMS (2026): Fluctuations of Glaciers (FoG) Database. World Glacier Monitoring Service (WGMS), Zurich, Switzerland. https://doi.org/10.5904/wgms-fog-2026-02-10",
625+
"Open access under the requirement of correct citation",
626+
[
627+
"WGMS (2023): Global Glacier Change Bulletin No. 5 (2020-2021). Michael Zemp, Isabelle Gärtner-Roer, Samuel U. Nussbaumer, Ethan Z. Welty, Inés Dussaillant, and Jacqueline Bannwart (eds.), ISC (WDS) / IUGG (IACS) / UNEP / UNESCO / WMO, World Glacier Monitoring Service, Zurich, Switzerland, 134 pp. Based on database version https://doi.org/10.5904/wgms-fog-2023-09.",
628+
"WGMS (2013): Glacier Mass Balance Bulletin No. 12 (2010-2011). Michael Zemp, Samuel U. Nussbaumer, Kathrin Naegeli, Isabelle Gärtner-Roer, Frank Paul, Martin Hoelzle, and Wilfried Haeberli (eds.), ICSU (WDS) / IUGG (IACS) / UNEP / UNESCO / WMO, World Glacier Monitoring Service, Zurich, Switzerland, 106 pp. Based on database version https://doi.org/10.5904/wgms-fog-2013-11.",
629+
"WGMS (2012): Fluctuations of Glaciers 2005-2010 (Vol. X): Michael Zemp, Holger Frey, Isabelle Gärtner-Roer, Samuel U. Nussbaumer, Martin Hoelzle, Frank Paul, and Wilfried Haeberli (eds.), ICSU (WDS) / IUGG (IACS) / UNEP / UNESCO / WMO, World Glacier Monitoring Service, Zurich, Switzerland. Based on database version https://doi.org/10.5904/wgms-fog-2012-11.",
630+
],
631+
)
632+
633+
data["aspect"] = 180 * data["aspect"] / np.pi
634+
data["slope"] = 180 * data["slope"] / np.pi
635+
data["t2m"] = data["t2m"] - 273.15
636+
637+
dataloader = DataLoader(
638+
self.cfg,
639+
data=data,
640+
random_seed=self.cfg.seed,
641+
meta_data_columns=self.cfg.metaData,
642+
)
643+
data_monthly = dataloader.data
644+
645+
return data_monthly

massbalancemachine/dataloader/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
SourceManagerSwitzerland,
55
SourceManagerIceland,
66
SourceManagerNorway,
7+
SourceManagerWGMS,
78
)
89
from dataloader.GeoDataLoader import GeoDataLoader

massbalancemachine/plots/perf_plots.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,8 +502,14 @@ def predVSTruthTimeSeries(
502502
scores_predVSTruth[k]["winter"] = scores_winter[k]
503503
if hasSummer:
504504
scores_predVSTruth[k]["summer"] = scores_summer[k]
505+
506+
# Drop entries for which PERIOD is not included in ["annual", "winter", "summer"]
507+
grouped_ids_drop = grouped_ids[
508+
grouped_ids.PERIOD.isin(["annual", "winter", "summer"])
509+
]
510+
505511
predVSTruth(
506-
grouped_ids,
512+
grouped_ids_drop,
507513
ax1,
508514
scores=scores_predVSTruth,
509515
hue="PERIOD",

massbalancemachine/training/training.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,15 @@ def assessOnTest(log_dir, model, geodataloader_test, light=False):
332332
mse_winter, rmse_winter, mae_winter, pearson_corr_winter, r2_winter, bias_winter = (
333333
scores(predWinter, targetWinter)
334334
)
335-
mse_summer, rmse_summer, mae_summer, pearson_corr_summer, r2_summer, bias_summer = (
336-
scores(predSummer, targetSummer)
337-
)
335+
if len(targetSummer) > 0:
336+
(
337+
mse_summer,
338+
rmse_summer,
339+
mae_summer,
340+
pearson_corr_summer,
341+
r2_summer,
342+
bias_summer,
343+
) = scores(predSummer, targetSummer)
338344

339345
# Make plots
340346
plot_pred_vs_obs(log_dir, targetAll, predAll, {"rmse": rmse, "mae": mae})
@@ -348,7 +354,7 @@ def assessOnTest(log_dir, model, geodataloader_test, light=False):
348354
plt.savefig(os.path.join(log_dir, "geodetic_test.png"))
349355
plt.close(fig)
350356

351-
return {
357+
stats = {
352358
"mse": mse.item(),
353359
"rmse": rmse.item(),
354360
"mae": mae.item(),
@@ -367,13 +373,15 @@ def assessOnTest(log_dir, model, geodataloader_test, light=False):
367373
"pearson_winter": pearson_corr_winter.item(),
368374
"r2_winter": r2_winter.item(),
369375
"bias_winter": bias_winter.item(),
370-
"mse_summer": mse_summer.item(),
371-
"rmse_summer": rmse_summer.item(),
372-
"mae_summer": mae_summer.item(),
373-
"pearson_summer": pearson_corr_summer.item(),
374-
"r2_summer": r2_summer.item(),
375-
"bias_summer": bias_summer.item(),
376376
}
377+
if len(targetSummer) > 0:
378+
stats["mse_summer"] = mse_summer.item()
379+
stats["rmse_summer"] = rmse_summer.item()
380+
stats["mae_summer"] = mae_summer.item()
381+
stats["pearson_summer"] = pearson_corr_summer.item()
382+
stats["r2_summer"] = r2_summer.item()
383+
stats["bias_summer"] = bias_summer.item()
384+
return stats
377385

378386

379387
def plot_pred_vs_obs(log_dir, target, pred, scores):

scripts/common.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def parseParams(params):
4747
"batch_size": batch_size,
4848
"weight_decay": weight_decay,
4949
"scalingStakes": scalingStakes,
50+
"test_glaciers": params["training"].get("test_glaciers"),
51+
"train_glaciers": params["training"].get("train_glaciers"),
5052
},
5153
}
5254

scripts/geo/eval.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,19 @@
103103
],
104104
notMetaDataNotFeatures=["POINT_BALANCE", "svf"],
105105
)
106+
elif "wgms" in sourceData:
107+
cfg = mbm.Config(
108+
metaData=[
109+
"RGIId",
110+
"ID",
111+
"N_MONTHS",
112+
"MONTHS",
113+
"PERIOD",
114+
"YEAR",
115+
"POINT_ELEVATION",
116+
],
117+
notMetaDataNotFeatures=["POINT_BALANCE", "svf"],
118+
)
106119
else:
107120
raise ValueError(f"source_data={sourceData} is unknown")
108121

@@ -131,6 +144,15 @@
131144
datasetManager = mbm.dataloader.SourceManagerNorway(
132145
cfg, params, test_split_on=keyGlacier
133146
)
147+
elif "wgms" in sourceData:
148+
_split = sourceData.split(":")
149+
if len(_split) > 1:
150+
rgi_region = int(_split[1])
151+
else:
152+
rgi_region = None
153+
datasetManager = mbm.dataloader.SourceManagerWGMS(
154+
cfg, params, test_split_on="RGIId", rgi_region=rgi_region
155+
)
134156
train_set, test_set, months_head_pad, months_tail_pad = datasetManager.train_test_sets()
135157

136158

@@ -173,6 +195,8 @@
173195
test_glaciers = list(data_test.GLACIER.unique())
174196
elif sourceData in ["iceland", "norway"]:
175197
test_glaciers = list(data_test.RGIId.unique())
198+
elif "wgms" in sourceData:
199+
test_glaciers = list(data_test.RGIId.unique())
176200

177201
assert set(df_X_test_subset.RGIId.unique()) == set(test_glaciers)
178202

@@ -204,11 +228,14 @@
204228
"r2": scores["winter"]["r2"],
205229
"bias": scores["winter"]["bias"],
206230
}
207-
scores_summer = {
208-
"rmse": scores["summer"]["rmse"],
209-
"r2": scores["summer"]["r2"],
210-
"bias": scores["summer"]["bias"],
211-
}
231+
if "summer" in scores:
232+
scores_summer = {
233+
"rmse": scores["summer"]["rmse"],
234+
"r2": scores["summer"]["r2"],
235+
"bias": scores["summer"]["bias"],
236+
}
237+
else:
238+
scores_summer = None
212239

213240
fig = mbm.plots.predVSTruthTimeSeries(
214241
grouped_ids=grouped_ids,
@@ -289,6 +316,8 @@
289316
train_glaciers = list(data_train.GLACIER.unique())
290317
elif sourceData in ["iceland", "norway"]:
291318
train_glaciers = list(data_train.RGIId.unique())
319+
elif "wgms" in sourceData:
320+
train_glaciers = list(data_train.RGIId.unique())
292321

293322
# Create dataloader
294323
train_gdl = mbm.dataloader.GeoDataLoader(
@@ -315,11 +344,14 @@
315344
"r2": scores_train["winter"]["r2"],
316345
"bias": scores_train["winter"]["bias"],
317346
}
318-
scores_summer = {
319-
"rmse": scores_train["summer"]["rmse"],
320-
"r2": scores_train["summer"]["r2"],
321-
"bias": scores_train["summer"]["bias"],
322-
}
347+
if "summer" in scores_train:
348+
scores_summer = {
349+
"rmse": scores_train["summer"]["rmse"],
350+
"r2": scores_train["summer"]["r2"],
351+
"bias": scores_train["summer"]["bias"],
352+
}
353+
else:
354+
scores_summer = None
323355

324356
fig = mbm.plots.predVSTruthTimeSeries(
325357
grouped_ids=grouped_ids_train,

0 commit comments

Comments
 (0)