Skip to content

Commit 2e69861

Browse files
authored
Merge pull request #75 from ISI-MIP/develop
Develop
2 parents f7847bd + 90eee0d commit 2e69861

30 files changed

+3195
-806
lines changed

README.md

Lines changed: 30 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -83,67 +83,50 @@ You may optionally
8383

8484
## Comments for each variable
8585

86-
#### tas
87-
data checked
88-
Works using Normal distribution
86+
#### daily mean temperature (tas)
87+
Two cells fail in complete dataset.
88+
67418 of 67420 cells.
8989

90-
#### rlds
91-
data checked
92-
Works using Normal distribution
93-
Needs a restart to finish some hanging runs
90+
#### tasskew
91+
Work in progress. See #59
9492

95-
#### psl / ps
96-
data checked
97-
Works using Normal distribution
93+
#### tasrange
94+
Calculation almost complete on full dataset.
95+
Some cells do not detrend as expected. Need assessment.
96+
See #60.
9897

99-
#### rsds
100-
Deviationg approach from Lange et al. 2019, using Normal distribution
101-
This is because the yearly cycle is handled inherently here, so no need for specific treatment.
102-
FIXME: produces unrealistic incoming radiation below zero. Needs a different approach
98+
#### precipitation (pr)
99+
Calculation complete.
100+
67339 of 67420 work.
103101

104-
#### hurs (relative humidity)
105-
data checked
106-
With Beta distribution, working
107-
Needs to be rerun so some holes are filled.
102+
#### sea level pressure (ps)
103+
Calculation complete on full dataset.
104+
See #65
108105

109-
GSWP: needs preprocessing to rename from rhs to hurs, and mask invalid values below zero:
106+
#### wind
107+
Calculation complete on full dataset.
108+
Minor issues on the coast of the Arabian Peninsula.
109+
See #66
110110

111-
```
112-
ncrename -O -v rhs,hurs fname1.nc fname2.nc
111+
#### longwave radiation (rlds)
112+
Calculation complete on full dataset.
113+
Need check of trend removal.
113114

114-
cdo setrtomiss,-1e20,0 fname2.nc fname3.nc
115-
```
115+
#### shortwave radiation (rsds)
116116

117-
#### tasskew
118-
data checked
119-
Works using Beta distribution
117+
#### relative humidity (hurs)
120118

121-
#### prsnratio
122-
Beta distribution
123-
Snow included in GSWP3
124119

125-
#### tasrange
126-
With Rice distribution
127-
ADVI introduces strong positive trend.
128-
Possible issue: use real mu, not nu for quantile mapping.
129120

130-
#### tasmin
131-
Constructed from tas, tasskew and tasrange
132-
To do in postprocessing
121+
## Comments for datasets
133122

134-
#### tasmax
135-
Constructed from tas, tasskew and tasrange
136-
To do in postprocessing
123+
GSWP: needs preprocessing to rename from rhs to hurs, and mask invalid values below zero:
137124

138-
#### pr
139-
Gamma distribution
140-
Does not remove all regional trends with NUTS.
141-
Fails with ADVI.
142-
Low latitudes are particularly most difficult.
125+
```
126+
ncrename -O -v rhs,hurs fname1.nc fname2.nc
143127
144-
#### wind
145-
Works using Weibull distribution
146-
FIXME: does not seem to detrend. Seems we rather chose the parameter that adjusted the variability range
128+
cdo setrtomiss,-1e20,0 fname2.nc fname3.nc
129+
```
147130

148131

149132
## Credits

icounter/const.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@
2323
"prsnratio": (0.0, 1.0),
2424
"hurs": (0, 100.0),
2525
"ps": (0, None),
26-
"rsds": (0, None),
26+
"rsds": (0, 501),
2727
"rlds": (0, None),
2828
"wind": (0.0, None),
2929
}
3030

31+
# for rsds, the global maximum is 500
32+
3133

3234
def check_bounds(data, variable):
3335

@@ -128,7 +130,7 @@ def rescale_and_offset_precip(scaled_data, datamin, scale):
128130
"tas": [scale_to_unity, rescale_to_original],
129131
"ps": [scale_to_unity, rescale_to_original],
130132
"rlds": [scale_to_unity, rescale_to_original],
131-
"rsds": [scale_to_unity, rescale_to_original],
133+
"rsds": [mask_and_scale_by_bounds, refill_and_rescale],
132134
"wind": [scale_and_mask, refill_and_rescale],
133135
"hurs": [mask_and_scale_by_bounds, refill_and_rescale],
134136
"prsnratio": [mask_and_scale_by_bounds, refill_and_rescale],

icounter/datahandler.py

Lines changed: 31 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def create_output_dirs(output_dir):
1515
(output_dir / d).mkdir(parents=True, exist_ok=True)
1616

1717

18-
def make_cell_output_dir(output_dir, sub_dir, lat, lon, variable=None):
18+
def make_cell_output_dir(output_dir, sub_dir, lat, lon, variable):
1919

2020
""" params: output_dir: a pathlib object """
2121

@@ -29,7 +29,7 @@ def make_cell_output_dir(output_dir, sub_dir, lat, lon, variable=None):
2929
return lat_sub_dir
3030

3131

32-
def get_valid_subset(df, subset, seed):
32+
def get_subset(df, subset, seed):
3333

3434
orig_len = len(df)
3535
if subset > 1:
@@ -38,23 +38,10 @@ def get_valid_subset(df, subset, seed):
3838
df = df.loc[np.sort(subselect), :].copy()
3939

4040
df.replace([np.inf, -np.inf], np.nan, inplace=True)
41-
df_valid = df.dropna(axis=0, how="any")
4241

43-
print(len(df_valid), "data points used from originally", orig_len, "datapoints.")
42+
print(len(df), "data points used from originally", orig_len, "datapoints.")
4443

45-
return df_valid
46-
47-
48-
# def get_valid_index(df, subset, seed):
49-
50-
# orig_len = len(df)
51-
# if subset > 1:
52-
# np.random.seed(seed)
53-
# subselect = np.random.choice(orig_len, np.int(orig_len/subset), replace=False)
54-
# df = df.loc[np.sort(subselect), :].copy()
55-
56-
# df.replace([np.inf, -np.inf], np.nan, inplace=True)
57-
# return df.dropna(axis=0, how="any").index
44+
return df
5845

5946

6047
def create_dataframe(nct_array, units, data_to_detrend, gmt, variable):
@@ -101,40 +88,26 @@ def create_dataframe(nct_array, units, data_to_detrend, gmt, variable):
10188
return tdf, datamin, scale
10289

10390

104-
def create_ref_df(df, trace_for_qm, ref_period, scale_variability, is_precip=False):
91+
def create_ref_df(df, trace_for_qm, ref_period, params):
10592

10693
df_params = pd.DataFrame(index=df.index)
10794

108-
# print(trace_for_qm["mu"])
109-
110-
df_params.loc[:, "mu"] = trace_for_qm["mu"].mean(axis=0)
111-
df_params.loc[:, "sigma"] = trace_for_qm["sigma"].mean(axis=0)
112-
if is_precip:
113-
df_params.loc[:, "pbern"] = trace_for_qm["pbern"].mean(axis=0)
95+
for p in params:
96+
df_params.loc[:, p] = trace_for_qm[p].mean(axis=0)
11497

11598
df_params.index = df["ds"]
11699

117100
df_params_ref = df_params.loc[ref_period[0] : ref_period[1]]
118101
# mean over all years for each day
119102
df_params_ref = df_params_ref.groupby(df_params_ref.index.dayofyear).mean()
120103

121-
# case of not scaling variability
122-
df_params.loc[:, "sigma_ref"] = df_params["sigma"]
123104
# write the average values for the reference period to each day of the
124105
# whole timeseries
125106
for day in df_params_ref.index:
126-
df_params.loc[df_params.index.dayofyear == day, "mu_ref"] = df_params_ref.loc[
127-
day, "mu"
128-
]
129-
if is_precip:
107+
for p in params:
130108
df_params.loc[
131-
df_params.index.dayofyear == day, "pbern_ref"
132-
] = df_params_ref.loc[day, "pbern"]
133-
# case of scaling sigma
134-
if scale_variability:
135-
df_params.loc[
136-
df_params.index.dayofyear == day, "sigma_ref"
137-
] = df_params_ref.loc[day, "sigma"]
109+
df_params.index.dayofyear == day, p + "_ref"
110+
] = df_params_ref.loc[day, p]
138111

139112
return df_params
140113

@@ -161,64 +134,36 @@ def get_source_timeseries(data_dir, dataset, qualifier, variable, lat, lon):
161134
obs_data.close()
162135
return df
163136

137+
def get_cell_filename(outdir_for_cell, lat, lon, settings):
164138

165-
def save_to_disk(df_with_cfact, settings, lat, lon, dformat=".h5"):
166-
167-
outdir_for_cell = make_cell_output_dir(
168-
settings.output_dir, "timeseries", lat, lon, settings.variable
139+
return outdir_for_cell / (
140+
"ts_" + settings.dataset + "_lat" + str(lat) + "_lon" + str(lon) + settings.storage_format
169141
)
170142

171-
fname = outdir_for_cell / (
172-
"ts_" + settings.dataset + "_lat" + str(lat) + "_lon" + str(lon) + dformat
173-
)
143+
def test_if_data_valid_exists(fname):
174144

175-
if dformat == ".csv":
176-
df_with_cfact.to_csv(fname)
177-
elif dformat == ".h5":
178-
df_with_cfact.to_hdf(fname, "lat_" + str(lat) + "_lon_" + str(lon), mode="w")
145+
if ".h5" in str(fname):
146+
pd.read_hdf(fname)
147+
elif ".csv" in str(fname):
148+
pd.read_csv(fname)
179149
else:
180-
raise NotImplementedError("choose storage format .h5 or csv.")
150+
raise ValueError
181151

182-
print("Saved timeseries to ", fname)
152+
def save_to_disk(df_with_cfact, fname, lat, lon, storage_format):
183153

154+
# outdir_for_cell = make_cell_output_dir(
155+
# settings.output_dir, "timeseries", lat, lon, settings.variable
156+
# )
184157

185-
def read_from_disk(data_path):
158+
# fname = outdir_for_cell / (
159+
# "ts_" + settings.dataset + "_lat" + str(lat) + "_lon" + str(lon) + dformat
160+
# )
186161

187-
if data_path.split(".")[-1] == "h5":
188-
df = pd.read_hdf(data_path)
189-
elif data_path.split(".")[-1] == "csv":
190-
df = pd.read_csv(data_path, index_col=0)
162+
if storage_format == ".csv":
163+
df_with_cfact.to_csv(fname)
164+
elif storage_format == ".h5":
165+
df_with_cfact.to_hdf(fname, "lat_" + str(lat) + "_lon_" + str(lon), mode="w")
191166
else:
192167
raise NotImplementedError("choose storage format .h5 or csv.")
193168

194-
return df
195-
196-
197-
def form_global_nc(ds, time, lat, lon, vnames, torigin):
198-
199-
ds.createDimension("time", None)
200-
ds.createDimension("lat", lat.shape[0])
201-
ds.createDimension("lon", lon.shape[0])
202-
203-
times = ds.createVariable("time", "f8", ("time",))
204-
longitudes = ds.createVariable("lon", "f8", ("lon",))
205-
latitudes = ds.createVariable("lat", "f8", ("lat",))
206-
for var in vnames:
207-
data = ds.createVariable(
208-
var,
209-
"f4",
210-
("time", "lat", "lon"),
211-
chunksizes=(time.shape[0], 1, 1),
212-
fill_value=1e20,
213-
)
214-
times.units = torigin
215-
latitudes.units = "degree_north"
216-
latitudes.long_name = "latitude"
217-
latitudes.standard_name = "latitude"
218-
longitudes.units = "degree_east"
219-
longitudes.long_name = "longitude"
220-
longitudes.standard_name = "longitude"
221-
# FIXME: make flexible or implement loading from source data
222-
latitudes[:] = lat
223-
longitudes[:] = lon
224-
times[:] = time
169+
print("Saved timeseries to ", fname)

0 commit comments

Comments
 (0)