
Commit a3f0e42

Author: Kit Schwarz
Message: removing zarr dataset changes which were causing replication error
1 parent: 9c2679a

File tree: 3 files changed (+24, -44 lines)
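
Every hunk below makes the same mechanical substitution: xr.open_dataset(path, engine="zarr", ...) becomes the zarr-specific wrapper xr.open_zarr(path, ...). A minimal sketch of the equivalence follows (the store name is hypothetical, written just for the comparison). One default is worth noting: open_zarr uses chunks="auto" and so returns dask-backed arrays, while open_dataset defaults to chunks=None, which is why several rewritten call sites pass chunks=None explicitly.

import numpy as np
import xarray as xr

# Hypothetical store written just for this comparison.
xr.Dataset({"v": ("x", np.arange(4.0))}).to_zarr("example.zarr", mode="w")

# Before: generic open with the zarr backend selected explicitly.
ds_old = xr.open_dataset("example.zarr", engine="zarr", chunks="auto")

# After: the zarr-specific wrapper. Its default chunks="auto" yields
# dask-backed arrays, unlike open_dataset's default of chunks=None.
ds_new = xr.open_zarr("example.zarr")

xr.testing.assert_identical(ds_old, ds_new)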

dscim/menu/simple_storage.py

Lines changed: 5 additions & 13 deletions
@@ -81,11 +81,7 @@ def gmst(self):
     @property
     def gmsl(self):
         """Cached GMSL anomalies"""
-        gmsl = (
-            xr.open_dataset(self.gmsl_path, engine="zarr")
-            .gmsl.to_dataframe()
-            .reset_index()
-        )
+        gmsl = xr.open_zarr(self.gmsl_path).gmsl.to_dataframe().reset_index()
 
         return gmsl
 
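The rewritten one-liner chains a zarr read into a flat pandas table. A self-contained sketch of that chain with a hypothetical stand-in for the GMSL store (gmsl.zarr and its dims are invented for illustration):

import numpy as np
import xarray as xr

# Hypothetical stand-in for the GMSL store: one variable over two dims.
ds = xr.Dataset(
    {"gmsl": (("year", "simulation"), np.random.rand(3, 2))},
    coords={"year": [2020, 2021, 2022], "simulation": [0, 1]},
)
ds.to_zarr("gmsl.zarr", mode="w")

# Same chain as the rewritten property: open the store, pick the variable,
# then flatten to a tidy DataFrame with the dims as ordinary columns.
gmsl = xr.open_zarr("gmsl.zarr").gmsl.to_dataframe().reset_index()
print(gmsl.head())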

@@ -120,7 +116,7 @@ def gmsl_anomalies(self):
         """This function takes coastal sector's GMSL relative to 1991-2009.
         No rebasing occurs, as coastal damages are rebased to the same period.
         """
-        df = xr.open_mfdataset(self.gmsl_fair_path, engine="zarr")
+        df = xr.open_zarr(self.gmsl_fair_path)
         df = df.chunk(df.dims)
 
         datasets = []
@@ -277,7 +273,7 @@ def __init__(self, path_econ):
     def econ_vars(self):
         """Economic variables"""
         if self.path[-3:] == "arr":
-            raw = xr.open_dataset(self.path, engine="zarr", consolidated=True)
+            raw = xr.open_zarr(self.path, consolidated=True)
         else:
             raw = xr.open_dataset(self.path)
         return raw[["gdp", "pop"]]
@@ -404,11 +400,7 @@ def adding_up_damages(self):
                 f"Adding up aggregated damages found at {mean_cc}, {mean_no_cc}. These are being loaded..."
             )
             damages = (
-                (
-                    xr.open_dataset(mean_no_cc, engine="zarr").no_cc
-                    - xr.open_dataset(mean_cc, engine="zarr").cc
-                )
-                * self.pop
+                (xr.open_zarr(mean_no_cc).no_cc - xr.open_zarr(mean_cc).cc) * self.pop
             ).sum("region")
         else:
             raise NotImplementedError(
@@ -437,4 +429,4 @@ def risk_aversion_damages(self, ce_type):
             raise NotImplementedError(
                 "Risk-aversion CEs not found. Please run CE_calculation.ipynb for `risk_aversion`."
             )
-        return self.cut(xr.open_dataset(file, engine="zarr"))
+        return self.cut(xr.open_zarr(file))
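
The econ_vars hunk above keeps consolidated=True through the rewrite. For context, a minimal sketch (toy data, hypothetical econ.zarr path) of writing and reading a store with consolidated metadata, which collects all array metadata into one key so opening the store needs fewer reads:

import numpy as np
import xarray as xr

# Hypothetical toy dataset standing in for the economic variables.
ds = xr.Dataset(
    {"gdp": ("region", np.random.rand(3)), "pop": ("region", np.random.rand(3))},
    coords={"region": ["a", "b", "c"]},
)

# Writing with consolidated metadata stores every array's metadata
# under a single key, so readers skip per-array metadata lookups.
ds.to_zarr("econ.zarr", mode="w", consolidated=True)

raw = xr.open_zarr("econ.zarr", consolidated=True)
print(raw[["gdp", "pop"]])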

dscim/preprocessing/preprocessing.py

Lines changed: 17 additions & 25 deletions
@@ -35,7 +35,7 @@ def ce_from_chunk(
     model = chunk.model.values
 
     gdppc = (
-        xr.open_dataset(socioec, engine="zarr", chunks=None)
+        xr.open_zarr(socioec, chunks=None)
         .sel(
             year=year, ssp=ssp, model=model, region=ce_batch_coords["region"], drop=True
         )
@@ -97,11 +97,11 @@ def reduce_damages(
     delta = params["delta"]
     outpath = f"{c['paths']['reduced_damages_library']}/{sector}"
 
-    with xr.open_dataset(damages, engine="zarr", chunks=None)[histclim] as ds:
-        with xr.open_dataset(socioec, engine="zarr", chunks=None) as gdppc:
+    with xr.open_zarr(damages, chunks=None)[histclim] as ds:
+        with xr.open_zarr(socioec, chunks=None) as gdppc:
 
             assert (
-                xr.open_dataset(damages, engine="zarr").chunks["batch"][0] == 15
+                xr.open_zarr(damages).chunks["batch"][0] == 15
             ), "'batch' dim on damages does not have chunksize of 15. Please rechunk."
 
             ce_batch_dims = [i for i in gdppc.dims] + [
@@ -112,18 +112,15 @@ def reduce_damages(
                 i for i in gdppc.region.values if i in ce_batch_coords["region"]
             ]
             ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims]
-            ce_chunks = [
-                xr.open_dataset(damages, engine="zarr").chunks[c][0]
-                for c in ce_batch_dims
-            ]
+            ce_chunks = [xr.open_zarr(damages).chunks[c][0] for c in ce_batch_dims]
 
             template = xr.DataArray(
                 da.empty(ce_shapes, chunks=ce_chunks),
                 dims=ce_batch_dims,
                 coords=ce_batch_coords,
             )
 
-            other = xr.open_dataset(damages, engine="zarr")
+            other = xr.open_zarr(damages)
 
             out = other.map_blocks(
                 ce_from_chunk,
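
Two dask/xarray idioms survive the rewrite in reduce_damages: a dask-backed dataset's .chunks maps each dimension to its tuple of chunk sizes (what the assert on the 'batch' dimension inspects), and map_blocks needs a lazily allocated template describing the output's dims, coords, and chunking. A self-contained sketch of both under toy data; double_chunk is a hypothetical stand-in for ce_from_chunk:

import dask.array as da
import numpy as np
import xarray as xr

# Toy chunked dataset standing in for the damages store.
src = xr.Dataset(
    {"v": ("x", np.arange(30.0))},
    coords={"x": np.arange(30)},
).chunk({"x": 10})

# .chunks maps each dimension to its tuple of chunk sizes,
# the same mapping the 'batch' assert inspects.
assert src.chunks["x"][0] == 10

# Lazy template with the output's dims, coords, and chunking;
# da.empty allocates nothing until computed.
template = xr.DataArray(
    da.empty((30,), chunks=(10,)),
    dims=["x"],
    coords={"x": np.arange(30)},
    name="v",
)

def double_chunk(ds):
    # Called once per block; the result must match the template's shape.
    return ds["v"] * 2

out = src.map_blocks(double_chunk, template=template)
print(out.compute().values)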
@@ -212,9 +209,7 @@ def sum_AMEL(
 
     for sector in sectors:
         print(f"Opening {sector},{params[sector]['sector_path']}")
-        ds = xr.open_dataset(
-            params[sector]["sector_path"], engine="zarr", consolidated=True
-        )
+        ds = xr.open_zarr(params[sector]["sector_path"], consolidated=True)
         ds = ds[params[sector][var]].rename(var)
         ds = xr.where(np.isinf(ds), np.nan, ds)
         datasets.append(ds)
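
The surviving xr.where(np.isinf(ds), np.nan, ds) line is a standard inf-scrubbing idiom: keep finite values and replace +/-inf with NaN so later reductions can skip them. A tiny self-contained illustration:

import numpy as np
import xarray as xr

# xr.where(cond, x, y) takes x where cond is True, else y: here,
# infinities become NaN and finite values pass through unchanged.
ds = xr.DataArray([1.0, np.inf, -np.inf, 4.0], dims="x")
clean = xr.where(np.isinf(ds), np.nan, ds)
print(clean.values)  # [ 1. nan nan  4.]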
@@ -260,12 +255,12 @@ def subset_USA_reduced_damages(
 ):
 
     if recipe == "adding_up":
-        ds = xr.open_dataset(
-            f"{input_path}/{sector}/{recipe}_{reduction}.zarr", engine="zarr"
+        ds = xr.open_zarr(
+            f"{input_path}/{sector}/{recipe}_{reduction}.zarr",
         )
     elif recipe == "risk_aversion":
-        ds = xr.open_dataset(
-            f"{input_path}/{sector}/{recipe}_{reduction}_eta{eta}.zarr", engine="zarr"
+        ds = xr.open_zarr(
+            f"{input_path}/{sector}/{recipe}_{reduction}_eta{eta}.zarr",
         )
 
     subset = ds.sel(region=[i for i in ds.region.values if "USA" in i])
@@ -292,9 +287,8 @@ def subset_USA_ssp_econ(
     out_path,
 ):
 
-    zarr = xr.open_dataset(
+    zarr = xr.open_zarr(
         in_path,
-        engine="zarr",
         consolidated=True,
     )
 
@@ -340,18 +334,16 @@ def clip_damages(
     histclim = params["histclim"]
     delta = params["delta"]
 
-    with xr.open_dataset(path, engine="zarr", chunks=None)[delta] as ds:
-        with xr.open_dataset(econ_path, engine="zarr", chunks=None) as gdppc:
+    with xr.open_zarr(path, chunks=None)[delta] as ds:
+        with xr.open_zarr(econ_path, chunks=None) as gdppc:
 
             ce_batch_dims = [i for i in ds.dims]
             ce_batch_coords = {c: ds[c].values for c in ce_batch_dims}
             ce_batch_coords["region"] = [
                 i for i in ds.region.values if i in gdppc.region.values
             ]
             ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims]
-            ce_chunks = [
-                xr.open_dataset(path, engine="zarr").chunks[c][0] for c in ce_batch_dims
-            ]
+            ce_chunks = [xr.open_zarr(path).chunks[c][0] for c in ce_batch_dims]
             print(ce_chunks)
 
             template = xr.DataArray(
@@ -370,7 +362,7 @@ def chunk_func(
         region = damages.region.values
 
         gdppc = (
-            xr.open_dataset(econ_path, engine="zarr", chunks=None)
+            xr.open_zarr(econ_path, chunks=None)
             .sel(year=year, ssp=ssp, model=model, region=region, drop=True)
             .gdppc
         )
@@ -402,7 +394,7 @@ def chunk_func(
 
         return damages
 
-    data = xr.open_dataset(path, engine="zarr")
+    data = xr.open_zarr(path)
 
     for var in [delta, histclim]:
         out = (

dscim/tests/test_simple_storage.py

Lines changed: 2 additions & 6 deletions
@@ -45,12 +45,8 @@ def test_adding_up_damages(stacked_damages):
         stacked_damages.adding_up_damages,
         (
             (
-                xr.open_dataset(
-                    f"{stacked_damages.ce_path}/adding_up_no_cc.zarr", engine="zarr"
-                ).no_cc
-                - xr.open_dataset(
-                    f"{stacked_damages.ce_path}/adding_up_cc.zarr", engine="zarr"
-                ).cc
+                xr.open_zarr(f"{stacked_damages.ce_path}/adding_up_no_cc.zarr").no_cc
+                - xr.open_zarr(f"{stacked_damages.ce_path}/adding_up_cc.zarr").cc
             )
             * stacked_damages.pop
         ).sum("region"),
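
The hunk shows only the two arguments being compared; the enclosing assertion call is truncated in this diff. As a hedged sketch of the pattern, assuming an xarray closeness assertion such as xr.testing.assert_allclose and toy arrays in place of the fixture data:

import numpy as np
import xarray as xr

# Toy stand-ins for the fixture's pop and CE stores.
pop = xr.DataArray([10.0, 20.0], dims="region")
no_cc = xr.DataArray([3.0, 4.0], dims="region")
cc = xr.DataArray([1.0, 2.0], dims="region")

# Compare a derived result against the same arithmetic recomputed inline,
# mirroring the adding_up_damages expression in the test.
damages = ((no_cc - cc) * pop).sum("region")
xr.testing.assert_allclose(damages, ((no_cc - cc) * pop).sum("region"))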
