Skip to content

Commit da2132b

Browse files
committed
dropping variables without append dim before appending
1 parent 1430510 commit da2132b

File tree

5 files changed

+65
-11
lines changed

5 files changed

+65
-11
lines changed

tests/helpers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import fsspec
99
import numpy as np
10+
import pyproj
1011
import xarray as xr
1112

1213
default_dims = ("time", "y", "x")
@@ -71,6 +72,7 @@ def make_test_dataset(
7172
dims: tuple[str, str, str] = default_dims,
7273
shape: tuple[int, int, int] = default_shape,
7374
chunks: tuple[int, int, int] = default_chunks,
75+
crs: str | None = None,
7476
index: int = 0,
7577
uri: str | None = None,
7678
storage_options: dict[str, Any] | None = None
@@ -113,6 +115,9 @@ def make_test_dataset(
113115
}
114116
)
115117

118+
if crs:
119+
ds["crs"] = xr.DataArray(np.array(0), attrs=pyproj.CRS(crs).to_cf())
120+
116121
ds = ds.chunk(dict(tuple(zip(dims, chunks))))
117122

118123
if not uri:

tests/test_rollbackstore.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ def handle_rollback_action(self, *args):
134134
def test_to_zarr(self):
135135
ds = make_test_dataset(
136136
shape=(1, 50, 100),
137-
chunks=(2, 50, 50)
137+
chunks=(2, 50, 50),
138+
crs="epsg:4326"
138139
)
139140
ds.time.encoding.update(chunks=(10,))
140141
ds.chl.encoding.update(chunks=(2, 50, 50))
@@ -147,6 +148,7 @@ def test_to_zarr(self):
147148
self.assert_dataset_ok(
148149
{'x': 100, 'y': 50, 'time': 1},
149150
{
151+
'crs': (),
150152
'x': (100,), 'y': (50,), 'time': (10,),
151153
'chl': (2, 50, 50),
152154
'tsm': (2, 50, 50),
@@ -167,6 +169,9 @@ def test_to_zarr(self):
167169
('delete_file', 'time/.zarray'),
168170
('delete_file', 'time/.zattrs'),
169171
('delete_file', 'time/0'),
172+
('delete_file', 'crs/.zarray'),
173+
('delete_file', 'crs/.zattrs'),
174+
('delete_file', 'crs/0'),
170175
('delete_file', 'chl/.zarray'),
171176
('delete_file', 'chl/.zattrs'),
172177
('delete_file', 'chl/0.0.0'),
@@ -188,7 +193,8 @@ def test_to_zarr(self):
188193
chunks=(1, 50, 50)
189194
)
190195
# drop variables without "time" dim
191-
slice_1 = slice_1.drop_vars(["x", "y"])
196+
slice_1 = slice_1.drop_vars([k for k, v in slice_1.variables.items()
197+
if "time" not in v.dims])
192198
slice_1.attrs = {}
193199
for k, v in slice_1.variables.items():
194200
v.encoding = {}
@@ -205,6 +211,7 @@ def test_to_zarr(self):
205211
)
206212
self.assert_dataset_ok({'x': 100, 'y': 50, 'time': 2},
207213
{
214+
'crs': (),
208215
'x': (100,), 'y': (50,), 'time': (10,),
209216
'chl': (2, 50, 50),
210217
'tsm': (2, 50, 50),
@@ -234,7 +241,8 @@ def test_to_zarr(self):
234241
chunks=(1, 50, 50)
235242
)
236243
# drop variables without "time" dim
237-
slice_2 = slice_2.drop_vars(["x", "y"])
244+
slice_2 = slice_2.drop_vars([k for k, v in slice_2.variables.items()
245+
if "time" not in v.dims])
238246
for k, v in slice_2.variables.items():
239247
v.encoding = {}
240248
v.attrs = {}
@@ -250,6 +258,7 @@ def test_to_zarr(self):
250258
)
251259
self.assert_dataset_ok({'x': 100, 'y': 50, 'time': 3},
252260
{
261+
'crs': (),
253262
'x': (100,), 'y': (50,), 'time': (10,),
254263
'chl': (2, 50, 50),
255264
'tsm': (2, 50, 50),

tests/test_tailoring.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import unittest
66

77
import numpy as np
8+
import pyproj
89
import xarray as xr
910

1011
from zappend.metadata import DatasetMetadata
@@ -125,7 +126,37 @@ def test_it_completes_vars(self):
125126

126127
class TailorSliceDatasetTest(unittest.TestCase):
127128

128-
def test_it_sets_metadata(self):
129+
def test_it_drops_constant_variables(self):
130+
ds = xr.Dataset(
131+
{
132+
"a": xr.DataArray(np.zeros((2, 3, 4)),
133+
dims=("time", "y", "x")),
134+
"b": xr.DataArray(np.zeros((2, 3, 4)),
135+
dims=("time", "y", "x")),
136+
"spatial_ref": xr.DataArray(
137+
np.array(0),
138+
attrs=pyproj.CRS("EPSG:4326").to_cf()
139+
)
140+
},
141+
coords={
142+
"x": xr.DataArray(np.linspace(0.0, 1.0, 4),
143+
dims="x"),
144+
"y": xr.DataArray(np.linspace(0.0, 1.0, 3),
145+
dims="y"),
146+
}
147+
)
148+
tailored_ds = tailor_slice_dataset(
149+
ds,
150+
DatasetMetadata.from_dataset(ds, {}),
151+
"time"
152+
)
153+
self.assertIsInstance(tailored_ds, xr.Dataset)
154+
self.assertEqual(
155+
{"a", "b"},
156+
set(tailored_ds.variables.keys())
157+
)
158+
159+
def test_it_clears_encoding_and_attrs(self):
129160
ds = xr.Dataset({
130161
"a": xr.DataArray(np.zeros((2, 3, 4)),
131162
dims=("time", "y", "x"),
@@ -148,18 +179,17 @@ def test_it_sets_metadata(self):
148179
"fill_value": -1}},
149180
}
150181
}
151-
)
182+
),
183+
"time"
152184
)
153185
self.assertIsInstance(tailored_ds, xr.Dataset)
154-
self.assertEqual(
155-
{"a", "b"},
156-
set(tailored_ds.variables.keys())
157-
)
158186

187+
self.assertIn("a", tailored_ds.variables)
159188
a = tailored_ds.a
160189
self.assertEqual({}, a.encoding)
161190
self.assertEqual({}, a.attrs)
162191

192+
self.assertIn("b", tailored_ds.variables)
163193
b = tailored_ds.b
164194
self.assertEqual({}, b.encoding)
165195
self.assertEqual({}, b.attrs)

zappend/processor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ def update_target_from_slice(ctx: Context,
9999
target_dir = ctx.target_dir
100100
append_dim_name = ctx.append_dim_name
101101

102-
slice_ds = tailor_slice_dataset(slice_ds, ctx.target_metadata)
102+
slice_ds = tailor_slice_dataset(slice_ds,
103+
ctx.target_metadata,
104+
append_dim_name)
103105

104106
# TODO: adjust global attributes dependent on append_dim,
105107
# e.g., time coverage

zappend/tailoring.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,19 @@ def tailor_target_dataset(
2929

3030
def tailor_slice_dataset(
3131
dataset: xr.Dataset,
32-
target_metadata: DatasetMetadata
32+
target_metadata: DatasetMetadata,
33+
append_dim_name: str
3334
) -> xr.Dataset:
3435
dataset = _strip_dataset(dataset, target_metadata)
3536
dataset = _complete_dataset(dataset, target_metadata)
3637

38+
const_variables = [k for k, v in dataset.variables.items() if
39+
append_dim_name not in v.dims]
40+
if const_variables:
41+
# Strip variables that do not have append_dim_name
42+
# as dimension, e.g., "x", "y", "crs", ...
43+
dataset = dataset.drop_vars(const_variables)
44+
3745
# Remove any encoding and attributes from slice,
3846
# since both are prescribed by target
3947
dataset.attrs.clear()

0 commit comments

Comments
 (0)