Commit 0d8b549

Merge pull request #26 from bcdev/forman-25-sizes_instead_of_dims
Use Dataset.sizes instead of .dims
2 parents a1d6475 + 930e61e commit 0d8b549

7 files changed: +36 -32 lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions

@@ -1,5 +1,7 @@
 ## Version 0.2.1 (in development)

+* Using `sizes` instead of `dims` attribute of `xarray.Dataset` in implementation
+  code. [#25]
 * Enhanced documentation including docstrings of several Python API objects.
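For context, a minimal sketch of the distinction the changelog entry refers to (not part of this commit; the motivation is inferred, since recent xarray releases recommend the stable `sizes` mapping over mapping-style use of `Dataset.dims`):

    import numpy as np
    import xarray as xr

    # A toy dataset comparable to the ones built in the tests below.
    ds = xr.Dataset(
        {"chl": xr.DataArray(np.zeros((3, 10, 20)), dims=("time", "y", "x"))}
    )

    # `sizes` is a mapping of dimension name -> length; this is what the
    # implementation code now uses throughout.
    print(dict(ds.sizes))      # {'time': 3, 'y': 10, 'x': 20}

    # It is also available on individual variables, so membership checks
    # such as `"time" in v.sizes` (see tests/test_rollbackstore.py) work.
    print(dict(ds.chl.sizes))  # {'time': 3, 'y': 10, 'x': 20}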

tests/fsutil/test_transaction.py

Lines changed: 2 additions & 2 deletions

@@ -92,10 +92,10 @@ def create_test_folder(rollback_cb: Callable):
                 )

                 if fail:
-                    raise OSError("disk full")
+                    raise OSError("disk full (this is a test!)")
             except OSError as e:
                 if fail:
-                    self.assertEqual("disk full", f"{e}")
+                    self.assertEqual("disk full (this is a test!)", f"{e}")
                 else:
                     raise

tests/test_api.py

Lines changed: 1 addition & 1 deletion

@@ -25,6 +25,6 @@ def test_some_slices(self):
         slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()]
         zappend(slices, target_dir=target_dir)
         ds = xr.open_zarr(target_dir)
-        self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.dims)
+        self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
         self.assertEqual({"chl", "tsm"}, set(ds.data_vars))
         self.assertEqual({"time", "y", "x"}, set(ds.coords))

tests/test_metadata.py

Lines changed: 7 additions & 7 deletions

@@ -18,7 +18,7 @@ def test_dims_without_fixed_dims_given(self):
         ds = xr.Dataset({"a": xr.DataArray(np.zeros((2, 3, 4)), dims=("z", "y", "x"))})
         self.assertEqual(
             {"z": 2, "y": 3, "x": 4},
-            DatasetMetadata.from_dataset(ds, {"append_dim": "z"}).dims,
+            DatasetMetadata.from_dataset(ds, {"append_dim": "z"}).sizes,
         )

     def test_dims_with_fixed_dims_given(self):
@@ -27,7 +27,7 @@ def test_dims_with_fixed_dims_given(self):
             {"z": 2, "y": 3, "x": 4},
             DatasetMetadata.from_dataset(
                 ds, {"append_dim": "z", "fixed_dims": {"y": 3, "x": 4}}
-            ).dims,
+            ).sizes,
         )

     # noinspection PyMethodMayBeStatic
@@ -84,7 +84,7 @@ def test_add_missing_variables(self):
         self.assertEqual(
             {
                 "attrs": {},
-                "dims": {"time": 2, "x": 4, "y": 3},
+                "sizes": {"time": 2, "x": 4, "y": 3},
                 "variables": {
                     "a": {
                         "attrs": {},
@@ -133,7 +133,7 @@ def test_merge_variable_metadata(self):
         self.assertEqual(
             {
                 "attrs": {},
-                "dims": {"time": 2, "x": 4, "y": 3},
+                "sizes": {"time": 2, "x": 4, "y": 3},
                 "variables": {
                     "a": {
                         "attrs": {"title": "A", "units": "m/s"},
@@ -159,7 +159,7 @@ def test_move_variable_encoding_from_attrs(self):
         self.assertEqual(
             {
                 "attrs": {},
-                "dims": {"time": 2, "x": 4, "y": 3},
+                "sizes": {"time": 2, "x": 4, "y": 3},
                 "variables": {
                     "a": {
                         "attrs": {},
@@ -199,7 +199,7 @@ def test_variable_defaults(self):
         self.assertEqual(
             {
                 "attrs": {},
-                "dims": {"time": 2, "x": 4, "y": 3},
+                "sizes": {"time": 2, "x": 4, "y": 3},
                 "variables": {
                     "a": {
                         "attrs": {},
@@ -246,7 +246,7 @@ def test_variable_encoding_from_netcdf(self):
         self.assertEqual(
             {
                 "attrs": {},
-                "dims": {"time": 2, "x": 4, "y": 3},
+                "sizes": {"time": 2, "x": 4, "y": 3},
                 "variables": {
                     "a": {
                         "attrs": {},

tests/test_processor.py

Lines changed: 4 additions & 4 deletions

@@ -28,7 +28,7 @@ def test_process_one_slice(self):
         ds = xr.open_zarr(
             target_dir.uri, storage_options=target_dir.storage_options, decode_cf=True
         )
-        self.assertEqual({"time": 1, "y": 10, "x": 20}, ds.dims)
+        self.assertEqual({"time": 1, "y": 10, "x": 20}, ds.sizes)
         self.assertEqual({"x", "y", "time", "chl", "tsm"}, set(ds.variables))

         self.assertEqual((20,), ds.x.encoding.get("chunks"))
@@ -53,7 +53,7 @@ def test_process_two_slices(self):
         )

         self.assertEqual({"x", "y", "time", "chl", "tsm"}, set(ds.variables))
-        self.assertEqual({"time": 2, "y": 10, "x": 20}, ds.dims)
+        self.assertEqual({"time": 2, "y": 10, "x": 20}, ds.sizes)

         self.assertEqual((20,), ds.x.encoding.get("chunks"))
         self.assertEqual((10,), ds.y.encoding.get("chunks"))
@@ -91,7 +91,7 @@ def test_process_many_slices_with_single_append_dim_chunk(self):
         )

         self.assertEqual({"x", "y", "time", "chl", "tsm"}, set(ds.variables))
-        self.assertEqual({"time": many, "y": 10, "x": 20}, ds.dims)
+        self.assertEqual({"time": many, "y": 10, "x": 20}, ds.sizes)

         self.assertEqual((20,), ds.x.encoding.get("chunks"))
         self.assertEqual((10,), ds.y.encoding.get("chunks"))
@@ -129,7 +129,7 @@ def test_process_two_slices_with_chunk_overlap(self):
         )

         self.assertEqual({"x", "y", "time", "chl", "tsm"}, set(ds.variables))
-        self.assertEqual({"time": 4, "y": 10, "x": 20}, ds.dims)
+        self.assertEqual({"time": 4, "y": 10, "x": 20}, ds.sizes)

         self.assertEqual((20,), ds.x.encoding.get("chunks"))
         self.assertEqual((10,), ds.y.encoding.get("chunks"))

tests/test_rollbackstore.py

Lines changed: 6 additions & 4 deletions

@@ -196,7 +196,7 @@ def test_to_zarr(self):
         slice_1 = make_test_dataset(shape=(1, 50, 100), chunks=(1, 50, 50))
         # drop variables w.o. "time" dim
         slice_1 = slice_1.drop_vars(
-            [k for k, v in slice_1.variables.items() if "time" not in v.dims]
+            [k for k, v in slice_1.variables.items() if "time" not in v.sizes]
         )
         slice_1.attrs = {}
         for k, v in slice_1.variables.items():
@@ -247,7 +247,7 @@ def test_to_zarr(self):
         slice_2 = make_test_dataset(shape=(1, 50, 100), chunks=(1, 50, 50))
         # drop variables w.o. "time" dim
         slice_2 = slice_2.drop_vars(
-            [k for k, v in slice_2.variables.items() if "time" not in v.dims]
+            [k for k, v in slice_2.variables.items() if "time" not in v.sizes]
         )
         for k, v in slice_2.variables.items():
             v.encoding = {}
@@ -291,10 +291,12 @@ def test_to_zarr(self):
         )

     def assert_dataset_ok(
-        self, expected_dims: dict[str, int], expected_chunks: dict[str, tuple[int, ...]]
+        self,
+        expected_sizes: dict[str, int],
+        expected_chunks: dict[str, tuple[int, ...]],
     ):
         ds = xr.open_zarr(self.target_dir.uri)
-        self.assertEqual(expected_dims, ds.dims)
+        self.assertEqual(expected_sizes, ds.sizes)
         self.assertEqual(
             expected_chunks,
             {k: ds[k].encoding.get("chunks") for k in ds.variables.keys()},

zappend/metadata.py

Lines changed: 14 additions & 14 deletions

@@ -91,28 +91,28 @@ def to_dict(self):
 class DatasetMetadata:
     def __init__(
         self,
-        dims: dict[str, int],
+        sizes: dict[str, int],
         variables: dict[str, VariableMetadata],
         attrs: dict[str, Any],
     ):
-        self.dims = dims
+        self.sizes = sizes
         self.variables = variables
         self.attrs = attrs

     def to_dict(self):
         return dict(
-            dims=self.dims,
+            sizes=self.sizes,
             variables={k: v.to_dict() for k, v in self.variables.items()},
             attrs=self.attrs,
         )

     def assert_compatible_slice(
         self, slice_metadata: "DatasetMetadata", append_dim: str
     ):
-        for dim_name, dim_size in self.dims.items():
-            if dim_name not in slice_metadata.dims:
+        for dim_name, dim_size in self.sizes.items():
+            if dim_name not in slice_metadata.sizes:
                 raise ValueError(f"Missing dimension" f" {dim_name!r} in slice dataset")
-            slice_dim_size = slice_metadata.dims[dim_name]
+            slice_dim_size = slice_metadata.sizes[dim_name]
             if dim_name != append_dim and dim_size != slice_dim_size:
                 raise ValueError(
                     f"Wrong size for dimension {dim_name!r}"
@@ -137,7 +137,7 @@ def assert_compatible_slice(
     def from_dataset(cls, dataset: xr.Dataset, config: dict[str, Any] | None = None):
         config = config or {}

-        dims = _get_effective_dims(
+        sizes = _get_effective_sizes(
             dataset,
             config.get("fixed_dims"),
             config.get("append_dim") or DEFAULT_APPEND_DIM,
@@ -152,10 +152,10 @@ def from_dataset(cls, dataset: xr.Dataset, config: dict[str, Any] | None = None)

         attrs = merge_configs(dataset.attrs, config.get("attrs") or {})

-        return DatasetMetadata(dims=dims, variables=variables, attrs=attrs)
+        return DatasetMetadata(sizes=sizes, variables=variables, attrs=attrs)


-def _get_effective_dims(
+def _get_effective_sizes(
     dataset: xr.Dataset,
     config_fixed_dims: dict[str, int] | None,
     config_append_dim: str,
@@ -170,19 +170,19 @@ def _get_effective_dims(
             raise ValueError(
                 f"Fixed dimension {dim_name!r}" f" not found in dataset"
             )
-        ds_dim_size = dataset.dims[dim_name]
+        ds_dim_size = dataset.sizes[dim_name]
         if fixed_dim_size != ds_dim_size:
             raise ValueError(
                 f"Wrong size for fixed dimension {dim_name!r}"
                 f" in dataset: expected {fixed_dim_size},"
                 f" found {ds_dim_size}"
             )
-    if config_append_dim not in dataset.dims:
+    if config_append_dim not in dataset.sizes:
         raise ValueError(
             f"Append dimension" f" {config_append_dim!r} not found in dataset"
         )

-    return {str(k): v for k, v in dataset.dims.items()}
+    return {str(k): v for k, v in dataset.sizes.items()}


 def _get_effective_variables(
@@ -248,12 +248,12 @@ def _get_effective_variables(
         if config_var_dims is None:
             raise ValueError(f"Missing dimensions" f" of variable {var_name!r}")
         for dim in config_var_dims:
-            if dim not in dataset.dims:
+            if dim not in dataset.sizes:
                 raise ValueError(
                     f"Dimension {dim!r} of variable"
                     f" {var_name!r} not found in dataset"
                 )
-        config_var_def["shape"] = tuple(dataset.dims[k] for k in config_var_dims)
+        config_var_def["shape"] = tuple(dataset.sizes[k] for k in config_var_dims)
         encoding: dict | None = config_var_def.get("encoding")
         if encoding is None or encoding.get("dtype") is None:
             raise ValueError(
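For reference, a small usage sketch assembled from the tests changed above (not part of this commit); the import path `zappend.metadata` is assumed from the file location shown in the diff:

    import numpy as np
    import xarray as xr

    # `DatasetMetadata` is defined in zappend/metadata.py (see diff above);
    # the exact import path is an assumption.
    from zappend.metadata import DatasetMetadata

    ds = xr.Dataset({"a": xr.DataArray(np.zeros((2, 3, 4)), dims=("z", "y", "x"))})
    md = DatasetMetadata.from_dataset(ds, {"append_dim": "z"})

    print(md.sizes)               # {'z': 2, 'y': 3, 'x': 4}  (was `md.dims` before)
    print(md.to_dict()["sizes"])  # the serialized form is now keyed "sizes" as well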
