
Commit 2e04e9e

fixes
1 parent 223bb6b

File tree

2 files changed (+181 −18 lines)


tests/test_levels.py

Lines changed: 158 additions & 10 deletions
@@ -20,7 +20,7 @@
 
 
 class GetVariablesConfigTest(unittest.TestCase):
-    def test_it(self):
+    def test_no_variables_given(self):
         dataset = make_test_dataset()
         variables = get_variables_config(dataset, dict(x=512, y=256, time=1))
         self.assertEqual(
@@ -34,13 +34,35 @@ def test_it(self):
             variables,
         )
 
+    def test_variables_given(self):
+        dataset = make_test_dataset()
+        variables = get_variables_config(
+            dataset,
+            dict(x=512, y=256, time=1),
+            variables={
+                "time": {"encoding": {"chunks": [3]}},
+                "chl": {"encoding": {"chunks": [3, 100, 100]}},
+                "tsm": {"encoding": {"dtype": "uint16"}},
+            },
+        )
+        self.assertEqual(
+            {
+                "x": {"encoding": {"chunks": None}},
+                "y": {"encoding": {"chunks": None}},
+                "time": {"encoding": {"chunks": [3]}},
+                "chl": {"encoding": {"chunks": [3, 100, 100]}},
+                "tsm": {"encoding": {"chunks": [1, 256, 512], "dtype": "uint16"}},
+            },
+            variables,
+        )
+
 
 @unittest.skipIf(xcube is None, reason="xcube is not installed")
 class WriteLevelsTest(unittest.TestCase):
     def setUp(self):
         clear_memory_fs()
 
-    def test_it(self):
+    def test_default_x_y_with_crs(self):
         source_path = "memory://source.zarr"
         make_test_dataset(
             uri=source_path,
@@ -70,14 +92,140 @@ def test_it(self):
             levels_info,
         )
 
-        ds0 = xr.open_zarr(target_dir.uri + f"/0.zarr")
-        self.assertEqual({"time": 3, "y": 1024, "x": 2048}, ds0.sizes)
+        self.assert_level(target_dir.uri + "/0.zarr", 0, has_crs=True)
+        self.assert_level(target_dir.uri + "/1.zarr", 1, has_crs=True)
+        self.assert_level(target_dir.uri + "/2.zarr", 2, has_crs=True)
+        self.assert_level(target_dir.uri + "/3.zarr", 3, has_crs=True)
+
+    def test_default_lon_lat_no_crs(self):
+        source_path = "memory://source.zarr"
+        make_test_dataset(
+            uri=source_path,
+            dims=("time", "lat", "lon"),
+            shape=(3, 1024, 2048),
+            chunks=(1, 128, 256),
+        )
+
+        target_dir = FileObj("memory://target.levels")
+        self.assertFalse(target_dir.exists())
+
+        write_levels(source_path=source_path, target_path=target_dir.uri)
+
+        self.assertTrue(target_dir.exists())
 
-        ds1 = xr.open_zarr(target_dir.uri + f"/1.zarr")
-        self.assertEqual({"time": 3, "y": 512, "x": 1024}, ds1.sizes)
+        levels_file = target_dir.for_path(".zlevels")
+        self.assertTrue(levels_file.exists())
+        levels_info = json.loads(levels_file.read())
+        self.assertEqual(
+            {
+                "version": "1.0",
+                "num_levels": 4,
+                "agg_methods": {"chl": "mean", "tsm": "mean"},
+                "use_saved_levels": False,
+            },
+            levels_info,
+        )
 
-        ds2 = xr.open_zarr(target_dir.uri + f"/2.zarr")
-        self.assertEqual({"time": 3, "y": 256, "x": 512}, ds2.sizes)
+        xy_dims = "lon", "lat"
+        self.assert_level(target_dir.uri + "/0.zarr", 0, xy_dims=xy_dims)
+        self.assert_level(target_dir.uri + "/1.zarr", 1, xy_dims=xy_dims)
+        self.assert_level(target_dir.uri + "/2.zarr", 2, xy_dims=xy_dims)
+        self.assert_level(target_dir.uri + "/3.zarr", 3, xy_dims=xy_dims)
 
-        ds3 = xr.open_zarr(target_dir.uri + f"/3.zarr")
-        self.assertEqual({"time": 3, "y": 128, "x": 256}, ds3.sizes)
+    def test_link_level_zero(self):
+        source_dir = FileObj("memory://source.zarr")
+        make_test_dataset(
+            uri=source_dir.uri,
+            dims=("time", "y", "x"),
+            shape=(3, 1024, 2048),
+            chunks=(1, 128, 256),
+            crs="EPSG:4326",
+        )
+
+        target_dir = FileObj("memory://target.levels")
+        self.assertFalse(target_dir.exists())
+
+        write_levels(
+            source_path=source_dir.uri,
+            target_path=target_dir.uri,
+            link_level_zero=True,
+        )
+
+        self.assertTrue(target_dir.exists())
+
+        levels_file = target_dir.for_path(".zlevels")
+        self.assertTrue(levels_file.exists())
+        levels_info = json.loads(levels_file.read())
+        self.assertEqual(
+            {
+                "version": "1.0",
+                "num_levels": 4,
+                "agg_methods": {"chl": "mean", "tsm": "mean"},
+                "use_saved_levels": False,
+            },
+            levels_info,
+        )
+
+        level_zero_file = target_dir.for_path("0.link")
+        self.assertTrue(level_zero_file.exists())
+        self.assertEqual(b"../source.zarr", level_zero_file.read())
+        self.assert_level(target_dir.uri + "/1.zarr", 1, has_crs=True)
+        self.assert_level(target_dir.uri + "/2.zarr", 2, has_crs=True)
+        self.assert_level(target_dir.uri + "/3.zarr", 3, has_crs=True)
+
+    def test_link_level_zero_use_saved_levels(self):
+        source_dir = FileObj("memory://source.zarr")
+        make_test_dataset(
+            uri=source_dir.uri,
+            dims=("time", "lat", "lon"),
+            shape=(3, 1024, 2048),
+            chunks=(1, 128, 256),
+        )
+
+        target_dir = FileObj("memory://target.levels")
+        self.assertFalse(target_dir.exists())
+
+        write_levels(
+            source_path=source_dir.uri,
+            target_path=target_dir.uri,
+            link_level_zero=True,
+            use_saved_levels=True,
+        )
+
+        self.assertTrue(target_dir.exists())
+
+        levels_file = target_dir.for_path(".zlevels")
+        self.assertTrue(levels_file.exists())
+        levels_info = json.loads(levels_file.read())
+        self.assertEqual(
+            {
+                "version": "1.0",
+                "num_levels": 4,
+                "agg_methods": {"chl": "mean", "tsm": "mean"},
+                "use_saved_levels": True,
+            },
+            levels_info,
+        )
+
+        xy_dims = "lon", "lat"
+        level_zero_file = target_dir.for_path("0.link")
+        self.assertTrue(level_zero_file.exists())
+        self.assertEqual(b"../source.zarr", level_zero_file.read())
+        self.assert_level(target_dir.uri + "/1.zarr", 1, xy_dims=xy_dims)
+        self.assert_level(target_dir.uri + "/2.zarr", 2, xy_dims=xy_dims)
+        self.assert_level(target_dir.uri + "/3.zarr", 3, xy_dims=xy_dims)
+
+    def assert_level(self, uri: str, level: int, xy_dims=("x", "y"), has_crs=False):
+        x_dim, y_dim = xy_dims
+        dataset = xr.open_zarr(uri)
+        z = 2**level
+        f = 2 ** (3 - level)
+        self.assertEqual({"time": 3, y_dim: 1024 // z, x_dim: 2048 // z}, dataset.sizes)
+        self.assertEqual(
+            {"time": 3 * (1,), y_dim: f * (128,), x_dim: f * (256,)}, dataset.chunksizes
+        )
+        self.assertEqual({x_dim, y_dim, "time"}, set(dataset.coords))
+        if has_crs:
+            self.assertEqual({"chl", "tsm", "crs"}, set(dataset.data_vars))
+        else:
+            self.assertEqual({"chl", "tsm"}, set(dataset.data_vars))

zappend/levels.py

Lines changed: 23 additions & 8 deletions
@@ -11,6 +11,9 @@
 
 from zappend.api import zappend
 
+# Note, the function may be easily adapted to zappend
+# to existing multi-level datasets.
+
 
 def write_levels(
     source_path: str,
@@ -24,10 +27,12 @@ def write_levels(
     tile_size: tuple[int, int] | None = None,
     **zappend_config,
 ):
+    """TODO - document me"""
     from xcube.core.tilingscheme import get_num_levels
     from xcube.core.gridmapping import GridMapping
     from xcube.core.subsampling import get_dataset_agg_methods
     from xcube.core.subsampling import subsample_dataset
+    from xcube.util.fspath import get_fs_path_class
 
     target_dir = zappend_config.pop("target_dir", None)
     if not target_dir and not target_path:
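The docstring is still a TODO, so the call signature is easiest to read off the tests above. A rough usage sketch follows; the module path zappend.levels and the memory:// URIs are assumptions taken from this commit's file layout and test fixtures, not documented API:

from zappend.levels import write_levels

# Build a multi-level pyramid from a Zarr dataset, storing level 0 as a
# "0.link" file that points back at the source instead of copying the
# full-resolution data, and deriving each level from the previous one.
write_levels(
    source_path="memory://source.zarr",
    target_path="memory://target.levels",
    link_level_zero=True,
    use_saved_levels=True,
)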
@@ -84,7 +89,7 @@ def write_levels(
         source_ds,
         {
             xy_dim_names[0]: tile_size[0],
-            xy_dim_names[0]: tile_size[1],
+            xy_dim_names[1]: tile_size[1],
             append_dim: 1,
         },
         variables=zappend_config.pop("variables", None),
@@ -94,14 +99,21 @@ def write_levels(
         levels_data: dict[str, Any] = dict(
             version="1.0",
             num_levels=num_levels,
-            agg_methods=dict(agg_methods),
+            agg_methods=agg_methods,
             use_saved_levels=use_saved_levels,
         )
         json.dump(levels_data, fp, indent=2)
 
     if link_level_zero:
+        path_class = get_fs_path_class(target_fs)
+        rel_source_path = (
+            "../"
+            + path_class(source_root)
+            .relative_to(path_class(target_root).parent)
+            .as_posix()
+        )
         with target_fs.open(f"{target_root}/0.link", "wt") as fp:
-            fp.write(source_root)
+            fp.write(rel_source_path)
 
     subsample_dataset_kwargs = dict(xy_dim_names=xy_dim_names, agg_methods=agg_methods)
 
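The new rel_source_path block makes 0.link store a path relative to the target directory rather than the absolute source root, which is what the tests assert (b"../source.zarr"). A rough pathlib equivalent, with illustrative roots standing in for whatever source_root and target_root resolve to on the in-memory filesystem:

from pathlib import PurePosixPath

source_root = "/source.zarr"      # assumed value, for illustration only
target_root = "/target.levels"    # assumed value, for illustration only

rel_source_path = (
    "../"
    + PurePosixPath(source_root)
    .relative_to(PurePosixPath(target_root).parent)
    .as_posix()
)
print(rel_source_path)  # -> ../source.zarr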
@@ -113,9 +125,12 @@ def write_levels(
         if level_index == 0:
             level_slice_ds = slice_ds
         elif use_saved_levels:
-            prev_level_path = f"{target_root}/{level_index - 1}.zarr"
-            prev_level_store = target_fs.get_mapper(root=prev_level_path)
-            prev_level_ds = xr.open_zarr(prev_level_store)
+            if level_index == 1:
+                prev_level_ds = source_ds
+            else:
+                prev_level_path = f"{target_root}/{level_index - 1}.zarr"
+                prev_level_store = target_fs.get_mapper(root=prev_level_path)
+                prev_level_ds = xr.open_zarr(prev_level_store)
             level_slice_ds = subsample_dataset(
                 prev_level_ds.isel(slice_ds_indexer),
                 step=2,
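The extra level_index == 1 branch presumably covers link_level_zero=True, where level 0 exists in the target only as the 0.link file and there is no 0.zarr to re-open, so the first subsampled level has to be derived from the already opened source dataset. Sketched in isolation (hypothetical helper, not the module's API):

def previous_level_dataset(level_index, source_ds, open_saved_level):
    # Level 1 is derived from the in-memory source dataset, because the
    # saved level 0 may exist only as a "0.link" file, not as "0.zarr".
    if level_index == 1:
        return source_ds
    # Deeper levels re-open the previously written level from the target.
    return open_saved_level(level_index - 1)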
@@ -165,8 +180,8 @@ def get_variables_config(
         var_config = dict(var_configs.get(var_name, {}))
         var_encoding = dict(var_config.get("encoding", {}))
         var_chunks = var_encoding.get("chunks")
-        if not var_chunks and var.dims:
-            if var_name in dataset.coords:
+        if "chunks" not in var_encoding and var.dims:
+            if var_name in dataset.coords or set(var.dims).isdisjoint(chunk_sizes):
                 var_chunks = None
             else:
                 var_chunks = [chunk_sizes.get(dim) for dim in var.dims]
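The condition change in get_variables_config is behavioral: the old check `not var_chunks` treated an explicit "chunks": None (or any falsy value) in a user-supplied encoding as "nothing given" and re-derived chunks from chunk_sizes, whereas the new check only fills in chunks when the "chunks" key is absent. The added isdisjoint test additionally leaves a variable unchunked when none of its dimensions appear in chunk_sizes. A small sketch of the difference:

def old_trigger(encoding):
    # Old condition: any falsy "chunks" value triggers re-derivation.
    return not encoding.get("chunks")

def new_trigger(encoding):
    # New condition: only a missing "chunks" key triggers re-derivation.
    return "chunks" not in encoding

enc = {"chunks": None}       # caller explicitly requests unchunked storage
assert old_trigger(enc)      # old code would have overridden this setting
assert not new_trigger(enc)  # new code keeps the explicit None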
