Skip to content

Commit cdd6a74

Browse files
brokkoli71 and normanrz authored
Fix iterating over sharding index (#2392)
* test_sharding_with_empty_inner_chunk
* tests for failing read with sharding
* replace morton order by np unravel index
* format
* Revert "replace morton order by np unravel index"
  This reverts commit adc3240.
* skip morton indices out of bound
* improve test_sharding_with_chunks_per_shard
* format

---------

Co-authored-by: Norman Rzepka <[email protected]>
1 parent 206d145 commit cdd6a74

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

src/zarr/core/indexing.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,8 +1346,15 @@ def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords:
13461346

13471347

13481348
def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]:
    """Yield each coordinate inside ``chunk_shape`` exactly once.

    Codes ``0, 1, 2, ...`` are passed to ``decode_morton`` in increasing order.
    A decoded coordinate is yielded only when every component is strictly
    smaller than the matching extent in ``chunk_shape`` and it has not been
    yielded before; anything else is skipped. Iteration ends once
    ``product(chunk_shape)`` coordinates have been produced.

    Parameters
    ----------
    chunk_shape
        Extent of the grid along each dimension.

    Yields
    ------
    ChunkCoords
        In-bounds coordinates, in the order ``decode_morton`` produces them.
    """
    # Loop-invariant: number of coordinates that must be produced.
    total = product(chunk_shape)
    # A set keeps the duplicate check O(1) per candidate.
    # NOTE(review): assumes decode_morton returns a hashable tuple — confirm.
    seen: set[ChunkCoords] = set()
    code = 0
    while len(seen) < total:
        coords = decode_morton(code, chunk_shape)
        code += 1
        in_bounds = all(c < extent for c, extent in zip(coords, chunk_shape, strict=False))
        if in_bounds and coords not in seen:
            seen.add(coords)
            yield coords
13511358

13521359

13531360
def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:

tests/test_codecs/test_codecs.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,26 @@ def test_morton() -> None:
204204
]
205205

206206

207+
@pytest.mark.parametrize(
    "shape",
    [
        [2, 2, 2],
        [5, 2],
        [2, 5],
        [2, 9, 2],
        [3, 2, 12],
        [2, 5, 1],
        [4, 3, 6, 2, 7],
        [3, 2, 1, 6, 4, 5, 2],
    ],
)
def test_morton2(shape: list[int]) -> None:
    """Check that ``morton_order_iter`` visits every in-bounds coordinate exactly once."""
    from math import prod

    order = list(morton_order_iter(shape))
    # Every coordinate of the grid must be produced ...
    assert len(order) == prod(shape)
    # ... without duplicates ...
    assert len(set(order)) == len(order)
    # ... and each one must lie within bounds in every dimension.
    for coords in order:
        assert all(0 <= c < extent for c, extent in zip(coords, shape, strict=True))
225+
226+
207227
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
208228
def test_write_partial_chunks(store: Store) -> None:
209229
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))

tests/test_codecs/test_sharding.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,32 @@ async def test_sharding_with_empty_inner_chunk(
393393
print("read data")
394394
data_read = await a.getitem(...)
395395
assert np.array_equal(data_read, data)
396+
397+
398+
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
@pytest.mark.parametrize(
    "index_location",
    [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end],
)
@pytest.mark.parametrize("chunks_per_shard", [(5, 2), (2, 5), (5, 5)])
async def test_sharding_with_chunks_per_shard(
    store: Store, index_location: ShardingCodecIndexLocation, chunks_per_shard: tuple[int, ...]
) -> None:
    """Round-trip a single shard whose inner-chunk grid is ``chunks_per_shard``.

    The array consists of exactly one shard (``chunk_shape=shape``) split into
    inner chunks of shape ``(2, 1)``; the data written must be read back unchanged.
    """
    chunk_shape = (2, 1)
    shape = [x * y for x, y in zip(chunks_per_shard, chunk_shape, strict=True)]
    # Distinct values per element so that a misplaced inner chunk is detected;
    # constant data would compare equal regardless of chunk placement.
    data = np.arange(np.prod(shape), dtype="int32").reshape(shape)
    fill_value = 42

    path = f"test_sharding_with_chunks_per_shard_{index_location}"
    spath = StorePath(store, path)
    a = Array.create(
        spath,
        shape=shape,
        chunk_shape=shape,
        dtype="int32",
        fill_value=fill_value,
        codecs=[ShardingCodec(chunk_shape=chunk_shape, index_location=index_location)],
    )
    a[...] = data
    data_read = a[...]
    assert np.array_equal(data_read, data)

0 commit comments

Comments
 (0)