Skip to content

Commit 0b99164

Browse files
committed
Make nchunks_initialized report the product of the number of initialized shards and the number of chunks per shard
1 parent 1dad7b3 commit 0b99164

File tree

2 files changed

+39
-30
lines changed

2 files changed

+39
-30
lines changed

src/zarr/core/array.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,11 +1252,14 @@ def nshards(self) -> int:
12521252
"""
12531253
return product(self.shard_grid_shape)
12541254

1255-
@deprecated("Use nshards_initialized instead")
12561255
async def nchunks_initialized(self) -> int:
12571256
"""
1258-
Calculate the number of chunks that have been initialized, i.e. the number of chunks that have
1259-
been persisted to the storage backend.
1257+
Calculate the number of chunks that have been initialized in storage.
1258+
1259+
This value is calculated as the product of the number of initialized shards and the number of
1260+
chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1,
1261+
and in that case the number of chunks initialized is the same as the number of stored objects associated with an
1262+
array. For a direct count of the number of initialized stored objects, see ``nshards_initialized``.
12601263
12611264
Returns
12621265
-------
@@ -1270,19 +1273,28 @@ async def nchunks_initialized(self) -> int:
12701273
12711274
Examples
12721275
--------
1273-
>>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,))
1276+
>>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(1,), shards=(2,))
12741277
>>> await arr.nchunks_initialized()
12751278
0
12761279
>>> await arr.setitem(slice(5), 1)
1277-
>>> await arr.nchunks_initialized()
1280+
>>> await arr.nshards_initialized()
12781281
3
1282+
>>> await arr.nchunks_initialized()
1283+
6
12791284
"""
1280-
return len(await shards_initialized(self))
1285+
if self.shards is None:
1286+
chunks_per_shard = 1
1287+
else:
1288+
chunks_per_shard = product(
1289+
tuple(a // b for a, b in zip(self.shards, self.chunks, strict=True))
1290+
)
1291+
return (await self.nshards_initialized()) * chunks_per_shard
12811292

12821293
async def nshards_initialized(self) -> int:
12831294
"""
1284-
Calculate the number of shards that have been initialized, i.e. the number of shards that have
1285-
been persisted to the storage backend.
1295+
Calculate the number of shards that have been initialized in storage.
1296+
1297+
This is the number of shards that have been persisted to the storage backend.
12861298
12871299
Returns
12881300
-------
@@ -2379,30 +2391,30 @@ def nbytes(self) -> int:
23792391
return self._async_array.nbytes
23802392

23812393
@property
2382-
@deprecated("Use nshards_initialized instead.")
23832394
def nchunks_initialized(self) -> int:
23842395
"""
2385-
Calculate the number of chunks that have been initialized, i.e. the number of chunks that have
2386-
been persisted to the storage backend.
2396+
Calculate the number of chunks that have been initialized in storage.
2397+
2398+
This value is calculated as the product of the number of initialized shards and the number of
2399+
chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1,
2400+
and in that case the number of chunks initialized is the same as the number of stored objects associated with an
2401+
array. For a direct count of the number of initialized stored objects, see ``nshards_initialized``.
23872402
23882403
Returns
23892404
-------
23902405
nchunks_initialized : int
23912406
The number of chunks that have been initialized.
23922407
2393-
Notes
2394-
-----
2395-
On :class:`Array` this is a (synchronous) property, unlike asynchronous function
2396-
:meth:`AsyncArray.nchunks_initialized`.
2397-
23982408
Examples
23992409
--------
2400-
>>> arr = await zarr.create(shape=(10,), chunks=(2,))
2410+
>>> arr = zarr.create_array(store={}, shape=(10,), chunks=(1,), shards=(2,))
24012411
>>> arr.nchunks_initialized
24022412
0
24032413
>>> arr[:5] = 1
2404-
>>> arr.nchunks_initialized
2414+
>>> arr.nshards_initialized
24052415
3
2416+
>>> arr.nchunks_initialized
2417+
6
24062418
"""
24072419
return sync(self._async_array.nchunks_initialized())
24082420

tests/test_array.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ async def test_nchunks_initialized(
387387
"""
388388
Test that nchunks_initialized equals the number of stored shards multiplied by the number of chunks per shard.
389389
"""
390+
chunks_per_shard = np.prod(np.array(shard_shape) // np.array(chunk_shape))
390391
store = MemoryStore()
391392
arr = zarr.create_array(store, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1")
392393

@@ -395,26 +396,22 @@ async def test_nchunks_initialized(
395396
arr[region] = 1
396397
expected = idx + 1
397398
if test_cls == Array:
398-
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
399-
observed = arr.nchunks_initialized
400-
assert observed == arr.nshards_initialized
399+
observed = arr.nshards_initialized
400+
assert observed == arr.nchunks_initialized // chunks_per_shard
401401
else:
402-
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
403-
observed = await arr._async_array.nchunks_initialized()
404-
assert observed == await arr._async_array.nshards_initialized()
402+
observed = await arr._async_array.nshards_initialized()
403+
assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard
405404
assert observed == expected
406405

407406
# delete chunks
408407
for idx, key in enumerate(arr._iter_shard_keys()):
409408
sync(arr.store_path.store.delete(key))
410409
if test_cls == Array:
411-
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
412-
observed = arr.nchunks_initialized
413-
assert observed == arr.nshards_initialized
410+
observed = arr.nshards_initialized
411+
assert observed == arr.nchunks_initialized // chunks_per_shard
414412
else:
415-
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
416-
observed = await arr._async_array.nchunks_initialized()
417-
assert observed == await arr._async_array.nshards_initialized()
413+
observed = await arr._async_array.nshards_initialized()
414+
assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard
418415
expected = arr.nshards - idx - 1
419416
assert observed == expected
420417

0 commit comments

Comments
 (0)