Skip to content

Commit 29c1cdd

Browse files
committed
add nshards, nshards_initialized
1 parent 83a3ea5 commit 29c1cdd

File tree

2 files changed

+96
-11
lines changed

2 files changed

+96
-11
lines changed

src/zarr/core/array.py

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,15 +1227,32 @@ def shard_grid_shape(self) -> ChunkCoords:
12271227
@property
12281228
def nchunks(self) -> int:
12291229
"""
1230-
The number of chunks in the stored representation of this array.
1230+
The number of chunks in this array.
1231+
1232+
Note that if a sharding codec is used, then the number of chunks may exceed the number of
1233+
stored objects supporting this array. To find out the number of stored objects that support
1234+
this array, see :attr:`nshards`.
12311235
12321236
Returns
12331237
-------
12341238
int
12351239
The total number of chunks in the array.
12361240
"""
1241+
return product(self.chunk_grid_shape)
1242+
1243+
@property
1244+
def nshards(self) -> int:
1245+
"""
1246+
The number of shards in this array.
1247+
1248+
Returns
1249+
-------
1250+
int
1251+
The total number of shards in the array.
1252+
"""
12371253
return product(self.shard_grid_shape)
12381254

1255+
@deprecated("Use nshards_initialized instead")
12391256
async def nchunks_initialized(self) -> int:
12401257
"""
12411258
Calculate the number of chunks that have been initialized, i.e. the number of chunks that have
@@ -1262,6 +1279,32 @@ async def nchunks_initialized(self) -> int:
12621279
"""
12631280
return len(await shards_initialized(self))
12641281

1282+
async def nshards_initialized(self) -> int:
1283+
"""
1284+
Calculate the number of shards that have been initialized, i.e. the number of shards that have
1285+
been persisted to the storage backend.
1286+
1287+
Returns
1288+
-------
1289+
nshards_initialized : int
1290+
The number of shards that have been initialized.
1291+
1292+
Notes
1293+
-----
1294+
On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous)
1295+
property :attr:`Array.nshards_initialized`.
1296+
1297+
Examples
1298+
--------
1299+
>>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,))
1300+
>>> await arr.nshards_initialized()
1301+
0
1302+
>>> await arr.setitem(slice(5), 1)
1303+
>>> await arr.nshards_initialized()
1304+
3
1305+
"""
1306+
return len(await shards_initialized(self))
1307+
12651308
async def nbytes_stored(self) -> int:
12661309
return await self.store_path.store.getsize_prefix(self.store_path.path)
12671310

@@ -1860,7 +1903,7 @@ async def info_complete(self) -> Any:
18601903
A property giving just the statically known information about an array.
18611904
"""
18621905
return self._info(
1863-
await self.nchunks_initialized(),
1906+
await self.nshards_initialized(),
18641907
await self.store_path.store.getsize_prefix(self.store_path.path),
18651908
)
18661909

@@ -2327,10 +2370,21 @@ def shard_grid_shape(self) -> ChunkCoords:
23272370
@property
23282371
def nchunks(self) -> int:
23292372
"""
2330-
The number of chunks in the stored representation of this array.
2373+
The number of chunks in this array.
2374+
2375+
Note that if a sharding codec is used, then the number of chunks may exceed the number of
2376+
stored objects supporting this array. To find out the number of stored objects that support
2377+
this array, see :attr:`nshards`.
23312378
"""
23322379
return self._async_array.nchunks
23332380

2381+
@property
2382+
def nshards(self) -> int:
2383+
"""
2384+
The number of shards in the stored representation of this array.
2385+
"""
2386+
return self._async_array.nshards
2387+
23342388
@property
23352389
def nbytes(self) -> int:
23362390
"""
@@ -2347,6 +2401,7 @@ def nbytes(self) -> int:
23472401
return self._async_array.nbytes
23482402

23492403
@property
2404+
@deprecated("Use nshards_initialized instead.")
23502405
def nchunks_initialized(self) -> int:
23512406
"""
23522407
Calculate the number of chunks that have been initialized, i.e. the number of chunks that have
@@ -2373,6 +2428,28 @@ def nchunks_initialized(self) -> int:
23732428
"""
23742429
return sync(self._async_array.nchunks_initialized())
23752430

2431+
@property
2432+
def nshards_initialized(self) -> int:
2433+
"""
2434+
Calculate the number of shards that have been initialized, i.e. the number of shards that have
2435+
been persisted to the storage backend.
2436+
2437+
Returns
2438+
-------
2439+
nshards_initialized : int
2440+
The number of shards that have been initialized.
2441+
2442+
Examples
2443+
--------
2444+
>>> arr = zarr.create(shape=(10,), chunks=(2,))
2445+
>>> arr.nshards_initialized
2446+
0
2447+
>>> arr[:5] = 1
2448+
>>> arr.nshards_initialized
2449+
3
2450+
"""
2451+
return sync(self._async_array.nshards_initialized())
2452+
23762453
def nbytes_stored(self) -> int:
23772454
"""
23782455
Determine the size, in bytes, of the array actually written to the store.

tests/test_array.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -395,19 +395,27 @@ async def test_nchunks_initialized(
395395
arr[region] = 1
396396
expected = idx + 1
397397
if test_cls == Array:
398-
observed = arr.nchunks_initialized
398+
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
399+
observed = arr.nchunks_initialized
400+
assert observed == arr.nshards_initialized
399401
else:
400-
observed = await arr._async_array.nchunks_initialized()
402+
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
403+
observed = await arr._async_array.nchunks_initialized()
404+
assert observed == await arr._async_array.nshards_initialized()
401405
assert observed == expected
402406

403407
# delete shards
404408
for idx, key in enumerate(arr._iter_shard_keys()):
405409
sync(arr.store_path.store.delete(key))
406410
if test_cls == Array:
407-
observed = arr.nchunks_initialized
411+
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
412+
observed = arr.nchunks_initialized
413+
assert observed == arr.nshards_initialized
408414
else:
409-
observed = await arr._async_array.nchunks_initialized()
410-
expected = arr.nchunks - idx - 1
415+
with pytest.warns(DeprecationWarning, match="Use nshards_initialized instead"):
416+
observed = await arr._async_array.nchunks_initialized()
417+
assert observed == await arr._async_array.nshards_initialized()
418+
expected = arr.nshards - idx - 1
411419
assert observed == expected
412420

413421

@@ -876,14 +884,14 @@ def test_write_empty_chunks_behavior(
876884

877885
# initialize the store with some non-fill value chunks
878886
arr[:] = fill_value + 1
879-
assert arr.nchunks_initialized == arr.nchunks
887+
assert arr.nshards_initialized == arr.nshards
880888

881889
arr[:] = fill_value
882890

883891
if not write_empty_chunks:
884-
assert arr.nchunks_initialized == 0
892+
assert arr.nshards_initialized == 0
885893
else:
886-
assert arr.nchunks_initialized == arr.nchunks
894+
assert arr.nshards_initialized == arr.nshards
887895

888896

889897
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)