Skip to content

Commit 384d323

Browse files
committed
wip
1 parent 12963ab commit 384d323

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

src/zarr/abc/store.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,8 @@ async def getsize(self, key: str) -> int:
401401
402402
Returns
403403
-------
404-
nbytes: int
405-
The size of the value in bytes.
404+
nbytes : int
405+
The size of the value (in bytes).
406406
407407
Raises
408408
------
@@ -417,6 +417,33 @@ async def getsize(self, key: str) -> int:
417417
raise FileNotFoundError(key)
418418
return len(value)
419419

420+
async def getsize_dir(self, prefix: str) -> int:
    """
    Return the total size, in bytes, of the values stored directly under a prefix.

    Only the keys produced by ``self.list_dir(prefix)`` are measured, i.e. keys
    that start with ``prefix`` and do not contain the character ``/`` after it.

    Parameters
    ----------
    prefix : str
        The prefix of the directory to measure.

    Returns
    -------
    nbytes : int
        The sum of the sizes of the values in the directory (in bytes).

    Notes
    -----
    ``getsize_dir`` is just provided as a potentially faster alternative to
    listing all the keys in a directory and calling :meth:`Store.getsize`
    on each.
    """
    # Drain the listing completely before any size lookup is created, so the
    # lookups are only issued once the full set of names is known.
    names = []
    async for name in self.list_dir(prefix):
        names.append(name)

    # NOTE(review): every listed name is handed to ``getsize`` unchanged; this
    # assumes ``list_dir`` yields keys that ``getsize`` accepts as-is -- confirm.
    per_key_sizes = await gather(*map(self.getsize, names))
    return sum(per_key_sizes)
446+
420447

421448
@runtime_checkable
422449
class ByteGetter(Protocol):

src/zarr/core/array.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,9 @@ def nchunks_initialized(self) -> int:
843843
"""
844844
return nchunks_initialized(self)
845845

846+
async def nbytes_stored(self) -> int:
    """
    Return the number of bytes the store reports for this array's path.

    Returns
    -------
    nbytes : int
        The result of awaiting ``getsize_dir`` on the store of
        ``self.store_path``, called with the path of ``self.store_path``.
    """
    location = self.store_path
    return await location.store.getsize_dir(location.path)
848+
846849
def _iter_chunk_coords(
847850
self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
848851
) -> Iterator[ChunkCoords]:
@@ -1436,6 +1439,16 @@ def nchunks_initialized(self) -> int:
14361439
"""
14371440
return self._async_array.nchunks_initialized
14381441

1442+
def nbytes_stored(self) -> int:
    """
    Determine the size, in bytes, of the array actually written to the store.

    Returns
    -------
    size : int
        The result of passing the wrapped async array's ``nbytes_stored()``
        coroutine to ``sync``.
    """
    pending = self._async_array.nbytes_stored()
    return sync(pending)
1451+
14391452
def _iter_chunk_keys(
14401453
self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
14411454
) -> Iterator[str]:

tests/test_array.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,15 @@ def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> No
371371
assert observed == expected
372372

373373

374+
def test_nbytes_stored() -> None:
    """Check ``Array.nbytes_stored`` on a new array and again after writing to it."""
    # The size of the metadata document. This is a fragile test.
    metadata_nbytes = 366

    arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4")
    result = arr.nbytes_stored()
    assert result == metadata_nbytes

    arr[:50] = 1
    result = arr.nbytes_stored()
    # NOTE(review): the expected size is unchanged after the write; presumably the
    # written chunks are not counted by ``nbytes_stored`` -- confirm this is intended.
    assert result == metadata_nbytes
381+
382+
374383
def test_default_fill_values() -> None:
375384
a = Array.create(MemoryStore({}, mode="w"), shape=5, chunk_shape=5, dtype="<U4")
376385
assert a.fill_value == ""

0 commit comments

Comments
 (0)