Merge branch 'main' into order-handling

d-v-b · web-flow · commit 6a55eae0a181 · 2025-07-17T23:48:26.000+02:00
diff --git a/.github/ISSUE_TEMPLATE/release-checklist.md b/.github/ISSUE_TEMPLATE/release-checklist.md
@@ -25,6 +25,7 @@ assignees: ''
   - [ ] All tests pass in the ["GPU Tests" workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/gpu_test.yml).
   - [ ] All tests pass in the ["Hypothesis" workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/hypothesis.yaml).
   - [ ] Check that downstream libraries work well (maintainers can make executive decisions about whether all checks are required for this release).
+    - [ ] numcodecs
     - [ ] Xarray (@jhamman @dcherian @TomNicholas)
         - Zarr's upstream compatibility is tested via the [Upstream Dev CI workflow](https://github.com/pydata/xarray/actions/workflows/upstream-dev-ci.yaml).
         - Click on the most recent workflow and check that the `upstream-dev` job has run and passed. `upstream-dev` is not run on all workflow runs.
diff --git a/changes/3227.feature.rst b/changes/3227.feature.rst
@@ -0,0 +1 @@
+Add lightweight implementations of .getsize() and .getsize_prefix() for ObjectStore.
diff --git a/changes/3268.misc.rst b/changes/3268.misc.rst
@@ -0,0 +1,2 @@
+Removed warnings that were emitted when using the ``vlen-utf8`` and ``vlen-bytes`` codecs. Those
+warnings are no longer needed now that both of these codecs are backed by specification documents.
diff --git a/docs/about.rst b/docs/about.rst
@@ -18,7 +18,7 @@ Funding
 -------
 The project is fiscally sponsored by `NumFOCUS <https://numfocus.org/>`_, a US
 501(c)(3) public charity, and development is supported by the
-`MRC Centre for Genomics and Global Health <https://www.cggh.org>`_
+`MRC Centre for Genomics and Global Health <https://github.com/cggh/>`_
 and the `Chan Zuckerberg Initiative <https://chanzuckerberg.com/>`_.
 
 .. _NumCodecs: https://numcodecs.readthedocs.io/
diff --git a/pyproject.toml b/pyproject.toml
@@ -324,7 +324,6 @@ extend-select = [
 ignore = [
     "ANN401",
     "PT011",  # TODO: apply this rule
-    "PT012",  # TODO: apply this rule
     "PT030",  # TODO: apply this rule
     "PT031",  # TODO: apply this rule
     "RET505",
diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py
@@ -2,7 +2,6 @@
 
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
-from warnings import warn
 
 import numpy as np
 from numcodecs.vlen import VLenBytes, VLenUTF8
@@ -25,15 +24,6 @@
 
 @dataclass(frozen=True)
 class VLenUTF8Codec(ArrayBytesCodec):
-    def __init__(self) -> None:
-        warn(
-            "The codec `vlen-utf8` is currently not part in the Zarr format 3 specification. It "
-            "may not be supported by other zarr implementations and may change in the future.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-        super().__init__()
-
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
@@ -80,15 +70,6 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -
 
 @dataclass(frozen=True)
 class VLenBytesCodec(ArrayBytesCodec):
-    def __init__(self) -> None:
-        warn(
-            "The codec `vlen-bytes` is currently not part in the Zarr format 3 specification. It "
-            "may not be supported by other zarr implementations and may change in the future.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-        super().__init__()
-
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py
@@ -212,19 +212,21 @@ def supports_listing(self) -> bool:
         # docstring inherited
         return True
 
-    def list(self) -> AsyncGenerator[str, None]:
-        # docstring inherited
+    async def _list(self, prefix: str | None = None) -> AsyncGenerator[ObjectMeta, None]:
         import obstore as obs
 
-        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store)
-        return _transform_list(objects)
+        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store, prefix=prefix)
+        async for batch in objects:
+            for item in batch:
+                yield item
 
-    def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
+    def list(self) -> AsyncGenerator[str, None]:
         # docstring inherited
-        import obstore as obs
+        return (obj["path"] async for obj in self._list())
 
-        objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store, prefix=prefix)
-        return _transform_list(objects)
+    def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
+        # docstring inherited
+        return (obj["path"] async for obj in self._list(prefix))
 
     def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
         # docstring inherited
@@ -233,21 +235,21 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
         coroutine = obs.list_with_delimiter_async(self.store, prefix=prefix)
         return _transform_list_dir(coroutine, prefix)
 
+    async def getsize(self, key: str) -> int:
+        # docstring inherited
+        import obstore as obs
 
-async def _transform_list(
-    list_stream: ListStream[Sequence[ObjectMeta]],
-) -> AsyncGenerator[str, None]:
-    """
-    Transform the result of list into an async generator of paths.
-    """
-    async for batch in list_stream:
-        for item in batch:
-            yield item["path"]
+        resp = await obs.head_async(self.store, key)
+        return resp["size"]
+
+    async def getsize_prefix(self, prefix: str) -> int:
+        # docstring inherited
+        sizes = [obj["size"] async for obj in self._list(prefix=prefix)]
+        return sum(sizes)
 
 
 async def _transform_list_dir(
-    list_result_coroutine: Coroutine[Any, Any, ListResult[Sequence[ObjectMeta]]],
-    prefix: str,
+    list_result_coroutine: Coroutine[Any, Any, ListResult[Sequence[ObjectMeta]]], prefix: str
 ) -> AsyncGenerator[str, None]:
     """
     Transform the result of list_with_delimiter into an async generator of paths.
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -261,9 +261,9 @@ def test_save_errors() -> None:
     with pytest.raises(ValueError):
         # no arrays provided
         save("data/group.zarr")
+    a = np.arange(10)
     with pytest.raises(TypeError):
         # mode is no valid argument and would get handled as an array
-        a = np.arange(10)
         zarr.save("data/example.zarr", a, mode="w")
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -141,8 +141,8 @@ async def write(
 
     _mock.call.assert_called()
 
+    config.set({"codec_pipeline.path": "wrong_name"})
     with pytest.raises(BadConfigError):
-        config.set({"codec_pipeline.path": "wrong_name"})
         get_pipeline_class()
 
     class MockEnvCodecPipeline(CodecPipeline):
diff --git a/tests/test_group.py b/tests/test_group.py
@@ -655,12 +655,13 @@ def test_group_create_array(
 
     if not overwrite:
         if method == "create_array":
-            with pytest.raises(ContainsArrayError):
+            with pytest.raises(ContainsArrayError):  # noqa: PT012
                 a = group.create_array(name=name, shape=shape, dtype=dtype)
                 a[:] = data
         elif method == "array":
-            with pytest.raises(ContainsArrayError), pytest.warns(DeprecationWarning):
-                a = group.array(name=name, shape=shape, dtype=dtype)
+            with pytest.raises(ContainsArrayError):  # noqa: PT012
+                with pytest.warns(DeprecationWarning):
+                    a = group.array(name=name, shape=shape, dtype=dtype)
                 a[:] = data
 
     assert array.path == normalize_path(name)
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
@@ -1093,17 +1093,17 @@ def test_get_coordinate_selection_2d(store: StorePath) -> None:
     ix1 = np.array([[1, 3, 2], [1, 0, 0]])
     _test_get_coordinate_selection(a, z, (ix0, ix1))
 
+    selection = slice(5, 15), [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = slice(5, 15), [1, 2, 3]
         z.get_coordinate_selection(selection)  # type:ignore[arg-type]
+    selection = [1, 2, 3], slice(5, 15)
     with pytest.raises(IndexError):
-        selection = [1, 2, 3], slice(5, 15)
         z.get_coordinate_selection(selection)  # type:ignore[arg-type]
+    selection = Ellipsis, [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = Ellipsis, [1, 2, 3]
         z.get_coordinate_selection(selection)  # type:ignore[arg-type]
+    selection = Ellipsis
     with pytest.raises(IndexError):
-        selection = Ellipsis
         z.get_coordinate_selection(selection)  # type:ignore[arg-type]
 
 
@@ -1299,14 +1299,14 @@ def test_get_block_selection_2d(store: StorePath) -> None:
     ):
         _test_get_block_selection(a, z, selection, expected_idx)
 
+    selection = slice(5, 15), [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = slice(5, 15), [1, 2, 3]
         z.get_block_selection(selection)
+    selection = Ellipsis, [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = Ellipsis, [1, 2, 3]
         z.get_block_selection(selection)
+    selection = slice(15, 20), slice(None)
     with pytest.raises(IndexError):  # out of bounds
-        selection = slice(15, 20), slice(None)
         z.get_block_selection(selection)
 
 
@@ -1360,14 +1360,14 @@ def test_set_block_selection_2d(store: StorePath) -> None:
     ):
         _test_set_block_selection(v, a, z, selection, expected_idx)
 
+    selection = slice(5, 15), [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = slice(5, 15), [1, 2, 3]
         z.set_block_selection(selection, 42)
+    selection = Ellipsis, [1, 2, 3]
     with pytest.raises(IndexError):
-        selection = Ellipsis, [1, 2, 3]
         z.set_block_selection(selection, 42)
+    selection = slice(15, 20), slice(None)
     with pytest.raises(IndexError):  # out of bounds
-        selection = slice(15, 20), slice(None)
         z.set_block_selection(selection, 42)
 
 
diff --git a/tests/test_store/test_object.py b/tests/test_store/test_object.py
@@ -75,6 +75,21 @@ def test_store_init_raises(self) -> None:
         with pytest.raises(TypeError):
             ObjectStore("path/to/store")
 
+    async def test_store_getsize(self, store: ObjectStore) -> None:
+        buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04")
+        await self.set(store, "key", buf)
+        size = await store.getsize("key")
+        assert size == len(buf)
+
+    async def test_store_getsize_prefix(self, store: ObjectStore) -> None:
+        buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04")
+        await self.set(store, "c/key1/0", buf)
+        await self.set(store, "c/key2/0", buf)
+        size = await store.getsize_prefix("c/key1")
+        assert size == len(buf)
+        total_size = await store.getsize_prefix("c")
+        assert total_size == len(buf) * 2
+
 
 @pytest.mark.slow_hypothesis
 def test_zarr_hierarchy():

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Add lightweight implementations of .getsize() and .getsize_prefix() for ObjectStore.`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+Removed warnings that were emitted when using the ``vlen-utf8`` and ``vlen-bytes`` codecs. Those
	`2`	`+warnings are no longer needed now that both of these codecs are backed by specification documents.`