Merge branch 'main' of github.com:zarr-developers/zarr-python into feat/batch-creation

d-v-b · d-v-b · commit 036fd2aca715 · 2025-01-15T12:08:04.000+01:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -60,7 +60,12 @@ jobs:
         hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
     - name: Run Tests
       run: |
-        hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run
+        hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage
+    - name: Upload coverage
+      uses: codecov/codecov-action@v5
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        verbose: true # optional (default = false)
 
   test-upstream-and-min-deps:
     name: py=${{ matrix.python-version }}-${{ matrix.dependency-set }}
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,10 @@
+coverage:
+  status:
+    patch:
+      default:
+        target: auto
+    project:
+      default:
+        target: auto
+        threshold: 0.1
+comment: false
diff --git a/docs/developers/contributing.rst b/docs/developers/contributing.rst
@@ -329,10 +329,16 @@ Release procedure
    Most of the release process is now handled by GitHub workflow which should
    automatically push a release to PyPI if a tag is pushed.
 
-Before releasing, make sure that all pull requests which will be
-included in the release have been properly documented in
-`docs/release.rst`.
-
+Pre-release
+"""""""""""
+1. Make sure that all pull requests which will be
+   included in the release have been properly documented in
+   :file:`docs/release-notes.rst`.
+2. Rename the "Unreleased" section heading in :file:`docs/release-notes.rst`
+   to the version you are about to release.
+
+Releasing
+"""""""""
 To make a new release, go to
 https://github.com/zarr-developers/zarr-python/releases and
 click "Draft a new release". Choose a version number prefixed
@@ -355,5 +361,8 @@ https://readthedocs.io. Full releases will be available under
 pre-releases will be available under
 `/latest <https://zarr.readthedocs.io/en/latest>`_.
 
-Also review and merge the https://github.com/conda-forge/zarr-feedstock
-pull request that will be automatically generated.
+Post-release
+""""""""""""
+
+- Review and merge the pull request on the `conda-forge feedstock <https://github.com/conda-forge/zarr-feedstock>`_ that will be automatically generated.
+- Create a new "Unreleased" section in the release notes.
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
@@ -74,7 +74,7 @@ Zarr supports data compression and filters. For example, to use Blosc compressio
     ...    "data/example-3.zarr",
     ...    mode="w", shape=(100, 100),
     ...    chunks=(10, 10), dtype="f4",
-    ...    compressor=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.SHUFFLE)
+    ...    compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
     ... )
     >>> z[:, :] = np.random.random((100, 100))
     >>>
@@ -101,7 +101,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> root = zarr.group("data/example-2.zarr")
     >>> foo = root.create_group(name="foo")
     >>> bar = root.create_array(
-    ...     name="bar", shape=(100, 10), chunks=(10, 10)
+    ...     name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4"
     ... )
     >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4")
     >>>
@@ -112,6 +112,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> # print the hierarchy
     >>> root.tree()
     /
+    ├── bar (100, 10) float32
     └── foo
         └── spam (10,) int32
     <BLANKLINE>
@@ -130,7 +131,7 @@ using external libraries like `s3fs <https://s3fs.readthedocs.io>`_ or
 
     >>> import s3fs # doctest: +SKIP
     >>>
-    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10)) # doctest: +SKIP
+    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") # doctest: +SKIP
     >>> z[:, :] = np.random.random((100, 100)) # doctest: +SKIP
 
 A single-file store can also be created using the the :class:`zarr.storage.ZipStore`::
diff --git a/docs/release-notes.rst b/docs/release-notes.rst
@@ -9,12 +9,21 @@ New features
 
 Bug fixes
 ~~~~~~~~~
-* Fixes ``order`` argument for Zarr format 2 arrays.
-  By :user:`Norman Rzepka <normanrz>` (:issue:`2679`).
+* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`).
+
+* Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using ``zarr-python`` version 2 (:issue:`2694`).
+
+* Ensure that ``compressor=None`` results in no compression when writing Zarr format 2 data (:issue:`2708`).
 
 Behaviour changes
 ~~~~~~~~~~~~~~~~~
 
+Other
+~~~~~
+* Removed some unnecessary files from the source distribution
+  to reduce its size (:issue:`2686`).
+
+
 .. _release_3.0.0:
 
 3.0.0
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,6 +2,13 @@
 requires = ["hatchling", "hatch-vcs"]
 build-backend = "hatchling.build"
 
+[tool.hatch.build.targets.sdist]
+exclude = [
+  "/.github",
+  "/bench",
+  "/docs",
+  "/notebooks"
+]
 
 [project]
 name = "zarr"
@@ -103,13 +110,13 @@ Homepage = "https://github.com/zarr-developers/zarr-python"
 [tool.coverage.report]
 exclude_lines = [
     "pragma: no cover",
+    "if TYPE_CHECKING:",
     "pragma: ${PY_MAJOR_VERSION} no cover",
     '.*\.\.\.' # Ignore "..." lines
 ]
 
 [tool.coverage.run]
 omit = [
-    "src/zarr/meta_v1.py",
     "bench/compress_normal.py",
 ]
 
@@ -140,8 +147,8 @@ numpy = ["1.25", "2.1"]
 features = ["gpu"]
 
 [tool.hatch.envs.test.scripts]
-run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=src"
-run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=src"
+run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
@@ -170,7 +177,7 @@ numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [tool.hatch.envs.gputest.scripts]
-run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=src"
+run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -4131,15 +4131,22 @@ def _parse_chunk_encoding_v3(
 
 
 def _parse_deprecated_compressor(
-    compressor: CompressorLike | None, compressors: CompressorsLike
+    compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3
 ) -> CompressorsLike | None:
-    if compressor:
+    if compressor != "auto":
         if compressors != "auto":
             raise ValueError("Cannot specify both `compressor` and `compressors`.")
-        warn(
-            "The `compressor` argument is deprecated. Use `compressors` instead.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-        compressors = (compressor,)
+        if zarr_format == 3:
+            warn(
+                "The `compressor` argument is deprecated. Use `compressors` instead.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+        if compressor is None:
+            # "no compression"
+            compressors = ()
+        else:
+            compressors = (compressor,)
+    elif zarr_format == 2 and compressor == compressors == "auto":
+        compressors = ({"id": "blosc"},)
     return compressors
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
@@ -594,8 +594,8 @@ def _from_bytes_v2(
             v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes())
             v2_consolidated_metadata = v2_consolidated_metadata["metadata"]
             # We already read zattrs and zgroup. Should we ignore these?
-            v2_consolidated_metadata.pop(".zattrs")
-            v2_consolidated_metadata.pop(".zgroup")
+            v2_consolidated_metadata.pop(".zattrs", None)
+            v2_consolidated_metadata.pop(".zgroup", None)
 
             consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict)
 
@@ -1032,7 +1032,7 @@ async def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = None,
@@ -1135,8 +1135,9 @@ async def create_array(
         AsyncArray
 
         """
-
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return await create_array(
             store=self.store_path,
             name=name,
@@ -2329,7 +2330,7 @@ def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = "C",
@@ -2431,7 +2432,9 @@ def create_array(
         -------
         AsyncArray
         """
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return Array(
             self._sync(
                 self._async_group.create_array(
diff --git a/tests/test_group.py b/tests/test_group.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 import pytest
-from numcodecs import Zstd
+from numcodecs import Blosc
 
 import zarr
 import zarr.api.asynchronous
@@ -507,7 +507,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                     "chunks": (1,),
                     "order": "C",
                     "filters": None,
-                    "compressor": Zstd(level=0),
+                    "compressor": Blosc(),
                     "zarr_format": zarr_format,
                 },
                 "subgroup": {
@@ -1678,13 +1678,3 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None:
         elapsed = time.time() - start
 
         assert elapsed > num_groups * get_latency
-
-
-@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
-def test_deprecated_compressor(store: Store) -> None:
-    g = zarr.group(store=store, zarr_format=2)
-    with pytest.warns(UserWarning, match="The `compressor` argument is deprecated.*"):
-        a = g.create_array(
-            "foo", shape=(100,), chunks=(10,), dtype="i4", compressor={"id": "blosc"}
-        )
-        assert a.metadata.compressor.codec_id == "blosc"
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import pytest
-from numcodecs import Zstd
+from numcodecs import Blosc
 
 import zarr.api.asynchronous
 import zarr.api.synchronous
@@ -17,7 +17,7 @@
     open,
     open_consolidated,
 )
-from zarr.core.buffer import default_buffer_prototype
+from zarr.core.buffer import cpu, default_buffer_prototype
 from zarr.core.group import ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayV3Metadata
 from zarr.core.metadata.v2 import ArrayV2Metadata
@@ -476,6 +476,30 @@ async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
         with pytest.raises(ValueError):
             await zarr.api.asynchronous.open_consolidated(store, zarr_format=None)
 
+    @pytest.fixture
+    async def v2_consolidated_metadata_empty_dataset(
+        self, memory_store: zarr.storage.MemoryStore
+    ) -> AsyncGroup:
+        zgroup_bytes = cpu.Buffer.from_bytes(json.dumps({"zarr_format": 2}).encode())
+        zmetadata_bytes = cpu.Buffer.from_bytes(
+            b'{"metadata":{".zgroup":{"zarr_format":2}},"zarr_consolidated_format":1}'
+        )
+        return AsyncGroup._from_bytes_v2(
+            None, zgroup_bytes, zattrs_bytes=None, consolidated_metadata_bytes=zmetadata_bytes
+        )
+
+    async def test_consolidated_metadata_backwards_compatibility(
+        self, v2_consolidated_metadata_empty_dataset
+    ):
+        """
+        Test that consolidated metadata handles a missing .zattrs key. This is necessary for backwards compatibility with zarr-python 2.x. See https://github.com/zarr-developers/zarr-python/issues/2694
+        """
+        store = zarr.storage.MemoryStore()
+        await zarr.api.asynchronous.open(store=store, zarr_format=2)
+        await zarr.api.asynchronous.consolidate_metadata(store)
+        result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2)
+        assert result.metadata == v2_consolidated_metadata_empty_dataset.metadata
+
     async def test_consolidated_metadata_v2(self):
         store = zarr.storage.MemoryStore()
         g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2)
@@ -498,7 +522,7 @@ async def test_consolidated_metadata_v2(self):
                         attributes={"key": "a"},
                         chunks=(1,),
                         fill_value=0,
-                        compressor=Zstd(level=0),
+                        compressor=Blosc(),
                         order="C",
                     ),
                     "g1": GroupMetadata(
diff --git a/tests/test_store/test_stateful.py b/tests/test_store/test_stateful.py
@@ -18,7 +18,7 @@ def mk_test_instance_sync() -> ZarrHierarchyStateMachine:
         pytest.skip(reason="ZipStore does not support delete")
     if isinstance(sync_store, MemoryStore):
         run_state_machine_as_test(
-            mk_test_instance_sync, settings=Settings(report_multiple_bugs=False)
+            mk_test_instance_sync, settings=Settings(report_multiple_bugs=False, max_examples=50)
         )
 
 
@@ -28,6 +28,11 @@ def mk_test_instance_sync() -> None:
 
     if isinstance(sync_store, ZipStore):
         pytest.skip(reason="ZipStore does not support delete")
-    if isinstance(sync_store, LocalStore):
+    elif isinstance(sync_store, LocalStore):
         pytest.skip(reason="This test has errors")
-    run_state_machine_as_test(mk_test_instance_sync, settings=Settings(report_multiple_bugs=True))
+    elif isinstance(sync_store, MemoryStore):
+        run_state_machine_as_test(mk_test_instance_sync, settings=Settings(max_examples=50))
+    else:
+        run_state_machine_as_test(
+            mk_test_instance_sync, settings=Settings(report_multiple_bugs=True)
+        )
diff --git a/tests/test_v2.py b/tests/test_v2.py