diff --git a/docs/release-notes/0.12.0rc3.md b/docs/release-notes/0.12.0rc3.md index e4f8fb950..f52f23635 100644 --- a/docs/release-notes/0.12.0rc3.md +++ b/docs/release-notes/0.12.0rc3.md @@ -1,6 +1,6 @@ (v0.12.0rc3)= ### 0.12.0rc3 {small}`2025-05-20` -### Bug fixes +#### Bug fixes - Update zarr v3 bound to >3.0.8 to prevent corrupted data {issue}`zarr-developers/zarr-python#3061` {user}`ilan-gold` ({pr}`1993`) diff --git a/docs/release-notes/0.12.0rc4.md b/docs/release-notes/0.12.0rc4.md index b0ab3ef81..6923515cb 100644 --- a/docs/release-notes/0.12.0rc4.md +++ b/docs/release-notes/0.12.0rc4.md @@ -13,3 +13,7 @@ ### Performance - Improve {func}`~anndata.experimental.read_elem_lazy` performance for `h5ad` files by not caching `indptr`. {user}`ilan-gold` ({pr}`2005`) + +#### Development + +- Bound {mod}`zarr` to `<3.1` until {pr}`1995` is merged to handle the new data type structure. {user}`ilan-gold` ({pr}`2013`) diff --git a/docs/release-notes/2013.development.md b/docs/release-notes/2013.development.md deleted file mode 100644 index 90220d554..000000000 --- a/docs/release-notes/2013.development.md +++ /dev/null @@ -1 +0,0 @@ -Bound {mod}`zarr` to `<3.1` until {pr}`1995` is merged to handle the new data type structure. {user}`ilan-gold` diff --git a/docs/tutorials/zarr-v3.md b/docs/tutorials/zarr-v3.md index 833c3c9d1..a5b4668e3 100644 --- a/docs/tutorials/zarr-v3.md +++ b/docs/tutorials/zarr-v3.md @@ -4,6 +4,23 @@ Users should notice a significant performance improvement, especially for cloud data, but also likely for local data as well. Here is a quick guide on some of our learnings so far: +## Consolidated Metadata + +All `zarr` stores are now consolidated by default when written via {func}`anndata.io.write_zarr` or {meth}`anndata.AnnData.write_zarr`. For more information on this topic, please see {ref}`the zarr docs `. 
Practically, this change means that once a store has been written, it should be treated as immutable **unless you remove the consolidated metadata and/or rewrite after the mutating operation**, e.g., if you wish to use `anndata.io.write_elem` to add a column to `obs`, a `layer`, etc. to an existing store. For example, to mutate an existing store on-disk, you may do: + +```python +g = zarr.open_group(orig_path, mode="a", use_consolidated=False) +ad.io.write_elem( + g, + "obs", + obs, + dataset_kwargs=dict(chunks=(250,)), +) +zarr.consolidate_metadata(g.store) +``` + +In this example, the store was opened unconsolidated (trying to open it as a consolidated store would error out), edited, and then reconsolidated. Alternatively, one could simply first delete the file containing the consolidated metadata at the root, `.zmetadata`. + ## Remote data We now provide the {func}`anndata.experimental.read_lazy` feature for reading as much of the {class}`~anndata.AnnData` object as lazily as possible, using `dask` and {mod}`xarray`. 
diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 2edaae988..2c4de8d40 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -360,11 +360,22 @@ def write_elem( dest_type = type(store) # Normalize k to absolute path - if (isinstance(store, ZarrGroup) and is_zarr_v2()) or ( - isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute() - ): + if ( + is_zarr_v2_store := ( + (is_zarr_store := isinstance(store, ZarrGroup)) and is_zarr_v2() + ) + ) or (isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute()): k = str(PurePosixPath(store.name) / k) - + is_consolidated = False + if is_zarr_v2_store: + from zarr.storage import ConsolidatedMetadataStore + + is_consolidated = isinstance(store.store, ConsolidatedMetadataStore) + elif is_zarr_store: + is_consolidated = store.metadata.consolidated_metadata is not None + if is_consolidated: + msg = "Cannot overwrite/edit a store with consolidated metadata" + raise ValueError(msg) if k == "/": if isinstance(store, ZarrGroup) and not is_zarr_v2(): from zarr.core.sync import sync diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 033c3c0a1..f32953fb2 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -14,6 +14,7 @@ import pandas as pd import pytest import zarr +import zarr.convenience from numba.core.errors import NumbaDeprecationWarning from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix @@ -966,3 +967,16 @@ def test_read_lazy_import_error(func, tmp_path): tmp_path if func is ad.experimental.read_lazy else tmp_path / "obs" ) ) + + +def test_write_elem_consolidated(tmp_path: Path): + ad.AnnData(np.ones((10, 10))).write_zarr(tmp_path) + g = ( + zarr.convenience.open_consolidated(tmp_path) + if is_zarr_v2() + else zarr.open(tmp_path) + ) + with pytest.raises( + ValueError, match="Cannot overwrite/edit a store with consolidated metadata" + ): + ad.io.write_elem(g["obs"], "foo", 
np.arange(10))