Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e3ee33b
add copy_store convenience method
melonora Dec 3, 2025
83550a3
add synchronous call
melonora Dec 3, 2025
d473d6e
change comment, test not async
melonora Dec 3, 2025
bb1405e
fix test
melonora Dec 3, 2025
c62543a
support zarr v2
melonora Dec 3, 2025
cdbc2f7
remove use of _iter_chunk_regions
melonora Dec 3, 2025
e6e10df
change method name
melonora Dec 3, 2025
9c42567
remove consolidate_metadata argument
melonora Dec 3, 2025
63c652e
consolidate if consolidated
melonora Dec 4, 2025
d4924f5
add consolidated_metadata argument
melonora Dec 5, 2025
e83dda5
add docstring and argument
melonora Dec 5, 2025
59b18ea
add support subgroup consolidated metadata
melonora Dec 5, 2025
b65d257
add argument to docstring
melonora Dec 5, 2025
1056b9e
add example to docs groups
melonora Dec 5, 2025
eadb647
adjust docs
melonora Dec 5, 2025
8c3471c
partial fix pre-commit
melonora Dec 5, 2025
2cbb9b9
add to changes
melonora Dec 5, 2025
128b924
change to call using self
melonora Dec 10, 2025
fa95e9c
obtain consolidated metadata from self
melonora Dec 10, 2025
848811f
add test for matching parameters
melonora Dec 10, 2025
3398325
implement path and add test
melonora Dec 11, 2025
ae205b5
Merge branch 'main' into copy_store
d-v-b Dec 16, 2025
06ad2d7
add type hints
melonora Dec 17, 2025
6b81a07
merge upstream changes
melonora Dec 17, 2025
3e92ee9
remove unnecessary type ignore
melonora Dec 17, 2025
0a4b5b8
Merge branch 'main' into copy_store
d-v-b Jan 8, 2026
418c670
Merge branch 'main' into copy_store
d-v-b Jan 8, 2026
f3e4f87
Merge branch 'main' into copy_store
d-v-b Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changes/3612.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added the `copy_to` convenience method to `zarr.Group`, which copies a group and all of
its contents to a destination store. The destination store can be of a different type
than the source store of the `zarr.Group`. Metadata is copied over as is.
10 changes: 9 additions & 1 deletion docs/user-guide/groups.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,12 @@ print(root.tree())
```

!!! note
[`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra.
[`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra.

You can copy a group, including its consolidated metadata, to a new destination store
using the `copy_to` method. The destination store may be of a different type than the source store:

```python exec="true" session="groups" source="above" result="ansi"
destination_store = zarr.storage.MemoryStore()
new_group = root.copy_to(destination_store, overwrite=True)
```
134 changes: 133 additions & 1 deletion src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ async def from_store(
store: StoreLike,
*,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
overwrite: bool = False,
zarr_format: ZarrFormat = 3,
) -> AsyncGroup:
Expand All @@ -486,7 +487,11 @@ async def from_store(
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
attributes = attributes or {}
group = cls(
metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format),
metadata=GroupMetadata(
attributes=attributes,
consolidated_metadata=consolidated_metadata,
zarr_format=zarr_format,
),
store_path=store_path,
)
await group._save_metadata(ensure_parents=True)
Expand Down Expand Up @@ -697,6 +702,91 @@ def from_dict(
store_path=store_path,
)

async def copy_to(
    self,
    store: StoreLike,
    *,
    overwrite: bool = False,
    use_consolidated_for_children: bool = True,
) -> AsyncGroup:
    """
    Copy this group and all its contents to a new store.

    The copy keeps the Zarr format of this group. Group attributes and
    consolidated metadata are written to the target as they are, and array
    data is transferred one shard region at a time.

    Parameters
    ----------
    store : StoreLike
        The store to copy to.
    overwrite : bool, optional
        If True, overwrite any existing data in the target store. Default is False.
    use_consolidated_for_children : bool, default True
        Whether to use the consolidated metadata of child groups when iterating over the store contents.
        Note that this only affects groups loaded from the store. If the current Group already has
        consolidated metadata, it will always be used.

    Returns
    -------
    AsyncGroup
        The new group in the target store.
    """
    target_zarr_format = self.metadata.zarr_format

    # Re-read the metadata from this group's own location (``store_path``), not
    # from the root of the underlying store, so that a non-root group copies its
    # own consolidated metadata rather than that of the store root.
    group = await self.open(self.store_path, zarr_format=target_zarr_format)
    consolidated_metadata = group.metadata.consolidated_metadata

    new_group = await AsyncGroup.from_store(
        store,
        overwrite=overwrite,
        attributes=self.metadata.attributes,
        consolidated_metadata=consolidated_metadata,
        zarr_format=target_zarr_format,
    )

    # ``members`` yields ``(key, node)`` pairs where ``key`` is the path of the
    # node relative to this group. Using the relative key (instead of the node's
    # absolute path in the source store) keeps the hierarchy rooted at
    # ``new_group`` regardless of where the source or target group lives.
    async for relative_path, member in self.members(
        max_depth=None, use_consolidated_for_children=use_consolidated_for_children
    ):
        if isinstance(member, AsyncGroup):
            await AsyncGroup.from_store(
                store=new_group.store_path / relative_path,
                zarr_format=target_zarr_format,
                overwrite=overwrite,
                attributes=member.metadata.attributes,
                consolidated_metadata=member.metadata.consolidated_metadata,
            )
            continue

        # Format-specific array creation arguments.
        kwargs: dict[str, Any] = {}
        if target_zarr_format == 3:
            kwargs["chunk_key_encoding"] = member.metadata.chunk_key_encoding
            kwargs["dimension_names"] = member.metadata.dimension_names
        else:
            kwargs["chunk_key_encoding"] = {
                "name": "v2",
                "separator": member.metadata.dimension_separator,
            }

        # Serializer done this way in case of having zarr_format 2, otherwise mypy complains.
        new_array = await new_group.create_array(
            name=relative_path,
            shape=member.shape,
            dtype=member.dtype,
            chunks=member.chunks,
            shards=member.shards,
            filters=member.filters,
            compressors=member.compressors,
            serializer=member.serializer if member.serializer is not None else "auto",
            fill_value=member.metadata.fill_value,
            attributes=member.attrs,
            overwrite=overwrite,
            config={"order": member.order},
            **kwargs,
        )

        # Copy the data region by region so that only one region is held in
        # memory at a time.
        for region in member._iter_shard_regions():
            data = await member.getitem(selection=region)
            await new_array.setitem(selection=region, value=data)

    return new_group

async def setitem(self, key: str, value: Any) -> None:
"""
Fastpath for creating a new array
Expand Down Expand Up @@ -945,6 +1035,7 @@ async def create_group(
*,
overwrite: bool = False,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
) -> AsyncGroup:
"""Create a sub-group.

Expand All @@ -965,6 +1056,7 @@ async def create_group(
return await type(self).from_store(
self.store_path / name,
attributes=attributes,
consolidated_metadata=consolidated_metadata,
overwrite=overwrite,
zarr_format=self.metadata.zarr_format,
)
Expand Down Expand Up @@ -1810,6 +1902,7 @@ def from_store(
store: StoreLike,
*,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
zarr_format: ZarrFormat = 3,
overwrite: bool = False,
) -> Group:
Expand All @@ -1823,6 +1916,8 @@ def from_store(
for a description of all valid StoreLike values.
attributes : dict, optional
A dictionary of JSON-serializable values with user-defined attributes.
consolidated_metadata : ConsolidatedMetadata, optional
Consolidated Metadata for this Group. This should contain metadata of child nodes below this group.
zarr_format : {2, 3}, optional
Zarr storage format version.
overwrite : bool, optional
Expand All @@ -1842,6 +1937,7 @@ def from_store(
AsyncGroup.from_store(
store,
attributes=attributes,
consolidated_metadata=consolidated_metadata,
overwrite=overwrite,
zarr_format=zarr_format,
),
Expand Down Expand Up @@ -1874,6 +1970,42 @@ def open(
obj = sync(AsyncGroup.open(store, zarr_format=zarr_format))
return cls(obj)

def copy_to(
self,
store: StoreLike,
*,
overwrite: bool = False,
use_consolidated_for_children: bool = True,
Comment on lines 1985 to 1990
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should add a test that checks the signature of this function against the signature of the async version. otherwise, they have a tendency to drift. Here's an example of such a test:

def test_docstrings_match(callable_name: str) -> None:
"""
Tests that the docstrings for the sync and async define identical parameters.
"""
callable_a = getattr(synchronous, callable_name)
callable_b = getattr(asynchronous, callable_name)
if callable_a.__doc__ is None:
assert callable_b.__doc__ is None
else:
params_a = NumpyDocString(callable_a.__doc__)["Parameters"]
params_b = NumpyDocString(callable_b.__doc__)["Parameters"]
mismatch = []
for idx, (a, b) in enumerate(zip(params_a, params_b, strict=False)):
if a != b:
mismatch.append((idx, (a, b)))
assert mismatch == []

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so I did not do the actual docstrings itself, but I did write a test for all methods for classes and their async counterparts. I think this is generalizable to other classes as well, though currently only Group and AsyncGroup are tested. With this, already some mismatches are detected which for now are skipped but could be addressed in a follow up PR. Doing so would require deprecating some parameters. If you agree I can create an issue for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see test_class_method_parameters_match

) -> Group:
"""
Copy this group and all its contents to a new store.

Parameters
----------
store : StoreLike
The store to copy to.
overwrite : bool, optional
If True, overwrite any existing data in the target store. Default is False.
use_consolidated_for_children : bool, default True
Whether to use the consolidated metadata of child groups when iterating over the store contents.
Note that this only affects groups loaded from the store. If the current Group already has
consolidated metadata, it will always be used.

Returns
-------
Group
The new group in the target store.
"""
return Group(
sync(
self._async_group.copy_to(
store=store,
overwrite=overwrite,
use_consolidated_for_children=use_consolidated_for_children,
)
)
)

def __getitem__(self, path: str) -> AnyArray | Group:
"""Obtain a group member.

Expand Down
5 changes: 3 additions & 2 deletions src/zarr/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,8 @@ def basic_indices(
allow_ellipsis: bool = True,
) -> Any:
"""Basic indices without unsupported negative slices."""
strategy = npst.basic_indices(
# The type ignore is safe here: it is only needed because the numpy type hints use Literal[False] / Literal[True] for the overload variants.
strategy = npst.basic_indices( # type: ignore[call-overload]
shape=shape,
min_dims=min_dims,
max_dims=max_dims,
Expand All @@ -362,7 +363,7 @@ def basic_indices(
lambda idxr: (
not (
is_negative_slice(idxr)
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) # type: ignore[redundant-expr]
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
)
)
)
Expand Down
72 changes: 72 additions & 0 deletions tests/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,78 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad
members_observed = group.members(max_depth=-1)


@pytest.mark.parametrize(
    ("zarr_format", "shards", "consolidate_metadata"),
    [
        (2, None, False),
        (2, None, True),
        (3, (50,), False),
        (3, (50,), True),
    ],
)
def test_copy_to(
    zarr_format: ZarrFormat, shards: tuple[int, ...] | None, consolidate_metadata: bool
) -> None:
    """
    Test that ``Group.copy_to`` copies attributes, nested groups, array data and
    (when present) consolidated metadata to a second store.
    """
    # Build the source hierarchy: a root group with one array and one subgroup
    # that itself contains an array.
    src_store = MemoryStore()
    src = Group.from_store(src_store, attributes={"root": True}, zarr_format=zarr_format)

    subgroup = src.create_group("subgroup", attributes={"subgroup": True})

    subgroup_arr_data = np.arange(50)
    subgroup.create_array(
        "subgroup_dataset",
        shape=(50,),
        chunks=(10,),
        shards=shards,
        dtype=subgroup_arr_data.dtype,
    )
    subgroup["subgroup_dataset"] = subgroup_arr_data

    arr_data = np.arange(100)
    src.create_array(
        "dataset",
        shape=(100,),
        chunks=(10,),
        shards=shards,
        dtype=arr_data.dtype,
    )
    src["dataset"] = arr_data

    # Optionally consolidate the metadata of both the root and the subgroup.
    # For zarr format 3 this is expected to emit a ZarrUserWarning.
    if consolidate_metadata:
        if zarr_format == 3:
            with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"):
                zarr.consolidate_metadata(src_store)
            with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"):
                zarr.consolidate_metadata(src_store, path="subgroup")
        else:
            zarr.consolidate_metadata(src_store)
            zarr.consolidate_metadata(src_store, path="subgroup")

    dst_store = MemoryStore()

    dst = src.copy_to(dst_store, overwrite=True)

    # Attributes and hierarchy are preserved.
    assert dst.attrs.get("root") is True

    subgroup = dst["subgroup"]
    assert isinstance(subgroup, Group)
    assert subgroup.attrs.get("subgroup") is True

    # Array contents are preserved at both levels.
    copied_arr = dst["dataset"]
    copied_data = copied_arr[:]
    assert np.array_equal(copied_data, arr_data)

    copied_subgroup_arr = subgroup["subgroup_dataset"]
    copied_subgroup_data = copied_subgroup_arr[:]
    assert np.array_equal(copied_subgroup_data, subgroup_arr_data)

    # Consolidated metadata is present in the copy exactly when it was present
    # in the source.
    if consolidate_metadata:
        assert zarr.open_group(dst_store).metadata.consolidated_metadata
        if zarr_format == 3:
            assert zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata
    else:
        assert not zarr.open_group(dst_store).metadata.consolidated_metadata
        assert not zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata


def test_group(store: Store, zarr_format: ZarrFormat) -> None:
"""
Test basic Group routines.
Expand Down