Skip to content

Commit 50cd5e0

Browse files
committed
Merge branch 'main' into zarr-extensions
2 parents d3b19cf + 64b9a37 commit 50cd5e0

File tree

25 files changed

+2087
-160
lines changed

25 files changed

+2087
-160
lines changed

changes/2665.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Adds functions for concurrently creating multiple arrays and groups.

changes/2847.fix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed a bug where ``ArrayV2Metadata`` could save ``filters`` as an empty array.

changes/2851.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed a bug when setting values in a last chunk that is smaller than the chunk shape.

docs/quickstart.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,28 @@ Zarr allows you to create hierarchical groups, similar to directories::
119119

120120
This creates a group with two datasets: ``foo`` and ``bar``.
121121

122+
Batch Hierarchy Creation
123+
~~~~~~~~~~~~~~~~~~~~~~~~
124+
125+
Zarr provides tools for creating a collection of arrays and groups with a single function call.
126+
Suppose we want to copy existing groups and arrays into a new storage backend:
127+
128+
>>> # Create nested groups and add arrays
129+
>>> root = zarr.group("data/example-3.zarr", attributes={'name': 'root'})
130+
>>> foo = root.create_group(name="foo")
131+
>>> bar = root.create_array(
132+
... name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4"
133+
... )
134+
>>> nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()}
135+
>>> print(nodes)
136+
>>> from zarr.storage import MemoryStore
137+
>>> new_nodes = dict(zarr.create_hierarchy(store=MemoryStore(), nodes=nodes))
138+
>>> new_root = new_nodes['']
139+
>>> assert new_root.attrs == root.attrs
140+
141+
Note that :func:`zarr.create_hierarchy` will only initialize arrays and groups -- copying array data must
142+
be done in a separate step.
143+
122144
Persistent Storage
123145
------------------
124146

docs/user-guide/groups.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,31 @@ For more information on groups see the :class:`zarr.Group` API docs.
7575

7676
.. _user-guide-diagnostics:
7777

78+
Batch Group Creation
79+
--------------------
80+
81+
You can also create multiple groups concurrently with a single function call. :func:`zarr.create_hierarchy` takes
82+
a :class:`zarr.abc.store.Store` instance and a dict of ``key : metadata`` pairs, parses that dict, and
83+
writes metadata documents to storage:
84+
85+
>>> from zarr import create_hierarchy
86+
>>> from zarr.core.group import GroupMetadata
87+
>>> from zarr.storage import LocalStore
88+
>>> node_spec = {'a/b/c': GroupMetadata()}
89+
>>> nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec))
90+
>>> print(sorted(nodes_created.items(), key=lambda kv: len(kv[0])))
91+
[('', <Group file://data>), ('a', <Group file://data/a>), ('a/b', <Group file://data/a/b>), ('a/b/c', <Group file://data/a/b/c>)]
92+
93+
Note that we only specified a single group named ``a/b/c``, but 4 groups were created. These additional groups
94+
were created to ensure that the desired node ``a/b/c`` is connected to the root group ``''`` by a sequence
95+
of intermediate groups. :func:`zarr.create_hierarchy` normalizes the ``nodes`` keyword argument to
96+
ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root
97+
of the hierarchy via intermediate groups.
98+
99+
Because :func:`zarr.create_hierarchy` concurrently creates metadata documents, it's more efficient
100+
than repeated calls to :func:`zarr.create_group` or :func:`zarr.create_array`, provided you can statically define
101+
the metadata for the groups and arrays you want to create.
102+
78103
Array and group diagnostics
79104
---------------------------
80105

src/zarr/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
create,
99
create_array,
1010
create_group,
11+
create_hierarchy,
1112
empty,
1213
empty_like,
1314
full,
@@ -50,6 +51,7 @@
5051
"create",
5152
"create_array",
5253
"create_group",
54+
"create_hierarchy",
5355
"empty",
5456
"empty_like",
5557
"full",

src/zarr/api/asynchronous.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,12 @@
2323
_warn_write_empty_chunks_kwarg,
2424
parse_dtype,
2525
)
26-
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
26+
from zarr.core.group import (
27+
AsyncGroup,
28+
ConsolidatedMetadata,
29+
GroupMetadata,
30+
create_hierarchy,
31+
)
2732
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
2833
from zarr.core.metadata.v2 import _default_compressor, _default_filters
2934
from zarr.errors import NodeTypeValidationError
@@ -48,6 +53,7 @@
4853
"copy_store",
4954
"create",
5055
"create_array",
56+
"create_hierarchy",
5157
"empty",
5258
"empty_like",
5359
"full",

src/zarr/api/synchronous.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from zarr.core.array import Array, AsyncArray
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
13+
from zarr.core.sync_group import create_hierarchy
1314

1415
if TYPE_CHECKING:
1516
from collections.abc import Iterable
@@ -46,6 +47,7 @@
4647
"copy_store",
4748
"create",
4849
"create_array",
50+
"create_hierarchy",
4951
"empty",
5052
"empty_like",
5153
"full",

src/zarr/core/codec_pipeline.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -296,17 +296,6 @@ def _merge_chunk_array(
296296
is_complete_chunk: bool,
297297
drop_axes: tuple[int, ...],
298298
) -> NDBuffer:
299-
if is_complete_chunk and value.shape == chunk_spec.shape:
300-
return value
301-
if existing_chunk_array is None:
302-
chunk_array = chunk_spec.prototype.nd_buffer.create(
303-
shape=chunk_spec.shape,
304-
dtype=chunk_spec.dtype,
305-
order=chunk_spec.order,
306-
fill_value=fill_value_or_default(chunk_spec),
307-
)
308-
else:
309-
chunk_array = existing_chunk_array.copy() # make a writable copy
310299
if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype):
311300
chunk_value = value
312301
else:
@@ -320,6 +309,20 @@ def _merge_chunk_array(
320309
for idx in range(chunk_spec.ndim)
321310
)
322311
chunk_value = chunk_value[item]
312+
if is_complete_chunk and chunk_value.shape == chunk_spec.shape:
313+
# TODO: For the last chunk, is_complete_chunk can be True for a value
314+
# that is smaller than chunk_spec.shape, but this throws
315+
# an error in _decode_single
316+
return chunk_value
317+
if existing_chunk_array is None:
318+
chunk_array = chunk_spec.prototype.nd_buffer.create(
319+
shape=chunk_spec.shape,
320+
dtype=chunk_spec.dtype,
321+
order=chunk_spec.order,
322+
fill_value=fill_value_or_default(chunk_spec),
323+
)
324+
else:
325+
chunk_array = existing_chunk_array.copy() # make a writable copy
323326
chunk_array[chunk_selection] = chunk_value
324327
return chunk_array
325328

0 commit comments

Comments
 (0)