Skip to content

Commit c0976de

Browse files
committed
update migrated group classes
1 parent 186181f commit c0976de

File tree

2 files changed

+10
-312
lines changed

2 files changed

+10
-312
lines changed

src/zarr/core/group.py

Lines changed: 0 additions & 311 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
from __future__ import annotations
22

33
import asyncio
4-
import base64
5-
import itertools
64
import json
75
import logging
86
import warnings
@@ -100,315 +98,6 @@ def _parse_async_node(
10098
raise TypeError(f"Unknown node type, got {type(node)}")
10199

102100

103-
@dataclass(frozen=True)
104-
class ConsolidatedMetadata:
105-
"""
106-
Consolidated Metadata for this Group.
107-
108-
This stores the metadata of child nodes below this group. Any child groups
109-
will have their consolidated metadata set appropriately.
110-
"""
111-
112-
metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]
113-
kind: Literal["inline"] = "inline"
114-
must_understand: Literal[False] = False
115-
116-
def to_dict(self) -> dict[str, JSON]:
117-
return {
118-
"kind": self.kind,
119-
"must_understand": self.must_understand,
120-
"metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()},
121-
}
122-
123-
@classmethod
124-
def from_dict(cls, data: dict[str, JSON]) -> ConsolidatedMetadata:
125-
data = dict(data)
126-
127-
kind = data.get("kind")
128-
if kind != "inline":
129-
raise ValueError(f"Consolidated metadata kind='{kind}' is not supported.")
130-
131-
raw_metadata = data.get("metadata")
132-
if not isinstance(raw_metadata, dict):
133-
raise TypeError(f"Unexpected type for 'metadata': {type(raw_metadata)}")
134-
135-
metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {}
136-
if raw_metadata:
137-
for k, v in raw_metadata.items():
138-
if not isinstance(v, dict):
139-
raise TypeError(
140-
f"Invalid value for metadata items. key='{k}', type='{type(v).__name__}'"
141-
)
142-
143-
# zarr_format is present in v2 and v3.
144-
zarr_format = parse_zarr_format(v["zarr_format"])
145-
146-
if zarr_format == 3:
147-
node_type = parse_node_type(v.get("node_type", None))
148-
if node_type == "group":
149-
metadata[k] = GroupMetadata.from_dict(v)
150-
elif node_type == "array":
151-
metadata[k] = ArrayV3Metadata.from_dict(v)
152-
else:
153-
assert_never(node_type)
154-
elif zarr_format == 2:
155-
if "shape" in v:
156-
metadata[k] = ArrayV2Metadata.from_dict(v)
157-
else:
158-
metadata[k] = GroupMetadata.from_dict(v)
159-
else:
160-
assert_never(zarr_format)
161-
162-
cls._flat_to_nested(metadata)
163-
164-
return cls(metadata=metadata)
165-
166-
@staticmethod
167-
def _flat_to_nested(
168-
metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata],
169-
) -> None:
170-
"""
171-
Convert a flat metadata representation to a nested one.
172-
173-
Notes
174-
-----
175-
Flat metadata is used when persisting the consolidated metadata. The keys
176-
include the full path, not just the node name. The key prefixes can be
177-
used to determine which nodes are children of which other nodes.
178-
179-
Nested metadata is used in-memory. The outermost level will only have the
180-
*immediate* children of the Group. All nested child groups will be stored
181-
under the consolidated metadata of their immediate parent.
182-
"""
183-
# We have a flat mapping from {k: v} where the keys include the *full*
184-
# path segment:
185-
# {
186-
# "/a/b": { group_metadata },
187-
# "/a/b/array-0": { array_metadata },
188-
# "/a/b/array-1": { array_metadata },
189-
# }
190-
#
191-
# We want to reorganize the metadata such that each Group contains the
192-
# array metadata of its immediate children.
193-
# In the example, the group at `/a/b` will have consolidated metadata
194-
# for its children `array-0` and `array-1`.
195-
#
196-
# metadata = dict(metadata)
197-
198-
keys = sorted(metadata, key=lambda k: k.count("/"))
199-
grouped = {
200-
k: list(v) for k, v in itertools.groupby(keys, key=lambda k: k.rsplit("/", 1)[0])
201-
}
202-
203-
# we go top down and directly manipulate metadata.
204-
for key, children_keys in grouped.items():
205-
# key is a key like "a", "a/b", "a/b/c"
206-
# The basic idea is to find the immediate parent (so "", "a", or "a/b")
207-
# and update that node's consolidated metadata to include the metadata
208-
# in children_keys
209-
*prefixes, name = key.split("/")
210-
parent = metadata
211-
212-
while prefixes:
213-
# e.g. a/b/c has a parent "a/b". Walk through to get
214-
# metadata["a"]["b"]
215-
part = prefixes.pop(0)
216-
# we can assume that parent[part] here is a group
217-
# otherwise we wouldn't have a node with this `part` prefix.
218-
# We can also assume that the parent node will have consolidated metadata,
219-
# because we're walking top to bottom.
220-
parent = parent[part].consolidated_metadata.metadata # type: ignore[union-attr]
221-
222-
node = parent[name]
223-
children_keys = list(children_keys)
224-
225-
if isinstance(node, ArrayV2Metadata | ArrayV3Metadata):
226-
# These are already present, either thanks to being an array in the
227-
# root, or by being collected as a child in the else clause
228-
continue
229-
children_keys = list(children_keys)
230-
# We pop from metadata, since we're *moving* this under group
231-
children = {
232-
child_key.split("/")[-1]: metadata.pop(child_key)
233-
for child_key in children_keys
234-
if child_key != key
235-
}
236-
parent[name] = replace(
237-
node, consolidated_metadata=ConsolidatedMetadata(metadata=children)
238-
)
239-
240-
@property
241-
def flattened_metadata(self) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]:
242-
"""
243-
Return the flattened representation of Consolidated Metadata.
244-
245-
The returned dictionary will have a key for each child node in the hierarchy
246-
under this group. Under the default (nested) representation available through
247-
``self.metadata``, the dictionary only contains keys for immediate children.
248-
249-
The keys of the dictionary will include the full path to a child node from
250-
the current group, where segments are joined by ``/``.
251-
252-
Examples
253-
--------
254-
>>> cm = ConsolidatedMetadata(
255-
... metadata={
256-
... "group-0": GroupMetadata(
257-
... consolidated_metadata=ConsolidatedMetadata(
258-
... {
259-
... "group-0-0": GroupMetadata(),
260-
... }
261-
... )
262-
... ),
263-
... "group-1": GroupMetadata(),
264-
... }
265-
... )
266-
{'group-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'),
267-
'group-0/group-0-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'),
268-
'group-1': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group')}
269-
"""
270-
metadata = {}
271-
272-
def flatten(
273-
key: str, group: GroupMetadata | ArrayV2Metadata | ArrayV3Metadata
274-
) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]:
275-
children: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {}
276-
if isinstance(group, ArrayV2Metadata | ArrayV3Metadata):
277-
children[key] = group
278-
else:
279-
if group.consolidated_metadata and group.consolidated_metadata.metadata is not None:
280-
children[key] = replace(
281-
group, consolidated_metadata=ConsolidatedMetadata(metadata={})
282-
)
283-
for name, val in group.consolidated_metadata.metadata.items():
284-
full_key = f"{key}/{name}"
285-
if isinstance(val, GroupMetadata):
286-
children.update(flatten(full_key, val))
287-
else:
288-
children[full_key] = val
289-
else:
290-
children[key] = replace(group, consolidated_metadata=None)
291-
return children
292-
293-
for k, v in self.metadata.items():
294-
metadata.update(flatten(k, v))
295-
296-
return metadata
297-
298-
299-
@dataclass(frozen=True)
300-
class GroupMetadata(Metadata):
301-
"""
302-
Metadata for a Group.
303-
"""
304-
305-
attributes: dict[str, Any] = field(default_factory=dict)
306-
zarr_format: ZarrFormat = 3
307-
consolidated_metadata: ConsolidatedMetadata | None = None
308-
node_type: Literal["group"] = field(default="group", init=False)
309-
310-
def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
311-
json_indent = config.get("json_indent")
312-
if self.zarr_format == 3:
313-
return {
314-
ZARR_JSON: prototype.buffer.from_bytes(
315-
json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode()
316-
)
317-
}
318-
else:
319-
items = {
320-
ZGROUP_JSON: prototype.buffer.from_bytes(
321-
json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode()
322-
),
323-
ZATTRS_JSON: prototype.buffer.from_bytes(
324-
json.dumps(self.attributes, indent=json_indent).encode()
325-
),
326-
}
327-
if self.consolidated_metadata:
328-
d = {
329-
ZGROUP_JSON: {"zarr_format": self.zarr_format},
330-
ZATTRS_JSON: self.attributes,
331-
}
332-
consolidated_metadata = self.consolidated_metadata.to_dict()["metadata"]
333-
assert isinstance(consolidated_metadata, dict)
334-
for k, v in consolidated_metadata.items():
335-
attrs = v.pop("attributes", None)
336-
d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
337-
if "shape" in v:
338-
# it's an array
339-
if isinstance(v.get("fill_value", None), np.void):
340-
v["fill_value"] = base64.standard_b64encode(
341-
cast(bytes, v["fill_value"])
342-
).decode("ascii")
343-
else:
344-
v = _replace_special_floats(v)
345-
d[f"{k}/{ZARRAY_JSON}"] = v
346-
else:
347-
d[f"{k}/{ZGROUP_JSON}"] = {
348-
"zarr_format": self.zarr_format,
349-
"consolidated_metadata": {
350-
"metadata": {},
351-
"must_understand": False,
352-
"kind": "inline",
353-
},
354-
}
355-
356-
items[ZMETADATA_V2_JSON] = prototype.buffer.from_bytes(
357-
json.dumps(
358-
{"metadata": d, "zarr_consolidated_format": 1},
359-
cls=V3JsonEncoder,
360-
).encode()
361-
)
362-
363-
return items
364-
365-
def __init__(
366-
self,
367-
attributes: dict[str, Any] | None = None,
368-
zarr_format: ZarrFormat = 3,
369-
consolidated_metadata: ConsolidatedMetadata | None = None,
370-
) -> None:
371-
attributes_parsed = parse_attributes(attributes)
372-
zarr_format_parsed = parse_zarr_format(zarr_format)
373-
374-
object.__setattr__(self, "attributes", attributes_parsed)
375-
object.__setattr__(self, "zarr_format", zarr_format_parsed)
376-
object.__setattr__(self, "consolidated_metadata", consolidated_metadata)
377-
378-
@classmethod
379-
def from_dict(cls, data: dict[str, Any]) -> GroupMetadata:
380-
data = dict(data)
381-
assert data.pop("node_type", None) in ("group", None)
382-
consolidated_metadata = data.pop("consolidated_metadata", None)
383-
if consolidated_metadata:
384-
data["consolidated_metadata"] = ConsolidatedMetadata.from_dict(consolidated_metadata)
385-
386-
zarr_format = data.get("zarr_format")
387-
if zarr_format == 2 or zarr_format is None:
388-
# zarr v2 allowed arbitrary keys here.
389-
# We don't want the GroupMetadata constructor to fail just because someone put an
390-
# extra key in the metadata.
391-
expected = {x.name for x in fields(cls)}
392-
data = {k: v for k, v in data.items() if k in expected}
393-
394-
return cls(**data)
395-
396-
def to_dict(self) -> dict[str, Any]:
397-
result = asdict(replace(self, consolidated_metadata=None))
398-
if self.consolidated_metadata:
399-
result["consolidated_metadata"] = self.consolidated_metadata.to_dict()
400-
return result
401-
402-
403-
@dataclass(frozen=True)
404-
class ImplicitGroupMarker(GroupMetadata):
405-
"""
406-
Marker for an implicit group. Instances of this class are only used in the context of group
407-
creation as a placeholder to represent groups that should only be created if they do not
408-
already exist in storage
409-
"""
410-
411-
412101
@dataclass(frozen=True)
413102
class AsyncGroup:
414103
"""

src/zarr/core/metadata/group.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
from __future__ import annotations
22

3+
import base64
34
import itertools
45
import json
56
from dataclasses import asdict, dataclass, field, fields, replace
67
from typing import TYPE_CHECKING, Literal, assert_never, cast
78

9+
import numpy as np
10+
811
from zarr.abc.metadata import Metadata
912
from zarr.core.common import (
1013
JSON,
@@ -97,7 +100,13 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
97100
d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
98101
if "shape" in v:
99102
# it's an array
100-
d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)
103+
if isinstance(v.get("fill_value", None), np.void):
104+
v["fill_value"] = base64.standard_b64encode(
105+
cast(bytes, v["fill_value"])
106+
).decode("ascii")
107+
else:
108+
v = _replace_special_floats(v)
109+
d[f"{k}/{ZARRAY_JSON}"] = v
101110
else:
102111
d[f"{k}/{ZGROUP_JSON}"] = {
103112
"zarr_format": self.zarr_format,

0 commit comments

Comments
 (0)