Skip to content

Commit 65cf759

Browse files
authored
Support unlistable stores for all v04 groups (#278)
* Support unlistable stores for Well
* Move get_array_paths onto attribute objects
* Move _to_zarr to shared file
* Add different API for optional array paths
* Implement checking group paths
* Remove old print statements
* Fix HCS with unlistable stores
* Add comment
* Use _from_zarr with Labels
* Fix Well.from_zarr() docstring
1 parent 27d9ff9 commit 65cf759

File tree

16 files changed

+281
-61
lines changed

16 files changed

+281
-61
lines changed

src/ome_zarr_models/_v06/image.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,8 @@ def from_zarr(cls, group: zarr.Group) -> Self: # type: ignore[override]
5454
members_tree_flat: dict[str, AnyGroupSpec | AnyArraySpec] = {}
5555
for multiscale in multi_meta.multiscales:
5656
for dataset in multiscale.datasets:
57-
array_path = f"{group.path}/{dataset.path}"
5857
array_spec = check_array_path(
59-
group, array_path, expected_zarr_version=3
58+
group, dataset.path, expected_zarr_version=3
6059
)
6160
members_tree_flat["/" + dataset.path] = array_spec
6261

src/ome_zarr_models/base.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Literal
33

44
from pydantic import BaseModel, ConfigDict
5+
from pydantic_zarr.v2 import AnyGroupSpec
56

67

78
class BaseAttrs(BaseModel):
@@ -21,6 +22,38 @@ class BaseAttrs(BaseModel):
2122
frozen=True,
2223
)
2324

25+
def get_array_paths(self) -> list[str]:
26+
"""
27+
Get a list of all array paths expected and required to live in the Group
28+
with these attributes.
29+
"""
30+
return []
31+
32+
def get_optional_array_paths(self) -> list[str]:
33+
"""
34+
Get a list of all array paths expected but not required to live in the Group
35+
with these attributes.
36+
"""
37+
return []
38+
39+
def get_group_paths(self) -> dict[str, type[AnyGroupSpec]]:
40+
"""
41+
Get all group paths expected and required to live in the Group
42+
with these attributes.
43+
44+
Must return a dictionary mapping paths to their GroupSpec class.
45+
"""
46+
return {}
47+
48+
def get_optional_group_paths(self) -> dict[str, type[AnyGroupSpec]]:
49+
"""
50+
Get all group paths expected but not required to live in the Group
51+
with these attributes.
52+
53+
Must return a dictionary mapping paths to their GroupSpec class.
54+
"""
55+
return {}
56+
2457

2558
class BaseGroup(ABC):
2659
"""

src/ome_zarr_models/common/validation.py

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,31 +88,94 @@ def check_array_path(
8888
If the array doesn't exist, or the array is not the expected Zarr version.
8989
"""
9090
try:
91-
array = zarr.open_array(store=group.store, path=array_path, mode="r")
92-
array_spec: AnyArraySpecv2 | AnyArraySpecv3
93-
if array.metadata.zarr_format == 2:
94-
if expected_zarr_version == 3:
95-
raise ValueError("Expected Zarr v3 array, but got v2 array")
96-
array_spec = ArraySpecv2.from_zarr(array)
97-
else:
98-
if expected_zarr_version == 2:
99-
raise ValueError("Expected Zarr v2 array, but got v3 array")
100-
array_spec = ArraySpecv3.from_zarr(array)
91+
array = zarr.open_array(store=group.store_path, path=array_path, mode="r")
10192
except FileNotFoundError as e:
10293
msg = (
10394
f"Expected to find an array at {array_path}, but no array was found there."
10495
)
10596
raise ValueError(msg) from e
106-
except zarr.errors.ContainsGroupError as e:
97+
except (zarr.errors.ContainsGroupError, zarr.errors.NodeTypeValidationError) as e:
10798
msg = (
10899
f"Expected to find an array at {array_path}, "
109100
"but a group was found there instead."
110101
)
111102
raise ValueError(msg) from e
112103

104+
array_spec: AnyArraySpecv2 | AnyArraySpecv3
105+
if array.metadata.zarr_format == 2:
106+
if expected_zarr_version == 3:
107+
raise ValueError("Expected Zarr v3 array, but got v2 array")
108+
array_spec = ArraySpecv2.from_zarr(array)
109+
else:
110+
if expected_zarr_version == 2:
111+
raise ValueError("Expected Zarr v2 array, but got v3 array")
112+
array_spec = ArraySpecv3.from_zarr(array)
113+
113114
return array_spec
114115

115116

117+
@overload
118+
def check_group_path(
119+
group: zarr.Group,
120+
group_path: str,
121+
*,
122+
expected_zarr_version: Literal[2],
123+
) -> AnyGroupSpecv2: ...
124+
125+
126+
@overload
127+
def check_group_path(
128+
group: zarr.Group,
129+
group_path: str,
130+
*,
131+
expected_zarr_version: Literal[3],
132+
) -> AnyGroupSpecv3: ...
133+
134+
135+
def check_group_path(
136+
group: zarr.Group,
137+
group_path: str,
138+
*,
139+
expected_zarr_version: Literal[2, 3],
140+
) -> AnyGroupSpecv2 | AnyGroupSpecv3:
141+
"""
142+
Check if a group exists at a given path in a group.
143+
144+
Returns
145+
-------
146+
GroupSpec
147+
If the path exists, its GroupSpec is returned.
148+
149+
Raises
150+
------
151+
ValueError
152+
If the group is not the expected Zarr version. (A missing group raises FileNotFoundError, and an array found at the path raises ContainsArrayError.)
153+
"""
154+
try:
155+
group = zarr.open_group(store=group.store_path, path=group_path, mode="r")
156+
except FileNotFoundError as e:
157+
msg = f"Expected to find a group at {group_path}, but no group was found there."
158+
raise FileNotFoundError(msg) from e
159+
except zarr.errors.ContainsArrayError as e:
160+
msg = (
161+
f"Expected to find an group at {group_path}, "
162+
"but an array was found there instead."
163+
)
164+
raise zarr.errors.ContainsArrayError(msg) from e
165+
166+
group_spec: AnyGroupSpecv2 | AnyGroupSpecv3
167+
if group.metadata.zarr_format == 2:
168+
if expected_zarr_version == 3:
169+
raise ValueError("Expected Zarr v3 array, but got v2 array")
170+
group_spec = GroupSpecv2.from_zarr(group, depth=0)
171+
else:
172+
if expected_zarr_version == 2:
173+
raise ValueError("Expected Zarr v2 array, but got v3 array")
174+
group_spec = GroupSpecv3.from_zarr(group, depth=0)
175+
176+
return group_spec
177+
178+
116179
def check_length(
117180
sequence: Sequence[T], *, valid_lengths: Sequence[int], variable_name: str
118181
) -> None:

src/ome_zarr_models/common/well_types.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ class WellMeta(BaseAttrs):
3232
Metadata for a single well.
3333
"""
3434

35-
images: Annotated[list[WellImage], AfterValidator(unique_items_validator)]
35+
images: Annotated[list[WellImage], AfterValidator(unique_items_validator)] = Field(
36+
..., description="Images within a well"
37+
)
3638
version: str | None = Field(None, description="Version of the well specification")
3739

3840
def get_acquisition_paths(self) -> dict[int, list[str]]:

src/ome_zarr_models/v04/_shared.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from typing import TYPE_CHECKING, Any, TypeVar
2+
3+
import zarr
4+
from pydantic_zarr.v2 import AnyGroupSpec, GroupSpec
5+
6+
from ome_zarr_models.base import BaseAttrs
7+
from ome_zarr_models.common.validation import (
8+
check_array_path,
9+
check_group_path,
10+
)
11+
from ome_zarr_models.v04.base import BaseGroupv04
12+
13+
TCls = TypeVar("TCls", bound=BaseGroupv04[Any])
14+
TAttrs = TypeVar("TAttrs", bound=BaseAttrs)
15+
16+
if TYPE_CHECKING:
17+
from pydantic_zarr.v2 import AnyArraySpec, AnyGroupSpec
18+
19+
20+
def _from_zarr(
21+
group: zarr.Group,
22+
group_cls: type[TCls],
23+
attrs_cls: type[TAttrs],
24+
) -> TCls:
25+
"""
26+
Create a GroupSpec from a potentially unlistable Zarr group.
27+
28+
This uses methods on the attribute class to get required and optional
29+
paths to arrays and groups, and then manually constructs the GroupSpec
30+
from those paths.
31+
32+
Parameters
33+
----------
34+
group :
35+
Zarr group to create GroupSpec from.
36+
group_cls :
37+
Class of the Group to return.
38+
attrs_cls :
39+
Attributes class.
40+
"""
41+
# on unlistable storage backends, the members of this group will be {}
42+
group_spec: AnyGroupSpec = GroupSpec.from_zarr(group, depth=0)
43+
attributes = attrs_cls.model_validate(group_spec.attributes)
44+
45+
members_tree_flat: dict[str, AnyGroupSpec | AnyArraySpec] = {}
46+
47+
# Required array paths
48+
for array_path in attrs_cls.get_array_paths(attributes):
49+
array_spec = check_array_path(group, array_path, expected_zarr_version=2)
50+
members_tree_flat["/" + array_path] = array_spec
51+
52+
# Optional array paths
53+
for array_path in attrs_cls.get_optional_array_paths(attributes):
54+
try:
55+
array_spec = check_array_path(group, array_path, expected_zarr_version=2)
56+
except ValueError:
57+
continue
58+
members_tree_flat["/" + array_path] = array_spec
59+
60+
# Required group paths
61+
required_groups = attrs_cls.get_group_paths(attributes)
62+
for group_path in required_groups:
63+
group_spec = check_group_path(group, group_path, expected_zarr_version=2)
64+
group_flat = required_groups[group_path].from_zarr(group[group_path]).to_flat() # type: ignore[arg-type]
65+
for path in group_flat:
66+
members_tree_flat["/" + group_path + path] = group_flat[path]
67+
68+
# Optional group paths
69+
optional_groups = attrs_cls.get_optional_group_paths(attributes)
70+
for group_path in optional_groups:
71+
try:
72+
group_spec = check_group_path(group, group_path, expected_zarr_version=2)
73+
except FileNotFoundError:
74+
continue
75+
group_flat = optional_groups[group_path].from_zarr(group[group_path]).to_flat() # type: ignore[arg-type]
76+
for path in group_flat:
77+
members_tree_flat["/" + group_path + path] = group_flat[path]
78+
79+
members_normalized: AnyGroupSpec = GroupSpec.from_flat(members_tree_flat)
80+
return group_cls(members=members_normalized.members, attributes=attributes)

src/ome_zarr_models/v04/hcs.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from collections.abc import Generator, Mapping
22
from typing import Self
33

4+
import zarr
45
from pydantic import model_validator
5-
from pydantic_zarr.v2 import GroupSpec
6+
from pydantic_zarr.v2 import AnyGroupSpec, GroupSpec
67

78
from ome_zarr_models.base import BaseAttrs
89
from ome_zarr_models.common.well import WellGroupNotFoundError
10+
from ome_zarr_models.v04._shared import _from_zarr
911
from ome_zarr_models.v04.base import BaseGroupv04
1012
from ome_zarr_models.v04.plate import Plate
1113
from ome_zarr_models.v04.well import Well
@@ -20,12 +22,37 @@ class HCSAttrs(BaseAttrs):
2022

2123
plate: Plate
2224

25+
def get_optional_group_paths(self) -> dict[str, type[AnyGroupSpec]]: # noqa: D102
26+
return {well.path: Well for well in self.plate.wells}
27+
2328

2429
class HCS(BaseGroupv04[HCSAttrs]):
2530
"""
2631
An OME-Zarr high-content screening (HCS) dataset representing a single plate.
2732
"""
2833

34+
@classmethod
35+
def from_zarr(cls, group: zarr.Group) -> Self: # type: ignore[override]
36+
"""
37+
Create an OME-Zarr HCS model from a `zarr.Group`.
38+
39+
Parameters
40+
----------
41+
group : zarr.Group
42+
A Zarr group that has valid OME-Zarr HCS metadata.
43+
"""
44+
hcs = _from_zarr(group, cls, HCSAttrs)
45+
# Traverse all the Well groups, which themselves contain Image groups
46+
hcs_flat = hcs.to_flat()
47+
for well in hcs.attributes.plate.wells:
48+
well_group = group[well.path]
49+
well_group_flat = Well.from_zarr(well_group).to_flat() # type: ignore[arg-type]
50+
for path in well_group_flat:
51+
hcs_flat["/" + well.path + path] = well_group_flat[path]
52+
53+
hcs_unflat: AnyGroupSpec = GroupSpec.from_flat(hcs_flat)
54+
return cls(attributes=hcs_unflat.attributes, members=hcs_unflat.members)
55+
2956
@model_validator(mode="after")
3057
def _check_valid_acquisitions(self) -> Self:
3158
"""

src/ome_zarr_models/v04/image.py

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@
22

33
from typing import TYPE_CHECKING, Self
44

5-
import zarr
6-
import zarr.errors
75
from pydantic import Field, JsonValue, model_validator
86
from pydantic_zarr.v2 import AnyArraySpec, AnyGroupSpec, GroupSpec
97

108
from ome_zarr_models.base import BaseAttrs
119
from ome_zarr_models.common.coordinate_transformations import _build_transforms
12-
from ome_zarr_models.common.validation import check_array_path
10+
from ome_zarr_models.v04._shared import _from_zarr
1311
from ome_zarr_models.v04.axes import Axis
1412
from ome_zarr_models.v04.base import BaseGroupv04
1513
from ome_zarr_models.v04.labels import Labels
@@ -19,6 +17,8 @@
1917
if TYPE_CHECKING:
2018
from collections.abc import Sequence
2119

20+
import zarr
21+
2222

2323
__all__ = ["Image", "ImageAttrs"]
2424

@@ -35,6 +35,16 @@ class ImageAttrs(BaseAttrs):
3535
)
3636
omero: Omero | None = None
3737

38+
def get_array_paths(self) -> list[str]: # noqa: D102
39+
paths = []
40+
for multiscale in self.multiscales:
41+
for dataset in multiscale.datasets:
42+
paths.append(dataset.path)
43+
return paths
44+
45+
def get_optional_group_paths(self) -> dict[str, type[AnyGroupSpec]]: # noqa: D102
46+
return {"labels": Labels}
47+
3848

3949
class Image(BaseGroupv04[ImageAttrs]):
4050
"""
@@ -51,31 +61,7 @@ def from_zarr(cls, group: zarr.Group) -> Self: # type: ignore[override]
5161
group : zarr.Group
5262
A Zarr group that has valid OME-Zarr image metadata.
5363
"""
54-
# on unlistable storage backends, the members of this group will be {}
55-
group_spec: AnyGroupSpec = GroupSpec.from_zarr(group, depth=0)
56-
57-
multi_meta = ImageAttrs.model_validate(group_spec.attributes)
58-
members_tree_flat: dict[str, AnyGroupSpec | AnyArraySpec] = {}
59-
for multiscale in multi_meta.multiscales:
60-
for dataset in multiscale.datasets:
61-
array_path = f"{group.path}/{dataset.path}"
62-
array_spec = check_array_path(
63-
group, array_path, expected_zarr_version=2
64-
)
65-
members_tree_flat["/" + dataset.path] = array_spec
66-
67-
try:
68-
labels_group = zarr.open_group(store=group.store, path="labels", mode="r")
69-
members_tree_flat["/labels"] = GroupSpec.from_zarr(labels_group, depth=0)
70-
except zarr.errors.GroupNotFoundError:
71-
pass
72-
73-
members_normalized: AnyGroupSpec = GroupSpec.from_flat(members_tree_flat)
74-
75-
group_spec = group_spec.model_copy(
76-
update={"members": members_normalized.members}
77-
)
78-
return cls(**group_spec.model_dump())
64+
return _from_zarr(group, cls, ImageAttrs)
7965

8066
@classmethod
8167
def new(

0 commit comments

Comments
 (0)