Skip to content

Commit de76fe4

Browse files
Automatically scale ome-tiff and ome-zarr data (#14)
* automatically scale ome-tiff and ome-zarr
* add idr data for testing
* linting
* comments about scaling values in notebook
  Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
* update docs about default scaling values from meta
  Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
* add a check for docstrings in appropriate functions
  Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
* add missing docstrings
  Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
* add code comments to _read_ngff_scale
  Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
---------
Co-authored-by: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
1 parent 552b995 commit de76fe4

File tree

252 files changed

+442
-35
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

252 files changed

+442
-35
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,12 @@ repos:
3939
- id: yamllint
4040
exclude: pre-commit-config.yaml
4141
- repo: https://github.com/astral-sh/ruff-pre-commit
42-
rev: "v0.14.8"
42+
rev: "v0.14.10"
4343
hooks:
4444
- id: ruff-format
4545
- id: ruff-check
4646
- repo: https://github.com/rhysd/actionlint
47-
rev: v1.7.9
47+
rev: v1.7.10
4848
hooks:
4949
- id: actionlint
5050
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update

CITATION.cff

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,29 @@ references:
129129
JUMP (cpg0000-jump-pilot) was used to help demonstrate CytoDataFrame performance
130130
with large data. See here for more information:
131131
https://github.com/broadinstitute/cellpainting-gallery
132+
- type: article
133+
authors:
134+
- family-names: Blin
135+
given-names: Guillaume
136+
- family-names: Sadurska
137+
given-names: Dominika
138+
- family-names: Portero Migueles
139+
given-names: Rafael
140+
- family-names: Chen
141+
given-names: Ni
142+
- family-names: Watson
143+
given-names: James A.
144+
- family-names: Lowell
145+
given-names: Sally
146+
title: "Nessys: A new set of tools for the automated detection of nuclei within intact tissues and dense 3D cultures"
147+
journal: PLoS Biology
148+
volume: "17"
149+
issue: "8"
150+
pages: e3000388
151+
year: 2019
152+
doi: "10.1371/journal.pbio.3000388"
153+
url: "https://doi.org/10.1371/journal.pbio.3000388"
154+
notes: >
155+
This work used the file "6001240_labels.zarr" from the DISCEPTS imaging
156+
dataset, available through the Image Data Resource (IDR) under accession
157+
number idr0062.

docs/src/examples/learning_to_fly_with_ome-arrow.ipynb

Lines changed: 72 additions & 14 deletions
Large diffs are not rendered by default.

docs/src/examples/learning_to_fly_with_ome-arrow.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@
4444
stack
4545

4646
# we can visualize the stack using pyvista for 3D rendering
47-
stack.view(how="pyvista")
47+
# note: we use manually specified scaling values here
48+
# and can also default to what the image metadata provides
49+
# with `scaling_values=None` (the default).
50+
stack.view(how="pyvista", scaling_values=(1, 0.1, 0.1))
4851

4952
# here we demonstrate that the data can be exported again
5053
# into numpy format and re-imported
@@ -95,3 +98,8 @@
9598
)
9699
# by default, the image and metadata are shown
97100
oa_image
101+
102+
# read a 3d zarr image from IDR
103+
oa_image = OMEArrow(data="../../../tests/data/idr0062A/6001240_labels.zarr")
104+
# show the image using pyvista
105+
oa_image.view(how="pyvista")

pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,15 @@ lint.select = [
8484
"C4",
8585
# mccabe
8686
"C90",
87+
# pydocstyle (docstring presence)
88+
"D100",
89+
"D101",
90+
"D102",
91+
"D103",
92+
"D104",
93+
"D105",
94+
"D106",
95+
"D107",
8796
# pycodestyle
8897
"E",
8998
# pyflakes
@@ -102,6 +111,8 @@ lint.select = [
102111
lint.per-file-ignores."*" = [ "ANN401", "C901", "PLC0415", "PLR0912", "PLR0913", "PLR0915", "PLR2004" ]
103112
# Ignore `F401` (unused imports) for `__init__.py` file
104113
lint.per-file-ignores."__init__.py" = [ "F401" ]
114+
# ignore docstring presence checks for docs
115+
lint.per-file-ignores."docs/*" = [ "D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107" ]
105116
# ignore typing rules for tests
106117
lint.per-file-ignores."tests/*" = [ "ANN201", "E501", "PLR0913", "PLR2004" ]
107118

src/ome_arrow/core.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def view(
299299
opacity: str | float = "sigmoid",
300300
clim: tuple[float, float] | None = None,
301301
show_axes: bool = True,
302-
scaling_values: tuple[float, float, float] | None = (1.0, 0.1, 0.1),
302+
scaling_values: tuple[float, float, float] | None = None,
303303
) -> matplotlib.figure.Figure | "pyvista.Plotter":
304304
"""
305305
Render an OME-Arrow record using Matplotlib or PyVista.
@@ -337,7 +337,10 @@ def view(
337337
clim: Contrast limits (``(low, high)``) for PyVista rendering.
338338
show_axes: If ``True``, display axes in the PyVista scene.
339339
scaling_values: Physical scale multipliers for the (x, y, z) axes used by
340-
PyVista, typically to express anisotropy. Defaults to ``(1.0, 0.1, 0.1)``.
340+
PyVista, typically to express anisotropy. If ``None``, uses metadata
341+
scaling from the OME-Arrow record (pixels_meta.physical_size_x/y/z).
342+
Each axis falls back to 1 µm when the source image metadata does not
343+
provide a physical size for it.
341344
342345
Returns:
343346
matplotlib.figure.Figure | pyvista.Plotter:

src/ome_arrow/ingest.py

Lines changed: 151 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import itertools
6+
import json
67
import re
78
import warnings
89
from datetime import datetime, timezone
@@ -20,6 +21,129 @@
2021
from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION
2122

2223

24+
def _normalize_unit(unit: str | None) -> str | None:
25+
if not unit:
26+
return None
27+
u = unit.strip().lower()
28+
if u in {"micrometer", "micrometre", "micron", "microns", "um", "µm"}:
29+
return "µm"
30+
if u in {"nanometer", "nanometre", "nm"}:
31+
return "nm"
32+
return unit
33+
34+
35+
def _read_physical_pixel_sizes(
36+
img: BioImage,
37+
) -> tuple[float, float, float, str | None, bool]:
38+
pps = getattr(img, "physical_pixel_sizes", None)
39+
if pps is None:
40+
return 1.0, 1.0, 1.0, None, False
41+
42+
vx = getattr(pps, "X", None) or getattr(pps, "x", None)
43+
vy = getattr(pps, "Y", None) or getattr(pps, "y", None)
44+
vz = getattr(pps, "Z", None) or getattr(pps, "z", None)
45+
46+
if vx is None and vy is None and vz is None:
47+
return 1.0, 1.0, 1.0, None, False
48+
49+
try:
50+
psize_x = float(vx or 1.0)
51+
psize_y = float(vy or 1.0)
52+
psize_z = float(vz or 1.0)
53+
except Exception:
54+
return 1.0, 1.0, 1.0, None, False
55+
56+
unit = getattr(pps, "unit", None) or getattr(pps, "units", None)
57+
unit = _normalize_unit(str(unit)) if unit is not None else None
58+
59+
return psize_x, psize_y, psize_z, unit, True
60+
61+
62+
def _load_zarr_attrs(zarr_path: Path) -> dict:
63+
zarr_json = zarr_path / "zarr.json"
64+
if zarr_json.exists():
65+
try:
66+
data = json.loads(zarr_json.read_text())
67+
return data.get("attributes") or data.get("attrs") or {}
68+
except Exception:
69+
return {}
70+
zattrs = zarr_path / ".zattrs"
71+
if zattrs.exists():
72+
try:
73+
return json.loads(zattrs.read_text())
74+
except Exception:
75+
return {}
76+
return {}
77+
78+
79+
def _extract_multiscales(attrs: dict) -> list[dict]:
80+
if not isinstance(attrs, dict):
81+
return []
82+
ome = attrs.get("ome")
83+
if isinstance(ome, dict) and isinstance(ome.get("multiscales"), list):
84+
return ome["multiscales"]
85+
if isinstance(attrs.get("multiscales"), list):
86+
return attrs["multiscales"]
87+
return []
88+
89+
90+
def _read_ngff_scale(
    zarr_path: Path,
) -> tuple[float, float, float, str | None] | None:
    """Read x/y/z scale factors from the multiscales metadata of a Zarr store.

    Attributes are looked up at ``zarr_path`` first and then at the nearest
    ancestor (or ``zarr_path`` itself) whose name ends in ``.zarr``. Only
    the first multiscales entry is used, and within it the dataset whose
    path is ``"0"`` (or the first dataset when there is no such path).

    Args:
        zarr_path: Path to the Zarr store or to a location inside it.

    Returns:
        ``(scale_x, scale_y, scale_z, unit)`` or ``None`` when no usable
        scale transformation is found. Axes without a usable scale are
        reported as ``1.0``. ``unit`` is the normalised unit shared by the
        x/y/z axes that declare one, or ``None`` when none is declared or
        the axes disagree.
    """
    import math

    # ``Path.suffix`` only yields the final extension, so a name such as
    # "image.ome.zarr" also reports ".zarr".
    zarr_root = next(
        (
            parent
            for parent in (zarr_path, *zarr_path.parents)
            if parent.suffix.lower() == ".zarr"
        ),
        zarr_path,
    )

    multiscales: list = []
    for candidate in (zarr_path, zarr_root):
        multiscales = _extract_multiscales(_load_zarr_attrs(candidate))
        if multiscales:
            break
    if not multiscales:
        return None

    ms = multiscales[0]
    if not isinstance(ms, dict):
        return None
    axes = ms.get("axes")
    datasets = ms.get("datasets")
    if not isinstance(axes, list) or not isinstance(datasets, list):
        return None
    datasets = [d for d in datasets if isinstance(d, dict)]
    if not axes or not datasets:
        return None

    ds = next((d for d in datasets if str(d.get("path")) == "0"), datasets[0])
    cts = ds.get("coordinateTransformations")
    if not isinstance(cts, list):
        return None
    scale_ct = next(
        (ct for ct in cts if isinstance(ct, dict) and ct.get("type") == "scale"),
        None,
    )
    if scale_ct is None:
        return None

    scale = scale_ct.get("scale")
    # The scale vector must line up one-to-one with the declared axes.
    if not isinstance(scale, list) or len(scale) != len(axes):
        return None

    axis_scale: dict[str, float] = {}
    axis_unit: dict[str, str] = {}
    for ax, factor in zip(axes, scale):
        if not isinstance(ax, dict):
            continue
        name = str(ax.get("name", "")).lower()
        if name not in {"x", "y", "z"}:
            continue
        try:
            value = float(factor)
        except (TypeError, ValueError):
            continue
        # A zero, negative, NaN or infinite factor is not a physical size.
        if not math.isfinite(value) or value <= 0:
            continue
        axis_scale[name] = value
        raw_unit = ax.get("unit")
        unit = _normalize_unit(raw_unit) if isinstance(raw_unit, str) else None
        if unit:
            axis_unit[name] = unit

    if not axis_scale:
        return None

    # Report a unit only when every spatial axis that declares one agrees.
    distinct_units = set(axis_unit.values())
    unit = next(iter(distinct_units)) if len(distinct_units) == 1 else None

    return (
        axis_scale.get("x", 1.0),
        axis_scale.get("y", 1.0),
        axis_scale.get("z", 1.0),
        unit,
    )
145+
146+
23147
def to_ome_arrow(
24148
type_: str = OME_ARROW_TAG_TYPE,
25149
version: str = OME_ARROW_TAG_VERSION,
@@ -338,13 +462,8 @@ def from_tiff(
338462
if size_x <= 0 or size_y <= 0:
339463
raise ValueError("Image must have positive Y and X dims.")
340464

341-
pps = getattr(img, "physical_pixel_sizes", None)
342-
try:
343-
psize_x = float(getattr(pps, "X", None) or 1.0)
344-
psize_y = float(getattr(pps, "Y", None) or 1.0)
345-
psize_z = float(getattr(pps, "Z", None) or 1.0)
346-
except Exception:
347-
psize_x = psize_y = psize_z = 1.0
465+
psize_x, psize_y, psize_z, unit, _pps_valid = _read_physical_pixel_sizes(img)
466+
psize_unit = unit or "µm"
348467

349468
# --- NEW: coerce top-level strings --------------------------------
350469
img_id = str(image_id or p.stem)
@@ -394,7 +513,7 @@ def from_tiff(
394513
physical_size_x=psize_x,
395514
physical_size_y=psize_y,
396515
physical_size_z=psize_z,
397-
physical_size_unit="µm",
516+
physical_size_unit=psize_unit,
398517
channels=channels,
399518
planes=planes,
400519
masks=None,
@@ -410,6 +529,20 @@ def from_stack_pattern_path(
410529
image_id: Optional[str] = None,
411530
name: Optional[str] = None,
412531
) -> pa.StructScalar:
532+
"""Build an OME-Arrow record from a filename pattern describing a stack.
533+
534+
Args:
535+
pattern_path: Path or pattern string describing the stack layout.
536+
default_dim_for_unspecified: Dimension to use when tokens lack a dim.
537+
map_series_to: Dimension to map series tokens to (e.g., "T"), or None.
538+
clamp_to_uint16: Whether to clamp pixel values to uint16.
539+
channel_names: Optional list of channel names to apply.
540+
image_id: Optional image identifier override.
541+
name: Optional display name override.
542+
543+
Returns:
544+
A validated OME-Arrow StructScalar describing the stack.
545+
"""
413546
path = Path(pattern_path)
414547
folder = path.parent
415548
line = path.name.strip()
@@ -741,13 +874,15 @@ def from_ome_zarr(
741874
if size_x <= 0 or size_y <= 0:
742875
raise ValueError("Image must have positive Y and X dimensions.")
743876

744-
pps = getattr(img, "physical_pixel_sizes", None)
745-
try:
746-
psize_x = float(getattr(pps, "X", None) or 1.0)
747-
psize_y = float(getattr(pps, "Y", None) or 1.0)
748-
psize_z = float(getattr(pps, "Z", None) or 1.0)
749-
except Exception:
750-
psize_x = psize_y = psize_z = 1.0
877+
psize_x, psize_y, psize_z, unit, pps_valid = _read_physical_pixel_sizes(img)
878+
psize_unit = unit or "µm"
879+
880+
if not pps_valid:
881+
ngff_scale = _read_ngff_scale(p)
882+
if ngff_scale is not None:
883+
psize_x, psize_y, psize_z, unit = ngff_scale
884+
if unit:
885+
psize_unit = unit
751886

752887
img_id = str(image_id or p.stem)
753888
display_name = str(name or p.name)
@@ -805,7 +940,7 @@ def from_ome_zarr(
805940
physical_size_x=psize_x,
806941
physical_size_y=psize_y,
807942
physical_size_z=psize_z,
808-
physical_size_unit="µm",
943+
physical_size_unit=psize_unit,
809944
channels=channels,
810945
planes=planes,
811946
masks=None,

src/ome_arrow/view.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,23 @@ def view_matplotlib(
3333
cmap: str = "gray",
3434
show: bool = True,
3535
) -> tuple[Figure, Axes, AxesImage]:
36+
"""Render a single (t, c, z) plane with Matplotlib.
37+
38+
Args:
39+
data: OME-Arrow row or dict containing pixels_meta and planes.
40+
tcz: (t, c, z) indices of the plane to render.
41+
autoscale: If True, infer vmin/vmax from the image data.
42+
vmin: Explicit lower display limit for intensity scaling.
43+
vmax: Explicit upper display limit for intensity scaling.
44+
cmap: Matplotlib colormap name.
45+
show: Whether to display the plot immediately.
46+
47+
Returns:
48+
A tuple of (figure, axes, image) from Matplotlib.
49+
50+
Raises:
51+
ValueError: If the requested plane is missing or pixel sizes mismatch.
52+
"""
3653
if isinstance(data, pa.StructScalar):
3754
data = data.as_py()
3855

281 KB
Binary file not shown.
302 KB
Binary file not shown.

0 commit comments

Comments
 (0)