Skip to content

Commit 4294cfb

Browse files
committed
Add 2D to 3D label & metadata task
1 parent ee4cc66 commit 4294cfb

16 files changed

+913
-16
lines changed

.DS_Store

6 KB
Binary file not shown.

examples/.DS_Store

6 KB
Binary file not shown.
Lines changed: 397 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,397 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "0c427b11-61b3-41c4-8ec7-96cbe7a1562b",
6+
"metadata": {},
7+
"source": [
8+
"# OME-Zarr ROI processing\n",
9+
"This notebook shows how to load a whole OME-Zarr image, apply some processing to it and store the result as a new label image in the same OME-Zarr. \n",
10+
"\n",
11+
"First run the imports cell and the cell that defines the helper functions.\n",
12+
"\n",
13+
"There are 4 processing steps: \n",
14+
"1a) Load an intensity image \n",
15+
"1b) Alternatively, load an existing label image \n",
16+
"2a) Process the image to create a new label image \n",
17+
"2b) Change the label image (e.g. interactively in napari) \n",
18+
"3a) Save the new label image  \n",
19+
"3b) Save the changed label image to OME-Zarr \n",
20+
"4) Optionally save masking ROI tables"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"id": "01f19305-0f31-4b46-8239-c3d2a56272fd",
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"import zarr\n",
31+
"import dask.array as da\n",
32+
"import numpy as np\n",
33+
"from skimage.measure import label\n",
34+
"from skimage.filters import threshold_otsu\n",
35+
"from skimage.morphology import remove_small_holes, remove_small_objects\n",
36+
"import napari\n",
37+
"from pathlib import Path\n",
38+
"import copy"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": null,
44+
"id": "444d41ae",
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"# Helper functions for saving masking ROI tables and label images to an OME-Zarr\n",
49+
"from fractal_tasks_core.roi import (\n",
50+
" array_to_bounding_box_table,\n",
51+
")\n",
52+
"import pandas as pd\n",
53+
"import anndata as ad\n",
54+
"import logging\n",
55+
"from fractal_tasks_core.tables import write_table\n",
56+
"from fractal_tasks_core.labels import prepare_label_group\n",
57+
"from fractal_tasks_core.ngff.zarr_utils import load_NgffImageMeta \n",
58+
"from fractal_tasks_core.pyramids import build_pyramid\n",
59+
"\n",
60+
"logger = logging.getLogger(__name__)\n",
61+
"\n",
def save_masking_roi_table(
    masking_roi_df: pd.DataFrame,
    zarr_url: str,
    output_ROI_table: str,
    output_label_name: str,
    overwrite: bool = True,
):
    """
    Saves a masking ROI table to the OME-Zarr

    The dataframe passed in is left untouched (all reshaping happens on a
    copy), so the same `masking_roi_df` can be passed again, e.g. when the
    calling notebook cell is re-run.

    masking_roi_df: Dataframe containing the masking rois, e.g. generated by
        `array_to_bounding_box_table`. Must contain a `label` column; every
        other column is converted to float32.
    zarr_url: path to the zarr image (e.g. "/path/to/myplate.zarr/B/03/0")
    output_ROI_table: Name of the ROI table to be saved
    output_label_name: Name of the label image that has already been written
        to the Zarr and contains the labels for the masking ROI table
    overwrite: Whether an existing roi table with name `output_ROI_table`
        should be overwritten (passed on to `write_table`).

    Raises:
        KeyError: If `masking_roi_df` has no `label` column.
    """
    # Work on a copy: re-indexing and dropping the label column in place would
    # strip `label` from the caller's dataframe and break a second call.
    roi_df = masking_roi_df.copy()
    roi_df.index = roi_df["label"].astype(str)

    # Extract labels & drop them from dataframe
    labels = pd.DataFrame(roi_df["label"].astype(str))
    roi_df = roi_df.drop(labels=["label"], axis=1)

    # Convert all to float (warning: some would be int, in principle)
    bbox_dtype = np.float32
    roi_df = roi_df.astype(bbox_dtype)

    # Convert to anndata; the string labels become the `obs` column that
    # `instance_key` below refers to
    bbox_table = ad.AnnData(roi_df, dtype=bbox_dtype)
    bbox_table.obs = labels

    # Write to zarr group
    image_group = zarr.group(zarr_url)
    logger.info(
        "Now writing bounding-box ROI table to "
        f"{zarr_url}/tables/{output_ROI_table}"
    )
    table_attrs = {
        "type": "masking_roi_table",
        "region": {"path": f"../labels/{output_label_name}"},
        "instance_key": "label",
    }
    # TODO: Validate that the label zarr exists

    write_table(
        image_group,
        output_ROI_table,
        bbox_table,
        overwrite=overwrite,
        table_attrs=table_attrs,
    )
115+
"\n",
116+
"# Helpers for reading OME-Zarr attributes and saving a new label image\n",
def get_zattrs(zarr_url):
    """Return the attributes stored at `zarr_url` as a plain dict.

    The Zarr node is opened read-only and used as a context manager, so it
    is closed again before the attributes are returned.
    """
    with zarr.open(zarr_url, mode="r") as zarr_node:
        attrs = zarr_node.attrs.asdict()
    return attrs
120+
"\n",
121+
"\n",
def save_label_image(
    label_image,
    label_name,
    zarr_url,
    label_attrs,
    chunks = (1, 2160, 2560),
    overwrite: bool = True,
):
    """
    Write a label image into an OME-Zarr image and build its pyramid.

    label_image: Array-like label image. It is stored with dtype uint32 at
        `{zarr_url}/labels/{label_name}/0`.
    label_name: Name of the label image inside the `labels` group.
    zarr_url: Path to the zarr image (e.g. "/path/to/myplate.zarr/B/03/0").
    label_attrs: Attributes for the new label group; passed on to
        `prepare_label_group`.
    chunks: Chunk shape of the level-0 array; also passed to `build_pyramid`
        as `chunksize`.
    overwrite: Passed to `prepare_label_group`, to the creation of the
        level-0 array and to `build_pyramid`, so that all three steps follow
        the same overwrite policy.
    """
    # Prepare the output label group with the provided attributes
    zarr_url = Path(zarr_url)
    prepare_label_group(
        image_group=zarr.group(zarr_url),
        label_name=label_name,
        overwrite=overwrite,
        label_attrs=label_attrs,
        logger=logger,
    )

    # Save label image to OME-Zarr
    label_dtype = np.uint32
    store = zarr.storage.FSStore(f"{zarr_url}/labels/{label_name}/0")
    new_label_array = zarr.create(
        shape=label_image.shape,
        chunks=chunks,
        dtype=label_dtype,
        store=store,
        # Honor the caller's choice instead of a hard-coded False
        overwrite=overwrite,
        dimension_separator="/",
    )

    da.array(label_image).to_zarr(
        url=new_label_array,
    )
    logger.info(f"Saved {label_name} to OME-Zarr")

    # Build pyramids for label image, using the number of levels and the
    # coarsening factor read back from the label metadata
    label_meta = load_NgffImageMeta(zarr_url / "labels" / label_name)
    build_pyramid(
        zarrurl=f"{zarr_url}/labels/{label_name}",
        overwrite=overwrite,
        num_levels=label_meta.num_levels,
        coarsening_xy=label_meta.coarsening_xy,
        chunksize=chunks,
        aggregation_function=np.max,
    )
    logger.info(f"Built a pyramid for the {label_name} label image")
168+
"\n",
169+
"\n",
def generate_label_zattrs_from_img_zattrs(img_attrs, label_name):
    """Derive the attributes of a label image from those of its source image.

    This assumes the output labels have the same shape as the loaded image,
    except that labels have no channel axis.

    The channel axis is identified by its axis metadata (`type == "channel"`
    or name `"c"`), not by its position. It is removed from the axes and from
    every `scale` / `translation` coordinate transformation of every dataset.
    If the image has no channel axis, nothing is removed.

    img_attrs: Attributes of the image; only the first entry of
        `multiscales` is used. The dict is not modified.
    label_name: Stored as the `name` of the label multiscale.

    Returns a dict with an `image-label` entry pointing to `../../` and a
    single `multiscales` entry containing `axes`, `datasets`, `name` and, if
    the image declares one, `version`.
    """
    # Deep copy so that trimming below never touches the caller's attributes
    img_multiscale = copy.deepcopy(img_attrs['multiscales'][0])
    img_axes = img_multiscale['axes']

    def _is_channel_axis(axis):
        # Axes may be dicts ({"name": ..., "type": ...}) or plain name strings
        if isinstance(axis, dict):
            return axis.get('type') == 'channel' or axis.get('name') == 'c'
        return axis == 'c'

    kept = [i for i, axis in enumerate(img_axes) if not _is_channel_axis(axis)]

    def _without_channel(values):
        # Only trim lists that have one entry per image axis
        if len(values) != len(img_axes):
            return values
        return [values[i] for i in kept]

    # Drop channel dimension from the datasets, as labels don't have channels
    datasets = img_multiscale['datasets']
    for dataset in datasets:
        for transformation in dataset.get('coordinateTransformations', []):
            for key in ('scale', 'translation'):
                if key in transformation:
                    transformation[key] = _without_channel(transformation[key])

    label_multiscale = {
        'axes': [img_axes[i] for i in kept],
        'datasets': datasets,
        'name': label_name,
    }
    if 'version' in img_multiscale:
        label_multiscale['version'] = img_multiscale['version']

    return {
        'image-label': {'source': {'image': '../../'}, 'version': '0.4'},
        'multiscales': [label_multiscale],
    }
186+
]
187+
},
188+
{
189+
"cell_type": "markdown",
190+
"id": "7e865676-e37f-4f73-9ae9-bf59c5e976cd",
191+
"metadata": {},
192+
"source": [
193+
"### 1a) Load whole OME-Zarr image"
194+
]
195+
},
196+
{
197+
"cell_type": "code",
198+
"execution_count": null,
199+
"id": "7fa39e0b-3057-4efa-b051-e20b93aa6073",
200+
"metadata": {},
201+
"outputs": [],
202+
"source": [
# TODO: Change to download the zenodo example data, run it on those
# Which OME-Zarr image, pyramid level and channel to load
zarr_url = "/Users/joel/Desktop/20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr/B/03/0"
level = 0
channel_index = 0

# Open the chosen pyramid level as a dask array and index the first axis
# with `channel_index`
img = da.from_zarr(f"{zarr_url}/{level}")[channel_index]
zattrs = get_zattrs(zarr_url=Path(zarr_url))
# Scale of the chosen level without its first entry, matching the axis that
# was indexed away above
img_scale = (
    zattrs['multiscales'][0]['datasets'][level]
    ['coordinateTransformations'][0]["scale"][1:]
)
211+
]
212+
},
213+
{
214+
"cell_type": "markdown",
215+
"id": "70e9d111-2cf5-47ad-b8e0-f992b047defe",
216+
"metadata": {},
217+
"source": [
218+
"### 2a) Process the image"
219+
]
220+
},
221+
{
222+
"cell_type": "code",
223+
"execution_count": null,
224+
"id": "8a1d7518-cc78-4a7d-bde3-638c653c8a75",
225+
"metadata": {},
226+
"outputs": [],
227+
"source": [
# Load the image into memory as a numpy array, then do arbitrary processing.
# Depending on the processing you want to do, it may also work directly in dask
min_size = 256
img = np.array(img)

# Otsu threshold, then clean up the binary mask: fill small holes and remove
# objects smaller than `min_size`
otsu_threshold = threshold_otsu(img)
img_thr = remove_small_holes(img > otsu_threshold)
img_thr_cleaned = remove_small_objects(img_thr, min_size=min_size)

# Connected-component labeling of the cleaned mask
label_image = label(img_thr_cleaned)
label_image.shape
238+
]
239+
},
240+
{
241+
"cell_type": "markdown",
242+
"id": "326adb15-f5dc-466a-8e95-beac9adb89e5",
243+
"metadata": {},
244+
"source": [
245+
"### 3a) Save the resulting label image back to the OME-Zarr file"
246+
]
247+
},
248+
{
249+
"cell_type": "code",
250+
"execution_count": null,
251+
"id": "bd8cbf7b-40aa-4d95-8dc4-c63b08de773c",
252+
"metadata": {},
253+
"outputs": [],
254+
"source": [
# Derive the label attributes from the image attributes, then write the label
# image under `new_label_name`
new_label_name = "new_label_img_1"
label_attrs = generate_label_zattrs_from_img_zattrs(zattrs, new_label_name)
save_label_image(
    label_image=label_image,
    label_name=new_label_name,
    zarr_url=zarr_url,
    label_attrs=label_attrs,
)
258+
]
259+
},
260+
{
261+
"cell_type": "markdown",
262+
"id": "9cc0517f-1ce1-4e4e-a791-0ac74e723b29",
263+
"metadata": {},
264+
"source": [
265+
"### 1b) Load a label image"
266+
]
267+
},
268+
{
269+
"cell_type": "code",
270+
"execution_count": null,
271+
"id": "29d9f3a6-1d70-448e-8f88-3c127561d0d1",
272+
"metadata": {},
273+
"outputs": [],
274+
"source": [
# Which OME-Zarr image, pyramid level and existing label image to load
zarr_url = "/Users/joel/Desktop/20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr/B/03/0"
level = 0
label_name = "nuclei"

# Open the label image at the chosen level as a dask array
label_img = da.from_zarr(f"{zarr_url}/labels/{label_name}/{level}")
label_zattrs = get_zattrs(zarr_url=Path(zarr_url, "labels", label_name))
# Full scale of the chosen level (no entry is dropped here)
label_img_scale = (
    label_zattrs['multiscales'][0]['datasets'][level]
    ['coordinateTransformations'][0]["scale"]
)
282+
]
283+
},
284+
{
285+
"cell_type": "markdown",
286+
"id": "49bbdea8-adfb-4152-b774-7fc5a55e2c28",
287+
"metadata": {},
288+
"source": [
289+
"### 2b) Edit the label image in napari"
290+
]
291+
},
292+
{
293+
"cell_type": "code",
294+
"execution_count": null,
295+
"id": "7cffaa85-8990-4f7f-8267-208a08100e72",
296+
"metadata": {},
297+
"outputs": [],
298+
"source": [
# Open napari with the intensity image and the label image on top.
# Both are converted to numpy arrays, because dask arrays aren't easily edited
# in napari
viewer = napari.Viewer()
viewer.add_image(data=np.array(img), scale=img_scale)
label_layer = viewer.add_labels(data=np.array(label_img), scale=label_img_scale)
304+
]
305+
},
306+
{
307+
"cell_type": "code",
308+
"execution_count": null,
309+
"id": "6293af34-0fea-49ce-9917-406b0d42a314",
310+
"metadata": {},
311+
"outputs": [],
312+
"source": [
# Optionally edit the label layer by hand in the napari window first; this
# then reads the (possibly modified) data back from that layer
label_image = label_layer.data
315+
]
316+
},
317+
{
318+
"cell_type": "markdown",
319+
"id": "edb08843-f1d4-4899-85ec-010116aa188d",
320+
"metadata": {},
321+
"source": [
322+
"### 3b) Save changed label image to OME-Zarr"
323+
]
324+
},
325+
{
326+
"cell_type": "code",
327+
"execution_count": null,
328+
"id": "863e6f6f-ecb0-48ab-9d94-f30d2c9d8e8a",
329+
"metadata": {},
330+
"outputs": [],
331+
"source": [
# Write the edited labels under a new name, reusing the attributes of the
# label image they were loaded from
new_label_name = "manual_label_correction_6"
save_label_image(
    label_image=label_image,
    label_name=new_label_name,
    zarr_url=zarr_url,
    label_attrs=label_zattrs,
)
334+
]
335+
},
336+
{
337+
"cell_type": "code",
338+
"execution_count": null,
339+
"id": "a1a59cf2",
340+
"metadata": {},
341+
"outputs": [],
342+
"source": [
# Optional: compute the bounding boxes of the labels in `label_image` and
# save them as a masking ROI table that refers to `new_label_name`
output_roi_name = "new_masking_ROI_table"

masking_roi_df = array_to_bounding_box_table(
    label_image,
    pxl_sizes_zyx=label_img_scale,
)
save_masking_roi_table(
    masking_roi_df=masking_roi_df,
    zarr_url=zarr_url,
    output_ROI_table=output_roi_name,
    output_label_name=new_label_name,
    overwrite=True,
)
353+
]
354+
},
355+
{
356+
"cell_type": "markdown",
357+
"id": "fea0e0ef",
358+
"metadata": {},
359+
"source": [
360+
"### 4) Save masking ROI table for the new labels"
361+
]
362+
},
363+
{
364+
"cell_type": "code",
365+
"execution_count": null,
366+
"id": "70f6bd94",
367+
"metadata": {},
368+
"outputs": [],
369+
"source": [
# Save `label_image` under `new_label_name`, with the attributes read from the
# existing `label_name` label image in the OME-Zarr
new_label_name = "manual_label_correction_6"
label_attrs = get_zattrs(zarr_url=Path(zarr_url, "labels", label_name))
save_label_image(
    label_image=label_image,
    label_name=new_label_name,
    zarr_url=zarr_url,
    label_attrs=label_attrs,
)
373+
]
374+
}
375+
],
376+
"metadata": {
377+
"kernelspec": {
378+
"display_name": "Python 3 (ipykernel)",
379+
"language": "python",
380+
"name": "python3"
381+
},
382+
"language_info": {
383+
"codemirror_mode": {
384+
"name": "ipython",
385+
"version": 3
386+
},
387+
"file_extension": ".py",
388+
"mimetype": "text/x-python",
389+
"name": "python",
390+
"nbconvert_exporter": "python",
391+
"pygments_lexer": "ipython3",
392+
"version": "3.10.13"
393+
}
394+
},
395+
"nbformat": 4,
396+
"nbformat_minor": 5
397+
}

0 commit comments

Comments
 (0)