Skip to content

Commit c99df52

Browse files
authored
Add script for counting total images and annotated objects (#1082)
* Add script for counting total images and annotated objects
* Fix DynamicNuclearNet data counts
1 parent 364b160 commit c99df52

File tree

1 file changed

+236
-0
lines changed

1 file changed

+236
-0
lines changed

development/check_data_count.py

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
import os
2+
from glob import glob
3+
4+
import numpy as np
5+
import imageio.v3 as imageio
6+
7+
from torch_em.data import datasets
8+
9+
from elf.io import open_file
10+
11+
12+
# Base directory under which every dataset is looked up in its own subfolder
# (e.g. "livecell", "deepbacs", "tissuenet").
# NOTE(review): this looks like a machine-specific absolute path -- adjust it when running elsewhere.
ROOT = "/mnt/vast-nhr/projects/cidas/cca/experiments/micro_sam/data"
13+
14+
15+
def _count_objects(labels):
    """Return the number of distinct objects in a label array.

    Every unique non-zero id counts as one object; 0 is treated as background.
    Counting the non-zero ids (instead of dropping the first unique value) stays
    correct for arrays that contain no background pixels at all.
    """
    return int(np.count_nonzero(np.unique(labels)))


def _count_objects_in_images(label_paths):
    """Return the summed object count over label images read with imageio."""
    return sum(_count_objects(imageio.imread(path)) for path in label_paths)


def _count_objects_per_slice(volume):
    """Return the summed object count over the slices along the first axis of `volume`."""
    return sum(_count_objects(plane) for plane in volume)


def _close_handle(handle):
    """Close a handle returned by `open_file` if it exposes a `close()` method."""
    # Not every backend behind `open_file` is guaranteed to offer `close()`, so only call it when present.
    close = getattr(handle, "close", None)
    if callable(close):
        close()


def _count_objects_in_container(path, key):
    """Return the object count of the label data stored under `key` in the container file at `path`."""
    # The data is only read, so open the container read-only and always release the handle.
    handle = open_file(path, mode="r")
    try:
        return _count_objects(handle[key])
    finally:
        _close_handle(handle)


def check_data_count(lm_version="v3"):
    """Count the training images and annotated objects over the light microscopy datasets.

    For each dataset the per-dataset image and object counts are printed as
    `<name> <images> <objects>` and added to the running totals. Volumetric data
    (PlantSeg, EmbedSeg) is counted slice by slice along the first axis.

    Args:
        lm_version: If "v2", counting stops after the DSB dataset and the totals up to
            that point are returned. For any other value the remaining datasets
            (EmbedSeg through OrgaSegment) are counted as well.

    Returns:
        A tuple `(image_counter, object_counter)` with the total number of images
        and the total number of annotated objects.
    """
    lm = datasets.light_microscopy
    image_counter, object_counter = 0, 0

    def _add(name, n_images, n_objects):
        # Accumulate the totals and report the contribution of a single dataset.
        nonlocal image_counter, object_counter
        image_counter += n_images
        object_counter += n_objects
        print(name, n_images, n_objects)

    # LIVECell data.
    image_paths, label_paths = lm.livecell.get_livecell_paths(
        path=os.path.join(ROOT, "livecell"), split="train",
    )
    _add("LIVECell", len(image_paths), _count_objects_in_images(label_paths))

    # DeepBacs data.
    image_dir, label_dir = lm.deepbacs.get_deepbacs_paths(
        path=os.path.join(ROOT, "deepbacs"), bac_type="mixed", split="train",
    )
    image_paths = sorted(glob(os.path.join(image_dir, "*.tif")))
    label_paths = sorted(glob(os.path.join(label_dir, "*.tif")))
    _add("DeepBacs", len(image_paths), _count_objects_in_images(label_paths))

    # TissueNet data.
    sample_paths = lm.tissuenet.get_tissuenet_paths(
        path=os.path.join(ROOT, "tissuenet"), split="train",
    )
    _add(
        "TissueNet",
        len(sample_paths),
        sum(_count_objects_in_container(p, "labels/cell") for p in sample_paths),
    )

    # PlantSeg (Root) data.
    volume_paths = lm.plantseg.get_plantseg_paths(
        path=os.path.join(ROOT, "plantseg"), name="root", split="train",
    )
    n_images, n_objects = 0, 0
    for p in volume_paths:
        handle = open_file(p, mode="r")
        try:
            # Each slice along the first axis is counted as one image.
            n_images += handle["raw"].shape[0]
            n_objects += _count_objects_per_slice(handle["label"])
        finally:
            _close_handle(handle)
    _add("PlantSeg (Root)", n_images, n_objects)

    # NeurIPS CellSeg data.
    image_paths, label_paths = lm.neurips_cell_seg.get_neurips_cellseg_paths(
        root=os.path.join(ROOT, "neurips_cellseg"), split="train",
    )
    _add("NeurIPS CellSeg", len(image_paths), _count_objects_in_images(label_paths))

    # CTC data.
    n_images, n_objects = 0, 0
    for dataset_name in datasets.ctc.CTC_CHECKSUMS["train"]:
        # These two datasets are deliberately left out of the count.
        if dataset_name in ("Fluo-N2DH-GOWT1", "Fluo-N2DL-HeLa"):
            continue

        image_dirs, label_dirs = lm.ctc.get_ctc_segmentation_paths(
            path=os.path.join(ROOT, "ctc"), dataset_name=dataset_name,
        )
        image_paths = [p for d in image_dirs for p in sorted(glob(os.path.join(d, "*.tif")))]
        label_paths = [p for d in label_dirs for p in sorted(glob(os.path.join(d, "*.tif")))]

        n_images += len(image_paths)
        n_objects += _count_objects_in_images(label_paths)
    _add("CTC", n_images, n_objects)

    # DSB Nucleus data.
    image_paths, label_paths = lm.dsb.get_dsb_paths(
        path=os.path.join(ROOT, "dsb"), source="reduced", split="train",
    )
    _add("DSB Nucleus", len(image_paths), _count_objects_in_images(label_paths))

    # The "v2" selection ends here.
    if lm_version == "v2":
        return image_counter, object_counter

    # EmbedSeg data.
    n_images, n_objects = 0, 0
    names = [
        "Mouse-Organoid-Cells-CBG", "Mouse-Skull-Nuclei-CBG", "Platynereis-ISH-Nuclei-CBG", "Platynereis-Nuclei-CBG",
    ]
    for name in names:
        image_paths, label_paths = lm.embedseg_data.get_embedseg_paths(
            path=os.path.join(ROOT, "embedseg"), name=name, split="train",
        )
        # Each slice along the first axis is counted as one image.
        n_images += sum(imageio.imread(p).shape[0] for p in image_paths)
        n_objects += sum(_count_objects_per_slice(imageio.imread(p)) for p in label_paths)
    _add("EmbedSeg", n_images, n_objects)

    # CVZ Fluo data.
    n_images, n_objects = 0, 0
    for stain_choice in ["cell", "dapi"]:
        image_paths, label_paths = lm.cvz_fluo.get_cvz_fluo_paths(
            path=os.path.join(ROOT, "cvz"), stain_choice=stain_choice,
        )
        n_images += len(image_paths)
        n_objects += _count_objects_in_images(label_paths)
    _add("CVZ Fluo", n_images, n_objects)

    # DynamicNuclearNet data.
    sample_paths = lm.dynamicnuclearnet.get_dynamicnuclearnet_paths(
        path=os.path.join(ROOT, "dynamicnuclearnet"), split="train",
    )
    _add(
        "DynamicNuclearNet",
        len(sample_paths),
        sum(_count_objects_in_container(p, "labels") for p in sample_paths),
    )

    # CellPose data.
    image_paths, label_paths = lm.cellpose.get_cellpose_paths(
        path=os.path.join(ROOT, "cellpose"), split="train", choice="cyto",
    )
    _add("CellPose", len(image_paths), _count_objects_in_images(label_paths))

    # OmniPose data.
    image_paths, label_paths = lm.omnipose.get_omnipose_paths(
        path=os.path.join(ROOT, "omnipose"), split="train",
    )
    _add("OmniPose", len(image_paths), _count_objects_in_images(label_paths))

    # OrgaSegment data.
    image_paths, label_paths = lm.orgasegment.get_orgasegment_paths(
        path=os.path.join(ROOT, "orgasegment"), split="train",
    )
    _add("OrgaSegment", len(image_paths), _count_objects_in_images(label_paths))

    return image_counter, object_counter
225+
226+
227+
def main():
    """Print the total image and object counts for the "v3" dataset selection."""
    # NOTE: To report the "v2" selection instead, call `check_data_count("v2")`,
    # which returns the totals right after the DSB dataset.
    image_counts, object_counts = check_data_count(lm_version="v3")
    print(f"v3 and v4 Model - Count of images: '{image_counts}'; and count of objects: '{object_counts}'")
233+
234+
235+
# Only run the count when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)