1212stem = "STEM"
1313
1414
15- def aggregate_vesicle_train_data (roots , test_tomograms , conditions , resolutions ):
15+ def aggregate_vesicle_train_data (roots , conditions , resolutions ):
1616 tomo_names = []
17- tomo_vesicles = []
17+ tomo_vesicles_all , tomo_vesicles_imod = [], []
1818 tomo_condition = []
1919 tomo_resolution = []
2020 tomo_train = []
2121
22- for ds , root in roots .items ():
23- print ("Aggregate data for" , ds )
24- train_root = root ["train" ]
25- if train_root == "" :
26- test_root = root ["test" ]
27- tomograms = sorted (glob (os .path .join (test_root , "2024**" , "*.h5" ), recursive = True ))
28- this_test_tomograms = [os .path .basename (tomo ) for tomo in tomograms ]
22+ def aggregate_split (ds , split_root , split ):
23+ if ds .startswith ("04" ):
24+ tomograms = sorted (glob (os .path .join (split_root , "2024**" , "*.h5" ), recursive = True ))
2925 else :
30- # This is only the case for 04, which is also nested
31- tomograms = sorted (glob (os .path .join (train_root , "*.h5" )))
32- this_test_tomograms = test_tomograms [ds ]
26+ tomograms = sorted (glob (os .path .join (split_root , "*.h5" )))
3327
3428 assert len (tomograms ) > 0 , ds
3529 this_condition = conditions [ds ]
3630 this_resolution = resolutions [ds ][0 ]
3731
38- for tomo_path in tqdm (tomograms ):
32+ for tomo_path in tqdm (tomograms , desc = f"Aggregate { split } " ):
3933 fname = os .path .basename (tomo_path )
4034 with h5py .File (tomo_path , "r" ) as f :
4135 try :
4236 tomo_name = f .attrs ["filename" ]
4337 except KeyError :
4438 tomo_name = fname
4539
46- n_label_sets = len (f ["labels" ])
47- if n_label_sets > 2 :
48- print (tomo_path , "contains the following labels:" , list (f ["labels" ].keys ()))
49- seg = f ["labels/vesicles" ][:]
50- n_vesicles = len (np .unique (seg )) - 1
40+ if "labels/vesicles/combined_vesicles" in f :
41+ all_vesicles = f ["labels/vesicles/combined_vesicles" ][:]
42+ imod_vesicles = f ["labels/vesicles/masked_vesicles" ][:]
43+ n_vesicles_all = len (np .unique (all_vesicles )) - 1
44+ n_vesicles_imod = len (np .unique (imod_vesicles )) - 2
45+ else :
46+ vesicles = f ["labels/vesicles" ][:]
47+ n_vesicles_all = len (np .unique (vesicles )) - 1
48+ n_vesicles_imod = n_vesicles_all
5149
5250 tomo_names .append (tomo_name )
53- tomo_vesicles .append (n_vesicles )
51+ tomo_vesicles_all .append (n_vesicles_all )
52+ tomo_vesicles_imod .append (n_vesicles_imod )
5453 tomo_condition .append (this_condition )
5554 tomo_resolution .append (this_resolution )
56- tomo_train .append ("test" if fname in this_test_tomograms else "train/val" )
55+ tomo_train .append (split )
56+
57+ for ds , root in roots .items ():
58+ print ("Aggregate data for" , ds )
59+ train_root = root ["train" ]
60+ if train_root != "" :
61+ aggregate_split (ds , train_root , "train/val" )
62+ test_root = root ["test" ]
63+ if test_root != "" :
64+ aggregate_split (ds , test_root , "test" )
5765
5866 df = pd .DataFrame ({
5967 "tomogram" : tomo_names ,
6068 "condition" : tomo_condition ,
6169 "resolution" : tomo_resolution ,
6270 "used_for" : tomo_train ,
63- "vesicle_count" : tomo_vesicles ,
71+ "vesicle_count_all" : tomo_vesicles_all ,
72+ "vesicle_count_imod" : tomo_vesicles_imod ,
6473 })
6574
6675 os .makedirs ("data_summary" , exist_ok = True )
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
7079def vesicle_train_data ():
7180 roots = {
7281 "01" : {
73- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /01_hoi_maus_2020_incomplete" , # noqa
82+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /01_hoi_maus_2020_incomplete" , # noqa
7483 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" , # noqa
7584 },
7685 "02" : {
77- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /02_hcc_nanogold" , # noqa
86+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /02_hcc_nanogold" , # noqa
7887 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold" , # noqa
7988 },
8089 "03" : {
81- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /03_hog_cs1sy7" , # noqa
90+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /03_hog_cs1sy7" , # noqa
8291 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7" , # noqa
8392 },
8493 "04" : {
8594 "train" : "" ,
8695 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/" , # noqa
8796 },
8897 "05" : {
89- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /05_stem750_sv_training" , # noqa
98+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /05_stem750_sv_training" , # noqa
9099 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training" , # noqa
91100 },
92101 "07" : {
93- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /07_hoi_s1sy7_tem250_ihgp" , # noqa
102+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /07_hoi_s1sy7_tem250_ihgp" , # noqa
94103 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp" , # noqa
95104 },
96105 "09" : {
97- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /09_stem750_66k" , # noqa
106+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /09_stem750_66k" , # noqa
98107 "test" : "" ,
99108 },
100109 "10" : {
101- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /10_tem_single_release" , # noqa
110+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /10_tem_single_release" , # noqa
102111 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release" , # noqa
103112 },
104113 "11" : {
105- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /11_tem_multiple_release" , # noqa
114+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /11_tem_multiple_release" , # noqa
106115 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release" , # noqa
107116 },
108117 "12" : {
109- "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /12_chemical_fix_cryopreparation" , # noqa
118+ "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /12_chemical_fix_cryopreparation" , # noqa
110119 "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" , # noqa
111120 },
112121 }
113122
114- test_tomograms = {
115- "01" : ["tomogram-009.h5" , "tomogram-038.h5" , "tomogram-049.h5" , "tomogram-052.h5" , "tomogram-057.h5" , "tomogram-060.h5" , "tomogram-067.h5" , "tomogram-074.h5" , "tomogram-076.h5" , "tomogram-083.h5" , "tomogram-133.h5" , "tomogram-136.h5" , "tomogram-145.h5" , "tomogram-149.h5" , "tomogram-150.h5" ], # noqa
116- "02" : ["tomogram-004.h5" , "tomogram-008.h5" ],
117- "03" : ["tomogram-003.h5" , "tomogram-004.h5" , "tomogram-008.h5" ,],
118- "04" : [], # all used for test
119- "05" : ["tomogram-003.h5" , "tomogram-005.h5" ,],
120- "07" : ["tomogram-006.h5" , "tomogram-017.h5" ,],
121- "09" : [], # no test data
122- "10" : ["tomogram-001.h5" , "tomogram-002.h5" , "tomogram-007.h5" ],
123- "11" : ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5" ],
124- "12" : ["tomogram-004.h5" , "tomogram-021.h5" , "tomogram-022.h5" ,],
125- }
126-
127123 conditions = {
128124 "01" : single_ax_tem ,
129125 "02" : dual_ax_tem ,
@@ -150,7 +146,7 @@ def vesicle_train_data():
150146 "12" : (1.554 , 1.554 , 1.554 )
151147 }
152148
153- aggregate_vesicle_train_data (roots , test_tomograms , conditions , resolutions )
149+ aggregate_vesicle_train_data (roots , conditions , resolutions )
154150
155151
156152def aggregate_az_train_data (roots , test_tomograms , conditions , resolutions ):
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
397393 "MF_05649_P-09175-E_06.h5" , "MF_05646_C-09175-B_001B.h5" , "MF_05649_P-09175-E_07.h5" ,
398394 "MF_05649_G-09175-C_001.h5" , "MF_05646_C-09175-B_002.h5" , "MF_05649_G-09175-C_04.h5" ,
399395 "MF_05649_P-09175-E_05.h5" , "MF_05646_C-09175-B_000.h5" , "MF_05646_C-09175-B_001.h5"
396+ ],
397+ "frog" : [
398+ "block10U3A_three.h5" , "block30UB_one_two.h5" , "block30UB_two.h5" , "block10U3A_one.h5" ,
399+ "block184B_one.h5" , "block30UB_three.h5" , "block10U3A_two.h5" , "block30UB_four.h5" ,
400+ "block30UB_one.h5" , "block10U3A_five.h5" ,
400401 ]
401402 }
402403
@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
439440 aggregate_da (roots , train_tomograms , test_tomograms , resolutions )
440441
441442
def get_n_images_frog(root=None, tomos=None):
    """Print and return the total number of images in the frog tomograms.

    Each file is opened read-only with h5py and the size of axis 0 of its
    "raw" dataset is added to a running total; that axis is treated as the
    number of images in the tomogram.

    Args:
        root: Folder that contains the tomogram files. When None, the
            hard-coded upsampled rizzoli extraction folder is used.
        tomos: File names (relative to `root`) to include. When None, the
            ten frog tomograms listed below are used. An empty sequence
            yields a total of 0 without opening any file.

    Returns:
        The summed axis-0 size over the "raw" datasets of all files.
    """
    if root is None:
        root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"
    if tomos is None:
        tomos = [
            "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
            "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
            "block30UB_one.h5", "block10U3A_five.h5",
        ]

    n_images = 0
    for tomo in tomos:
        path = os.path.join(root, tomo)
        with h5py.File(path, "r") as f:
            # Only the dataset's shape is queried; the volume is never sliced.
            n_images += f["raw"].shape[0]

    # Keep the original console output and additionally hand the value back
    # so that callers can reuse it.
    print(n_images)
    return n_images
455+
456+
def get_image_sizes_tem_2d(root=None, tomos=None):
    """Print and return the "raw" dataset shape of each 2D TEM file.

    Each file is opened read-only with h5py; the shape of its "raw" dataset
    is printed (one line per file, in input order) and collected.

    Args:
        root: Folder that contains the files. When None, the hard-coded
            maus_2020 2D TEM folder is used.
        tomos: File names (relative to `root`) to inspect. When None, the
            nine files listed below are used. An empty sequence yields an
            empty list without opening any file.

    Returns:
        A list with one shape tuple per file, in the order of `tomos`.
    """
    if root is None:
        root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa
    if tomos is None:
        tomos = [
            "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
            "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
            "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5",
        ]

    shapes = []
    for tomo in tomos:
        path = os.path.join(root, tomo)
        with h5py.File(path, "r") as f:
            shape = f["raw"].shape
            # Keep the original console output, one shape per file.
            print(shape)
        shapes.append(shape)
    return shapes
468+
469+
def main():
    """Run the currently enabled data-summary step.

    Only the vesicle training-data summary is active. The remaining steps
    are kept as commented-out calls so they can be switched on by hand.
    """
    # Disabled summaries for the other training datasets:
    # active_zone_train_data()
    # compartment_train_data()
    # mito_train_data()

    # Enabled step.
    vesicle_train_data()

    # Disabled domain-adaptation summary and one-off inspection helpers:
    # vesicle_domain_adaptation_data()
    # get_n_images_frog()
    # get_image_sizes_tem_2d()
449479
450480
451481main ()
0 commit comments