1212stem  =  "STEM" 
1313
1414
15- def  aggregate_vesicle_train_data (roots , test_tomograms ,  conditions , resolutions ):
15+ def  aggregate_vesicle_train_data (roots , conditions , resolutions ):
1616    tomo_names  =  []
17-     tomo_vesicles   =  []
17+     tomo_vesicles_all ,  tomo_vesicles_imod   =  [],  []
1818    tomo_condition  =  []
1919    tomo_resolution  =  []
2020    tomo_train  =  []
2121
22-     for  ds , root  in  roots .items ():
23-         print ("Aggregate data for" , ds )
24-         train_root  =  root ["train" ]
25-         if  train_root  ==  "" :
26-             test_root  =  root ["test" ]
27-             tomograms  =  sorted (glob (os .path .join (test_root , "2024**" , "*.h5" ), recursive = True ))
28-             this_test_tomograms  =  [os .path .basename (tomo ) for  tomo  in  tomograms ]
22+     def  aggregate_split (ds , split_root , split ):
23+         if  ds .startswith ("04" ):
24+             tomograms  =  sorted (glob (os .path .join (split_root , "2024**" , "*.h5" ), recursive = True ))
2925        else :
30-             # This is only the case for 04, which is also nested 
31-             tomograms  =  sorted (glob (os .path .join (train_root , "*.h5" )))
32-             this_test_tomograms  =  test_tomograms [ds ]
26+             tomograms  =  sorted (glob (os .path .join (split_root , "*.h5" )))
3327
3428        assert  len (tomograms ) >  0 , ds 
3529        this_condition  =  conditions [ds ]
3630        this_resolution  =  resolutions [ds ][0 ]
3731
38-         for  tomo_path  in  tqdm (tomograms ):
32+         for  tomo_path  in  tqdm (tomograms ,  desc = f"Aggregate  { split } "  ):
3933            fname  =  os .path .basename (tomo_path )
4034            with  h5py .File (tomo_path , "r" ) as  f :
4135                try :
4236                    tomo_name  =  f .attrs ["filename" ]
4337                except  KeyError :
4438                    tomo_name  =  fname 
4539
46-                 n_label_sets  =  len (f ["labels" ])
47-                 if  n_label_sets  >  2 :
48-                     print (tomo_path , "contains the following labels:" , list (f ["labels" ].keys ()))
49-                 seg  =  f ["labels/vesicles" ][:]
50-                 n_vesicles  =  len (np .unique (seg )) -  1 
40+                 if  "labels/vesicles/combined_vesicles"  in  f :
41+                     all_vesicles  =  f ["labels/vesicles/combined_vesicles" ][:]
42+                     imod_vesicles  =  f ["labels/vesicles/masked_vesicles" ][:]
43+                     n_vesicles_all  =  len (np .unique (all_vesicles )) -  1 
44+                     n_vesicles_imod  =  len (np .unique (imod_vesicles )) -  2 
45+                 else :
46+                     vesicles  =  f ["labels/vesicles" ][:]
47+                     n_vesicles_all  =  len (np .unique (vesicles )) -  1 
48+                     n_vesicles_imod  =  n_vesicles_all 
5149
5250            tomo_names .append (tomo_name )
53-             tomo_vesicles .append (n_vesicles )
51+             tomo_vesicles_all .append (n_vesicles_all )
52+             tomo_vesicles_imod .append (n_vesicles_imod )
5453            tomo_condition .append (this_condition )
5554            tomo_resolution .append (this_resolution )
56-             tomo_train .append ("test"  if  fname  in  this_test_tomograms  else  "train/val" )
55+             tomo_train .append (split )
56+ 
57+     for  ds , root  in  roots .items ():
58+         print ("Aggregate data for" , ds )
59+         train_root  =  root ["train" ]
60+         if  train_root  !=  "" :
61+             aggregate_split (ds , train_root , "train/val" )
62+         test_root  =  root ["test" ]
63+         if  test_root  !=  "" :
64+             aggregate_split (ds , test_root , "test" )
5765
5866    df  =  pd .DataFrame ({
5967        "tomogram" : tomo_names ,
6068        "condition" : tomo_condition ,
6169        "resolution" : tomo_resolution ,
6270        "used_for" : tomo_train ,
63-         "vesicle_count" : tomo_vesicles ,
71+         "vesicle_count_all" : tomo_vesicles_all ,
72+         "vesicle_count_imod" : tomo_vesicles_imod ,
6473    })
6574
6675    os .makedirs ("data_summary" , exist_ok = True )
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
7079def  vesicle_train_data ():
7180    roots  =  {
7281        "01" : {
73-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /01_hoi_maus_2020_incomplete" ,  # noqa 
82+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /01_hoi_maus_2020_incomplete" ,  # noqa 
7483            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" ,  # noqa 
7584        },
7685        "02" : {
77-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /02_hcc_nanogold" ,  # noqa 
86+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /02_hcc_nanogold" ,  # noqa 
7887            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold" ,  # noqa 
7988        },
8089        "03" : {
81-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /03_hog_cs1sy7" ,  # noqa 
90+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /03_hog_cs1sy7" ,  # noqa 
8291            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7" ,  # noqa 
8392        },
8493        "04" : {
8594            "train" : "" ,
8695            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/" ,  # noqa 
8796        },
8897        "05" : {
89-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /05_stem750_sv_training" ,  # noqa 
98+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /05_stem750_sv_training" ,  # noqa 
9099            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training" ,  # noqa 
91100        },
92101        "07" : {
93-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /07_hoi_s1sy7_tem250_ihgp" ,  # noqa 
102+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /07_hoi_s1sy7_tem250_ihgp" ,  # noqa 
94103            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp" ,  # noqa 
95104        },
96105        "09" : {
97-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /09_stem750_66k" ,  # noqa 
106+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /09_stem750_66k" ,  # noqa 
98107            "test" : "" ,
99108        },
100109        "10" : {
101-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /10_tem_single_release" ,  # noqa 
110+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /10_tem_single_release" ,  # noqa 
102111            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release" ,  # noqa 
103112        },
104113        "11" : {
105-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /11_tem_multiple_release" ,  # noqa 
114+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /11_tem_multiple_release" ,  # noqa 
106115            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release" ,  # noqa 
107116        },
108117        "12" : {
109-             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer /12_chemical_fix_cryopreparation" ,  # noqa 
118+             "train" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2 /12_chemical_fix_cryopreparation" ,  # noqa 
110119            "test" : "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" ,  # noqa 
111120        },
112121    }
113122
114-     test_tomograms  =  {
115-         "01" : ["tomogram-009.h5" ,  "tomogram-038.h5" , "tomogram-049.h5" , "tomogram-052.h5" , "tomogram-057.h5" , "tomogram-060.h5" , "tomogram-067.h5" , "tomogram-074.h5" , "tomogram-076.h5" , "tomogram-083.h5" ,    "tomogram-133.h5" , "tomogram-136.h5" , "tomogram-145.h5" , "tomogram-149.h5" , "tomogram-150.h5" ],  # noqa 
116-         "02" : ["tomogram-004.h5" , "tomogram-008.h5" ],
117-         "03" : ["tomogram-003.h5" , "tomogram-004.h5" , "tomogram-008.h5" ,],
118-         "04" : [],  # all used for test 
119-         "05" : ["tomogram-003.h5" , "tomogram-005.h5" ,],
120-         "07" : ["tomogram-006.h5" , "tomogram-017.h5" ,],
121-         "09" : [],  # no test data 
122-         "10" : ["tomogram-001.h5" , "tomogram-002.h5" , "tomogram-007.h5" ],
123-         "11" : ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5" ],
124-         "12" : ["tomogram-004.h5" , "tomogram-021.h5" , "tomogram-022.h5" ,],
125-     }
126- 
127123    conditions  =  {
128124        "01" : single_ax_tem ,
129125        "02" : dual_ax_tem ,
@@ -150,7 +146,7 @@ def vesicle_train_data():
150146        "12" : (1.554 , 1.554 , 1.554 )
151147    }
152148
153-     aggregate_vesicle_train_data (roots , test_tomograms ,  conditions , resolutions )
149+     aggregate_vesicle_train_data (roots , conditions , resolutions )
154150
155151
156152def  aggregate_az_train_data (roots , test_tomograms , conditions , resolutions ):
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
397393            "MF_05649_P-09175-E_06.h5" , "MF_05646_C-09175-B_001B.h5" , "MF_05649_P-09175-E_07.h5" ,
398394            "MF_05649_G-09175-C_001.h5" , "MF_05646_C-09175-B_002.h5" , "MF_05649_G-09175-C_04.h5" ,
399395            "MF_05649_P-09175-E_05.h5" , "MF_05646_C-09175-B_000.h5" , "MF_05646_C-09175-B_001.h5" 
396+         ],
397+         "frog" : [
398+             "block10U3A_three.h5" , "block30UB_one_two.h5" , "block30UB_two.h5" , "block10U3A_one.h5" ,
399+             "block184B_one.h5" , "block30UB_three.h5" , "block10U3A_two.h5" , "block30UB_four.h5" ,
400+             "block30UB_one.h5" , "block10U3A_five.h5" ,
400401        ]
401402    }
402403
@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
439440    aggregate_da (roots , train_tomograms , test_tomograms , resolutions )
440441
441442
def get_n_images_frog():
    """Print the summed first-axis length of the "raw" dataset over the frog files.

    Every listed HDF5 file is opened read-only from the hard-coded root folder,
    the size of axis 0 of its "raw" dataset is collected, and the total over
    all files is printed once every file has been read.
    """
    data_dir = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"  # noqa
    file_names = (
        "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
        "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
        "block30UB_one.h5", "block10U3A_five.h5",
    )

    def leading_axis_size(file_name):
        # presumably axis 0 of "raw" indexes the individual images -- TODO confirm
        with h5py.File(os.path.join(data_dir, file_name), "r") as handle:
            return handle["raw"].shape[0]

    print(sum(leading_axis_size(file_name) for file_name in file_names))
455+ 
456+ 
def get_image_sizes_tem_2d():
    """Print the shape of the "raw" dataset for each listed file.

    The files are looked up in the hard-coded root folder and opened read-only
    one after another; one shape tuple is printed per file, in list order.
    """
    data_dir = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa
    file_names = (
        "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
        "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
        "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5",
    )

    raw_paths = [os.path.join(data_dir, file_name) for file_name in file_names]
    for raw_path in raw_paths:
        with h5py.File(raw_path, "r") as handle:
            print(handle["raw"].shape)
468+ 
469+ 
def main():
    """Run the currently enabled data summary; other entry points stay commented out."""
    # Disabled entry points, kept here so they can be toggled back on quickly:
    # active_zone_train_data()
    # compartment_train_data()
    # mito_train_data()
    # vesicle_domain_adaptation_data()
    # get_n_images_frog()
    # get_image_sizes_tem_2d()

    vesicle_train_data()
449479
450480
451481main ()
0 commit comments