| 
 | 1 | +import os  | 
 | 2 | +from glob import glob  | 
 | 3 | +from shutil import copyfile  | 
 | 4 | + | 
 | 5 | +import h5py  | 
 | 6 | +from tqdm import tqdm  | 
 | 7 | + | 
# Root folder below which all exported datasets are written.
OUTPUT_ROOT = "./data_summary/for_zenodo"
 | 9 | + | 
 | 10 | + | 
 | 11 | +def _copy_vesicles(tomos, out_folder):  | 
 | 12 | +    label_key = "labels/vesicles/combined_vesicles"  | 
 | 13 | +    os.makedirs(out_folder, exist_ok=True)  | 
 | 14 | +    for tomo in tqdm(tomos, desc="Export tomos"):  | 
 | 15 | +        out_path = os.path.join(out_folder, os.path.basename(tomo))  | 
 | 16 | +        if os.path.exists(out_path):  | 
 | 17 | +            continue  | 
 | 18 | + | 
 | 19 | +        with h5py.File(tomo, "r") as f:  | 
 | 20 | +            raw = f["raw"][:]  | 
 | 21 | +            labels = f[label_key][:]  | 
 | 22 | +            try:  | 
 | 23 | +                fname = f.attrs["filename"]  | 
 | 24 | +            except KeyError:  | 
 | 25 | +                fname = None  | 
 | 26 | + | 
 | 27 | +        with h5py.File(out_path, "a") as f:  | 
 | 28 | +            f.create_dataset("raw", data=raw, compression="gzip")  | 
 | 29 | +            f.create_dataset("labels/vesicles", data=labels, compression="gzip")  | 
 | 30 | +            if fname is not None:  | 
 | 31 | +                f.attrs["filename"] = fname  | 
 | 32 | + | 
 | 33 | + | 
 | 34 | +def _export_vesicles(train_root, test_root, name):  | 
 | 35 | +    train_tomograms = sorted(glob(os.path.join(train_root, "*.h5")))  | 
 | 36 | +    test_tomograms = sorted(glob(os.path.join(test_root, "*.h5")))  | 
 | 37 | +    print(f"Vesicle data for {name}:")  | 
 | 38 | +    print(len(train_tomograms), len(test_tomograms), len(train_tomograms) + len(test_tomograms))  | 
 | 39 | + | 
 | 40 | +    train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "train", name)  | 
 | 41 | +    _copy_vesicles(train_tomograms, train_out)  | 
 | 42 | + | 
 | 43 | +    test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "test", name)  | 
 | 44 | +    _copy_vesicles(test_tomograms, test_out)  | 
 | 45 | + | 
 | 46 | + | 
 | 47 | +def _export_az(train_root, test_tomos, name):  | 
 | 48 | +    tomograms = sorted(glob(os.path.join(train_root, "*.h5")))  | 
 | 49 | +    print(f"AZ data for {name}:")  | 
 | 50 | + | 
 | 51 | +    train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "train", name)  | 
 | 52 | +    test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "test", name)  | 
 | 53 | + | 
 | 54 | +    os.makedirs(train_out, exist_ok=True)  | 
 | 55 | +    os.makedirs(test_out, exist_ok=True)  | 
 | 56 | + | 
 | 57 | +    for tomo in tqdm(tomograms):  | 
 | 58 | +        fname = os.path.basename(tomo)  | 
 | 59 | +        if tomo in test_tomos:  | 
 | 60 | +            out_path = os.path.join(test_out, fname)  | 
 | 61 | +        else:  | 
 | 62 | +            out_path = os.path.join(train_out, fname)  | 
 | 63 | +        if os.path.exists(out_path):  | 
 | 64 | +            continue  | 
 | 65 | + | 
 | 66 | +        with h5py.File(tomo, "r") as f:  | 
 | 67 | +            raw = f["raw"][:]  | 
 | 68 | +            az = f["labels/AZ"][:]  | 
 | 69 | + | 
 | 70 | +        with h5py.File(out_path, "a") as f:  | 
 | 71 | +            f.create_dataset("raw", data=raw, compression="gzip")  | 
 | 72 | +            f.create_dataset("labels/AZ", data=az, compression="gzip")  | 
 | 73 | + | 
 | 74 | + | 
 | 75 | +# NOTE: we have very few mito annotations from 01, so we don't include them in here.  | 
 | 76 | +def prepare_single_ax_stem_chemical_fix():  | 
 | 77 | +    # single-axis-tem: vesicles  | 
 | 78 | +    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete"  # noqa  | 
 | 79 | +    test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete"  # noqa  | 
 | 80 | +    _export_vesicles(train_root, test_root, name="single_axis_tem")  | 
 | 81 | + | 
 | 82 | +    # single-axis-tem: active zones  | 
 | 83 | +    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete"  # noqa  | 
 | 84 | +    test_tomos = [  | 
 | 85 | +        "WT_MF_DIV28_01_MS_09204_F1.h5", "WT_MF_DIV14_01_MS_B2_09175_CA3.h5", "M13_CTRL_22723_O2_05_DIV29_5.2.h5", "WT_Unt_SC_09175_D4_05_DIV14_mtk_05.h5",  # noqa  | 
 | 86 | +        "20190805_09002_B4_SC_11_SP.h5", "20190807_23032_D4_SC_01_SP.h5", "M13_DKO_22723_A1_03_DIV29_03_MS.h5", "WT_MF_DIV28_05_MS_09204_F1.h5", "M13_CTRL_09201_S2_06_DIV31_06_MS.h5", # noqa  | 
 | 87 | +        "WT_MF_DIV28_1.2_MS_09002_B1.h5", "WT_Unt_SC_09175_C4_04_DIV15_mtk_04.h5",   "M13_DKO_22723_A4_10_DIV29_10_MS.h5",  "WT_MF_DIV14_3.2_MS_D2_09175_CA3.h5",  # noqa  | 
 | 88 | +           "20190805_09002_B4_SC_10_SP.h5", "M13_CTRL_09201_S2_02_DIV31_02_MS.h5", "WT_MF_DIV14_04_MS_E1_09175_CA3.h5", "WT_MF_DIV28_10_MS_09002_B3.h5",   "WT_Unt_SC_05646_D4_02_DIV16_mtk_02.h5",   "M13_DKO_22723_A4_08_DIV29_08_MS.h5",  "WT_MF_DIV28_04_MS_09204_M1.h5",   "WT_MF_DIV28_03_MS_09204_F1.h5",   "M13_DKO_22723_A1_05_DIV29_05_MS.h5",  # noqa  | 
 | 89 | +        "WT_Unt_SC_09175_C4_06_DIV15_mtk_06.h5",  "WT_MF_DIV28_09_MS_09002_B3.h5", "20190524_09204_F4_SC_07_SP.h5",  | 
 | 90 | +           "WT_MF_DIV14_02_MS_C2_09175_CA3.h5",    "M13_DKO_23037_K1_01_DIV29_01_MS.h5",  "WT_Unt_SC_09175_E2_01_DIV14_mtk_01.h5", "20190807_23032_D4_SC_05_SP.h5",   "WT_MF_DIV14_01_MS_E2_09175_CA3.h5",   "WT_MF_DIV14_03_MS_B2_09175_CA3.h5",   "M13_DKO_09201_O1_01_DIV31_01_MS.h5",  "M13_DKO_09201_U1_04_DIV31_04_MS.h5",  # noqa  | 
 | 91 | +        "WT_MF_DIV14_04_MS_E2_09175_CA3_2.h5",   "WT_Unt_SC_09175_D5_01_DIV14_mtk_01.h5",  | 
 | 92 | +        "M13_CTRL_22723_O2_05_DIV29_05_MS_.h5",  "WT_MF_DIV14_02_MS_B2_09175_CA3.h5", "WT_MF_DIV14_01.2_MS_D1_09175_CA3.h5",  # noqa  | 
 | 93 | +    ]  | 
 | 94 | +    _export_az(train_root, test_tomos, name="single_axis_tem")  | 
 | 95 | + | 
 | 96 | +    # chemical_fixation: vesicles  | 
 | 97 | +    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation"  # noqa  | 
 | 98 | +    test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation"  # noqa  | 
 | 99 | +    _export_vesicles(train_root, test_root, name="chemical_fixation")  | 
 | 100 | + | 
 | 101 | +    # chemical-fixation: active zones  | 
 | 102 | +    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation"  # noqa  | 
 | 103 | +    test_tomos = ["20180305_09_MS.h5", "20180305_04_MS.h5", "20180305_08_MS.h5",  | 
 | 104 | +                  "20171113_04_MS.h5", "20171006_05_MS.h5", "20180305_01_MS.h5"]  | 
 | 105 | +    _export_az(train_root, test_tomos, name="chemical_fixation")  | 
 | 106 | + | 
 | 107 | + | 
 | 108 | +def prepare_ier():  | 
 | 109 | +    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser/other_tomograms"  | 
 | 110 | +    sets = {  | 
 | 111 | +        "01_vesicle_pools": "vesicle_pools",  | 
 | 112 | +        "02_tether": "tether",  | 
 | 113 | +        "03_ratten_tomos": "rat",  | 
 | 114 | +    }  | 
 | 115 | + | 
 | 116 | +    output_folder = os.path.join(OUTPUT_ROOT, "IER")  | 
 | 117 | +    label_names = {  | 
 | 118 | +        "ribbons": "ribbon",  | 
 | 119 | +        "membrane": "membrane",  | 
 | 120 | +        "presynapse": "PD",  | 
 | 121 | +        "postsynapse": "PSD",  | 
 | 122 | +        "vesicles": "vesicles",  | 
 | 123 | +    }  | 
 | 124 | + | 
 | 125 | +    for name, output_name in sets.items():  | 
 | 126 | +        out_set = os.path.join(output_folder, output_name)  | 
 | 127 | +        os.makedirs(out_set, exist_ok=True)  | 
 | 128 | +        tomos = sorted(glob(os.path.join(root, name, "*.h5")))  | 
 | 129 | + | 
 | 130 | +        print("Export", output_name)  | 
 | 131 | +        for tomo in tqdm(tomos):  | 
 | 132 | +            with h5py.File(tomo, "r") as f:  | 
 | 133 | +                try:  | 
 | 134 | +                    fname = os.path.split(f.attrs["filename"])[1][:-4]  | 
 | 135 | +                except KeyError:  | 
 | 136 | +                    fname = f.attrs["path"][1]  | 
 | 137 | +                    fname = "_".join(fname.split("/")[-2:])  | 
 | 138 | + | 
 | 139 | +                out_path = os.path.join(out_set, os.path.basename(tomo))  | 
 | 140 | +                if os.path.exists(out_path):  | 
 | 141 | +                    continue  | 
 | 142 | + | 
 | 143 | +                raw = f["raw"][:]  | 
 | 144 | +                labels = {}  | 
 | 145 | +                for label_name, out_name in label_names.items():  | 
 | 146 | +                    key = f"labels/{label_name}"  | 
 | 147 | +                    if key not in f:  | 
 | 148 | +                        continue  | 
 | 149 | +                    labels[out_name] = f[key][:]  | 
 | 150 | + | 
 | 151 | +            with h5py.File(out_path, "a") as f:  | 
 | 152 | +                f.attrs["filename"] = fname  | 
 | 153 | +                f.create_dataset("raw", data=raw, compression="gzip")  | 
 | 154 | +                for label_name, seg in labels.items():  | 
 | 155 | +                    f.create_dataset(f"labels/{label_name}", data=seg, compression="gzip")  | 
 | 156 | + | 
 | 157 | + | 
 | 158 | +def prepare_frog():  | 
 | 159 | +    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted"  | 
 | 160 | +    train_tomograms = [  | 
 | 161 | +        "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",  | 
 | 162 | +        "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",  | 
 | 163 | +        "block30UB_one.h5", "block10U3A_five.h5",  | 
 | 164 | +    ]  | 
 | 165 | +    test_tomograms = ["block10U3A_four.h5", "block30UB_five.h5"]  | 
 | 166 | + | 
 | 167 | +    output_folder = os.path.join(OUTPUT_ROOT, "frog")  | 
 | 168 | +    output_train = os.path.join(output_folder, "train_unlabeled")  | 
 | 169 | +    os.makedirs(output_train, exist_ok=True)  | 
 | 170 | + | 
 | 171 | +    for name in train_tomograms:  | 
 | 172 | +        path = os.path.join(root, name)  | 
 | 173 | +        out_path = os.path.join(output_train, name)  | 
 | 174 | +        if os.path.exists(out_path):  | 
 | 175 | +            continue  | 
 | 176 | +        copyfile(path, out_path)  | 
 | 177 | + | 
 | 178 | +    output_test = os.path.join(output_folder, "test")  | 
 | 179 | +    os.makedirs(output_test, exist_ok=True)  | 
 | 180 | +    for name in test_tomograms:  | 
 | 181 | +        path = os.path.join(root, name)  | 
 | 182 | +        out_path = os.path.join(output_test, name)  | 
 | 183 | +        if os.path.exists(out_path):  | 
 | 184 | +            continue  | 
 | 185 | +        copyfile(path, out_path)  | 
 | 186 | + | 
 | 187 | + | 
 | 188 | +def prepare_2d_tem():  | 
 | 189 | +    train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled"  # noqa  | 
 | 190 | +    test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicle_gt_2d/maus_2020_tem2d"  # noqa  | 
 | 191 | +    train_images = [  | 
 | 192 | +        "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",  | 
 | 193 | +        "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",  | 
 | 194 | +        "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"  | 
 | 195 | +    ]  | 
 | 196 | +    test_images = [  | 
 | 197 | +        "MF_05649_G-09175-C_04B.h5", "MF_05646_C-09175-B_000B.h5",  | 
 | 198 | +        "MF_05649_G-09175-C_03.h5", "MF_05649_G-09175-C_02.h5"  | 
 | 199 | +    ]  | 
 | 200 | +    print(len(train_images) + len(test_images))  | 
 | 201 | + | 
 | 202 | +    output_folder = os.path.join(OUTPUT_ROOT, "2d_tem")  | 
 | 203 | + | 
 | 204 | +    output_train = os.path.join(output_folder, "train_unlabeled")  | 
 | 205 | +    os.makedirs(output_train, exist_ok=True)  | 
 | 206 | +    for name in tqdm(train_images, desc="Export train images"):  | 
 | 207 | +        out_path = os.path.join(output_train, name)  | 
 | 208 | +        if os.path.exists(out_path):  | 
 | 209 | +            continue  | 
 | 210 | +        in_path = os.path.join(train_root, name)  | 
 | 211 | +        with h5py.File(in_path, "r") as f:  | 
 | 212 | +            raw = f["raw"][:]  | 
 | 213 | +        with h5py.File(out_path, "a") as f:  | 
 | 214 | +            f.create_dataset("raw", data=raw, compression="gzip")  | 
 | 215 | + | 
 | 216 | +    output_test = os.path.join(output_folder, "test")  | 
 | 217 | +    os.makedirs(output_test, exist_ok=True)  | 
 | 218 | +    for name in tqdm(test_images, desc="Export test images"):  | 
 | 219 | +        out_path = os.path.join(output_test, name)  | 
 | 220 | +        if os.path.exists(out_path):  | 
 | 221 | +            continue  | 
 | 222 | +        in_path = os.path.join(test_root, name)  | 
 | 223 | +        with h5py.File(in_path, "r") as f:  | 
 | 224 | +            raw = f["data"][:]  | 
 | 225 | +            labels = f["labels/vesicles"][:]  | 
 | 226 | +            mask = f["labels/mask"][:]  | 
 | 227 | +        with h5py.File(out_path, "a") as f:  | 
 | 228 | +            f.create_dataset("raw", data=raw, compression="gzip")  | 
 | 229 | +            f.create_dataset("labels/vesicles", data=labels, compression="gzip")  | 
 | 230 | +            f.create_dataset("labels/mask", data=mask, compression="gzip")  | 
 | 231 | + | 
 | 232 | + | 
 | 233 | +def prepare_munc_snap():  | 
 | 234 | +    pass  | 
 | 235 | + | 
 | 236 | + | 
 | 237 | +def main():  | 
 | 238 | +    prepare_single_ax_stem_chemical_fix()  | 
 | 239 | +    # prepare_2d_tem()  | 
 | 240 | +    # prepare_frog()  | 
 | 241 | +    # prepare_ier()  | 
 | 242 | +    # prepare_munc_snap()  | 
 | 243 | + | 
 | 244 | + | 
# Run the exports only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments