
Commit 59c3534

prepare for training AZ
1 parent d2aa6d8 commit 59c3534

5 files changed: +65 −16 lines changed


run_sbatch_revision.sbatch

Lines changed: 5 additions & 3 deletions
@@ -2,10 +2,12 @@
 #SBATCH -c 4 #4 #8
 #SBATCH --mem 256G #120G #32G #64G #256G
 #SBATCH -p grete:shared #grete:shared #grete-h100:shared
-#SBATCH -t 24:00:00 #6:00:00 #48:00:00 #SBATCH -G A100:1 #V100:1 #2 #A100:1 #gtx1080:2 #v100:1 #H100:1
+#SBATCH -t 48:00:00 #6:00:00 #48:00:00
+#SBATCH -G A100:1 #V100:1 #2 #A100:1 #gtx1080:2 #v100:1 #H100:1
 #SBATCH --output=/user/muth9/u12095/synapse-net/slurm_revision/slurm-%j.out
-#SBATCH -A nim00007 #SBATCH --constraint 80gb
+#SBATCH -A nim00007
+#SBATCH --constraint 80gb
 
 source ~/.bashrc
 conda activate synapse-net
-python /user/muth9/u12095/synapse-net/scripts/cooper/revision/merge_az.py -v 6
+python /user/muth9/u12095/synapse-net/scripts/cooper/revision/train_az.py -k az_merged_v6
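
Note: the job now launches train_az.py with -k az_merged_v6 instead of running merge_az.py -v 6. A minimal sketch of how the -k key presumably maps to the versioned HDF5 label dataset written by the merge step; the argument parsing, the labels/ prefix, and the file name below are assumptions for illustration, not taken from train_az.py:

# Sketch (assumption): resolve the -k argument to an HDF5 label key.
import argparse

import h5py

parser = argparse.ArgumentParser()
parser.add_argument("-k", "--key", default="az_merged_v6")
args = parser.parse_args()

label_key = f"labels/{args.key}"  # e.g. "labels/az_merged_v6", as written by merge_az.py
with h5py.File("some_tomogram.h5", "r") as f:  # hypothetical training file
    labels = f[label_key][:]
print("loaded labels with shape", labels.shape)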

scripts/cooper/revision/common.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 # The root folder which contains the new AZ training data.
 INPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data"
 # The output folder for AZ predictions.
-OUTPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_predictions_new"
+OUTPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_prediction_new_copy"
 
 # The names of all datasets for which to run prediction / evaluation.
 # This excludes 'endbulb_of_held_cropped', which is a duplicate of 'endbulb_of_held',

scripts/cooper/revision/merge_az.py

Lines changed: 17 additions & 3 deletions
@@ -1,5 +1,6 @@
 import argparse
 import os
+from glob import glob
 
 import h5py
 import napari
@@ -19,13 +20,19 @@
 # STEM CROPPED IS OFTEN TOO SMALL!
 def merge_az(name, version, check):
     split_folder = get_split_folder(version)
-    file_names = get_file_names(name, split_folder, split_names=["train", "val", "test"])
+
+    if name == "stem_cropped":
+        file_paths = glob(os.path.join("/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data/stem_cropped", "*.h5"))
+        file_names = [os.path.basename(path) for path in file_paths]
+    else:
+        file_names = get_file_names(name, split_folder, split_names=["train", "val", "test"])
     seg_paths, gt_paths = get_paths(name, file_names)
 
     for seg_path, gt_path in zip(seg_paths, gt_paths):
 
         with h5py.File(gt_path, "r") as f:
-            if not check and ("labels/az_merged" in f):
+            #if not check and ("labels/az_merged" in f):
+            if f"labels/az_merged_v{version}" in f :
                 continue
             raw = f["raw"][:]
             gt = f["labels/az"][:]
@@ -56,9 +63,16 @@ def merge_az(name, version, check):
             v.title = f"{name}/{fname}"
             napari.run()
 
+            print(f"gt_path {gt_path}")
+            with h5py.File(gt_path, "a") as f:
+                f.create_dataset(f"labels/az_merged_v{version}", data=az_merged, compression="lzf")
+
         else:
-            with h5py.File(seg_path, "a") as f:
+            print(f"gt_path {gt_path}")
+            with h5py.File(gt_path, "a") as f:
                 f.create_dataset(f"labels/az_merged_v{version}", data=az_merged, compression="lzf")
+            '''with h5py.File(seg_path, "a") as f:
+                f.create_dataset(f"labels/az_merged_v{version}", data=az_merged, compression="lzf")'''
 
 
 def visualize_merge(args):
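
The change above makes the merge idempotent per version: files that already contain labels/az_merged_v{version} are skipped, and the merged AZ is now written into the ground-truth file (gt_path) rather than the segmentation file. A minimal sketch of that check-then-write pattern in isolation; the file name and the source of az_merged are placeholders, not part of the commit:

# Sketch (assumption): skip files that already hold the versioned label, otherwise write it.
import h5py

version = 6
key = f"labels/az_merged_v{version}"

with h5py.File("example_tomogram.h5", "a") as f:  # hypothetical file
    if key in f:
        print(f"{key} already present, skipping")
    else:
        az_merged = f["labels/az"][:]  # placeholder for the actual merged AZ volume
        f.create_dataset(key, data=az_merged, compression="lzf")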

scripts/cooper/revision/remove_az_thin.py

Lines changed: 30 additions & 2 deletions
@@ -1,4 +1,4 @@
-import h5py
+'''import h5py
 
 files = [
     "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data/stem_cropped2_rescaled/36859_H2_SP_02_rec_2Kb1dawbp_crop_crop1.h5",
@@ -15,4 +15,32 @@
         del f["labels/az_thin"]
 
         # Recreate the dataset with the new data
-        f.create_dataset("labels/az_thin", data=gt)
+        f.create_dataset("labels/az_thin", data=gt)
+'''
+import h5py
+import numpy as np
+import os
+from glob import glob
+
+folder = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data/stem_cropped/"
+
+# List of file names to process
+file_names = [
+    "36859_H2_SP_01_rec_2Kb1dawbp_crop_cropped_noAZ.h5",
+    "36859_H2_SP_02_rec_2Kb1dawbp_crop_cropped_noAZ.h5",
+    "36859_H2_SP_03_rec_2Kb1dawbp_crop_cropped_noAZ.h5",
+    "36859_H3_SP_05_rec_2kb1dawbp_crop_cropped_noAZ.h5",
+    "36859_H3_SP_07_rec_2kb1dawbp_crop_cropped_noAZ.h5",
+    "36859_H3_SP_10_rec_2kb1dawbp_crop_cropped_noAZ.h5"
+]
+
+file_paths = glob(os.path.join("/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data/endbulb_of_held_cropped", "*.h5"))
+
+for fname in file_paths:
+    #file_path = os.path.join(folder, fname)
+
+    with h5py.File(fname, "a") as f:
+        az_merged = f["/labels/az_merged"][:]
+        f.create_dataset("/labels/az_merged_v6", data=az_merged, compression="lzf")
+
+    print(f"Updated file: {fname}")

scripts/cooper/revision/train_az.py

Lines changed: 12 additions & 7 deletions
@@ -9,12 +9,12 @@
 
 from synapse_net.training import supervised_training, AZDistanceLabelTransform
 
-TRAIN_ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/new_AZ_train_data"
+TRAIN_ROOT = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data"
 OUTPUT_ROOT = "./models_az_thin"
 
 
 def _require_train_val_test_split(datasets):
-    train_ratio, val_ratio, test_ratio = 0.70, 0.1, 0.2
+    train_ratio, val_ratio, test_ratio = 0.60, 0.2, 0.2
 
     def _train_val_test_split(names):
         train, test = train_test_split(names, test_size=1 - train_ratio, shuffle=True)
@@ -87,17 +87,22 @@ def train(key, ignore_label=None, use_distances=False, training_2D=False, testse
 
     os.makedirs(OUTPUT_ROOT, exist_ok=True)
 
-    datasets = ["tem", "chemical_fixation", "stem", "stem_cropped", "endbulb_of_held", "endbulb_of_held_cropped"]
-    train_paths = get_paths("train", datasets=datasets, testset=testset)
-    val_paths = get_paths("val", datasets=datasets, testset=testset)
+    datasets_with_testset_true = ["tem", "chemical_fixation", "stem", "endbulb_of_held"]
+    datasets_with_testset_false = ["stem_cropped", "endbulb_of_held_cropped"]
+
+    train_paths = get_paths("train", datasets=datasets_with_testset_true, testset=True)
+    val_paths = get_paths("val", datasets=datasets_with_testset_true, testset=True)
+
+    train_paths += get_paths("train", datasets=datasets_with_testset_false, testset=False)
+    val_paths += get_paths("val", datasets=datasets_with_testset_false, testset=False)
 
     print("Start training with:")
     print(len(train_paths), "tomograms for training")
    print(len(val_paths), "tomograms for validation")
 
     # patch_shape = [48, 256, 256]
     patch_shape = [48, 384, 384]
-    model_name = "v6"
+    model_name = "v7"
 
     # checking for 2D training
     if training_2D:
@@ -121,7 +126,7 @@ def train(key, ignore_label=None, use_distances=False, training_2D=False, testse
         sampler=torch_em.data.sampler.MinInstanceSampler(min_num_instances=1, p_reject=0.85),
         n_samples_train=None, n_samples_val=100,
         check=check,
-        save_root=OUTPUT_ROOT,
+        save_root="/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/models/ConstantinAZ",
         n_iterations=int(2e5),
         ignore_label=ignore_label,
         label_transform=label_transform,
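
The split ratios move from 0.70/0.1/0.2 to 0.60/0.2/0.2, and the datasets are now grouped into those that keep a dedicated test split and those used entirely for train/val. A small sketch of how a two-stage train_test_split with the new ratios yields roughly 60/20/20; only the first split line appears in the diff, so the second stage below is an assumed completion for illustration:

# Sketch (assumption): two-stage split producing ~60% train, ~20% val, ~20% test.
from sklearn.model_selection import train_test_split

names = [f"tomo_{i}" for i in range(10)]  # hypothetical file names
train_ratio, val_ratio, test_ratio = 0.60, 0.2, 0.2

# First split: keep 60% for training, hold out the remaining 40%.
train, rest = train_test_split(names, test_size=1 - train_ratio, shuffle=True)
# Second split: divide the held-out 40% equally into validation and test.
val, test = train_test_split(rest, test_size=test_ratio / (val_ratio + test_ratio))

print(len(train), len(val), len(test))  # e.g. 6 2 2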
