cropping stem data

SarahMuth · SarahMuth · commit 5f66328d29f5 · 2025-06-05T22:02:53.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@ models/*/
 *.egg-info/
 run_sbatch.sbatch
 slurm/
+slurm_revision/
 scripts/cooper/evaluation_results/
 scripts/cooper/training/copy_testset.py
 scripts/rizzoli/upsample_data.py
diff --git a/environment.yaml b/environment.yaml
@@ -1,7 +1,7 @@
 channels:
     - conda-forge
 name:
-    synapse-net
+    synapse-net-cpu
 dependencies:
     - bioimageio.core
     - kornia
diff --git a/run_sbatch_revision.sbatch b/run_sbatch_revision.sbatch
@@ -0,0 +1,11 @@
+#! /bin/bash
+#SBATCH -c 4 # CPU cores per task; other values kept for reference: 4, 8
+#SBATCH --mem 256G # other values kept for reference: 120G, 32G, 64G, 256G
+#SBATCH -p grete:shared # other partitions kept for reference: grete:shared, grete-h100:shared
+#SBATCH -t 24:00:00 # other limits: 6:00:00, 48:00:00. NOTE(review): the GPU request "-G A100:1" (also V100:1, 2, gtx1080:2, v100:1, H100:1) sits in this trailing comment and is presumably inactive - confirm no GPU is needed
+#SBATCH --output=/user/muth9/u12095/synapse-net/slurm_revision/slurm-%j.out 
+#SBATCH -A nim00007 # NOTE(review): "--constraint 80gb" sits in this trailing comment and is presumably inactive - confirm
+
+source ~/.bashrc
+conda activate synapse-net
+python /user/muth9/u12095/synapse-net/scripts/cooper/revision/merge_az.py -v 6
diff --git a/scripts/cooper/revision/assort_new_az_data.py b/scripts/cooper/revision/assort_new_az_data.py
@@ -5,10 +5,13 @@
 import numpy as np
 from tqdm import tqdm
 from skimage.transform import resize
+from skimage.measure import label
+from scipy.ndimage import binary_closing
 
-ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/AZ_data/training_data"
-INTER_ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/AZ_predictions"
-OUTPUT_ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/new_AZ_train_data"
+ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_data/training_data"
+INTER_ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_predictions"
+OUTPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data"
+STEM_INPUT = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/for_revison/postprocessed_AZ"  # Input root read by crop_stem.
 
 
 def _check_data(files, label_folder, check_thinned):
@@ -195,6 +198,70 @@ def crop_wichmann():
             f.create_dataset("labels/az", data=az, compression="lzf")
             f.create_dataset("labels/az_thin", data=az, compression="lzf")
 
+def crop_stem(check=True):
+    """Crop the STEM volumes around each large AZ component and save every crop to its own h5 file.
+
+    Small components of "labels/az" are dropped and the remaining mask is closed; existing crops are skipped.
+
+    Args:
+        check: Show each crop in napari before writing it; the viewer blocks until it is closed.
+            Defaults to True, which keeps the previous behavior. Pass False for unattended runs.
+    """
+    input_name = "04_hoi_stem_examples_minusSVseg"
+    output_name = "stem_cropped2"
+
+    input_folder = os.path.join(STEM_INPUT, input_name)
+    output_folder = os.path.join(OUTPUT_ROOT, output_name)
+    os.makedirs(output_folder, exist_ok=True)
+    files = glob(os.path.join(input_folder, "*.h5"))
+
+    # NOTE(review): crops can end up smaller than min_shape (border clipping, odd size deficit) - confirm.
+    min_shape = (32, 512, 512)
+    min_size = 2000  # Components with at most this many voxels are dropped as artifacts.
+    if check:
+        import napari  # Only needed for the optional visual check.
+
+    for ff in tqdm(files):
+        with h5py.File(ff, "r") as f:
+            az = f["labels/az"][:]
+            raw_full = f["raw"][:]
+
+        # Label connected components in the az volume.
+        labeled = label(az)
+        ids, sizes = np.unique(labeled, return_counts=True)
+        # Exclude artifacts and the background (id 0). Filtering by id instead of dropping
+        # the first entry stays correct for volumes that contain no background voxels.
+        keep_labels = ids[(sizes > min_size) & (ids != 0)]
+
+        # Clean up the az annotations: keep only the large components, then fill gaps via binary closing.
+        az = np.isin(labeled, keep_labels).astype("uint8")
+        az = np.logical_or(az, binary_closing(az, iterations=4)).astype("uint8")
+
+        fname = os.path.basename(ff)
+        for crop_id, label_id in enumerate(keep_labels, start=1):
+            output_path = os.path.join(output_folder, fname.replace(".h5", f"_crop{crop_id}.h5"))
+            if os.path.exists(output_path):
+                print(f"Skipping existing file: {output_path}")
+                continue
+
+            # Bounding box of this component, padded towards min_shape and clipped to the volume.
+            bb = np.where(labeled == label_id)
+            bb = tuple(slice(int(b.min()), int(b.max()) + 1) for b in bb)
+            pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_shape)]
+            bb = tuple(
+                slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az.shape)
+            )
+            az_crop, raw_crop = az[bb], raw_full[bb]
+
+            if check:
+                v = napari.Viewer()
+                v.add_image(raw_crop)
+                v.add_labels(az_crop)
+                napari.run()
+
+            with h5py.File(output_path, "a") as f:
+                f.create_dataset("raw", data=raw_crop, compression="lzf")
+                f.create_dataset("labels/az", data=az_crop, compression="lzf")
 
 def main():
     # assort_tem()
@@ -203,7 +270,9 @@ def main():
     # assort_stem()
 
     # assort_wichmann()
-    crop_wichmann()
+    #crop_wichmann()
+
+    crop_stem()
 
 
 if __name__ == "__main__":
diff --git a/scripts/cooper/revision/common.py b/scripts/cooper/revision/common.py
@@ -3,9 +3,9 @@
 
 
 # The root folder which contains the new AZ training data.
-INPUT_ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/new_AZ_train_data"
+INPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/new_AZ_train_data"
 # The output folder for AZ predictions.
-OUTPUT_ROOT = "/mnt/ceph-hdd/cold_store/projects/nim00007/AZ_predictions_new"
+OUTPUT_ROOT = "/mnt/ceph-hdd/cold/nim00007/AZ_predictions_new"
 
 # The names of all datasets for which to run prediction / evaluation.
 # This excludes 'endbulb_of_held_cropped', which is a duplicate of 'endbulb_of_held',
@@ -61,9 +61,11 @@ def get_file_names(name, split_folder, split_names):
 
 
 def get_split_folder(version):
-    assert version in (3, 4, 5)
+    assert version in (3, 4, 5, 6)  # Only these model versions have a known split location.
     if version == 3:
         split_folder = "splits"
+    elif version == 6:  # Version 6 keeps its splits next to the new training data.
+        split_folder = f"{INPUT_ROOT}/splits"  # Same path as before, derived from INPUT_ROOT.
     else:
         split_folder = "models_az_thin"
     return split_folder
diff --git a/scripts/cooper/revision/merge_az.py b/scripts/cooper/revision/merge_az.py
@@ -57,8 +57,8 @@ def merge_az(name, version, check):
             napari.run()
 
         else:
-            with h5py.File(gt_path, "a") as f:
-                f.create_dataset("labels/az_merged", data=az_merged, compression="lzf")
+            with h5py.File(seg_path, "a") as f:
+                f.create_dataset(f"labels/az_merged_v{version}", data=az_merged, compression="lzf")
 
 
 def visualize_merge(args):