Implement RF based classification WIP

constantinpape · constantinpape · commit 20ac761cc47d · 2025-06-04T10:10:46.000+02:00
diff --git a/flamingo_tools/classification/__init__.py b/flamingo_tools/classification/__init__.py
@@ -0,0 +1 @@
+from .classification_gui import run_classification_gui
diff --git a/flamingo_tools/classification/classification_gui.py b/flamingo_tools/classification/classification_gui.py
@@ -0,0 +1,128 @@
+import os
+from multiprocessing import cpu_count
+from pathlib import Path
+from typing import Optional
+
+import h5py
+import imageio.v3 as imageio
+import napari
+import numpy as np
+import pandas as pd
+
+from joblib import dump
+from magicgui import magic_factory
+from skimage.measure import regionprops_table
+
+import micro_sam.sam_annotator.object_classifier as classifier_util
+from micro_sam.object_classification import project_prediction_to_segmentation
+from micro_sam.sam_annotator._widgets import _generate_message
+
+# Module-level state shared between run_classification_gui and the widgets below.
+# Names of the napari layers that hold the image and the segmentation; set in run_classification_gui.
+IMAGE_LAYER_NAME = None
+SEGMENTATION_LAYER_NAME = None
+# Feature matrix and the matching segment ids; computed in the first training run and reused afterwards.
+FEATURES = None
+SEG_IDS = None
+# The random forest from the most recent training run; read by the classifier export widget.
+CLASSIFIER = None
+# The labels accumulated from the annotation layer in the most recent training run.
+LABELS = None
+
+
+# TODO refactor
+def _compute_features(segmentation, image):
+    """Compute regionprops-based features for the objects of a segmentation.
+
+    Args:
+        segmentation: The label image with the objects.
+        image: The intensity image passed to `regionprops_table`.
+
+    Returns:
+        The feature values as an array (all regionprops columns except "label").
+        The integer label ids, taken from the "label" column of the same table.
+    """
+    property_names = [
+        "label", "area", "axis_major_length", "axis_minor_length",
+        "equivalent_diameter_area", "euler_number", "extent",
+        "feret_diameter_max", "inertia_tensor_eigvals",
+        "intensity_max", "intensity_mean", "intensity_min",
+        "intensity_std", "moments_central",
+        "moments_weighted", "solidity",
+    ]
+    table = pd.DataFrame(regionprops_table(segmentation, image, properties=property_names))
+
+    # The label column identifies the objects and is not used as a feature.
+    seg_ids = table.label.values.astype(int)
+    feature_matrix = table.drop(columns="label").values
+    return feature_matrix, seg_ids
+
+
+@magic_factory(call_button="Train and predict")
+def _train_and_predict_rf_widget(viewer: "napari.viewer.Viewer") -> None:
+    """Train a random forest on the current annotations and display its prediction.
+
+    Reads the "annotations" layer and the segmentation layer from the viewer, trains a
+    random forest on the object features and writes the prediction, projected back to
+    the segmentation, into the "prediction" layer. The features, segment ids, labels and
+    the classifier are stored in module-level globals so that the export widgets can use them.
+
+    Args:
+        viewer: The napari viewer that holds the layers.
+    """
+    global FEATURES, SEG_IDS, CLASSIFIER, LABELS
+
+    annotations = viewer.layers["annotations"].data
+    segmentation = viewer.layers[SEGMENTATION_LAYER_NAME].data
+    # NOTE(review): presumably yields one label per segmented object, with 0 for
+    # objects without annotation - verify against the micro_sam implementation.
+    labels = classifier_util._accumulate_labels(segmentation, annotations)
+    LABELS = labels
+
+    # The features are computed only once and are reused by all later training runs.
+    if FEATURES is None:
+        print("Computing features ...")
+        image = viewer.layers[IMAGE_LAYER_NAME].data
+        FEATURES, SEG_IDS = _compute_features(segmentation, image)
+
+    print("Training random forest ...")
+    rf = classifier_util._train_rf(FEATURES, labels, n_estimators=200, max_depth=10, n_jobs=cpu_count())
+    CLASSIFIER = rf
+
+    # Run and set the prediction.
+    print("Run prediction ...")
+    pred = rf.predict(FEATURES)
+    prediction_data = project_prediction_to_segmentation(segmentation, pred, SEG_IDS)
+    viewer.layers["prediction"].data = prediction_data
+
+
+@magic_factory(call_button="Export Classifier")
+def _create_export_rf_widget(export_path: Optional[Path] = None) -> None:
+    """Save the most recently trained random forest to disk with joblib.
+
+    Args:
+        export_path: Where to save the classifier. The path is used as given,
+            no file extension is added.
+    """
+    if CLASSIFIER is None:
+        return _generate_message("error", "You have not run training yet.")
+
+    path_missing = export_path is None or export_path == ""
+    if path_missing:
+        return _generate_message("error", "You have to provide an export path.")
+
+    # Do we add an extension? .joblib?
+    dump(CLASSIFIER, export_path)
+
+
+@magic_factory(call_button="Export Features")
+def _create_export_feature_widget(export_path: Optional[Path] = None) -> None:
+    """Export the features and labels from the last training run to an hdf5 file.
+
+    The data is written to a group named after the image layer, with the datasets
+    "features" and "labels". The file is opened in append mode, so the exports for
+    several images can be collected in the same file. Exporting again for the same
+    image replaces the previous export for that image.
+
+    Args:
+        export_path: Where to save the features. The suffix is replaced by ".h5".
+    """
+    if FEATURES is None or LABELS is None:
+        return _generate_message("error", "You have not run training yet.")
+    if export_path is None or export_path == "":
+        return _generate_message("error", "You have to provide an export path.")
+
+    # Drop all entries with label 0 (presumably the objects without annotation - TODO confirm).
+    valid = LABELS != 0
+    features, labels = FEATURES[valid], LABELS[valid]
+
+    export_path = Path(export_path).with_suffix(".h5")
+    with h5py.File(export_path, "a") as f:
+        # create_group raises if the group already exists, which happens when
+        # exporting a second time for the same image. Replace the old export instead.
+        if IMAGE_LAYER_NAME in f:
+            del f[IMAGE_LAYER_NAME]
+        g = f.create_group(IMAGE_LAYER_NAME)
+        g.create_dataset("features", data=features, compression="lzf")
+        g.create_dataset("labels", data=labels, compression="lzf")
+
+
+def run_classification_gui(image_path, segmentation_path, image_name=None, segmentation_name=None):
+    """Start the napari GUI for random forest based object classification.
+
+    Loads the image and segmentation, adds them to a napari viewer together with
+    empty "prediction" and "annotations" label layers, docks the widgets for
+    training and export, and starts the napari event loop.
+
+    Args:
+        image_path: The path to the image, read with imageio.
+        segmentation_path: The path to the segmentation, read with imageio.
+        image_name: The name of the image layer. Defaults to the file name of the image.
+        segmentation_name: The name of the segmentation layer.
+            Defaults to the file name of the segmentation.
+    """
+    global IMAGE_LAYER_NAME, SEGMENTATION_LAYER_NAME
+    global FEATURES, SEG_IDS, CLASSIFIER, LABELS
+
+    # Reset the state of a previous session. Otherwise the cached features of the
+    # previous image would be reused for training and export with the new image.
+    FEATURES, SEG_IDS, CLASSIFIER, LABELS = None, None, None, None
+
+    image = imageio.imread(image_path)
+    segmentation = imageio.imread(segmentation_path)
+
+    image_name = os.path.basename(image_path) if image_name is None else image_name
+    segmentation_name = os.path.basename(segmentation_path) if segmentation_name is None else segmentation_name
+
+    # The widgets look up the layers via these names.
+    IMAGE_LAYER_NAME = image_name
+    SEGMENTATION_LAYER_NAME = segmentation_name
+
+    viewer = napari.Viewer()
+    viewer.add_image(image, name=image_name)
+    viewer.add_labels(segmentation, name=segmentation_name)
+
+    # Empty layers for the classifier output and the user annotations.
+    shape = image.shape
+    viewer.add_labels(name="prediction", data=np.zeros(shape, dtype="uint8"))
+    viewer.add_labels(name="annotations", data=np.zeros(shape, dtype="uint8"))
+
+    # Add the gui elements.
+    train_widget = _train_and_predict_rf_widget()
+    rf_export_widget = _create_export_rf_widget()
+    feature_export_widget = _create_export_feature_widget()
+
+    viewer.window.add_dock_widget(train_widget)
+    viewer.window.add_dock_widget(feature_export_widget)
+    viewer.window.add_dock_widget(rf_export_widget)
+
+    napari.run()
diff --git a/flamingo_tools/classification/training_and_prediction.py b/flamingo_tools/classification/training_and_prediction.py
@@ -0,0 +1,10 @@
+
+
+# TODO train a classifier on all features and labels stored in h5
+def train_classifier(feature_paths):
+    """Placeholder for training a classifier from exported features; not implemented yet.
+
+    Args:
+        feature_paths: Currently unused. Presumably the paths to the h5 files
+            with exported features and labels - confirm once this is implemented.
+    """
+    pass
+
+
+# TODO run prediction on a full cochlea
+def predict_classifier():
+    """Placeholder for running the classifier prediction; not implemented yet."""
+    pass
diff --git a/flamingo_tools/measurements.py b/flamingo_tools/measurements.py
@@ -26,6 +26,9 @@ def _measure_volume_and_surface(mask, resolution):
     return volume, surface
 
 
+# TODO extend this to also support regionprops features,
+# maybe spherical harmonics, line profiles, and nucleus (in SGNs, based on thresholding).
+# For this, refactor the feature function.
 def compute_object_measures_impl(
     image: np.typing.ArrayLike,
     segmentation: np.typing.ArrayLike,
diff --git a/scripts/check_ihc_seg.py b/scripts/check_ihc_seg.py
@@ -0,0 +1,196 @@
+import os
+from glob import glob
+
+import h5py
+import imageio.v3 as imageio
+import napari
+import numpy as np
+
+IHC_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/croppings/IHC_crop"
+IHC_SEG = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/croppings/IHC_seg"
+
+
+def inspect_all_data():
+    """Show all image / segmentation pairs from IHC_ROOT and IHC_SEG in napari, one after another.
+
+    The tif files of both folders are paired by their position in the sorted file lists.
+    Images in the folders listed in `skip_names` are skipped. A pair that raises a
+    ValueError is reported and skipped as well.
+    """
+    images = sorted(glob(os.path.join(IHC_ROOT, "**/*.tif"), recursive=True))
+    segmentations = sorted(glob(os.path.join(IHC_SEG, "**/*.tif"), recursive=True))
+
+    skip_names = ["Calretinin"]
+
+    for im_path, seg_path in zip(images, segmentations):
+        print("Loading", im_path)
+        root, fname = os.path.split(im_path)
+        folder = os.path.basename(root)
+        if folder in skip_names:
+            continue
+
+        try:
+            im = imageio.imread(im_path)
+            seg = imageio.imread(seg_path).astype("uint32")
+
+            v = napari.Viewer()
+            v.add_image(im)
+            v.add_labels(seg)
+            v.title = f"{folder}/{fname}"
+            napari.run()
+        except ValueError as e:
+            # Keep going with the next pair, but report what was skipped and why.
+            print(f"Skipping {im_path}: {e}")
+            continue
+
+
+def _require_prediction(im, image_path, with_mask):
+    """Load the network prediction for an image from the cache or compute and cache it.
+
+    Args:
+        im: The image data to run prediction on.
+        image_path: The path of the image. Its folder and file name determine the cache file.
+        with_mask: Whether to restrict the prediction with a block-wise intensity mask.
+            The masked and unmasked predictions are cached under different keys.
+
+    Returns:
+        The prediction, either loaded from the cache file or freshly computed.
+    """
+    model_path = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/IHC/v2_cochlea_distance_unet_IHC_supervised_2025-05-21"  # noqa
+
+    root, fname = os.path.split(image_path)
+    folder = os.path.basename(root)
+
+    cache_path = f"/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/croppings/predictions/{folder}"
+    os.makedirs(cache_path, exist_ok=True)
+    cache_path = os.path.join(cache_path, fname.replace(".tif", ".h5"))
+
+    output_key = "pred_masked" if with_mask else "pred"
+
+    # Return the cached prediction if it was computed before.
+    if os.path.exists(cache_path):
+        with h5py.File(cache_path, "r") as f:
+            if output_key in f:
+                pred = f[output_key][:]
+                return pred
+
+    # The heavy dependencies are only imported if a prediction has to be computed.
+    from torch_em.util import load_model
+    from torch_em.util.prediction import predict_with_halo
+    from torch_em.transform.raw import standardize
+
+    block_shape = (128, 128, 128)
+    halo = (16, 32, 32)
+    if with_mask:
+        import nifty.tools as nt
+
+        # Mark all blocks whose 95th intensity percentile exceeds 200.
+        mask = np.zeros(im.shape, dtype=bool)
+        blocking = nt.blocking([0, 0, 0], im.shape, block_shape)
+
+        for block_id in range(blocking.numberOfBlocks):
+            block = blocking.getBlock(block_id)
+            bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
+            data = im[bb]
+            max_ = np.percentile(data, 95)
+            if max_ > 200:
+                mask[bb] = 1
+    else:
+        mask = None
+
+    # The mask is computed on the raw intensities, so standardize only afterwards.
+    im = standardize(im)
+
+    model = load_model(model_path)
+
+    pred = predict_with_halo(
+        im, model, gpu_ids=[0], block_shape=block_shape, halo=halo, preprocess=None, mask=mask
+    )
+
+    with h5py.File(cache_path, "a") as f:
+        f.create_dataset(output_key, data=pred, compression="lzf")
+
+    # Return the fresh prediction too, so that a cache miss does not yield None.
+    return pred
+
+
+def check_block_artifacts():
+    """Show one Calretinin image with its network prediction and the existing segmentation in napari."""
+    image_path = os.path.join(IHC_ROOT, "Calretinin/M61L_CR_IHC_forannotations_C1.tif")
+    seg_path = os.path.join(IHC_SEG, "Calretinin/M61L_CR_IHC_forannotations_C1.tif")
+
+    im = imageio.imread(image_path)
+    predictions = _require_prediction(im, image_path, with_mask=False)
+    seg_old = imageio.imread(seg_path)
+
+    viewer = napari.Viewer()
+    viewer.add_image(im)
+    viewer.add_image(predictions)
+    viewer.add_labels(seg_old)
+    napari.run()
+
+
+def _get_ihc_v_sgn_mask(seg, props, threshold, criterion="ratio"):
+    """Create a label mask that splits the objects into two classes by thresholding a property.
+
+    Args:
+        seg: The segmentation.
+        props: The table with the object properties. Needs the column "label"
+            and the column named by `criterion`.
+        threshold: The threshold applied to the criterion column.
+        criterion: The name of the column that is thresholded.
+
+    Returns:
+        A uint32 array shaped like `seg`. Objects with a value >= threshold are set to 1,
+        objects with a value < threshold are set to 2, everything else is 0.
+    """
+    values = props[criterion]
+    # Both comparisons are evaluated explicitly, so that objects whose value
+    # is NaN end up in neither class.
+    is_ihc = values >= threshold
+    is_sgn = values < threshold
+    ihc_ids = props.label[is_ihc].values
+    sgn_ids = props.label[is_sgn].values
+
+    mask = np.zeros_like(seg, dtype="uint32")
+    mask[np.isin(seg, ihc_ids)] = 1
+    mask[np.isin(seg, sgn_ids)] = 2
+    return mask
+
+
+# Too simple, need to learn this.
+def try_filtering():
+    """Explore threshold-based filtering of a Myo7a segmentation interactively in napari.
+
+    Computes the area and the ratio of major to minor axis length for each object and
+    shows the resulting two-class mask in the "ihc_v_sgn" layer. Two widgets recompute
+    the mask for a new ratio threshold or a new size threshold.
+    """
+    import pandas as pd
+    from skimage.measure import regionprops_table
+    from magicgui import magic_factory
+
+    seg_path = os.path.join(IHC_SEG, "Myo7a/3.1L_Myo7a_apex_HCAT_reslice_C2.tif")
+    seg = imageio.imread(seg_path)
+
+    props = regionprops_table(
+        seg, properties=["label", "area", "axis_major_length", "axis_minor_length"]
+    )
+    props = pd.DataFrame(props)
+    # Elongation of the objects: ratio of major to minor axis length.
+    props["ratio"] = props.axis_major_length / props.axis_minor_length
+
+    ratio_threshold = 1.5
+    size_threshold = 5000
+    # The initial mask is based on the ratio criterion.
+    ihc_v_sgn = _get_ihc_v_sgn_mask(seg, props, ratio_threshold, criterion="ratio")
+
+    # Both callbacks replace the data of the same "ihc_v_sgn" layer. They use the
+    # viewer `v`, which is created further below, before the widgets can be triggered.
+    @magic_factory(
+        call_button="Update ratio threshold",
+        threshold={"widget_type": "FloatSlider", "min": 1.0, "max": 5.0, "step": 0.1}
+    )
+    def update_ratio_threshold(threshold: float = ratio_threshold):
+        ihc_v_sgn = _get_ihc_v_sgn_mask(seg, props, threshold, criterion="ratio")
+        v.layers["ihc_v_sgn"].data = ihc_v_sgn
+
+    @magic_factory(
+        call_button="Update size threshold",
+        threshold={"widget_type": "FloatSlider", "min": 1000, "max": 20_000, "step": 100}
+    )
+    def update_size_threshold(threshold: float = size_threshold):
+        ihc_v_sgn = _get_ihc_v_sgn_mask(seg, props, threshold, criterion="area")
+        v.layers["ihc_v_sgn"].data = ihc_v_sgn
+
+    image_path = os.path.join(IHC_ROOT, "Myo7a/3.1L_Myo7a_apex_HCAT_reslice_C2.tif")
+    im = imageio.imread(image_path)
+
+    v = napari.Viewer()
+    v.add_image(im)
+    v.add_labels(seg)
+    # NOTE(review): the callbacks look up this layer as "ihc_v_sgn"; this presumably relies
+    # on napari deriving the layer name from the variable name - confirm.
+    v.add_labels(ihc_v_sgn)
+
+    ratio_widget = update_ratio_threshold()
+    size_widget = update_size_threshold()
+    v.window.add_dock_widget(ratio_widget, name="Ratio Threshold Slider")
+    v.window.add_dock_widget(size_widget, name="Size Threshold Slider")
+
+    napari.run()
+
+
+def run_object_classifier():
+    """Start the object classification GUI for one Myo7a image and its segmentation."""
+    from flamingo_tools.classification import run_classification_gui
+
+    run_classification_gui(
+        os.path.join(IHC_ROOT, "Myo7a/3.1L_Myo7a_apex_HCAT_reslice_C2.tif"),
+        os.path.join(IHC_SEG, "Myo7a/3.1L_Myo7a_apex_HCAT_reslice_C2.tif"),
+        segmentation_name="IHCs",
+    )
+
+
+# From inspection:
+# - CR looks quite good, but also shows the blocking artifacts, and some merges:
+#   Calretinin/M61L_CR_IHC_forannotations_C1.tif (blocking artifacts)
+#   Calretinin/M63R_CR640_apexIHC_C2.tif (merges, but also weird looking stain)
+#   Calretinin/M78L_CR488_apexIHC2_C6.tif (background structures are segmented)
+#   Background is the case for some others too; it segments the hairs.
+# - Myo7a, looks good, but as we discussed the stain is not specific
+#   Myo7a/3.1L_Myo7a_apex_HCAT_reslice_C2.tif (good candidate for filtering)
+#   Myo7a/3.1L_Myo7a_mid_HCAT_reslice_C4.tif (good candidate for filtering)
+# - PV: Stain looks quite different, segmentations don't look so good.
+def main():
+    """Run one of the inspection steps of this script; currently the object classifier GUI."""
+    # inspect_all_data()
+    # check_block_artifacts()
+    # try_filtering()
+    run_object_classifier()
+
+
+if __name__ == "__main__":
+    main()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .classification_gui import run_classification_gui`