Merge pull request #79 from computational-cell-analytics/sample-data

constantinpape · web-flow · commit a366371d7b80 · 2023-07-18T09:41:59.000+02:00
Add sample data script and update the annotator examples
diff --git a/development/annotator_2d_tiled.py b/development/annotator_2d_tiled.py
diff --git a/examples/sam_annotator_2d.py b/examples/sam_annotator_2d.py
@@ -1,23 +1,38 @@
 import imageio.v3 as imageio
 from micro_sam.sam_annotator import annotator_2d
+from micro_sam.sample_data import fetch_hela_2d_example_data, fetch_livecell_example_data, fetch_wholeslide_example_data
 
 
-# TODO describe how to get the data and don't use hard-coded path
 def livecell_annotator():
-    im = imageio.imread(
-        "/home/pape/Work/data/incu_cyte/livecell/images/livecell_test_images/A172_Phase_C7_1_01d04h00m_4.tif"
-    )
-    embedding_path = "./embeddings/embeddings-livecell_cropped.zarr"
-    annotator_2d(im, embedding_path, show_embeddings=False)
+    """Run the 2d annotator for an example image from the LiveCELL dataset.
+
+    See https://doi.org/10.1038/s41592-021-01249-6 for details on the data.
+    """
+    example_data = fetch_livecell_example_data("./data")
+    image = imageio.imread(example_data)
+    embedding_path = "./embeddings/embeddings-livecell.zarr"
+    annotator_2d(image, embedding_path, show_embeddings=False)
 
 
-# This runs interactive 2d annotation for data from the cell tracking challenge:
-# It uses the training data for the HeLA dataset. You can download the data from
-# http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip
 def hela_2d_annotator():
-    im = imageio.imread("./data/DIC-C2DH-HeLa/train/01/t011.tif")
+    """Run the 2d annotator for an example image from the cell tracking challenge HeLa 2d dataset.
+    """
+    example_data = fetch_hela_2d_example_data("./data")
+    image = imageio.imread(example_data)
     embedding_path = "./embeddings/embeddings-hela2d.zarr"
-    annotator_2d(im, embedding_path, show_embeddings=False)
+    annotator_2d(image, embedding_path, show_embeddings=False)
+
+
+def wholeslide_annotator():
+    """Run the 2d annotator with tiling for an example whole-slide image from the
+    NeurIPS cell segmentation challenge.
+
+    See https://neurips22-cellseg.grand-challenge.org/ for details on the data.
+    """
+    example_data = fetch_wholeslide_example_data("./data")
+    image = imageio.imread(example_data)
+    embedding_path = "./embeddings/whole-slide-embeddings.zarr"
+    annotator_2d(image, embedding_path, tile_shape=(1024, 1024), halo=(256, 256))
 
 
 def main():
@@ -27,6 +42,9 @@ def main():
     # 2d annotator for cell tracking challenge hela data
     hela_2d_annotator()
 
+    # 2d annotator for a whole slide image
+    # wholeslide_annotator()
+
 
 if __name__ == "__main__":
     main()
diff --git a/examples/sam_annotator_3d.py b/examples/sam_annotator_3d.py
@@ -1,38 +1,22 @@
-import os
-from pathlib import Path
-
 from elf.io import open_file
-import pooch
 from micro_sam.sam_annotator import annotator_3d
+from micro_sam.sample_data import fetch_3d_example_data
 
 
-def main():
-    example_data_directory = "./data"
-    with open_file(str(fetch_example_data(example_data_directory))) as f:
+def em_3d_annotator():
+    """Run the 3d annotator for an example EM volume."""
+    # download the example data
+    example_data = fetch_3d_example_data("./data")
+    # load the example data (load the sequence of png files as 3d volume)
+    with open_file(example_data) as f:
         raw = f["*.png"][:]
+    # start the annotator, cache the embeddings
     embedding_path = "./embeddings/embeddings-lucchi.zarr"
     annotator_3d(raw, embedding_path, show_embeddings=False)
 
 
-def fetch_example_data(save_directory):
-    # Lucchi++ Data from: https://casser.io/connectomics/
-    save_directory = Path(save_directory)
-    if not save_directory.exists():
-        os.makedirs(save_directory)
-        print("Created new folder for example data downloads.")
-    print("Example data directory is:", save_directory.resolve())
-    unpack_filenames = [os.path.join("Lucchi++", "Test_In", f"mask{str(i).zfill(4)}.png") for i in range(165)]
-    unpack = pooch.Unzip(members=unpack_filenames)
-    fnames = pooch.retrieve(
-        url="http://www.casser.io/files/lucchi_pp.zip",
-        known_hash="770ce9e98fc6f29c1b1a250c637e6c5125f2b5f1260e5a7687b55a79e2e8844d",
-        fname="lucchi_pp.zip",
-        path=save_directory,
-        progressbar=True,
-        processor=unpack,
-    )
-    lucchi_testin_dir = save_directory.joinpath("lucchi_pp.zip.unzip", "Lucchi++", "Test_In")
-    return lucchi_testin_dir
+def main():
+    em_3d_annotator()
 
 
 if __name__ == "__main__":
diff --git a/examples/sam_annotator_tracking.py b/examples/sam_annotator_tracking.py
@@ -1,46 +1,23 @@
-import os
-from pathlib import Path
-
 from elf.io import open_file
-import pooch
 from micro_sam.sam_annotator import annotator_tracking
+from micro_sam.sample_data import fetch_tracking_example_data
 
 
 def track_ctc_data():
-    example_data_directory = "./data"
-    with open_file(str(fetch_example_data(example_data_directory)), mode="r") as f:
+    """Run interactive tracking for data from the cell tracking challenge.
+    """
+    # download the example data
+    example_data = fetch_tracking_example_data("./data")
+    # load the example data (load the sequence of tif files as timeseries)
+    with open_file(example_data, mode="r") as f:
         timeseries = f["*.tif"]
+    # start the annotator with cached embeddings
     annotator_tracking(timeseries, embedding_path="./embeddings/embeddings-ctc.zarr", show_embeddings=False)
 
 
-def fetch_example_data(save_directory):
-    """Cell tracking challenge dataset DIC-C2DH-HeLa.
-
-    Cell tracking challenge webpage: http://data.celltrackingchallenge.net
-    HeLa cells on a flat glass
-    Dr. G. van Cappellen. Erasmus Medical Center, Rotterdam, The Netherlands
-    Training dataset: http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip (37 MB)
-    Challenge dataset: http://data.celltrackingchallenge.net/challenge-datasets/DIC-C2DH-HeLa.zip (41 MB)
-    """
-    save_directory = Path(save_directory)
-    if not save_directory.exists():
-        os.makedirs(save_directory)
-        print("Created new folder for example data downloads.")
-    print("Example data directory is:", save_directory.resolve())
-    unpack_filenames = [os.path.join("DIC-C2DH-HeLa", "01", f"t{str(i).zfill(3)}.tif") for i in range(84)]
-    unpack = pooch.Unzip(members=unpack_filenames)
-    fnames = pooch.retrieve(
-        url="http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip",  # 37 MB
-        known_hash="fac24746fa0ad5ddf6f27044c785edef36bfa39f7917da4ad79730a7748787af",
-        fname="DIC-C2DH-HeLa.zip",
-        path=save_directory,
-        progressbar=True,
-        processor=unpack,
-    )
-    cell_tracking_directory = save_directory.joinpath("DIC-C2DH-HeLa", "train", "01")
-    return cell_tracking_directory
+def main():
+    track_ctc_data()
 
 
 if __name__ == "__main__":
-    # run interactive tracking for data from the cell tracking challenge
-    track_ctc_data()
+    main()
diff --git a/examples/sam_image_series_annotator.py b/examples/sam_image_series_annotator.py
@@ -1,13 +1,20 @@
 from micro_sam.sam_annotator import image_folder_annotator
+from micro_sam.sample_data import fetch_image_series_example_data
 
 
-def main():
+def series_annotation():
+    """Annotate a series of images. This example runs on three sample images.
+    """
+    example_data = fetch_image_series_example_data("./data")
     image_folder_annotator(
-        "./data/series", "./segmented-series",
-        embedding_path="./embeddings/series-embeddings",
+        example_data, "./data/series-segmentation-result", embedding_path="./embeddings/series-embeddings",
         pattern="*.tif", model_type="vit_b"
     )
 
 
+def main():
+    series_annotation()
+
+
 if __name__ == "__main__":
     main()
diff --git a/micro_sam/sample_data.py b/micro_sam/sample_data.py
@@ -0,0 +1,138 @@
+import os
+from pathlib import Path
+
+import pooch
+
+
+def fetch_image_series_example_data(save_directory):
+    """Download the sample images for the image series annotator.
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    fname = "image-series.zip"
+    unpack_filenames = [os.path.join("series", f"im{i}.tif") for i in range(3)]
+    unpack = pooch.Unzip(members=unpack_filenames)
+    pooch.retrieve(
+        url="https://owncloud.gwdg.de/index.php/s/M1zGnfkulWoAhUG/download",
+        known_hash="92346ca9770bcaf55248efee590718d54c7135b6ebca15d669f3b77b6afc8706",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+        processor=unpack,
+    )
+    data_folder = os.path.join(save_directory, f"{fname}.unzip", "series")
+    assert os.path.exists(data_folder)
+    return data_folder
+
+
+def fetch_wholeslide_example_data(save_directory):
+    """Download the sample data for the 2d annotator.
+
+    This downloads part of a whole-slide image from the NeurIPS Cell Segmentation Challenge.
+    See https://neurips22-cellseg.grand-challenge.org/ for details on the data.
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    fname = "whole-slide-example-image.tif"
+    pooch.retrieve(
+        url="https://owncloud.gwdg.de/index.php/s/6ozPtgBmAAJC1di/download",
+        known_hash="3ddb9c9dcc844429932ab951eb0743d5a1af83ee9b0ab54f06ceb2090a606d36",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+    )
+    return os.path.join(save_directory, fname)
+
+
+def fetch_livecell_example_data(save_directory):
+    """Download the sample data for the 2d annotator.
+
+    This downloads a single image from the LiveCELL dataset.
+    See https://doi.org/10.1038/s41592-021-01249-6 for details on the data.
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    fname = "livecell-2d-image.png"
+    pooch.retrieve(
+        url="https://owncloud.gwdg.de/index.php/s/fSaOJIOYjmFBjPM/download",
+        known_hash="4f190983ea672fc333ac26d735d9625d5abb6e4a02bd4d32523127977a31e8fe",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+    )
+    return os.path.join(save_directory, fname)
+
+
+def fetch_hela_2d_example_data(save_directory):
+    """Download the sample data for the 2d annotator.
+
+    This downloads a single image from the HeLa CTC dataset.
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    fname = "hela-2d-image.png"
+    pooch.retrieve(
+        url="https://owncloud.gwdg.de/index.php/s/2sr1DHQ34tV7WEb/download",
+        known_hash="908fa00e4b273610aa4e0a9c0f22dfa64a524970852f387908f3fa65238259c7",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+    )
+    return os.path.join(save_directory, fname)
+
+
+def fetch_3d_example_data(save_directory):
+    """Download the sample data for the 3d annotator.
+
+    This downloads the Lucchi++ dataset from https://casser.io/connectomics/.
+    It is a dataset for mitochondria segmentation in EM.
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    unpack_filenames = [os.path.join("Lucchi++", "Test_In", f"mask{str(i).zfill(4)}.png") for i in range(165)]
+    unpack = pooch.Unzip(members=unpack_filenames)
+    fname = "lucchi_pp.zip"
+    pooch.retrieve(
+        url="http://www.casser.io/files/lucchi_pp.zip",
+        known_hash="770ce9e98fc6f29c1b1a250c637e6c5125f2b5f1260e5a7687b55a79e2e8844d",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+        processor=unpack,
+    )
+    lucchi_dir = save_directory.joinpath(f"{fname}.unzip", "Lucchi++", "Test_In")
+    return str(lucchi_dir)
+
+
+def fetch_tracking_example_data(save_directory):
+    """Download the sample data for the tracking annotator.
+
+    This data is the cell tracking challenge dataset DIC-C2DH-HeLa.
+    Cell tracking challenge webpage: http://data.celltrackingchallenge.net
+    HeLa cells on a flat glass
+    Dr. G. van Cappellen. Erasmus Medical Center, Rotterdam, The Netherlands
+    Training dataset: http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip (37 MB)
+    Challenge dataset: http://data.celltrackingchallenge.net/challenge-datasets/DIC-C2DH-HeLa.zip (41 MB)
+    """
+    save_directory = Path(save_directory)
+    os.makedirs(save_directory, exist_ok=True)
+    print("Example data directory is:", save_directory.resolve())
+    unpack_filenames = [os.path.join("DIC-C2DH-HeLa", "01", f"t{str(i).zfill(3)}.tif") for i in range(84)]
+    unpack = pooch.Unzip(members=unpack_filenames)
+    fname = "DIC-C2DH-HeLa.zip"
+    pooch.retrieve(
+        url="http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip",  # 37 MB
+        known_hash="fac24746fa0ad5ddf6f27044c785edef36bfa39f7917da4ad79730a7748787af",
+        fname=fname,
+        path=save_directory,
+        progressbar=True,
+        processor=unpack,
+    )
+    cell_tracking_dir = save_directory.joinpath(f"{fname}.unzip", "DIC-C2DH-HeLa", "01")
+    assert os.path.exists(cell_tracking_dir)
+    return str(cell_tracking_dir)
diff --git a/micro_sam/util.py b/micro_sam/util.py