
Commit ea84438

Merge pull request #12 from computational-cell-analytics/misc

Expose model type as argument

2 parents 827b7ed + c10f3fe

5 files changed: +46 −11 lines changed

README.md

Lines changed: 3 additions & 0 deletions
@@ -15,6 +15,8 @@ We implement napari applications for:
 This is an early beta version. Any feedback is welcome, but please be aware that the functionality is under active development and that several features are not finalized or thoroughly tested yet.
 Once the functionality has matured we plan to release the interactive annotation applications as [napari plugins](https://napari.org/stable/plugins/index.html).
 
+If you run into any problems or have questions please open an issue or reach out via [image.sc](https://forum.image.sc/) using the tag `micro-sam` and tagging @constantinpape.
+
 
 ## Functionality overview
 
@@ -143,6 +145,7 @@ TODO link to video tutorial
 
 - By default, the applications pre-compute the image embeddings produced by SegmentAnything and store them on disc. If you are using a CPU this step can take a while for 3d data or timeseries (you will see a progress bar with a time estimate). If you have access to a GPU without graphical interface (e.g. via a local computer cluster or a cloud provider), you can also pre-compute the embeddings there and then copy them to your laptop / local machine to speed this up. You can use the command `micro_sam.precompute_embeddings` for this (it is installed with the rest of the applications). You can specify the location of the precomputed embeddings via the `embedding_path` argument.
 - Most other processing steps are very fast even on a CPU, so interactive annotation is possible. An exception is the automatic segmentation step (2d segmentation), which takes several minutes without a GPU (depending on the image size). For large volumes and timeseries, segmenting an object in 3d / tracking across time can take a couple of minutes with a CPU (it is very fast with a GPU).
+- You can also try using a smaller version of the SegmentAnything model to speed up the computations. For this you can pass the `model_type` argument and either set it to `vit_l` or `vit_b` (default is `vit_h`). However, this may lead to worse results.
 - You can save and load the results from the `committed_objects` / `committed_tracks` layer to correct segmentations you obtained from another tool (e.g. CellPose) or to save intermediate annotation results. The results can be saved via `File->Save Selected Layer(s) ...` in the napari menu (see the tutorial videos for details). They can be loaded again by specifying the corresponding location via the `segmentation_result` (2d and 3d segmentation) or `tracking_result` (tracking) argument.
 
 ### Known limitations
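
The two bullets on precomputed embeddings and the smaller models translate into very little user-facing code. Below is a minimal sketch of launching the 2d annotator with cached embeddings and a smaller backbone; the image path, the use of imageio for loading, and the embedding file name are assumptions for illustration, while `embedding_path` and `model_type` are the arguments documented or introduced in this PR.

```python
# Hedged usage sketch: file paths and the imageio loader are assumptions,
# the keyword arguments are the ones exposed by this PR.
import imageio.v3 as imageio

from micro_sam.sam_annotator.annotator_2d import annotator_2d

# load the 2d image that should be annotated (example path)
raw = imageio.imread("example_image.tif")

# embedding_path caches the SegmentAnything image embeddings on disc so that
# restarting the annotator is fast; model_type selects the SAM backbone
# (vit_h is the default, vit_l / vit_b are smaller and faster but may be less accurate)
annotator_2d(
    raw,
    embedding_path="embeddings/example_image.zarr",
    model_type="vit_b",
)
```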

micro_sam/sam_annotator/annotator_2d.py

Lines changed: 7 additions & 3 deletions
@@ -36,11 +36,11 @@ def autosegment_widget(v: Viewer, method: str = "default"):
     v.layers["auto_segmentation"].refresh()
 
 
-def annotator_2d(raw, embedding_path=None, show_embeddings=False, segmentation_result=None):
+def annotator_2d(raw, embedding_path=None, show_embeddings=False, segmentation_result=None, model_type="vit_h"):
     # for access to the predictor and the image embeddings in the widgets
     global PREDICTOR, IMAGE_EMBEDDINGS, SAM
 
-    PREDICTOR, SAM = util.get_sam_model(return_sam=True)
+    PREDICTOR, SAM = util.get_sam_model(model_type=model_type, return_sam=True)
     IMAGE_EMBEDDINGS = util.precompute_image_embeddings(PREDICTOR, raw, save_path=embedding_path, ndim=2)
     util.set_precomputed(PREDICTOR, IMAGE_EMBEDDINGS)
 

@@ -166,6 +166,9 @@ def main():
         "--show_embeddings", action="store_true",
         help="Visualize the embeddings computed by SegmentAnything. This can be helpful for debugging."
     )
+    parser.add_argument(
+        "--model_type", default="vit_h", help="The segment anything model that will be used, one of vit_h,l,b."
+    )
 
     args = parser.parse_args()
     raw = util.load_image_data(args.input, ndim=2, key=args.key)

@@ -180,5 +183,6 @@ def main():
 
     annotator_2d(
         raw, embedding_path=args.embedding_path,
-        show_embeddings=args.show_embeddings, segmentation_result=segmentation_result
+        show_embeddings=args.show_embeddings, segmentation_result=segmentation_result,
+        model_type=args.model_type,
     )
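
The change above simply threads the new keyword from the CLI down to `util.get_sam_model`. For reference, a sketch of that call chain outside of napari, e.g. for scripted use; the dummy test image is a placeholder, and only `get_sam_model`, `precompute_image_embeddings`, and `set_precomputed` are taken from the code in this diff.

```python
# Sketch of the updated call chain; the dummy image is a placeholder.
import numpy as np

from micro_sam import util

raw = np.random.randint(0, 255, (512, 512), dtype="uint8")  # stand-in for real data

# model_type is now forwarded instead of always using the vit_h default
predictor, sam = util.get_sam_model(model_type="vit_l", return_sam=True)

# compute the image embeddings (optionally cached via save_path),
# then attach them to the predictor for interactive prompting
image_embeddings = util.precompute_image_embeddings(predictor, raw, save_path=None, ndim=2)
util.set_precomputed(predictor, image_embeddings)
```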

micro_sam/sam_annotator/annotator_3d.py

Lines changed: 7 additions & 3 deletions
@@ -162,10 +162,10 @@ def segment_volume_widget(v: Viewer, iou_threshold: float = 0.8, projection: str
     v.layers["current_object"].refresh()
 
 
-def annotator_3d(raw, embedding_path=None, show_embeddings=False, segmentation_result=None):
+def annotator_3d(raw, embedding_path=None, show_embeddings=False, segmentation_result=None, model_type="vit_h"):
     # for access to the predictor and the image embeddings in the widgets
     global PREDICTOR, IMAGE_EMBEDDINGS, DEFAULT_PROJECTION
-    PREDICTOR = util.get_sam_model()
+    PREDICTOR = util.get_sam_model(model_type=model_type)
     IMAGE_EMBEDDINGS = util.precompute_image_embeddings(PREDICTOR, raw, save_path=embedding_path)
 
     # the mask projection currently only works for square images

@@ -291,6 +291,9 @@ def main():
         "--show_embeddings", action="store_true",
         help="Visualize the embeddings computed by SegmentAnything. This can be helpful for debugging."
     )
+    parser.add_argument(
+        "--model_type", default="vit_h", help="The segment anything model that will be used, one of vit_h,l,b."
+    )
 
     args = parser.parse_args()
     raw = util.load_image_data(args.input, ndim=3, key=args.key)

@@ -305,5 +308,6 @@ def main():
 
     annotator_3d(
         raw, embedding_path=args.embedding_path,
-        show_embeddings=args.show_embeddings, segmentation_result=segmentation_result
+        show_embeddings=args.show_embeddings, segmentation_result=segmentation_result,
+        model_type=args.model_type,
     )
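
The 3d annotator gets the same treatment. A sketch of launching it on a volume follows; the hdf5 file name and dataset key are assumptions, while `util.load_image_data` with `ndim=3` and `key` is the loader used by `main()` above.

```python
# Hedged example: file name and dataset key are assumptions.
from micro_sam import util
from micro_sam.sam_annotator.annotator_3d import annotator_3d

# load a volume; key addresses the dataset inside the container file
raw = util.load_image_data("example_volume.h5", ndim=3, key="raw")

# caching the embeddings via embedding_path is especially useful for 3d data,
# where the embedding step can take a while on a CPU (see the README note above)
annotator_3d(raw, embedding_path="embeddings/example_volume.zarr", model_type="vit_b")
```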

micro_sam/sam_annotator/annotator_tracking.py

Lines changed: 7 additions & 3 deletions
@@ -299,12 +299,12 @@ def commit_tracking_widget(v: Viewer, layer: str = "current_track"):
     v.layers["prompts"].refresh()
 
 
-def annotator_tracking(raw, embedding_path=None, show_embeddings=False, tracking_result=None):
+def annotator_tracking(raw, embedding_path=None, show_embeddings=False, tracking_result=None, model_type="vit_h"):
     # global state
     global PREDICTOR, IMAGE_EMBEDDINGS, CURRENT_TRACK_ID, LINEAGE
     global TRACKING_WIDGET
 
-    PREDICTOR = util.get_sam_model()
+    PREDICTOR = util.get_sam_model(model_type=model_type)
     IMAGE_EMBEDDINGS = util.precompute_image_embeddings(PREDICTOR, raw, save_path=embedding_path)
 
     CURRENT_TRACK_ID = 1

@@ -445,6 +445,9 @@ def main():
         "--show_embeddings", action="store_true",
         help="Visualize the embeddings computed by SegmentAnything. This can be helpful for debugging."
     )
+    parser.add_argument(
+        "--model_type", default="vit_h", help="The segment anything model that will be used, one of vit_h,l,b."
+    )
 
     args = parser.parse_args()
     raw = util.load_image_data(args.input, ndim=3, key=args.key)

@@ -458,5 +461,6 @@ def main():
         warnings.warn("You have not passed an embedding_path. Restarting the annotator may take a long time.")
 
     annotator_tracking(
-        raw, embedding_path=args.embedding_path, show_embeddings=args.show_embeddings, tracking_result=tracking_result
+        raw, embedding_path=args.embedding_path, show_embeddings=args.show_embeddings,
+        tracking_result=tracking_result, model_type=args.model_type,
     )
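
The same pattern applies to the tracking annotator. A sketch for a 2d timeseries follows; the file name and key are again assumptions, and passing `embedding_path` avoids the slow-restart warning visible in the diff above.

```python
# Hedged example: file name and dataset key are assumptions.
from micro_sam import util
from micro_sam.sam_annotator.annotator_tracking import annotator_tracking

# a 2d timeseries has three axes, hence ndim=3 as in main() above
timeseries = util.load_image_data("example_timeseries.h5", ndim=3, key="raw")

annotator_tracking(
    timeseries,
    embedding_path="embeddings/example_timeseries.zarr",  # cached embeddings, reused across restarts
    model_type="vit_l",
)
```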

micro_sam/util.py

Lines changed: 22 additions & 2 deletions
@@ -1,3 +1,4 @@
+import hashlib
 import os
 from shutil import copyfileobj
 
@@ -28,9 +29,14 @@
     "vit_b": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
 }
 CHECKPOINT_FOLDER = os.environ.get("SAM_MODELS", os.path.expanduser("~/.sam_models"))
+CHECKSUMS = {
+    "vit_h": "a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e",
+    "vit_l": "3adcc4315b642a4d2101128f611684e8734c41232a17c648ed1693702a49a622",
+    "vit_b": "ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912"
+}
 
 
-def _download(url, path):
+def _download(url, path, model_type):
     with requests.get(url, stream=True, verify=True) as r:
         if r.status_code != 200:
             r.raise_for_status()
@@ -42,6 +48,20 @@ def _download(url, path):
         with tqdm.wrapattr(r.raw, "read", total=file_size, desc=desc) as r_raw, open(path, "wb") as f:
             copyfileobj(r_raw, f)
 
+    # validate the checksum
+    expected_checksum = CHECKSUMS[model_type]
+    if expected_checksum is None:
+        return
+    with open(path, "rb") as f:
+        file_ = f.read()
+        checksum = hashlib.sha256(file_).hexdigest()
+    if checksum != expected_checksum:
+        raise RuntimeError(
+            "The checksum of the download does not match the expected checksum."
+            f"Expected: {expected_checksum}, got: {checksum}"
+        )
+    print("Download successful and checksums agree.")
+
 
 def _get_checkpoint(model_type, checkpoint_path=None):
     if checkpoint_path is None:
@@ -52,7 +72,7 @@ def _get_checkpoint(model_type, checkpoint_path=None):
         # download the checkpoint if necessary
         if not os.path.exists(checkpoint_path):
            os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
-            _download(checkpoint_url, checkpoint_path)
+            _download(checkpoint_url, checkpoint_path, model_type)
     elif not os.path.exists(checkpoint_path):
         raise ValueError(f"The checkpoint path {checkpoint_path} that was passed does not exist.")

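To make the new download validation concrete, here is a small sketch that checks an already downloaded checkpoint against the `CHECKSUMS` table added above. The `verify_checkpoint` helper and the way the file name is derived from the download URL are illustrations, not part of the PR.

```python
import hashlib
import os

from micro_sam.util import CHECKPOINT_FOLDER, CHECKSUMS


def verify_checkpoint(model_type, checkpoint_name):
    """Return True if the local checkpoint matches the expected sha256.

    Hypothetical helper that mirrors the validation added to _download above.
    """
    path = os.path.join(CHECKPOINT_FOLDER, checkpoint_name)
    with open(path, "rb") as f:
        checksum = hashlib.sha256(f.read()).hexdigest()
    return checksum == CHECKSUMS[model_type]


# the checkpoint name corresponds to the file name in the download URL, e.g.
# verify_checkpoint("vit_b", "sam_vit_b_01ec64.pth")
```

In normal use nothing needs to be called directly: the download path through `_get_checkpoint` and `_download` shown above validates the checksum automatically after each download.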