Allow using PIL with predict functions

johnbradley · thompsonmj · egrace479 · johnbradley · commit 8520cd5d316b · 2024-10-25T13:16:44.000-04:00
Fixes #54 Co-authored-by: Matt Thompson <31709066+thompsonmj@users.noreply.github.com> Co-authored-by: Elizabeth Campolongo <38985481+egrace479@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -120,6 +120,11 @@ big 0.99992835521698
 small 7.165559509303421e-05
 ```
 
+### PIL Images
+The `predict()` functions used in all the examples above accept either a list of image paths or a list of [PIL Images](https://pillow.readthedocs.io/en/stable/reference/Image.html).
+When a list of PIL images is passed, `file_name` is filled in with the index of each image in the list, as a string (for example `0` for the first image), because PIL images may not have an associated file name.
+
+
 ## Command Line Usage
 ```
 bioclip predict [-h] [--format {table,csv}] [--output OUTPUT]
diff --git a/src/bioclip/__main__.py b/src/bioclip/__main__.py
@@ -51,16 +51,16 @@ def predict(image_file: list[str],
             **kwargs):
     if cls_str:
         classifier = CustomLabelsClassifier(cls_ary=cls_str.split(','), **kwargs)
-        predictions = classifier.predict(image_paths=image_file, k=k)
+        predictions = classifier.predict(images=image_file, k=k)
         write_results(predictions, format, output)
     elif bins_path:
         cls_to_bin = parse_bins_csv(bins_path)
         classifier = CustomLabelsBinningClassifier(cls_to_bin=cls_to_bin, **kwargs)
-        predictions = classifier.predict(image_paths=image_file, k=k)
+        predictions = classifier.predict(images=image_file, k=k)
         write_results(predictions, format, output)
     else:
         classifier = TreeOfLifeClassifier(**kwargs)
-        predictions = classifier.predict(image_paths=image_file, rank=rank, k=k)
+        predictions = classifier.predict(images=image_file, rank=rank, k=k)
         write_results(predictions, format, output)
 
 
@@ -72,7 +72,7 @@ def embed(image_file: list[str], output: str, **kwargs):
         "embeddings": images_dict
     }
     for image_path in image_file:
-        features = classifier.create_image_features_for_path(image_path=image_path, normalize=False)
+        features = classifier.create_image_features_for_image(image=image_path, normalize=False)
         images_dict[image_path] = features.tolist()
     if output == 'stdout':
         print(json.dumps(data, indent=4))
diff --git a/src/bioclip/predict.py b/src/bioclip/predict.py
@@ -184,10 +184,20 @@ def load_pretrained_model(self, model_str: str = BIOCLIP_MODEL_STR, pretrained_s
         self.preprocess = preprocess_img if self.model_str == BIOCLIP_MODEL_STR else preprocess
 
     @staticmethod
-    def open_image(image_path):
-        img = PIL.Image.open(image_path)
+    def ensure_rgb_image(image: str | PIL.Image.Image) -> PIL.Image.Image:
+        if isinstance(image, PIL.Image.Image):
+            img = image
+        else:
+            img = PIL.Image.open(image)
         return img.convert("RGB")
 
+    @staticmethod
+    def make_key(image: str | PIL.Image.Image, idx: int) -> str:
+        if isinstance(image, PIL.Image.Image):
+            return f"{idx}"
+        else:
+            return image
+
     @torch.no_grad()
     def create_image_features(self, images: List[PIL.Image.Image], normalize : bool = True) -> torch.Tensor:
         preprocessed_images = []
@@ -202,8 +212,8 @@ def create_image_features(self, images: List[PIL.Image.Image], normalize : bool
             return img_features
 
     @torch.no_grad()
-    def create_image_features_for_path(self, image_path: str, normalize: bool) -> torch.Tensor:
-        img = self.open_image(image_path)
+    def create_image_features_for_image(self, image: str | PIL.Image.Image, normalize: bool) -> torch.Tensor:
+        img = self.ensure_rgb_image(image)
         result = self.create_image_features([img], normalize=normalize)
         return result[0]
 
@@ -213,13 +223,14 @@ def create_probabilities(self, img_features: torch.Tensor,
         logits = (self.model.logit_scale.exp() * img_features @ txt_features)
         return F.softmax(logits, dim=1)
 
-    def create_probabilities_for_image_paths(self, image_paths: List[str] | str,
-                                             txt_features: torch.Tensor) -> dict[str, torch.Tensor]:
-        images = [self.open_image(image_path) for image_path in image_paths]
+    def create_probabilities_for_images(self, images: List[str] | List[PIL.Image.Image],
+                                        txt_features: torch.Tensor) -> dict[str, torch.Tensor]:
+        keys = [self.make_key(image, i) for i,image in enumerate(images)]
+        images = [self.ensure_rgb_image(image) for image in images]
         img_features = self.create_image_features(images)
         probs = self.create_probabilities(img_features, txt_features)
         result = {}
-        for i, key in enumerate(image_paths):
+        for i, key in enumerate(keys):
             result[key] = probs[i]
         return result
 
@@ -245,24 +256,25 @@ def _get_txt_features(self, classnames):
         return all_features
 
     @torch.no_grad()
-    def predict(self, image_paths: List[str] | str, k: int = None) -> dict[str, float]:
-        if isinstance(image_paths, str):
-            image_paths = [image_paths]
-        probs = self.create_probabilities_for_image_paths(image_paths, self.txt_features)
+    def predict(self, images: List[str] | str | List[PIL.Image.Image], k: int = None) -> dict[str, float]:
+        if isinstance(images, str):
+            images = [images]
+        probs = self.create_probabilities_for_images(images, self.txt_features)
         result = []
-        for image_path in image_paths:
-            img_probs = probs[image_path]
+        for i, image in enumerate(images):
+            key = self.make_key(image, i)
+            img_probs = probs[key]
             if not k or k > len(self.classes):
                 k = len(self.classes)
-            result.extend(self.group_probs(image_path, img_probs, k))
+            result.extend(self.group_probs(key, img_probs, k))
         return result
 
-    def group_probs(self, image_path: str, img_probs: torch.Tensor, k: int = None) -> List[dict[str, float]]:
+    def group_probs(self, image_key: str, img_probs: torch.Tensor, k: int = None) -> List[dict[str, float]]:
         result = []
         topk = img_probs.topk(k)
         for i, prob in zip(topk.indices, topk.values):
             result.append({
-                PRED_FILENAME_KEY: image_path,
+                PRED_FILENAME_KEY: image_key,
                 PRED_CLASSICATION_KEY: self.classes[i],
                 PRED_SCORE_KEY: prob.item()
             })
@@ -276,7 +288,7 @@ def __init__(self, cls_to_bin: dict, **kwargs):
         if any([pd.isna(x) or not x for x in cls_to_bin.values()]):
             raise ValueError("Empty, null, or nan are not allowed for bin values.")
 
-    def group_probs(self, image_path: str, img_probs: torch.Tensor, k: int = None) -> List[dict[str, float]]:
+    def group_probs(self, image_key: str, img_probs: torch.Tensor, k: int = None) -> List[dict[str, float]]:
         result = []
         output = collections.defaultdict(float)
         for i in range(len(self.classes)):
@@ -285,7 +297,7 @@ def group_probs(self, image_path: str, img_probs: torch.Tensor, k: int = None) -
         topk_names = heapq.nlargest(k, output, key=output.get)
         for name in topk_names:
             result.append({
-                PRED_FILENAME_KEY: image_path,
+                PRED_FILENAME_KEY: image_key,
                 PRED_CLASSICATION_KEY: name,
                 PRED_SCORE_KEY: output[name].item()
             })
@@ -335,17 +347,17 @@ def __init__(self, **kwargs):
         self.txt_features = get_txt_emb().to(self.device)
         self.txt_names = get_txt_names()
 
-    def format_species_probs(self, image_path: str, probs: torch.Tensor, k: int = 5) -> List[dict[str, float]]:
+    def format_species_probs(self, image_key: str, probs: torch.Tensor, k: int = 5) -> List[dict[str, float]]:
         topk = probs.topk(k)
         result = []
         for i, prob in zip(topk.indices, topk.values):
-            item = { PRED_FILENAME_KEY: image_path }
+            item = { PRED_FILENAME_KEY: image_key }
             item.update(create_classification_dict(self.txt_names[i], Rank.SPECIES))
             item[PRED_SCORE_KEY] = prob.item()
             result.append(item)
         return result
 
-    def format_grouped_probs(self, image_path: str, probs: torch.Tensor, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> List[dict[str, float]]:
+    def format_grouped_probs(self, image_key: str, probs: torch.Tensor, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> List[dict[str, float]]:
         output = collections.defaultdict(float)
         class_dict_lookup = {}
         name_to_class_dict = {}
@@ -358,27 +370,28 @@ def format_grouped_probs(self, image_path: str, probs: torch.Tensor, rank: Rank,
         topk_names = heapq.nlargest(k, output, key=output.get)
         prediction_ary = []
         for name in topk_names:
-            item = { PRED_FILENAME_KEY: image_path }
+            item = { PRED_FILENAME_KEY: image_key }
             item.update(name_to_class_dict[name])
             item[PRED_SCORE_KEY] = output[name].item()
             prediction_ary.append(item)
         return prediction_ary
 
     @torch.no_grad()
-    def predict(self, image_paths: List[str] | str, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> dict[str, dict[str, float]]:
-        if isinstance(image_paths, str):
-            image_paths = [image_paths]
-        probs = self.create_probabilities_for_image_paths(image_paths, self.txt_features)
+    def predict(self, images: List[str] | str | List[PIL.Image.Image], rank: Rank, min_prob: float = 1e-9, k: int = 5) -> dict[str, dict[str, float]]:
+        if isinstance(images, str):
+            images = [images]
+        probs = self.create_probabilities_for_images(images, self.txt_features)
         result = []
-        for image_path in image_paths:
+        for i, image in enumerate(images):
+            key = self.make_key(image, i)
             if rank == Rank.SPECIES:
-                result.extend(self.format_species_probs(image_path, probs[image_path], k))
+                result.extend(self.format_species_probs(key, probs[key], k))
             else:
-                result.extend(self.format_grouped_probs(image_path, probs[image_path], rank, min_prob, k))
+                result.extend(self.format_grouped_probs(key, probs[key], rank, min_prob, k))
         return result
 
 
-def predict_classification(img: str, rank: Rank, device: Union[str, torch.device] = 'cpu',
+def predict_classification(img: Union[PIL.Image.Image, str], rank: Rank, device: Union[str, torch.device] = 'cpu',
                            min_prob: float = 1e-9, k: int = 5) -> dict[str, float]:
     """
     Predicts from the entire tree of life.
diff --git a/tests/test_predict.py b/tests/test_predict.py
@@ -5,6 +5,7 @@
 import os
 import torch
 import pandas as pd
+import PIL.Image
 
 
 DIRNAME = os.path.dirname(os.path.realpath(__file__))
@@ -14,7 +15,7 @@
 class TestPredict(unittest.TestCase):
     def test_tree_of_life_classifier_species_single(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_paths=EXAMPLE_CAT_IMAGE, rank=Rank.SPECIES)
+        prediction_ary = classifier.predict(images=EXAMPLE_CAT_IMAGE, rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 5)
         prediction_dict = {
             'file_name': EXAMPLE_CAT_IMAGE,
@@ -33,18 +34,26 @@ def test_tree_of_life_classifier_species_single(self):
 
     def test_tree_of_life_classifier_species_ary_one(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE], rank=Rank.SPECIES)
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE], rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 5)
 
     def test_tree_of_life_classifier_species_ary_multiple(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2],
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2],
+                                            rank=Rank.SPECIES)
+        self.assertEqual(len(prediction_ary), 10)
+
+    def test_tree_of_life_classifier_species_ary_multiple_pil(self):
+        classifier = TreeOfLifeClassifier()
+        img1 = PIL.Image.open(EXAMPLE_CAT_IMAGE)
+        img2 = PIL.Image.open(EXAMPLE_CAT_IMAGE2)
+        prediction_ary = classifier.predict(images=[img1, img2],
                                             rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 10)
 
     def test_tree_of_life_classifier_family(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE], rank=Rank.FAMILY, k=2)
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE], rank=Rank.FAMILY, k=2)
         self.assertEqual(len(prediction_ary), 2)
         prediction_dict = {
             'file_name': EXAMPLE_CAT_IMAGE,
@@ -59,34 +68,46 @@ def test_tree_of_life_classifier_family(self):
 
     def test_custom_labels_classifier(self):
         classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
-        prediction_ary = classifier.predict(image_paths=EXAMPLE_CAT_IMAGE)
+        prediction_ary = classifier.predict(images=EXAMPLE_CAT_IMAGE)
         self.assertEqual(prediction_ary, [
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
         ])
 
     def test_custom_labels_classifier_ary_one(self):
         classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE])
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE])
         self.assertEqual(prediction_ary, [
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
         ])
 
     def test_custom_labels_classifier_ary_multiple(self):
         classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2])
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2])
         self.assertEqual(prediction_ary, [
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE2, 'classification': 'cat', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE2, 'classification': 'dog', 'score': unittest.mock.ANY},
         ])
 
+    def test_custom_labels_classifier_ary_multiple_pil(self):
+        classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
+        img1 = PIL.Image.open(EXAMPLE_CAT_IMAGE)
+        img2 = PIL.Image.open(EXAMPLE_CAT_IMAGE2)
+        prediction_ary = classifier.predict(images=[img1, img2])
+        self.assertEqual(prediction_ary, [
+            {'file_name': '0', 'classification': 'cat', 'score': unittest.mock.ANY},
+            {'file_name': '0', 'classification': 'dog', 'score': unittest.mock.ANY},
+            {'file_name': '1', 'classification': 'cat', 'score': unittest.mock.ANY},
+            {'file_name': '1', 'classification': 'dog', 'score': unittest.mock.ANY},
+        ])
+
     def test_predict_with_rgba_image(self):
         # Ensure that the classifier can handle RGBA images
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE2], rank=Rank.SPECIES)
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE2], rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 5)
 
     def test_predict_with_bins(self):
@@ -95,7 +116,7 @@ def test_predict_with_bins(self):
             'mouse': 'two',
             'fish': 'two',
         })
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE2])
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE2])
         self.assertEqual(len(prediction_ary), 2)
         self.assertEqual(prediction_ary[0]['file_name'], EXAMPLE_CAT_IMAGE2)
         names = set([pred['classification'] for pred in prediction_ary])
@@ -106,7 +127,7 @@ def test_predict_with_bins(self):
             'mouse': 'two',
             'fish': 'three',
         })
-        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE2])
+        prediction_ary = classifier.predict(images=[EXAMPLE_CAT_IMAGE2])
         self.assertEqual(len(prediction_ary), 3)
         self.assertEqual(prediction_ary[0]['file_name'], EXAMPLE_CAT_IMAGE2)
         names = set([pred['classification'] for pred in prediction_ary])
@@ -138,9 +159,29 @@ def test_predict_with_bins_bad_values(self):
         self.assertEqual(str(raised_exceptions.exception),
                          "Empty, null, or nan are not allowed for bin values.")
 
+    def test_predict_with_bins_pil(self):
+        classifier = CustomLabelsBinningClassifier(cls_to_bin={
+            'cat': 'one',
+            'mouse': 'two',
+            'fish': 'two',
+        })
+        img1 = PIL.Image.open(EXAMPLE_CAT_IMAGE)
+        prediction_ary = classifier.predict(images=[img1])
+        self.assertEqual(len(prediction_ary), 2)
+        self.assertEqual(prediction_ary[0]['file_name'], '0')
+        names = set([pred['classification'] for pred in prediction_ary])
+        self.assertEqual(names, set(['one', 'two']))
+
+
 class TestEmbed(unittest.TestCase):
     def test_get_image_features(self):
         classifier = TreeOfLifeClassifier(device='cpu')
         self.assertEqual(classifier.model_str, 'hf-hub:imageomics/bioclip')
-        features = classifier.create_image_features_for_path(EXAMPLE_CAT_IMAGE, normalize=False)
+        features = classifier.create_image_features_for_image(EXAMPLE_CAT_IMAGE, normalize=False)
+        self.assertEqual(features.shape, torch.Size([512]))
+
+    def test_get_image_features_pil(self):
+        classifier = TreeOfLifeClassifier(device='cpu')
+        self.assertEqual(classifier.model_str, 'hf-hub:imageomics/bioclip')
+        features = classifier.create_image_features_for_image(PIL.Image.open(EXAMPLE_CAT_IMAGE), normalize=False)
         self.assertEqual(features.shape, torch.Size([512]))