Allow batch processing of images

johnbradley · johnbradley · commit 50600a5485f8 · 2024-07-11T08:23:37.000-04:00
Instead of processing images one at a time, allow loading multiple images and processing them all at once. When used with a GPU, this may improve processing time. Fixes #11
diff --git a/README.md b/README.md
@@ -85,6 +85,8 @@ predictions = classifier.predict("Ursus-arctos.jpeg", Rank.SPECIES)
 df = pd.DataFrame(predictions)
 ```
 
+The first argument of the `predict()` method accepts either a single path or a list of paths.
+
 ### Predict from a list of classes
 ```python
 from bioclip import CustomLabelsClassifier
diff --git a/src/bioclip/__main__.py b/src/bioclip/__main__.py
@@ -28,20 +28,17 @@ def write_results_to_file(df, format, outfile):
     else:
         raise ValueError(f"Invalid format: {format}")
 
+
 def predict(image_file: list[str], format: str,  output: str,
              cls_str: str, device: str,  rank: Rank, k: int):
     if cls_str:
         classifier = CustomLabelsClassifier(cls_ary=cls_str.split(','), device=device)
-        data = []
-        for image_path in image_file:
-            data.extend(classifier.predict(image_path=image_path))
-        write_results(data, format, output)
+        predictions = classifier.predict(image_paths=image_file)
+        write_results(predictions, format, output)
     else:
         classifier = TreeOfLifeClassifier(device=device)
-        data = []
-        for image_path in image_file:
-            data.extend(classifier.predict(image_path=image_path, rank=rank, k=k))
-        write_results(data, format, output)
+        predictions = classifier.predict(image_paths=image_file, rank=rank, k=k)
+        write_results(predictions, format, output)
 
 
 def embed(image_file: list[str], output: str, device: str):
@@ -52,14 +49,14 @@ def embed(image_file: list[str], output: str, device: str):
         "embeddings": images_dict
     }
     for image_path in image_file:
-        features = classifier.get_image_features(image_path)[0]
+        features = classifier.create_image_features_for_path(image_path=image_path, normalize=False)
         images_dict[image_path] = features.tolist()
     if output == 'stdout':
         print(json.dumps(data, indent=4))
     else:
         with open(output, 'w') as outfile:
-            json.dump(data, outfile, indent=4) 
-        
+            json.dump(data, outfile, indent=4)
+
 
 def create_parser():
     parser = argparse.ArgumentParser(prog='bioclip', description='BioCLIP command line interface')
diff --git a/src/bioclip/predict.py b/src/bioclip/predict.py
@@ -119,22 +119,6 @@ def get_txt_names():
     return txt_names
 
 
-def open_image(image_path):
-    img = PIL.Image.open(image_path)
-    return img.convert("RGB")
-
-
-preprocess_img = transforms.Compose(
-    [
-        transforms.ToTensor(),
-        transforms.Resize((224, 224), antialias=True),
-        transforms.Normalize(
-            mean=(0.48145466, 0.4578275, 0.40821073),
-            std=(0.26862954, 0.26130258, 0.27577711),
-        ),
-    ]
-)
-
 class Rank(Enum):
     KINGDOM = 0
     PHYLUM = 1
@@ -165,11 +149,68 @@ def create_bioclip_tokenizer(tokenizer_str="ViT-B-16"):
     return get_tokenizer(tokenizer_str)
 
 
-class CustomLabelsClassifier(object):
-    def __init__(self, cls_ary: List[str], device: Union[str, torch.device] = 'cpu', model_str: str = MODEL_STR):
+preprocess_img = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Resize((224, 224), antialias=True),
+        transforms.Normalize(
+            mean=(0.48145466, 0.4578275, 0.40821073),
+            std=(0.26862954, 0.26130258, 0.27577711),
+        ),
+    ]
+)
+
+
+class BaseClassifier(object):
+    def __init__(self, device: Union[str, torch.device] = 'cpu', model_str: str = MODEL_STR):
         self.device = device
         self.model = create_bioclip_model(device=device, model_str=model_str)
         self.model_str = model_str
+
+    @staticmethod
+    def open_image(image_path):
+        img = PIL.Image.open(image_path)
+        return img.convert("RGB")
+
+    @torch.no_grad()
+    def create_image_features(self, images: List[PIL.Image.Image], normalize : bool = True) -> torch.Tensor:
+        preprocessed_images = []
+        for img in images:
+            prep_img = preprocess_img(img).to(self.device)
+            preprocessed_images.append(prep_img)
+        preprocessed_image_tensor = torch.stack(preprocessed_images)
+        img_features = self.model.encode_image(preprocessed_image_tensor)
+        if normalize:
+            return F.normalize(img_features, dim=-1)
+        else:
+            return img_features
+
+    @torch.no_grad()
+    def create_image_features_for_path(self, image_path: str, normalize: bool) -> torch.Tensor:
+        img = self.open_image(image_path)
+        result = self.create_image_features([img], normalize=normalize)
+        return result[0]
+
+    @torch.no_grad()
+    def create_probabilities(self, img_features: torch.Tensor,
+                             txt_features: torch.Tensor) -> dict[str, torch.Tensor]:
+        logits = (self.model.logit_scale.exp() * img_features @ txt_features)
+        return F.softmax(logits, dim=1)
+
+    def create_probabilities_for_image_paths(self, image_paths: List[str] | str,
+                                             txt_features: torch.Tensor) -> dict[str, torch.Tensor]:
+        images = [self.open_image(image_path) for image_path in image_paths]
+        img_features = self.create_image_features(images)
+        probs = self.create_probabilities(img_features, txt_features)
+        result = {}
+        for i, key in enumerate(image_paths):
+            result[key] = probs[i]
+        return result
+
+
+class CustomLabelsClassifier(BaseClassifier):
+    def __init__(self, cls_ary: List[str], device: Union[str, torch.device] = 'cpu', model_str: str = MODEL_STR):
+        super().__init__(device=device, model_str=model_str)
         self.tokenizer = create_bioclip_tokenizer()
         self.classes = [cls.strip() for cls in cls_ary]
         self.txt_features = self._get_txt_features(self.classes)
@@ -188,28 +229,24 @@ def _get_txt_features(self, classnames):
         return all_features
 
     @torch.no_grad()
-    def predict(self, image_path: str) -> dict[str, float]:
-        img = open_image(image_path)
-
-        img = preprocess_img(img).to(self.device)
-        img_features = self.model.encode_image(img.unsqueeze(0))
-        img_features = F.normalize(img_features, dim=-1)
-
-        logits = (self.model.logit_scale.exp() * img_features @ self.txt_features).squeeze()
-        probs = F.softmax(logits, dim=0).to("cpu").tolist()
-        pred_list = []
-        for cls, prob in zip(self.classes, probs):
-            pred_list.append({
-                PRED_FILENAME_KEY: image_path,
-                PRED_CLASSICATION_KEY: cls,
-                PRED_SCORE_KEY: prob
-            })
-        return pred_list
+    def predict(self, image_paths: List[str] | str) -> dict[str, float]:
+        if isinstance(image_paths, str):
+            image_paths = [image_paths]
+        probs = self.create_probabilities_for_image_paths(image_paths, self.txt_features)
+        result = []
+        for image_path in image_paths:
+            for cls_str, prob in zip(self.classes, probs[image_path]):
+                result.append({
+                    PRED_FILENAME_KEY: image_path,
+                    PRED_CLASSICATION_KEY: cls_str,
+                    PRED_SCORE_KEY: prob.item()
+                })
+        return result
 
 
 def predict_classifications_from_list(img: Union[PIL.Image.Image, str], cls_ary: List[str], device: Union[str, torch.device] = 'cpu') -> dict[str, float]:
     classifier = CustomLabelsClassifier(cls_ary=cls_ary, device=device)
-    return classifier.predict(img)
+    return classifier.predict([img])
 
 
 def get_tol_classification_labels(rank: Rank) -> List[str]:
@@ -244,31 +281,12 @@ def join_names(classification_dict: dict[str, str]) -> str:
     return " ".join(classification_dict.values())
 
 
-class TreeOfLifeClassifier(object):
+class TreeOfLifeClassifier(BaseClassifier):
     def __init__(self, device: Union[str, torch.device] = 'cpu', model_str: str = MODEL_STR):
-        self.device = device
-        self.model = create_bioclip_model(device=device, model_str=model_str)
-        self.model_str = model_str
-        self.txt_emb = get_txt_emb().to(device)
+        super().__init__(device=device, model_str=model_str)
+        self.txt_features = get_txt_emb().to(device)
         self.txt_names = get_txt_names()
 
-    @torch.no_grad()
-    def get_image_features(self, image_path: str) -> torch.Tensor:
-        img = open_image(image_path)
-        return self.encode_image(img)
-
-    def encode_image(self, img: PIL.Image.Image) -> torch.Tensor:
-        img = preprocess_img(img).to(self.device)
-        img_features = self.model.encode_image(img.unsqueeze(0))
-        return img_features
-
-    def predict_species(self, img: PIL.Image.Image) -> torch.Tensor:
-        img_features = self.encode_image(img)
-        img_features = F.normalize(img_features, dim=-1)
-        logits = (self.model.logit_scale.exp() * img_features @ self.txt_emb).squeeze()
-        probs = F.softmax(logits, dim=0)
-        return probs
-
     def format_species_probs(self, image_path: str, probs: torch.Tensor, k: int = 5) -> List[dict[str, float]]:
         topk = probs.topk(k)
         result = []
@@ -299,12 +317,17 @@ def format_grouped_probs(self, image_path: str, probs: torch.Tensor, rank: Rank,
         return prediction_ary
 
     @torch.no_grad()
-    def predict(self, image_path: str, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> List[dict[str, float]]:
-        img = open_image(image_path)
-        probs = self.predict_species(img)
-        if rank == Rank.SPECIES:
-            return self.format_species_probs(image_path, probs, k)
-        return self.format_grouped_probs(image_path, probs, rank, min_prob, k)
+    def predict(self, image_paths: List[str] | str, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> dict[str, dict[str, float]]:
+        if isinstance(image_paths, str):
+            image_paths = [image_paths]
+        probs = self.create_probabilities_for_image_paths(image_paths, self.txt_features)
+        result = []
+        for image_path in image_paths:
+            if rank == Rank.SPECIES:
+                result.extend(self.format_species_probs(image_path, probs[image_path], k))
+            else:
+                result.extend(self.format_grouped_probs(image_path, probs[image_path], rank, min_prob, k))
+        return result
 
 
 def predict_classification(img: str, rank: Rank, device: Union[str, torch.device] = 'cpu',
@@ -315,4 +338,4 @@ def predict_classification(img: str, rank: Rank, device: Union[str, torch.device
     species, then sums up species-level probabilities for the given rank.
     """
     classifier = TreeOfLifeClassifier(device=device)
-    return classifier.predict(img, rank, min_prob, k)
+    return classifier.predict([img], rank, min_prob, k)
diff --git a/tests/test_predict.py b/tests/test_predict.py
@@ -10,9 +10,9 @@
 EXAMPLE_CAT_IMAGE2 = os.path.join(DIRNAME, "images", "mycat.png")
 
 class TestPredict(unittest.TestCase):
-    def test_tree_of_life_classifier_species(self):
+    def test_tree_of_life_classifier_species_single(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_path=EXAMPLE_CAT_IMAGE, rank=Rank.SPECIES)
+        prediction_ary = classifier.predict(image_paths=EXAMPLE_CAT_IMAGE, rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 5)
         prediction_dict = {
             'file_name': EXAMPLE_CAT_IMAGE,
@@ -29,9 +29,20 @@ def test_tree_of_life_classifier_species(self):
         }
         self.assertEqual(prediction_ary[0], prediction_dict)
 
+    def test_tree_of_life_classifier_species_ary_one(self):
+        classifier = TreeOfLifeClassifier()
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE], rank=Rank.SPECIES)
+        self.assertEqual(len(prediction_ary), 5)
+
+    def test_tree_of_life_classifier_species_ary_multiple(self):
+        classifier = TreeOfLifeClassifier()
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2],
+                                            rank=Rank.SPECIES)
+        self.assertEqual(len(prediction_ary), 10)
+
     def test_tree_of_life_classifier_family(self):
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_path=EXAMPLE_CAT_IMAGE, rank=Rank.FAMILY, k=2)
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE], rank=Rank.FAMILY, k=2)
         self.assertEqual(len(prediction_ary), 2)
         prediction_dict = {
             'file_name': EXAMPLE_CAT_IMAGE,
@@ -46,22 +57,41 @@ def test_tree_of_life_classifier_family(self):
 
     def test_custom_labels_classifier(self):
         classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
-        results = classifier.predict(image_path=EXAMPLE_CAT_IMAGE)
-        self.assertEqual(results, [
+        prediction_ary = classifier.predict(image_paths=EXAMPLE_CAT_IMAGE)
+        self.assertEqual(prediction_ary, [
+            {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
+            {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
+        ])
+
+    def test_custom_labels_classifier_ary_one(self):
+        classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE])
+        self.assertEqual(prediction_ary, [
+            {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
+            {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
+        ])
+
+    def test_custom_labels_classifier_ary_multiple(self):
+        classifier = CustomLabelsClassifier(cls_ary=['cat', 'dog'])
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE, EXAMPLE_CAT_IMAGE2])
+        self.assertEqual(prediction_ary, [
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'cat', 'score': unittest.mock.ANY},
             {'file_name': EXAMPLE_CAT_IMAGE, 'classification': 'dog', 'score': unittest.mock.ANY},
+            {'file_name': EXAMPLE_CAT_IMAGE2, 'classification': 'cat', 'score': unittest.mock.ANY},
+            {'file_name': EXAMPLE_CAT_IMAGE2, 'classification': 'dog', 'score': unittest.mock.ANY},
         ])
 
+
     def test_predict_with_rgba_image(self):
         # Ensure that the classifier can handle RGBA images
         classifier = TreeOfLifeClassifier()
-        prediction_ary = classifier.predict(image_path=EXAMPLE_CAT_IMAGE2, rank=Rank.SPECIES)
+        prediction_ary = classifier.predict(image_paths=[EXAMPLE_CAT_IMAGE2], rank=Rank.SPECIES)
         self.assertEqual(len(prediction_ary), 5)
 
 
 class TestEmbed(unittest.TestCase):
     def test_get_image_features(self):
         classifier = TreeOfLifeClassifier(device='cpu')
         self.assertEqual(classifier.model_str, 'hf-hub:imageomics/bioclip')
-        features = classifier.get_image_features(EXAMPLE_CAT_IMAGE)
-        self.assertEqual(features.shape, torch.Size([1, 512]))
+        features = classifier.create_image_features_for_path(EXAMPLE_CAT_IMAGE, normalize=False)
+        self.assertEqual(features.shape, torch.Size([512]))