Skip to content

Commit a408b99

Browse files
DimasfromLavoisier, iulusoy, and pre-commit-ci[bot]
authored
Multimodal search (#276)
* fix: include audio model class in init * fix: remove model from init, and reference model module in notebook instead * add multimodal search module * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add multi query support * small fixes for code improvement * small adjustments * upd notebook * add tests --------- Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent ac19b1d commit a408b99

File tree

10 files changed

+1456
-12
lines changed

10 files changed

+1456
-12
lines changed

ammico/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
from ammico.display import AnalysisExplorer
2-
from ammico.model import MultimodalSummaryModel
2+
from ammico.model import (
3+
MultimodalSummaryModel,
4+
AudioToTextModel,
5+
MultimodalEmbeddingsModel,
6+
)
37
from ammico.text import TextDetector, TextAnalyzer, privacy_disclosure
48
from ammico.image_summary import ImageSummaryDetector
59
from ammico.utils import find_files, get_dataframe, AnalysisType, find_videos
610
from ammico.video_summary import VideoSummaryDetector
11+
from ammico.multimodal_search import MultimodalSearch
712

813
# Export the version defined in project metadata
914
try:
@@ -17,6 +22,9 @@
1722
"AnalysisType",
1823
"AnalysisExplorer",
1924
"MultimodalSummaryModel",
25+
"MultimodalEmbeddingsModel",
26+
"AudioToTextModel",
27+
"MultimodalSearch",
2028
"TextDetector",
2129
"TextAnalyzer",
2230
"ImageSummaryDetector",

ammico/model.py

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
BitsAndBytesConfig,
1414
AutoTokenizer,
1515
)
16-
from typing import Optional
16+
from typing import Optional, List, Union
17+
from PIL import Image
18+
from sentence_transformers import SentenceTransformer
19+
import numpy as np
1720

1821

1922
class MultimodalSummaryModel:
@@ -208,3 +211,108 @@ def close(self) -> None:
208211
RuntimeWarning,
209212
stacklevel=2,
210213
)
214+
215+
216+
class MultimodalEmbeddingsModel:
    """Multimodal (text + image) embeddings model based on Jina CLIP-V2.

    Wraps a ``SentenceTransformer`` loaded from ``jinaai/jina-clip-v2`` and
    exposes L2-normalized text and image embeddings, with optional
    truncation to a smaller dimensionality.
    """

    def __init__(
        self,
        device: Optional[str] = None,
    ) -> None:
        """
        Class for Multimodal Embeddings model loading and inference. Uses Jina CLIP-V2 model.
        Args:
            device: "cuda" or "cpu" (auto-detected when None).
        """
        self.device = resolve_model_device(device)

        model_id = "jinaai/jina-clip-v2"

        # trust_remote_code is required because jina-clip-v2 ships custom
        # modeling code; torch_dtype="auto" lets HF pick the checkpoint dtype.
        self.model = SentenceTransformer(
            model_id,
            device=self.device,
            trust_remote_code=True,
            model_kwargs={"torch_dtype": "auto"},
        )

        self.model.eval()

        # Full output dimensionality of jina-clip-v2; lower bound for
        # truncate_dim validation is 64 (Matryoshka-style truncation).
        self.embedding_dim = 1024

    def _truncate(
        self,
        embeddings: Union[torch.Tensor, np.ndarray],
        truncate_dim: Optional[int],
    ) -> Union[torch.Tensor, np.ndarray]:
        """Optionally keep only the first ``truncate_dim`` embedding components.

        Args:
            embeddings: 2-D batch of embeddings (tensor or ndarray).
            truncate_dim: target dimensionality, or None for no truncation.

        Returns:
            The (possibly truncated) embeddings.

        Raises:
            ValueError: if truncate_dim is outside [64, embedding_dim].

        NOTE(review): vectors are normalized by ``encode`` *before* truncation,
        so truncated embeddings are no longer unit-norm — confirm whether
        downstream cosine-similarity code re-normalizes.
        """
        if truncate_dim is None:
            return embeddings
        if not (64 <= truncate_dim <= self.embedding_dim):
            raise ValueError(
                f"truncate_dim must be between 64 and {self.embedding_dim}"
            )
        return embeddings[:, :truncate_dim]

    @torch.inference_mode()
    def encode_text(
        self,
        texts: Union[str, List[str]],
        batch_size: int = 64,
        truncate_dim: Optional[int] = None,
    ) -> Union[torch.Tensor, np.ndarray]:
        """Embed one or more texts.

        Args:
            texts: a single string or a list of strings.
            batch_size: encoding batch size.
            truncate_dim: optional output dimensionality in [64, 1024].

        Returns:
            Normalized embeddings — a torch.Tensor on CUDA devices,
            otherwise a numpy.ndarray.
        """
        if isinstance(texts, str):
            texts = [texts]

        # Keep results on-device as tensors when running on CUDA;
        # fall back to numpy on CPU.
        convert_to_tensor = self.device == "cuda"
        convert_to_numpy = not convert_to_tensor

        embeddings = self.model.encode(
            texts,
            batch_size=batch_size,
            convert_to_tensor=convert_to_tensor,
            convert_to_numpy=convert_to_numpy,
            normalize_embeddings=True,
        )

        return self._truncate(embeddings, truncate_dim)

    @torch.inference_mode()
    def encode_image(
        self,
        images: Union[Image.Image, List[Image.Image]],
        batch_size: int = 32,
        truncate_dim: Optional[int] = None,
    ) -> Union[torch.Tensor, np.ndarray]:
        """Embed one or more PIL images.

        Args:
            images: a single PIL.Image or a list of PIL.Image objects.
            batch_size: encoding batch size.
            truncate_dim: optional output dimensionality in [64, 1024].

        Returns:
            Normalized embeddings — a torch.Tensor on CUDA devices,
            otherwise a numpy.ndarray.

        Raises:
            ValueError: if images is not a PIL.Image or a list thereof.
        """
        if not isinstance(images, (Image.Image, list)):
            raise ValueError(
                "images must be a PIL.Image or a list of PIL.Image objects. Please load images properly."
            )

        convert_to_tensor = self.device == "cuda"
        convert_to_numpy = not convert_to_tensor

        embeddings = self.model.encode(
            images if isinstance(images, list) else [images],
            batch_size=batch_size,
            convert_to_tensor=convert_to_tensor,
            convert_to_numpy=convert_to_numpy,
            normalize_embeddings=True,
        )

        return self._truncate(embeddings, truncate_dim)

    def close(self) -> None:
        """Free model resources (helpful in long-running processes)."""
        try:
            if self.model is not None:
                del self.model
                self.model = None
        finally:
            # Best-effort cache release; failure here is non-fatal.
            try:
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            except Exception as e:
                warnings.warn(
                    "Failed to empty CUDA cache. This is not critical, but may lead to memory lingering: "
                    f"{e!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )

0 commit comments

Comments
 (0)