Image type hints (#457)

hh-space-invader · joein · web-flow · commit a5b266e0183f · 2025-02-06T11:24:19.000+01:00
* chore: Added type hints

* new: Add type hints for parallel processor

* new: Add image type hints

* fix: NdArray -> NumpyArray

* fix: remove redundant property

* refactoring: remove redundant new lines

* refactoring: remove redundant new line

* fix: fix image input types

* fix: remove redundant import

* fix: remove mp subscriptions due to mac os issues

* chore: Update type hints

* chore: Added type hints for functional

* refactor

---------

Co-authored-by: George Panchuk <george.panchuk@qdrant.tech>
diff --git a/fastembed/common/__init__.py b/fastembed/common/__init__.py
@@ -1,3 +1,3 @@
-from fastembed.common.types import ImageInput, OnnxProvider, PathInput, PilInput
+from fastembed.common.types import ImageInput, OnnxProvider, PathInput
 
-__all__ = ["OnnxProvider", "ImageInput", "PathInput", "PilInput"]
+__all__ = ["OnnxProvider", "ImageInput", "PathInput"]
diff --git a/fastembed/common/model_management.py b/fastembed/common/model_management.py
@@ -114,7 +114,6 @@ def download_files_from_huggingface(
             extra_patterns (list[str]): extra patterns to allow in the snapshot download, typically
                 includes the required model files.
             local_files_only (bool, optional): Whether to only use local files. Defaults to False.
-            specific_model_path (Optional[str], optional): The path to the model dir already pooled from external source
         Returns:
             Path: The path to the model directory.
         """
@@ -161,9 +160,7 @@ def _collect_file_metadata(
                         }
             return meta
 
-        def _save_file_metadata(
-            model_dir: Path, meta: dict[str, dict[str, Union[int, str]]]
-        ) -> None:
+        def _save_file_metadata(model_dir: Path, meta: dict[str, dict[str, Union[int, str]]]) -> None:
             try:
                 if not model_dir.exists():
                     model_dir.mkdir(parents=True, exist_ok=True)
diff --git a/fastembed/common/types.py b/fastembed/common/types.py
@@ -1,8 +1,7 @@
 from pathlib import Path
 import sys
 from PIL import Image
-from typing import Any, Iterable, Union
-
+from typing import Any, Union
 import numpy as np
 from numpy.typing import NDArray
 
@@ -13,11 +12,9 @@
 
 
 PathInput: TypeAlias = Union[str, Path]
-PilInput: TypeAlias = Union[Image.Image, Iterable[Image.Image]]
-ImageInput: TypeAlias = Union[PathInput, Iterable[PathInput], PilInput]
+ImageInput: TypeAlias = Union[PathInput, Image.Image]
 
 OnnxProvider: TypeAlias = Union[str, tuple[str, dict[Any, Any]]]
-
 NumpyArray = Union[
     NDArray[np.float32],
     NDArray[np.float16],
diff --git a/fastembed/image/image_embedding.py b/fastembed/image/image_embedding.py
@@ -1,7 +1,6 @@
-from typing import Any, Iterable, Optional, Sequence, Type
-
-import numpy as np
+from typing import Any, Iterable, Optional, Sequence, Type, Union
 
+from fastembed.common.types import NumpyArray
 from fastembed.common import ImageInput, OnnxProvider
 from fastembed.image.image_embedding_base import ImageEmbeddingBase
 from fastembed.image.onnx_embedding import OnnxImageEmbedding
@@ -35,7 +34,7 @@ def list_supported_models(cls) -> list[dict[str, Any]]:
                 ]
                 ```
         """
-        result = []
+        result: list[dict[str, Any]] = []
         for embedding in cls.EMBEDDINGS_REGISTRY:
             result.extend(embedding.list_supported_models())
         return result
@@ -74,11 +73,11 @@ def __init__(
 
     def embed(
         self,
-        images: ImageInput,
+        images: Union[ImageInput, Iterable[ImageInput]],
         batch_size: int = 16,
         parallel: Optional[int] = None,
         **kwargs: Any,
-    ) -> Iterable[np.ndarray]:
+    ) -> Iterable[NumpyArray]:
         """
         Encode a list of documents into list of embeddings.
         We use mean pooling with attention so that the model can handle variable-length inputs.
diff --git a/fastembed/image/image_embedding_base.py b/fastembed/image/image_embedding_base.py
@@ -1,7 +1,6 @@
-from typing import Iterable, Optional, Any
-
-import numpy as np
+from typing import Iterable, Optional, Any, Union
 
+from fastembed.common.types import NumpyArray
 from fastembed.common.model_management import ModelManagement
 from fastembed.common.types import ImageInput
 
@@ -21,11 +20,11 @@ def __init__(
 
     def embed(
         self,
-        images: ImageInput,
+        images: Union[ImageInput, Iterable[ImageInput]],
         batch_size: int = 16,
         parallel: Optional[int] = None,
         **kwargs: Any,
-    ) -> Iterable[np.ndarray]:
+    ) -> Iterable[NumpyArray]:
         """
         Embeds a list of images into a list of embeddings.
 
@@ -39,6 +38,6 @@ def embed(
             **kwargs: Additional keyword argument to pass to the embed method.
 
         Yields:
-            Iterable[np.ndarray]: The embeddings.
+            Iterable[NumpyArray]: The embeddings.
         """
         raise NotImplementedError()
diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
@@ -1,6 +1,8 @@
-from typing import Any, Iterable, Optional, Sequence, Type
+from typing import Any, Iterable, Optional, Sequence, Type, Union
 
 import numpy as np
+
+from fastembed.common.types import NumpyArray
 from fastembed.common import ImageInput, OnnxProvider
 from fastembed.common.onnx_model import OnnxOutputContext
 from fastembed.common.utils import define_cache_dir, normalize
@@ -66,7 +68,7 @@
 ]
 
 
-class OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[np.ndarray]):
+class OnnxImageEmbedding(ImageEmbeddingBase, OnnxImageModel[NumpyArray]):
     def __init__(
         self,
         model_name: str,
@@ -111,15 +113,14 @@ def __init__(
         self.cuda = cuda
 
         # This device_id will be used if we need to load model in current process
+        self.device_id: Optional[int] = None
         if device_id is not None:
             self.device_id = device_id
         elif self.device_ids is not None:
             self.device_id = self.device_ids[0]
-        else:
-            self.device_id = None
 
         self.model_description = self._get_model_description(model_name)
-        self.cache_dir = define_cache_dir(cache_dir)
+        self.cache_dir = str(define_cache_dir(cache_dir))
         self._model_dir = self.download_model(
             self.model_description,
             self.cache_dir,
@@ -155,11 +156,11 @@ def list_supported_models(cls) -> list[dict[str, Any]]:
 
     def embed(
         self,
-        images: ImageInput,
+        images: Union[ImageInput, Iterable[ImageInput]],
         batch_size: int = 16,
         parallel: Optional[int] = None,
         **kwargs: Any,
-    ) -> Iterable[np.ndarray]:
+    ) -> Iterable[NumpyArray]:
         """
         Encode a list of images into list of embeddings.
         We use mean pooling with attention so that the model can handle variable-length inputs.
@@ -189,23 +190,23 @@ def embed(
         )
 
     @classmethod
-    def _get_worker_class(cls) -> Type["ImageEmbeddingWorker"]:
+    def _get_worker_class(cls) -> Type["ImageEmbeddingWorker[NumpyArray]"]:
         return OnnxImageEmbeddingWorker
 
     def _preprocess_onnx_input(
-        self, onnx_input: dict[str, np.ndarray], **kwargs: Any
-    ) -> dict[str, np.ndarray]:
+        self, onnx_input: dict[str, NumpyArray], **kwargs: Any
+    ) -> dict[str, NumpyArray]:
         """
         Preprocess the onnx input.
         """
 
         return onnx_input
 
-    def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.ndarray]:
+    def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[NumpyArray]:
         return normalize(output.model_output).astype(np.float32)
 
 
-class OnnxImageEmbeddingWorker(ImageEmbeddingWorker):
+class OnnxImageEmbeddingWorker(ImageEmbeddingWorker[NumpyArray]):
     def init_embedding(self, model_name: str, cache_dir: str, **kwargs: Any) -> OnnxImageEmbedding:
         return OnnxImageEmbedding(
             model_name=model_name,
diff --git a/fastembed/image/onnx_image_model.py b/fastembed/image/onnx_image_model.py
@@ -2,11 +2,13 @@
 import os
 from multiprocessing import get_all_start_methods
 from pathlib import Path
-from typing import Any, Iterable, Optional, Sequence, Type
+from typing import Any, Iterable, Optional, Sequence, Type, Union
 
 import numpy as np
 from PIL import Image
 
+from fastembed.image.transform.operators import Compose
+from fastembed.common.types import NumpyArray
 from fastembed.common import ImageInput, OnnxProvider
 from fastembed.common.onnx_model import EmbeddingWorker, OnnxModel, OnnxOutputContext, T
 from fastembed.common.preprocessor_utils import load_preprocessor
@@ -18,19 +20,19 @@
 
 class OnnxImageModel(OnnxModel[T]):
     @classmethod
-    def _get_worker_class(cls) -> Type["ImageEmbeddingWorker"]:
+    def _get_worker_class(cls) -> Type["ImageEmbeddingWorker[T]"]:
         raise NotImplementedError("Subclasses must implement this method")
 
     def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[T]:
         raise NotImplementedError("Subclasses must implement this method")
 
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
-        self.processor = None
+        self.processor: Optional[Compose] = None
 
     def _preprocess_onnx_input(
-        self, onnx_input: dict[str, np.ndarray], **kwargs: Any
-    ) -> dict[str, np.ndarray]:
+        self, onnx_input: dict[str, NumpyArray], **kwargs: Any
+    ) -> dict[str, NumpyArray]:
         """
         Preprocess the onnx input.
         """
@@ -58,16 +60,18 @@ def _load_onnx_model(
     def load_onnx_model(self) -> None:
         raise NotImplementedError("Subclasses must implement this method")
 
-    def _build_onnx_input(self, encoded: np.ndarray) -> dict[str, np.ndarray]:
-        return {node.name: encoded for node in self.model.get_inputs()}
+    def _build_onnx_input(self, encoded: NumpyArray) -> dict[str, NumpyArray]:
+        input_name = self.model.get_inputs()[0].name
+        return {input_name: encoded}
 
     def onnx_embed(self, images: list[ImageInput], **kwargs: Any) -> OnnxOutputContext:
         with contextlib.ExitStack():
             image_files = [
                 Image.open(image) if not isinstance(image, Image.Image) else image
                 for image in images
             ]
-            encoded = self.processor(image_files)
+            assert self.processor is not None, "Processor is not initialized"
+            encoded = np.array(self.processor(image_files))
         onnx_input = self._build_onnx_input(encoded)
         onnx_input = self._preprocess_onnx_input(onnx_input)
         model_output = self.model.run(None, onnx_input)
@@ -78,7 +82,7 @@ def _embed_images(
         self,
         model_name: str,
         cache_dir: str,
-        images: ImageInput,
+        images: Union[ImageInput, Iterable[ImageInput]],
         batch_size: int = 256,
         parallel: Optional[int] = None,
         providers: Optional[Sequence[OnnxProvider]] = None,
@@ -124,7 +128,7 @@ def _embed_images(
                 yield from self._post_process_onnx_output(batch)
 
 
-class ImageEmbeddingWorker(EmbeddingWorker):
+class ImageEmbeddingWorker(EmbeddingWorker[T]):
     def process(self, items: Iterable[tuple[int, Any]]) -> Iterable[tuple[int, Any]]:
         for idx, batch in items:
             embeddings = self.model.onnx_embed(batch)
diff --git a/fastembed/image/transform/functional.py b/fastembed/image/transform/functional.py
@@ -1,8 +1,10 @@
-from typing import Sized, Union
+from typing import Union
 
 import numpy as np
 from PIL import Image
 
+from fastembed.common.types import NumpyArray
+
 
 def convert_to_rgb(image: Image.Image) -> Image.Image:
     if image.mode == "RGB":
@@ -13,9 +15,9 @@ def convert_to_rgb(image: Image.Image) -> Image.Image:
 
 
 def center_crop(
-    image: Union[Image.Image, np.ndarray],
+    image: Union[Image.Image, NumpyArray],
     size: tuple[int, int],
-) -> np.ndarray:
+) -> NumpyArray:
     if isinstance(image, np.ndarray):
         _, orig_height, orig_width = image.shape
     else:
@@ -40,7 +42,7 @@ def center_crop(
     new_height = max(crop_height, orig_height)
     new_width = max(crop_width, orig_width)
     new_shape = image.shape[:-2] + (new_height, new_width)
-    new_image = np.zeros_like(image, shape=new_shape)
+    new_image = np.zeros_like(image, shape=new_shape, dtype=np.float32)
 
     top_pad = (new_height - orig_height) // 2
     bottom_pad = top_pad + orig_height
@@ -61,37 +63,34 @@ def center_crop(
 
 
 def normalize(
-    image: np.ndarray,
-    mean: Union[float, np.ndarray],
-    std: Union[float, np.ndarray],
-) -> np.ndarray:
-    if not isinstance(image, np.ndarray):
-        raise ValueError("image must be a numpy array")
-
+    image: NumpyArray,
+    mean: Union[float, list[float]],
+    std: Union[float, list[float]],
+) -> NumpyArray:
     num_channels = image.shape[1] if len(image.shape) == 4 else image.shape[0]
 
     if not np.issubdtype(image.dtype, np.floating):
         image = image.astype(np.float32)
 
-    if isinstance(mean, Sized):
-        if len(mean) != num_channels:
-            raise ValueError(
-                f"mean must have {num_channels} elements if it is an iterable, got {len(mean)}"
-            )
-    else:
-        mean = [mean] * num_channels
-    mean = np.array(mean, dtype=image.dtype)
-
-    if isinstance(std, Sized):
-        if len(std) != num_channels:
-            raise ValueError(
-                f"std must have {num_channels} elements if it is an iterable, got {len(std)}"
-            )
-    else:
-        std = [std] * num_channels
-    std = np.array(std, dtype=image.dtype)
+    mean = mean if isinstance(mean, list) else [mean] * num_channels
+
+    if len(mean) != num_channels:
+        raise ValueError(
+            f"mean must have the same number of channels as the image, image has {num_channels} channels, got "
+            f"{len(mean)}"
+        )
+
+    mean_arr = np.array(mean, dtype=np.float32)
+
+    std = std if isinstance(std, list) else [std] * num_channels
+    if len(std) != num_channels:
+        raise ValueError(
+            f"std must have the same number of channels as the image, image has {num_channels} channels, got {len(std)}"
+        )
+
+    std_arr = np.array(std, dtype=np.float32)
 
-    image = ((image.T - mean) / std).T
+    image = ((image.T - mean_arr) / std_arr).T
     return image
 
 
@@ -114,11 +113,11 @@ def resize(
     return image.resize(new_size, resample)
 
 
-def rescale(image: np.ndarray, scale: float, dtype: type = np.float32) -> np.ndarray:
+def rescale(image: NumpyArray, scale: float, dtype: type = np.float32) -> NumpyArray:
     return (image * scale).astype(dtype)
 
 
-def pil2ndarray(image: Union[Image.Image, np.ndarray]) -> np.ndarray:
+def pil2ndarray(image: Union[Image.Image, NumpyArray]) -> NumpyArray:
     if isinstance(image, Image.Image):
         return np.asarray(image).transpose((2, 0, 1))
     return image
diff --git a/fastembed/image/transform/operators.py b/fastembed/image/transform/operators.py