
Commit 14eddad

📝 Add docstrings to fix-no-internet-embed-parallel
Docstrings generation was requested by @joein.

* #524 (comment)

The following files were modified:

* `fastembed/image/onnx_embedding.py`
* `fastembed/image/onnx_image_model.py`
* `fastembed/late_interaction/colbert.py`
* `fastembed/late_interaction_multimodal/colpali.py`
* `fastembed/late_interaction_multimodal/onnx_multimodal_model.py`
* `fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py`
* `fastembed/rerank/cross_encoder/onnx_text_model.py`
* `fastembed/sparse/bm25.py`
* `fastembed/sparse/bm42.py`
* `fastembed/sparse/minicoil.py`
* `fastembed/sparse/splade_pp.py`
* `fastembed/text/onnx_embedding.py`
* `fastembed/text/onnx_text_model.py`
1 parent 95d166f commit 14eddad

13 files changed: +263 -247 lines changed


fastembed/image/onnx_embedding.py

Lines changed: 20 additions & 28 deletions
@@ -74,25 +74,21 @@ def __init__(
         **kwargs: Any,
     ):
         """
+        Initializes an ONNX image embedding model with configurable device, threading, and loading options.
+
         Args:
-            model_name (str): The name of the model to use.
-            cache_dir (str, optional): The path to the cache directory.
-                Can be set using the `FASTEMBED_CACHE_PATH` env variable.
-                Defaults to `fastembed_cache` in the system's temp directory.
-            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
-            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
-                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
-            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
-                Defaults to False.
-            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
-                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
-            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
-                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
-            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
-            specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
-
+            model_name: Name of the ONNX model to use, in the format <org>/<model>.
+            cache_dir: Optional directory for caching model files.
+            threads: Number of threads for ONNX runtime session.
+            providers: Optional list of ONNX runtime providers to use for inference.
+            cuda: If True, enables CUDA for inference; mutually exclusive with `providers`.
+            device_ids: Optional list of device IDs for parallel processing; used with `cuda=True`.
+            lazy_load: If True, defers model loading until first use.
+            device_id: Optional device ID for model loading in the current process.
+            specific_model_path: Optional path to a specific ONNX model directory.
+
         Raises:
-            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
+            ValueError: If `model_name` is not in the required <org>/<model> format.
         """

         super().__init__(model_name, cache_dir, threads, **kwargs)
@@ -154,19 +150,15 @@ def embed(
         **kwargs: Any,
     ) -> Iterable[NumpyArray]:
         """
-        Encode a list of images into list of embeddings.
-        We use mean pooling with attention so that the model can handle variable-length inputs.
-
+        Generates embeddings for one or more images using the loaded ONNX model.
+
         Args:
-            images: Iterator of image paths or single image path to embed
-            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
-            parallel:
-                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
-                If 0, use all available cores.
-                If None, don't use data-parallel processing, use default onnxruntime threading instead.
-
+            images: A single image input or an iterable of image inputs to embed.
+            batch_size: Number of images to process in each batch.
+            parallel: Number of parallel workers to use for data-parallel encoding. If 0, uses all available cores; if None, disables parallel processing.
+
         Returns:
-            List of embeddings, one per document
+            An iterable of numpy arrays, each representing the embedding of an input image.
         """

         yield from self._embed_images(
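To see how the options documented above combine in practice, here is a minimal usage sketch. It assumes fastembed's public `ImageEmbedding` wrapper and the model name "Qdrant/clip-ViT-B-32-vision"; neither is asserted by this diff.

from fastembed import ImageEmbedding

# lazy_load=True defers ONNX session creation, which the docstring recommends
# when combining multiple GPUs with parallel encoding.
model = ImageEmbedding(
    model_name="Qdrant/clip-ViT-B-32-vision",
    lazy_load=True,
    cuda=True,           # mutually exclusive with `providers`
    device_ids=[0, 1],   # one worker per listed GPU
)

images = ["photos/cat.jpg", "photos/dog.jpg"]
# parallel=2 enables data-parallel encoding; parallel=None would instead use
# default onnxruntime threading in the main process.
embeddings = list(model.embed(images, batch_size=16, parallel=2))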

fastembed/image/onnx_image_model.py

Lines changed: 19 additions & 0 deletions
@@ -101,6 +101,25 @@ def _embed_images(
         specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
+        """
+        Embeds images using the ONNX model, processing them sequentially or in parallel.
+
+        Depending on the input size and the `parallel` parameter, images are embedded either in batches on the main process or distributed across multiple worker processes. Supports additional configuration for model loading and caching.
+
+        Args:
+            model_name: Name of the ONNX model to use.
+            cache_dir: Directory for model caching.
+            images: Single image or iterable of images to embed.
+            batch_size: Number of images per batch.
+            parallel: Number of parallel worker processes to use; if None or input is small, runs sequentially.
+            cuda: Whether to use CUDA-enabled devices.
+            device_ids: List of device IDs for parallel workers.
+            local_files_only: If True, restricts model loading to local files.
+            specific_model_path: Path to a specific model file to load.
+
+        Yields:
+            Embeddings for each input image, post-processed as defined by the subclass.
+        """
         is_small = False

         if isinstance(images, (str, Path, Image.Image)):
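The new docstring describes a small-input fast path versus a fan-out to worker processes. A hedged sketch of that dispatch pattern follows; the helper names (`embed_dispatch`, `_encode_batch`) and the threshold are invented for illustration and do not come from fastembed's source.

import os
from multiprocessing import Pool
from typing import Iterable, Optional

def _encode_batch(batch: list[str]) -> list[str]:
    # Stand-in for ONNX inference on one batch.
    return [f"embedding({item})" for item in batch]

def embed_dispatch(
    items: list[str], batch_size: int = 2, parallel: Optional[int] = None
) -> Iterable[str]:
    batches = [items[i : i + batch_size] for i in range(0, len(items), batch_size)]
    is_small = len(items) < batch_size  # hypothetical "small input" threshold
    if parallel is None or is_small:
        # Sequential path: stay on the main process.
        for batch in batches:
            yield from _encode_batch(batch)
    else:
        # Parallel path: parallel == 0 means "use all available cores".
        workers = parallel or os.cpu_count()
        with Pool(processes=workers) as pool:
            for result in pool.map(_encode_batch, batches):
                yield from result

if __name__ == "__main__":
    print(list(embed_dispatch(["a", "b", "c", "d", "e"], parallel=2)))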

fastembed/late_interaction/colbert.py

Lines changed: 10 additions & 30 deletions
@@ -130,25 +130,9 @@ def __init__(
         **kwargs: Any,
     ):
         """
-        Args:
-            model_name (str): The name of the model to use.
-            cache_dir (str, optional): The path to the cache directory.
-                Can be set using the `FASTEMBED_CACHE_PATH` env variable.
-                Defaults to `fastembed_cache` in the system's temp directory.
-            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
-            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
-                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
-            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
-                Defaults to False.
-            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
-                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
-            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
-                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
-            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
-            specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
-
-        Raises:
-            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
+        Initializes a Colbert model instance for ONNX-based late interaction text embedding.
+
+        Configures model loading, device selection, threading, and caching options. Optionally supports lazy loading and specifying a custom ONNX model path. Raises a ValueError if the model name format is invalid.
         """

         super().__init__(model_name, cache_dir, threads, **kwargs)
@@ -211,19 +195,15 @@ def embed(
         **kwargs: Any,
     ) -> Iterable[NumpyArray]:
         """
-        Encode a list of documents into list of embeddings.
-        We use mean pooling with attention so that the model can handle variable-length inputs.
-
+        Generates embeddings for one or more documents using mean pooling with attention.
+
         Args:
-            documents: Iterator of documents or single document to embed
-            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
-            parallel:
-                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
-                If 0, use all available cores.
-                If None, don't use data-parallel processing, use default onnxruntime threading instead.
-
+            documents: A single document or an iterable of documents to embed.
+            batch_size: Number of documents to process per batch.
+            parallel: Number of parallel workers to use for data-parallel encoding. If 0, uses all available cores. If None, uses default threading.
+
         Returns:
-            List of embeddings, one per document
+            An iterable of embeddings, one per input document.
         """
         yield from self._embed_documents(
             model_name=self.model_name,
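A short usage sketch for the embed() signature documented above; the `LateInteractionTextEmbedding` wrapper and the "colbert-ir/colbertv2.0" model name are assumptions about fastembed's public API, not part of this diff.

from fastembed import LateInteractionTextEmbedding

model = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")

docs = [
    "ColBERT keeps one vector per token.",
    "Late interaction scores documents at query time.",
]
# Each yielded array is typically token-level, shaped (num_tokens, dim),
# rather than a single pooled vector.
for emb in model.embed(docs, batch_size=8, parallel=None):
    print(emb.shape)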

fastembed/late_interaction_multimodal/colpali.py

Lines changed: 17 additions & 38 deletions
@@ -57,24 +57,9 @@ def __init__(
         **kwargs: Any,
     ):
         """
-        Args:
-            model_name (str): The name of the model to use.
-            cache_dir (str, optional): The path to the cache directory.
-                Can be set using the `FASTEMBED_CACHE_PATH` env variable.
-                Defaults to `fastembed_cache` in the system's temp directory.
-            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
-            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
-                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
-            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
-                Defaults to False.
-            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
-                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
-            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
-                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
-            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
-
-        Raises:
-            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
+        Initializes the ColPali multimodal embedding model with specified configuration.
+
+        Configures model loading, device and threading options, ONNX runtime providers, and cache directory. Supports lazy loading, CUDA acceleration, and custom model paths. Raises a ValueError if the model name format is invalid.
         """

         super().__init__(model_name, cache_dir, threads, **kwargs)
@@ -214,18 +199,15 @@ def embed_text(
         **kwargs: Any,
     ) -> Iterable[NumpyArray]:
         """
-        Encode a list of documents into list of embeddings.
-
+        Generates embeddings for one or more text documents.
+
         Args:
-            documents: Iterator of documents or single document to embed
-            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
-            parallel:
-                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
-                If 0, use all available cores.
-                If None, don't use data-parallel processing, use default onnxruntime threading instead.
-
+            documents: A string or iterable of strings representing the documents to embed.
+            batch_size: Number of documents to process per batch.
+            parallel: Number of parallel workers to use for encoding. If 0, uses all available cores; if None, disables parallelism.
+
         Returns:
-            List of embeddings, one per document
+            An iterable of NumPy arrays, each representing the embedding of a document.
         """
         yield from self._embed_documents(
             model_name=self.model_name,
@@ -249,18 +231,15 @@ def embed_image(
         **kwargs: Any,
     ) -> Iterable[NumpyArray]:
         """
-        Encode a list of images into list of embeddings.
-
+        Generates embeddings for one or more images.
+
         Args:
-            images: Iterator of image paths or single image path to embed
-            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
-            parallel:
-                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
-                If 0, use all available cores.
-                If None, don't use data-parallel processing, use default onnxruntime threading instead.
-
+            images: A single image input or an iterable of image inputs to embed.
+            batch_size: Number of images to process per batch.
+            parallel: Number of parallel workers to use for encoding. If 0, uses all available cores; if None, disables parallel processing.
+
         Returns:
-            List of embeddings, one per document
+            An iterable of NumPy arrays, each representing the embedding of an input image.
         """
         yield from self._embed_images(
             model_name=self.model_name,
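The two methods above embed into one shared multi-vector space, which is the point of ColPali-style retrieval. A usage sketch follows; the `LateInteractionMultimodalEmbedding` wrapper and the model name are assumptions about fastembed's public API.

from fastembed import LateInteractionMultimodalEmbedding

model = LateInteractionMultimodalEmbedding(
    "Qdrant/colpali-v1.3-fp16", lazy_load=True
)

# Text queries and page images are embedded into the same space, so query
# token vectors can be matched against page patch vectors at search time.
query_embeddings = list(model.embed_text(["retrieval-augmented generation"]))
page_embeddings = list(model.embed_image(["pages/page_01.png"], batch_size=4))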

fastembed/late_interaction_multimodal/onnx_multimodal_model.py

Lines changed: 25 additions & 0 deletions
@@ -124,6 +124,26 @@ def _embed_documents(
         specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
+        """
+        Embeds a collection of text documents using the ONNX model, with optional parallel processing.
+
+        If the input is small or parallelism is not requested, processes documents in batches on the main process. Otherwise, distributes batches across parallel worker processes. Supports additional options for local file usage and specifying a model path.
+
+        Args:
+            model_name: Name of the ONNX model to use.
+            cache_dir: Directory for model caching.
+            documents: Single string or iterable of text documents to embed.
+            batch_size: Number of documents per batch.
+            parallel: Number of parallel worker processes to use. If None or input is small, runs in the main process.
+            providers: Optional sequence of ONNX runtime providers.
+            cuda: Whether to use CUDA-enabled devices.
+            device_ids: Optional list of device IDs for parallel workers.
+            local_files_only: If True, restricts model loading to local files.
+            specific_model_path: Optional path to a specific model file.
+
+        Yields:
+            Embeddings for each input document, in order.
+        """
         is_small = False

         if isinstance(documents, str):
@@ -191,6 +211,11 @@ def _embed_images(
         specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[T]:
+        """
+        Embeds images using the ONNX model, with optional parallel processing.
+
+        Processes a collection of images in batches, either sequentially or in parallel using worker processes. Supports loading models from local files only or a specific model path if specified. Yields post-processed embeddings for each image.
+        """
         is_small = False

         if isinstance(images, (str, Path, Image.Image)):
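Both docstrings above promise results in input order even on the parallel path. Below is a hedged sketch of one way to preserve ordering across worker processes; it is illustrative only and not the fastembed implementation.

from multiprocessing import Pool

def encode(batch: list[int]) -> list[int]:
    # Stand-in for ONNX inference on one batch.
    return [x * x for x in batch]

if __name__ == "__main__":
    batches = [[1, 2], [3, 4], [5, 6]]
    with Pool(processes=2) as pool:
        # imap (unlike imap_unordered) yields results in submission order,
        # so embeddings can be zipped back to their inputs downstream.
        for result in pool.imap(encode, batches):
            print(result)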

fastembed/rerank/cross_encoder/onnx_text_cross_encoder.py

Lines changed: 14 additions & 19 deletions
@@ -88,25 +88,9 @@ def __init__(
         **kwargs: Any,
     ):
         """
-        Args:
-            model_name (str): The name of the model to use.
-            cache_dir (str, optional): The path to the cache directory.
-                Can be set using the `FASTEMBED_CACHE_PATH` env variable.
-                Defaults to `fastembed_cache` in the system's temp directory.
-            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
-            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
-                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
-            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
-                Defaults to False.
-            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
-                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
-            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
-                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
-            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
-            specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else
-
-        Raises:
-            ValueError: If the model_name is not in the format <org>/<model> e.g. Xenova/ms-marco-MiniLM-L-6-v2.
+        Initializes an ONNX-based cross-encoder model for text re-ranking.
+
+        Configures model selection, caching, threading, device assignment, ONNX runtime providers, and model loading behavior. Downloads and prepares the ONNX model for inference, with support for custom model paths and lazy loading. Raises a ValueError if the model name format is invalid.
         """
         super().__init__(model_name, cache_dir, threads, **kwargs)
         self.providers = providers
@@ -181,6 +165,17 @@ def rerank_pairs(
         parallel: Optional[int] = None,
         **kwargs: Any,
     ) -> Iterable[float]:
+        """
+        Reranks pairs of texts using the ONNX cross-encoder model.
+
+        Args:
+            pairs: An iterable of (query, document) string tuples to be scored.
+            batch_size: Number of pairs to process in each batch. Defaults to 64.
+            parallel: Optional number of parallel workers for processing.
+
+        Yields:
+            Relevance scores as floats for each input pair, in order.
+        """
         yield from self._rerank_pairs(
             model_name=self.model_name,
             cache_dir=str(self.cache_dir),
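A usage sketch for rerank_pairs() as documented above. The `TextCrossEncoder` import path is an assumption about fastembed's public API; the model name comes from the example in the removed docstring.

from fastembed.rerank.cross_encoder import TextCrossEncoder

encoder = TextCrossEncoder("Xenova/ms-marco-MiniLM-L-6-v2")

pairs = [
    ("what is vector search", "Vector search finds nearest neighbors in embedding space."),
    ("what is vector search", "Bananas are rich in potassium."),
]
# Scores are yielded as floats, one per pair, in input order.
for score in encoder.rerank_pairs(pairs, batch_size=64, parallel=None):
    print(score)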

fastembed/rerank/cross_encoder/onnx_text_model.py

Lines changed: 20 additions & 0 deletions
@@ -98,6 +98,20 @@ def _rerank_pairs(
         specific_model_path: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterable[float]:
+        """
+        Reranks a sequence of text pairs using the ONNX cross-encoder model, with optional parallel processing.
+
+        If parallel processing is enabled and the input is large, distributes batches across multiple worker processes; otherwise, processes batches in the current process. Supports additional options for model loading, including restricting to local files and specifying a model path.
+
+        Args:
+            model_name: Name of the ONNX model to use.
+            cache_dir: Directory for model caching.
+            pairs: Iterable of (query, document) text pairs to rerank.
+            batch_size: Number of pairs per inference batch.
+            parallel: Number of worker processes to use; if None or input is small, runs in the current process.
+            providers: Optional ONNX runtime providers.
+            cuda: Whether to use CUDA-enabled devices.
+            device_ids: Optional list of device IDs for parallel workers.
+            local_files_only: If True, restricts model loading to local files only.
+            specific_model_path: Optional path to a specific model file.
+
+        Yields:
+            Reranked scores as floats, in the same order as the input pairs.
+        """
         is_small = False

         if isinstance(pairs, tuple):
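The trailing context line checks `isinstance(pairs, tuple)`, which suggests a single (query, document) tuple is normalized into a one-element collection before batching. A hedged sketch of that normalization pattern; the helper name is invented and this is not the fastembed source.

from typing import Iterable, Union

Pair = tuple[str, str]

def normalize_pairs(pairs: Union[Pair, Iterable[Pair]]) -> list[Pair]:
    # A bare tuple is treated as one (query, document) pair.
    if isinstance(pairs, tuple):
        return [pairs]
    return list(pairs)

print(normalize_pairs(("query", "document")))          # [("query", "document")]
print(normalize_pairs([("q1", "d1"), ("q2", "d2")]))   # unchanged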
