
Commit 73e1e5e

chore: Add missing returns in defs (#451)
* chore: Add missing returns in defs

* remove return type from init

* remove incorrect ndarray specifier

---------

Co-authored-by: George Panchuk <george.panchuk@qdrant.tech>
1 parent 105d6cf commit 73e1e5e

Showing 17 changed files with 64 additions and 49 deletions.
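The pattern behind most of these diffs: defs that returned a value without declaring it get an explicit return annotation, so type checkers can verify callers. A minimal before/after sketch of the idea (hypothetical function, not taken from this commit):

# Before: callers see an untyped return value.
def top_k_indices(scores, k):
    return sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]


# After: the contract is explicit and checkable.
def top_k_indices(scores: list[float], k: int) -> list[int]:
    return sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]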

docs/examples/ColBERT_with_FastEmbed.ipynb

Lines changed: 14 additions & 3 deletions
@@ -54,7 +54,14 @@
 },
 {
 "data": {
-"text/plain": "[{'model': 'colbert-ir/colbertv2.0',\n 'dim': 128,\n 'description': 'Late interaction model',\n 'size_in_GB': 0.44,\n 'sources': {'hf': 'colbert-ir/colbertv2.0'},\n 'model_file': 'model.onnx'}]"
+"text/plain": [
+"[{'model': 'colbert-ir/colbertv2.0',\n",
+" 'dim': 128,\n",
+" 'description': 'Late interaction model',\n",
+" 'size_in_GB': 0.44,\n",
+" 'sources': {'hf': 'colbert-ir/colbertv2.0'},\n",
+" 'model_file': 'model.onnx'}]"
+]
 },
 "execution_count": 1,
 "metadata": {},

@@ -212,7 +219,9 @@
 "outputs": [
 {
 "data": {
-"text/plain": "((26, 128), (32, 128))"
+"text/plain": [
+"((26, 128), (32, 128))"
+]
 },
 "execution_count": 18,
 "metadata": {},

@@ -271,7 +280,9 @@
 "import numpy as np\n",
 "\n",
 "\n",
-"def compute_relevance_scores(query_embedding: np.array, document_embeddings: np.array, k: int):\n",
+"def compute_relevance_scores(\n",
+"    query_embedding: np.array, document_embeddings: np.array, k: int\n",
+") -> list[int]:\n",
 "    \"\"\"\n",
 "    Compute relevance scores for top-k documents given a query.\n",
 "\n",

docs/examples/FastEmbed_vs_HF_Comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -152,7 +152,7 @@
 "    HuggingFace Transformer implementation of FlagEmbedding\n",
 "    \"\"\"\n",
 "\n",
-"    def __init__(self, model_id: str):\n",
+"    def __init__(self, model_id: str) -> None:\n",
 "        self.model = AutoModel.from_pretrained(model_id)\n",
 "        self.tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
 "\n",

docs/examples/Hybrid_Search.ipynb

Lines changed: 3 additions & 3 deletions
@@ -488,7 +488,7 @@
 }
 ],
 "source": [
-"def make_sparse_embedding(texts: list[str]):\n",
+"def make_sparse_embedding(texts: list[str]) -> list[SparseEmbedding]:\n",
 "    return list(sparse_model.embed(texts, batch_size=32))\n",
 "\n",
 "\n",

@@ -615,7 +615,7 @@
 }
 ],
 "source": [
-"def get_tokens_and_weights(sparse_embedding, model_name):\n",
+"def get_tokens_and_weights(sparse_embedding, model_name) -> dict[str, float]:\n",
 "    # Find the tokenizer for the model\n",
 "    tokenizer_source = None\n",
 "    for model_info in SparseTextEmbedding.list_supported_models():\n",

@@ -626,7 +626,7 @@
 "        raise ValueError(f\"Model {model_name} not found in the supported models.\")\n",
 "\n",
 "    tokenizer = AutoTokenizer.from_pretrained(tokenizer_source)\n",
-"    token_weight_dict = {}\n",
+"    token_weight_dict: dict[str, float] = {}\n",
 "    for i in range(len(sparse_embedding.indices)):\n",
 "        token = tokenizer.decode([sparse_embedding.indices[i]])\n",
 "        weight = sparse_embedding.values[i]\n",

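Note on the last hunk: the annotation on the empty dict is what gives a checker the value types; a bare token_weight_dict = {} leaves them undetermined, and mypy asks for exactly this annotation when it cannot infer them. A standalone sketch of the pattern, not from the notebook:

token_weight_dict: dict[str, float] = {}
token_weight_dict["hello"] = 0.83  # OK
token_weight_dict[42] = 0.1        # flagged by a type checker: key must be str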
fastembed/common/model_management.py

Lines changed: 1 addition & 1 deletion
@@ -255,7 +255,7 @@ def _save_file_metadata(model_dir: Path, meta: dict[str, dict[str, int]]) -> None:
         return result

     @classmethod
-    def decompress_to_cache(cls, targz_path: str, cache_dir: str):
+    def decompress_to_cache(cls, targz_path: str, cache_dir: str) -> str:
         """
         Decompresses a .tar.gz file to a cache directory.
fastembed/image/onnx_image_model.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ def _get_worker_class(cls) -> Type["ImageEmbeddingWorker"]:
     def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[T]:
         raise NotImplementedError("Subclasses must implement this method")

-    def __init__(self) -> None:
+    def __init__(self):
         super().__init__()
         self.processor = None
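This hunk (and the matching one in fastembed/text/onnx_text_model.py below) implements the "remove return type from init" item of the commit message. A hedged aside: under PEP 484, annotating __init__ with -> None is what opts an otherwise unannotated method into body checking, so the two styles are not fully equivalent under mypy's default settings. A tiny illustration with hypothetical classes:

class Unchecked:
    def __init__(self):  # fully unannotated: mypy skips this body by default
        self.processor = None


class Checked:
    def __init__(self) -> None:  # annotated: the body is type-checked
        self.processor = None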

fastembed/image/transform/functional.py

Lines changed: 1 addition & 1 deletion
@@ -118,7 +118,7 @@ def rescale(image: np.ndarray, scale: float, dtype=np.float32) -> np.ndarray:
     return (image * scale).astype(dtype)


-def pil2ndarray(image: Union[Image.Image, np.ndarray]):
+def pil2ndarray(image: Union[Image.Image, np.ndarray]) -> np.ndarray:
     if isinstance(image, Image.Image):
         return np.asarray(image).transpose((2, 0, 1))
     return image

fastembed/image/transform/operators.py

Lines changed: 7 additions & 7 deletions
@@ -133,11 +133,11 @@ def from_config(cls, config: dict[str, Any]) -> "Compose":
         return cls(transforms=transforms)

     @staticmethod
-    def _get_convert_to_rgb(transforms: list[Transform], config: dict[str, Any]):
+    def _get_convert_to_rgb(transforms: list[Transform], config: dict[str, Any]) -> None:
         transforms.append(ConvertToRGB())

     @classmethod
-    def _get_resize(cls, transforms: list[Transform], config: dict[str, Any]):
+    def _get_resize(cls, transforms: list[Transform], config: dict[str, Any]) -> None:
         mode = config.get("image_processor_type", "CLIPImageProcessor")
         if mode == "CLIPImageProcessor":
             if config.get("do_resize", False):

@@ -200,7 +200,7 @@ def _get_resize(cls, transforms: list[Transform], config: dict[str, Any]):
             raise ValueError(f"Preprocessor {mode} is not supported")

     @staticmethod
-    def _get_center_crop(transforms: list[Transform], config: dict[str, Any]):
+    def _get_center_crop(transforms: list[Transform], config: dict[str, Any]) -> None:
         mode = config.get("image_processor_type", "CLIPImageProcessor")
         if mode == "CLIPImageProcessor":
             if config.get("do_center_crop", False):

@@ -220,24 +220,24 @@ def _get_center_crop(transforms: list[Transform], config: dict[str, Any]):
             raise ValueError(f"Preprocessor {mode} is not supported")

     @staticmethod
-    def _get_pil2ndarray(transforms: list[Transform], config: dict[str, Any]):
+    def _get_pil2ndarray(transforms: list[Transform], config: dict[str, Any]) -> None:
         transforms.append(PILtoNDarray())

     @staticmethod
-    def _get_rescale(transforms: list[Transform], config: dict[str, Any]):
+    def _get_rescale(transforms: list[Transform], config: dict[str, Any]) -> None:
         if config.get("do_rescale", True):
             rescale_factor = config.get("rescale_factor", 1 / 255)
             transforms.append(Rescale(scale=rescale_factor))

     @staticmethod
-    def _get_normalize(transforms: list[Transform], config: dict[str, Any]):
+    def _get_normalize(transforms: list[Transform], config: dict[str, Any]) -> None:
         if config.get("do_normalize", False):
             transforms.append(Normalize(mean=config["image_mean"], std=config["image_std"]))
         elif "mean" in config and "std" in config:
             transforms.append(Normalize(mean=config["mean"], std=config["std"]))

     @staticmethod
-    def _get_pad2square(transforms: list[Transform], config: dict[str, Any]):
+    def _get_pad2square(transforms: list[Transform], config: dict[str, Any]) -> None:
         mode = config.get("image_processor_type", "CLIPImageProcessor")
         if mode == "CLIPImageProcessor":
             pass
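All of these _get_* helpers mutate the transforms list in place rather than returning a new one, so -> None states that contract explicitly. A self-contained sketch of the convention (stand-in names, not the fastembed classes):

def append_rescale(transforms: list[str], config: dict[str, float]) -> None:
    # Mutates the caller's list; deliberately returns nothing.
    scale = config.get("rescale_factor", 1 / 255)
    transforms.append(f"rescale({scale})")


pipeline: list[str] = []
append_rescale(pipeline, {})
print(pipeline)  # ['rescale(0.00392156862745098)']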

fastembed/rerank/cross_encoder/onnx_text_model.py

Lines changed: 5 additions & 2 deletions
@@ -1,9 +1,10 @@
 import os
 from multiprocessing import get_all_start_methods
 from pathlib import Path
-from typing import Any, Iterable, Optional, Sequence, Type
+from typing import Any, Iterable, Optional, Sequence, Type, Union

 import numpy as np
+from numpy.typing import NDArray
 from tokenizers import Encoding

 from fastembed.common.onnx_model import (

@@ -46,7 +47,9 @@ def _load_onnx_model(
     def tokenize(self, pairs: list[tuple[str, str]], **_: Any) -> list[Encoding]:
         return self.tokenizer.encode_batch(pairs)

-    def _build_onnx_input(self, tokenized_input):
+    def _build_onnx_input(
+        self, tokenized_input
+    ) -> dict[str, NDArray[Union[np.float32, np.int64]]]:
         input_names = {node.name for node in self.model.get_inputs()}
         inputs = {
             "input_ids": np.array([enc.ids for enc in tokenized_input], dtype=np.int64),

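numpy.typing.NDArray, newly imported here, parameterizes ndarray by scalar dtype, which lets the _build_onnx_input annotation say "a dict of float32 or int64 arrays". A minimal sketch in the same style (hypothetical helper, not the fastembed implementation):

import numpy as np
from numpy.typing import NDArray


def pad_ids(ids: list[list[int]], pad_id: int) -> dict[str, NDArray[np.int64]]:
    # Pad ragged token-id rows into one rectangular int64 batch.
    width = max(len(row) for row in ids)
    batch = np.full((len(ids), width), pad_id, dtype=np.int64)
    for i, row in enumerate(ids):
        batch[i, : len(row)] = row
    return {"input_ids": batch}


print(pad_ids([[1, 2, 3], [4]], pad_id=0)["input_ids"])  # [[1 2 3] [4 0 0]]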
fastembed/text/onnx_text_model.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def _get_worker_class(cls) -> Type["TextEmbeddingWorker"]:
     def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[T]:
         raise NotImplementedError("Subclasses must implement this method")

-    def __init__(self) -> None:
+    def __init__(self):
         super().__init__()
         self.tokenizer = None
         self.special_token_to_id = {}

fastembed/text/text_embedding_base.py

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ def passage_embed(self, texts: Iterable[str], **kwargs: Any) -> Iterable[np.ndarray]:
         yield from self.embed(texts, **kwargs)

     def query_embed(self, query: Union[str, Iterable[str]], **kwargs: Any) -> Iterable[np.ndarray]:
+
         """
         Embeds queries