Skip to content

Commit 4b7e907

Browse files
committed
Migration of models to dataclasses
1 parent a931f14 commit 4b7e907

File tree

17 files changed

+614
-576
lines changed

17 files changed

+614
-576
lines changed

fastembed/common/model_management.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def _get_model_description(cls, model_name: str) -> dict[str, Any]:
4545
dict[str, Any]: The model description.
4646
"""
4747
for model in cls.list_supported_models():
48-
if model_name.lower() == model["model"].lower():
48+
if model_name.lower() == model.model.lower():
4949
return model
5050

5151
raise ValueError(f"Model {model_name} is not supported in {cls.__name__}.")
@@ -160,7 +160,9 @@ def _collect_file_metadata(
160160
}
161161
return meta
162162

163-
def _save_file_metadata(model_dir: Path, meta: dict[str, dict[str, Union[int, str]]]) -> None:
163+
def _save_file_metadata(
164+
model_dir: Path, meta: dict[str, dict[str, Union[int, str]]]
165+
) -> None:
164166
try:
165167
if not model_dir.exists():
166168
model_dir.mkdir(parents=True, exist_ok=True)
@@ -368,16 +370,16 @@ def download_model(
368370
if specific_model_path:
369371
return Path(specific_model_path)
370372
retries = 1 if local_files_only else retries
371-
hf_source = model.get("sources", {}).get("hf")
372-
url_source = model.get("sources", {}).get("url")
373+
hf_source = model.sources.hf
374+
url_source = model.sources.url
373375

374376
sleep = 3.0
375377
while retries > 0:
376378
retries -= 1
377379

378380
if hf_source:
379-
extra_patterns = [model["model_file"]]
380-
extra_patterns.extend(model.get("additional_files", []))
381+
extra_patterns = [model.model_file]
382+
extra_patterns.extend(model.additional_files)
381383

382384
try:
383385
return Path(
@@ -399,7 +401,7 @@ def download_model(
399401
if url_source or local_files_only:
400402
try:
401403
return cls.retrieve_model_gcs(
402-
model["model"],
404+
model.model,
403405
url_source,
404406
str(cache_dir),
405407
local_files_only=local_files_only,
@@ -417,4 +419,4 @@ def download_model(
417419
time.sleep(sleep)
418420
sleep *= 3
419421

420-
raise ValueError(f"Could not load model {model['model']} from any source.")
422+
raise ValueError(f"Could not load model {model.model} from any source.")

fastembed/image/onnx_embedding.py

Lines changed: 48 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -9,62 +9,54 @@
99
from fastembed.image.image_embedding_base import ImageEmbeddingBase
1010
from fastembed.image.onnx_image_model import ImageEmbeddingWorker, OnnxImageModel
1111

12-
supported_onnx_models = [
13-
{
14-
"model": "Qdrant/clip-ViT-B-32-vision",
15-
"dim": 512,
16-
"description": "Image embeddings, Multimodal (text&image), 2021 year",
17-
"license": "mit",
18-
"size_in_GB": 0.34,
19-
"sources": {
20-
"hf": "Qdrant/clip-ViT-B-32-vision",
21-
},
22-
"model_file": "model.onnx",
23-
},
24-
{
25-
"model": "Qdrant/resnet50-onnx",
26-
"dim": 2048,
27-
"description": "Image embeddings, Unimodal (image), 2016 year",
28-
"license": "apache-2.0",
29-
"size_in_GB": 0.1,
30-
"sources": {
31-
"hf": "Qdrant/resnet50-onnx",
32-
},
33-
"model_file": "model.onnx",
34-
},
35-
{
36-
"model": "Qdrant/Unicom-ViT-B-16",
37-
"dim": 768,
38-
"description": "Image embeddings (more detailed than Unicom-ViT-B-32), Multimodal (text&image), 2023 year",
39-
"license": "apache-2.0",
40-
"size_in_GB": 0.82,
41-
"sources": {
42-
"hf": "Qdrant/Unicom-ViT-B-16",
43-
},
44-
"model_file": "model.onnx",
45-
},
46-
{
47-
"model": "Qdrant/Unicom-ViT-B-32",
48-
"dim": 512,
49-
"description": "Image embeddings, Multimodal (text&image), 2023 year",
50-
"license": "apache-2.0",
51-
"size_in_GB": 0.48,
52-
"sources": {
53-
"hf": "Qdrant/Unicom-ViT-B-32",
54-
},
55-
"model_file": "model.onnx",
56-
},
57-
{
58-
"model": "jinaai/jina-clip-v1",
59-
"dim": 768,
60-
"description": "Image embeddings, Multimodal (text&image), 2024 year",
61-
"license": "apache-2.0",
62-
"size_in_GB": 0.34,
63-
"sources": {
64-
"hf": "jinaai/jina-clip-v1",
65-
},
66-
"model_file": "onnx/vision_model.onnx",
67-
},
12+
from fastembed.common.model_description import ModelDescription, ModelSource
13+
14+
supported_onnx_models: list[ModelDescription] = [
15+
ModelDescription(
16+
model="Qdrant/clip-ViT-B-32-vision",
17+
dim=512,
18+
description="Image embeddings, Multimodal (text&image), 2021 year",
19+
license="mit",
20+
size_in_GB=0.34,
21+
sources=ModelSource(hf="Qdrant/clip-ViT-B-32-vision"),
22+
model_file="model.onnx",
23+
),
24+
ModelDescription(
25+
model="Qdrant/resnet50-onnx",
26+
dim=2048,
27+
description="Image embeddings, Unimodal (image), 2016 year",
28+
license="apache-2.0",
29+
size_in_GB=0.1,
30+
sources=ModelSource(hf="Qdrant/resnet50-onnx"),
31+
model_file="model.onnx",
32+
),
33+
ModelDescription(
34+
model="Qdrant/Unicom-ViT-B-16",
35+
dim=768,
36+
description="Image embeddings (more detailed than Unicom-ViT-B-32), Multimodal (text&image), 2023 year",
37+
license="apache-2.0",
38+
size_in_GB=0.82,
39+
sources=ModelSource(hf="Qdrant/Unicom-ViT-B-16"),
40+
model_file="model.onnx",
41+
),
42+
ModelDescription(
43+
model="Qdrant/Unicom-ViT-B-32",
44+
dim=512,
45+
description="Image embeddings, Multimodal (text&image), 2023 year",
46+
license="apache-2.0",
47+
size_in_GB=0.48,
48+
sources=ModelSource(hf="Qdrant/Unicom-ViT-B-32"),
49+
model_file="model.onnx",
50+
),
51+
ModelDescription(
52+
model="jinaai/jina-clip-v1",
53+
dim=768,
54+
description="Image embeddings, Multimodal (text&image), 2024 year",
55+
license="apache-2.0",
56+
size_in_GB=0.34,
57+
sources=ModelSource(hf="jinaai/jina-clip-v1"),
58+
model_file="onnx/vision_model.onnx",
59+
),
6860
]
6961

7062

fastembed/late_interaction/colbert.py

Lines changed: 21 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -12,31 +12,27 @@
1212
LateInteractionTextEmbeddingBase,
1313
)
1414
from fastembed.text.onnx_text_model import OnnxTextModel, TextEmbeddingWorker
15-
16-
17-
supported_colbert_models = [
18-
{
19-
"model": "colbert-ir/colbertv2.0",
20-
"dim": 128,
21-
"description": "Late interaction model",
22-
"license": "mit",
23-
"size_in_GB": 0.44,
24-
"sources": {
25-
"hf": "colbert-ir/colbertv2.0",
26-
},
27-
"model_file": "model.onnx",
28-
},
29-
{
30-
"model": "answerdotai/answerai-colbert-small-v1",
31-
"dim": 96,
32-
"description": "Text embeddings, Unimodal (text), Multilingual (~100 languages), 512 input tokens truncation, 2024 year",
33-
"license": "apache-2.0",
34-
"size_in_GB": 0.13,
35-
"sources": {
36-
"hf": "answerdotai/answerai-colbert-small-v1",
37-
},
38-
"model_file": "vespa_colbert.onnx",
39-
},
15+
from fastembed.common.model_description import ModelDescription, ModelSource
16+
17+
supported_colbert_models: list[ModelDescription] = [
18+
ModelDescription(
19+
model="colbert-ir/colbertv2.0",
20+
dim=128,
21+
description="Late interaction model",
22+
license="mit",
23+
size_in_GB=0.44,
24+
sources=ModelSource(hf="colbert-ir/colbertv2.0"),
25+
model_file="model.onnx",
26+
),
27+
ModelDescription(
28+
model="answerdotai/answerai-colbert-small-v1",
29+
dim=96,
30+
description="Text embeddings, Unimodal (text), Multilingual (~100 languages), 512 input tokens truncation, 2024 year",
31+
license="apache-2.0",
32+
size_in_GB=0.13,
33+
sources=ModelSource(hf="answerdotai/answerai-colbert-small-v1"),
34+
model_file="vespa_colbert.onnx",
35+
),
4036
]
4137

4238

fastembed/late_interaction/jina_colbert.py

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,21 +2,19 @@
22

33
from fastembed.common.types import NumpyArray
44
from fastembed.late_interaction.colbert import Colbert, ColbertEmbeddingWorker
5-
6-
7-
supported_jina_colbert_models = [
8-
{
9-
"model": "jinaai/jina-colbert-v2",
10-
"dim": 128,
11-
"description": "New model that expands capabilities of colbert-v1 with multilingual and context length of 8192, 2024 year",
12-
"license": "cc-by-nc-4.0",
13-
"size_in_GB": 2.24,
14-
"sources": {
15-
"hf": "jinaai/jina-colbert-v2",
16-
},
17-
"model_file": "onnx/model.onnx",
18-
"additional_files": ["onnx/model.onnx_data"],
19-
},
5+
from fastembed.common.model_description import ModelDescription, ModelSource
6+
7+
supported_jina_colbert_models: list[ModelDescription] = [
8+
ModelDescription(
9+
model="jinaai/jina-colbert-v2",
10+
dim=128,
11+
description="New model that expands capabilities of colbert-v1 with multilingual and context length of 8192, 2024 year",
12+
license="cc-by-nc-4.0",
13+
size_in_GB=2.24,
14+
sources=ModelSource(hf="jinaai/jina-colbert-v2"),
15+
model_file="onnx/model.onnx",
16+
additional_files=["onnx/model.onnx_data"],
17+
)
2018
]
2119

2220

fastembed/late_interaction_multimodal/colpali.py

Lines changed: 13 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -15,22 +15,19 @@
1515
TextEmbeddingWorker,
1616
ImageEmbeddingWorker,
1717
)
18-
19-
supported_colpali_models = [
20-
{
21-
"model": "Qdrant/colpali-v1.3-fp16",
22-
"dim": 128,
23-
"description": "Text embeddings, Multimodal (text&image), English, 50 tokens query length truncation, 2024.",
24-
"license": "mit",
25-
"size_in_GB": 6.5,
26-
"sources": {
27-
"hf": "Qdrant/colpali-v1.3-fp16",
28-
},
29-
"additional_files": [
30-
"model.onnx_data",
31-
],
32-
"model_file": "model.onnx",
33-
},
18+
from fastembed.common.model_description import ModelDescription, ModelSource
19+
20+
supported_colpali_models: list[ModelDescription] = [
21+
ModelDescription(
22+
model="Qdrant/colpali-v1.3-fp16",
23+
dim=128,
24+
description="Text embeddings, Multimodal (text&image), English, 50 tokens query length truncation, 2024.",
25+
license="mit",
26+
size_in_GB=6.5,
27+
sources=ModelSource(hf="Qdrant/colpali-v1.3-fp16"),
28+
additional_files=["model.onnx_data"],
29+
model_file="model.onnx",
30+
),
3431
]
3532

3633

0 commit comments

Comments
 (0)