Skip to content

Commit 6b1bcbf

Browse files
committed
VoyageAI's new multimodal embedding model
1 parent d34ba9f commit 6b1bcbf

File tree

3 files changed

+153
-0
lines changed

3 files changed

+153
-0
lines changed

test/collection/test_config.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ def test_basic_config():
9898
}
9999
},
100100
),
101+
(
    Configure.Vectorizer.multi2vec_voyageai(
        model="voyage-multimodal-3",
        truncation=False,
        output_encoding=None,
        vectorize_collection_name=False,
        base_url="https://api.voyageai.com",
    ),
    {
        # The module-config key must name the vectorizer under test
        # (`multi2vec-voyageai`), not the Cohere module it was copied from.
        "multi2vec-voyageai": {
            "model": "voyage-multimodal-3",
            "truncation": False,
            # NOTE(review): presumably `None` values are dropped during
            # serialization and `baseURL` is emitted via
            # `AnyHttpUrl.unicode_string()`, which may append a trailing
            # slash -- confirm these two expectations against `_to_dict`.
            "output_encoding": None,
            "vectorizeClassName": False,
            "baseURL": "https://api.voyageai.com",
        }
    },
),
101119
(
102120
Configure.Vectorizer.text2vec_gpt4all(),
103121
{
@@ -1293,6 +1311,20 @@ def test_vector_config_flat_pq() -> None:
12931311
}
12941312
},
12951313
),
1314+
(
1315+
[Configure.NamedVectors.multi2vec_voyageai(name="test", text_fields=["prop"])],
1316+
{
1317+
"test": {
1318+
"vectorizer": {
1319+
"multi2vec-voyageai": {
1320+
"vectorizeClassName": True,
1321+
"textFields": ["prop"],
1322+
}
1323+
},
1324+
"vectorIndexType": "hnsw",
1325+
}
1326+
},
1327+
),
12961328
(
12971329
[
12981330
Configure.NamedVectors.multi2vec_jinaai(

weaviate/collections/classes/config_named_vectors.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
_Img2VecNeuralConfig,
2222
_Multi2VecBindConfig,
2323
_Multi2VecClipConfig,
24+
_Multi2VecVoyageaiConfig,
2425
_Multi2VecGoogleConfig,
2526
_Ref2VecCentroidConfig,
2627
_Text2VecAWSConfig,
@@ -48,6 +49,7 @@
4849
OpenAIType,
4950
Vectorizers,
5051
VoyageModel,
52+
VoyageMultimodalModel,
5153
_map_multi2vec_fields,
5254
_VectorizerCustomConfig,
5355
_Text2VecDatabricksConfig,
@@ -755,6 +757,58 @@ def multi2vec_bind(
755757
vector_index_config=vector_index_config,
756758
)
757759

760+
@staticmethod
def multi2vec_voyageai(
    name: str,
    *,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _NamedVectorConfigCreate:
    """Create a named vector using the `multi2vec-voyageai` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Arguments:
        `name`
            The name of the named vector.
        `vector_index_config`
            The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `base_url`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
        `truncation`
            The truncation setting to use. Defaults to `None`, which uses the server-defined default.
        `output_encoding`
            Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
        `image_fields`
            The image fields to use in vectorization.
        `text_fields`
            The text fields to use in vectorization.

    Raises:
        `pydantic.ValidationError` if the supplied values fail validation of the underlying config model (for example, `base_url` is not a valid HTTP URL).
    """
    # Field lists are normalised by the shared helper before being handed to
    # the vectorizer config, exactly as the other multi2vec factories do.
    return _NamedVectorConfigCreate(
        name=name,
        vectorizer=_Multi2VecVoyageaiConfig(
            baseURL=base_url,
            model=model,
            truncation=truncation,
            output_encoding=output_encoding,
            vectorizeClassName=vectorize_collection_name,
            imageFields=_map_multi2vec_fields(image_fields),
            textFields=_map_multi2vec_fields(text_fields),
        ),
        vector_index_config=vector_index_config,
    )
811+
758812
@staticmethod
759813
def ref2vec_centroid(
760814
name: str,

weaviate/collections/classes/config_vectorizers.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@
5555
"voyage-finance-2",
5656
"voyage-multilingual-2",
5757
]
58+
VoyageMultimodalModel: TypeAlias = Literal[
59+
"voyage-multimodal-3",
60+
]
5861
AWSModel: TypeAlias = Literal[
5962
"amazon.titan-embed-text-v1",
6063
"cohere.embed-english-v3",
@@ -106,6 +109,8 @@ class Vectorizers(str, Enum):
106109
Weaviate module backed by a palm model for images and text.
107110
`MULTI2VEC_BIND`
108111
Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.
112+
`MULTI2VEC_VOYAGEAI`
113+
Weaviate module backed by Voyage AI multimodal embedding models.
109114
`REF2VEC_CENTROID`
110115
Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.
111116
"""
@@ -131,6 +136,7 @@ class Vectorizers(str, Enum):
131136
MULTI2VEC_JINAAI = "multi2vec-jinaai"
132137
MULTI2VEC_BIND = "multi2vec-bind"
133138
MULTI2VEC_PALM = "multi2vec-palm" # change to google once 1.27 is the lowest supported version
139+
MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
134140
REF2VEC_CENTROID = "ref2vec-centroid"
135141

136142

@@ -462,6 +468,22 @@ class _Multi2VecBindConfig(_Multi2VecBase):
462468
videoFields: Optional[List[Multi2VecField]]
463469

464470

471+
class _Multi2VecVoyageaiConfig(_Multi2VecBase):
    # Module configuration for the `multi2vec-voyageai` vectorizer.
    # The `vectorizer` field is frozen and excluded from the dumped model.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_VOYAGEAI,
        frozen=True,
        exclude=True,
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]
    output_encoding: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        # Start from the parent's serialisation, then swap the URL object for
        # its plain string form when a base URL was supplied.
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            return serialized
        serialized["baseURL"] = url.unicode_string()
        return serialized
485+
486+
465487
class _Ref2VecCentroidConfig(_VectorizerConfigCreate):
466488
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
467489
default=Vectorizers.REF2VEC_CENTROID, frozen=True, exclude=True
@@ -796,6 +818,51 @@ def multi2vec_cohere(
796818
textFields=_map_multi2vec_fields(text_fields),
797819
)
798820

821+
@staticmethod
def multi2vec_voyageai(
    *,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Arguments:
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
        `truncation`
            The truncation setting to use. Defaults to `None`, which uses the server-defined default.
        `output_encoding`
            Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `base_url`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `image_fields`
            The image fields to use in vectorization.
        `text_fields`
            The text fields to use in vectorization.

    Raises:
        `pydantic.ValidationError` if the supplied values fail validation of the underlying config model (for example, `base_url` is not a valid HTTP URL).
    """
    # `output_encoding` now defaults to `None` like every other optional
    # argument here; existing callers that pass it explicitly are unaffected.
    return _Multi2VecVoyageaiConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        output_encoding=output_encoding,
        vectorizeClassName=vectorize_collection_name,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
    )
865+
799866
@staticmethod
800867
def text2vec_databricks(
801868
*,

0 commit comments

Comments
 (0)