diff --git a/test/collection/test_config.py b/test/collection/test_config.py
index d6432bc54..35ca349a0 100644
--- a/test/collection/test_config.py
+++ b/test/collection/test_config.py
@@ -98,6 +98,24 @@ def test_basic_config():
             }
         },
     ),
+    (
+        Configure.Vectorizer.multi2vec_voyageai(
+            model="voyage-multimodal-3",
+            truncation=False,
+            output_encoding="base64",
+            vectorize_collection_name=False,
+            base_url="https://api.voyageai.com",
+        ),
+        {
+            "multi2vec-voyageai": {
+                "model": "voyage-multimodal-3",
+                "truncation": False,
+                "output_encoding": "base64",
+                "vectorizeClassName": False,
+                "baseURL": "https://api.voyageai.com/",
+            }
+        },
+    ),
     (
         Configure.Vectorizer.text2vec_gpt4all(),
         {
@@ -1293,6 +1311,20 @@ def test_vector_config_flat_pq() -> None:
             }
         },
     ),
+    (
+        [Configure.NamedVectors.multi2vec_voyageai(name="test", text_fields=["prop"])],
+        {
+            "test": {
+                "vectorizer": {
+                    "multi2vec-voyageai": {
+                        "vectorizeClassName": True,
+                        "textFields": ["prop"],
+                    }
+                },
+                "vectorIndexType": "hnsw",
+            }
+        },
+    ),
     (
         [
             Configure.NamedVectors.multi2vec_jinaai(
diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py
index 83c1a9b57..471ca39f6 100644
--- a/weaviate/collections/classes/config_named_vectors.py
+++ b/weaviate/collections/classes/config_named_vectors.py
@@ -21,6 +21,7 @@
     _Img2VecNeuralConfig,
     _Multi2VecBindConfig,
     _Multi2VecClipConfig,
+    _Multi2VecVoyageaiConfig,
     _Multi2VecGoogleConfig,
     _Ref2VecCentroidConfig,
     _Text2VecAWSConfig,
@@ -48,6 +49,7 @@
     OpenAIType,
     Vectorizers,
     VoyageModel,
+    VoyageMultimodalModel,
     _map_multi2vec_fields,
     _VectorizerCustomConfig,
     _Text2VecDatabricksConfig,
@@ -755,6 +757,61 @@ def multi2vec_bind(
             vector_index_config=vector_index_config,
         )
 
+    @staticmethod
+    def multi2vec_voyageai(
+        name: str,
+        *,
+        vector_index_config: Optional[_VectorIndexConfigCreate] = None,
+        vectorize_collection_name: bool = True,
+        base_url: Optional[AnyHttpUrl] = None,
+        model: Optional[Union[VoyageMultimodalModel, str]] = None,
+        truncation: Optional[bool] = None,
+        output_encoding: Optional[str] = None,
+        image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+    ) -> _NamedVectorConfigCreate:
+        """Create a named vector using the `multi2vec_voyageai` model.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
+        for detailed usage.
+
+        Arguments:
+            `name`
+                The name of the named vector.
+            `vector_index_config`
+                The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default
+            `vectorize_collection_name`
+                Whether to vectorize the collection name. Defaults to `True`.
+            `model`
+                The model to use. Defaults to `None`, which uses the server-defined default.
+            `truncation`
+                The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
+            `output_encoding`
+                Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
+            `base_url`
+                The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
+            `image_fields`
+                The image fields to use in vectorization.
+            `text_fields`
+                The text fields to use in vectorization.
+
+        Raises:
+            `pydantic.ValidationError` if `model` is not a valid value from the `VoyageMultimodalModel` type.
+        """
+        return _NamedVectorConfigCreate(
+            name=name,
+            vectorizer=_Multi2VecVoyageaiConfig(
+                baseURL=base_url,
+                model=model,
+                truncation=truncation,
+                output_encoding=output_encoding,
+                vectorizeClassName=vectorize_collection_name,
+                imageFields=_map_multi2vec_fields(image_fields),
+                textFields=_map_multi2vec_fields(text_fields),
+            ),
+            vector_index_config=vector_index_config,
+        )
+
     @staticmethod
     def ref2vec_centroid(
         name: str,
diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py
index 730a6cb28..e62c6b4b1 100644
--- a/weaviate/collections/classes/config_vectorizers.py
+++ b/weaviate/collections/classes/config_vectorizers.py
@@ -55,6 +55,7 @@
     "voyage-finance-2",
     "voyage-multilingual-2",
 ]
+VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",]
 AWSModel: TypeAlias = Literal[
     "amazon.titan-embed-text-v1",
     "cohere.embed-english-v3",
@@ -106,6 +107,8 @@ class Vectorizers(str, Enum):
             Weaviate module backed by a palm model for images and text.
         `MULTI2VEC_BIND`
             Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.
+        `MULTI2VEC_VOYAGEAI`
+            Weaviate module backed by a Voyage AI multimodal embedding model.
         `REF2VEC_CENTROID`
             Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.
     """
@@ -131,6 +134,7 @@ class Vectorizers(str, Enum):
     MULTI2VEC_JINAAI = "multi2vec-jinaai"
     MULTI2VEC_BIND = "multi2vec-bind"
     MULTI2VEC_PALM = "multi2vec-palm"  # change to google once 1.27 is the lowest supported version
+    MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
     REF2VEC_CENTROID = "ref2vec-centroid"
 
 
@@ -462,6 +466,23 @@ class _Multi2VecBindConfig(_Multi2VecBase):
     videoFields: Optional[List[Multi2VecField]]
 
 
+class _Multi2VecVoyageaiConfig(_Multi2VecBase):
+    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
+        default=Vectorizers.MULTI2VEC_VOYAGEAI, frozen=True, exclude=True
+    )
+    baseURL: Optional[AnyHttpUrl]
+    model: Optional[str]
+    truncation: Optional[bool]
+    output_encoding: Optional[str]
+
+    def _to_dict(self) -> Dict[str, Any]:
+        ret_dict = super()._to_dict()
+        if self.baseURL is not None:
+            # Emit the URL as a plain string rather than the pydantic URL object.
+            ret_dict["baseURL"] = self.baseURL.unicode_string()
+        return ret_dict
+
+
 class _Ref2VecCentroidConfig(_VectorizerConfigCreate):
     vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
         default=Vectorizers.REF2VEC_CENTROID, frozen=True, exclude=True
@@ -796,6 +817,51 @@ def multi2vec_cohere(
             textFields=_map_multi2vec_fields(text_fields),
         )
 
+    @staticmethod
+    def multi2vec_voyageai(
+        *,
+        model: Optional[Union[VoyageMultimodalModel, str]] = None,
+        truncation: Optional[bool] = None,
+        output_encoding: Optional[str] = None,
+        vectorize_collection_name: bool = True,
+        base_url: Optional[AnyHttpUrl] = None,
+        image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+    ) -> _VectorizerConfigCreate:
+        """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
+        for detailed usage.
+
+        Arguments:
+            `model`
+                The model to use. Defaults to `None`, which uses the server-defined default.
+            `truncation`
+                The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
+            `output_encoding`
+                Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
+            `vectorize_collection_name`
+                Whether to vectorize the collection name. Defaults to `True`.
+            `base_url`
+                The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
+            `image_fields`
+                The image fields to use in vectorization.
+            `text_fields`
+                The text fields to use in vectorization.
+
+        Raises:
+            `pydantic.ValidationError` if `model` is not a valid value from the `VoyageMultimodalModel` type.
+        """
+        return _Multi2VecVoyageaiConfig(
+            baseURL=base_url,
+            model=model,
+            truncation=truncation,
+            output_encoding=output_encoding,
+            vectorizeClassName=vectorize_collection_name,
+            imageFields=_map_multi2vec_fields(image_fields),
+            textFields=_map_multi2vec_fields(text_fields),
+        )
+
     @staticmethod
     def text2vec_databricks(
         *,