diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 7585882c8..d7130c9d6 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -128,6 +128,61 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + output_encoding="base64", + vectorize_collection_name=True, + base_url="https://api.voyageai.com", + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "baseURL": "https://api.voyageai.com/", + } + }, + ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + truncation=True, + text_fields=[Multi2VecField(name="text", weight=0.2)], + image_fields=[Multi2VecField(name="image", weight=0.3)], + video_fields=[Multi2VecField(name="video", weight=0.5)], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "truncation": True, + "textFields": ["text"], + "imageFields": ["image"], + "videoFields": ["video"], + "weights": { + "textFields": [0.2], + "imageFields": [0.3], + "videoFields": [0.5], + }, + } + }, + ), + ( + Configure.Vectorizer.multi2vec_voyageai( + model="voyage-multimodal-3.5", + dimensions=512, + text_fields=["text"], + video_fields=["video"], + ), + { + "multi2vec-voyageai": { + "model": "voyage-multimodal-3.5", + "dimensions": 512, + "textFields": ["text"], + "videoFields": ["video"], + } + }, + ), ( Configure.Vectorizer.multi2vec_nvidia( model="nvidia/nvclip", diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index bc1d27cd7..f4552b573 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -700,9 +700,11 @@ def multi2vec_voyageai( base_url: Optional[AnyHttpUrl] = None, model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, + dimensions: Optional[int] = None, output_encoding: Optional[str] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, ) -> _NamedVectorConfigCreate: @@ -717,9 +719,11 @@ def multi2vec_voyageai( vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. model: The model to use. Defaults to `None`, which uses the server-defined default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. Raises: pydantic.ValidationError: If `model` is not a valid value from the `VoyageaiMultimodalModel` type. @@ -730,8 +734,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ), vector_index_config=vector_index_config, ) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index e757580fd..5917911de 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -62,7 +62,10 @@ "voyage-finance-2", "voyage-multilingual-2", ] -VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",] +VoyageMultimodalModel: TypeAlias = Literal[ + "voyage-multimodal-3", + "voyage-multimodal-3.5", +] AWSModel: TypeAlias = Literal[ "amazon.titan-embed-text-v1", "cohere.embed-english-v3", @@ -550,6 +553,8 @@ class _Multi2VecVoyageaiConfig(_Multi2VecBase): baseURL: Optional[AnyHttpUrl] model: Optional[str] truncation: Optional[bool] + dimensions: Optional[int] + videoFields: Optional[List[Multi2VecField]] def _to_dict(self) -> Dict[str, Any]: ret_dict = super()._to_dict() @@ -881,37 +886,43 @@ def multi2vec_cohere( @staticmethod def multi2vec_voyageai( *, - model: Optional[Union[CohereMultimodalModel, str]] = None, + model: Optional[Union[VoyageMultimodalModel, str]] = None, truncation: Optional[bool] = None, - output_encoding: Optional[str], + dimensions: Optional[int] = None, + output_encoding: Optional[str] = None, vectorize_collection_name: bool = True, base_url: Optional[AnyHttpUrl] = None, image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, ) -> _VectorizerConfigCreate: - """Create a `_Multi2VecCohereConfig` object for use when vectorizing using the `multi2vec-cohere` model. + """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model. - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal) + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) for detailed usage. Args: model: The model to use. Defaults to `None`, which uses the server-defined default. - truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default (1024 for voyage-multimodal-3.5). output_encoding: Deprecated, has no effect. vectorize_collection_name: Deprecated, has no effect. base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. Raises: - pydantic.ValidationError: If `model` is not a valid value from the `CohereMultimodalModel` type or if `truncate` is not a valid value from the `CohereTruncation` type. + pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type. """ return _Multi2VecVoyageaiConfig( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ) @staticmethod diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index 64d11b100..613e1244e 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -1075,6 +1075,8 @@ def multi2vec_voyageai( image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, model: Optional[Union[VoyageMultimodalModel, str]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + dimensions: Optional[int] = None, truncation: Optional[bool] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, ) -> _VectorConfigCreate: @@ -1089,8 +1091,9 @@ def multi2vec_voyageai( base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. image_fields: The image fields to use in vectorization. model: The model to use. Defaults to `None`, which uses the server-defined default. - output_encoding: The output encoding to use. Defaults to `None`, which uses the server-defined default. text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. + dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default. truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default @@ -1103,8 +1106,10 @@ def multi2vec_voyageai( baseURL=base_url, model=model, truncation=truncation, + dimensions=dimensions, imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), ), vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), )