Merge pull request #1915 from voyage-ai/feat/embedding-model-voyage-multimodal-3.5

databyjp · web-flow · commit 82eefa81c38b · 2025-12-31T11:00:59.000Z
feat: voyage-multimodal-3.5 (video!)
diff --git a/test/collection/test_config.py b/test/collection/test_config.py
@@ -128,6 +128,61 @@ def test_basic_config():
             }
         },
     ),
+    (
+        Configure.Vectorizer.multi2vec_voyageai(
+            model="voyage-multimodal-3.5",
+            truncation=True,
+            output_encoding="base64",
+            vectorize_collection_name=True,
+            base_url="https://api.voyageai.com",
+        ),
+        {
+            "multi2vec-voyageai": {
+                "model": "voyage-multimodal-3.5",
+                "truncation": True,
+                "baseURL": "https://api.voyageai.com/",
+            }
+        },
+    ),
+    (
+        Configure.Vectorizer.multi2vec_voyageai(
+            model="voyage-multimodal-3.5",
+            truncation=True,
+            text_fields=[Multi2VecField(name="text", weight=0.2)],
+            image_fields=[Multi2VecField(name="image", weight=0.3)],
+            video_fields=[Multi2VecField(name="video", weight=0.5)],
+        ),
+        {
+            "multi2vec-voyageai": {
+                "model": "voyage-multimodal-3.5",
+                "truncation": True,
+                "textFields": ["text"],
+                "imageFields": ["image"],
+                "videoFields": ["video"],
+                "weights": {
+                    "textFields": [0.2],
+                    "imageFields": [0.3],
+                    "videoFields": [0.5],
+                },
+            }
+        },
+    ),
+    (
+        Configure.Vectorizer.multi2vec_voyageai(
+            model="voyage-multimodal-3.5",
+            dimensions=512,
+            text_fields=["text"],
+            video_fields=["video"],
+        ),
+        {
+            "multi2vec-voyageai": {
+                "model": "voyage-multimodal-3.5",
+                "dimensions": 512,
+                "textFields": ["text"],
+                "videoFields": ["video"],
+            }
+        },
+    ),
     (
         Configure.Vectorizer.multi2vec_nvidia(
             model="nvidia/nvclip",
diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py
@@ -700,9 +700,11 @@ def multi2vec_voyageai(
         base_url: Optional[AnyHttpUrl] = None,
         model: Optional[Union[VoyageMultimodalModel, str]] = None,
         truncation: Optional[bool] = None,
+        dimensions: Optional[int] = None,
         output_encoding: Optional[str] = None,
         image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
         text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
         vector_index_config: Optional[_VectorIndexConfigCreate] = None,
         vectorize_collection_name: bool = True,
     ) -> _NamedVectorConfigCreate:
@@ -717,9 +719,11 @@ def multi2vec_voyageai(
             vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
             model: The model to use. Defaults to `None`, which uses the server-defined default.
             truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
+            dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default.
             base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
             image_fields: The image fields to use in vectorization.
             text_fields: The text fields to use in vectorization.
+            video_fields: The video fields to use in vectorization.
 
         Raises:
             pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type.
@@ -730,8 +734,10 @@ def multi2vec_voyageai(
                 baseURL=base_url,
                 model=model,
                 truncation=truncation,
+                dimensions=dimensions,
                 imageFields=_map_multi2vec_fields(image_fields),
                 textFields=_map_multi2vec_fields(text_fields),
+                videoFields=_map_multi2vec_fields(video_fields),
             ),
             vector_index_config=vector_index_config,
         )
diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py
@@ -62,7 +62,10 @@
     "voyage-finance-2",
     "voyage-multilingual-2",
 ]
-VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",]
+VoyageMultimodalModel: TypeAlias = Literal[
+    "voyage-multimodal-3",
+    "voyage-multimodal-3.5",
+]
 AWSModel: TypeAlias = Literal[
     "amazon.titan-embed-text-v1",
     "cohere.embed-english-v3",
@@ -550,6 +553,8 @@ class _Multi2VecVoyageaiConfig(_Multi2VecBase):
     baseURL: Optional[AnyHttpUrl]
     model: Optional[str]
     truncation: Optional[bool]
+    dimensions: Optional[int]
+    videoFields: Optional[List[Multi2VecField]]
 
     def _to_dict(self) -> Dict[str, Any]:
         ret_dict = super()._to_dict()
@@ -881,37 +886,43 @@ def multi2vec_cohere(
     @staticmethod
     def multi2vec_voyageai(
         *,
-        model: Optional[Union[CohereMultimodalModel, str]] = None,
+        model: Optional[Union[VoyageMultimodalModel, str]] = None,
         truncation: Optional[bool] = None,
-        output_encoding: Optional[str],
+        dimensions: Optional[int] = None,
+        output_encoding: Optional[str] = None,
         vectorize_collection_name: bool = True,
         base_url: Optional[AnyHttpUrl] = None,
         image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
         text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
     ) -> _VectorizerConfigCreate:
-        """Create a `_Multi2VecCohereConfig` object for use when vectorizing using the `multi2vec-cohere` model.
+        """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.
 
-        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal)
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
         for detailed usage.
 
         Args:
             model: The model to use. Defaults to `None`, which uses the server-defined default.
-            truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
+            truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
+            dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default (1024 for voyage-multimodal-3.5).
             output_encoding: Deprecated, has no effect.
             vectorize_collection_name: Deprecated, has no effect.
             base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
             image_fields: The image fields to use in vectorization.
             text_fields: The text fields to use in vectorization.
+            video_fields: The video fields to use in vectorization.
 
         Raises:
-            pydantic.ValidationError: If `model` is not a valid value from the `CohereMultimodalModel` type or if `truncate` is not a valid value from the `CohereTruncation` type.
+            pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type.
         """
         return _Multi2VecVoyageaiConfig(
             baseURL=base_url,
             model=model,
             truncation=truncation,
+            dimensions=dimensions,
             imageFields=_map_multi2vec_fields(image_fields),
             textFields=_map_multi2vec_fields(text_fields),
+            videoFields=_map_multi2vec_fields(video_fields),
         )
 
     @staticmethod
diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py
@@ -1075,6 +1075,8 @@ def multi2vec_voyageai(
         image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
         model: Optional[Union[VoyageMultimodalModel, str]] = None,
         text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        dimensions: Optional[int] = None,
         truncation: Optional[bool] = None,
         vector_index_config: Optional[_VectorIndexConfigCreate] = None,
     ) -> _VectorConfigCreate:
@@ -1089,8 +1091,9 @@ def multi2vec_voyageai(
             base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
             image_fields: The image fields to use in vectorization.
             model: The model to use. Defaults to `None`, which uses the server-defined default.
-            output_encoding: The output encoding to use. Defaults to `None`, which uses the server-defined default.
             text_fields: The text fields to use in vectorization.
+            video_fields: The video fields to use in vectorization.
+            dimensions: The number of dimensions for the output embeddings. Defaults to `None`, which uses the model's default.
             truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
             vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default
 
@@ -1103,8 +1106,10 @@ def multi2vec_voyageai(
                 baseURL=base_url,
                 model=model,
                 truncation=truncation,
+                dimensions=dimensions,
                 imageFields=_map_multi2vec_fields(image_fields),
                 textFields=_map_multi2vec_fields(text_fields),
+                videoFields=_map_multi2vec_fields(video_fields),
             ),
             vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
         )