Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions test/collection/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ def test_basic_config():
}
},
),
(
Configure.Vectorizer.multi2vec_voyageai(
model="voyage-multimodal-3",
truncation=False,
output_encoding="base64",
vectorize_collection_name=False,
base_url="https://api.voyageai.com",
),
{
"multi2vec-voyageai": {
"model": "voyage-multimodal-3",
"truncation": False,
"output_encoding": "base64",
"vectorizeClassName": False,
"baseURL": "https://api.voyageai.com/",
}
},
),
(
Configure.Vectorizer.text2vec_gpt4all(),
{
Expand Down Expand Up @@ -1293,6 +1311,20 @@ def test_vector_config_flat_pq() -> None:
}
},
),
(
[Configure.NamedVectors.multi2vec_voyageai(name="test", text_fields=["prop"])],
{
"test": {
"vectorizer": {
"multi2vec-voyageai": {
"vectorizeClassName": True,
"textFields": ["prop"],
}
},
"vectorIndexType": "hnsw",
}
},
),
(
[
Configure.NamedVectors.multi2vec_jinaai(
Expand Down
55 changes: 55 additions & 0 deletions weaviate/collections/classes/config_named_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
_Img2VecNeuralConfig,
_Multi2VecBindConfig,
_Multi2VecClipConfig,
_Multi2VecVoyageaiConfig,
_Multi2VecGoogleConfig,
_Ref2VecCentroidConfig,
_Text2VecAWSConfig,
Expand Down Expand Up @@ -48,6 +49,7 @@
OpenAIType,
Vectorizers,
VoyageModel,
VoyageMultimodalModel,
_map_multi2vec_fields,
_VectorizerCustomConfig,
_Text2VecDatabricksConfig,
Expand Down Expand Up @@ -755,6 +757,59 @@ def multi2vec_bind(
vector_index_config=vector_index_config,
)

@staticmethod
def multi2vec_voyageai(
    name: str,
    *,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _NamedVectorConfigCreate:
    """Create a named vector using the `multi2vec_voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Arguments:
        `name`
            The name of the named vector.
        `vector_index_config`
            The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `base_url`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
        `truncation`
            The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        `output_encoding`
            Format in which the embeddings are encoded. Defaults to `None`, which uses the server-defined default.
        `image_fields`
            The image fields to use in vectorization.
        `text_fields`
            The text fields to use in vectorization.

    Raises:
        `pydantic.ValidationError` if the vectorizer configuration fails validation (for example, an invalid `base_url`).
    """
    # Build the module-specific vectorizer settings first, then wrap them
    # together with the vector name and the optional index configuration.
    voyage_vectorizer = _Multi2VecVoyageaiConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        output_encoding=output_encoding,
        vectorizeClassName=vectorize_collection_name,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
    )
    return _NamedVectorConfigCreate(
        name=name,
        vectorizer=voyage_vectorizer,
        vector_index_config=vector_index_config,
    )

@staticmethod
def ref2vec_centroid(
name: str,
Expand Down
65 changes: 65 additions & 0 deletions weaviate/collections/classes/config_vectorizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"voyage-finance-2",
"voyage-multilingual-2",
]
VoyageMultimodalModel: TypeAlias = Literal["voyage-multimodal-3",]
AWSModel: TypeAlias = Literal[
"amazon.titan-embed-text-v1",
"cohere.embed-english-v3",
Expand Down Expand Up @@ -106,6 +107,8 @@ class Vectorizers(str, Enum):
Weaviate module backed by a palm model for images and text.
`MULTI2VEC_BIND`
Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.
`MULTI2VEC_VOYAGEAI`
Weaviate module backed by Voyage AI multimodal embedding models.
`REF2VEC_CENTROID`
Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.
"""
Expand All @@ -131,6 +134,7 @@ class Vectorizers(str, Enum):
MULTI2VEC_JINAAI = "multi2vec-jinaai"
MULTI2VEC_BIND = "multi2vec-bind"
MULTI2VEC_PALM = "multi2vec-palm" # change to google once 1.27 is the lowest supported version
MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
REF2VEC_CENTROID = "ref2vec-centroid"


Expand Down Expand Up @@ -462,6 +466,22 @@ class _Multi2VecBindConfig(_Multi2VecBase):
videoFields: Optional[List[Multi2VecField]]


# Creation-time configuration for the `multi2vec-voyageai` vectorizer module.
class _Multi2VecVoyageaiConfig(_Multi2VecBase):
    # Fixed module identifier; frozen and excluded from the model's own dump.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_VOYAGEAI, frozen=True, exclude=True
    )
    baseURL: Optional[AnyHttpUrl]
    model: Optional[str]
    truncation: Optional[bool]
    output_encoding: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dictionary form with `baseURL` rendered as a plain string."""
        serialized = super()._to_dict()
        if self.baseURL is None:
            return serialized
        # Replace whatever the parent emitted for the URL with its string form.
        serialized["baseURL"] = self.baseURL.unicode_string()
        return serialized


class _Ref2VecCentroidConfig(_VectorizerConfigCreate):
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
default=Vectorizers.REF2VEC_CENTROID, frozen=True, exclude=True
Expand Down Expand Up @@ -796,6 +816,51 @@ def multi2vec_cohere(
textFields=_map_multi2vec_fields(text_fields),
)

@staticmethod
def multi2vec_voyageai(
    *,
    model: Optional[Union[VoyageMultimodalModel, str]] = None,
    truncation: Optional[bool] = None,
    output_encoding: Optional[str] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _VectorizerConfigCreate:
    """Create a `_Multi2VecVoyageaiConfig` object for use when vectorizing using the `multi2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal)
    for detailed usage.

    Arguments:
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
        `truncation`
            The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
        `output_encoding`
            Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `base_url`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `image_fields`
            The image fields to use in vectorization.
        `text_fields`
            The text fields to use in vectorization.

    Raises:
        `pydantic.ValidationError` if the vectorizer configuration fails validation (for example, an invalid `base_url`).
    """
    # `output_encoding` now defaults to `None` like every other optional
    # setting, so callers are no longer forced to pass it explicitly.
    return _Multi2VecVoyageaiConfig(
        baseURL=base_url,
        model=model,
        truncation=truncation,
        output_encoding=output_encoding,
        vectorizeClassName=vectorize_collection_name,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
    )

@staticmethod
def text2vec_databricks(
*,
Expand Down
Loading