Skip to content

Commit 146e62f

Browse files
authored
Merge pull request #1857 from weaviate/multi2multivec-weaviate
add multi2multivec-weaviate vectorizer
2 parents 82eefa8 + 3930f06 commit 146e62f

File tree

3 files changed

+73
-1
lines changed

3 files changed

+73
-1
lines changed

test/collection/test_config.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
Multi2VecField,
2020
VectorDistances,
2121
)
22-
2322
from weaviate.collections.classes.config_vectors import _VectorConfigCreate
2423

2524
DEFAULTS = {
@@ -2173,6 +2172,24 @@ def test_config_with_named_vectors(
21732172
}
21742173
},
21752174
),
2175+
(
2176+
[Configure.MultiVectors.multi2vec_weaviate(name="test", image_field="prop")],
2177+
{
2178+
"test": {
2179+
"vectorizer": {
2180+
"multi2multivec-weaviate": {
2181+
"imageFields": ["prop"],
2182+
}
2183+
},
2184+
"vectorIndexConfig": {
2185+
"multivector": {
2186+
"enabled": True,
2187+
},
2188+
},
2189+
"vectorIndexType": "hnsw",
2190+
}
2191+
},
2192+
),
21762193
(
21772194
[Configure.Vectors.text2vec_gpt4all(name="test", source_properties=["prop"])],
21782195
{

weaviate/collections/classes/config_vectorizers.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
WeaviateModel: TypeAlias = Literal[
7979
"Snowflake/snowflake-arctic-embed-l-v2.0", "Snowflake/snowflake-arctic-embed-m-v1.5"
8080
]
81+
# Known model identifiers for the `multi2multivec-weaviate` vectorizer; the
# `multi2vec_weaviate` factory also accepts arbitrary strings for `model`.
WeaviateMultimodalModel: TypeAlias = Literal["ModernVBERT/colmodernvbert"]
8182

8283

8384
class Vectorizers(str, Enum):
@@ -134,6 +135,7 @@ class Vectorizers(str, Enum):
134135
MULTI2VEC_COHERE = "multi2vec-cohere"
135136
MULTI2VEC_JINAAI = "multi2vec-jinaai"
136137
MULTI2MULTI_JINAAI = "multi2multivec-jinaai"
138+
MULTI2MULTI_WEAVIATE = "multi2multivec-weaviate"
137139
MULTI2VEC_BIND = "multi2vec-bind"
138140
MULTI2VEC_PALM = "multi2vec-palm" # change to google once 1.27 is the lowest supported version
139141
MULTI2VEC_VOYAGEAI = "multi2vec-voyageai"
@@ -516,6 +518,20 @@ def _to_dict(self) -> Dict[str, Any]:
516518
return ret_dict
517519

518520

521+
class _Multi2MultiVecWeaviateConfig(_Multi2VecBase):
    """Vectorizer settings for the `multi2multivec-weaviate` module."""

    # Fixed module identifier; frozen and excluded from the model dump.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2MULTI_WEAVIATE, frozen=True, exclude=True
    )
    # Optional endpoint override; `None` leaves it unset.
    baseURL: Optional[AnyHttpUrl]
    # Optional model identifier; `None` leaves it unset.
    model: Optional[str]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict with `baseURL` rendered as a plain string.

        NOTE(review): the shape of the parent's output is defined in
        `_Multi2VecBase`, which is not visible here.
        """
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            # Nothing to convert; hand back the parent's result untouched.
            return serialized
        serialized["baseURL"] = url.unicode_string()
        return serialized
533+
534+
519535
class _Multi2VecClipConfig(_Multi2VecBase):
520536
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
521537
default=Vectorizers.MULTI2VEC_CLIP, frozen=True, exclude=True

weaviate/collections/classes/config_vectors.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@
3737
VoyageModel,
3838
VoyageMultimodalModel,
3939
WeaviateModel,
40+
WeaviateMultimodalModel,
4041
_Img2VecNeuralConfig,
4142
_map_multi2vec_fields,
4243
_Multi2MultiVecJinaConfig,
44+
_Multi2MultiVecWeaviateConfig,
4345
_Multi2VecAWSConfig,
4446
_Multi2VecBindConfig,
4547
_Multi2VecClipConfig,
@@ -288,6 +290,43 @@ def multi2vec_jinaai(
288290
),
289291
)
290292

293+
@staticmethod
def multi2vec_weaviate(
    *,
    image_field: str,
    name: Optional[str] = None,
    encoding: Optional[_MultiVectorEncodingConfigCreate] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[Union[WeaviateMultimodalModel, str]] = None,
    multi_vector_config: Optional[_MultiVectorConfigCreate] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
) -> _VectorConfigCreate:
    """Create a multi-vector configuration backed by the `multi2multivec-weaviate` module.

    Args:
        image_field: The single image property to vectorize.
        name: The name of the vector.
        encoding: The multi-vector encoding to use in the vector index. Defaults to `None`, which uses the server-defined default.
        quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
        base_url: The base URL where API requests should be sent. Defaults to `None`, which uses the server-defined default.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        multi_vector_config: The multi-vector index configuration. Use `wvc.config.Configure.VectorIndex.MultiVector` to create one. Defaults to `None`.
        vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create one. Defaults to `None`.

    Returns:
        The `_VectorConfigCreate` combining the vectorizer and index settings.
    """
    # Only an image field is forwarded; text fields are explicitly unset.
    vectorizer_config = _Multi2MultiVecWeaviateConfig(
        baseURL=base_url,
        model=model,
        imageFields=_map_multi2vec_fields([image_field]),
        textFields=None,
    )
    index_config = _IndexWrappers.multi(
        vector_index_config, quantizer, multi_vector_config, encoding
    )
    return _VectorConfigCreate(
        name=name,
        vectorizer=vectorizer_config,
        vector_index_config=index_config,
    )
329+
291330

292331
class _Vectors:
293332
@staticmethod

0 commit comments

Comments
 (0)