diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index ae6db911a..32e153edd 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -45,6 +45,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vector_index import ( @@ -1886,6 +1887,20 @@ def vector_index_type() -> str: VectorIndexConfigHNSW = _VectorIndexConfigHNSW +@dataclass +class _VectorIndexConfigHFresh(_VectorIndexConfig): + distance_metric: VectorDistances + max_posting_size: int + min_posting_size: int + replicas: int + rng_factor: int + search_probe: int + + @staticmethod + def vector_index_type() -> str: + return VectorIndexType.HFRESH.value + +VectorIndexConfigHFresh = _VectorIndexConfigHFresh @dataclass class _VectorIndexConfigFlat(_VectorIndexConfig): @@ -1960,7 +1975,7 @@ def to_dict(self) -> Dict[str, Any]: class _NamedVectorConfig(_ConfigBase): vectorizer: _NamedVectorizerConfig vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic + VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, VectorIndexConfigHFresh ] def to_dict(self) -> Dict: @@ -1985,7 +2000,7 @@ class _CollectionConfig(_ConfigBase): reranker_config: Optional[RerankerConfig] sharding_config: Optional[ShardingConfig] vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, None + VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, VectorIndexConfigHFresh, None ] vector_index_type: Optional[VectorIndexType] vectorizer_config: Optional[VectorizerConfig] @@ -2625,6 +2640,29 @@ def dynamic( quantizer=quantizer, ) + @staticmethod + def hfresh( + max_posting_size: Optional[int] = None, + min_posting_size: Optional[int] = None, + rng_factor: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_RQConfigUpdate] = None, + ) -> _VectorIndexConfigHFreshUpdate: + """Create an `_VectorIndexConfigHFreshUpdate` object to update the configuration of the HFresh vector index. + + Use this method when defining the `vectorizer_config` argument in `collection.update()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigHFreshUpdate( + maxPostingSize=max_posting_size, + minPostingSize=min_posting_size, + rngFactor=rng_factor, + searchProbe=search_probe, + quantizer=quantizer, + ) + class Reconfigure: """Use this factory class to generate the correct `xxxConfig` object for use when using the `collection.update()` method. diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 6b815ba24..238c9cf06 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -39,6 +39,7 @@ _VectorIndexConfigDynamic, _VectorIndexConfigFlat, _VectorIndexConfigHNSW, + _VectorIndexConfigHFresh, _VectorizerConfig, ) @@ -210,6 +211,18 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW: multi_vector=__get_multivector(config), ) +def __get_hfresh_config(config: Dict[str, Any]) -> _VectorIndexConfigHFresh: + quantizer = __get_quantizer_config(config) + return _VectorIndexConfigHFresh( + distance_metric=VectorDistances(config.get("distance")), + max_posting_size=config["maxPostingSize"], + min_posting_size=config["minPostingSize"], + replicas=config["replicas"], + rng_factor=config["rngFactor"], + search_probe=config["searchProbe"], + quantizer=quantizer, + multi_vector=None, + ) def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: quantizer = __get_quantizer_config(config) @@ -223,7 +236,7 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: def __get_vector_index_config( schema: Dict[str, Any], -) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None]: +) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, _VectorIndexConfigHFresh, None]: if "vectorIndexConfig" not in schema: return None if schema["vectorIndexType"] == "hnsw": @@ -237,6 +250,8 @@ def __get_vector_index_config( hnsw=__get_hnsw_config(schema["vectorIndexConfig"]["hnsw"]), flat=__get_flat_config(schema["vectorIndexConfig"]["flat"]), ) + elif schema["vectorIndexType"] == "hfresh": + return __get_hfresh_config(schema["vectorIndexConfig"]) else: return None diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index bc1d27cd7..963b39b6a 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -15,6 +15,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -1338,6 +1339,7 @@ def update( *, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index ce70c9a0f..60bdfb39e 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -34,11 +34,14 @@ class VectorIndexType(str, Enum): Attributes: HNSW: Hierarchical Navigable Small World (HNSW) index. FLAT: Flat index. + DYNAMIC: Dynamic index. + HFRESH: HFRESH index. """ HNSW = "hnsw" FLAT = "flat" DYNAMIC = "dynamic" + HFRESH = "hfresh" class _MultiVectorConfigCreateBase(_ConfigCreateModel): @@ -127,6 +130,18 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshCreate(_VectorIndexConfigCreate): + maxPostingSize: Optional[int] + minPostingSize: Optional[int] + replicas: Optional[int] + rngFactor: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatCreate(_VectorIndexConfigCreate): vectorCacheMaxObjects: Optional[int] @@ -149,6 +164,17 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshUpdate(_VectorIndexConfigUpdate): + maxPostingSize: Optional[int] + minPostingSize: Optional[int] + rngFactor: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatUpdate(_VectorIndexConfigUpdate): vectorCacheMaxObjects: Optional[int] @@ -564,6 +590,36 @@ def hnsw( multivector=multi_vector, ) + @staticmethod + def hfresh( + distance_metric: Optional[VectorDistances] = None, + max_posting_size: Optional[int] = None, + min_posting_size: Optional[int] = None, + replicas: Optional[int] = None, + rng_factor: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, + + ) -> _VectorIndexConfigHFreshCreate: + """Create a `_VectorIndexConfigHFreshCreate` object to be used when defining the HFresh vector index configuration of Weaviate. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view! + """ + return _VectorIndexConfigHFreshCreate( + distance=distance_metric, + maxPostingSize=max_posting_size, + minPostingSize=min_posting_size, + replicas=replicas, + rngFactor=rng_factor, + searchProbe=search_probe, + quantizer=quantizer, + multivector=multi_vector, + ) + @staticmethod def flat( distance_metric: Optional[VectorDistances] = None, diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index 64d11b100..d004c0372 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -20,6 +20,8 @@ _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWCreate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshCreate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -126,6 +128,20 @@ def __hnsw( multivector=multivector, ) + @staticmethod + def __hfresh(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigHFreshCreate: + return _VectorIndexConfigHFreshCreate( + distance_metric=None, + maxPostingSize=None, + minPostingSize=None, + replicas=None, + rngFactor=None, + searchProbe=None, + quantizer=quantizer, + multivector=None, + distance=None, + ) + @staticmethod def __flat(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigFlatCreate: return _VectorIndexConfigFlatCreate( @@ -1760,6 +1776,7 @@ def update( name: Optional[str] = None, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi index 9fcfefdb3..1997eef2a 100644 --- a/weaviate/collections/config/async_.pyi +++ b/weaviate/collections/config/async_.pyi @@ -21,6 +21,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate from weaviate.connect.v4 import ConnectionAsync @@ -45,13 +46,14 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]): multi_tenancy_config: Optional[_MultiTenancyConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHFreshUpdate] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index e5772b76a..80534531a 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -38,6 +38,7 @@ _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_methods import ( _collection_config_from_json, @@ -134,6 +135,7 @@ def update( Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, ] ] = None, vectorizer_config: Optional[ @@ -141,6 +143,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, @@ -184,6 +187,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, ), ): _Warnings.vectorizer_config_in_config_update() diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi index 89f37615e..3c9dd9dd9 100644 --- a/weaviate/collections/config/sync.pyi +++ b/weaviate/collections/config/sync.pyi @@ -21,6 +21,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate from weaviate.connect.v4 import ConnectionSync @@ -43,13 +44,14 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]): multi_tenancy_config: Optional[_MultiTenancyConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHFreshUpdate] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/outputs/config.py b/weaviate/outputs/config.py index d6c8ed230..d6c3b4965 100644 --- a/weaviate/outputs/config.py +++ b/weaviate/outputs/config.py @@ -23,6 +23,7 @@ VectorDistances, VectorIndexConfigFlat, VectorIndexConfigHNSW, + VectorIndexConfigHFresh, VectorIndexType, VectorizerConfig, Vectorizers, @@ -52,6 +53,7 @@ "ShardTypes", "VectorDistances", "VectorIndexConfigHNSW", + "VectorIndexConfigHFresh", "VectorIndexConfigFlat", "VectorIndexType", "Vectorizers",