diff --git a/CHANGELOG.md b/CHANGELOG.md
index cba546e60..debfb72be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 ### Updated APIs
 - Updated opensearch-py APIs to reflect [opensearch-api-specification@578a78d](https://github.com/opensearch-project/opensearch-api-specification/commit/578a78dcec746e81da88f81ad442ab1836db7694)
 ### Changed
+- Rename `DenseVector` field type to `KnnVector` ([925](https://github.com/opensearch-project/opensearch-py/pull/925))
 ### Deprecated
 ### Removed
 ### Fixed
diff --git a/opensearchpy/__init__.py b/opensearchpy/__init__.py
index 94b6d0565..d74b7372c 100644
--- a/opensearchpy/__init__.py
+++ b/opensearchpy/__init__.py
@@ -92,7 +92,6 @@
     CustomField,
     Date,
     DateRange,
-    DenseVector,
     Double,
     DoubleRange,
     Field,
@@ -107,6 +106,7 @@
     IpRange,
     Join,
     Keyword,
+    KnnVector,
     Long,
     LongRange,
     Murmur3,
@@ -178,7 +178,7 @@
     "Date",
     "DateHistogramFacet",
     "DateRange",
-    "DenseVector",
+    "KnnVector",
     "Document",
     "Double",
     "DoubleRange",
diff --git a/opensearchpy/helpers/field.py b/opensearchpy/helpers/field.py
index e41aa46d7..be43ecc59 100644
--- a/opensearchpy/helpers/field.py
+++ b/opensearchpy/helpers/field.py
@@ -354,12 +354,20 @@ def _deserialize(self, data: Any) -> Any:
         return float(data)
 
 
-class DenseVector(Float):
-    name: Optional[str] = "dense_vector"
+class KnnVector(Float):
+    """
+    Field mapped as the ``knn_vector`` type.
+
+    ``dimension`` is required; it and any further keyword arguments (such as
+    ``method``) are passed to the base field and emitted by ``to_dict()``.
+    ``multi`` is always forced to ``True``, so values are handled as lists.
+    """
+
+    name: Optional[str] = "knn_vector"
 
-    def __init__(self, dims: Any, **kwargs: Any) -> None:
+    def __init__(self, dimension: Any, **kwargs: Any) -> None:
         kwargs["multi"] = True
-        super().__init__(dims=dims, **kwargs)
+        super().__init__(dimension=dimension, **kwargs)
 
 
 class SparseVector(Field):
diff --git a/test_opensearchpy/test_helpers/test_field.py b/test_opensearchpy/test_helpers/test_field.py
index 65dbab5a4..f4f25f2cf 100644
--- a/test_opensearchpy/test_helpers/test_field.py
+++ b/test_opensearchpy/test_helpers/test_field.py
@@ -34,6 +34,9 @@
 
 from opensearchpy import InnerDoc, Range, ValidationException
 from opensearchpy.helpers import field
+from opensearchpy.helpers.index import Index
+from opensearchpy.helpers.mapping import Mapping
+from opensearchpy.helpers.test import OpenSearchTestCase
 
 
 def test_date_range_deserialization() -> None:
@@ -221,3 +224,114 @@ class Inner(InnerDoc):
 
     with pytest.raises(ValidationException):
         field.Object(doc_class=Inner, dynamic=False)
+
+
+def test_knn_vector() -> None:
+    f = field.KnnVector(dimension=128)
+    assert f.to_dict() == {"type": "knn_vector", "dimension": 128}
+
+    # Test that dimension parameter is required
+    with pytest.raises(TypeError):
+        field.KnnVector()  # type: ignore
+
+    assert f._multi is True
+
+
+def test_knn_vector_with_additional_params() -> None:
+    f = field.KnnVector(
+        dimension=256, method={"name": "hnsw", "space_type": "l2", "engine": "faiss"}
+    )
+    expected = {
+        "type": "knn_vector",
+        "dimension": 256,
+        "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
+    }
+    assert f.to_dict() == expected
+
+
+def test_knn_vector_serialization() -> None:
+    f = field.KnnVector(dimension=3)
+
+    vector_data = [1.0, 2.0, 3.0]
+    serialized = f.serialize(vector_data)
+    assert serialized == vector_data
+
+    assert f.serialize(None) is None
+
+
+def test_knn_vector_deserialization() -> None:
+    f = field.KnnVector(dimension=3)
+
+    vector_data = [1.0, 2.0, 3.0]
+    deserialized = f.deserialize(vector_data)
+    assert deserialized == vector_data
+
+    assert f.deserialize(None) is None
+
+
+def test_knn_vector_construct_from_dict() -> None:
+    f = field.construct_field({"type": "knn_vector", "dimension": 128})
+
+    assert isinstance(f, field.KnnVector)
+    assert f.to_dict() == {"type": "knn_vector", "dimension": 128}
+
+
+def test_knn_vector_construct_from_dict_with_method() -> None:
+    f = field.construct_field(
+        {
+            "type": "knn_vector",
+            "dimension": 256,
+            "method": {"name": "hnsw", "space_type": "cosinesimil", "engine": "lucene"},
+        }
+    )
+
+    assert isinstance(f, field.KnnVector)
+    expected = {
+        "type": "knn_vector",
+        "dimension": 256,
+        "method": {"name": "hnsw", "space_type": "cosinesimil", "engine": "lucene"},
+    }
+    assert f.to_dict() == expected
+
+
+class TestKnnVectorIntegration(OpenSearchTestCase):
+    def test_index_and_retrieve_knn_vector(self) -> None:
+        index_name = "itest-knn-vector"
+        # ensure clean state
+        self.client.indices.delete(index=index_name, ignore=404)
+
+        # Create index using DSL abstractions
+        idx = Index(index_name, using=self.client)
+        idx.settings(**{"index.knn": True})
+
+        mapping = Mapping()
+        mapping.field("vec", field.KnnVector(dimension=3))
+        idx.mapping(mapping)
+
+        result = idx.create()
+        assert result["acknowledged"] is True
+
+        field_mapping = idx.get_field_mapping(fields="vec")
+        assert field_mapping[index_name]["mappings"]["vec"]["mapping"]["vec"] == {
+            "type": "knn_vector",
+            "dimension": 3,
+        }
+
+        # index a document, read it back by id, then find it with a k-NN query
+        doc = {"vec": [1.0, 2.0, 3.0]}
+        result = self.client.index(index=index_name, id=1, body=doc, refresh=True)
+        assert result["_shards"]["successful"] == 1
+        get_resp = self.client.get(index=index_name, id=1)
+        assert get_resp["_source"]["vec"] == doc["vec"]
+
+        search_body = {
+            "size": 1,
+            "query": {"knn": {"vec": {"vector": [1.0, 2.0, 3.0], "k": 1}}},
+        }
+        search_resp = self.client.search(index=index_name, body=search_body)
+        hits = search_resp["hits"]["hits"]
+        assert len(hits) == 1
+        assert hits[0]["_id"] == "1"
+
+        # cleanup
+        self.client.indices.delete(index=index_name)