Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Updated APIs
- Updated opensearch-py APIs to reflect [opensearch-api-specification@578a78d](https://github.com/opensearch-project/opensearch-api-specification/commit/578a78dcec746e81da88f81ad442ab1836db7694)
### Changed
- Rename `DenseVector` field type to `KnnVector` ([925](https://github.com/opensearch-project/opensearch-py/pull/925))
### Deprecated
### Removed
### Fixed
Expand Down
4 changes: 2 additions & 2 deletions opensearchpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@
CustomField,
Date,
DateRange,
DenseVector,
Double,
DoubleRange,
Field,
Expand All @@ -107,6 +106,7 @@
IpRange,
Join,
Keyword,
KnnVector,
Long,
LongRange,
Murmur3,
Expand Down Expand Up @@ -178,7 +178,7 @@
"Date",
"DateHistogramFacet",
"DateRange",
"DenseVector",
"KnnVector",
"Document",
"Double",
"DoubleRange",
Expand Down
8 changes: 4 additions & 4 deletions opensearchpy/helpers/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,12 @@ def _deserialize(self, data: Any) -> Any:
return float(data)


class DenseVector(Float):
name: Optional[str] = "dense_vector"
class KnnVector(Float):
name: Optional[str] = "knn_vector"

def __init__(self, dims: Any, **kwargs: Any) -> None:
def __init__(self, dimension: Any, **kwargs: Any) -> None:
kwargs["multi"] = True
super().__init__(dims=dims, **kwargs)
super().__init__(dimension=dimension, **kwargs)


class SparseVector(Field):
Expand Down
114 changes: 114 additions & 0 deletions test_opensearchpy/test_helpers/test_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@

from opensearchpy import InnerDoc, Range, ValidationException
from opensearchpy.helpers import field
from opensearchpy.helpers.index import Index
from opensearchpy.helpers.mapping import Mapping
from opensearchpy.helpers.test import OpenSearchTestCase


def test_date_range_deserialization() -> None:
Expand Down Expand Up @@ -221,3 +224,114 @@ class Inner(InnerDoc):

with pytest.raises(ValidationException):
field.Object(doc_class=Inner, dynamic=False)


def test_knn_vector() -> None:
    """Check the basic mapping, the required argument and the multi flag."""
    knn_field = field.KnnVector(dimension=128)
    expected_mapping = {"type": "knn_vector", "dimension": 128}
    assert knn_field.to_dict() == expected_mapping

    # Test that dimension parameter is required
    with pytest.raises(TypeError):
        field.KnnVector()  # type: ignore

    # The field is expected to be flagged as multi-valued after construction.
    assert knn_field._multi is True


def test_knn_vector_with_additional_params() -> None:
    """Check that an extra ``method`` keyword shows up in the mapping dict."""
    method_config = {"name": "hnsw", "space_type": "l2", "engine": "faiss"}
    knn_field = field.KnnVector(dimension=256, method=method_config)

    # Compared against an independent literal, not the dict passed in above.
    assert knn_field.to_dict() == {
        "type": "knn_vector",
        "dimension": 256,
        "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
    }


def test_knn_vector_serialization() -> None:
    """Check that a float list serializes to an equal list and None stays None."""
    knn_field = field.KnnVector(dimension=3)

    values = [1.0, 2.0, 3.0]
    assert knn_field.serialize(values) == values

    assert knn_field.serialize(None) is None


def test_knn_vector_deserialization() -> None:
    """Check that a float list deserializes to an equal list and None stays None."""
    knn_field = field.KnnVector(dimension=3)

    values = [1.0, 2.0, 3.0]
    assert knn_field.deserialize(values) == values

    assert knn_field.deserialize(None) is None


def test_knn_vector_construct_from_dict() -> None:
    """Check that ``construct_field`` builds a KnnVector from a mapping dict."""
    mapping_dict = {"type": "knn_vector", "dimension": 128}
    built = field.construct_field(mapping_dict)

    assert isinstance(built, field.KnnVector)
    # Compared against a fresh literal, not the dict handed to construct_field.
    assert built.to_dict() == {"type": "knn_vector", "dimension": 128}


def test_knn_vector_construct_from_dict_with_method() -> None:
    """Check that ``construct_field`` keeps a nested ``method`` definition."""
    mapping_dict = {
        "type": "knn_vector",
        "dimension": 256,
        "method": {"name": "hnsw", "space_type": "cosinesimil", "engine": "lucene"},
    }
    built = field.construct_field(mapping_dict)

    assert isinstance(built, field.KnnVector)
    # Compared against a fresh literal, not the dict handed to construct_field.
    assert built.to_dict() == {
        "type": "knn_vector",
        "dimension": 256,
        "method": {"name": "hnsw", "space_type": "cosinesimil", "engine": "lucene"},
    }


class TestKnnVectorIntegration(OpenSearchTestCase):
    """Exercise a ``knn_vector`` field end to end through ``self.client``."""

    def test_index_and_retrieve_knn_vector(self) -> None:
        """Create a k-NN index via the DSL, index one document and query it.

        The index is removed in a ``finally`` clause so that a failing
        assertion does not leave ``itest-knn-vector`` behind.
        """
        index_name = "itest-knn-vector"
        # ensure clean state
        self.client.indices.delete(index=index_name, ignore=404)

        try:
            # Create index using DSL abstractions
            idx = Index(index_name, using=self.client)
            idx.settings(**{"index.knn": True})

            mapping = Mapping()
            mapping.field("vec", field.KnnVector(dimension=3))
            idx.mapping(mapping)

            result = idx.create()
            assert result["acknowledged"] is True

            field_mapping = idx.get_field_mapping(fields="vec")
            assert field_mapping[index_name]["mappings"]["vec"]["mapping"]["vec"] == {
                "type": "knn_vector",
                "dimension": 3,
            }

            # index one document and read it back
            doc = {"vec": [1.0, 2.0, 3.0]}
            result = self.client.index(index=index_name, id=1, body=doc, refresh=True)
            assert result["_shards"]["successful"] == 1
            get_resp = self.client.get(index=index_name, id=1)
            assert get_resp["_source"]["vec"] == doc["vec"]

            # k-NN search for the vector that was just indexed
            search_body = {
                "size": 1,
                "query": {"knn": {"vec": {"vector": [1.0, 2.0, 3.0], "k": 1}}},
            }
            search_resp = self.client.search(index=index_name, body=search_body)
            hits = search_resp["hits"]["hits"]
            assert len(hits) == 1
            assert hits[0]["_id"] == "1"
        finally:
            # cleanup, also on failure; tolerate 404 so that a failed create()
            # does not replace the original error with a not-found error
            self.client.indices.delete(index=index_name, ignore=404)
Loading