fix: support non-float vectors in struct array in search #3277
fix: support non-float vectors in struct array in search #3277 — SpadeA-Tang wants to merge 1 commit into milvus-io:master from
Conversation
Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
|
[APPROVALNOTIFIER] This PR is NOT APPROVED. This pull-request has been approved by: SpadeA-Tang. The full list of commands accepted by this bot can be found here. Details: Needs approval from an approver in each of these files. Approvers can indicate their approval by writing |
Codecov Report: ✅ All modified and coverable lines are covered by tests. Additional details and impacted files:
@@ Coverage Diff @@
## master #3277 +/- ##
==========================================
+ Coverage 76.36% 76.57% +0.21%
==========================================
Files 63 63
Lines 13321 13330 +9
==========================================
+ Hits 10173 10208 +35
+ Misses 3148 3122 -26 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
|
Verified against Milvus. However, the single-vector

Reproduction script:

import numpy as np
from pymilvus import MilvusClient, DataType
# Reproduction script: searches a FLOAT16 vector stored inside an array-of-struct
# field, passing the query vector as raw bytes. Requires a Milvus server
# reachable at URI.
URI = "http://localhost:19530"
DIM = 32  # dimensionality shared by both vector fields
NB = 100  # number of rows inserted

client = MilvusClient(uri=URI)

# Drop any collection left over from a previous run so the script is re-runnable.
col = "repro_ef_float16"
if col in client.list_collections():
    client.drop_collection(col)

# Top-level schema: explicit INT64 primary key plus a regular float vector.
schema = client.create_schema(auto_id=False)
schema.add_field("id", DataType.INT64, is_primary=True)
schema.add_field("top_vec", DataType.FLOAT_VECTOR, dim=DIM)

# Struct element schema: a FLOAT16 vector and an INT64 scalar per element.
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("embedding", DataType.FLOAT16_VECTOR, dim=DIM)
struct_schema.add_field("val", DataType.INT64)
schema.add_field(
    "items",
    DataType.ARRAY,
    element_type=DataType.STRUCT,
    struct_schema=struct_schema,
    max_capacity=5,
)

# One HNSW index on the top-level vector and one on the vector nested in the
# struct array (addressed as "items[embedding]").
index_params = client.prepare_index_params()
index_params.add_index(
    "top_vec",
    index_type="HNSW",
    metric_type="COSINE",
    params={"M": 16, "efConstruction": 200},
)
index_params.add_index(
    "items[embedding]",
    index_type="HNSW",
    metric_type="MAX_SIM_L2",
    params={"M": 16, "efConstruction": 200},
)
client.create_collection(collection_name=col, schema=schema, index_params=index_params)

# Build NB rows; each row carries three struct elements whose float16
# embeddings are serialized to raw bytes.
data = []
for i in range(NB):
    elems = [
        {
            "embedding": np.random.rand(DIM).astype(np.float16).tobytes(),
            "val": i * 10 + j,
        }
        for j in range(3)
    ]
    data.append(
        {
            "id": i,
            "top_vec": np.random.rand(DIM).astype(np.float32).tolist(),
            "items": elems,
        }
    )
client.insert(col, data)
client.flush(col)

# Query with a single float16 vector in the same raw-bytes form that was inserted.
query_vec = data[0]["items"][0]["embedding"]  # raw bytes
# NOTE(review): the search metric is "L2" while the nested index was built with
# "MAX_SIM_L2" — presumably intentional for this reproduction; confirm.
client.search(
    collection_name=col,
    data=[query_vec],
    anns_field="items[embedding]",
    search_params={"metric_type": "L2"},
    filter="element_filter(items, $[val] >= 0)",
    limit=5,
)
issue: #3269