Skip to content

Commit 5bcb030

Browse files
committed
added sample for getting around 2mb limit
1 parent 1833f1e commit 5bcb030

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

datastore/cloud-client/vector_search.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,35 @@ def vector_search_distance_threshold(db):
116116
for entity in vector_query.fetch():
117117
print(f"{entity.id}")
118118
# [END datastore_vector_search_distance_threshold]
119-
return vector_query
119+
return vector_query
120+
121+
122+
def vector_search_large_query(db):
    """Run a vector search in two steps: keys-only query, then a key lookup.

    The nearest-neighbour query is issued as a keys-only query, so its results
    carry the entity key and the computed distance. The remaining entity data
    is then loaded in a second request with ``db.get_multi``.

    Args:
        db: Datastore client used to build the query and to look up entities.

    Returns:
        A ``(key_list, vector_results, full_results)`` tuple: the keys found
        by the vector query, the entities returned by that keys-only query,
        and the entities returned by ``get_multi`` for those keys.
    """
    # [START datastore_vector_search_large_query]
    from google.cloud.datastore.vector import DistanceMeasure
    from google.cloud.datastore.vector import FindNearest
    from google.cloud.datastore.vector import Vector

    # first, perform a vector search query retrieving just the keys
    nearest = FindNearest(
        vector_property="embedding_field",
        query_vector=Vector([3.0, 1.0, 2.0]),
        distance_measure=DistanceMeasure.EUCLIDEAN,
        limit=100,
        distance_result_property="vector_distance",
    )
    vector_query = db.query(kind="coffee-beans", find_nearest=nearest)
    vector_query.keys_only()
    vector_results = list(vector_query.fetch())
    key_list = [result.key for result in vector_results]

    # next, perform a second query for the remaining data
    full_results = db.get_multi(key_list)

    # combine and print results, pairing both result sets by key
    vector_map = {result.key: result for result in vector_results}
    full_map = {result.key: result for result in full_results}
    for key in key_list:
        distance = vector_map[key]["vector_distance"]
        entity = full_map[key]
        print(f"distance: {distance} entity: {entity}")
    # [END datastore_vector_search_large_query]
    return key_list, vector_results, full_results

datastore/cloud-client/vector_search_test.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,29 @@ def test_vector_search_distance_threshold(db):
106106

107107
assert len(results) == 2
108108
assert results[0].key.name == "Liberica"
109-
assert results[1].key.name == "Robusta"
109+
assert results[1].key.name == "Robusta"
110+
111+
def test_vector_search_large_query(db):
    """Check the two-step large-query sample returns consistent results.

    Verifies that the key list, the keys-only results and the full results
    have the same size and keys, that the keys-only results expose the
    distance but not the embedding, and that the full results expose the
    embedding but not the distance.
    """
    keys, keys_only_results, looked_up_results = vector_search_large_query(db)
    assert len(keys) == 4

    # each list should have same number of elements
    assert len(keys) == len(keys_only_results)
    assert len(keys) == len(looked_up_results)

    # should all have the same keys
    keys_only_by_key = {item.key: item for item in keys_only_results}
    looked_up_by_key = {item.key: item for item in looked_up_results}
    for key in keys:
        assert key in keys_only_by_key
        assert key in looked_up_by_key

    # vector_results should just contain key and distance
    for item in keys_only_results:
        assert item.key is not None
        assert item["vector_distance"] is not None
        with pytest.raises(KeyError):
            item["embedding_field"]

    # full_results should have other fields, but no vector_distance
    for item in looked_up_results:
        assert item.key is not None
        assert isinstance(item["embedding_field"], Vector)
        with pytest.raises(KeyError):
            item["vector_distance"]

0 commit comments

Comments
 (0)