Skip to content

Commit 4ecefa0

Browse files
authored
fix: more realistic text embeddings in Firestore vector search examples (#12791)
* fix: more realistic text embeddings
1 parent a4bbb88 commit 4ecefa0

File tree

2 files changed

+19
-17
lines changed

2 files changed

+19
-17
lines changed

firestore/cloud-client/vector_search.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def store_vectors():
2424
doc = {
2525
"name": "Kahawa coffee beans",
2626
"description": "Information about the Kahawa coffee beans.",
27-
"embedding_field": Vector([1.0, 2.0, 3.0]),
27+
"embedding_field": Vector([0.18332680, 0.24160706, 0.3416704]),
2828
}
2929

3030
collection.add(doc)
@@ -41,7 +41,7 @@ def vector_search_basic(db):
4141
# Requires a single-field vector index
4242
vector_query = collection.find_nearest(
4343
vector_field="embedding_field",
44-
query_vector=Vector([3.0, 1.0, 2.0]),
44+
query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
4545
distance_measure=DistanceMeasure.EUCLIDEAN,
4646
limit=5,
4747
)
@@ -60,7 +60,7 @@ def vector_search_prefilter(db):
6060
# Requires a composite vector index
6161
vector_query = collection.where("color", "==", "red").find_nearest(
6262
vector_field="embedding_field",
63-
query_vector=Vector([3.0, 1.0, 2.0]),
63+
query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
6464
distance_measure=DistanceMeasure.EUCLIDEAN,
6565
limit=5,
6666
)
@@ -77,7 +77,7 @@ def vector_search_distance_result_field(db):
7777

7878
vector_query = collection.find_nearest(
7979
vector_field="embedding_field",
80-
query_vector=Vector([3.0, 1.0, 2.0]),
80+
query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
8181
distance_measure=DistanceMeasure.EUCLIDEAN,
8282
limit=10,
8383
distance_result_field="vector_distance",
@@ -97,7 +97,7 @@ def vector_search_distance_result_field_with_mask(db):
9797
# [START firestore_vector_search_distance_result_field_masked]
9898
vector_query = collection.select(["color", "vector_distance"]).find_nearest(
9999
vector_field="embedding_field",
100-
query_vector=Vector([3.0, 1.0, 2.0]),
100+
query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
101101
distance_measure=DistanceMeasure.EUCLIDEAN,
102102
limit=10,
103103
distance_result_field="vector_distance",
@@ -115,7 +115,7 @@ def vector_search_distance_threshold(db):
115115

116116
vector_query = collection.find_nearest(
117117
vector_field="embedding_field",
118-
query_vector=Vector([3.0, 1.0, 2.0]),
118+
query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
119119
distance_measure=DistanceMeasure.EUCLIDEAN,
120120
limit=10,
121121
distance_threshold=4.5,

firestore/cloud-client/vector_search_test.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -41,18 +41,18 @@ def test_store_vectors():
4141
def add_coffee_beans_data(db):
4242
coll = db.collection("coffee-beans")
4343
coll.document("bean1").set(
44-
{"name": "Arabica", "embedding_field": Vector([10.0, 1.0, 2.0]), "color": "red"}
44+
{"name": "Arabica", "embedding_field": Vector([0.80522226, 0.18332680, 0.24160706]), "color": "red"}
4545
)
4646
coll.document("bean2").set(
47-
{"name": "Robusta", "embedding_field": Vector([4.0, 1.0, 2.0]), "color": "blue"}
47+
{"name": "Robusta", "embedding_field": Vector([0.43979567, 0.18332680, 0.24160706]), "color": "blue"}
4848
)
4949
coll.document("bean3").set(
50-
{"name": "Excelsa", "embedding_field": Vector([11.0, 1.0, 2.0]), "color": "red"}
50+
{"name": "Excelsa", "embedding_field": Vector([0.90477061, 0.18332680, 0.24160706]), "color": "red"}
5151
)
5252
coll.document("bean4").set(
5353
{
5454
"name": "Liberica",
55-
"embedding_field": Vector([3.0, 1.0, 2.0]),
55+
"embedding_field": Vector([0.3416704, 0.18332680, 0.24160706]),
5656
"color": "green",
5757
}
5858
)
@@ -101,11 +101,11 @@ def test_vector_search_distance_result_field():
101101
assert results[0].to_dict()["name"] == "Liberica"
102102
assert results[0].to_dict()["vector_distance"] == 0.0
103103
assert results[1].to_dict()["name"] == "Robusta"
104-
assert results[1].to_dict()["vector_distance"] == 1.0
104+
assert results[1].to_dict()["vector_distance"] == 0.09812527000000004
105105
assert results[2].to_dict()["name"] == "Arabica"
106-
assert results[2].to_dict()["vector_distance"] == 7.0
106+
assert results[2].to_dict()["vector_distance"] == 0.46355186
107107
assert results[3].to_dict()["name"] == "Excelsa"
108-
assert results[3].to_dict()["vector_distance"] == 8.0
108+
assert results[3].to_dict()["vector_distance"] == 0.56310021
109109

110110

111111
def test_vector_search_distance_result_field_with_mask():
@@ -119,9 +119,9 @@ def test_vector_search_distance_result_field_with_mask():
119119

120120
assert len(results) == 4
121121
assert results[0].to_dict() == {"color": "green", "vector_distance": 0.0}
122-
assert results[1].to_dict() == {"color": "blue", "vector_distance": 1.0}
123-
assert results[2].to_dict() == {"color": "red", "vector_distance": 7.0}
124-
assert results[3].to_dict() == {"color": "red", "vector_distance": 8.0}
122+
assert results[1].to_dict() == {"color": "blue", "vector_distance": 0.09812527000000004}
123+
assert results[2].to_dict() == {"color": "red", "vector_distance": 0.46355186}
124+
assert results[3].to_dict() == {"color": "red", "vector_distance": 0.56310021}
125125

126126

127127
def test_vector_search_distance_threshold():
@@ -133,6 +133,8 @@ def test_vector_search_distance_threshold():
133133
vector_query = vector_search_distance_threshold(db)
134134
results = list(vector_query.stream())
135135

136-
assert len(results) == 2
136+
assert len(results) == 4
137137
assert results[0].to_dict()["name"] == "Liberica"
138138
assert results[1].to_dict()["name"] == "Robusta"
139+
assert results[2].to_dict()["name"] == "Arabica"
140+
assert results[3].to_dict()["name"] == "Excelsa"

0 commit comments

Comments (0)