Skip to content

Commit 017b06c

Browse files
committed
validator/tests: add vector_similarity() function test
Add tests that validate the results of the vector_similarity() function. Refs: scylladb/scylladb#25993
1 parent 3291db0 commit 017b06c

File tree

2 files changed

+256
-0
lines changed

2 files changed

+256
-0
lines changed

crates/validator/src/tests/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod crud;
88
mod full_scan;
99
mod reconnect;
1010
mod serde;
11+
mod vector_similarity;
1112

1213
use crate::ServicesSubnet;
1314
use crate::dns::Dns;
@@ -221,6 +222,7 @@ pub(crate) async fn register() -> Vec<(String, TestCase)> {
221222
("full_scan", full_scan::new().await),
222223
("reconnect", reconnect::new().await),
223224
("serde", serde::new().await),
225+
("vector_similarity", vector_similarity::new().await),
224226
]
225227
.into_iter()
226228
.map(|(name, test_case)| (name.to_string(), test_case))
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
/*
2+
* Copyright 2025-present ScyllaDB
3+
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
4+
*/
5+
6+
use crate::common::*;
7+
use crate::tests::*;
8+
use std::time::Duration;
9+
use tracing::info;
10+
11+
pub(crate) async fn new() -> TestCase {
12+
let timeout = Duration::from_secs(30);
13+
TestCase::empty()
14+
.with_init(timeout, init)
15+
.with_cleanup(timeout, cleanup)
16+
.with_test(
17+
"vector_similarity_function_returns_expected_results",
18+
timeout,
19+
vector_similarity_function_returns_expected_results,
20+
)
21+
.with_test(
22+
"vector_similarity_function_with_clustering_key",
23+
timeout,
24+
vector_similarity_function_with_clustering_key,
25+
)
26+
.with_test(
27+
"vector_similarity_function_with_multi_column_partition_key",
28+
timeout,
29+
vector_similarity_function_with_multi_column_partition_key,
30+
)
31+
}
32+
33+
async fn vector_similarity_function_returns_expected_results(actors: TestActors) {
34+
info!("started");
35+
36+
let (session, client) = prepare_connection(&actors).await;
37+
38+
let keyspace = create_keyspace(&session).await;
39+
let table = create_table(&session, "pk INT PRIMARY KEY, v VECTOR<FLOAT, 3>", None).await;
40+
41+
// Insert test data
42+
let embeddings: Vec<Vec<f32>> = vec![
43+
vec![1.0, 2.0, 3.0],
44+
vec![4.0, 5.0, 6.0],
45+
vec![7.0, 8.0, 9.0],
46+
];
47+
for (i, embedding) in embeddings.into_iter().enumerate() {
48+
session
49+
.query_unpaged(
50+
format!("INSERT INTO {table} (pk, v) VALUES (?, ?)"),
51+
(i as i32, &embedding),
52+
)
53+
.await
54+
.expect("failed to insert data");
55+
}
56+
57+
let index = create_index(
58+
&session,
59+
&client,
60+
&table,
61+
"v",
62+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
63+
)
64+
.await;
65+
66+
wait_for(
67+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
68+
"Waiting for 3 vectors to be indexed",
69+
Duration::from_secs(5),
70+
)
71+
.await;
72+
73+
// Check if the query returns the expected distances
74+
let rows = get_query_results(
75+
format!(
76+
"SELECT pk, vector_similarity() FROM {table} ORDER BY v ANN OF [0.0, 0.0, 0.0] LIMIT 5"
77+
),
78+
&session,
79+
)
80+
.await;
81+
assert_eq!(rows.len(), 3);
82+
83+
// Expected results are calculated using Euclidean distance formula
84+
let expected_distances = vec![(0, 14.0), (1, 77.0), (2, 194.0)];
85+
for (i, row) in rows.iter().enumerate() {
86+
let pk: i32 = row.columns[0].as_ref().unwrap().as_int().unwrap();
87+
let similarity: f32 = row.columns[1].as_ref().unwrap().as_float().unwrap();
88+
assert_eq!(
89+
(pk, similarity),
90+
expected_distances[i],
91+
"Row {i} does not match expected result"
92+
);
93+
}
94+
95+
// Drop keyspace
96+
session
97+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
98+
.await
99+
.expect("failed to drop a keyspace");
100+
101+
info!("finished");
102+
}
103+
104+
async fn vector_similarity_function_with_clustering_key(actors: TestActors) {
105+
info!("started");
106+
107+
let (session, client) = prepare_connection(&actors).await;
108+
109+
let keyspace = create_keyspace(&session).await;
110+
let table = create_table(
111+
&session,
112+
"pk INT, ck INT, v VECTOR<FLOAT, 3>, PRIMARY KEY (pk, ck)",
113+
None,
114+
)
115+
.await;
116+
117+
// Insert test data
118+
let embeddings: Vec<Vec<f32>> = vec![
119+
vec![1.0, 2.0, 3.0],
120+
vec![4.0, 5.0, 6.0],
121+
vec![7.0, 8.0, 9.0],
122+
];
123+
for (i, embedding) in embeddings.into_iter().enumerate() {
124+
session
125+
.query_unpaged(
126+
format!("INSERT INTO {table} (pk, ck, v) VALUES (?, ?, ?)"),
127+
(123, i as i32, &embedding),
128+
)
129+
.await
130+
.expect("failed to insert data");
131+
}
132+
133+
let index = create_index(
134+
&session,
135+
&client,
136+
&table,
137+
"v",
138+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
139+
)
140+
.await;
141+
142+
wait_for(
143+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
144+
"Waiting for 3 vectors to be indexed",
145+
Duration::from_secs(5),
146+
)
147+
.await;
148+
149+
// Check if the query returns the expected distances
150+
let rows = get_query_results(
151+
format!(
152+
"SELECT ck, vector_similarity() FROM {table} ORDER BY v ANN OF [0.0, 0.0, 0.0] LIMIT 5"
153+
),
154+
&session,
155+
)
156+
.await;
157+
assert_eq!(rows.len(), 3);
158+
159+
// Expected results are calculated using Euclidean distance formula
160+
let expected_distances = vec![(0, 14.0), (1, 77.0), (2, 194.0)];
161+
for (i, row) in rows.iter().enumerate() {
162+
let ck: i32 = row.columns[0].as_ref().unwrap().as_int().unwrap();
163+
let similarity: f32 = row.columns[1].as_ref().unwrap().as_float().unwrap();
164+
assert_eq!(
165+
(ck, similarity),
166+
expected_distances[i],
167+
"Row {i} does not match expected result"
168+
);
169+
}
170+
171+
// Drop keyspace
172+
session
173+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
174+
.await
175+
.expect("failed to drop a keyspace");
176+
177+
info!("finished");
178+
}
179+
180+
async fn vector_similarity_function_with_multi_column_partition_key(actors: TestActors) {
181+
info!("started");
182+
183+
let (session, client) = prepare_connection(&actors).await;
184+
185+
let keyspace = create_keyspace(&session).await;
186+
let table = create_table(
187+
&session,
188+
"pk1 INT, pk2 INT, v VECTOR<FLOAT, 3>, PRIMARY KEY ((pk1, pk2))",
189+
None,
190+
)
191+
.await;
192+
193+
// Insert test data
194+
let embeddings: Vec<Vec<f32>> = vec![
195+
vec![1.0, 2.0, 3.0],
196+
vec![4.0, 5.0, 6.0],
197+
vec![7.0, 8.0, 9.0],
198+
];
199+
for (i, embedding) in embeddings.into_iter().enumerate() {
200+
session
201+
.query_unpaged(
202+
format!("INSERT INTO {table} (pk1, pk2, v) VALUES (?, ?, ?)"),
203+
(123, i as i32, &embedding),
204+
)
205+
.await
206+
.expect("failed to insert data");
207+
}
208+
209+
let index = create_index(
210+
&session,
211+
&client,
212+
&table,
213+
"v",
214+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
215+
)
216+
.await;
217+
218+
wait_for(
219+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
220+
"Waiting for 3 vectors to be indexed",
221+
Duration::from_secs(5),
222+
)
223+
.await;
224+
225+
// Check if the query returns the expected distances
226+
let rows = get_query_results(
227+
format!(
228+
"SELECT pk2, vector_similarity() FROM {table} ORDER BY v ANN OF [0.0, 0.0, 0.0] LIMIT 5"
229+
),
230+
&session,
231+
)
232+
.await;
233+
assert_eq!(rows.len(), 3);
234+
235+
// Expected results are calculated using Euclidean distance formula
236+
let expected_distances = vec![(0, 14.0), (1, 77.0), (2, 194.0)];
237+
for (i, row) in rows.iter().enumerate() {
238+
let pk: i32 = row.columns[0].as_ref().unwrap().as_int().unwrap();
239+
let similarity: f32 = row.columns[1].as_ref().unwrap().as_float().unwrap();
240+
assert_eq!(
241+
(pk, similarity),
242+
expected_distances[i],
243+
"Row {i} does not match expected result"
244+
);
245+
}
246+
247+
// Drop keyspace
248+
session
249+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
250+
.await
251+
.expect("failed to drop a keyspace");
252+
253+
info!("finished");
254+
}

0 commit comments

Comments
 (0)