Skip to content

Commit 33bd1aa

Browse files
committed
validator/tests: add vector_similarity() function test
Add tests that validate the results of the vector_similarity() function. Refs: scylladb/scylladb#25993
1 parent a473e5e commit 33bd1aa

File tree

2 files changed

+211
-0
lines changed

2 files changed

+211
-0
lines changed

crates/validator/src/tests/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod crud;
88
mod full_scan;
99
mod reconnect;
1010
mod serde;
11+
mod vector_similarity;
1112

1213
use crate::ServicesSubnet;
1314
use crate::dns::Dns;
@@ -221,6 +222,7 @@ pub(crate) async fn register() -> Vec<(String, TestCase)> {
221222
("full_scan", full_scan::new().await),
222223
("reconnect", reconnect::new().await),
223224
("serde", serde::new().await),
225+
("vector_similarity", vector_similarity::new().await),
224226
]
225227
.into_iter()
226228
.map(|(name, test_case)| (name.to_string(), test_case))
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2025-present ScyllaDB
3+
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
4+
*/
5+
6+
use crate::common::*;
7+
use crate::tests::*;
8+
use scylla::client::session::Session;
9+
use std::time::Duration;
10+
use tracing::info;
11+
12+
pub(crate) async fn new() -> TestCase {
13+
let timeout = Duration::from_secs(30);
14+
TestCase::empty()
15+
.with_init(timeout, init)
16+
.with_cleanup(timeout, cleanup)
17+
.with_test(
18+
"vector_similarity_function_returns_expected_results",
19+
timeout,
20+
vector_similarity_function_returns_expected_results,
21+
)
22+
.with_test(
23+
"vector_similarity_function_with_clustering_key",
24+
timeout,
25+
vector_similarity_function_with_clustering_key,
26+
)
27+
.with_test(
28+
"vector_similarity_function_with_multi_column_partition_key",
29+
timeout,
30+
vector_similarity_function_with_multi_column_partition_key,
31+
)
32+
}
33+
34+
/// Three 3-dimensional vectors that every test in this module inserts into its table.
///
/// Each test stores the vector at position `i` under the integer key `i`, so the position in
/// this array is what the tests later expect to read back as the row key.
pub(crate) static EMBEDDINGS: [[f32; 3]; 3] = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
35+
36+
async fn assert_similarity_function_results(session: &Session, table: &str, key_column: &str) {
37+
let results = get_query_results(
38+
format!(
39+
"SELECT {key_column}, vector_similarity() FROM {table} ORDER BY v ANN OF [0.0, 0.0, 0.0] LIMIT 5"
40+
),
41+
session,
42+
)
43+
.await;
44+
let rows = results.rows::<(i32, f32)>().expect("failed to get rows");
45+
assert_eq!(rows.rows_remaining(), 3);
46+
47+
// Expected results are calculated using Euclidean distance formula
48+
let expected_distances = [(0, 14.0), (1, 77.0), (2, 194.0)];
49+
for (i, row) in rows.enumerate() {
50+
let row = row.expect("failed to get row");
51+
let (key, distance) = row;
52+
assert_eq!(
53+
(key, distance),
54+
expected_distances[i],
55+
"Row {i} does not match expected result"
56+
);
57+
}
58+
}
59+
60+
async fn vector_similarity_function_returns_expected_results(actors: TestActors) {
61+
info!("started");
62+
63+
let (session, client) = prepare_connection(&actors).await;
64+
65+
let keyspace = create_keyspace(&session).await;
66+
let table = create_table(&session, "pk INT PRIMARY KEY, v VECTOR<FLOAT, 3>", None).await;
67+
68+
// Insert test data
69+
for (i, embedding) in EMBEDDINGS.into_iter().enumerate() {
70+
session
71+
.query_unpaged(
72+
format!("INSERT INTO {table} (pk, v) VALUES (?, ?)"),
73+
(i as i32, embedding.as_slice()),
74+
)
75+
.await
76+
.expect("failed to insert data");
77+
}
78+
79+
let index = create_index(
80+
&session,
81+
&client,
82+
&table,
83+
"v",
84+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
85+
)
86+
.await;
87+
88+
wait_for(
89+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
90+
"Waiting for 3 vectors to be indexed",
91+
Duration::from_secs(5),
92+
)
93+
.await;
94+
95+
// Check if the query returns the expected distances
96+
assert_similarity_function_results(&session, &table, "pk").await;
97+
98+
// Drop keyspace
99+
session
100+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
101+
.await
102+
.expect("failed to drop a keyspace");
103+
104+
info!("finished");
105+
}
106+
107+
async fn vector_similarity_function_with_clustering_key(actors: TestActors) {
108+
info!("started");
109+
110+
let (session, client) = prepare_connection(&actors).await;
111+
112+
let keyspace = create_keyspace(&session).await;
113+
let table = create_table(
114+
&session,
115+
"pk INT, ck INT, v VECTOR<FLOAT, 3>, PRIMARY KEY (pk, ck)",
116+
None,
117+
)
118+
.await;
119+
120+
// Insert test data
121+
for (i, embedding) in EMBEDDINGS.into_iter().enumerate() {
122+
session
123+
.query_unpaged(
124+
format!("INSERT INTO {table} (pk, ck, v) VALUES (?, ?, ?)"),
125+
(123, i as i32, &embedding.as_slice()),
126+
)
127+
.await
128+
.expect("failed to insert data");
129+
}
130+
131+
let index = create_index(
132+
&session,
133+
&client,
134+
&table,
135+
"v",
136+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
137+
)
138+
.await;
139+
140+
wait_for(
141+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
142+
"Waiting for 3 vectors to be indexed",
143+
Duration::from_secs(5),
144+
)
145+
.await;
146+
147+
// Check if the query returns the expected distances
148+
assert_similarity_function_results(&session, &table, "ck").await;
149+
150+
// Drop keyspace
151+
session
152+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
153+
.await
154+
.expect("failed to drop a keyspace");
155+
156+
info!("finished");
157+
}
158+
159+
async fn vector_similarity_function_with_multi_column_partition_key(actors: TestActors) {
160+
info!("started");
161+
162+
let (session, client) = prepare_connection(&actors).await;
163+
164+
let keyspace = create_keyspace(&session).await;
165+
let table = create_table(
166+
&session,
167+
"pk1 INT, pk2 INT, v VECTOR<FLOAT, 3>, PRIMARY KEY ((pk1, pk2))",
168+
None,
169+
)
170+
.await;
171+
172+
// Insert test data
173+
for (i, embedding) in EMBEDDINGS.into_iter().enumerate() {
174+
session
175+
.query_unpaged(
176+
format!("INSERT INTO {table} (pk1, pk2, v) VALUES (?, ?, ?)"),
177+
(123, i as i32, &embedding.as_slice()),
178+
)
179+
.await
180+
.expect("failed to insert data");
181+
}
182+
183+
let index = create_index(
184+
&session,
185+
&client,
186+
&table,
187+
"v",
188+
Some("{'similarity_function' : 'EUCLIDEAN'}"),
189+
)
190+
.await;
191+
192+
wait_for(
193+
|| async { client.count(&index.keyspace, &index.index).await == Some(3) },
194+
"Waiting for 3 vectors to be indexed",
195+
Duration::from_secs(5),
196+
)
197+
.await;
198+
199+
// Check if the query returns the expected distances
200+
assert_similarity_function_results(&session, &table, "pk2").await;
201+
202+
// Drop keyspace
203+
session
204+
.query_unpaged(format!("DROP KEYSPACE {keyspace}"), ())
205+
.await
206+
.expect("failed to drop a keyspace");
207+
208+
info!("finished");
209+
}

0 commit comments

Comments
 (0)