Commit f97bfba (1 parent: a0dbecc)

refactor(benchmarks): improve get_many and similarity_search reporting

* Split get_many into chunks to handle SQLite variable limits
* Update similarity_search to report total time and count
* Separate similarity_search results in print_results for clarity

File tree

2 files changed: +51, -26 lines

benchmarks/operations.py

Lines changed: 12 additions & 3 deletions
@@ -28,7 +28,14 @@ def benchmark_add(
 
 def benchmark_get_many(client: SQLiteVecClient, rowids: list[int]) -> dict:
     """Benchmark get_many operations."""
-    elapsed, _ = benchmark_operation(client.get_many, rowids)
+    # Split into chunks to avoid SQLite variable limit (999)
+    chunk_size = 500
+    import time
+
+    start = time.perf_counter()
+    for i in range(0, len(rowids), chunk_size):
+        client.get_many(rowids[i : i + chunk_size])
+    elapsed = time.perf_counter() - start
     return {
         "operation": "get_many",
         "count": len(rowids),
@@ -49,14 +56,16 @@ def benchmark_similarity_search(
         times.append(elapsed)
 
     avg_time = statistics.mean(times)
+    total_time = sum(times)
     return {
         "operation": "similarity_search",
         "top_k": top_k,
-        "iterations": iterations,
+        "count": iterations,
+        "time": total_time,
+        "ops_per_sec": iterations / total_time,
         "avg_time": avg_time,
         "min_time": min(times),
         "max_time": max(times),
-        "searches_per_sec": 1 / avg_time,
     }
 

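Note: the rename from searches_per_sec to ops_per_sec does not change the number being reported, only the key. Since total_time = sum(times) and avg_time = statistics.mean(times) = sum(times) / iterations, the expression iterations / total_time is algebraically identical to the old 1 / avg_time. A quick sanity check:

import math
import statistics

times = [0.012, 0.009, 0.011, 0.010]  # made-up per-query timings
iterations = len(times)

old_rate = 1 / statistics.mean(times)  # previous "searches_per_sec"
new_rate = iterations / sum(times)     # new "ops_per_sec"
assert math.isclose(old_rate, new_rate)
print(f"{new_rate:.2f} searches/sec")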
benchmarks/reporter.py

Lines changed: 39 additions & 23 deletions
@@ -9,27 +9,49 @@
 
 def print_results(results: list[dict], table_format: str):
     """Print benchmark results in a formatted table."""
-    table_data = []
-    for result in results:
-        op = result["operation"]
-        if "top_k" in result:
-            op = f"{op} (k={result['top_k']})"
+    # Separate similarity_search results
+    regular_results = [r for r in results if r["operation"] != "similarity_search"]
+    search_results = [r for r in results if r["operation"] == "similarity_search"]
 
-        count = result.get("count", result.get("iterations", "-"))
-        time_val = result.get("time", result.get("avg_time", 0))
-        ops_per_sec = result.get("ops_per_sec", result.get("searches_per_sec", 0))
+    # Get count from first result for header
+    count = regular_results[0].get("count", 0) if regular_results else 0
 
-        table_data.append([op, count, f"{time_val:.4f}", f"{ops_per_sec:.2f}"])
+    # Print CRUD operations table
+    table_data = []
+    for result in regular_results:
+        op = result["operation"]
+        time_val = result.get("time", 0)
+        ops_per_sec = result.get("ops_per_sec", 0)
+        table_data.append([op, f"{time_val:.4f}", f"{ops_per_sec:.2f}"])
 
+    print(f"\nCRUD Operations (n={count:,}):")
     print(
-        "\n"
-        + tabulate(
+        tabulate(
             table_data,
-            headers=["Operation", "Count", "Time (s)", "Ops/sec"],
+            headers=["Operation", "Time (s)", "Ops/sec"],
             tablefmt=table_format,
         )
     )
 
+    # Print similarity search table separately
+    if search_results:
+        iterations = search_results[0].get("count", 0)
+        search_data = []
+        for result in search_results:
+            top_k = result.get("top_k", "-")
+            time_val = result.get("time", 0)
+            ops_per_sec = result.get("ops_per_sec", 0)
+            search_data.append([top_k, f"{time_val:.4f}", f"{ops_per_sec:.2f}"])
+
+        print(f"\nSimilarity Search (iterations={iterations}):")
+        print(
+            tabulate(
+                search_data,
+                headers=["Top-K", "Time (s)", "Searches/sec"],
+                tablefmt=table_format,
+            )
+        )
+
 
 def print_summary(
     all_results: dict[str, dict[int, list[dict]]],
@@ -45,20 +67,18 @@ def print_summary(
     operations = [
         "add",
         "get_many",
-        "similarity_search",
         "update_many",
         "get_all",
         "delete_many",
+        "similarity_search",
     ]
     summary_data = []
     for op in operations:
         row = [op]
         for size in dataset_sizes:
             matching = [r for r in mode_results[size] if r["operation"] == op]
             if matching:
-                ops_per_sec = matching[0].get(
-                    "ops_per_sec", matching[0].get("searches_per_sec", 0)
-                )
+                ops_per_sec = matching[0].get("ops_per_sec", 0)
                 row.append(f"{ops_per_sec:,.0f}")
             else:
                 row.append("N/A")
@@ -91,20 +111,16 @@ def export_to_csv(
         operations = [
             "add",
             "get_many",
-            "similarity_search",
             "update_many",
             "get_all",
             "delete_many",
+            "similarity_search",
         ]
         for op in operations:
             matching = [r for r in mode_results[size] if r["operation"] == op]
             if matching:
-                ops_per_sec = matching[0].get(
-                    "ops_per_sec", matching[0].get("searches_per_sec", 0)
-                )
-                time_val = matching[0].get(
-                    "time", matching[0].get("avg_time", 0)
-                )
+                ops_per_sec = matching[0].get("ops_per_sec", 0)
+                time_val = matching[0].get("time", 0)
                 writer.writerow([op, f"{ops_per_sec:.2f}", f"{time_val:.4f}"])
             else:
                 writer.writerow([op, "N/A", "N/A"])
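Note: a hypothetical driver showing how the reworked reporter consumes the unified result keys (count / time / ops_per_sec); the sample numbers are made up, and the import path simply follows the file layout above (it assumes benchmarks is importable and tabulate is installed).

from benchmarks.reporter import print_results

results = [
    {"operation": "add", "count": 10_000, "time": 1.2345,
     "ops_per_sec": 8100.45},
    {"operation": "get_many", "count": 10_000, "time": 0.4321,
     "ops_per_sec": 23142.79},
    {"operation": "similarity_search", "top_k": 10, "count": 100,
     "time": 0.9876, "ops_per_sec": 101.26, "avg_time": 0.009876,
     "min_time": 0.0081, "max_time": 0.0132},
]

# Prints a "CRUD Operations (n=10,000)" table, then a separate
# "Similarity Search (iterations=100)" table.
print_results(results, table_format="github")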
