
Commit 4b21878

add benchmark scripts (#1546)
* add benchmark scripts
* remove unnecessary srand
1 parent 6122f5e commit 4b21878

File tree

8 files changed: +621 -0 lines changed

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
.env/
benchtest
blobtest
anntest
*.db
*.db-shm
*.db-wal
*.sql

libsql-sqlite3/benchmark/Makefile

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
C_INCLUDE_PATH := ../
all: diskann bruteforce no_vectors

anntest: anntest.c
	@cc anntest.c -o anntest -L ../.libs/ -llibsql -O2
benchtest: benchtest.c
	@cc benchtest.c -o benchtest -L ../.libs/ -llibsql -O2
blobtest: blobtest.c
	@cc blobtest.c -o blobtest -L ../.libs/ -llibsql -O2

diskann.sql:
	@python3 workload.py diskann 64 1000 1000 > diskann.sql
diskann: benchtest diskann.sql
	@rm test.db || exit 0
	@LD_LIBRARY_PATH=../.libs/ ./benchtest diskann.sql test.db

bruteforce.sql:
	@python3 workload.py bruteforce 64 1000 1000 > bruteforce.sql
bruteforce: benchtest bruteforce.sql
	@rm test.db || exit 0
	@LD_LIBRARY_PATH=../.libs/ ./benchtest bruteforce.sql test.db

no_vectors.sql:
	@python3 workload.py no_vectors 1000 1000 > no_vectors.sql
no_vectors: benchtest no_vectors.sql
	@rm test.db || exit 0
	@LD_LIBRARY_PATH=../.libs/ ./benchtest no_vectors.sql test.db
clean:
	-rm benchtest
	-rm blobtest
	-rm anntest
	-rm *.sql
	-rm *.db
	-rm *.db-*
	-rm *.db-*

libsql-sqlite3/benchmark/README.md

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
## benchmark tools

Simple benchmark tools, intentionally written in C in order to have faster feedback loops (no need to wait for Rust builds).

You need to install `numpy` for some scripts to work. You can do it globally or in a virtual env:
```sh
$> python -m venv .env
$> source .env/bin/activate
$> pip install -r requirements.txt
```

### benchtest

Simple generic tool which takes an SQL file and a DB file and runs all queries against the provided DB file.
For SQL file generation you can use/extend the `workload.py` script.

Take a look at the example:
```sh
$> LD_LIBRARY_PATH=../.libs/ ./benchtest queries.sql data.db
open queries file at queries.sql
open sqlite db at 'data.db'
executed simple statement: 'CREATE TABLE t ( id INTEGER PRIMARY KEY, emb FLOAT32(4) );'
executed simple statement: 'CREATE INDEX t_idx ON t ( libsql_vector_idx(emb) );'
prepared statement: 'INSERT INTO t VALUES ( ?, vector(?) );'
inserts (queries.sql):
insert: 710.25 micros (avg.), 4 (count)
size : 0.2695 MB
reads : 1.00 (avg.), 4 (total)
writes: 1.00 (avg.), 4 (total)
prepared statement: 'SELECT * FROM vector_top_k('t_idx', vector(?), ?);'
search (queries.sql):
select: 63.25 micros (avg.), 4 (count)
size : 0.2695 MB
reads : 1.00 (avg.), 4 (total)
```

It is linked against `liblibsql.so`, which resides in the `../.libs/` directory and must be explicitly built from the `libsql-sqlite3` sources:
```sh
$> basename $(pwd)
libsql-sqlite3
$> make # this command will generate libs in the .libs directory
$> cd benchmark
$> make bruteforce
open queries file at bruteforce.sql
open sqlite db at 'test.db'
executed simple statement: 'PRAGMA journal_mode=WAL;'
executed simple statement: 'CREATE TABLE x ( id INTEGER PRIMARY KEY, embedding FLOAT32(64) );'
prepared statement: 'INSERT INTO x VALUES (?, vector(?));'
inserts (bruteforce.sql):
insert: 46.27 micros (avg.), 1000 (count)
size : 0.2695 MB
reads : 1.00 (avg.), 1000 (total)
writes: 1.00 (avg.), 1000 (total)
prepared statement: 'SELECT id FROM x ORDER BY vector_distance_cos(embedding, vector(?)) LIMIT ?;'
search (bruteforce.sql):
select: 329.32 micros (avg.), 1000 (count)
size : 0.2695 MB
reads : 2000.00 (avg.), 2000000 (total)
```
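
Under the hood a benchtest-style run is just the ordinary sqlite3 C API driven from a file of statements. A minimal sketch of that prepare/bind/step pattern (not the actual `benchtest.c`; the schema and the vector text literal below are illustrative, mirroring the example output above, and timing/stat collection is omitted):
```c
/* Minimal sketch of the prepare/bind/step loop a benchtest-style tool runs.
 * Illustrative only: table name and vector literal mirror the example output
 * above; they are assumptions, not what benchtest.c hardcodes. */
#include <stdio.h>
#include <sqlite3.h>

int main(void) {
  sqlite3 *db;
  if (sqlite3_open("test.db", &db) != SQLITE_OK) return 1;

  /* simple statements are executed directly */
  sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY, emb FLOAT32(4))", 0, 0, 0);

  /* parameterized statements are prepared once and stepped per row */
  sqlite3_stmt *pStmt;
  if (sqlite3_prepare_v2(db, "INSERT INTO t VALUES (?, vector(?))", -1, &pStmt, 0) != SQLITE_OK) {
    fprintf(stderr, "prepare failed: %s\n", sqlite3_errmsg(db));
    return 1;
  }
  sqlite3_bind_int(pStmt, 1, 1);
  sqlite3_bind_text(pStmt, 2, "[0.1, 0.2, 0.3, 0.4]", -1, SQLITE_TRANSIENT);
  if (sqlite3_step(pStmt) != SQLITE_DONE) {
    fprintf(stderr, "step failed: %s\n", sqlite3_errmsg(db));
    return 1;
  }
  sqlite3_finalize(pStmt);
  sqlite3_close(db);
  return 0;
}
```
Such a sketch builds the same way as the tools in the Makefile (`cc ... -L ../.libs/ -llibsql`) and runs with `LD_LIBRARY_PATH=../.libs/`.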
### anntest

Simple tool which takes a DB file with 2 tables, `data (id INTEGER PRIMARY KEY, emb FLOAT32(n))` and `queries (emb FLOAT32(n))`, and executes a vector search for every vector in the `queries` table against the `data` table using the provided SQL statements.

In order to generate the DB file you can use the `benchtest` and `workload.py` tools. Take a look at the example:
```sh
$> python workload.py recall_uniform 64 1000 1000 > recall_uniform.sql
$> LD_LIBRARY_PATH=../.libs/ ./benchtest recall_uniform.sql recall_uniform.db
$> # ./anntest [db path] [test name (used only for printed stats)] [ann query] [exact query]
$> LD_LIBRARY_PATH=../.libs/ ./anntest recall_uniform.db 10-recall@10 "SELECT rowid FROM vector_top_k('data_idx', ?, 10)" "SELECT id FROM data ORDER BY vector_distance_cos(emb, ?) LIMIT 10"
open sqlite db at 'recall_uniform.db'
ready to perform 1000 queries with SELECT rowid FROM vector_top_k('data_idx', ?, 10) ann query and SELECT id FROM data ORDER BY vector_distance_cos(emb, ?) LIMIT 10 exact query
88.91% 10-recall@10 (avg.)
```
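
The reported `10-recall@10` figure is plain set recall: the fraction of ids returned by the exact query that also appear in the ANN result, averaged over all query vectors (see `recall()` in `anntest.c` below). A tiny self-contained illustration, with made-up id lists:
```c
/* Illustration of the recall metric anntest reports; the id lists are made up. */
#include <stdio.h>

/* same idea as recall() in anntest.c: share of exact ids present in the ANN result */
static double recall(const int *exact, int nExact, const int *ann, int nAnn) {
  int overlap = 0;
  for (int i = 0; i < nExact; i++) {
    for (int s = 0; s < nAnn; s++) {
      if (exact[i] == ann[s]) { overlap++; break; }
    }
  }
  return (double)overlap / nExact;
}

int main(void) {
  int exact[] = {1, 2, 3, 4, 5};  /* ground truth from the exact (brute-force) query */
  int ann[]   = {1, 2, 3, 9, 5};  /* ANN result: misses id 4, returns 9 instead */
  printf("%.2f%% 5-recall@5\n", recall(exact, 5, ann, 5) * 100);  /* prints 80.00% */
  return 0;
}
```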
### blobtest

Simple tool which aims to prove that the `sqlite3_blob_reopen` API can substantially increase read performance.

Take a look at the example:
```sh
$> LD_LIBRARY_PATH=../.libs/ ./blobtest blob-read-simple.db read simple 1000 1000
open sqlite db at 'blob-read-simple.db'
blob table: ready to prepare
blob table: prepared
time: 3.76 micros (avg.), 1000 (count)
$> LD_LIBRARY_PATH=../.libs/ ./blobtest blob-read-reopen.db read reopen 1000 1000
open sqlite db at 'blob-read-reopen.db'
blob table: ready to prepare
blob table: prepared
time: 0.31 micros (avg.), 1000 (count)
```
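
The speedup comes from not re-opening an incremental-BLOB handle for every row: `sqlite3_blob_open` resolves the table and column each time, while `sqlite3_blob_reopen` just repoints an existing handle at another rowid. A rough sketch of the two read paths being compared (the `blobs` table and `data` column names are assumptions for illustration, not necessarily what `blobtest` uses):
```c
/* Sketch of the two read paths blobtest compares; a table "blobs" with a
 * "data" column is an assumed schema for illustration. */
#include <sqlite3.h>

/* "simple": open and close a fresh blob handle for every row */
void read_simple(sqlite3 *db, sqlite3_int64 nRows, char *buf, int nBuf) {
  for (sqlite3_int64 row = 1; row <= nRows; row++) {
    sqlite3_blob *pBlob;
    if (sqlite3_blob_open(db, "main", "blobs", "data", row, 0, &pBlob) != SQLITE_OK) return;
    sqlite3_blob_read(pBlob, buf, nBuf, 0);
    sqlite3_blob_close(pBlob);
  }
}

/* "reopen": open once, then repoint the same handle at the next rowid */
void read_reopen(sqlite3 *db, sqlite3_int64 nRows, char *buf, int nBuf) {
  sqlite3_blob *pBlob;
  if (sqlite3_blob_open(db, "main", "blobs", "data", 1, 0, &pBlob) != SQLITE_OK) return;
  for (sqlite3_int64 row = 1; row <= nRows; row++) {
    if (row > 1 && sqlite3_blob_reopen(pBlob, row) != SQLITE_OK) break;
    sqlite3_blob_read(pBlob, buf, nBuf, 0);
  }
  sqlite3_blob_close(pBlob);
}
```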

libsql-sqlite3/benchmark/anntest.c

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
#include "../parse.h"
#include "../opcodes.h"
#include "../src/sqliteInt.h"
#include "../src/vectorIndexInt.h"
#include "assert.h"
#include "stdbool.h"
#include "string.h"
#include "stdarg.h"
#include "time.h"
#include <sqlite3.h>

#define eprintf(...) fprintf(stderr, __VA_ARGS__)
#define ensure(condition, ...) { if (!(condition)) { eprintf(__VA_ARGS__); exit(1); } }

/* Load all query vectors (as raw blobs) from the prepared statement into ppItems/pItemSize. */
int searchVectors(sqlite3 *db, sqlite3_stmt *pStmt, void **ppItems, int *pItemSize) {
  ensure(sqlite3_reset(pStmt) == SQLITE_OK, "failed to reset statement: %s\n", sqlite3_errmsg(db));
  int rows = 0;
  while(1){
    int rc = sqlite3_step(pStmt);
    if( rc == SQLITE_DONE ){
      break;
    } else if( rc == SQLITE_ROW ){
      const void *pBlob = sqlite3_column_blob(pStmt, 0);
      int nBlobSize = sqlite3_column_bytes(pStmt, 0);
      void *pBlobCopy = malloc(nBlobSize);
      memcpy(pBlobCopy, pBlob, nBlobSize);
      ppItems[rows] = pBlobCopy;
      pItemSize[rows] = nBlobSize;
      rows++;
    }else{
      ensure(false, "unexpected step result: %s\n", sqlite3_errmsg(db));
    }
  }
  return rows;
}

/* Bind the query vector blob to the statement and collect the returned row ids. */
int searchRows(sqlite3 *db, sqlite3_stmt *pStmt, unsigned char *pBlob, int nBlobSize, int *result) {
  ensure(sqlite3_reset(pStmt) == SQLITE_OK, "failed to reset statement: %s\n", sqlite3_errmsg(db));
  ensure(sqlite3_bind_blob(pStmt, 1, pBlob, nBlobSize, SQLITE_TRANSIENT) == SQLITE_OK, "failed to bind blob: %s\n", sqlite3_errmsg(db));
  int rows = 0;
  while(1){
    int rc = sqlite3_step(pStmt);
    if( rc == SQLITE_DONE ){
      break;
    } else if( rc == SQLITE_ROW ){
      int rowid = sqlite3_column_int(pStmt, 0);
      result[rows++] = rowid;
    }else{
      ensure(false, "unexpected step result: %s\n", sqlite3_errmsg(db));
    }
  }
  return rows;
}

/* Fraction of ids from the exact result that also appear in the ANN result. */
double recall(int *pExact, int nExactSize, int *pAnn, int nAnnSize) {
  int overlap = 0;
  for( int i = 0; i < nExactSize; i++ ){
    int ok = 0;
    for(int s = 0; !ok && s < nAnnSize; s++ ){
      ok |= pExact[i] == pAnn[s];
    }
    if(ok){
      overlap++;
    }
  }
  return overlap * 1.0 / nExactSize;
}

int main(int argc, char* argv[]) {
  ensure(argc == 5, "path to the db file, recall type, ann query, exact query");
  sqlite3* db;
  int rc = sqlite3_open(argv[1], &db);
  ensure(rc == 0, "failed to open db: rc=%d\n", rc);
  printf("open sqlite db at '%s'\n", argv[1]);

  char *zType = argv[2];
  void* vectors[65536];
  int vectorSize[65536];
  char *zAnnQuery = argv[3];
  char *zExactQuery = argv[4];

  sqlite3_stmt *pVectors;
  ensure(sqlite3_prepare_v2(db, "SELECT emb FROM queries", -1, &pVectors, 0) == SQLITE_OK, "failed to prepare vectors statement: %s\n", sqlite3_errmsg(db));
  sqlite3_stmt *pAnn;
  ensure(sqlite3_prepare_v2(db, zAnnQuery, -1, &pAnn, 0) == SQLITE_OK, "failed to prepare ann statement: %s\n", sqlite3_errmsg(db));
  sqlite3_stmt *pExact;
  ensure(sqlite3_prepare_v2(db, zExactQuery, -1, &pExact, 0) == SQLITE_OK, "failed to prepare exact statement: %s\n", sqlite3_errmsg(db));

  int nVectors = searchVectors(db, pVectors, vectors, vectorSize);

  unsigned char blob[8 * 65536];
  int annResult[65536];
  int exactResult[65536];

  printf("ready to perform %d queries with %s ann query and %s exact query\n", nVectors, zAnnQuery, zExactQuery);
  double totalRecall = 0;
  int total = 0;
  for(int i = 0; i < nVectors; i++){
    if( i % 10 == 9 ){
      eprintf("progress: %d / %d, %.2f%% %s (avg.)\n", i, nVectors, totalRecall / total * 100, zType);
    }
    int nAnnSize = searchRows(db, pAnn, vectors[i], vectorSize[i], annResult);
    int nExactSize = searchRows(db, pExact, vectors[i], vectorSize[i], exactResult);
    double r = recall(exactResult, nExactSize, annResult, nAnnSize);
    totalRecall += r;
    total++;
  }
  sqlite3_finalize(pAnn);
  sqlite3_finalize(pExact);
  printf("%.2f%% %s (avg.)\n", totalRecall / total * 100, zType);
  sqlite3_close(db);
  return 0;
}
