Skip to content

Commit 5eeba43

Browse files
committed
improve random row selection
1 parent e7de104 commit 5eeba43

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

libsql-sqlite3/src/vectordiskann.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ int diskAnnCreateIndex(
442442
int type, dims;
443443
u64 maxNeighborsParam, blockSizeBytes;
444444
char *zSql;
445+
const char *zRowidColumnName;
445446
char columnSqlDefs[VECTOR_INDEX_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...)
446447
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
447448
if( vectorIdxKeyDefsRender(pKey, "index_key", columnSqlDefs, sizeof(columnSqlDefs)) != 0 ){
@@ -509,6 +510,7 @@ int diskAnnCreateIndex(
509510
columnSqlDefs,
510511
columnSqlNames
511512
);
513+
zRowidColumnName = "index_key";
512514
}else{
513515
zSql = sqlite3MPrintf(
514516
db,
@@ -518,9 +520,31 @@ int diskAnnCreateIndex(
518520
columnSqlDefs,
519521
columnSqlNames
520522
);
523+
zRowidColumnName = "rowid";
521524
}
522525
rc = sqlite3_exec(db, zSql, 0, 0, 0);
523526
sqlite3DbFree(db, zSql);
527+
if( rc != SQLITE_OK ){
528+
return rc;
529+
}
530+
/*
531+
* vector blobs are usually pretty large (more than a page; for example, a node block for 1024d f32 embeddings with 1-bit compression will occupy ~20KB)
532+
* in this case, the main table B-Tree takes on an inefficient shape where each leaf node has only 1 cell
533+
*
534+
* since we have a query which selects a random row using the OFFSET/LIMIT trick, it needs to read all of these leaf pages just to skip them
535+
* so, in order to remove this overhead for random row selection, we create an index on just a single column
536+
* in this case the B-Tree leaves will be full of rowids and the overhead for page reads will be very small
537+
*/
538+
zSql = sqlite3MPrintf(
539+
db,
540+
"CREATE INDEX IF NOT EXISTS \"%w\".%s_shadow_idx ON %s_shadow (%s)",
541+
zDbSName,
542+
zIdxName,
543+
zIdxName,
544+
zRowidColumnName
545+
);
546+
rc = sqlite3_exec(db, zSql, 0, 0, 0);
547+
sqlite3DbFree(db, zSql);
524548
return rc;
525549
}
526550

0 commit comments

Comments
 (0)