Skip to content

Commit 90dbcff

Browse files
committed
improve vector index key handling code
1 parent a964315 commit 90dbcff

File tree

5 files changed

+116
-61
lines changed

5 files changed

+116
-61
lines changed

libsql-sqlite3/src/build.c

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4308,21 +4308,6 @@ void sqlite3CreateIndex(
43084308
pIndex->aSortOrder[i] = (u8)requestedSortOrder;
43094309
}
43104310

4311-
4312-
#ifndef SQLITE_OMIT_VECTOR
4313-
vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing);
4314-
if( vectorIdxRc < 0 ){
4315-
goto exit_create_index;
4316-
}
4317-
if( vectorIdxRc >= 1 ){
4318-
idxType = SQLITE_IDXTYPE_VECTOR;
4319-
pIndex->idxType = idxType;
4320-
}
4321-
if( vectorIdxRc == 1 ){
4322-
skipRefill = 1;
4323-
}
4324-
#endif
4325-
43264311
/* Append the table key to the end of the index. For WITHOUT ROWID
43274312
** tables (when pPk!=0) this will be the declared PRIMARY KEY. For
43284313
** normal tables (when pPk==0) this will be the rowid.
@@ -4349,6 +4334,26 @@ void sqlite3CreateIndex(
43494334
sqlite3DefaultRowEst(pIndex);
43504335
if( pParse->pNewTable==0 ) estimateIndexWidth(pIndex);
43514336

4337+
#ifndef SQLITE_OMIT_VECTOR
4338+
// we want to have complete information about index columns before invocation of vectorIndexCreate method
4339+
vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing);
4340+
if( vectorIdxRc < 0 ){
4341+
goto exit_create_index;
4342+
}
4343+
if( vectorIdxRc >= 1 ){
4344+
idxType = SQLITE_IDXTYPE_VECTOR;
4345+
/*
4346+
* SQLite can use B-Tree indices in some optimizations (e.g. SELECT COUNT(*) can use any full B-Tree index instead of the PK index)
4347+
* But SQLite is pretty conservative about using unordered indices - and that is exactly what we need here
4348+
*/
4349+
pIndex->bUnordered = 1;
4350+
pIndex->idxType = idxType;
4351+
}
4352+
if( vectorIdxRc == 1 ){
4353+
skipRefill = 1;
4354+
}
4355+
#endif
4356+
43524357
/* If this index contains every column of its table, then mark
43534358
** it as a covering index */
43544359
assert( HasRowid(pTab)

libsql-sqlite3/src/vectorIndex.c

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -105,30 +105,41 @@ int vectorIdxParamsPutF64(VectorIdxParams *pParams, char tag, double value) {
105105
** VectorIdxKey utilities
106106
****************************************************************************/
107107

108-
int vectorIdxKeyGet(Table *pTable, VectorIdxKey *pKey, const char **pzErrMsg) {
109-
int i;
110-
Index *pPk;
111-
// we actually need to change strategy here and use PK if it's available and fallback to ROWID only if there is no other choice
112-
// will change this later as it must be done carefully in order to not break behaviour of existing indices
113-
if( !HasRowid(pTable) ){
114-
pPk = sqlite3PrimaryKeyIndex(pTable);
115-
if( pPk->nKeyCol > VECTOR_INDEX_MAX_KEY_COLUMNS ){
116-
*pzErrMsg = "exceeded limit for composite columns in primary key index";
117-
return -1;
118-
}
119-
pKey->nKeyColumns = pPk->nKeyCol;
120-
for(i = 0; i < pPk->nKeyCol; i++){
121-
pKey->aKeyAffinity[i] = pTable->aCol[pPk->aiColumn[i]].affinity;
122-
pKey->azKeyCollation[i] = pPk->azColl[i];
123-
}
124-
} else{
108+
int vectorIdxKeyGet(const Index *pIndex, VectorIdxKey *pKey, const char **pzErrMsg) {
109+
Table *pTable;
110+
Index *pPkIndex;
111+
int i, nKeyColumns;
112+
113+
assert( pIndex->nKeyCol == 1 );
114+
assert( pIndex->nColumn > pIndex->nKeyCol );
115+
116+
pTable = pIndex->pTable;
117+
nKeyColumns = pIndex->nColumn - pIndex->nKeyCol;
118+
if( nKeyColumns == 1 && pIndex->aiColumn[pIndex->nKeyCol] == XN_ROWID ){
125119
pKey->nKeyColumns = 1;
126120
pKey->aKeyAffinity[0] = SQLITE_AFF_INTEGER;
127121
pKey->azKeyCollation[0] = "BINARY";
122+
return 0;
123+
}
124+
if( nKeyColumns > VECTOR_INDEX_MAX_KEY_COLUMNS ){
125+
*pzErrMsg = "exceeded limit for composite columns in primary key index";
126+
return -1;
127+
}
128+
pPkIndex = sqlite3PrimaryKeyIndex(pIndex->pTable);
129+
assert( pPkIndex->nKeyCol == nKeyColumns );
130+
131+
pKey->nKeyColumns = nKeyColumns;
132+
for(i = 0; i < pPkIndex->nKeyCol; i++){
133+
pKey->aKeyAffinity[i] = pTable->aCol[pPkIndex->aiColumn[i]].affinity;
134+
pKey->azKeyCollation[i] = pPkIndex->azColl[i];
128135
}
129136
return 0;
130137
}
131138

139+
int vectorIdxKeyRowidLike(const VectorIdxKey *pKey){
140+
return pKey->nKeyColumns == 1 && pKey->aKeyAffinity[0] == SQLITE_AFF_INTEGER && sqlite3StrICmp(pKey->azKeyCollation[0], "BINARY") == 0;
141+
}
142+
132143
int vectorIdxKeyDefsRender(const VectorIdxKey *pKey, const char *prefix, char *pBuf, int nBufSize) {
133144
static const char * const azType[] = {
134145
/* SQLITE_AFF_BLOB */ " BLOB",
@@ -276,7 +287,7 @@ void vectorInRowFree(sqlite3 *db, VectorInRow *pVectorInRow) {
276287
** VectorOutRows utilities
277288
****************************************************************************/
278289

279-
int vectorOutRowsAlloc(sqlite3 *db, VectorOutRows *pRows, int nRows, int nCols, char firstColumnAff){
290+
int vectorOutRowsAlloc(sqlite3 *db, VectorOutRows *pRows, int nRows, int nCols, int rowidLike){
280291
assert( nCols > 0 && nRows >= 0 );
281292
pRows->nRows = nRows;
282293
pRows->nCols = nCols;
@@ -287,7 +298,8 @@ int vectorOutRowsAlloc(sqlite3 *db, VectorOutRows *pRows, int nRows, int nCols,
287298
return SQLITE_NOMEM_BKPT;
288299
}
289300

290-
if( nCols == 1 && firstColumnAff == SQLITE_AFF_INTEGER ){
301+
if( rowidLike ){
302+
assert( nCols == 1 );
291303
pRows->aIntValues = sqlite3DbMallocRaw(db, nRows * sizeof(i64));
292304
if( pRows->aIntValues == NULL ){
293305
return SQLITE_NOMEM_BKPT;
@@ -911,7 +923,7 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co
911923
if( rc != SQLITE_OK ){
912924
return CREATE_FAIL;
913925
}
914-
if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){
926+
if( vectorIdxKeyGet(pIdx, &idxKey, &pzErrMsg) != 0 ){
915927
sqlite3ErrorMsg(pParse, "vector index: failed to detect underlying table key: %s", pzErrMsg);
916928
return CREATE_FAIL;
917929
}
@@ -1008,7 +1020,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value
10081020
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to open diskann index");
10091021
goto out;
10101022
}
1011-
if( vectorIdxKeyGet(pIndex->pTable, &pKey, &zErrMsg) != 0 ){
1023+
if( vectorIdxKeyGet(pIndex, &pKey, &zErrMsg) != 0 ){
10121024
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to extract table key: %s", zErrMsg);
10131025
rc = SQLITE_ERROR;
10141026
goto out;

libsql-sqlite3/src/vectorIndexInt.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,19 @@ struct VectorOutRows {
187187
sqlite3_value **ppValues;
188188
};
189189

190+
// limit on the size of the SQL fragments which we render in order to perform operations with shadow tables
191+
// we render these parts of SQL on the stack - that's why we have a hard limit on their size
192+
// using the stack simplifies memory management code and doesn't impose very strict limits here, since 128 bytes for column names should be enough for almost all use cases
193+
#define VECTOR_INDEX_SQL_RENDER_LIMIT 128
194+
190195
void vectorIdxParamsInit(VectorIdxParams *, u8 *, int);
191196
u64 vectorIdxParamsGetU64(const VectorIdxParams *, char);
192197
double vectorIdxParamsGetF64(const VectorIdxParams *, char);
193198
int vectorIdxParamsPutU64(VectorIdxParams *, char, u64);
194199
int vectorIdxParamsPutF64(VectorIdxParams *, char, double);
195200

196-
int vectorIdxKeyGet(Table*, VectorIdxKey *, const char **);
201+
int vectorIdxKeyGet(const Index *, VectorIdxKey *, const char **);
202+
int vectorIdxKeyRowidLike(const VectorIdxKey *);
197203
int vectorIdxKeyDefsRender(const VectorIdxKey *, const char *, char *, int);
198204
int vectorIdxKeyNamesRender(int, const char *, char *, int);
199205

@@ -204,7 +210,7 @@ i64 vectorInRowLegacyId(const VectorInRow *);
204210
int vectorInRowPlaceholderRender(const VectorInRow *, char *, int);
205211
void vectorInRowFree(sqlite3 *, VectorInRow *);
206212

207-
int vectorOutRowsAlloc(sqlite3 *, VectorOutRows *, int, int, char);
213+
int vectorOutRowsAlloc(sqlite3 *, VectorOutRows *, int, int, int);
208214
int vectorOutRowsPut(VectorOutRows *, int, int, const u64 *, sqlite3_value *);
209215
void vectorOutRowsGet(sqlite3_context *, const VectorOutRows *, int, int);
210216
void vectorOutRowsFree(sqlite3 *, VectorOutRows *);

libsql-sqlite3/src/vectordiskann.c

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,8 @@ int diskAnnCreateIndex(
436436
int type, dims;
437437
u64 maxNeighborsParam, blockSizeBytes;
438438
char *zSql;
439-
char columnSqlDefs[DISKANN_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...)
440-
char columnSqlNames[DISKANN_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
439+
char columnSqlDefs[VECTOR_INDEX_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...)
440+
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
441441
if( vectorIdxKeyDefsRender(pKey, "index_key", columnSqlDefs, sizeof(columnSqlDefs)) != 0 ){
442442
return SQLITE_ERROR;
443443
}
@@ -490,14 +490,29 @@ int diskAnnCreateIndex(
490490
return SQLITE_ERROR;
491491
}
492492
}
493-
zSql = sqlite3MPrintf(
494-
db,
495-
"CREATE TABLE IF NOT EXISTS \"%w\".%s_shadow (%s, data BLOB, PRIMARY KEY (%s))",
496-
zDbSName,
497-
zIdxName,
498-
columnSqlDefs,
499-
columnSqlNames
500-
);
493+
// we want to preserve rowid - so it must be explicit in the schema
494+
// also, we don't want to store a redundant set of fields - so the strategy is as follows:
495+
// 1. If we have a single PK column with INTEGER affinity and BINARY collation we only need a single PK of the same type
496+
// 2. Otherwise we need a rowid PK and a unique index over the other fields
497+
if( vectorIdxKeyRowidLike(pKey) ){
498+
zSql = sqlite3MPrintf(
499+
db,
500+
"CREATE TABLE IF NOT EXISTS \"%w\".%s_shadow (%s, data BLOB, PRIMARY KEY (%s))",
501+
zDbSName,
502+
zIdxName,
503+
columnSqlDefs,
504+
columnSqlNames
505+
);
506+
}else{
507+
zSql = sqlite3MPrintf(
508+
db,
509+
"CREATE TABLE IF NOT EXISTS \"%w\".%s_shadow (rowid INTEGER PRIMARY KEY, %s, data BLOB, UNIQUE (%s))",
510+
zDbSName,
511+
zIdxName,
512+
columnSqlDefs,
513+
columnSqlNames
514+
);
515+
}
501516
rc = sqlite3_exec(db, zSql, 0, 0, 0);
502517
sqlite3DbFree(db, zSql);
503518
return rc;
@@ -570,8 +585,8 @@ static int diskAnnGetShadowRowid(const DiskAnnIndex *pIndex, const VectorInRow *
570585
sqlite3_stmt *pStmt = NULL;
571586
char *zSql = NULL;
572587

573-
char columnSqlNames[DISKANN_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
574-
char columnSqlPlaceholders[DISKANN_SQL_RENDER_LIMIT]; // just placeholders (e.g. ?,?,?, ...)
588+
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
589+
char columnSqlPlaceholders[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just placeholders (e.g. ?,?,?, ...)
575590
if( vectorIdxKeyNamesRender(pInRow->nKeys, "index_key", columnSqlNames, sizeof(columnSqlNames)) != 0 ){
576591
rc = SQLITE_ERROR;
577592
goto out;
@@ -628,7 +643,7 @@ static int diskAnnGetShadowRowKeys(const DiskAnnIndex *pIndex, u64 nRowid, const
628643
sqlite3_stmt *pStmt = NULL;
629644
char *zSql = NULL;
630645

631-
char columnSqlNames[DISKANN_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
646+
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
632647
if( vectorIdxKeyNamesRender(pKey->nKeyColumns, "index_key", columnSqlNames, sizeof(columnSqlNames)) != 0 ){
633648
rc = SQLITE_ERROR;
634649
goto out;
@@ -682,15 +697,19 @@ static int diskAnnInsertShadowRow(const DiskAnnIndex *pIndex, const VectorInRow
682697
sqlite3_stmt *pStmt = NULL;
683698
char *zSql = NULL;
684699

685-
char columnSqlPlaceholders[DISKANN_SQL_RENDER_LIMIT]; // just placeholders (e.g. ?,?,?, ...)
700+
char columnSqlPlaceholders[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just placeholders (e.g. ?,?,?, ...)
701+
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
686702
if( vectorInRowPlaceholderRender(pVectorInRow, columnSqlPlaceholders, sizeof(columnSqlPlaceholders)) != 0 ){
687703
rc = SQLITE_ERROR;
688704
goto out;
689705
}
706+
if( vectorIdxKeyNamesRender(pVectorInRow->nKeys, "index_key", columnSqlNames, sizeof(columnSqlNames)) != 0 ){
707+
return SQLITE_ERROR;
708+
}
690709
zSql = sqlite3MPrintf(
691710
pIndex->db,
692-
"INSERT INTO \"%w\".%s VALUES (%s, ?) RETURNING rowid",
693-
pIndex->zDbSName, pIndex->zShadow, columnSqlPlaceholders
711+
"INSERT INTO \"%w\".%s(%s, data) VALUES (%s, ?) RETURNING rowid",
712+
pIndex->zDbSName, pIndex->zShadow, columnSqlNames, columnSqlPlaceholders
694713
);
695714
if( zSql == NULL ){
696715
rc = SQLITE_NOMEM_BKPT;
@@ -1247,7 +1266,7 @@ int diskAnnSearch(
12471266
goto out;
12481267
}
12491268
nOutRows = MIN(k, ctx.nCandidates);
1250-
rc = vectorOutRowsAlloc(pIndex->db, pRows, nOutRows, pKey->nKeyColumns, pKey->aKeyAffinity[0]);
1269+
rc = vectorOutRowsAlloc(pIndex->db, pRows, nOutRows, pKey->nKeyColumns, vectorIdxKeyRowidLike(pKey));
12511270
if( rc != SQLITE_OK ){
12521271
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to allocate output rows");
12531272
goto out;

libsql-sqlite3/test/libsql_vector_index.test

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,16 @@ do_execsql_test vector-text-pk {
7171
INSERT INTO t_text_pk VALUES ('e-2', vector('[2,3,4]'));
7272
INSERT INTO t_text_pk VALUES ('e-3', vector('[3,4,5]'));
7373
INSERT INTO t_text_pk VALUES ('e-4', vector('[4,5,6]'));
74-
SELECT rowid FROM vector_top_k('t_text_pk_idx', vector('[3,4,5]'), 4);
74+
SELECT id FROM vector_top_k('t_text_pk_idx', vector('[3,4,5]'), 4);
7575
} {3 4 2 1}
7676

7777
do_execsql_test vector-text-pk-norow {
78-
CREATE TABLE t_text_pk_norow( email TEXT PRIMARY KEY, v FLOAT32(3) ) WITHOUT ROWID;
78+
CREATE TABLE t_text_pk_norow( v1, v2, v3, email TEXT PRIMARY KEY, v FLOAT32(3) ) WITHOUT ROWID;
7979
CREATE INDEX t_text_pk_norow_idx ON t_text_pk_norow( libsql_vector_idx(v) );
80-
INSERT INTO t_text_pk_norow VALUES ('e-1', vector('[1,2,3]'));
81-
INSERT INTO t_text_pk_norow VALUES ('e-2', vector('[2,3,4]'));
82-
INSERT INTO t_text_pk_norow VALUES ('e-3', vector('[3,4,5]'));
83-
INSERT INTO t_text_pk_norow VALUES ('e-4', vector('[4,5,6]'));
80+
INSERT INTO t_text_pk_norow VALUES (1, 1, 1, 'e-1', vector('[1,2,3]'));
81+
INSERT INTO t_text_pk_norow VALUES (1, 1, 1, 'e-2', vector('[2,3,4]'));
82+
INSERT INTO t_text_pk_norow VALUES (1, 1, 1, 'e-3', vector('[3,4,5]'));
83+
INSERT INTO t_text_pk_norow VALUES (1, 1, 1, 'e-4', vector('[4,5,6]'));
8484
SELECT id FROM vector_top_k('t_text_pk_norow_idx', vector('[3,4,5]'), 4);
8585
} {e-3 e-4 e-2 e-1}
8686

@@ -210,6 +210,19 @@ do_execsql_test vector-index-dont-affect-sql {
210210
SELECT rowid FROM t_vector_other_sql WHERE emb = vector('[7,8]');
211211
} {4 1 1 2 3 4}
212212

213+
do_execsql_test vector-index-dont-affect-sql-pk {
214+
CREATE TABLE t_vector_other_sql_pk ( name TEXT PRIMARY KEY, emb FLOAT32(2) );
215+
INSERT INTO t_vector_other_sql_pk VALUES ('a', vector('[1,2]')), ('b', vector('[3,4]'));
216+
CREATE INDEX t_vector_other_sql_pk_idx ON t_vector_other_sql_pk(libsql_vector_idx(emb));
217+
INSERT INTO t_vector_other_sql_pk VALUES ('c', vector('[5,6]')), ('d', vector('[7,8]'));
218+
SELECT COUNT(*) FROM t_vector_other_sql_pk;
219+
SELECT COUNT(*) FROM t_vector_other_sql_pk WHERE emb = vector('[1,2]');
220+
SELECT rowid FROM t_vector_other_sql_pk WHERE emb = vector('[1,2]');
221+
SELECT rowid FROM t_vector_other_sql_pk WHERE emb = vector('[3,4]');
222+
SELECT rowid FROM t_vector_other_sql_pk WHERE emb = vector('[5,6]');
223+
SELECT rowid FROM t_vector_other_sql_pk WHERE emb = vector('[7,8]');
224+
} {4 1 1 2 3 4}
225+
213226
do_execsql_test vector-attach {
214227
CREATE TABLE t_attach ( emb FLOAT32(2) );
215228
INSERT INTO t_attach VALUES (vector('[1,2]')), (vector('[3,4]'));

0 commit comments

Comments
 (0)