Skip to content

Commit 21f405b

Browse files
authored
Merge pull request #1596 from tursodatabase/vector-search-adaptive-block
Vector search adaptive block
2 parents f70fcbe + 86624ef commit 21f405b

File tree

6 files changed

+196
-45
lines changed

6 files changed

+196
-45
lines changed

libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85036,6 +85036,8 @@ typedef u8 MetricType;
8503685036
#define VECTOR_SEARCH_L_PARAM_ID 9
8503785037
#define VECTOR_SEARCH_L_DEFAULT 200
8503885038

85039+
/* tag of the "max_neighbors" index parameter (see the VECTOR_PARAM_NAMES table) */
#define VECTOR_MAX_NEIGHBORS_PARAM_ID 10
85040+
8503985041
/* total amount of vector index parameters */
/* NOTE(review): VECTOR_MAX_NEIGHBORS_PARAM_ID above is 10 while this count is still 9 - confirm whether the count must be raised to cover the new id */
8504085042
#define VECTOR_PARAM_IDS_COUNT 9
8504185043

@@ -209459,6 +209461,7 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
209459209461
*/
209460209462
#ifndef SQLITE_OMIT_VECTOR
209461209463

209464+
/* #include "math.h" */
209462209465
/* #include "sqliteInt.h" */
209463209466
/* #include "vectorIndexInt.h" */
209464209467

@@ -209474,6 +209477,11 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
209474209477
// stack simplify memory management code and also doesn't impose very strict limits here since 128 bytes for column names should be enough for almost all use cases
209475209478
#define DISKANN_SQL_RENDER_LIMIT 128
209476209479

209480+
// limit to the maximum size of DiskANN block (128 MB)
209481+
// even with 1MB we can store tens of thousands of nodes in several GBs - which is already too much
209482+
// but we are "generous" here and allow user to store up to 128MB blobs
209483+
#define DISKANN_MAX_BLOCK_SZ 134217728
209484+
209477209485
/*
209478209486
* For historical reasons, the index block size parameter was stored as a u16 value divided by 512 (2^9).
209479209487
* So we apply the inverse transform before initializing the index from the stored parameters
@@ -209673,8 +209681,16 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
209673209681
** Layout specific utilities
209674209682
**************************************************************************/
209675209683

209684+
/*
** Size of one edge slot inside a node block: the edge vector payload
** (nEdgeVectorSize) plus the fixed per-edge metadata (VECTOR_EDGE_METADATA_SIZE).
** nodeEdgesMaxCount() divides the free space of a block by this value.
*/
int nodeEdgeOverhead(int nEdgeVectorSize){
209685+
return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
209686+
}
209687+
209688+
/*
** Size of the part of a node block that is not available for edges: the node
** vector payload (nNodeVectorSize) plus the fixed node metadata
** (VECTOR_NODE_METADATA_SIZE).
*/
int nodeOverhead(int nNodeVectorSize){
209689+
return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE;
209690+
}
209691+
209676209692
/*
** Number of edge slots that fit into one block of the index:
**   (block size - node vector size - node metadata) / (edge vector size + edge metadata)
** using integer division. Asserts that at least one edge fits.
** NOTE(review): the field types of DiskAnnIndex are declared elsewhere; confirm
** nBlockSize can never be smaller than the node overhead before the subtraction.
*/
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
209677-
unsigned int nMaxEdges = (pIndex->nBlockSize - pIndex->nNodeVectorSize - VECTOR_NODE_METADATA_SIZE) / (pIndex->nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE);
209693+
unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
209678209694
assert( nMaxEdges > 0);
209679209695
return nMaxEdges;
209680209696
}
@@ -209829,6 +209845,8 @@ int diskAnnCreateIndex(
209829209845
VectorIdxParams *pParams
209830209846
){
209831209847
int rc;
209848+
int type, dims;
209849+
u64 maxNeighborsParam, blockSizeBytes;
209832209850
char *zSql;
209833209851
char columnSqlDefs[DISKANN_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...)
209834209852
char columnSqlNames[DISKANN_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
@@ -209841,16 +209859,34 @@ int diskAnnCreateIndex(
209841209859
if( vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN) != 0 ){
209842209860
return SQLITE_ERROR;
209843209861
}
209862+
type = vectorIdxParamsGetU64(pParams, VECTOR_TYPE_PARAM_ID);
209863+
if( type == 0 ){
209864+
return SQLITE_ERROR;
209865+
}
209866+
dims = vectorIdxParamsGetU64(pParams, VECTOR_DIM_PARAM_ID);
209867+
if( dims == 0 ){
209868+
return SQLITE_ERROR;
209869+
}
209870+
assert( 0 < dims && dims <= MAX_VECTOR_SZ );
209871+
209872+
maxNeighborsParam = vectorIdxParamsGetU64(pParams, VECTOR_MAX_NEIGHBORS_PARAM_ID);
209873+
if( maxNeighborsParam == 0 ){
209874+
// 3 D**(1/2) gives good recall values (90%+)
209875+
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
209876+
maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(type, dims)) + 1);
209877+
}
209878+
blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(type, dims));
209879+
if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
209880+
return SQLITE_ERROR;
209881+
}
209882+
if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, MAX(256, blockSizeBytes)) != 0 ){
209883+
return SQLITE_ERROR;
209884+
}
209844209885
if( vectorIdxParamsGetU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID) == 0 ){
209845209886
if( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS) != 0 ){
209846209887
return SQLITE_ERROR;
209847209888
}
209848209889
}
209849-
if( vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID) == 0 ){
209850-
if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, VECTOR_BLOCK_SIZE_DEFAULT) != 0 ){
209851-
return SQLITE_ERROR;
209852-
}
209853-
}
209854209890
if( vectorIdxParamsGetF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID) == 0 ){
209855209891
if( vectorIdxParamsPutF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID, VECTOR_PRUNING_ALPHA_DEFAULT) != 0 ){
209856209892
return SQLITE_ERROR;
@@ -210840,6 +210876,7 @@ int diskAnnOpenIndex(
210840210876
DiskAnnIndex **ppIndex /* OUT: Index */
210841210877
){
210842210878
DiskAnnIndex *pIndex;
210879+
u64 nBlockSize;
210843210880
pIndex = sqlite3DbMallocRaw(db, sizeof(DiskAnnIndex));
210844210881
if( pIndex == NULL ){
210845210882
return SQLITE_NOMEM;
@@ -210852,9 +210889,15 @@ int diskAnnOpenIndex(
210852210889
diskAnnCloseIndex(pIndex);
210853210890
return SQLITE_NOMEM_BKPT;
210854210891
}
210892+
nBlockSize = vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID);
210893+
// preserve backward compatibility: treat block size > 128 literally, but <= 128 with shift
210894+
if( nBlockSize <= 128 ){
210895+
nBlockSize <<= DISKANN_BLOCK_SIZE_SHIFT;
210896+
}
210897+
210855210898
pIndex->nFormatVersion = vectorIdxParamsGetU64(pParams, VECTOR_FORMAT_PARAM_ID);
210856210899
pIndex->nDistanceFunc = vectorIdxParamsGetU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID);
210857-
pIndex->nBlockSize = vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID) << DISKANN_BLOCK_SIZE_SHIFT;
210900+
pIndex->nBlockSize = nBlockSize;
210858210901
pIndex->nNodeVectorType = vectorIdxParamsGetU64(pParams, VECTOR_TYPE_PARAM_ID);
210859210902
pIndex->nVectorDims = vectorIdxParamsGetU64(pParams, VECTOR_DIM_PARAM_ID);
210860210903
pIndex->pruningAlpha = vectorIdxParamsGetF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID);
@@ -211810,18 +211853,19 @@ static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
211810211853
/*
** One row of the table of index parameters recognised by parseVectorIdxParam():
**   zName     - parameter name
**   tag       - VECTOR_*_PARAM_ID under which the value is stored
**   type      - kind of value (see the field comment)
**   zValueStr - for string enums: the accepted spelling; 0 in integer/float rows
**   value     - for string enums: the numeric value stored for zValueStr; 0 in integer/float rows
*/
struct VectorParamName {
211811211854
const char *zName;
211812211855
int tag;
211813-
int type; // 0 - enum, 1 - integer, 2 - float
211856+
int type; // 0 - string enum, 1 - integer, 2 - float
211814211857
const char *zValueStr;
211815211858
u64 value;
211816211859
};
211817211860

211818211861
/*
** Table of supported index parameters, indexed by parseVectorIdxParam().
** String-enum parameters (type 0) have one row per accepted spelling, which is
** why "metric" appears once for "cosine" and once for "l2".
** Integer (type 1) and float (type 2) rows carry no predefined value.
*/
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
211819-
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
211820-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
211821-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
211822-
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
211823-
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
211824-
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 2, 0, 0 },
211862+
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
211863+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
211864+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
211865+
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
211866+
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
211867+
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 1, 0, 0 },
211868+
{ "max_neighbors", VECTOR_MAX_NEIGHBORS_PARAM_ID, 1, 0, 0 },
211825211869
};
211826211870

211827211871
static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, const char **pErrMsg) {
@@ -211841,11 +211885,15 @@ static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, con
211841211885
continue;
211842211886
}
211843211887
if( VECTOR_PARAM_NAMES[i].type == 1 ){
211844-
u64 value = sqlite3Atoi(zValue);
211888+
int value = sqlite3Atoi(zValue);
211845211889
if( value == 0 ){
211846211890
*pErrMsg = "invalid representation of integer vector index parameter";
211847211891
return -1;
211848211892
}
211893+
if( value < 0 ){
211894+
*pErrMsg = "integer vector index parameter must be positive";
211895+
return -1;
211896+
}
211849211897
if( vectorIdxParamsPutU64(pParams, VECTOR_PARAM_NAMES[i].tag, value) != 0 ){
211850211898
*pErrMsg = "unable to serialize integer vector index parameter";
211851211899
return -1;

libsql-ffi/bundled/src/sqlite3.c

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85036,6 +85036,8 @@ typedef u8 MetricType;
8503685036
#define VECTOR_SEARCH_L_PARAM_ID 9
8503785037
#define VECTOR_SEARCH_L_DEFAULT 200
8503885038

85039+
/* tag of the "max_neighbors" index parameter (see the VECTOR_PARAM_NAMES table) */
#define VECTOR_MAX_NEIGHBORS_PARAM_ID 10
85040+
8503985041
/* total amount of vector index parameters */
/* NOTE(review): VECTOR_MAX_NEIGHBORS_PARAM_ID above is 10 while this count is still 9 - confirm whether the count must be raised to cover the new id */
8504085042
#define VECTOR_PARAM_IDS_COUNT 9
8504185043

@@ -209459,6 +209461,7 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
209459209461
*/
209460209462
#ifndef SQLITE_OMIT_VECTOR
209461209463

209464+
/* #include "math.h" */
209462209465
/* #include "sqliteInt.h" */
209463209466
/* #include "vectorIndexInt.h" */
209464209467

@@ -209474,6 +209477,11 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
209474209477
// stack simplify memory management code and also doesn't impose very strict limits here since 128 bytes for column names should be enough for almost all use cases
209475209478
#define DISKANN_SQL_RENDER_LIMIT 128
209476209479

209480+
// limit to the maximum size of DiskANN block (128 MB)
209481+
// even with 1MB we can store tens of thousands of nodes in several GBs - which is already too much
209482+
// but we are "generous" here and allow user to store up to 128MB blobs
209483+
#define DISKANN_MAX_BLOCK_SZ 134217728
209484+
209477209485
/*
209478209486
* For historical reasons, the index block size parameter was stored as a u16 value divided by 512 (2^9).
209479209487
* So we apply the inverse transform before initializing the index from the stored parameters
@@ -209673,8 +209681,16 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
209673209681
** Layout specific utilities
209674209682
**************************************************************************/
209675209683

209684+
/*
** Size of one edge slot inside a node block: the edge vector payload
** (nEdgeVectorSize) plus the fixed per-edge metadata (VECTOR_EDGE_METADATA_SIZE).
** nodeEdgesMaxCount() divides the free space of a block by this value.
*/
int nodeEdgeOverhead(int nEdgeVectorSize){
209685+
return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
209686+
}
209687+
209688+
/*
** Size of the part of a node block that is not available for edges: the node
** vector payload (nNodeVectorSize) plus the fixed node metadata
** (VECTOR_NODE_METADATA_SIZE).
*/
int nodeOverhead(int nNodeVectorSize){
209689+
return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE;
209690+
}
209691+
209676209692
/*
** Number of edge slots that fit into one block of the index:
**   (block size - node vector size - node metadata) / (edge vector size + edge metadata)
** using integer division. Asserts that at least one edge fits.
** NOTE(review): the field types of DiskAnnIndex are declared elsewhere; confirm
** nBlockSize can never be smaller than the node overhead before the subtraction.
*/
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
209677-
unsigned int nMaxEdges = (pIndex->nBlockSize - pIndex->nNodeVectorSize - VECTOR_NODE_METADATA_SIZE) / (pIndex->nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE);
209693+
unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
209678209694
assert( nMaxEdges > 0);
209679209695
return nMaxEdges;
209680209696
}
@@ -209829,6 +209845,8 @@ int diskAnnCreateIndex(
209829209845
VectorIdxParams *pParams
209830209846
){
209831209847
int rc;
209848+
int type, dims;
209849+
u64 maxNeighborsParam, blockSizeBytes;
209832209850
char *zSql;
209833209851
char columnSqlDefs[DISKANN_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...)
209834209852
char columnSqlNames[DISKANN_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...)
@@ -209841,16 +209859,34 @@ int diskAnnCreateIndex(
209841209859
if( vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN) != 0 ){
209842209860
return SQLITE_ERROR;
209843209861
}
209862+
type = vectorIdxParamsGetU64(pParams, VECTOR_TYPE_PARAM_ID);
209863+
if( type == 0 ){
209864+
return SQLITE_ERROR;
209865+
}
209866+
dims = vectorIdxParamsGetU64(pParams, VECTOR_DIM_PARAM_ID);
209867+
if( dims == 0 ){
209868+
return SQLITE_ERROR;
209869+
}
209870+
assert( 0 < dims && dims <= MAX_VECTOR_SZ );
209871+
209872+
maxNeighborsParam = vectorIdxParamsGetU64(pParams, VECTOR_MAX_NEIGHBORS_PARAM_ID);
209873+
if( maxNeighborsParam == 0 ){
209874+
// 3 D**(1/2) gives good recall values (90%+)
209875+
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
209876+
maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(type, dims)) + 1);
209877+
}
209878+
blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(type, dims));
209879+
if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
209880+
return SQLITE_ERROR;
209881+
}
209882+
if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, MAX(256, blockSizeBytes)) != 0 ){
209883+
return SQLITE_ERROR;
209884+
}
209844209885
if( vectorIdxParamsGetU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID) == 0 ){
209845209886
if( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS) != 0 ){
209846209887
return SQLITE_ERROR;
209847209888
}
209848209889
}
209849-
if( vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID) == 0 ){
209850-
if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, VECTOR_BLOCK_SIZE_DEFAULT) != 0 ){
209851-
return SQLITE_ERROR;
209852-
}
209853-
}
209854209890
if( vectorIdxParamsGetF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID) == 0 ){
209855209891
if( vectorIdxParamsPutF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID, VECTOR_PRUNING_ALPHA_DEFAULT) != 0 ){
209856209892
return SQLITE_ERROR;
@@ -210840,6 +210876,7 @@ int diskAnnOpenIndex(
210840210876
DiskAnnIndex **ppIndex /* OUT: Index */
210841210877
){
210842210878
DiskAnnIndex *pIndex;
210879+
u64 nBlockSize;
210843210880
pIndex = sqlite3DbMallocRaw(db, sizeof(DiskAnnIndex));
210844210881
if( pIndex == NULL ){
210845210882
return SQLITE_NOMEM;
@@ -210852,9 +210889,15 @@ int diskAnnOpenIndex(
210852210889
diskAnnCloseIndex(pIndex);
210853210890
return SQLITE_NOMEM_BKPT;
210854210891
}
210892+
nBlockSize = vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID);
210893+
// preserve backward compatibility: treat block size > 128 literally, but <= 128 with shift
210894+
if( nBlockSize <= 128 ){
210895+
nBlockSize <<= DISKANN_BLOCK_SIZE_SHIFT;
210896+
}
210897+
210855210898
pIndex->nFormatVersion = vectorIdxParamsGetU64(pParams, VECTOR_FORMAT_PARAM_ID);
210856210899
pIndex->nDistanceFunc = vectorIdxParamsGetU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID);
210857-
pIndex->nBlockSize = vectorIdxParamsGetU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID) << DISKANN_BLOCK_SIZE_SHIFT;
210900+
pIndex->nBlockSize = nBlockSize;
210858210901
pIndex->nNodeVectorType = vectorIdxParamsGetU64(pParams, VECTOR_TYPE_PARAM_ID);
210859210902
pIndex->nVectorDims = vectorIdxParamsGetU64(pParams, VECTOR_DIM_PARAM_ID);
210860210903
pIndex->pruningAlpha = vectorIdxParamsGetF64(pParams, VECTOR_PRUNING_ALPHA_PARAM_ID);
@@ -211810,18 +211853,19 @@ static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
211810211853
/*
** One row of the table of index parameters recognised by parseVectorIdxParam():
**   zName     - parameter name
**   tag       - VECTOR_*_PARAM_ID under which the value is stored
**   type      - kind of value (see the field comment)
**   zValueStr - for string enums: the accepted spelling; 0 in integer/float rows
**   value     - for string enums: the numeric value stored for zValueStr; 0 in integer/float rows
*/
struct VectorParamName {
211811211854
const char *zName;
211812211855
int tag;
211813-
int type; // 0 - enum, 1 - integer, 2 - float
211856+
int type; // 0 - string enum, 1 - integer, 2 - float
211814211857
const char *zValueStr;
211815211858
u64 value;
211816211859
};
211817211860

211818211861
/*
** Table of supported index parameters, indexed by parseVectorIdxParam().
** String-enum parameters (type 0) have one row per accepted spelling, which is
** why "metric" appears once for "cosine" and once for "l2".
** Integer (type 1) and float (type 2) rows carry no predefined value.
*/
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
211819-
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
211820-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
211821-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
211822-
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
211823-
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
211824-
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 2, 0, 0 },
211862+
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
211863+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
211864+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
211865+
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
211866+
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
211867+
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 1, 0, 0 },
211868+
{ "max_neighbors", VECTOR_MAX_NEIGHBORS_PARAM_ID, 1, 0, 0 },
211825211869
};
211826211870

211827211871
static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, const char **pErrMsg) {
@@ -211841,11 +211885,15 @@ static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, con
211841211885
continue;
211842211886
}
211843211887
if( VECTOR_PARAM_NAMES[i].type == 1 ){
211844-
u64 value = sqlite3Atoi(zValue);
211888+
int value = sqlite3Atoi(zValue);
211845211889
if( value == 0 ){
211846211890
*pErrMsg = "invalid representation of integer vector index parameter";
211847211891
return -1;
211848211892
}
211893+
if( value < 0 ){
211894+
*pErrMsg = "integer vector index parameter must be positive";
211895+
return -1;
211896+
}
211849211897
if( vectorIdxParamsPutU64(pParams, VECTOR_PARAM_NAMES[i].tag, value) != 0 ){
211850211898
*pErrMsg = "unable to serialize integer vector index parameter";
211851211899
return -1;

libsql-sqlite3/src/vectorIndex.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -383,18 +383,19 @@ static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
383383
/*
** One row of the table of index parameters recognised by parseVectorIdxParam():
**   zName     - parameter name
**   tag       - VECTOR_*_PARAM_ID under which the value is stored
**   type      - kind of value (see the field comment)
**   zValueStr - for string enums: the accepted spelling; 0 in integer/float rows
**   value     - for string enums: the numeric value stored for zValueStr; 0 in integer/float rows
*/
struct VectorParamName {
384384
const char *zName;
385385
int tag;
386-
int type; // 0 - enum, 1 - integer, 2 - float
386+
int type; // 0 - string enum, 1 - integer, 2 - float
387387
const char *zValueStr;
388388
u64 value;
389389
};
390390

391391
/*
** Table of supported index parameters, indexed by parseVectorIdxParam().
** String-enum parameters (type 0) have one row per accepted spelling, which is
** why "metric" appears once for "cosine" and once for "l2".
** Integer (type 1) and float (type 2) rows carry no predefined value.
*/
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
392-
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
393-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
394-
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
395-
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
396-
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
397-
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 2, 0, 0 },
392+
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
393+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
394+
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "l2", VECTOR_METRIC_TYPE_L2 },
395+
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
396+
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
397+
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 1, 0, 0 },
398+
{ "max_neighbors", VECTOR_MAX_NEIGHBORS_PARAM_ID, 1, 0, 0 },
398399
};
399400

400401
static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, const char **pErrMsg) {
@@ -414,11 +415,15 @@ static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, con
414415
continue;
415416
}
416417
if( VECTOR_PARAM_NAMES[i].type == 1 ){
417-
u64 value = sqlite3Atoi(zValue);
418+
int value = sqlite3Atoi(zValue);
418419
if( value == 0 ){
419420
*pErrMsg = "invalid representation of integer vector index parameter";
420421
return -1;
421422
}
423+
if( value < 0 ){
424+
*pErrMsg = "integer vector index parameter must be positive";
425+
return -1;
426+
}
422427
if( vectorIdxParamsPutU64(pParams, VECTOR_PARAM_NAMES[i].tag, value) != 0 ){
423428
*pErrMsg = "unable to serialize integer vector index parameter";
424429
return -1;

libsql-sqlite3/src/vectorIndexInt.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ typedef u8 MetricType;
130130
#define VECTOR_SEARCH_L_PARAM_ID 9
131131
#define VECTOR_SEARCH_L_DEFAULT 200
132132

133+
/* tag of the "max_neighbors" index parameter (see the VECTOR_PARAM_NAMES table) */
#define VECTOR_MAX_NEIGHBORS_PARAM_ID 10
134+
133135
/* total amount of vector index parameters */
/* NOTE(review): VECTOR_MAX_NEIGHBORS_PARAM_ID above is 10 while this count is still 9 - confirm whether the count must be raised to cover the new id */
134136
#define VECTOR_PARAM_IDS_COUNT 9
135137

0 commit comments

Comments
 (0)