Skip to content

Commit a964315

Browse files
authored
Merge pull request #1610 from tursodatabase/vector-search-refine-errors
refine errors for vector search
2 parents 21f405b + f5062f4 commit a964315

File tree

9 files changed

+307
-290
lines changed

9 files changed

+307
-290
lines changed

libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c

Lines changed: 91 additions & 90 deletions
Large diffs are not rendered by default.

libsql-ffi/bundled/src/sqlite3.c

Lines changed: 91 additions & 90 deletions
Large diffs are not rendered by default.

libsql-sqlite3/src/vector.c

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,22 @@ float vectorDistanceL2(const Vector *pVector1, const Vector *pVector2){
130130
return 0;
131131
}
132132

133+
const char *sqlite3_type_repr(int type){
134+
switch( type ){
135+
case SQLITE_NULL:
136+
return "NULL";
137+
case SQLITE_INTEGER:
138+
return "INTEGER";
139+
case SQLITE_FLOAT:
140+
return "FLOAT";
141+
case SQLITE_BLOB:
142+
return "BLOB";
143+
case SQLITE_TEXT:
144+
return "TEXT";
145+
default:
146+
return "UNKNOWN";
147+
}
148+
}
133149
/*
134150
* Parses vector from text representation (e.g. '[1,2,3]'); vector type must be set
135151
*/
@@ -149,26 +165,22 @@ static int vectorParseSqliteText(
149165
int iBuf = 0;
150166

151167
assert( pVector->type == VECTOR_TYPE_FLOAT32 || pVector->type == VECTOR_TYPE_FLOAT64 );
168+
assert( sqlite3_value_type(arg) == SQLITE_TEXT );
152169

153170
if( pVector->type == VECTOR_TYPE_FLOAT32 ){
154171
elemsFloat = pVector->data;
155172
} else if( pVector->type == VECTOR_TYPE_FLOAT64 ){
156173
elemsDouble = pVector->data;
157174
}
158175

159-
if( sqlite3_value_type(arg) != SQLITE_TEXT ){
160-
*pzErrMsg = sqlite3_mprintf("invalid vector: not a text type");
161-
goto error;
162-
}
163-
164176
pzText = sqlite3_value_text(arg);
165177
if ( pzText == NULL ) return 0;
166178

167179
while( sqlite3Isspace(*pzText) )
168180
pzText++;
169181

170182
if( *pzText != '[' ){
171-
*pzErrMsg = sqlite3_mprintf("invalid vector: doesn't start with '['");
183+
*pzErrMsg = sqlite3_mprintf("vector: must start with '['");
172184
goto error;
173185
}
174186
pzText++;
@@ -183,7 +195,7 @@ static int vectorParseSqliteText(
183195
}
184196
if( this != ',' && this != ']' ){
185197
if( iBuf > MAX_FLOAT_CHAR_SZ ){
186-
*pzErrMsg = sqlite3_mprintf("float too big while parsing vector: '%s'", valueBuf);
198+
*pzErrMsg = sqlite3_mprintf("vector: float string length exceeded %d characters: '%s'", MAX_FLOAT_CHAR_SZ, valueBuf);
187199
goto error;
188200
}
189201
valueBuf[iBuf++] = this;
@@ -194,11 +206,11 @@ static int vectorParseSqliteText(
194206
break;
195207
}
196208
if( sqlite3AtoF(valueBuf, &elem, iBuf, SQLITE_UTF8) <= 0 ){
197-
*pzErrMsg = sqlite3_mprintf("invalid number: '%s'", valueBuf);
209+
*pzErrMsg = sqlite3_mprintf("vector: invalid float at position %d: '%s'", iElem, valueBuf);
198210
goto error;
199211
}
200212
if( iElem >= MAX_VECTOR_SZ ){
201-
*pzErrMsg = sqlite3_mprintf("vector is larger than the maximum: (%d)", MAX_VECTOR_SZ);
213+
*pzErrMsg = sqlite3_mprintf("vector: max size exceeded %d", MAX_VECTOR_SZ);
202214
goto error;
203215
}
204216
// clear only first bufidx positions - all other are zero
@@ -217,7 +229,7 @@ static int vectorParseSqliteText(
217229
pzText++;
218230

219231
if( *pzText != ']' ){
220-
*pzErrMsg = sqlite3_mprintf("malformed vector, doesn't end with ']'");
232+
*pzErrMsg = sqlite3_mprintf("vector: must end with ']'");
221233
goto error;
222234
}
223235
pzText++;
@@ -226,7 +238,7 @@ static int vectorParseSqliteText(
226238
pzText++;
227239

228240
if( *pzText != '\0' ){
229-
*pzErrMsg = sqlite3_mprintf("malformed vector, extra data after closing ']'");
241+
*pzErrMsg = sqlite3_mprintf("vector: non-space symbols after closing ']' are forbidden");
230242
goto error;
231243
}
232244
pVector->dims = iElem;
@@ -271,11 +283,11 @@ int detectBlobVectorParameters(sqlite3_value *arg, int *pType, int *pDims, char
271283
} else if( *pType == VECTOR_TYPE_FLOAT64 ){
272284
*pDims = nBlobSize / sizeof(double);
273285
} else{
274-
*pzErrMsg = sqlite3_mprintf("invalid binary vector: unexpected type: %d", *pType);
286+
*pzErrMsg = sqlite3_mprintf("vector: unexpected binary type: got %d, expected %d or %d", *pType, VECTOR_TYPE_FLOAT32, VECTOR_TYPE_FLOAT64);
275287
return -1;
276288
}
277289
if( *pDims > MAX_VECTOR_SZ ){
278-
*pzErrMsg = sqlite3_mprintf("invalid binary vector: max size exceeded: %d > %d", *pDims, MAX_VECTOR_SZ);
290+
*pzErrMsg = sqlite3_mprintf("vector: max size exceeded: %d > %d", *pDims, MAX_VECTOR_SZ);
279291
return -1;
280292
}
281293
return 0;
@@ -317,15 +329,12 @@ int detectTextVectorParameters(sqlite3_value *arg, int typeHint, int *pType, int
317329

318330
int detectVectorParameters(sqlite3_value *arg, int typeHint, int *pType, int *pDims, char **pzErrMsg) {
319331
switch( sqlite3_value_type(arg) ){
320-
case SQLITE_NULL:
321-
*pzErrMsg = sqlite3_mprintf("invalid vector: NULL");
322-
return -1;
323332
case SQLITE_BLOB:
324333
return detectBlobVectorParameters(arg, pType, pDims, pzErrMsg);
325334
case SQLITE_TEXT:
326335
return detectTextVectorParameters(arg, typeHint, pType, pDims, pzErrMsg);
327336
default:
328-
*pzErrMsg = sqlite3_mprintf("invalid vector: not a text or blob type");
337+
*pzErrMsg = sqlite3_mprintf("vector: unexpected value type: got %s, expected TEXT or BLOB", sqlite3_type_repr(sqlite3_value_type(arg)));
329338
return -1;
330339
}
331340
}
@@ -336,15 +345,12 @@ int vectorParse(
336345
char **pzErrMsg
337346
){
338347
switch( sqlite3_value_type(arg) ){
339-
case SQLITE_NULL:
340-
*pzErrMsg = sqlite3_mprintf("invalid vector: NULL");
341-
return -1;
342348
case SQLITE_BLOB:
343349
return vectorParseSqliteBlob(arg, pVector, pzErrMsg);
344350
case SQLITE_TEXT:
345351
return vectorParseSqliteText(arg, pVector, pzErrMsg);
346352
default:
347-
*pzErrMsg = sqlite3_mprintf("invalid vector: not a text or blob type");
353+
*pzErrMsg = sqlite3_mprintf("vector: unexpected value type: got %s, expected TEXT or BLOB", sqlite3_type_repr(sqlite3_value_type(arg)));
348354
return -1;
349355
}
350356
}
@@ -545,11 +551,15 @@ static void vectorDistanceCosFunc(
545551
goto out_free;
546552
}
547553
if( type1 != type2 ){
548-
sqlite3_result_error(context, "vectors must have the same type", -1);
554+
pzErrMsg = sqlite3_mprintf("vector_distance_cos: vectors must have the same type: %d != %d", type1, type2);
555+
sqlite3_result_error(context, pzErrMsg, -1);
556+
sqlite3_free(pzErrMsg);
549557
goto out_free;
550558
}
551559
if( dims1 != dims2 ){
552-
sqlite3_result_error(context, "vectors must have the same length", -1);
560+
pzErrMsg = sqlite3_mprintf("vector_distance_cos: vectors must have the same length: %d != %d", dims1, dims2);
561+
sqlite3_result_error(context, pzErrMsg, -1);
562+
sqlite3_free(pzErrMsg);
553563
goto out_free;
554564
}
555565
pVector1 = vectorContextAlloc(context, type1, dims1);

libsql-sqlite3/src/vectorIndex.c

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, con
411411
zValue = zParam + iDelimiter + 1;
412412
nValueLen = sqlite3Strlen30(zValue);
413413
for(i = 0; i < ArraySize(VECTOR_PARAM_NAMES); i++){
414-
if( sqlite3_strnicmp(VECTOR_PARAM_NAMES[i].zName, zParam, iDelimiter) != 0 ){
414+
if( iDelimiter != strlen(VECTOR_PARAM_NAMES[i].zName) || sqlite3_strnicmp(VECTOR_PARAM_NAMES[i].zName, zParam, iDelimiter) != 0 ){
415415
continue;
416416
}
417417
if( VECTOR_PARAM_NAMES[i].type == 1 ){
@@ -454,33 +454,33 @@ static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, con
454454
return -1;
455455
}
456456
}
457-
*pErrMsg = "unexpected parameter key";
457+
*pErrMsg = "invalid parameter";
458458
return -1;
459459
}
460460

461461
int parseVectorIdxParams(Parse *pParse, VectorIdxParams *pParams, int type, int dims, struct ExprList_item *pArgList, int nArgs) {
462462
int i;
463463
const char *pErrMsg;
464464
if( vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, VECTOR_FORMAT_DEFAULT) != 0 ){
465-
sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: format");
465+
sqlite3ErrorMsg(pParse, "vector index: unable to serialize vector index parameter: format");
466466
return SQLITE_ERROR;
467467
}
468468
if( vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, type) != 0 ){
469-
sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: type");
469+
sqlite3ErrorMsg(pParse, "vector index: unable to serialize vector index parameter: type");
470470
return SQLITE_ERROR;
471471
}
472472
if( vectorIdxParamsPutU64(pParams, VECTOR_DIM_PARAM_ID, dims) != 0 ){
473-
sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: dim");
473+
sqlite3ErrorMsg(pParse, "vector index: unable to serialize vector index parameter: dim");
474474
return SQLITE_ERROR;
475475
}
476476
for(i = 0; i < nArgs; i++){
477477
Expr *pArgExpr = pArgList[i].pExpr;
478478
if( pArgExpr->op != TK_STRING ){
479-
sqlite3ErrorMsg(pParse, "all arguments after first must be strings");
479+
sqlite3ErrorMsg(pParse, "vector index: all arguments after first must be strings");
480480
return SQLITE_ERROR;
481481
}
482482
if( parseVectorIdxParam(pArgExpr->u.zToken, pParams, &pErrMsg) != 0 ){
483-
sqlite3ErrorMsg(pParse, "invalid vector index parameter '%s': %s", pArgExpr->u.zToken, pErrMsg);
483+
sqlite3ErrorMsg(pParse, "vector index: invalid vector index parameter '%s': %s", pArgExpr->u.zToken, pErrMsg);
484484
return SQLITE_ERROR;
485485
}
486486
}
@@ -830,9 +830,9 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co
830830
// backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax
831831
if( pParse->db->init.busy == 0 && pUsing != NULL ){
832832
if( pIdx->zName != NULL && pTable->zName != NULL && pIdx->nKeyCol == 1 && pIdx->aiColumn != NULL && pIdx->aiColumn[0] < pTable->nCol ){
833-
sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX %s ON %s ( " VECTOR_INDEX_MARKER_FUNCTION "(%s) )", pIdx->zName, pTable->zName, pTable->aCol[pIdx->aiColumn[0]].zCnName);
833+
sqlite3ErrorMsg(pParse, "vector index: USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX %s ON %s ( " VECTOR_INDEX_MARKER_FUNCTION "(%s) )", pIdx->zName, pTable->zName, pTable->aCol[pIdx->aiColumn[0]].zCnName);
834834
} else {
835-
sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )");
835+
sqlite3ErrorMsg(pParse, "vector index: USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )");
836836
}
837837
return CREATE_FAIL;
838838
}
@@ -860,40 +860,40 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co
860860
return CREATE_IGNORE;
861861
}
862862
if( hasCollation ){
863-
sqlite3ErrorMsg(pParse, "vector index can't have collation");
863+
sqlite3ErrorMsg(pParse, "vector index: collation in expression is forbidden");
864864
return CREATE_FAIL;
865865
}
866866
if( pIdx->aColExpr->nExpr != 1 ) {
867-
sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function");
867+
sqlite3ErrorMsg(pParse, "vector index: must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function");
868868
return CREATE_FAIL;
869869
}
870870
// we are able to support this but I doubt this works for now - more polishing required to make this work
871871
if( pIdx->pPartIdxWhere != NULL ) {
872-
sqlite3ErrorMsg(pParse, "partial vector index is not supported");
872+
sqlite3ErrorMsg(pParse, "vector index: where condition is forbidden");
873873
return CREATE_FAIL;
874874
}
875875

876876
pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList;
877877
pListItem = pArgsList->a;
878878

879879
if( pArgsList->nExpr < 1 ){
880-
sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument");
880+
sqlite3ErrorMsg(pParse, "vector index: " VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument");
881881
return CREATE_FAIL;
882882
}
883883
if( pListItem[0].pExpr->op != TK_COLUMN ) {
884-
sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token");
884+
sqlite3ErrorMsg(pParse, "vector index: " VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token");
885885
return CREATE_FAIL;
886886
}
887887
iEmbeddingColumn = pListItem[0].pExpr->iColumn;
888888
if( iEmbeddingColumn < 0 ) {
889-
sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type");
889+
sqlite3ErrorMsg(pParse, "vector index: " VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type");
890890
return CREATE_FAIL;
891891
}
892892
assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol );
893893

894894
zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], "");
895895
if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){
896-
sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName);
896+
sqlite3ErrorMsg(pParse, "vector index: %s: %s", pzErrMsg, zEmbeddingColumnTypeName);
897897
return CREATE_FAIL;
898898
}
899899

@@ -904,25 +904,24 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co
904904

905905
rc = initVectorIndexMetaTable(db, zDbSName);
906906
if( rc != SQLITE_OK ){
907-
sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db));
907+
sqlite3ErrorMsg(pParse, "vector index: failed to init meta table: %s", sqlite3_errmsg(db));
908908
return CREATE_FAIL;
909909
}
910910
rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1);
911911
if( rc != SQLITE_OK ){
912-
sqlite3ErrorMsg(pParse, "failed to parse vector idx params");
913912
return CREATE_FAIL;
914913
}
915914
if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){
916-
sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg);
915+
sqlite3ErrorMsg(pParse, "vector index: failed to detect underlying table key: %s", pzErrMsg);
917916
return CREATE_FAIL;
918917
}
919918
if( idxKey.nKeyColumns != 1 ){
920-
sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported");
919+
sqlite3ErrorMsg(pParse, "vector index: unsupported for tables without ROWID and composite primary key");
921920
return CREATE_FAIL;
922921
}
923922
rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams);
924923
if( rc != SQLITE_OK ){
925-
sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index");
924+
sqlite3ErrorMsg(pParse, "vector index: unable to initialize diskann");
926925
return CREATE_FAIL;
927926
}
928927
rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams);
@@ -933,7 +932,7 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co
933932
return CREATE_OK_SKIP_REFILL;
934933
}
935934
if( rc != SQLITE_OK ){
936-
sqlite3ErrorMsg(pParse, "unable to update global metadata table");
935+
sqlite3ErrorMsg(pParse, "vector index: unable to update global metadata table");
937936
return CREATE_FAIL;
938937
}
939938
return CREATE_OK;
@@ -954,7 +953,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value
954953
assert( zDbSName != NULL );
955954

956955
if( argc != 3 ){
957-
*pzErrMsg = sqlite3_mprintf("vector search must have exactly 3 parameters");
956+
*pzErrMsg = sqlite3_mprintf("vector index(search): got %d parameters, expected 3", argc);
958957
rc = SQLITE_ERROR;
959958
goto out;
960959
}
@@ -963,7 +962,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value
963962
goto out;
964963
}
965964
if( type != VECTOR_TYPE_FLOAT32 ){
966-
*pzErrMsg = sqlite3_mprintf("only f32 vectors are supported");
965+
*pzErrMsg = sqlite3_mprintf("vector index(search): only f32 vectors are supported");
967966
rc = SQLITE_ERROR;
968967
goto out;
969968
}
@@ -977,40 +976,40 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value
977976
goto out;
978977
}
979978
if( sqlite3_value_type(argv[2]) != SQLITE_INTEGER ){
980-
*pzErrMsg = sqlite3_mprintf("vector search third parameter (k) must be an integer");
979+
*pzErrMsg = sqlite3_mprintf("vector index(search): third parameter (k) must be a non-negative integer");
981980
rc = SQLITE_ERROR;
982981
goto out;
983982
}
984983
k = sqlite3_value_int(argv[2]);
985984
if( k < 0 ){
986-
*pzErrMsg = sqlite3_mprintf("k must be a non-negative integer");
985+
*pzErrMsg = sqlite3_mprintf("vector index(search): third parameter (k) must be a non-negative integer");
987986
rc = SQLITE_ERROR;
988987
goto out;
989988
}
990989
if( sqlite3_value_type(argv[0]) != SQLITE_TEXT ){
991-
*pzErrMsg = sqlite3_mprintf("vector search first parameter (index) must be a string");
990+
*pzErrMsg = sqlite3_mprintf("vector index(search): first parameter (index) must be a string");
992991
rc = SQLITE_ERROR;
993992
goto out;
994993
}
995994
zIdxName = (const char*)sqlite3_value_text(argv[0]);
996995
if( vectorIndexGetParameters(db, zIdxName, &idxParams) != 0 ){
997-
*pzErrMsg = sqlite3_mprintf("failed to parse vector index parameters");
996+
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse vector index parameters");
998997
rc = SQLITE_ERROR;
999998
goto out;
1000999
}
10011000
pIndex = sqlite3FindIndex(db, zIdxName, zDbSName);
10021001
if( pIndex == NULL ){
1003-
*pzErrMsg = sqlite3_mprintf("vector index not found");
1002+
*pzErrMsg = sqlite3_mprintf("vector index(search): index not found");
10041003
rc = SQLITE_ERROR;
10051004
goto out;
10061005
}
10071006
rc = diskAnnOpenIndex(db, zDbSName, zIdxName, &idxParams, &pDiskAnn);
10081007
if( rc != SQLITE_OK ){
1009-
*pzErrMsg = sqlite3_mprintf("failed to open diskann index");
1008+
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to open diskann index");
10101009
goto out;
10111010
}
10121011
if( vectorIdxKeyGet(pIndex->pTable, &pKey, &zErrMsg) != 0 ){
1013-
*pzErrMsg = sqlite3_mprintf("failed to extract table key: %s", zErrMsg);
1012+
*pzErrMsg = sqlite3_mprintf("vector index(search): failed to extract table key: %s", zErrMsg);
10141013
rc = SQLITE_ERROR;
10151014
goto out;
10161015
}

0 commit comments

Comments
 (0)