Skip to content

Commit 848b08e

Browse files
committed
make nodeMetadataSize/edgeMetadataSize dynamic
1 parent 65d4b19 commit 848b08e

File tree

1 file changed

+36 -30 lines changed

1 file changed

+36 -30 lines changed

libsql-sqlite3/src/vectordiskann.c

Lines changed: 36 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,6 @@
7676
*/
7777
#define DISKANN_BLOCK_SIZE_SHIFT 9
7878

79-
#define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
80-
#define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
8179

8280
typedef struct VectorPair VectorPair;
8381
typedef struct DiskAnnSearchCtx DiskAnnSearchCtx;
@@ -300,46 +298,54 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
300298
** Layout specific utilities
301299
**************************************************************************/
302300

303-
int nodeEdgeOverhead(int nEdgeVectorSize){
304-
return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
301+
int nodeMetadataSize(int nFormatVersion){
302+
return (sizeof(u64) + sizeof(u16));
305303
}
306304

307-
int nodeOverhead(int nNodeVectorSize){
308-
return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE;
305+
int edgeMetadataSize(int nFormatVersion){
306+
return (sizeof(u64) + sizeof(u64));
307+
}
308+
309+
int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
310+
return nEdgeVectorSize + edgeMetadataSize(nFormatVersion);
311+
}
312+
313+
int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
314+
return nNodeVectorSize + nodeMetadataSize(nFormatVersion);
309315
}
310316

311317
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
312-
unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
318+
unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nFormatVersion, pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nFormatVersion, pIndex->nEdgeVectorSize);
313319
assert( nMaxEdges > 0);
314320
return nMaxEdges;
315321
}
316322

317323
int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex){
318324
unsigned int offset;
319325
unsigned int nMaxEdges = nodeEdgesMaxCount(pIndex);
320-
offset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
326+
offset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
321327
assert( offset <= pIndex->nBlockSize );
322328
return offset;
323329
}
324330

325331
void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
326-
assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
332+
assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
327333

328334
memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
329335
writeLE64(pBlobSpot->pBuffer, nRowid);
330336
// neighbours count already zero after memset - no need to set it explicitly
331337

332-
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE, pIndex->nNodeVectorSize);
338+
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion), pIndex->nNodeVectorSize);
333339
}
334340

335341
void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector) {
336-
assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
342+
assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
337343

338-
vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE);
344+
vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion));
339345
}
340346

341347
u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
342-
assert( VECTOR_NODE_METADATA_SIZE <= pBlobSpot->nBufferSize );
348+
assert( nodeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
343349

344350
return readLE16(pBlobSpot->pBuffer + sizeof(u64));
345351
}
@@ -349,20 +355,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
349355
int offset = nodeEdgesMetadataOffset(pIndex);
350356

351357
if( pRowid != NULL ){
352-
assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
353-
*pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
358+
assert( offset + (iEdge + 1) * edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
359+
*pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u64));
354360
}
355361
if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
356-
distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
362+
distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u32));
357363
*pDistance = *((float*)&distance);
358364
}
359365
if( pVector != NULL ){
360-
assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
366+
assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
361367
vectorInitStatic(
362368
pVector,
363369
pIndex->nEdgeVectorType,
364370
pIndex->nVectorDims,
365-
pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
371+
pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
366372
);
367373
}
368374
}
@@ -399,11 +405,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
399405
nEdges++;
400406
}
401407

402-
edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
403-
edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * VECTOR_EDGE_METADATA_SIZE;
408+
edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
409+
edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * edgeMetadataSize(pIndex->nFormatVersion);
404410

405411
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
406-
assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
412+
assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
407413

408414
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
409415
writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
@@ -419,19 +425,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
419425

420426
assert( 0 <= iDelete && iDelete < nEdges );
421427

422-
edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
423-
lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
424-
edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * VECTOR_EDGE_METADATA_SIZE;
425-
lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * VECTOR_EDGE_METADATA_SIZE;
428+
edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
429+
lastVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
430+
edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * edgeMetadataSize(pIndex->nFormatVersion);
431+
lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * edgeMetadataSize(pIndex->nFormatVersion);
426432

427433
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
428434
assert( lastVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
429-
assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
430-
assert( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
435+
assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
436+
assert( lastMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
431437

432438
if( edgeVectorOffset < lastVectorOffset ){
433439
memmove(pBlobSpot->pBuffer + edgeVectorOffset, pBlobSpot->pBuffer + lastVectorOffset, pIndex->nEdgeVectorSize);
434-
memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, VECTOR_EDGE_METADATA_SIZE);
440+
memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, edgeMetadataSize(pIndex->nFormatVersion));
435441
}
436442

437443
writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges - 1);
@@ -517,9 +523,9 @@ int diskAnnCreateIndex(
517523
if( maxNeighborsParam == 0 ){
518524
// 3 D**(1/2) gives good recall values (90%+)
519525
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
520-
maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(neighbours, dims)) + 1);
526+
maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims))) / nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims)) + 1);
521527
}
522-
blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(neighbours, dims));
528+
blockSizeBytes = nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims));
523529
if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
524530
return SQLITE_ERROR;
525531
}

0 commit comments

Comments (0)