76
76
*/
77
77
#define DISKANN_BLOCK_SIZE_SHIFT 9
78
78
79
- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
80
- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
81
79
82
80
typedef struct VectorPair VectorPair ;
83
81
typedef struct DiskAnnSearchCtx DiskAnnSearchCtx ;
@@ -300,46 +298,54 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
300
298
** Layout specific utilities
301
299
**************************************************************************/
302
300
303
- int nodeEdgeOverhead (int nEdgeVectorSize ){
304
- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE ;
301
+ int nodeMetadataSize (int nFormatVersion ){
302
+ return ( sizeof ( u64 ) + sizeof ( u16 )) ;
305
303
}
306
304
307
- int nodeOverhead (int nNodeVectorSize ){
308
- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
305
+ int edgeMetadataSize (int nFormatVersion ){
306
+ return (sizeof (u64 ) + sizeof (u64 ));
307
+ }
308
+
309
/*
** Total number of bytes one edge occupies inside a node block: the
** serialized edge vector (nEdgeVectorSize bytes) plus its metadata record
** for the given format version.
*/
int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
  const int nEdgeMeta = edgeMetadataSize(nFormatVersion);
  return nEdgeVectorSize + nEdgeMeta;
}
312
+
313
/*
** Number of bytes a node block spends on the node itself, before any edges:
** the serialized node vector (nNodeVectorSize bytes) plus the node metadata
** header for the given format version.
*/
int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
  const int nNodeMeta = nodeMetadataSize(nFormatVersion);
  return nNodeVectorSize + nNodeMeta;
}
310
316
311
317
int nodeEdgesMaxCount (const DiskAnnIndex * pIndex ){
312
- unsigned int nMaxEdges = (pIndex -> nBlockSize - nodeOverhead (pIndex -> nNodeVectorSize )) / nodeEdgeOverhead (pIndex -> nEdgeVectorSize );
318
+ unsigned int nMaxEdges = (pIndex -> nBlockSize - nodeOverhead (pIndex -> nFormatVersion , pIndex -> nNodeVectorSize )) / nodeEdgeOverhead (pIndex -> nFormatVersion , pIndex -> nEdgeVectorSize );
313
319
assert ( nMaxEdges > 0 );
314
320
return nMaxEdges ;
315
321
}
316
322
317
323
int nodeEdgesMetadataOffset (const DiskAnnIndex * pIndex ){
318
324
unsigned int offset ;
319
325
unsigned int nMaxEdges = nodeEdgesMaxCount (pIndex );
320
- offset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + nMaxEdges * pIndex -> nEdgeVectorSize ;
326
+ offset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + nMaxEdges * pIndex -> nEdgeVectorSize ;
321
327
assert ( offset <= pIndex -> nBlockSize );
322
328
return offset ;
323
329
}
324
330
325
331
/*
** Initialise the buffer of pBlobSpot as a fresh node with no edges:
** the whole buffer is zeroed, nRowid is written little-endian at the start
** of the buffer, and pVector is serialized right after the node metadata
** header using pIndex->nNodeVectorSize bytes.
*/
void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
  const int nHeaderSize = nodeMetadataSize(pIndex->nFormatVersion);

  assert( nHeaderSize + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );

  memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
  writeLE64(pBlobSpot->pBuffer, nRowid);
  // the edge count needs no explicit write: the memset above already left it at zero

  vectorSerializeToBlob(
    pVector,
    pBlobSpot->pBuffer + nHeaderSize,
    pIndex->nNodeVectorSize
  );
}
334
340
335
341
/*
** Point pVector at the node vector stored inside pBlobSpot's buffer.
** The vector is initialised via vectorInitStatic() with the index's node
** vector type and dimension count, using the bytes that start right after
** the node metadata header.
*/
void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector){
  const int nHeaderSize = nodeMetadataSize(pIndex->nFormatVersion);

  assert( nHeaderSize + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );

  vectorInitStatic(
    pVector,
    pIndex->nNodeVectorType,
    pIndex->nVectorDims,
    pBlobSpot->pBuffer + nHeaderSize
  );
}
340
346
341
347
u16 nodeBinEdges (const DiskAnnIndex * pIndex , const BlobSpot * pBlobSpot ) {
342
- assert ( VECTOR_NODE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
348
+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
343
349
344
350
return readLE16 (pBlobSpot -> pBuffer + sizeof (u64 ));
345
351
}
@@ -349,20 +355,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
349
355
int offset = nodeEdgesMetadataOffset (pIndex );
350
356
351
357
if ( pRowid != NULL ){
352
- assert ( offset + (iEdge + 1 ) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
353
- * pRowid = readLE64 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u64 ));
358
+ assert ( offset + (iEdge + 1 ) * edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
359
+ * pRowid = readLE64 (pBlobSpot -> pBuffer + offset + iEdge * edgeMetadataSize ( pIndex -> nFormatVersion ) + sizeof (u64 ));
354
360
}
355
361
if ( pIndex -> nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
356
- distance = readLE32 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u32 ));
362
+ distance = readLE32 (pBlobSpot -> pBuffer + offset + iEdge * edgeMetadataSize ( pIndex -> nFormatVersion ) + sizeof (u32 ));
357
363
* pDistance = * ((float * )& distance );
358
364
}
359
365
if ( pVector != NULL ){
360
- assert ( VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize < offset );
366
+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize < offset );
361
367
vectorInitStatic (
362
368
pVector ,
363
369
pIndex -> nEdgeVectorType ,
364
370
pIndex -> nVectorDims ,
365
- pBlobSpot -> pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize
371
+ pBlobSpot -> pBuffer + nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize
366
372
);
367
373
}
368
374
}
@@ -399,11 +405,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
399
405
nEdges ++ ;
400
406
}
401
407
402
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iReplace * pIndex -> nEdgeVectorSize ;
403
- edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
408
+ edgeVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iReplace * pIndex -> nEdgeVectorSize ;
409
+ edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iReplace * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
404
410
405
411
assert ( edgeVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
406
- assert ( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
412
+ assert ( edgeMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
407
413
408
414
vectorSerializeToBlob (pVector , pBlobSpot -> pBuffer + edgeVectorOffset , pIndex -> nEdgeVectorSize );
409
415
writeLE32 (pBlobSpot -> pBuffer + edgeMetaOffset + sizeof (u32 ), * ((u32 * )& distance ));
@@ -419,19 +425,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
419
425
420
426
assert ( 0 <= iDelete && iDelete < nEdges );
421
427
422
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iDelete * pIndex -> nEdgeVectorSize ;
423
- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + (nEdges - 1 ) * pIndex -> nEdgeVectorSize ;
424
- edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
425
- lastMetaOffset = nodeEdgesMetadataOffset (pIndex ) + (nEdges - 1 ) * VECTOR_EDGE_METADATA_SIZE ;
428
+ edgeVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iDelete * pIndex -> nEdgeVectorSize ;
429
+ lastVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + (nEdges - 1 ) * pIndex -> nEdgeVectorSize ;
430
+ edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iDelete * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
431
+ lastMetaOffset = nodeEdgesMetadataOffset (pIndex ) + (nEdges - 1 ) * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
426
432
427
433
assert ( edgeVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
428
434
assert ( lastVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
429
- assert ( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
430
- assert ( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
435
+ assert ( edgeMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
436
+ assert ( lastMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
431
437
432
438
if ( edgeVectorOffset < lastVectorOffset ){
433
439
memmove (pBlobSpot -> pBuffer + edgeVectorOffset , pBlobSpot -> pBuffer + lastVectorOffset , pIndex -> nEdgeVectorSize );
434
- memmove (pBlobSpot -> pBuffer + edgeMetaOffset , pBlobSpot -> pBuffer + lastMetaOffset , VECTOR_EDGE_METADATA_SIZE );
440
+ memmove (pBlobSpot -> pBuffer + edgeMetaOffset , pBlobSpot -> pBuffer + lastMetaOffset , edgeMetadataSize ( pIndex -> nFormatVersion ) );
435
441
}
436
442
437
443
writeLE16 (pBlobSpot -> pBuffer + sizeof (u64 ), nEdges - 1 );
@@ -517,9 +523,9 @@ int diskAnnCreateIndex(
517
523
if ( maxNeighborsParam == 0 ){
518
524
// 3 D**(1/2) gives good recall values (90%+)
519
525
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
520
- maxNeighborsParam = MIN (3 * ((int )(sqrt (dims )) + 1 ), (50 * nodeOverhead (vectorDataSize (type , dims ))) / nodeEdgeOverhead (vectorDataSize (neighbours , dims )) + 1 );
526
+ maxNeighborsParam = MIN (3 * ((int )(sqrt (dims )) + 1 ), (50 * nodeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (type , dims ))) / nodeEdgeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (neighbours , dims )) + 1 );
521
527
}
522
- blockSizeBytes = nodeOverhead (vectorDataSize (type , dims )) + maxNeighborsParam * (u64 )nodeEdgeOverhead (vectorDataSize (neighbours , dims ));
528
+ blockSizeBytes = nodeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (type , dims )) + maxNeighborsParam * (u64 )nodeEdgeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (neighbours , dims ));
523
529
if ( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
524
530
return SQLITE_ERROR ;
525
531
}
0 commit comments