@@ -85673,8 +85673,8 @@ void blobSpotFree(BlobSpot *pBlobSpot);
85673
85673
85674
85674
/*
85675
85675
* Accessor for node binary format
85676
- * - v1 format is the following:
85677
- * [u64 nRowid] [u16 nEdges] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u64 legacyField ] [u64 edgeId]) * nEdges
85676
+ * - default format is the following:
85677
+ * [u64 nRowid] [u16 nEdges] [6 byte padding] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u32 unused] [f32 distance] [u64 edgeId]) * nEdges
85678
85678
* Note that the node vector and the edge vectors can have different representations (an edge vector can be smaller in size than the node vector)
85679
85679
*/
85680
85680
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex);
@@ -85713,9 +85713,11 @@ typedef u8 MetricType;
85713
85713
/*
85714
85714
* 1 - v1 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u64 unused ] [u64 edge rowid] ] ...
85715
85715
* 2 - v2 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u32 unused] [f32 distance] [u64 edge rowid] ] ...
85716
+ * 3 - v3 version; node meta padded to an 8-byte boundary (its size is rounded up from u64 + u16 to u64 + u64)
85716
85717
*/
85717
85718
#define VECTOR_FORMAT_V1 1
85718
- #define VECTOR_FORMAT_DEFAULT 2
85719
+ #define VECTOR_FORMAT_V2 2
85720
+ #define VECTOR_FORMAT_DEFAULT 3
85719
85721
85720
85722
/* type of the vector index */
85721
85723
#define VECTOR_INDEX_TYPE_PARAM_ID 2
@@ -212727,8 +212729,6 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
212727
212729
*/
212728
212730
#define DISKANN_BLOCK_SIZE_SHIFT 9
212729
212731
212730
- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
212731
- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
212732
212732
212733
212733
typedef struct VectorPair VectorPair;
212734
212734
typedef struct DiskAnnSearchCtx DiskAnnSearchCtx;
@@ -212951,46 +212951,58 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
212951
212951
** Layout specific utilities
212952
212952
**************************************************************************/
212953
212953
212954
- int nodeEdgeOverhead(int nEdgeVectorSize){
212955
- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
212954
/*
 * Size in bytes of the node metadata header for the given format version.
 * Versions up to and including VECTOR_FORMAT_V2 use sizeof(u64) + sizeof(u16);
 * any later version uses sizeof(u64) + sizeof(u64).
 */
+ int nodeMetadataSize(int nFormatVersion){
212955
+ if( nFormatVersion <= VECTOR_FORMAT_V2 ){
212956
+ return (sizeof(u64) + sizeof(u16));
212957
+ }else{
212958
+ return (sizeof(u64) + sizeof(u64));
212959
+ }
212960
+ }
212961
+
212962
/*
 * Size in bytes of one edge metadata record: sizeof(u64) + sizeof(u64).
 * nFormatVersion is accepted but not consulted; the same size is returned
 * for every version.
 */
+ int edgeMetadataSize(int nFormatVersion){
212963
+ return (sizeof(u64) + sizeof(u64));
212964
+ }
212965
+
212966
/*
 * Bytes one edge costs inside a node block: nEdgeVectorSize plus the
 * edge metadata size for nFormatVersion.
 */
+ int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
212967
+ return nEdgeVectorSize + edgeMetadataSize(nFormatVersion);
212956
212968
}
212957
212969
212958
/*
 * Bytes the node itself costs inside a node block: nNodeVectorSize plus the
 * node metadata size. In the new revision the metadata size depends on
 * nFormatVersion; the removed revision used a fixed macro.
 */
- int nodeOverhead(int nNodeVectorSize){
212959
- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
212970
+ int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
212971
+ return nNodeVectorSize + nodeMetadataSize(nFormatVersion) ;
212960
212972
}
212961
212973
212962
212974
/*
 * Maximum number of edges that fit in one node block of pIndex: the space
 * left in nBlockSize after the node overhead, divided (integer division) by
 * the per-edge overhead. Asserts that at least one edge fits.
 * NOTE(review): the subtraction is not guarded against nBlockSize being
 * smaller than the node overhead - confirm callers guarantee this.
 */
int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
212963
- unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
212975
+ unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nFormatVersion, pIndex-> nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nFormatVersion, pIndex->nEdgeVectorSize);
212964
212976
assert( nMaxEdges > 0);
212965
212977
return nMaxEdges;
212966
212978
}
212967
212979
212968
212980
/*
 * Byte offset inside a node block at which the edge metadata records start:
 * node metadata size + node vector size + nMaxEdges edge-vector slots.
 * The full nMaxEdges capacity is used, not the current edge count, so the
 * offset is the same regardless of how many edges are stored.
 * Asserts that the offset does not exceed nBlockSize.
 */
int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex){
212969
212981
unsigned int offset;
212970
212982
unsigned int nMaxEdges = nodeEdgesMaxCount(pIndex);
212971
- offset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212983
+ offset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212972
212984
assert( offset <= pIndex->nBlockSize );
212973
212985
return offset;
212974
212986
}
212975
212987
212976
212988
/*
 * Initializes a node block in pBlobSpot's buffer:
 *  - zero-fills the whole buffer (nBufferSize bytes),
 *  - writes nRowid at offset 0 with writeLE64,
 *  - serializes pVector right after the node metadata header, passing
 *    nNodeVectorSize to vectorSerializeToBlob.
 * Asserts that the metadata header plus the node vector fit in the buffer.
 */
void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
212977
- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212989
+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212978
212990
212979
212991
memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
212980
212992
writeLE64(pBlobSpot->pBuffer, nRowid);
212981
212993
// neighbours count already zero after memset - no need to set it explicitly
212982
212994
212983
- vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE , pIndex->nNodeVectorSize);
212995
+ vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) , pIndex->nNodeVectorSize);
212984
212996
}
212985
212997
212986
212998
/*
 * Sets up pVector over the node vector stored in the block: calls
 * vectorInitStatic with the index's node vector type and dimensions and the
 * buffer address just past the node metadata header.
 * NOTE(review): presumably pVector references the buffer without copying -
 * confirm against vectorInitStatic.
 * Asserts that the metadata header plus the node vector fit in the buffer.
 */
void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector) {
212987
- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212999
+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212988
213000
212989
- vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE );
213001
+ vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) );
212990
213002
}
212991
213003
212992
213004
/*
 * Returns the node's edge count, read with readLE16 from the bytes that
 * immediately follow the leading u64 in the block buffer.
 * Asserts that the node metadata header fits in the buffer.
 */
u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
212993
- assert( VECTOR_NODE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213005
+ assert( nodeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
212994
213006
212995
213007
return readLE16(pBlobSpot->pBuffer + sizeof(u64));
212996
213008
}
@@ -213000,20 +213012,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
213000
213012
int offset = nodeEdgesMetadataOffset(pIndex);
213001
213013
213002
213014
if( pRowid != NULL ){
213003
- assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213004
- *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
213015
+ assert( offset + (iEdge + 1) * edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213016
+ *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u64));
213005
213017
}
213006
213018
if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
213007
- distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
213019
+ distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u32));
213008
213020
*pDistance = *((float*)&distance);
213009
213021
}
213010
213022
if( pVector != NULL ){
213011
- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213023
+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213012
213024
vectorInitStatic(
213013
213025
pVector,
213014
213026
pIndex->nEdgeVectorType,
213015
213027
pIndex->nVectorDims,
213016
- pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213028
+ pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213017
213029
);
213018
213030
}
213019
213031
}
@@ -213050,11 +213062,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
213050
213062
nEdges++;
213051
213063
}
213052
213064
213053
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213054
- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
213065
+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213066
+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * edgeMetadataSize(pIndex->nFormatVersion) ;
213055
213067
213056
213068
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213057
- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213069
+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213058
213070
213059
213071
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
213060
213072
writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
@@ -213070,19 +213082,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
213070
213082
213071
213083
assert( 0 <= iDelete && iDelete < nEdges );
213072
213084
213073
- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213074
- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213075
- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
213076
- lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * VECTOR_EDGE_METADATA_SIZE ;
213085
+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213086
+ lastVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213087
+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * edgeMetadataSize(pIndex->nFormatVersion) ;
213088
+ lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * edgeMetadataSize(pIndex->nFormatVersion) ;
213077
213089
213078
213090
assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213079
213091
assert( lastVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213080
- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213081
- assert( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213092
+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213093
+ assert( lastMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213082
213094
213083
213095
if( edgeVectorOffset < lastVectorOffset ){
213084
213096
memmove(pBlobSpot->pBuffer + edgeVectorOffset, pBlobSpot->pBuffer + lastVectorOffset, pIndex->nEdgeVectorSize);
213085
- memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, VECTOR_EDGE_METADATA_SIZE );
213097
+ memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, edgeMetadataSize(pIndex->nFormatVersion) );
213086
213098
}
213087
213099
213088
213100
writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges - 1);
@@ -213168,9 +213180,9 @@ int diskAnnCreateIndex(
213168
213180
if( maxNeighborsParam == 0 ){
213169
213181
// 3 D**(1/2) gives good recall values (90%+)
213170
213182
// we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
213171
- maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(neighbours, dims)) + 1);
213183
+ maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims))) / nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims)) + 1);
213172
213184
}
213173
- blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(neighbours, dims));
213185
+ blockSizeBytes = nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims));
213174
213186
if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
213175
213187
return SQLITE_ERROR;
213176
213188
}
0 commit comments