From ede6f819d93edc561cbe9c8dc867156b0c5119e1 Mon Sep 17 00:00:00 2001 From: John Wagster Date: Fri, 21 Nov 2025 10:26:23 -0600 Subject: [PATCH 1/4] added in off heaps bytes sizes for stats endpoints --- .../220_dense_vector_node_index_stats.yml | 149 +++++++++++++++++- .../diskbbq/ES920DiskBBQVectorsReader.java | 2 +- .../vectors/diskbbq/IVFVectorsReader.java | 26 +-- .../next/ESNextDiskBBQVectorsReader.java | 2 +- .../index/mapper/MapperFeatures.java | 4 +- .../index/shard/DenseVectorStats.java | 4 +- ...S920DiskBBQBFloat16VectorsFormatTests.java | 6 +- .../ES920DiskBBQVectorsFormatTests.java | 8 +- ...NextDiskBBQBFloat16VectorsFormatTests.java | 6 +- .../next/ESNextDiskBBQVectorsFormatTests.java | 8 +- .../index/shard/DenseVectorStatsTests.java | 25 ++- 11 files changed, 193 insertions(+), 47 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/220_dense_vector_node_index_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/220_dense_vector_node_index_stats.yml index fcdc344b0f39e..db8a61c0afd04 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/220_dense_vector_node_index_stats.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/220_dense_vector_node_index_stats.yml @@ -123,30 +123,40 @@ - gt: { indices.non_quantized.primaries.dense_vector.off_heap.total_vec_size_bytes: 0 } - match: { indices.non_quantized.primaries.dense_vector.off_heap.total_veq_size_bytes: 0 } - gt: { indices.non_quantized.primaries.dense_vector.off_heap.total_vex_size_bytes: 0 } + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.total_cenivf_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.total_clivf_size_bytes # vector1, hnsw (float) - is_true: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1 - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.vec_size_bytes: 0 } - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.vex_size_bytes: 0 } - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.veb_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.veq_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.cenivf_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector1.clivf_size_bytes # vector2, flat (float) - is_true: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2 - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vec_size_bytes: 0 } - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veb_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veq_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vex_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.cenivf_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector2.clivf_size_bytes # vector3, hnsw (byte) - is_true: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3 - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vec_size_bytes: 0 } - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vex_size_bytes: 0 } - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veb_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veq_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.cenivf_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector3.clivf_size_bytes # vector4, flat (byte) - is_true: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4 - gt: { indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.vec_size_bytes: 0 } - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.veb_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.veq_size_bytes - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.vex_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.cenivf_size_bytes + - is_false: indices.non_quantized.primaries.dense_vector.off_heap.fielddata.vector4.clivf_size_bytes --- "index node stats int8_and int4_quantized": @@ -276,24 +286,32 @@ - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector1.veq_size_bytes: 0 } - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector1.vex_size_bytes: 0 } - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector1.veb_size_bytes - # vector2, int8_flat + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector1.cenivf_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector1.clivf_size_bytes + # vector2, int8_flat - is_true: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2 - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vec_size_bytes: 0 } - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veq_size_bytes: 0 } - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veb_size_bytes - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vex_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.cenivf_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector2.clivf_size_bytes # vector3, int4_hnsw - is_true: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3 - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vec_size_bytes: 0 } - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veq_size_bytes: 0 } - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vex_size_bytes: 0 } - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veb_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.cenivf_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector3.clivf_size_bytes # vector4, int4_flat - is_true: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4 - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.vec_size_bytes: 0 } - gt: { indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.veq_size_bytes: 0 } - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.veb_size_bytes - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.vex_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.cenivf_size_bytes + - is_false: indices.int8_and_4_quantized.primaries.dense_vector.off_heap.fielddata.vector4.clivf_size_bytes --- "index node stats bbq_quantized": @@ -429,12 +447,125 @@ - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector1.vec_size_bytes: 0 } - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector1.vex_size_bytes: 0 } - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector1.veq_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector1.cenivf_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector1.clivf_size_bytes # vector2, bbq_flat - is_true: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2 - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veb_size_bytes: 0 } - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vec_size_bytes: 0 } - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.veq_size_bytes - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.vex_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.cenivf_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector2.clivf_size_bytes + +--- +"index node stats bbq_disk quantized": + - requires: + capabilities: + - method: GET + path: /_nodes/stats + capabilities: [ dense_vector_off_heap_stats ] + test_runner_features: [ capabilities ] + reason: Capability required to run test + - requires: + capabilities: + - method: POST + path: /_search + capabilities: [ optimized_scalar_quantization_bbq ] + test_runner_features: capabilities + reason: "Uses bbq" + - requires: + cluster_features: ["mapper.bbq_disk_support"] + reason: Needs mapper.bbq_disk_support feature + - requires: + cluster_features: ["mapper.bbq_disk_stats_support"] + reason: Needs bbq_disk DenseVectorStats support + + - do: + indices.create: + index: bbq_quantized + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + vector3: + type: dense_vector + dims: 64 + index: true + similarity: l2_norm + index_options: + type: bbq_disk + + - do: + index: + index: bbq_quantized + id: "3" + body: + vector3: [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ] + + - do: + indices.refresh: {} + + - do: + cat.shards: + format: "json" + h: [ id ] + index: "bbq_quantized" + + - do: + nodes.stats: + metric: [ indices ] + node_id: $body.0.id + + - set: + nodes._arbitrary_key_: node_id + + - is_true: cluster_name + - is_true: nodes + - is_true: nodes.$node_id.name + - is_true: nodes.$node_id.indices.dense_vector + - match: { nodes.$node_id.indices.dense_vector.value_count: 1 } + - is_true: nodes.$node_id.indices.dense_vector.off_heap + - gt: { nodes.$node_id.indices.dense_vector.off_heap.total_size_bytes: 0 } + - match: { nodes.$node_id.indices.dense_vector.off_heap.total_veb_size_bytes: 0 } + - gt: { nodes.$node_id.indices.dense_vector.off_heap.total_vec_size_bytes: 0 } + - match: { nodes.$node_id.indices.dense_vector.off_heap.total_veq_size_bytes: 0 } + - match: { nodes.$node_id.indices.dense_vector.off_heap.total_vex_size_bytes: 0 } + - gt: { nodes.$node_id.indices.dense_vector.off_heap.total_cenivf_size_bytes: 0 } + - gt: { nodes.$node_id.indices.dense_vector.off_heap.total_clivf_size_bytes: 0 } + - is_false: nodes.$node_id.indices.dense_vector.off_heap.fielddata + + - do: + indices.stats: { index: _all } + + - is_true: _all.primaries.dense_vector.off_heap + - is_false: _all.primaries.dense_vector.off_heap.fielddata + - is_true: _all.total.dense_vector.off_heap + - is_false: _all.total.dense_vector.off_heap.fielddata + + - is_true: indices.bbq_quantized.primaries.dense_vector + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_size_bytes: 0 } + - match: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_veb_size_bytes: 0 } + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_vec_size_bytes: 0 } + - match: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_veq_size_bytes: 0 } + - match: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_vex_size_bytes: 0 } + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_cenivf_size_bytes: 0 } + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.total_clivf_size_bytes: 0 } + # vector3, bbq_disk + - is_true: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3 + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vec_size_bytes: 0 } + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.cenivf_size_bytes: 0 } + - gt: { indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.clivf_size_bytes: 0 } + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veb_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.veq_size_bytes + - is_false: indices.bbq_quantized.primaries.dense_vector.off_heap.fielddata.vector3.vex_size_bytes + --- "index node stats bit_vectors": @@ -539,12 +670,16 @@ - gt: { indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector1.vex_size_bytes: 0 } - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector1.veb_size_bytes - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector1.veq_size_bytes + - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector1.cenivf_size_bytes + - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector1.clivf_size_bytes # vector2, flat - is_true: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2 - gt: { indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.vec_size_bytes: 0 } - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.veb_size_bytes - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.veq_size_bytes - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.vex_size_bytes + - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.cenivf_size_bytes + - is_false: indices.bit_vectors.primaries.dense_vector.off_heap.fielddata.vector2.clivf_size_bytes --- "index node stats empty": @@ -616,6 +751,8 @@ - match: { indices.empty_index.primaries.dense_vector.off_heap.total_vex_size_bytes: 0 } - is_false: indices.empty_index.primaries.dense_vector.off_heap.fielddata + + --- "index node stats multiple indices": - requires: @@ -752,6 +889,8 @@ - gt: { indices.bbq_quantized1.primaries.dense_vector.off_heap.total_vec_size_bytes: 0 } - match: { indices.bbq_quantized1.primaries.dense_vector.off_heap.total_veq_size_bytes: 0 } - gt: { indices.bbq_quantized1.primaries.dense_vector.off_heap.total_vex_size_bytes: 0 } + - is_false: indices.bbq_quantized1.primaries.dense_vector.off_heap.total_cenivf_size_bytes + - is_false: indices.bbq_quantized1.primaries.dense_vector.off_heap.total_clivf_size_bytes - is_true: indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata # foo.foo, bbq_hnsw - is_true: indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo @@ -759,6 +898,8 @@ - gt: { indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo.vec_size_bytes: 0 } - gt: { indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo.vex_size_bytes: 0 } - is_false: indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo.veq_size_bytes + - is_false: indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo.cenivf_size_bytes + - is_false: indices.bbq_quantized1.primaries.dense_vector.off_heap.fielddata.foo\.foo.clivf_size_bytes # index type bbq_flat - is_true: indices.bbq_quantized2.primaries.dense_vector @@ -774,6 +915,8 @@ - gt: { indices.bbq_quantized2.primaries.dense_vector.off_heap.fielddata.bar.vec_size_bytes: 0 } - is_false: indices.bbq_quantized2.primaries.dense_vector.off_heap.fielddata.bar.veq_size_bytes - is_false: indices.bbq_quantized2.primaries.dense_vector.off_heap.fielddata.bar.vex_size_bytes + - is_false: indices.bbq_quantized2.primaries.dense_vector.off_heap.fielddata.bar.cenivf_size_bytes + - is_false: indices.bbq_quantized2.primaries.dense_vector.off_heap.fielddata.bar.clivf_size_bytes # index type hnsw_idx - is_true: indices.hnsw_idx.primaries.dense_vector @@ -782,6 +925,8 @@ - gt: { indices.hnsw_idx.primaries.dense_vector.off_heap.total_vec_size_bytes: 0 } - match: { indices.hnsw_idx.primaries.dense_vector.off_heap.total_veq_size_bytes: 0 } - gt: { indices.hnsw_idx.primaries.dense_vector.off_heap.total_vex_size_bytes: 0 } + - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.total_cenivf_size_bytes + - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.total_clivf_size_bytes - is_true: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata # baz, hnsw (float) - is_true: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz @@ -789,6 +934,8 @@ - gt: { indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz.vex_size_bytes: 0 } - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz.veb_size_bytes - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz.veq_size_bytes + - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz.cenivf_size_bytes + - is_false: indices.hnsw_idx.primaries.dense_vector.off_heap.fielddata.baz.clivf_size_bytes --- "index node stats non-indexed": diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java index ef2bd98419eb0..9655869b97288 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java @@ -396,7 +396,7 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp @Override public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return Map.of(); + return super.getOffHeapByteSize(fieldInfo); } private static class MemorySegmentPostingsVisitor implements PostingVisitor { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java index a122440fc58c0..2372dd222538d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java @@ -42,6 +42,8 @@ import java.util.Map; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.CENTROID_EXTENSION; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.VERSION_DIRECT_IO; @@ -87,20 +89,8 @@ protected IVFVectorsReader(SegmentReadState state, GenericFlatVectorReaders.Load } finally { CodecUtil.checkFooter(ivfMeta, priorE); } - ivfCentroids = openDataInput( - state, - versionMeta, - ES920DiskBBQVectorsFormat.CENTROID_EXTENSION, - ES920DiskBBQVectorsFormat.NAME, - state.context - ); - ivfClusters = openDataInput( - state, - versionMeta, - ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION, - ES920DiskBBQVectorsFormat.NAME, - state.context - ); + ivfCentroids = openDataInput(state, versionMeta, CENTROID_EXTENSION, ES920DiskBBQVectorsFormat.NAME, state.context); + ivfClusters = openDataInput(state, versionMeta, CLUSTER_EXTENSION, ES920DiskBBQVectorsFormat.NAME, state.context); success = true; } finally { if (success == false) { @@ -388,12 +378,10 @@ public Map getOffHeapByteSize(FieldInfo fieldInfo) { assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; return raw; } - return raw; // for now just return the size of raw - // TODO: determine desired off off-heap requirements - // var centroids = Map.of(EXTENSION, fe.xxxLength()); - // var clusters = Map.of(EXTENSION, fe.yyyLength()); - // return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroids, clusters); + // TODO: report on desired off-heap requirements instead or in addition to max? + var centroidsClusters = Map.of(CENTROID_EXTENSION, fe.centroidLength, CLUSTER_EXTENSION, fe.postingListLength); + return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroidsClusters); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java index 8e4fdab12930b..54925eac69c3f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java @@ -499,7 +499,7 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp @Override public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return Map.of(); + return super.getOffHeapByteSize(fieldInfo); } private static class MemorySegmentPostingsVisitor implements PostingVisitor { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index b0cedb3e779e9..d6b9fd6d8cc26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -62,6 +62,7 @@ public class MapperFeatures implements FeatureSpecification { static final NodeFeature EXCLUDE_VECTORS_DOCVALUE_BUGFIX = new NodeFeature("mapper.exclude_vectors_docvalue_bugfix"); static final NodeFeature BASE64_DENSE_VECTORS = new NodeFeature("mapper.base64_dense_vectors"); public static final NodeFeature GENERIC_VECTOR_FORMAT = new NodeFeature("mapper.vectors.generic_vector_format"); + static final NodeFeature BBQ_DISK_STATS_SUPPORT = new NodeFeature("mapper.bbq_disk_stats_support"); @Override public Set getTestFeatures() { @@ -104,7 +105,8 @@ public Set getTestFeatures() { PROVIDE_INDEX_SORT_SETTING_DEFAULTS, INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_FIELD_NAME_LIMIT, EXCLUDE_VECTORS_DOCVALUE_BUGFIX, - BASE64_DENSE_VECTORS + BASE64_DENSE_VECTORS, + BBQ_DISK_STATS_SUPPORT ); if (ES93GenericFlatVectorsFormat.GENERIC_VECTOR_FORMAT.isEnabled()) { features = new HashSet<>(features); diff --git a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java index b289c51617c71..2c996586b95ac 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java +++ b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java @@ -112,7 +112,7 @@ public Map> offHeapStats() { private Map getTotalsByCategory() { if (offHeapStats == null) { - return Map.of("veb", 0L, "vec", 0L, "veq", 0L, "vex", 0L); + return Map.of("veb", 0L, "vec", 0L, "veq", 0L, "vex", 0L, "cenivf", 0L, "clivf", 0L); } else { return offHeapStats.entrySet() .stream() @@ -140,6 +140,8 @@ private void toXContentWithFields(XContentBuilder builder, Params params) throws builder.humanReadableField("total_vec_size_bytes", "total_vec_size", ofBytes(totals.getOrDefault("vec", 0L))); builder.humanReadableField("total_veq_size_bytes", "total_veq_size", ofBytes(totals.getOrDefault("veq", 0L))); builder.humanReadableField("total_vex_size_bytes", "total_vex_size", ofBytes(totals.getOrDefault("vex", 0L))); + builder.humanReadableField("total_cenivf_size_bytes", "total_cenivf_size", ofBytes(totals.getOrDefault("cenivf", 0L))); + builder.humanReadableField("total_clivf_size_bytes", "total_clivf_size", ofBytes(totals.getOrDefault("clivf", 0L))); if (params.paramAsBoolean(INCLUDE_PER_FIELD_STATS, false) && offHeapStats != null && offHeapStats.size() > 0) { toXContentWithPerFieldStats(builder); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQBFloat16VectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQBFloat16VectorsFormatTests.java index 1b7225d3acedb..532149c8ebcfd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQBFloat16VectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQBFloat16VectorsFormatTests.java @@ -30,7 +30,6 @@ import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; -import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.equalTo; public class ES920DiskBBQBFloat16VectorsFormatTests extends BaseBFloat16KnnVectorsFormatTestCase { @@ -102,9 +101,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx } var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum(); - // IVF doesn't report stats at the moment - assertThat(offHeap, anEmptyMap()); - assertThat(totalByteSize, equalTo(0L)); + assertThat(offHeap.size(), equalTo(3)); + assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum())); } else { throw new AssertionError("unexpected:" + r.getClass()); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java index 9b452fe4fb9cb..3928eca058d72 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java @@ -49,7 +49,6 @@ import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; -import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasToString; @@ -122,9 +121,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx } var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum(); - // IVF doesn't report stats at the moment - assertThat(offHeap, anEmptyMap()); - assertThat(totalByteSize, equalTo(0L)); + assertThat(offHeap.size(), equalTo(3)); + assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum())); } else { throw new AssertionError("unexpected:" + r.getClass()); } @@ -164,7 +162,7 @@ public void testSimpleOffHeapSize() throws IOException { } var fieldInfo = r.getFieldInfos().fieldInfo("f"); var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); - assertEquals(0, offHeap.size()); + assertEquals(3, offHeap.size()); } } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQBFloat16VectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQBFloat16VectorsFormatTests.java index 89fdc382874a1..013a534fa6170 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQBFloat16VectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQBFloat16VectorsFormatTests.java @@ -31,7 +31,6 @@ import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; -import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.equalTo; public class ESNextDiskBBQBFloat16VectorsFormatTests extends BaseBFloat16KnnVectorsFormatTestCase { @@ -108,9 +107,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx } var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum(); - // IVF doesn't report stats at the moment - assertThat(offHeap, anEmptyMap()); - assertThat(totalByteSize, equalTo(0L)); + assertThat(offHeap.size(), equalTo(3)); + assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum())); } else { throw new AssertionError("unexpected:" + r.getClass()); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsFormatTests.java index 04e1416c9c3c4..57cae8ab530aa 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsFormatTests.java @@ -49,7 +49,6 @@ import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; -import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -123,9 +122,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx } var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum(); - // IVF doesn't report stats at the moment - assertThat(offHeap, anEmptyMap()); - assertThat(totalByteSize, equalTo(0L)); + assertThat(offHeap.size(), equalTo(3)); + assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum())); } else { throw new AssertionError("unexpected:" + r.getClass()); } @@ -173,7 +171,7 @@ public void testSimpleOffHeapSize() throws IOException { } var fieldInfo = r.getFieldInfos().fieldInfo("f"); var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); - assertEquals(0, offHeap.size()); + assertEquals(3, offHeap.size()); } } } diff --git a/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java b/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java index 5c092a3ed27a3..1df48f1d6b25b 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java @@ -87,7 +87,10 @@ public void testBasicEquality() { } public void testBasicXContent() throws IOException { - var stats = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("vec", 14L, "vex", 1L, "veb", 3L))); + var stats = new DenseVectorStats( + 5L, + Map.of("foo", Map.of("vec", 9L), "bar", Map.of("vec", 14L, "vex", 1L, "veb", 3L, "cenivf", 7L, "clivf", 2L)) + ); XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); builder.startObject(); @@ -110,11 +113,13 @@ public void testBasicXContent() throws IOException { "dense_vector" : { "value_count" : 5, "off_heap" : { - "total_size_bytes" : 27, + "total_size_bytes" : 36, "total_veb_size_bytes" : 3, "total_vec_size_bytes" : 23, "total_veq_size_bytes" : 0, - "total_vex_size_bytes" : 1 + "total_vex_size_bytes" : 1, + "total_cenivf_size_bytes" : 7, + "total_clivf_size_bytes" : 2 } } }"""; @@ -129,13 +134,17 @@ public void testBasicXContent() throws IOException { "dense_vector" : { "value_count" : 5, "off_heap" : { - "total_size_bytes" : 27, + "total_size_bytes" : 36, "total_veb_size_bytes" : 3, "total_vec_size_bytes" : 23, "total_veq_size_bytes" : 0, "total_vex_size_bytes" : 1, + "total_cenivf_size_bytes" : 7, + "total_clivf_size_bytes" : 2, "fielddata" : { "bar" : { + "cenivf_size_bytes" : 7, + "clivf_size_bytes" : 2, "veb_size_bytes" : 3, "vec_size_bytes" : 14, "vex_size_bytes" : 1 @@ -168,7 +177,9 @@ public void testBasicXContent() throws IOException { "total_veb_size_bytes" : 0, "total_vec_size_bytes" : 0, "total_veq_size_bytes" : 0, - "total_vex_size_bytes" : 0 + "total_vex_size_bytes" : 0, + "total_cenivf_size_bytes" : 0, + "total_clivf_size_bytes" : 0 } } }"""; @@ -202,6 +213,10 @@ public void testXContentHumanReadable() throws IOException { "total_veq_size_bytes" : 1099511627776, "total_vex_size" : "190.7mb", "total_vex_size_bytes" : 200000000, + "total_cenivf_size" : "0b", + "total_cenivf_size_bytes" : 0, + "total_clivf_size" : "0b", + "total_clivf_size_bytes" : 0, "fielddata" : { "bar" : { "veb_size" : "1kb", From 52394c80b1bdb90faab4b334bb1b787fda568b71 Mon Sep 17 00:00:00 2001 From: John Wagster Date: Fri, 21 Nov 2025 10:46:09 -0600 Subject: [PATCH 2/4] minor cleanup --- .../codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java | 5 ----- .../vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java index 9655869b97288..9b327ae69465e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java @@ -394,11 +394,6 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp return new MemorySegmentPostingsVisitor(target, indexInput, entry, fieldInfo, acceptDocs); } - @Override - public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return super.getOffHeapByteSize(fieldInfo); - } - private static class MemorySegmentPostingsVisitor implements PostingVisitor { final long quantizedByteLength; final IndexInput indexInput; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java index 0c25873484a54..6dc021faeaac6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsReader.java @@ -527,6 +527,7 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp @Override public Map getOffHeapByteSize(FieldInfo fieldInfo) { + // TODO: override if adding new files return super.getOffHeapByteSize(fieldInfo); } From f110e4d0105c2baef817604337d2e7bb82835b7e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 21 Nov 2025 16:55:44 +0000 Subject: [PATCH 3/4] [CI] Auto commit changes from spotless --- .../index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java index 9b327ae69465e..b13eca82c82f0 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java @@ -27,7 +27,6 @@ import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; -import java.util.Map; import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; From fa4cf92ac27e2c708bd30d50e266677899a9e2ec Mon Sep 17 00:00:00 2001 From: John Wagster Date: Fri, 21 Nov 2025 14:24:00 -0600 Subject: [PATCH 4/4] nix todo --- .../index/codec/vectors/diskbbq/IVFVectorsReader.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java index 2372dd222538d..75210051ed738 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java @@ -379,7 +379,6 @@ public Map getOffHeapByteSize(FieldInfo fieldInfo) { return raw; } - // TODO: report on desired off-heap requirements instead or in addition to max? var centroidsClusters = Map.of(CENTROID_EXTENSION, fe.centroidLength, CLUSTER_EXTENSION, fe.postingListLength); return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroidsClusters); }