@@ -27,7 +27,6 @@
import org.elasticsearch.simdvec.ESVectorUtil;

import java.io.IOException;
import java.util.Map;

import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS;
import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
@@ -394,11 +393,6 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp
return new MemorySegmentPostingsVisitor(target, indexInput, entry, fieldInfo, acceptDocs);
}

@Override
public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
return Map.of();
}

private static class MemorySegmentPostingsVisitor implements PostingVisitor {
final long quantizedByteLength;
final IndexInput indexInput;
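(With its Map.of() override removed above, this reader presumably falls back to the real off-heap accounting now implemented in IVFVectorsReader, in the next file.)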
@@ -42,6 +42,8 @@
import java.util.Map;

import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.CENTROID_EXTENSION;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.VERSION_DIRECT_IO;

@@ -87,20 +89,8 @@ protected IVFVectorsReader(SegmentReadState state, GenericFlatVectorReaders.Load
} finally {
CodecUtil.checkFooter(ivfMeta, priorE);
}
ivfCentroids = openDataInput(
state,
versionMeta,
ES920DiskBBQVectorsFormat.CENTROID_EXTENSION,
ES920DiskBBQVectorsFormat.NAME,
state.context
);
ivfClusters = openDataInput(
state,
versionMeta,
ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION,
ES920DiskBBQVectorsFormat.NAME,
state.context
);
ivfCentroids = openDataInput(state, versionMeta, CENTROID_EXTENSION, ES920DiskBBQVectorsFormat.NAME, state.context);
ivfClusters = openDataInput(state, versionMeta, CLUSTER_EXTENSION, ES920DiskBBQVectorsFormat.NAME, state.context);
success = true;
} finally {
if (success == false) {
@@ -388,12 +378,10 @@ public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE;
return raw;
}
return raw; // for now just return the size of raw

// TODO: determine desired off-heap requirements
// var centroids = Map.of(EXTENSION, fe.xxxLength());
// var clusters = Map.of(EXTENSION, fe.yyyLength());
// return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroids, clusters);
// TODO: report on desired off-heap requirements instead or in addition to max?

Member:
I don't think we can indicate "desired" here; it will always be weird given the user query patterns.

Contributor Author:
Fair enough, I'll nix the TODO.

var centroidsClusters = Map.of(CENTROID_EXTENSION, fe.centroidLength, CLUSTER_EXTENSION, fe.postingListLength);
return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroidsClusters);
}

@Override
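For readers following the off-heap accounting change: the raw reader's map is combined with the IVF-specific centroid and cluster entries via KnnVectorsReader.mergeOffHeapByteSizeMaps. A minimal sketch of the presumed merge semantics, assuming disjoint extensions are unioned and a shared extension's sizes are summed (the helper below is illustrative, not Lucene's actual implementation):

import java.util.HashMap;
import java.util.Map;

class MergeSketch {
    // Illustrative stand-in for KnnVectorsReader.mergeOffHeapByteSizeMaps:
    // union per-extension byte counts, summing sizes if both maps report
    // the same extension.
    static Map<String, Long> merge(Map<String, Long> a, Map<String, Long> b) {
        Map<String, Long> out = new HashMap<>(a);
        b.forEach((ext, bytes) -> out.merge(ext, bytes, Long::sum));
        return out;
    }

    public static void main(String[] args) {
        Map<String, Long> raw = Map.of("vec", 4096L);                   // raw vectors
        Map<String, Long> ivf = Map.of("cenivf", 512L, "clivf", 2048L); // centroids + clusters
        System.out.println(merge(raw, ivf)); // three entries: vec, cenivf, clivf
    }
}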
@@ -541,7 +541,8 @@ public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInp

@Override
public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
return Map.of();
// TODO: override if adding new files
return super.getOffHeapByteSize(fieldInfo);
}

private static class MemorySegmentPostingsVisitor implements PostingVisitor {
@@ -63,6 +63,7 @@ public class MapperFeatures implements FeatureSpecification {
static final NodeFeature BASE64_DENSE_VECTORS = new NodeFeature("mapper.base64_dense_vectors");
public static final NodeFeature GENERIC_VECTOR_FORMAT = new NodeFeature("mapper.vectors.generic_vector_format");
public static final NodeFeature FIX_DENSE_VECTOR_WRONG_FIELDS = new NodeFeature("mapper.fix_dense_vector_wrong_fields");
static final NodeFeature BBQ_DISK_STATS_SUPPORT = new NodeFeature("mapper.bbq_disk_stats_support");

@Override
public Set<NodeFeature> getTestFeatures() {
@@ -106,7 +107,8 @@ public Set<NodeFeature> getTestFeatures() {
INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_FIELD_NAME_LIMIT,
EXCLUDE_VECTORS_DOCVALUE_BUGFIX,
BASE64_DENSE_VECTORS,
FIX_DENSE_VECTOR_WRONG_FIELDS
FIX_DENSE_VECTOR_WRONG_FIELDS,
BBQ_DISK_STATS_SUPPORT
);
if (ES93GenericFlatVectorsFormat.GENERIC_VECTOR_FORMAT.isEnabled()) {
features = new HashSet<>(features);
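Node features like the one added above are how REST and YAML tests gate on new behavior. A minimal sketch of the registration pattern, mirroring the MapperFeatures wiring in the hunk (assuming the standard org.elasticsearch.features package names):

import java.util.Set;

import org.elasticsearch.features.FeatureSpecification;
import org.elasticsearch.features.NodeFeature;

// Minimal sketch of the FeatureSpecification contract used above: expose the
// feature so test clusters can check for "mapper.bbq_disk_stats_support".
class ExampleFeatureSpec implements FeatureSpecification {
    static final NodeFeature BBQ_DISK_STATS_SUPPORT = new NodeFeature("mapper.bbq_disk_stats_support");

    @Override
    public Set<NodeFeature> getTestFeatures() {
        return Set.of(BBQ_DISK_STATS_SUPPORT);
    }
}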
@@ -112,7 +112,7 @@ public Map<String, Map<String, Long>> offHeapStats() {

private Map<String, Long> getTotalsByCategory() {
if (offHeapStats == null) {
return Map.of("veb", 0L, "vec", 0L, "veq", 0L, "vex", 0L);
return Map.of("veb", 0L, "vec", 0L, "veq", 0L, "vex", 0L, "cenivf", 0L, "clivf", 0L);
} else {
return offHeapStats.entrySet()
.stream()
@@ -140,6 +140,8 @@ private void toXContentWithFields(XContentBuilder builder, Params params) throws
builder.humanReadableField("total_vec_size_bytes", "total_vec_size", ofBytes(totals.getOrDefault("vec", 0L)));
builder.humanReadableField("total_veq_size_bytes", "total_veq_size", ofBytes(totals.getOrDefault("veq", 0L)));
builder.humanReadableField("total_vex_size_bytes", "total_vex_size", ofBytes(totals.getOrDefault("vex", 0L)));
builder.humanReadableField("total_cenivf_size_bytes", "total_cenivf_size", ofBytes(totals.getOrDefault("cenivf", 0L)));
builder.humanReadableField("total_clivf_size_bytes", "total_clivf_size", ofBytes(totals.getOrDefault("clivf", 0L)));
if (params.paramAsBoolean(INCLUDE_PER_FIELD_STATS, false) && offHeapStats != null && offHeapStats.size() > 0) {
toXContentWithPerFieldStats(builder);
}
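The null-stats default above gains the two new IVF categories ("cenivf" for centroids, "clivf" for clusters). In the non-null branch, the totals are presumably produced by flattening the per-field maps and summing per category; a minimal sketch of that aggregation (only the offHeapStats shape mirrors the real code):

import java.util.Map;
import java.util.stream.Collectors;

class TotalsSketch {
    // Sum off-heap byte counts per file-extension category across all fields, e.g.
    // {"foo": {"vec": 9}, "bar": {"vec": 14, "cenivf": 7}} -> {"vec": 23, "cenivf": 7}.
    static Map<String, Long> totalsByCategory(Map<String, Map<String, Long>> offHeapStats) {
        return offHeapStats.values()
            .stream()
            .flatMap(perField -> perField.entrySet().stream())
            .collect(Collectors.groupingBy(Map.Entry::getKey, Collectors.summingLong(Map.Entry::getValue)));
    }
}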
@@ -30,7 +30,6 @@

import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;

public class ES920DiskBBQBFloat16VectorsFormatTests extends BaseBFloat16KnnVectorsFormatTestCase {
@@ -102,9 +101,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx
}
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum();
// IVF doesn't report stats at the moment
assertThat(offHeap, anEmptyMap());
assertThat(totalByteSize, equalTo(0L));
assertThat(offHeap.size(), equalTo(3));
assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum()));
} else {
throw new AssertionError("unexpected:" + r.getClass());
}
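The updated assertions expect three per-extension entries for a DiskBBQ field. Hypothetically (extension names taken from the stats categories above, sizes invented), a field's map now has this shape:

import java.util.Map;

class OffHeapShapeSketch {
    public static void main(String[] args) {
        // Hypothetical per-field map once DiskBBQ reports stats (sizes invented):
        // raw vectors ("vec"), IVF centroids ("cenivf"), IVF clusters ("clivf").
        Map<String, Long> offHeap = Map.of("vec", 4096L, "cenivf", 512L, "clivf", 2048L);
        assert offHeap.size() == 3; // matches the updated test expectation (run with -ea)
    }
}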
@@ -49,7 +49,6 @@
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasToString;

@@ -122,9 +121,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx
}
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum();
// IVF doesn't report stats at the moment
assertThat(offHeap, anEmptyMap());
assertThat(totalByteSize, equalTo(0L));
assertThat(offHeap.size(), equalTo(3));
assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum()));
} else {
throw new AssertionError("unexpected:" + r.getClass());
}
@@ -164,7 +162,7 @@ public void testSimpleOffHeapSize() throws IOException {
}
var fieldInfo = r.getFieldInfos().fieldInfo("f");
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
assertEquals(0, offHeap.size());
assertEquals(3, offHeap.size());
}
}
}
@@ -31,7 +31,6 @@

import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;

public class ESNextDiskBBQBFloat16VectorsFormatTests extends BaseBFloat16KnnVectorsFormatTestCase {
@@ -108,9 +107,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx
}
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum();
// IVF doesn't report stats at the moment
assertThat(offHeap, anEmptyMap());
assertThat(totalByteSize, equalTo(0L));
assertThat(offHeap.size(), equalTo(3));
assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum()));
} else {
throw new AssertionError("unexpected:" + r.getClass());
}
@@ -53,7 +53,6 @@
import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER;
import static org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;
@@ -127,9 +126,8 @@ protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOEx
}
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum();
// IVF doesn't report stats at the moment
assertThat(offHeap, anEmptyMap());
assertThat(totalByteSize, equalTo(0L));
assertThat(offHeap.size(), equalTo(3));
assertThat(totalByteSize, equalTo(offHeap.values().stream().mapToLong(Long::longValue).sum()));
} else {
throw new AssertionError("unexpected:" + r.getClass());
}
@@ -177,7 +175,7 @@ public void testSimpleOffHeapSize() throws IOException {
}
var fieldInfo = r.getFieldInfos().fieldInfo("f");
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
assertEquals(0, offHeap.size());
assertEquals(3, offHeap.size());
}
}
}
@@ -87,7 +87,10 @@ public void testBasicEquality() {
}

public void testBasicXContent() throws IOException {
var stats = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("vec", 14L, "vex", 1L, "veb", 3L)));
var stats = new DenseVectorStats(
5L,
Map.of("foo", Map.of("vec", 9L), "bar", Map.of("vec", 14L, "vex", 1L, "veb", 3L, "cenivf", 7L, "clivf", 2L))
);

XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint();
builder.startObject();
@@ -110,11 +113,13 @@
"dense_vector" : {
"value_count" : 5,
"off_heap" : {
"total_size_bytes" : 27,
"total_size_bytes" : 36,
"total_veb_size_bytes" : 3,
"total_vec_size_bytes" : 23,
"total_veq_size_bytes" : 0,
"total_vex_size_bytes" : 1
"total_vex_size_bytes" : 1,
"total_cenivf_size_bytes" : 7,
"total_clivf_size_bytes" : 2
}
}
}""";
@@ -129,13 +134,17 @@
"dense_vector" : {
"value_count" : 5,
"off_heap" : {
"total_size_bytes" : 27,
"total_size_bytes" : 36,
"total_veb_size_bytes" : 3,
"total_vec_size_bytes" : 23,
"total_veq_size_bytes" : 0,
"total_vex_size_bytes" : 1,
"total_cenivf_size_bytes" : 7,
"total_clivf_size_bytes" : 2,
"fielddata" : {
"bar" : {
"cenivf_size_bytes" : 7,
"clivf_size_bytes" : 2,
"veb_size_bytes" : 3,
"vec_size_bytes" : 14,
"vex_size_bytes" : 1
@@ -168,7 +177,9 @@ public void testBasicXContent() throws IOException {
"total_veb_size_bytes" : 0,
"total_vec_size_bytes" : 0,
"total_veq_size_bytes" : 0,
"total_vex_size_bytes" : 0
"total_vex_size_bytes" : 0,
"total_cenivf_size_bytes" : 0,
"total_clivf_size_bytes" : 0
}
}
}""";
@@ -202,6 +213,10 @@ public void testXContentHumanReadable() throws IOException {
"total_veq_size_bytes" : 1099511627776,
"total_vex_size" : "190.7mb",
"total_vex_size_bytes" : 200000000,
"total_cenivf_size" : "0b",
"total_cenivf_size_bytes" : 0,
"total_clivf_size" : "0b",
"total_clivf_size_bytes" : 0,
"fielddata" : {
"bar" : {
"veb_size" : "1kb",