Skip to content

Commit 8bbe9bf

Browse files
Iteration
1 parent 214a76a commit 8bbe9bf

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ void visitField(Fields vectors, String fieldName) throws IOException {
548548
}
549549
}
550550

551-
void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws IOException {
551+
void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) {
552552
KnnVectorsReader vectorReader = reader.getVectorReader();
553553
if (vectorReader == null) {
554554
return;
@@ -562,6 +562,13 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I
562562
for (var entry : offHeap.entrySet()) {
563563
totalSize += entry.getValue();
564564
}
565+
long vectorsSize = offHeap.getOrDefault("vec", 0L);
566+
if (vectorsSize == 0L) {
567+
// This can happen if the .vec file is opened with direct I/O
568+
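// Calculate the size of the vectors manually. NOTE(review): dimension * byteSize is the size of a single vector; confirm whether it should be multiplied by the number of vectors.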
569+
vectorsSize = field.getVectorDimension() * field.getVectorEncoding().byteSize;
570+
totalSize += vectorsSize;
571+
}
565572
stats.addKnnVectors(field.name, totalSize);
566573
}
567574
}

server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.lucene.document.Field;
2525
import org.apache.lucene.document.FieldType;
2626
import org.apache.lucene.document.IntPoint;
27+
import org.apache.lucene.document.KnnByteVectorField;
2728
import org.apache.lucene.document.KnnFloatVectorField;
2829
import org.apache.lucene.document.LatLonShape;
2930
import org.apache.lucene.document.LongPoint;
@@ -67,6 +68,7 @@
6768
import org.elasticsearch.common.lucene.Lucene;
6869
import org.elasticsearch.core.IOUtils;
6970
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
71+
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
7072
import org.elasticsearch.index.shard.ShardId;
7173
import org.elasticsearch.index.store.LuceneFilesExtensions;
7274
import org.elasticsearch.test.ESTestCase;
@@ -254,15 +256,27 @@ public void testKnnVectors() throws Exception {
254256
VectorSimilarityFunction similarity = randomFrom(VectorSimilarityFunction.values());
255257
int numDocs = between(1000, 5000);
256258
int dimension = between(10, 200);
259+
DenseVectorFieldMapper.ElementType elementType = randomFrom(DenseVectorFieldMapper.ElementType.values());
257260

258-
indexRandomly(dir, codec, numDocs, doc -> {
259-
float[] vector = randomVector(dimension);
260-
doc.add(new KnnFloatVectorField("vector", vector, similarity));
261-
});
261+
if (elementType == DenseVectorFieldMapper.ElementType.FLOAT) {
262+
indexRandomly(dir, codec, numDocs, doc -> {
263+
float[] vector = randomVector(dimension);
264+
doc.add(new KnnFloatVectorField("vector", vector, similarity));
265+
});
266+
} else {
267+
indexRandomly(dir, codec, numDocs, doc -> {
268+
byte[] vector = new byte[dimension];
269+
random().nextBytes(vector);
270+
doc.add(new KnnByteVectorField("vector", vector, similarity));
271+
});
272+
}
262273
final IndexDiskUsageStats stats = IndexDiskUsageAnalyzer.analyze(testShardId(), lastCommit(dir), () -> {});
263274
logger.info("--> stats {}", stats);
264275

265-
long dataBytes = (long) numDocs * dimension * Float.BYTES; // size of flat vector data
276+
// expected size of flat vector data
277+
long dataBytes = elementType == DenseVectorFieldMapper.ElementType.FLOAT
278+
? ((long) numDocs * dimension * Float.BYTES)
279+
: ((long) numDocs * dimension);
266280
long indexBytesEstimate = (long) numDocs * (Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN / 4); // rough size of HNSW graph
267281
assertThat("numDocs=" + numDocs + ";dimension=" + dimension, stats.total().getKnnVectorsBytes(), greaterThan(dataBytes));
268282
long connectionOverhead = stats.total().getKnnVectorsBytes() - dataBytes;

0 commit comments

Comments
 (0)