|
25 | 25 | import org.apache.lucene.codecs.TermVectorsReader; |
26 | 26 | import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; |
27 | 27 | import org.apache.lucene.index.BinaryDocValues; |
28 | | -import org.apache.lucene.index.ByteVectorValues; |
29 | 28 | import org.apache.lucene.index.DirectoryReader; |
30 | 29 | import org.apache.lucene.index.DocValuesType; |
31 | 30 | import org.apache.lucene.index.FieldInfo; |
32 | 31 | import org.apache.lucene.index.FieldInfos; |
33 | 32 | import org.apache.lucene.index.Fields; |
34 | | -import org.apache.lucene.index.FloatVectorValues; |
35 | 33 | import org.apache.lucene.index.IndexCommit; |
36 | 34 | import org.apache.lucene.index.IndexOptions; |
37 | | -import org.apache.lucene.index.KnnVectorValues; |
38 | 35 | import org.apache.lucene.index.LeafReaderContext; |
39 | 36 | import org.apache.lucene.index.NumericDocValues; |
40 | 37 | import org.apache.lucene.index.PointValues; |
|
47 | 44 | import org.apache.lucene.index.Terms; |
48 | 45 | import org.apache.lucene.index.TermsEnum; |
49 | 46 | import org.apache.lucene.search.DocIdSetIterator; |
50 | | -import org.apache.lucene.search.KnnCollector; |
51 | | -import org.apache.lucene.search.TopKnnCollector; |
52 | 47 | import org.apache.lucene.store.Directory; |
53 | 48 | import org.apache.lucene.store.FilterDirectory; |
54 | 49 | import org.apache.lucene.store.IOContext; |
@@ -562,57 +557,12 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I |
562 | 557 | cancellationChecker.checkForCancellation(); |
563 | 558 | directory.resetBytesRead(); |
564 | 559 | if (field.getVectorDimension() > 0) { |
565 | | - switch (field.getVectorEncoding()) { |
566 | | - case BYTE -> { |
567 | | - iterateDocValues(reader.maxDoc(), () -> vectorReader.getByteVectorValues(field.name).iterator(), vectors -> { |
568 | | - cancellationChecker.logEvent(); |
569 | | - vectors.index(); |
570 | | - }); |
571 | | - |
572 | | - // do a couple of randomized searches to figure out min and max offsets of index file |
573 | | - ByteVectorValues vectorValues = vectorReader.getByteVectorValues(field.name); |
574 | | - KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); |
575 | | - final KnnCollector collector = new TopKnnCollector( |
576 | | - Math.max(1, Math.min(100, vectorValues.size() - 1)), |
577 | | - Integer.MAX_VALUE |
578 | | - ); |
579 | | - int numDocsToVisit = reader.maxDoc() < 10 ? reader.maxDoc() : 10 * (int) Math.log10(reader.maxDoc()); |
580 | | - int skipFactor = Math.max(reader.maxDoc() / numDocsToVisit, 1); |
581 | | - for (int i = 0; i < reader.maxDoc(); i += skipFactor) { |
582 | | - if ((i = iterator.advance(i)) == DocIdSetIterator.NO_MORE_DOCS) { |
583 | | - break; |
584 | | - } |
585 | | - cancellationChecker.checkForCancellation(); |
586 | | - vectorReader.search(field.name, vectorValues.vectorValue(iterator.index()), collector, null); |
587 | | - } |
588 | | - stats.addKnnVectors(field.name, directory.getBytesRead()); |
589 | | - } |
590 | | - case FLOAT32 -> { |
591 | | - iterateDocValues(reader.maxDoc(), () -> vectorReader.getFloatVectorValues(field.name).iterator(), vectors -> { |
592 | | - cancellationChecker.logEvent(); |
593 | | - vectors.index(); |
594 | | - }); |
595 | | - |
596 | | - // do a couple of randomized searches to figure out min and max offsets of index file |
597 | | - FloatVectorValues vectorValues = vectorReader.getFloatVectorValues(field.name); |
598 | | - KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); |
599 | | - final KnnCollector collector = new TopKnnCollector( |
600 | | - Math.max(1, Math.min(100, vectorValues.size() - 1)), |
601 | | - Integer.MAX_VALUE |
602 | | - ); |
603 | | - int numDocsToVisit = reader.maxDoc() < 10 ? reader.maxDoc() : 10 * (int) Math.log10(reader.maxDoc()); |
604 | | - int skipFactor = Math.max(reader.maxDoc() / numDocsToVisit, 1); |
605 | | - for (int i = 0; i < reader.maxDoc(); i += skipFactor) { |
606 | | - if ((i = iterator.advance(i)) == DocIdSetIterator.NO_MORE_DOCS) { |
607 | | - break; |
608 | | - } |
609 | | - cancellationChecker.checkForCancellation(); |
610 | | - vectorReader.search(field.name, vectorValues.vectorValue(iterator.index()), collector, null); |
611 | | - } |
612 | | - stats.addKnnVectors(field.name, directory.getBytesRead()); |
613 | | - } |
| 560 | + Map<String, Long> offHeap = vectorReader.getOffHeapByteSize(field); |
| 561 | + long totalSize = 0; |
| 562 | + for (var entry : offHeap.entrySet()) { |
| 563 | + totalSize += entry.getValue(); |
614 | 564 | } |
615 | | - |
| 565 | + stats.addKnnVectors(field.name, totalSize); |
616 | 566 | } |
617 | 567 | } |
618 | 568 | } |
|
0 commit comments