2525import org .apache .lucene .codecs .TermVectorsReader ;
2626import org .apache .lucene .codecs .lucene103 .Lucene103PostingsFormat ;
2727import org .apache .lucene .index .BinaryDocValues ;
28- import org .apache .lucene .index .ByteVectorValues ;
2928import org .apache .lucene .index .DirectoryReader ;
3029import org .apache .lucene .index .DocValuesType ;
3130import org .apache .lucene .index .FieldInfo ;
3231import org .apache .lucene .index .FieldInfos ;
3332import org .apache .lucene .index .Fields ;
34- import org .apache .lucene .index .FloatVectorValues ;
3533import org .apache .lucene .index .IndexCommit ;
3634import org .apache .lucene .index .IndexOptions ;
37- import org .apache .lucene .index .KnnVectorValues ;
3835import org .apache .lucene .index .LeafReaderContext ;
3936import org .apache .lucene .index .NumericDocValues ;
4037import org .apache .lucene .index .PointValues ;
4744import org .apache .lucene .index .Terms ;
4845import org .apache .lucene .index .TermsEnum ;
4946import org .apache .lucene .search .DocIdSetIterator ;
50- import org .apache .lucene .search .KnnCollector ;
51- import org .apache .lucene .search .TopKnnCollector ;
5247import org .apache .lucene .store .Directory ;
5348import org .apache .lucene .store .FilterDirectory ;
5449import org .apache .lucene .store .IOContext ;
@@ -553,7 +548,7 @@ void visitField(Fields vectors, String fieldName) throws IOException {
553548 }
554549 }
555550
556- void analyzeKnnVectors (SegmentReader reader , IndexDiskUsageStats stats ) throws IOException {
551+ void analyzeKnnVectors (SegmentReader reader , IndexDiskUsageStats stats ) {
557552 KnnVectorsReader vectorReader = reader .getVectorReader ();
558553 if (vectorReader == null ) {
559554 return ;
@@ -562,57 +557,19 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I
562557 cancellationChecker .checkForCancellation ();
563558 directory .resetBytesRead ();
564559 if (field .getVectorDimension () > 0 ) {
565- switch (field .getVectorEncoding ()) {
566- case BYTE -> {
567- iterateDocValues (reader .maxDoc (), () -> vectorReader .getByteVectorValues (field .name ).iterator (), vectors -> {
568- cancellationChecker .logEvent ();
569- vectors .index ();
570- });
571-
572- // do a couple of randomized searches to figure out min and max offsets of index file
573- ByteVectorValues vectorValues = vectorReader .getByteVectorValues (field .name );
574- KnnVectorValues .DocIndexIterator iterator = vectorValues .iterator ();
575- final KnnCollector collector = new TopKnnCollector (
576- Math .max (1 , Math .min (100 , vectorValues .size () - 1 )),
577- Integer .MAX_VALUE
578- );
579- int numDocsToVisit = reader .maxDoc () < 10 ? reader .maxDoc () : 10 * (int ) Math .log10 (reader .maxDoc ());
580- int skipFactor = Math .max (reader .maxDoc () / numDocsToVisit , 1 );
581- for (int i = 0 ; i < reader .maxDoc (); i += skipFactor ) {
582- if ((i = iterator .advance (i )) == DocIdSetIterator .NO_MORE_DOCS ) {
583- break ;
584- }
585- cancellationChecker .checkForCancellation ();
586- vectorReader .search (field .name , vectorValues .vectorValue (iterator .index ()), collector , null );
587- }
588- stats .addKnnVectors (field .name , directory .getBytesRead ());
589- }
590- case FLOAT32 -> {
591- iterateDocValues (reader .maxDoc (), () -> vectorReader .getFloatVectorValues (field .name ).iterator (), vectors -> {
592- cancellationChecker .logEvent ();
593- vectors .index ();
594- });
595-
596- // do a couple of randomized searches to figure out min and max offsets of index file
597- FloatVectorValues vectorValues = vectorReader .getFloatVectorValues (field .name );
598- KnnVectorValues .DocIndexIterator iterator = vectorValues .iterator ();
599- final KnnCollector collector = new TopKnnCollector (
600- Math .max (1 , Math .min (100 , vectorValues .size () - 1 )),
601- Integer .MAX_VALUE
602- );
603- int numDocsToVisit = reader .maxDoc () < 10 ? reader .maxDoc () : 10 * (int ) Math .log10 (reader .maxDoc ());
604- int skipFactor = Math .max (reader .maxDoc () / numDocsToVisit , 1 );
605- for (int i = 0 ; i < reader .maxDoc (); i += skipFactor ) {
606- if ((i = iterator .advance (i )) == DocIdSetIterator .NO_MORE_DOCS ) {
607- break ;
608- }
609- cancellationChecker .checkForCancellation ();
610- vectorReader .search (field .name , vectorValues .vectorValue (iterator .index ()), collector , null );
611- }
612- stats .addKnnVectors (field .name , directory .getBytesRead ());
613- }
560+ Map <String , Long > offHeap = vectorReader .getOffHeapByteSize (field );
561+ long totalSize = 0 ;
562+ for (var entry : offHeap .entrySet ()) {
563+ totalSize += entry .getValue ();
614564 }
615-
565+ long vectorsSize = offHeap .getOrDefault ("vec" , 0L );
566+ if (vectorsSize == 0L ) {
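567+ // This can happen if the .vec file is opened with direct IO, so estimate the size of the vectors manually.
568+ // NOTE(review): dimension * byteSize is the byte size of a single vector; confirm whether it should be scaled by the number of vectors.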
569+ vectorsSize = field .getVectorDimension () * field .getVectorEncoding ().byteSize ;
570+ totalSize += vectorsSize ;
571+ }
572+ stats .addKnnVectors (field .name , totalSize );
616573 }
617574 }
618575 }
0 commit comments