2626import org .apache .lucene .index .SortedNumericDocValues ;
2727import org .apache .lucene .index .SortedSetDocValues ;
2828import org .apache .lucene .index .TermsEnum ;
29+ import org .apache .lucene .search .CheckedIntConsumer ;
2930import org .apache .lucene .search .DocIdSetIterator ;
3031import org .apache .lucene .search .SortedSetSelector ;
3132import org .apache .lucene .store .ByteArrayDataOutput ;
3233import org .apache .lucene .store .ByteBuffersDataOutput ;
3334import org .apache .lucene .store .ByteBuffersIndexOutput ;
35+ import org .apache .lucene .store .Directory ;
36+ import org .apache .lucene .store .IOContext ;
3437import org .apache .lucene .store .IndexOutput ;
3538import org .apache .lucene .util .ArrayUtil ;
3639import org .apache .lucene .util .BytesRef ;
@@ -57,6 +60,7 @@ final class ES87TSDBDocValuesConsumer extends DocValuesConsumer {
5760
5861 IndexOutput data , meta ;
5962 final int maxDoc ;
63+ final Directory dir ;
6064 final boolean enableOptimizedMerge ;
6165 private byte [] termsDictBuffer ;
6266 private final int skipIndexIntervalSize ;
@@ -94,6 +98,7 @@ final class ES87TSDBDocValuesConsumer extends DocValuesConsumer {
9498 maxDoc = state .segmentInfo .maxDoc ();
9599 this .skipIndexIntervalSize = skipIndexIntervalSize ;
96100 this .enableOptimizedMerge = enableOptimizedMerge ;
101+ this .dir = state .directory ;
97102 success = true ;
98103 } finally {
99104 if (success == false ) {
@@ -116,10 +121,15 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
116121 writeSkipIndex (field , producer );
117122 }
118123
119- writeField (field , producer , -1 );
124+ writeField (field , producer , -1 , null );
120125 }
121126
122- private long [] writeField (FieldInfo field , DocValuesProducer valuesProducer , long maxOrd ) throws IOException {
127+ private long [] writeField (
128+ FieldInfo field ,
129+ DocValuesProducer valuesProducer ,
130+ long maxOrd ,
131+ CheckedIntConsumer <IOException > docCountConsumer
132+ ) throws IOException {
123133 int numDocsWithValue = 0 ;
124134 long numValues = 0 ;
125135
@@ -184,6 +194,9 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon
184194 for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
185195 numDocsWithValue ++;
186196 final int count = values .docValueCount ();
197+ if (docCountConsumer != null ) {
198+ docCountConsumer .accept (count );
199+ }
187200 for (int i = 0 ; i < count ; ++i ) {
188201 buffer [bufferSize ++] = values .nextValue ();
189202 if (bufferSize == ES87TSDBDocValuesFormat .NUMERIC_BLOCK_SIZE ) {
@@ -368,7 +381,7 @@ public long cost() {
368381 }
369382 SortedDocValues sorted = valuesProducer .getSorted (field );
370383 int maxOrd = sorted .getValueCount ();
371- writeField (field , producer , maxOrd );
384+ writeField (field , producer , maxOrd , null );
372385 addTermsDict (DocValues .singleton (valuesProducer .getSorted (field )));
373386 }
374387
@@ -527,32 +540,93 @@ private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesPr
527540 if (maxOrd > -1 ) {
528541 meta .writeByte ((byte ) 1 ); // multiValued (1 = multiValued)
529542 }
530- long [] stats = writeField (field , valuesProducer , maxOrd );
531- int numDocsWithField = Math .toIntExact (stats [0 ]);
532- long numValues = stats [1 ];
533- assert numValues >= numDocsWithField ;
534-
535- meta .writeInt (numDocsWithField );
536- if (numValues > numDocsWithField ) {
537- long start = data .getFilePointer ();
538- meta .writeLong (start );
539- meta .writeVInt (ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
540543
541- final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter .getInstance (
542- meta ,
543- data ,
544- numDocsWithField + 1L ,
545- ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
546- );
547- long addr = 0 ;
548- addressesWriter .add (addr );
549- SortedNumericDocValues values = valuesProducer .getSortedNumeric (field );
550- for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
551- addr += values .docValueCount ();
544+ boolean optimizedAddressWriting = false ;
545+ if (valuesProducer instanceof DocValuesConsumerUtil .TsdbDocValuesProducer tsdbDocValuesProducer ) {
546+ if (tsdbDocValuesProducer .mergeStats .sumNumDocsWithFieldAccurate ()) {
547+ optimizedAddressWriting = true ;
548+ }
549+ }
550+ if (optimizedAddressWriting ) {
551+ DocValuesConsumerUtil .TsdbDocValuesProducer tsdbDocValuesProducer =
552+ (DocValuesConsumerUtil .TsdbDocValuesProducer ) valuesProducer ;
553+ int numDocsWithField = tsdbDocValuesProducer .mergeStats .sumNumDocsWithField ();
554+ long numValues = tsdbDocValuesProducer .mergeStats .sumNumValues ();
555+ if (numDocsWithField == numValues ) {
556+ writeField (field , valuesProducer , maxOrd , null );
557+ assert numValues >= numDocsWithField ;
558+ meta .writeInt (numDocsWithField );
559+ } else {
560+ assert numValues >= numDocsWithField ;
561+
562+ var addressMetaBuffer = new ByteBuffersDataOutput ();
563+ String addressDataOutputName = null ;
564+ try (
565+ var addressMetaOutput = new ByteBuffersIndexOutput (addressMetaBuffer , "meta-temp" , "meta-temp" );
566+ // TODO: which IOContext should be used here?
567+ var addressDataOutput = dir .createTempOutput (data .getName (), "address-data" , IOContext .DEFAULT )
568+ ) {
569+ addressDataOutputName = addressDataOutput .getName ();
570+ final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter .getInstance (
571+ addressMetaOutput ,
572+ addressDataOutput ,
573+ numDocsWithField + 1L ,
574+ ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
575+ );
576+ long [] addr = new long [1 ];
577+ addressesWriter .add (addr [0 ]);
578+ writeField (field , valuesProducer , maxOrd , docValueCount -> {
579+ addr [0 ] += docValueCount ;
580+ addressesWriter .add (addr [0 ]);
581+ });
582+ addressesWriter .finish ();
583+
584+ meta .writeInt (numDocsWithField );
585+ long start = data .getFilePointer ();
586+ meta .writeLong (start );
587+ meta .writeVInt (ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
588+ addressMetaBuffer .copyTo (meta );
589+ addressDataOutput .close ();
590+ try (
591+ // TODO: which IOContext should be used here?
592+ var addressDataInput = dir .openInput (addressDataOutput .getName (), IOContext .DEFAULT )
593+ ) {
594+ data .copyBytes (addressDataInput , addressDataInput .length ());
595+ meta .writeLong (data .getFilePointer () - start );
596+ }
597+ } finally {
598+ if (addressDataOutputName != null ) {
599+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (dir , addressDataOutputName );
600+ }
601+ }
602+ }
603+ } else {
604+ long [] stats = writeField (field , valuesProducer , maxOrd , null );
605+ int numDocsWithField = Math .toIntExact (stats [0 ]);
606+ long numValues = stats [1 ];
607+ assert numValues >= numDocsWithField ;
608+ meta .writeInt (numDocsWithField );
609+ if (numValues > numDocsWithField ) {
610+ long start = data .getFilePointer ();
611+ meta .writeLong (start );
612+ meta .writeVInt (ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
613+
614+ final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter .getInstance (
615+ meta ,
616+ data ,
617+ numDocsWithField + 1L ,
618+ ES87TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
619+ );
620+ long addr = 0 ;
552621 addressesWriter .add (addr );
622+ SortedNumericDocValues values = valuesProducer .getSortedNumeric (field );
623+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
624+ addr += values .docValueCount ();
625+ addressesWriter .add (addr );
626+ }
627+ addressesWriter .finish ();
628+ meta .writeLong (data .getFilePointer () - start );
553629 }
554- addressesWriter .finish ();
555- meta .writeLong (data .getFilePointer () - start );
556630 }
557631 }
558632
0 commit comments