@@ -247,71 +247,146 @@ public void mergeNumericField(FieldInfo mergeFieldInfo, MergeState mergeState) t
247247 }
248248 }
249249
250+ @ Override
251+ public void mergeBinaryField (FieldInfo mergeFieldInfo , MergeState mergeState ) throws IOException {
252+ var result = compatibleWithOptimizedMerge (enableOptimizedMerge , mergeState , mergeFieldInfo );
253+ if (result .supported ()) {
254+ mergeBinaryField (result , mergeFieldInfo , mergeState );
255+ } else {
256+ super .mergeBinaryField (mergeFieldInfo , mergeState );
257+ }
258+ }
259+
250260 @ Override
251261 public void addBinaryField (FieldInfo field , DocValuesProducer valuesProducer ) throws IOException {
252262 meta .writeInt (field .number );
253263 meta .writeByte (ES819TSDBDocValuesFormat .BINARY );
254264
255- BinaryDocValues values = valuesProducer .getBinary (field );
256- long start = data .getFilePointer ();
257- meta .writeLong (start ); // dataOffset
258- int numDocsWithField = 0 ;
259- int minLength = Integer .MAX_VALUE ;
260- int maxLength = 0 ;
261- for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
262- numDocsWithField ++;
263- BytesRef v = values .binaryValue ();
264- int length = v .length ;
265- data .writeBytes (v .bytes , v .offset , v .length );
266- minLength = Math .min (length , minLength );
267- maxLength = Math .max (length , maxLength );
268- }
269- assert numDocsWithField <= maxDoc ;
270- meta .writeLong (data .getFilePointer () - start ); // dataLength
271-
272- if (numDocsWithField == 0 ) {
273- meta .writeLong (-2 ); // docsWithFieldOffset
274- meta .writeLong (0L ); // docsWithFieldLength
275- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
276- meta .writeByte ((byte ) -1 ); // denseRankPower
277- } else if (numDocsWithField == maxDoc ) {
278- meta .writeLong (-1 ); // docsWithFieldOffset
279- meta .writeLong (0L ); // docsWithFieldLength
280- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
281- meta .writeByte ((byte ) -1 ); // denseRankPower
282- } else {
283- long offset = data .getFilePointer ();
284- meta .writeLong (offset ); // docsWithFieldOffset
285- values = valuesProducer .getBinary (field );
286- final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
287- meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
288- meta .writeShort (jumpTableEntryCount );
289- meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
290- }
265+ if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer .mergeStats .supported ()) {
266+ final int numDocsWithField = tsdbValuesProducer .mergeStats .sumNumDocsWithField ();
267+ final int minLength = tsdbValuesProducer .mergeStats .minLength ();
268+ final int maxLength = tsdbValuesProducer .mergeStats .maxLength ();
291269
292- meta .writeInt (numDocsWithField );
293- meta .writeInt (minLength );
294- meta .writeInt (maxLength );
295- if (maxLength > minLength ) {
296- start = data .getFilePointer ();
297- meta .writeLong (start );
298- meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
270+ assert numDocsWithField <= maxDoc ;
299271
300- final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
301- meta ,
302- data ,
303- numDocsWithField + 1 ,
304- ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
305- );
306- long addr = 0 ;
307- writer .add (addr );
308- values = valuesProducer .getBinary (field );
272+ BinaryDocValues values = valuesProducer .getBinary (field );
273+ long start = data .getFilePointer ();
274+ meta .writeLong (start ); // dataOffset
275+
276+ OffsetsAccumulator offsetsAccumulator = null ;
277+ DISIAccumulator disiAccumulator = null ;
278+ try {
279+ if (numDocsWithField > 0 && numDocsWithField < maxDoc ) {
280+ disiAccumulator = new DISIAccumulator (dir , context , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
281+ }
282+
283+ assert maxLength >= minLength ;
284+ if (maxLength > minLength ) {
285+ offsetsAccumulator = new OffsetsAccumulator (dir , context , data , numDocsWithField );
286+ }
287+
288+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
289+ BytesRef v = values .binaryValue ();
290+ data .writeBytes (v .bytes , v .offset , v .length );
291+ if (disiAccumulator != null ) {
292+ disiAccumulator .addDocId (doc );
293+ }
294+ if (offsetsAccumulator != null ) {
295+ offsetsAccumulator .addDoc (v .length );
296+ }
297+ }
298+ meta .writeLong (data .getFilePointer () - start ); // dataLength
299+
300+ if (numDocsWithField == 0 ) {
301+ meta .writeLong (-2 ); // docsWithFieldOffset
302+ meta .writeLong (0L ); // docsWithFieldLength
303+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
304+ meta .writeByte ((byte ) -1 ); // denseRankPower
305+ } else if (numDocsWithField == maxDoc ) {
306+ meta .writeLong (-1 ); // docsWithFieldOffset
307+ meta .writeLong (0L ); // docsWithFieldLength
308+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
309+ meta .writeByte ((byte ) -1 ); // denseRankPower
310+ } else {
311+ long offset = data .getFilePointer ();
312+ meta .writeLong (offset ); // docsWithFieldOffset
313+ final short jumpTableEntryCount = disiAccumulator .build (data );
314+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
315+ meta .writeShort (jumpTableEntryCount );
316+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
317+ }
318+
319+ meta .writeInt (numDocsWithField );
320+ meta .writeInt (minLength );
321+ meta .writeInt (maxLength );
322+ if (offsetsAccumulator != null ) {
323+ offsetsAccumulator .build (meta , data );
324+ }
325+ } finally {
326+ IOUtils .close (disiAccumulator , offsetsAccumulator );
327+ }
328+ } else {
329+ BinaryDocValues values = valuesProducer .getBinary (field );
330+ long start = data .getFilePointer ();
331+ meta .writeLong (start ); // dataOffset
332+ int numDocsWithField = 0 ;
333+ int minLength = Integer .MAX_VALUE ;
334+ int maxLength = 0 ;
309335 for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
310- addr += values .binaryValue ().length ;
336+ numDocsWithField ++;
337+ BytesRef v = values .binaryValue ();
338+ int length = v .length ;
339+ data .writeBytes (v .bytes , v .offset , v .length );
340+ minLength = Math .min (length , minLength );
341+ maxLength = Math .max (length , maxLength );
342+ }
343+ assert numDocsWithField <= maxDoc ;
344+ meta .writeLong (data .getFilePointer () - start ); // dataLength
345+
346+ if (numDocsWithField == 0 ) {
347+ meta .writeLong (-2 ); // docsWithFieldOffset
348+ meta .writeLong (0L ); // docsWithFieldLength
349+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
350+ meta .writeByte ((byte ) -1 ); // denseRankPower
351+ } else if (numDocsWithField == maxDoc ) {
352+ meta .writeLong (-1 ); // docsWithFieldOffset
353+ meta .writeLong (0L ); // docsWithFieldLength
354+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
355+ meta .writeByte ((byte ) -1 ); // denseRankPower
356+ } else {
357+ long offset = data .getFilePointer ();
358+ meta .writeLong (offset ); // docsWithFieldOffset
359+ values = valuesProducer .getBinary (field );
360+ final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
361+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
362+ meta .writeShort (jumpTableEntryCount );
363+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
364+ }
365+
366+ meta .writeInt (numDocsWithField );
367+ meta .writeInt (minLength );
368+ meta .writeInt (maxLength );
369+ if (maxLength > minLength ) {
370+ start = data .getFilePointer ();
371+ meta .writeLong (start );
372+ meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
373+
374+ final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
375+ meta ,
376+ data ,
377+ numDocsWithField + 1 ,
378+ ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
379+ );
380+ long addr = 0 ;
311381 writer .add (addr );
382+ values = valuesProducer .getBinary (field );
383+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
384+ addr += values .binaryValue ().length ;
385+ writer .add (addr );
386+ }
387+ writer .finish ();
388+ meta .writeLong (data .getFilePointer () - start );
312389 }
313- writer .finish ();
314- meta .writeLong (data .getFilePointer () - start );
315390 }
316391 }
317392
0 commit comments