@@ -258,71 +258,146 @@ public void mergeNumericField(FieldInfo mergeFieldInfo, MergeState mergeState) t
258258 }
259259 }
260260
261+ @ Override
262+ public void mergeBinaryField (FieldInfo mergeFieldInfo , MergeState mergeState ) throws IOException {
263+ var result = compatibleWithOptimizedMerge (enableOptimizedMerge , mergeState , mergeFieldInfo );
264+ if (result .supported ()) {
265+ mergeBinaryField (result , mergeFieldInfo , mergeState );
266+ } else {
267+ super .mergeBinaryField (mergeFieldInfo , mergeState );
268+ }
269+ }
270+
261271 @ Override
262272 public void addBinaryField (FieldInfo field , DocValuesProducer valuesProducer ) throws IOException {
263273 meta .writeInt (field .number );
264274 meta .writeByte (ES819TSDBDocValuesFormat .BINARY );
265275
266- BinaryDocValues values = valuesProducer .getBinary (field );
267- long start = data .getFilePointer ();
268- meta .writeLong (start ); // dataOffset
269- int numDocsWithField = 0 ;
270- int minLength = Integer .MAX_VALUE ;
271- int maxLength = 0 ;
272- for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
273- numDocsWithField ++;
274- BytesRef v = values .binaryValue ();
275- int length = v .length ;
276- data .writeBytes (v .bytes , v .offset , v .length );
277- minLength = Math .min (length , minLength );
278- maxLength = Math .max (length , maxLength );
279- }
280- assert numDocsWithField <= maxDoc ;
281- meta .writeLong (data .getFilePointer () - start ); // dataLength
282-
283- if (numDocsWithField == 0 ) {
284- meta .writeLong (-2 ); // docsWithFieldOffset
285- meta .writeLong (0L ); // docsWithFieldLength
286- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
287- meta .writeByte ((byte ) -1 ); // denseRankPower
288- } else if (numDocsWithField == maxDoc ) {
289- meta .writeLong (-1 ); // docsWithFieldOffset
290- meta .writeLong (0L ); // docsWithFieldLength
291- meta .writeShort ((short ) -1 ); // jumpTableEntryCount
292- meta .writeByte ((byte ) -1 ); // denseRankPower
293- } else {
294- long offset = data .getFilePointer ();
295- meta .writeLong (offset ); // docsWithFieldOffset
296- values = valuesProducer .getBinary (field );
297- final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
298- meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
299- meta .writeShort (jumpTableEntryCount );
300- meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
301- }
276+ if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer .mergeStats .supported ()) {
277+ final int numDocsWithField = tsdbValuesProducer .mergeStats .sumNumDocsWithField ();
278+ final int minLength = tsdbValuesProducer .mergeStats .minLength ();
279+ final int maxLength = tsdbValuesProducer .mergeStats .maxLength ();
302280
303- meta .writeInt (numDocsWithField );
304- meta .writeInt (minLength );
305- meta .writeInt (maxLength );
306- if (maxLength > minLength ) {
307- start = data .getFilePointer ();
308- meta .writeLong (start );
309- meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
281+ assert numDocsWithField <= maxDoc ;
310282
311- final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
312- meta ,
313- data ,
314- numDocsWithField + 1 ,
315- ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
316- );
317- long addr = 0 ;
318- writer .add (addr );
319- values = valuesProducer .getBinary (field );
283+ BinaryDocValues values = valuesProducer .getBinary (field );
284+ long start = data .getFilePointer ();
285+ meta .writeLong (start ); // dataOffset
286+
287+ OffsetsAccumulator offsetsAccumulator = null ;
288+ DISIAccumulator disiAccumulator = null ;
289+ try {
290+ if (numDocsWithField > 0 && numDocsWithField < maxDoc ) {
291+ disiAccumulator = new DISIAccumulator (dir , context , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
292+ }
293+
294+ assert maxLength >= minLength ;
295+ if (maxLength > minLength ) {
296+ offsetsAccumulator = new OffsetsAccumulator (dir , context , data , numDocsWithField );
297+ }
298+
299+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
300+ BytesRef v = values .binaryValue ();
301+ data .writeBytes (v .bytes , v .offset , v .length );
302+ if (disiAccumulator != null ) {
303+ disiAccumulator .addDocId (doc );
304+ }
305+ if (offsetsAccumulator != null ) {
306+ offsetsAccumulator .addDoc (v .length );
307+ }
308+ }
309+ meta .writeLong (data .getFilePointer () - start ); // dataLength
310+
311+ if (numDocsWithField == 0 ) {
312+ meta .writeLong (-2 ); // docsWithFieldOffset
313+ meta .writeLong (0L ); // docsWithFieldLength
314+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
315+ meta .writeByte ((byte ) -1 ); // denseRankPower
316+ } else if (numDocsWithField == maxDoc ) {
317+ meta .writeLong (-1 ); // docsWithFieldOffset
318+ meta .writeLong (0L ); // docsWithFieldLength
319+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
320+ meta .writeByte ((byte ) -1 ); // denseRankPower
321+ } else {
322+ long offset = data .getFilePointer ();
323+ meta .writeLong (offset ); // docsWithFieldOffset
324+ final short jumpTableEntryCount = disiAccumulator .build (data );
325+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
326+ meta .writeShort (jumpTableEntryCount );
327+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
328+ }
329+
330+ meta .writeInt (numDocsWithField );
331+ meta .writeInt (minLength );
332+ meta .writeInt (maxLength );
333+ if (offsetsAccumulator != null ) {
334+ offsetsAccumulator .build (meta , data );
335+ }
336+ } finally {
337+ IOUtils .close (disiAccumulator , offsetsAccumulator );
338+ }
339+ } else {
340+ BinaryDocValues values = valuesProducer .getBinary (field );
341+ long start = data .getFilePointer ();
342+ meta .writeLong (start ); // dataOffset
343+ int numDocsWithField = 0 ;
344+ int minLength = Integer .MAX_VALUE ;
345+ int maxLength = 0 ;
320346 for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
321- addr += values .binaryValue ().length ;
347+ numDocsWithField ++;
348+ BytesRef v = values .binaryValue ();
349+ int length = v .length ;
350+ data .writeBytes (v .bytes , v .offset , v .length );
351+ minLength = Math .min (length , minLength );
352+ maxLength = Math .max (length , maxLength );
353+ }
354+ assert numDocsWithField <= maxDoc ;
355+ meta .writeLong (data .getFilePointer () - start ); // dataLength
356+
357+ if (numDocsWithField == 0 ) {
358+ meta .writeLong (-2 ); // docsWithFieldOffset
359+ meta .writeLong (0L ); // docsWithFieldLength
360+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
361+ meta .writeByte ((byte ) -1 ); // denseRankPower
362+ } else if (numDocsWithField == maxDoc ) {
363+ meta .writeLong (-1 ); // docsWithFieldOffset
364+ meta .writeLong (0L ); // docsWithFieldLength
365+ meta .writeShort ((short ) -1 ); // jumpTableEntryCount
366+ meta .writeByte ((byte ) -1 ); // denseRankPower
367+ } else {
368+ long offset = data .getFilePointer ();
369+ meta .writeLong (offset ); // docsWithFieldOffset
370+ values = valuesProducer .getBinary (field );
371+ final short jumpTableEntryCount = IndexedDISI .writeBitSet (values , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
372+ meta .writeLong (data .getFilePointer () - offset ); // docsWithFieldLength
373+ meta .writeShort (jumpTableEntryCount );
374+ meta .writeByte (IndexedDISI .DEFAULT_DENSE_RANK_POWER );
375+ }
376+
377+ meta .writeInt (numDocsWithField );
378+ meta .writeInt (minLength );
379+ meta .writeInt (maxLength );
380+ if (maxLength > minLength ) {
381+ start = data .getFilePointer ();
382+ meta .writeLong (start );
383+ meta .writeVInt (ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT );
384+
385+ final DirectMonotonicWriter writer = DirectMonotonicWriter .getInstance (
386+ meta ,
387+ data ,
388+ numDocsWithField + 1 ,
389+ ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
390+ );
391+ long addr = 0 ;
322392 writer .add (addr );
393+ values = valuesProducer .getBinary (field );
394+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
395+ addr += values .binaryValue ().length ;
396+ writer .add (addr );
397+ }
398+ writer .finish ();
399+ meta .writeLong (data .getFilePointer () - start );
323400 }
324- writer .finish ();
325- meta .writeLong (data .getFilePointer () - start );
326401 }
327402 }
328403
0 commit comments