Skip to content

Commit a63e853

Browse files
committed
addresses
1 parent d8b3c15 commit a63e853

File tree

2 files changed

+108
-29
lines changed

2 files changed

+108
-29
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesConsumerUtil.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
*/
4141
class DocValuesConsumerUtil {
4242

43-
static final MergeStats UNSUPPORTED = new MergeStats(false, -1, -1);
43+
static final MergeStats UNSUPPORTED = new MergeStats(false, -1, -1, false);
4444

4545
abstract static class TsdbDocValuesProducer extends EmptyDocValuesProducer {
4646

@@ -52,7 +52,7 @@ abstract static class TsdbDocValuesProducer extends EmptyDocValuesProducer {
5252

5353
}
5454

55-
record MergeStats(boolean supported, long sumNumValues, int sumNumDocsWithField) {}
55+
record MergeStats(boolean supported, long sumNumValues, int sumNumDocsWithField, boolean sumNumDocsWithFieldAccurate) {}
5656

5757
static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, MergeState mergeState, FieldInfo fieldInfo)
5858
throws IOException {
@@ -69,6 +69,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
6969

7070
long sumNumValues = 0;
7171
int sumNumDocsWithField = 0;
72+
boolean sumNumDocsWithFieldAccurate = true;
7273

7374
// TODO bring back codec version check? (per field doc values producer sits between ES87TSDBDocValuesConsumer)
7475
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
@@ -77,6 +78,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
7778
case NUMERIC -> {
7879
var numeric = docValuesProducer.getNumeric(fieldInfo);
7980
if (numeric instanceof ES87TSDBDocValuesProducer.BaseNumericDocValues baseNumeric) {
81+
sumNumDocsWithFieldAccurate = false;
8082
var entry = baseNumeric.entry;
8183
sumNumValues += entry.numValues;
8284
int numDocsWithField = getNumDocsWithField(entry, mergeState.maxDocs[i]);
@@ -94,6 +96,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
9496
} else {
9597
var singleton = DocValues.unwrapSingleton(sortedNumeric);
9698
if (singleton instanceof ES87TSDBDocValuesProducer.BaseNumericDocValues baseNumeric) {
99+
sumNumDocsWithFieldAccurate = false;
97100
var entry = baseNumeric.entry;
98101
sumNumValues += entry.numValues;
99102
// In this case the numDocsWithField doesn't get recorded in meta:
@@ -107,6 +110,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
107110
case SORTED -> {
108111
var sorted = docValuesProducer.getSorted(fieldInfo);
109112
if (sorted instanceof ES87TSDBDocValuesProducer.BaseSortedDocValues baseSortedDocValues) {
113+
sumNumDocsWithFieldAccurate = false;
110114
var entry = baseSortedDocValues.entry;
111115
sumNumValues += entry.ordsEntry.numValues;
112116
// In this case the numDocsWithField doesn't get recorded in meta:v
@@ -125,6 +129,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
125129
} else {
126130
var singleton = DocValues.unwrapSingleton(sortedSet);
127131
if (singleton instanceof ES87TSDBDocValuesProducer.BaseSortedDocValues baseSorted) {
132+
sumNumDocsWithFieldAccurate = false;
128133
var entry = baseSorted.entry;
129134
sumNumValues += entry.ordsEntry.numValues;
130135
// In this case the numDocsWithField doesn't get recorded in meta:
@@ -139,7 +144,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
139144
}
140145
}
141146

142-
return new MergeStats(true, sumNumValues, sumNumDocsWithField);
147+
return new MergeStats(true, sumNumValues, sumNumDocsWithField, sumNumDocsWithFieldAccurate);
143148
}
144149

145150
private static int getNumDocsWithField(ES87TSDBDocValuesProducer.NumericEntry entry, int maxDoc) {

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java

Lines changed: 100 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,14 @@
2626
import org.apache.lucene.index.SortedNumericDocValues;
2727
import org.apache.lucene.index.SortedSetDocValues;
2828
import org.apache.lucene.index.TermsEnum;
29+
import org.apache.lucene.search.CheckedIntConsumer;
2930
import org.apache.lucene.search.DocIdSetIterator;
3031
import org.apache.lucene.search.SortedSetSelector;
3132
import org.apache.lucene.store.ByteArrayDataOutput;
3233
import org.apache.lucene.store.ByteBuffersDataOutput;
3334
import org.apache.lucene.store.ByteBuffersIndexOutput;
35+
import org.apache.lucene.store.Directory;
36+
import org.apache.lucene.store.IOContext;
3437
import org.apache.lucene.store.IndexOutput;
3538
import org.apache.lucene.util.ArrayUtil;
3639
import org.apache.lucene.util.BytesRef;
@@ -57,6 +60,7 @@ final class ES87TSDBDocValuesConsumer extends DocValuesConsumer {
5760

5861
IndexOutput data, meta;
5962
final int maxDoc;
63+
final Directory dir;
6064
final boolean enableOptimizedMerge;
6165
private byte[] termsDictBuffer;
6266
private final int skipIndexIntervalSize;
@@ -94,6 +98,7 @@ final class ES87TSDBDocValuesConsumer extends DocValuesConsumer {
9498
maxDoc = state.segmentInfo.maxDoc();
9599
this.skipIndexIntervalSize = skipIndexIntervalSize;
96100
this.enableOptimizedMerge = enableOptimizedMerge;
101+
this.dir = state.directory;
97102
success = true;
98103
} finally {
99104
if (success == false) {
@@ -116,10 +121,15 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
116121
writeSkipIndex(field, producer);
117122
}
118123

119-
writeField(field, producer, -1);
124+
writeField(field, producer, -1, null);
120125
}
121126

122-
private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException {
127+
private long[] writeField(
128+
FieldInfo field,
129+
DocValuesProducer valuesProducer,
130+
long maxOrd,
131+
CheckedIntConsumer<IOException> docCountConsumer
132+
) throws IOException {
123133
int numDocsWithValue = 0;
124134
long numValues = 0;
125135

@@ -184,6 +194,9 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon
184194
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
185195
numDocsWithValue++;
186196
final int count = values.docValueCount();
197+
if (docCountConsumer != null) {
198+
docCountConsumer.accept(count);
199+
}
187200
for (int i = 0; i < count; ++i) {
188201
buffer[bufferSize++] = values.nextValue();
189202
if (bufferSize == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) {
@@ -368,7 +381,7 @@ public long cost() {
368381
}
369382
SortedDocValues sorted = valuesProducer.getSorted(field);
370383
int maxOrd = sorted.getValueCount();
371-
writeField(field, producer, maxOrd);
384+
writeField(field, producer, maxOrd, null);
372385
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
373386
}
374387

@@ -527,32 +540,93 @@ private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesPr
527540
if (maxOrd > -1) {
528541
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
529542
}
530-
long[] stats = writeField(field, valuesProducer, maxOrd);
531-
int numDocsWithField = Math.toIntExact(stats[0]);
532-
long numValues = stats[1];
533-
assert numValues >= numDocsWithField;
534-
535-
meta.writeInt(numDocsWithField);
536-
if (numValues > numDocsWithField) {
537-
long start = data.getFilePointer();
538-
meta.writeLong(start);
539-
meta.writeVInt(ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
540543

541-
final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(
542-
meta,
543-
data,
544-
numDocsWithField + 1L,
545-
ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
546-
);
547-
long addr = 0;
548-
addressesWriter.add(addr);
549-
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
550-
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
551-
addr += values.docValueCount();
544+
boolean optimizedAddressWriting = false;
545+
if (valuesProducer instanceof DocValuesConsumerUtil.TsdbDocValuesProducer tsdbDocValuesProducer) {
546+
if (tsdbDocValuesProducer.mergeStats.sumNumDocsWithFieldAccurate()) {
547+
optimizedAddressWriting = true;
548+
}
549+
}
550+
if (optimizedAddressWriting) {
551+
DocValuesConsumerUtil.TsdbDocValuesProducer tsdbDocValuesProducer =
552+
(DocValuesConsumerUtil.TsdbDocValuesProducer) valuesProducer;
553+
int numDocsWithField = tsdbDocValuesProducer.mergeStats.sumNumDocsWithField();
554+
long numValues = tsdbDocValuesProducer.mergeStats.sumNumValues();
555+
if (numDocsWithField == numValues) {
556+
writeField(field, valuesProducer, maxOrd, null);
557+
assert numValues >= numDocsWithField;
558+
meta.writeInt(numDocsWithField);
559+
} else {
560+
assert numValues >= numDocsWithField;
561+
562+
var addressMetaBuffer = new ByteBuffersDataOutput();
563+
String addressDataOutputName = null;
564+
try (
565+
var addressMetaOutput = new ByteBuffersIndexOutput(addressMetaBuffer, "meta-temp", "meta-temp");
566+
// TODO: which IOContext should be used here?
567+
var addressDataOutput = dir.createTempOutput(data.getName(), "address-data", IOContext.DEFAULT)
568+
) {
569+
addressDataOutputName = addressDataOutput.getName();
570+
final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(
571+
addressMetaOutput,
572+
addressDataOutput,
573+
numDocsWithField + 1L,
574+
ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
575+
);
576+
long[] addr = new long[1];
577+
addressesWriter.add(addr[0]);
578+
writeField(field, valuesProducer, maxOrd, docValueCount -> {
579+
addr[0] += docValueCount;
580+
addressesWriter.add(addr[0]);
581+
});
582+
addressesWriter.finish();
583+
584+
meta.writeInt(numDocsWithField);
585+
long start = data.getFilePointer();
586+
meta.writeLong(start);
587+
meta.writeVInt(ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
588+
addressMetaBuffer.copyTo(meta);
589+
addressDataOutput.close();
590+
try (
591+
// TODO: which IOContext should be used here?
592+
var addressDataInput = dir.openInput(addressDataOutput.getName(), IOContext.DEFAULT)
593+
) {
594+
data.copyBytes(addressDataInput, addressDataInput.length());
595+
meta.writeLong(data.getFilePointer() - start);
596+
}
597+
} finally {
598+
if (addressDataOutputName != null) {
599+
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, addressDataOutputName);
600+
}
601+
}
602+
}
603+
} else {
604+
long[] stats = writeField(field, valuesProducer, maxOrd, null);
605+
int numDocsWithField = Math.toIntExact(stats[0]);
606+
long numValues = stats[1];
607+
assert numValues >= numDocsWithField;
608+
meta.writeInt(numDocsWithField);
609+
if (numValues > numDocsWithField) {
610+
long start = data.getFilePointer();
611+
meta.writeLong(start);
612+
meta.writeVInt(ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
613+
614+
final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(
615+
meta,
616+
data,
617+
numDocsWithField + 1L,
618+
ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
619+
);
620+
long addr = 0;
552621
addressesWriter.add(addr);
622+
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
623+
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
624+
addr += values.docValueCount();
625+
addressesWriter.add(addr);
626+
}
627+
addressesWriter.finish();
628+
meta.writeLong(data.getFilePointer() - start);
553629
}
554-
addressesWriter.finish();
555-
meta.writeLong(data.getFilePointer() - start);
556630
}
557631
}
558632

0 commit comments

Comments
 (0)