Skip to content

Commit 1df1edb

Browse files
authored
Merge branch 'main' into lucene_10_2_1
2 parents 5a40dfd + 0f6b0b9 commit 1df1edb

File tree

6 files changed

+279
-64
lines changed

6 files changed

+279
-64
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -345,9 +345,6 @@ tests:
345345
- class: org.elasticsearch.search.SearchWithRejectionsIT
346346
method: testOpenContextsAfterRejections
347347
issue: https://github.com/elastic/elasticsearch/issues/126340
348-
- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT
349-
method: test {p0=search.vectors/42_knn_search_bbq_flat/Vector rescoring has same scoring as exact search for kNN section}
350-
issue: https://github.com/elastic/elasticsearch/issues/126368
351348
- class: org.elasticsearch.smoketest.MlWithSecurityIT
352349
method: test {yaml=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable cardinality is too low}
353350
issue: https://github.com/elastic/elasticsearch/issues/123200

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/DocValuesConsumerUtil.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
*/
2121
class DocValuesConsumerUtil {
2222

23-
static final MergeStats UNSUPPORTED = new MergeStats(false, -1, -1);
23+
static final MergeStats UNSUPPORTED = new MergeStats(false, -1, -1, -1, -1);
2424

25-
record MergeStats(boolean supported, long sumNumValues, int sumNumDocsWithField) {}
25+
record MergeStats(boolean supported, long sumNumValues, int sumNumDocsWithField, int minLength, int maxLength) {}
2626

2727
static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, MergeState mergeState, FieldInfo fieldInfo) {
2828
if (optimizedMergeEnabled == false || mergeState.needsIndexSort == false) {
@@ -38,6 +38,8 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
3838

3939
long sumNumValues = 0;
4040
int sumNumDocsWithField = 0;
41+
int minLength = Integer.MAX_VALUE;
42+
int maxLength = 0;
4143

4244
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
4345
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
@@ -86,6 +88,14 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
8688
}
8789
}
8890
}
91+
case BINARY -> {
92+
var entry = tsdbDocValuesProducer.binaries.get(fieldInfo.number);
93+
if (entry != null) {
94+
sumNumDocsWithField += entry.numDocsWithField;
95+
minLength = Math.min(minLength, entry.minLength);
96+
maxLength = Math.max(maxLength, entry.maxLength);
97+
}
98+
}
8999
default -> throw new IllegalStateException("unexpected doc values producer type: " + fieldInfo.getDocValuesType());
90100
}
91101
} else {
@@ -96,7 +106,7 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
96106
}
97107
}
98108

99-
return new MergeStats(true, sumNumValues, sumNumDocsWithField);
109+
return new MergeStats(true, sumNumValues, sumNumDocsWithField, minLength, maxLength);
100110
}
101111

102112
}

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 130 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -258,71 +258,146 @@ public void mergeNumericField(FieldInfo mergeFieldInfo, MergeState mergeState) t
258258
}
259259
}
260260

261+
@Override
262+
public void mergeBinaryField(FieldInfo mergeFieldInfo, MergeState mergeState) throws IOException {
263+
var result = compatibleWithOptimizedMerge(enableOptimizedMerge, mergeState, mergeFieldInfo);
264+
if (result.supported()) {
265+
mergeBinaryField(result, mergeFieldInfo, mergeState);
266+
} else {
267+
super.mergeBinaryField(mergeFieldInfo, mergeState);
268+
}
269+
}
270+
261271
@Override
262272
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
263273
meta.writeInt(field.number);
264274
meta.writeByte(ES819TSDBDocValuesFormat.BINARY);
265275

266-
BinaryDocValues values = valuesProducer.getBinary(field);
267-
long start = data.getFilePointer();
268-
meta.writeLong(start); // dataOffset
269-
int numDocsWithField = 0;
270-
int minLength = Integer.MAX_VALUE;
271-
int maxLength = 0;
272-
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
273-
numDocsWithField++;
274-
BytesRef v = values.binaryValue();
275-
int length = v.length;
276-
data.writeBytes(v.bytes, v.offset, v.length);
277-
minLength = Math.min(length, minLength);
278-
maxLength = Math.max(length, maxLength);
279-
}
280-
assert numDocsWithField <= maxDoc;
281-
meta.writeLong(data.getFilePointer() - start); // dataLength
282-
283-
if (numDocsWithField == 0) {
284-
meta.writeLong(-2); // docsWithFieldOffset
285-
meta.writeLong(0L); // docsWithFieldLength
286-
meta.writeShort((short) -1); // jumpTableEntryCount
287-
meta.writeByte((byte) -1); // denseRankPower
288-
} else if (numDocsWithField == maxDoc) {
289-
meta.writeLong(-1); // docsWithFieldOffset
290-
meta.writeLong(0L); // docsWithFieldLength
291-
meta.writeShort((short) -1); // jumpTableEntryCount
292-
meta.writeByte((byte) -1); // denseRankPower
293-
} else {
294-
long offset = data.getFilePointer();
295-
meta.writeLong(offset); // docsWithFieldOffset
296-
values = valuesProducer.getBinary(field);
297-
final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
298-
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
299-
meta.writeShort(jumpTableEntryCount);
300-
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
301-
}
276+
if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) {
277+
final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField();
278+
final int minLength = tsdbValuesProducer.mergeStats.minLength();
279+
final int maxLength = tsdbValuesProducer.mergeStats.maxLength();
302280

303-
meta.writeInt(numDocsWithField);
304-
meta.writeInt(minLength);
305-
meta.writeInt(maxLength);
306-
if (maxLength > minLength) {
307-
start = data.getFilePointer();
308-
meta.writeLong(start);
309-
meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
281+
assert numDocsWithField <= maxDoc;
310282

311-
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
312-
meta,
313-
data,
314-
numDocsWithField + 1,
315-
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
316-
);
317-
long addr = 0;
318-
writer.add(addr);
319-
values = valuesProducer.getBinary(field);
283+
BinaryDocValues values = valuesProducer.getBinary(field);
284+
long start = data.getFilePointer();
285+
meta.writeLong(start); // dataOffset
286+
287+
OffsetsAccumulator offsetsAccumulator = null;
288+
DISIAccumulator disiAccumulator = null;
289+
try {
290+
if (numDocsWithField > 0 && numDocsWithField < maxDoc) {
291+
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
292+
}
293+
294+
assert maxLength >= minLength;
295+
if (maxLength > minLength) {
296+
offsetsAccumulator = new OffsetsAccumulator(dir, context, data, numDocsWithField);
297+
}
298+
299+
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
300+
BytesRef v = values.binaryValue();
301+
data.writeBytes(v.bytes, v.offset, v.length);
302+
if (disiAccumulator != null) {
303+
disiAccumulator.addDocId(doc);
304+
}
305+
if (offsetsAccumulator != null) {
306+
offsetsAccumulator.addDoc(v.length);
307+
}
308+
}
309+
meta.writeLong(data.getFilePointer() - start); // dataLength
310+
311+
if (numDocsWithField == 0) {
312+
meta.writeLong(-2); // docsWithFieldOffset
313+
meta.writeLong(0L); // docsWithFieldLength
314+
meta.writeShort((short) -1); // jumpTableEntryCount
315+
meta.writeByte((byte) -1); // denseRankPower
316+
} else if (numDocsWithField == maxDoc) {
317+
meta.writeLong(-1); // docsWithFieldOffset
318+
meta.writeLong(0L); // docsWithFieldLength
319+
meta.writeShort((short) -1); // jumpTableEntryCount
320+
meta.writeByte((byte) -1); // denseRankPower
321+
} else {
322+
long offset = data.getFilePointer();
323+
meta.writeLong(offset); // docsWithFieldOffset
324+
final short jumpTableEntryCount = disiAccumulator.build(data);
325+
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
326+
meta.writeShort(jumpTableEntryCount);
327+
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
328+
}
329+
330+
meta.writeInt(numDocsWithField);
331+
meta.writeInt(minLength);
332+
meta.writeInt(maxLength);
333+
if (offsetsAccumulator != null) {
334+
offsetsAccumulator.build(meta, data);
335+
}
336+
} finally {
337+
IOUtils.close(disiAccumulator, offsetsAccumulator);
338+
}
339+
} else {
340+
BinaryDocValues values = valuesProducer.getBinary(field);
341+
long start = data.getFilePointer();
342+
meta.writeLong(start); // dataOffset
343+
int numDocsWithField = 0;
344+
int minLength = Integer.MAX_VALUE;
345+
int maxLength = 0;
320346
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
321-
addr += values.binaryValue().length;
347+
numDocsWithField++;
348+
BytesRef v = values.binaryValue();
349+
int length = v.length;
350+
data.writeBytes(v.bytes, v.offset, v.length);
351+
minLength = Math.min(length, minLength);
352+
maxLength = Math.max(length, maxLength);
353+
}
354+
assert numDocsWithField <= maxDoc;
355+
meta.writeLong(data.getFilePointer() - start); // dataLength
356+
357+
if (numDocsWithField == 0) {
358+
meta.writeLong(-2); // docsWithFieldOffset
359+
meta.writeLong(0L); // docsWithFieldLength
360+
meta.writeShort((short) -1); // jumpTableEntryCount
361+
meta.writeByte((byte) -1); // denseRankPower
362+
} else if (numDocsWithField == maxDoc) {
363+
meta.writeLong(-1); // docsWithFieldOffset
364+
meta.writeLong(0L); // docsWithFieldLength
365+
meta.writeShort((short) -1); // jumpTableEntryCount
366+
meta.writeByte((byte) -1); // denseRankPower
367+
} else {
368+
long offset = data.getFilePointer();
369+
meta.writeLong(offset); // docsWithFieldOffset
370+
values = valuesProducer.getBinary(field);
371+
final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
372+
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
373+
meta.writeShort(jumpTableEntryCount);
374+
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
375+
}
376+
377+
meta.writeInt(numDocsWithField);
378+
meta.writeInt(minLength);
379+
meta.writeInt(maxLength);
380+
if (maxLength > minLength) {
381+
start = data.getFilePointer();
382+
meta.writeLong(start);
383+
meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
384+
385+
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
386+
meta,
387+
data,
388+
numDocsWithField + 1,
389+
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
390+
);
391+
long addr = 0;
322392
writer.add(addr);
393+
values = valuesProducer.getBinary(field);
394+
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
395+
addr += values.binaryValue().length;
396+
writer.add(addr);
397+
}
398+
writer.finish();
399+
meta.writeLong(data.getFilePointer() - start);
323400
}
324-
writer.finish();
325-
meta.writeLong(data.getFilePointer() - start);
326401
}
327402
}
328403

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252

5353
final class ES819TSDBDocValuesProducer extends DocValuesProducer {
5454
final IntObjectHashMap<NumericEntry> numerics;
55-
private final IntObjectHashMap<BinaryEntry> binaries;
55+
final IntObjectHashMap<BinaryEntry> binaries;
5656
final IntObjectHashMap<SortedEntry> sorted;
5757
final IntObjectHashMap<SortedSetEntry> sortedSets;
5858
final IntObjectHashMap<SortedNumericEntry> sortedNumerics;
@@ -1445,7 +1445,7 @@ static class NumericEntry {
14451445
long valuesLength;
14461446
}
14471447

1448-
private static class BinaryEntry {
1448+
static class BinaryEntry {
14491449
long dataOffset;
14501450
long dataLength;
14511451
long docsWithFieldOffset;

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/XDocValuesConsumer.java

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.lucene.codecs.DocValuesConsumer;
1212
import org.apache.lucene.codecs.DocValuesProducer;
1313
import org.apache.lucene.index.BaseTermsEnum;
14+
import org.apache.lucene.index.BinaryDocValues;
1415
import org.apache.lucene.index.DocIDMerger;
1516
import org.apache.lucene.index.DocValues;
1617
import org.apache.lucene.index.DocValuesType;
@@ -152,6 +153,102 @@ public long longValue() throws IOException {
152153
};
153154
}
154155

156+
/** Tracks state of one binary sub-reader that we are merging */
157+
private static class BinaryDocValuesSub extends DocIDMerger.Sub {
158+
159+
final BinaryDocValues values;
160+
161+
BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values) {
162+
super(docMap);
163+
this.values = values;
164+
assert values.docID() == -1;
165+
}
166+
167+
@Override
168+
public int nextDoc() throws IOException {
169+
return values.nextDoc();
170+
}
171+
}
172+
173+
/**
174+
* Merges the binary docvalues from <code>MergeState</code>.
175+
*
176+
* <p>The default implementation calls {@link #addBinaryField}, passing a DocValuesProducer that
177+
* merges and filters deleted documents on the fly.
178+
*/
179+
public void mergeBinaryField(MergeStats mergeStats, FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
180+
addBinaryField(mergeFieldInfo, new TsdbDocValuesProducer(mergeStats) {
181+
@Override
182+
public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
183+
if (fieldInfo != mergeFieldInfo) {
184+
throw new IllegalArgumentException("wrong fieldInfo");
185+
}
186+
187+
List<BinaryDocValuesSub> subs = new ArrayList<>();
188+
189+
long cost = 0;
190+
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
191+
BinaryDocValues values = null;
192+
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
193+
if (docValuesProducer != null) {
194+
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
195+
if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.BINARY) {
196+
values = docValuesProducer.getBinary(readerFieldInfo);
197+
}
198+
}
199+
if (values != null) {
200+
cost += values.cost();
201+
subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], values));
202+
}
203+
}
204+
205+
final DocIDMerger<BinaryDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
206+
final long finalCost = cost;
207+
208+
return new BinaryDocValues() {
209+
private BinaryDocValuesSub current;
210+
private int docID = -1;
211+
212+
@Override
213+
public int docID() {
214+
return docID;
215+
}
216+
217+
@Override
218+
public int nextDoc() throws IOException {
219+
current = docIDMerger.next();
220+
if (current == null) {
221+
docID = NO_MORE_DOCS;
222+
} else {
223+
docID = current.mappedDocID;
224+
}
225+
return docID;
226+
}
227+
228+
@Override
229+
public int advance(int target) throws IOException {
230+
throw new UnsupportedOperationException();
231+
}
232+
233+
@Override
234+
public boolean advanceExact(int target) throws IOException {
235+
throw new UnsupportedOperationException();
236+
}
237+
238+
@Override
239+
public long cost() {
240+
return finalCost;
241+
}
242+
243+
@Override
244+
public BytesRef binaryValue() throws IOException {
245+
return current.values.binaryValue();
246+
}
247+
};
248+
}
249+
});
250+
}
251+
155252
/** Tracks state of one sorted numeric sub-reader that we are merging */
156253
private static class SortedNumericDocValuesSub extends DocIDMerger.Sub {
157254

0 commit comments

Comments
 (0)