Skip to content

Commit 3d5c92c

Browse files
dnhatnfzowl
authored and committed
Try bulk load sorted ordinals across ranges (elastic#137076)
This change implements the TODO for loading sorted ordinals in the TSDB codec. With ordinal range encoding, we can bulk-append the entire range for the next ordinal, rather than reading and appending one document at a time.
1 parent df38e16 commit 3d5c92c

File tree

5 files changed

+86
-20
lines changed

5 files changed

+86
-20
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.apache.lucene.util.packed.PackedInts;
4646
import org.elasticsearch.core.Assertions;
4747
import org.elasticsearch.core.IOUtils;
48+
import org.elasticsearch.core.Nullable;
4849
import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
4950
import org.elasticsearch.index.mapper.BlockLoader;
5051
import org.elasticsearch.index.mapper.blockloader.docvalues.BlockDocValuesReader;
@@ -427,19 +428,43 @@ public BlockLoader.Block tryRead(
427428
}
428429

429430
/**
 * Tries to build a block for the requested documents without reading ordinals one document at a time.
 * Only applies when {@code ords} is a {@code BaseDenseNumericValues}.
 *
 * @param factory block factory used to create constant blocks and the singleton ordinals builder
 * @param docs    the requested documents; {@code docs.get(i)} is read for positions {@code offset .. docs.count() - 1}
 * @param offset  index into {@code docs} of the first document to load
 * @return a constant block when one ordinal covers the whole request, an ordinals block built from
 *         per-range bulk appends when a sorted ordinal reader is available, or {@code null} when none
 *         of these fast paths applies
 * @throws IOException if reading the underlying doc values fails
 */
BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
430-
if (ords instanceof BaseDenseNumericValues denseOrds && (valuesSorted || entry.termsDictEntry.termsDictSize == 1)) {
431+
if (ords instanceof BaseDenseNumericValues denseOrds) {
// Single-term dictionary: ordinal 0 is the only value, so the whole request is one constant block.
432+
if (entry.termsDictEntry.termsDictSize == 1) {
433+
return factory.constantBytes(BytesRef.deepCopyOf(lookupOrd(0)), docs.count() - offset);
434+
}
// The remaining fast paths compare the first and last ordinals, which is only meaningful for sorted values.
435+
if (valuesSorted == false) {
436+
return null;
437+
}
431438
int firstDoc = docs.get(offset);
432439
denseOrds.advanceExact(firstDoc);
433-
long startValue = denseOrds.longValue();
440+
int startValue = Math.toIntExact(denseOrds.longValue());
434441
final int docCount = docs.count();
435442
int lastDoc = docs.get(docCount - 1);
436-
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
443+
int lastValue = Math.toIntExact(denseOrds.lookAheadValueAt(lastDoc));
// Values are sorted here, so equal ordinals at the first and last doc mean a single value for the whole request.
437444
if (lastValue == startValue) {
438445
BytesRef b = lookupOrd(Math.toIntExact(startValue));
439446
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
440447
}
441-
// TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue,
442-
// then fill remaining positions with lastValue.
// Bulk path: only taken when the dense ords expose a sorted ordinal reader (it may be null).
448+
var ordinalReader = denseOrds.sortedOrdinalReader();
449+
if (ordinalReader != null) {
450+
try (var builder = factory.singletonOrdinalsBuilder(this, docCount - offset, true)) {
451+
int docIndex = offset;
452+
while (docIndex < docCount) {
453+
int ord = Math.toIntExact(ordinalReader.readValueAndAdvance(docs.get(docIndex)));
454+
// append all docs of the last range without checking
455+
if (lastDoc < ordinalReader.rangeEndExclusive) {
456+
builder.appendOrds(ord, docCount - docIndex);
457+
break;
458+
}
// Count the requested docs that fall before the end of the current range, then append them in one call.
// NOTE(review): presumably every doc below rangeEndExclusive shares the ord just read - confirm against SortedOrdinalReader.
459+
final int startIndex = docIndex;
460+
while (docIndex < docCount && docs.get(docIndex) < ordinalReader.rangeEndExclusive) {
461+
++docIndex;
462+
}
463+
builder.appendOrds(ord, docIndex - startIndex);
464+
}
465+
return builder.build();
466+
}
467+
}
443468
}
444469
return null;
445470
}
@@ -552,6 +577,9 @@ public BlockLoader.Block tryRead(
552577
BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
553578
return null;
554579
}
580+
581+
/**
 * Returns the reader used to bulk-load sorted ordinals range by range, or {@code null} when this
 * implementation does not provide one; callers must check for {@code null} before using it.
 */
@Nullable
582+
abstract SortedOrdinalReader sortedOrdinalReader();
555583
}
556584

557585
abstract static class BaseSparseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
@@ -1326,6 +1354,11 @@ public int docIDRunEnd() {
13261354
long lookAheadValueAt(int targetDoc) throws IOException {
13271355
return 0L; // Only one ordinal!
13281356
}
1357+
1358+
// This implementation provides no sorted ordinal reader, so it always returns null.
@Override
1359+
SortedOrdinalReader sortedOrdinalReader() {
1360+
return null;
1361+
}
13291362
};
13301363
} else {
13311364
final IndexedDISI disi = new IndexedDISI(
@@ -1488,6 +1521,10 @@ static boolean isDense(int firstDocId, int lastDocId, int length) {
14881521
return lastDocId - firstDocId == length - 1;
14891522
}
14901523

1524+
// This implementation provides no sorted ordinal reader, so it always returns null.
@Override
1525+
SortedOrdinalReader sortedOrdinalReader() {
1526+
return null;
1527+
}
14911528
};
14921529
} else {
14931530
final IndexedDISI disi = new IndexedDISI(
@@ -1623,6 +1660,11 @@ public long longValue() {
16231660
public int docIDRunEnd() throws IOException {
16241661
return maxDoc;
16251662
}
1663+
1664+
// Exposes the enclosing scope's ordinalsReader so callers can bulk-load ordinals through it.
@Override
1665+
SortedOrdinalReader sortedOrdinalReader() {
1666+
return ordinalsReader;
1667+
}
16261668
};
16271669
} else {
16281670
final var disi = new IndexedDISI(

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,11 @@ interface SingletonOrdinalsBuilder extends Builder {
604604
*/
605605
SingletonOrdinalsBuilder appendOrd(int value);
606606

607+
/**
 * Appends the same ordinal for the next {@code length} positions.
 *
 * @param ord    the ordinal to repeat
 * @param length the number of consecutive positions that receive {@code ord}
 * @return this builder
 */
610+
SingletonOrdinalsBuilder appendOrds(int ord, int length);
611+
607612
SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd);
608613
}
609614

server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,19 @@ public void testLoadKeywordFieldWithIndexSorts() throws IOException {
11701170
var config = new IndexWriterConfig();
11711171
config.setIndexSort(new Sort(new SortField(primaryField, SortField.Type.STRING, false)));
11721172
config.setMergePolicy(new LogByteSizeMergePolicy());
1173-
config.setCodec(getCodec());
1173+
final Codec codec = new Elasticsearch92Lucene103Codec() {
1174+
final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
1175+
ESTestCase.randomIntBetween(2, 4096),
1176+
1, // always enable range-encode
1177+
random().nextBoolean()
1178+
);
1179+
1180+
@Override
1181+
public DocValuesFormat getDocValuesFormatForField(String field) {
1182+
return docValuesFormat;
1183+
}
1184+
};
1185+
config.setCodec(codec);
11741186
Map<Integer, String> hostnames = new HashMap<>();
11751187
try (Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, config)) {
11761188
int numDocs = ESTestCase.randomIntBetween(100, 5000);
@@ -1278,20 +1290,11 @@ public int get(int docId) {
12781290
return docId;
12791291
}
12801292
}, start);
1281-
Set<String> seenValues = new HashSet<>();
1282-
for (int p = start; p <= end; p++) {
1283-
String hostName = hostnames.get(((Number) idBlock.get(p)).intValue());
1284-
seenValues.add(hostName);
1285-
}
1286-
if (seenValues.size() == 1) {
1287-
assertNotNull(hostBlock);
1288-
assertThat(hostBlock.size(), equalTo(end - start + 1));
1289-
for (int i = 0; i < hostBlock.size(); i++) {
1290-
String actualHostName = BytesRefs.toString(hostBlock.get(i));
1291-
assertThat(actualHostName, equalTo(hostnames.get(((Number) idBlock.get(i + start)).intValue())));
1292-
}
1293-
} else {
1294-
assertNull(hostBlock);
1293+
assertNotNull(hostBlock);
1294+
assertThat(hostBlock.size(), equalTo(end - start + 1));
1295+
for (int i = 0; i < hostBlock.size(); i++) {
1296+
String actualHostName = BytesRefs.toString(hostBlock.get(i));
1297+
assertThat(actualHostName, equalTo(hostnames.get(((Number) idBlock.get(i + start)).intValue())));
12951298
}
12961299
if (start == idBlock.size() - 1) {
12971300
break;

test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,14 @@ public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, i
396396
}
397397
return this;
398398
}
399+
400+
@Override
401+
public BlockLoader.SingletonOrdinalsBuilder appendOrds(int ord, int length) {
402+
for (int i = 0; i < length; i++) {
403+
appendOrd(ord);
404+
}
405+
return this;
406+
}
399407
}
400408
return new SingletonOrdsBuilder();
401409
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, i
6767
return this;
6868
}
6969

70+
@Override
71+
public BlockLoader.SingletonOrdinalsBuilder appendOrds(int ord, int length) {
72+
Arrays.fill(ords, count, count + length, ord);
73+
this.minOrd = Math.min(this.minOrd, ord);
74+
this.maxOrd = Math.max(this.maxOrd, ord);
75+
return this;
76+
}
77+
7078
@Override
7179
public SingletonOrdinalsBuilder beginPositionEntry() {
7280
throw new UnsupportedOperationException("should only have one value per doc");

0 commit comments

Comments
 (0)