Skip to content

Commit 5b60057

Browse files
martijnvg authored and javanna committed
Speed up loading dense singleton keyword fields (#132994)
With this change, both sorted set and numeric doc values use the same bulk loading for values/ordinals. This PR supersedes #132715, which also sped up loading dense singleton keyword fields but duplicated the bulk encoding logic.
1 parent dbab66f commit 5b60057

File tree

9 files changed

+336
-95
lines changed

9 files changed

+336
-95
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 121 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,10 @@ public SortedDocValues getSorted(FieldInfo field) throws IOException {
345345
}
346346

347347
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
348+
if (entry.ordsEntry.docsWithFieldOffset == -2) {
349+
return DocValues.emptySorted();
350+
}
351+
348352
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
349353
return new BaseSortedDocValues(entry) {
350354

@@ -380,7 +384,25 @@ public long cost() {
380384

381385
@Override
382386
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
383-
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
387+
if (ords instanceof BaseDenseNumericValues denseOrds) {
388+
var block = tryReadAHead(factory, docs, offset);
389+
if (block != null) {
390+
return block;
391+
}
392+
// Falling back to tryRead(...) is safe here, given that current block index wasn't altered by looking ahead.
393+
try (var builder = factory.singletonOrdinalsBuilder(this, docs.count() - offset, true)) {
394+
BlockLoader.SingletonLongBuilder delegate = new SingletonLongToSingletonOrdinalDelegate(builder);
395+
var result = denseOrds.tryRead(delegate, docs, offset);
396+
if (result != null) {
397+
return result;
398+
}
399+
}
400+
}
401+
return null;
402+
}
403+
404+
BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
405+
if (ords instanceof BaseDenseNumericValues denseOrds && (valuesSorted || entry.termsDictEntry.termsDictSize == 1)) {
384406
int firstDoc = docs.get(offset);
385407
denseOrds.advanceExact(firstDoc);
386408
long startValue = denseOrds.longValue();
@@ -438,10 +460,18 @@ public TermsEnum termsEnum() throws IOException {
438460
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
439461
return null;
440462
}
463+
464+
/**
 * Default look-ahead bulk read: this implementation performs no work and always returns {@code null}.
 * NOTE(review): the enclosing class is not visible in this hunk; callers elsewhere in this diff treat a
 * {@code null} block as "not available" and fall back to another read path - confirm for this class.
 */
BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
465+
return null;
466+
}
441467
}
442468

443469
/**
 * Base class for numeric doc values that additionally expose a look-ahead accessor and an optional
 * bulk read into a caller-supplied {@link BlockLoader.SingletonLongBuilder}.
 * NOTE(review): "dense" presumably means every document has a value - confirm against the producer.
 */
abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
444470
// Returns the value stored for targetDoc.
// NOTE(review): presumably does not move the iterator's current position/block - confirm in implementations.
abstract long lookAheadValueAt(int targetDoc) throws IOException;
471+
472+
/**
 * Bulk-reads the values of {@code docs}, starting at position {@code offset}, into {@code builder}.
 * This default implementation reads nothing and returns {@code null}; subclasses may override it.
 * The caller visible in this diff simply propagates a {@code null} result.
 */
BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
473+
return null;
474+
}
445475
}
446476

447477
abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
@@ -1256,41 +1286,49 @@ public long longValue() throws IOException {
12561286

12571287
@Override
12581288
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1259-
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
1289+
try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) {
1290+
return tryRead(builder, docs, offset);
1291+
}
1292+
}
1293+
1294+
@Override
1295+
BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
12601296
final int docsCount = docs.count();
12611297
doc = docs.get(docsCount - 1);
1262-
try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) {
1263-
for (int i = offset; i < docsCount;) {
1264-
int index = docs.get(i);
1265-
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1266-
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1267-
if (blockIndex != currentBlockIndex) {
1268-
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1269-
// no need to seek if the loading block is the next block
1270-
if (currentBlockIndex + 1 != blockIndex) {
1271-
valuesData.seek(indexReader.get(blockIndex));
1272-
}
1273-
currentBlockIndex = blockIndex;
1298+
for (int i = offset; i < docsCount;) {
1299+
int index = docs.get(i);
1300+
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1301+
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1302+
if (blockIndex != currentBlockIndex) {
1303+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1304+
// no need to seek if the loading block is the next block
1305+
if (currentBlockIndex + 1 != blockIndex) {
1306+
valuesData.seek(indexReader.get(blockIndex));
1307+
}
1308+
currentBlockIndex = blockIndex;
1309+
if (bitsPerOrd == -1) {
12741310
decoder.decode(valuesData, currentBlock);
1311+
} else {
1312+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
12751313
}
1314+
}
12761315

1277-
// Try to append more than just one value:
1278-
// Instead of iterating over docs and find the max length, take an optimistic approach to avoid as
1279-
// many comparisons as there are remaining docs and instead do at most 7 comparisons:
1280-
int length = 1;
1281-
int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i);
1282-
for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) {
1283-
int lastIndex = i + newLength - 1;
1284-
if (isDense(index, docs.get(lastIndex), newLength)) {
1285-
length = newLength;
1286-
break;
1287-
}
1316+
// Try to append more than just one value:
1317+
// Instead of iterating over docs and find the max length, take an optimistic approach to avoid as
1318+
// many comparisons as there are remaining docs and instead do at most 7 comparisons:
1319+
int length = 1;
1320+
int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i);
1321+
for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) {
1322+
int lastIndex = i + newLength - 1;
1323+
if (isDense(index, docs.get(lastIndex), newLength)) {
1324+
length = newLength;
1325+
break;
12881326
}
1289-
builder.appendLongs(currentBlock, blockInIndex, length);
1290-
i += length;
12911327
}
1292-
return builder.build();
1328+
builder.appendLongs(currentBlock, blockInIndex, length);
1329+
i += length;
12931330
}
1331+
return builder.build();
12941332
}
12951333

12961334
@Override
@@ -1624,4 +1662,59 @@ private static class TermsDictEntry {
16241662
int maxBlockLength;
16251663
}
16261664

1665+
/**
 * Adapter that lets code written against {@link BlockLoader.SingletonLongBuilder} feed a
 * {@link BlockLoader.SingletonOrdinalsBuilder}: each appended {@code long} is narrowed to an {@code int}
 * ordinal and forwarded to the wrapped builder.
 * <p>
 * Only {@link #appendLongs(long[], int, int)} and {@link #build()} are supported; every other builder
 * method throws {@link UnsupportedOperationException}.
 */
static final class SingletonLongToSingletonOrdinalDelegate implements BlockLoader.SingletonLongBuilder {
1666+
// The ordinals builder that receives the converted values and produces the final block.
private final BlockLoader.SingletonOrdinalsBuilder builder;
1667+
1668+
/**
 * @param builder the ordinals builder to forward converted values to
 */
SingletonLongToSingletonOrdinalDelegate(BlockLoader.SingletonOrdinalsBuilder builder) {
1669+
this.builder = builder;
1670+
}
1671+
1672+
/**
 * Not supported: single values cannot be appended through this adapter.
 *
 * @throws UnsupportedOperationException always
 */
@Override
1673+
public BlockLoader.SingletonLongBuilder appendLong(long value) {
1674+
throw new UnsupportedOperationException();
1675+
}
1676+
1677+
/**
 * Converts {@code values[from .. from + length)} to {@code int} ordinals and appends them to the
 * wrapped builder in one call, together with the smallest and largest ordinal of the range.
 *
 * @param values source array holding ordinals as longs
 * @param from   index of the first element to convert
 * @param length number of elements to convert
 * @return this adapter
 * @throws ArithmeticException if a value does not fit in an {@code int} (from {@link Math#toIntExact(long)})
 */
@Override
1678+
public BlockLoader.SingletonLongBuilder appendLongs(long[] values, int from, int length) {
1679+
// Unfortunately, no array copy here...
1680+
// Since we need to loop here, let's also keep track of min/max.
1681+
// NOTE(review): when length == 0 the loop does not run and these sentinels
// (Integer.MAX_VALUE / Integer.MIN_VALUE) are forwarded as min/max - confirm appendOrds tolerates that.
int minOrd = Integer.MAX_VALUE;
1682+
int maxOrd = Integer.MIN_VALUE;
1683+
// Write position in convertedOrds; it starts at 0 regardless of 'from'.
int counter = 0;
1684+
// A fresh array is allocated on every call, sized to exactly the requested range.
int[] convertedOrds = new int[length];
1685+
int end = from + length;
1686+
for (int j = from; j < end; j++) {
1687+
// Narrowing is checked: an out-of-int-range value fails instead of silently truncating.
int ord = Math.toIntExact(values[j]);
1688+
convertedOrds[counter++] = ord;
1689+
minOrd = Math.min(minOrd, ord);
1690+
maxOrd = Math.max(maxOrd, ord);
1691+
}
1692+
builder.appendOrds(convertedOrds, 0, length, minOrd, maxOrd);
1693+
return this;
1694+
}
1695+
1696+
/**
 * Builds the block by delegating to the wrapped ordinals builder.
 */
@Override
1697+
public BlockLoader.Block build() {
1698+
return builder.build();
1699+
}
1700+
1701+
/**
 * Not supported by this adapter.
 *
 * @throws UnsupportedOperationException always
 */
@Override
1702+
public BlockLoader.Builder appendNull() {
1703+
throw new UnsupportedOperationException();
1704+
}
1705+
1706+
/**
 * Not supported by this adapter.
 *
 * @throws UnsupportedOperationException always
 */
@Override
1707+
public BlockLoader.Builder beginPositionEntry() {
1708+
throw new UnsupportedOperationException();
1709+
}
1710+
1711+
/**
 * Not supported by this adapter.
 *
 * @throws UnsupportedOperationException always
 */
@Override
1712+
public BlockLoader.Builder endPositionEntry() {
1713+
throw new UnsupportedOperationException();
1714+
}
1715+
1716+
// Intentionally a no-op: this adapter does not close the wrapped builder.
// NOTE(review): presumably the code that created the wrapped builder (e.g. via try-with-resources) closes it - confirm.
@Override
1717+
public void close() {}
1718+
}
1719+
16271720
}

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
756756
return block;
757757
}
758758
}
759-
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
759+
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset, false)) {
760760
for (int i = offset; i < docs.count(); i++) {
761761
int doc = docs.get(i);
762762
if (doc < ordinals.docID()) {

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ interface BlockFactory {
447447
/**
448448
* Build a reader for reading {@link SortedDocValues}
449449
*/
450-
SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count);
450+
SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count, boolean isDense);
451451

452452
/**
453453
* Build a reader for reading {@link SortedSetDocValues}
@@ -548,6 +548,8 @@ interface SingletonOrdinalsBuilder extends Builder {
548548
* Appends an ordinal to the builder.
549549
*/
550550
SingletonOrdinalsBuilder appendOrd(int value);
551+
552+
SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd);
551553
}
552554

553555
interface SortedSetOrdinalsBuilder extends Builder {

0 commit comments

Comments
 (0)