Skip to content

Commit c2fa78f

Browse files
authored
Speed up reading multivalued keywords (#131061)
This change speeds up reading multi-valued keyword fields by leveraging ordinals.

Before:

```
Benchmark                              (layout)  (name)      Mode  Cnt    Score   Error  Units
ValuesSourceReaderBenchmark.benchmark  in_order  keyword_mv  avgt    7  318.332 ± 1.660  ns/op
```

After:

```
Benchmark                              (layout)  (name)      Mode  Cnt    Score   Error  Units
ValuesSourceReaderBenchmark.benchmark  in_order  keyword_mv  avgt    7   96.659 ± 0.932  ns/op
```
1 parent 1669e8d commit c2fa78f

File tree

10 files changed

+450
-58
lines changed

10 files changed

+450
-58
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ public class ValuesSourceReaderBenchmark {
9292
"double",
9393
"keyword",
9494
"stored_keyword",
95-
"3_stored_keywords" };
95+
"3_stored_keywords",
96+
"keyword_mv" };
9697

9798
private static final int BLOCK_LENGTH = 16 * 1024;
9899
private static final int INDEX_SIZE = 10 * BLOCK_LENGTH;
@@ -332,7 +333,7 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
332333
@Param({ "in_order", "shuffled" })
333334
public String layout;
334335

335-
@Param({ "long", "keyword", "stored_keyword" })
336+
@Param({ "long", "keyword", "stored_keyword", "keyword_mv" })
336337
public String name;
337338

338339
private Directory directory;
@@ -398,6 +399,22 @@ public void benchmark() {
398399
}
399400
}
400401
}
402+
case "keyword_mv" -> {
403+
BytesRef scratch = new BytesRef();
404+
BytesRefBlock values = op.getOutput().<BytesRefBlock>getBlock(1);
405+
for (int p = 0; p < values.getPositionCount(); p++) {
406+
int count = values.getValueCount(p);
407+
if (count > 0) {
408+
int first = values.getFirstValueIndex(p);
409+
for (int i = 0; i < count; i++) {
410+
BytesRef r = values.getBytesRef(first + i, scratch);
411+
r.offset++;
412+
r.length--;
413+
sum += Integer.parseInt(r.utf8ToString());
414+
}
415+
}
416+
}
417+
}
401418
}
402419
}
403420
long expected = 0;
@@ -407,6 +424,16 @@ public void benchmark() {
407424
expected += i % 1000;
408425
}
409426
break;
427+
case "keyword_mv":
428+
for (int i = 0; i < INDEX_SIZE; i++) {
429+
int v1 = i % 1000;
430+
expected += v1;
431+
int v2 = i % 500;
432+
if (v1 != v2) {
433+
expected += v2;
434+
}
435+
}
436+
break;
410437
case "3_stored_keywords":
411438
for (int i = 0; i < INDEX_SIZE; i++) {
412439
expected += 3 * (i % 1000);
@@ -461,7 +488,9 @@ private void setupIndex() throws IOException {
461488
new StoredField("double", (double) i),
462489
new KeywordFieldMapper.KeywordField("keyword_1", new BytesRef(c + i % 1000), keywordFieldType),
463490
new KeywordFieldMapper.KeywordField("keyword_2", new BytesRef(c + i % 1000), keywordFieldType),
464-
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType)
491+
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType),
492+
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 1000), keywordFieldType),
493+
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 500), keywordFieldType)
465494
)
466495
);
467496
if (i % COMMIT_INTERVAL == 0) {

docs/changelog/131061.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131061
2+
summary: Speed up reading multivalued keywords
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOExceptio
656656
if (docs.count() == 1) {
657657
return readSingleDoc(factory, docs.get(0));
658658
}
659-
try (BlockLoader.SingletonOrdinalsBuilder builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
659+
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
660660
for (int i = 0; i < docs.count(); i++) {
661661
int doc = docs.get(i);
662662
if (doc < ordinals.docID()) {
@@ -701,13 +701,29 @@ private static class Ordinals extends BlockDocValuesReader {
701701

702702
@Override
703703
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
704-
try (BytesRefBuilder builder = factory.bytesRefsFromDocValues(docs.count())) {
704+
if (docs.count() == 1) {
705+
return readSingleDoc(factory, docs.get(0));
706+
}
707+
try (var builder = factory.sortedSetOrdinalsBuilder(ordinals, docs.count())) {
705708
for (int i = 0; i < docs.count(); i++) {
706709
int doc = docs.get(i);
707710
if (doc < ordinals.docID()) {
708711
throw new IllegalStateException("docs within same block must be in order");
709712
}
710-
read(doc, builder);
713+
if (ordinals.advanceExact(doc) == false) {
714+
builder.appendNull();
715+
continue;
716+
}
717+
int count = ordinals.docValueCount();
718+
if (count == 1) {
719+
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
720+
} else {
721+
builder.beginPositionEntry();
722+
for (int c = 0; c < count; c++) {
723+
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
724+
}
725+
builder.endPositionEntry();
726+
}
711727
}
712728
return builder.build();
713729
}
@@ -718,6 +734,26 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
718734
read(docId, (BytesRefBuilder) builder);
719735
}
720736

737+
private BlockLoader.Block readSingleDoc(BlockFactory factory, int docId) throws IOException {
738+
if (ordinals.advanceExact(docId) == false) {
739+
return factory.constantNulls();
740+
}
741+
int count = ordinals.docValueCount();
742+
if (count == 1) {
743+
BytesRef v = ordinals.lookupOrd(ordinals.nextOrd());
744+
return factory.constantBytes(BytesRef.deepCopyOf(v));
745+
}
746+
try (var builder = factory.bytesRefsFromDocValues(count)) {
747+
builder.beginPositionEntry();
748+
for (int c = 0; c < count; c++) {
749+
BytesRef v = ordinals.lookupOrd(ordinals.nextOrd());
750+
builder.appendBytesRef(v);
751+
}
752+
builder.endPositionEntry();
753+
return builder.build();
754+
}
755+
}
756+
721757
private void read(int docId, BytesRefBuilder builder) throws IOException {
722758
if (false == ordinals.advanceExact(docId)) {
723759
builder.appendNull();

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,11 +417,14 @@ interface BlockFactory {
417417
Block constantBytes(BytesRef value);
418418

419419
/**
420-
* Build a reader for reading keyword ordinals.
420+
* Build a reader for reading {@link SortedDocValues}
421421
*/
422422
SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count);
423423

424-
// TODO support non-singleton ords
424+
/**
425+
* Build a reader for reading {@link SortedSetDocValues}
426+
*/
427+
SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count);
425428

426429
AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count);
427430
}
@@ -509,6 +512,13 @@ interface SingletonOrdinalsBuilder extends Builder {
509512
SingletonOrdinalsBuilder appendOrd(int value);
510513
}
511514

515+
interface SortedSetOrdinalsBuilder extends Builder {
516+
/**
517+
* Appends an ordinal to the builder.
518+
*/
519+
SortedSetOrdinalsBuilder appendOrd(int value);
520+
}
521+
512522
interface AggregateMetricDoubleBuilder extends Builder {
513523

514524
DoubleBuilder min();

test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.apache.lucene.index.LeafReaderContext;
1313
import org.apache.lucene.index.SortedDocValues;
14+
import org.apache.lucene.index.SortedSetDocValues;
1415
import org.apache.lucene.util.BytesRef;
1516

1617
import java.io.IOException;
@@ -182,6 +183,22 @@ public SingletonOrdsBuilder appendOrd(int value) {
182183
return new SingletonOrdsBuilder();
183184
}
184185

186+
@Override
187+
public BlockLoader.SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count) {
188+
class SortedSetOrdinalBuilder extends TestBlock.Builder implements BlockLoader.SortedSetOrdinalsBuilder {
189+
@Override
190+
public SortedSetOrdinalBuilder appendOrd(int value) {
191+
try {
192+
add(ordinals.lookupOrd(value));
193+
return this;
194+
} catch (IOException e) {
195+
throw new UncheckedIOException(e);
196+
}
197+
}
198+
}
199+
return new SortedSetOrdinalBuilder();
200+
}
201+
185202
@Override
186203
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
187204
return new AggregateMetricDoubleBlockBuilder();

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
package org.elasticsearch.compute.lucene.read;
99

1010
import org.apache.lucene.index.SortedDocValues;
11+
import org.apache.lucene.index.SortedSetDocValues;
1112
import org.elasticsearch.compute.data.Block;
1213
import org.elasticsearch.compute.data.BlockFactory;
1314
import org.elasticsearch.compute.data.ElementType;
14-
import org.elasticsearch.compute.data.SingletonOrdinalsBuilder;
1515
import org.elasticsearch.index.mapper.BlockLoader;
1616

1717
public abstract class DelegatingBlockLoaderFactory implements BlockLoader.BlockFactory {
@@ -86,6 +86,11 @@ public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocVa
8686
return new SingletonOrdinalsBuilder(factory, ordinals, count);
8787
}
8888

89+
@Override
90+
public BlockLoader.SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count) {
91+
return new SortedSetOrdinalsBuilder(factory, ordinals, count);
92+
}
93+
8994
@Override
9095
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
9196
return factory.newAggregateMetricDoubleBlockBuilder(count);
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,17 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.compute.data;
8+
package org.elasticsearch.compute.lucene.read;
99

1010
import org.apache.lucene.index.SortedDocValues;
1111
import org.apache.lucene.util.BytesRef;
1212
import org.apache.lucene.util.RamUsageEstimator;
13+
import org.elasticsearch.compute.data.Block;
14+
import org.elasticsearch.compute.data.BlockFactory;
15+
import org.elasticsearch.compute.data.BytesRefBlock;
16+
import org.elasticsearch.compute.data.BytesRefVector;
17+
import org.elasticsearch.compute.data.IntBlock;
18+
import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
1319
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
1420
import org.elasticsearch.core.Releasable;
1521
import org.elasticsearch.core.Releasables;

0 commit comments

Comments
 (0)