Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ public class ValuesSourceReaderBenchmark {
"double",
"keyword",
"stored_keyword",
"3_stored_keywords" };
"3_stored_keywords",
"keyword_mv" };

private static final int BLOCK_LENGTH = 16 * 1024;
private static final int INDEX_SIZE = 10 * BLOCK_LENGTH;
Expand Down Expand Up @@ -332,7 +333,7 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
@Param({ "in_order", "shuffled" })
public String layout;

@Param({ "long", "keyword", "stored_keyword" })
@Param({ "long", "keyword", "stored_keyword", "keyword_mv" })
public String name;

private Directory directory;
Expand Down Expand Up @@ -398,6 +399,22 @@ public void benchmark() {
}
}
}
// Multi-valued keyword case: fold every value of every position into `sum`.
case "keyword_mv" -> {
// Scratch BytesRef passed to each getBytesRef call below.
BytesRef scratch = new BytesRef();
BytesRefBlock values = op.getOutput().<BytesRefBlock>getBlock(1);
for (int p = 0; p < values.getPositionCount(); p++) {
int count = values.getValueCount(p);
// Positions without values contribute nothing to the sum.
if (count > 0) {
// Values of position p are addressed as first, first + 1, ..., first + count - 1.
int first = values.getFirstValueIndex(p);
for (int i = 0; i < count; i++) {
BytesRef r = values.getBytesRef(first + i, scratch);
// Skip the first byte so that only the remainder is parsed as an int.
// NOTE(review): assumes every value is a one-byte prefix followed by decimal digits — confirm against the index setup.
r.offset++;
r.length--;
sum += Integer.parseInt(r.utf8ToString());
}
}
}
}
}
}
long expected = 0;
Expand All @@ -407,6 +424,16 @@ public void benchmark() {
expected += i % 1000;
}
break;
// Expected checksum for the multi-valued keyword field.
case "keyword_mv":
for (int i = 0; i < INDEX_SIZE; i++) {
// First candidate value for document i.
int v1 = i % 1000;
expected += v1;
// Second candidate value for document i; it is only counted when it differs from the first.
// NOTE(review): presumably equal values within one document are stored once — confirm.
int v2 = i % 500;
if (v1 != v2) {
expected += v2;
}
}
break;
case "3_stored_keywords":
for (int i = 0; i < INDEX_SIZE; i++) {
expected += 3 * (i % 1000);
Expand Down Expand Up @@ -461,7 +488,9 @@ private void setupIndex() throws IOException {
new StoredField("double", (double) i),
new KeywordFieldMapper.KeywordField("keyword_1", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_2", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType)
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 500), keywordFieldType)
)
);
if (i % COMMIT_INTERVAL == 0) {
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/131061.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131061
summary: Speed up reading multivalued keywords
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOExceptio
if (docs.count() == 1) {
return readSingleDoc(factory, docs.get(0));
}
try (BlockLoader.SingletonOrdinalsBuilder builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
for (int i = 0; i < docs.count(); i++) {
int doc = docs.get(i);
if (doc < ordinals.docID()) {
Expand Down Expand Up @@ -701,13 +701,29 @@ private static class Ordinals extends BlockDocValuesReader {

@Override
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
try (BytesRefBuilder builder = factory.bytesRefsFromDocValues(docs.count())) {
if (docs.count() == 1) {
return readSingleDoc(factory, docs.get(0));
}
try (var builder = factory.sortedSetOrdinalsBuilder(ordinals, docs.count())) {
for (int i = 0; i < docs.count(); i++) {
int doc = docs.get(i);
if (doc < ordinals.docID()) {
throw new IllegalStateException("docs within same block must be in order");
}
read(doc, builder);
if (ordinals.advanceExact(doc) == false) {
builder.appendNull();
continue;
}
int count = ordinals.docValueCount();
if (count == 1) {
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
} else {
builder.beginPositionEntry();
for (int c = 0; c < count; c++) {
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
}
builder.endPositionEntry();
}
}
return builder.build();
}
Expand All @@ -718,6 +734,26 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
read(docId, (BytesRefBuilder) builder);
}

/**
 * Loads the values of a single document into a block.
 * <ul>
 *   <li>no values for the document: a constant-null block;</li>
 *   <li>exactly one value: a constant-bytes block holding a copy of that value;</li>
 *   <li>several values: a one-position block containing every value, in the order
 *       {@code nextOrd()} returns them.</li>
 * </ul>
 *
 * @param factory factory used to create the resulting block
 * @param docId   the document to read
 * @return the block for that one document
 * @throws IOException if reading the doc values fails
 */
private BlockLoader.Block readSingleDoc(BlockFactory factory, int docId) throws IOException {
    final boolean hasValues = ordinals.advanceExact(docId);
    if (hasValues == false) {
        return factory.constantNulls();
    }
    final int valueCount = ordinals.docValueCount();
    if (valueCount == 1) {
        // The looked-up term is deep-copied before being handed to the block.
        // NOTE(review): presumably because the returned BytesRef may be reused by the doc values — confirm.
        final BytesRef term = ordinals.lookupOrd(ordinals.nextOrd());
        return factory.constantBytes(BytesRef.deepCopyOf(term));
    }
    try (var values = factory.bytesRefsFromDocValues(valueCount)) {
        // All values of the document go into a single position entry.
        values.beginPositionEntry();
        int remaining = valueCount;
        while (remaining-- > 0) {
            values.appendBytesRef(ordinals.lookupOrd(ordinals.nextOrd()));
        }
        values.endPositionEntry();
        return values.build();
    }
}

private void read(int docId, BytesRefBuilder builder) throws IOException {
if (false == ordinals.advanceExact(docId)) {
builder.appendNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,14 @@ interface BlockFactory {
Block constantBytes(BytesRef value);

/**
* Build a reader for reading keyword ordinals.
* Build a reader for reading {@link SortedDocValues}
*/
SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count);

// TODO support non-singleton ords
/**
* Build a reader for reading {@link SortedSetDocValues}
*/
SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count);

AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count);
}
Expand Down Expand Up @@ -509,6 +512,13 @@ interface SingletonOrdinalsBuilder extends Builder {
SingletonOrdinalsBuilder appendOrd(int value);
}

/**
 * A {@link Builder} that is fed ordinals rather than the values themselves.
 * Instances are created by
 * {@code BlockFactory#sortedSetOrdinalsBuilder(SortedSetDocValues, int)}.
 * <p>
 * A position with several values is written by wrapping the {@link #appendOrd(int)}
 * calls in {@code beginPositionEntry()} / {@code endPositionEntry()}; a position with
 * one value is a single {@link #appendOrd(int)} call.
 */
interface SortedSetOrdinalsBuilder extends Builder {
/**
 * Appends an ordinal to the builder.
 *
 * @param value the ordinal to append
 * @return a {@code SortedSetOrdinalsBuilder}, so calls can be chained
 */
SortedSetOrdinalsBuilder appendOrd(int value);
}

interface AggregateMetricDoubleBuilder extends Builder {

DoubleBuilder min();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
Expand Down Expand Up @@ -182,6 +183,22 @@ public SingletonOrdsBuilder appendOrd(int value) {
return new SingletonOrdsBuilder();
}

/**
 * Test implementation of the sorted-set ordinals builder: every appended ordinal is
 * looked up in {@code ordinals} right away and the resulting value is added to the
 * underlying {@link TestBlock.Builder}. The {@code count} hint is not used.
 */
@Override
public BlockLoader.SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count) {
    class SortedSetOrdinalBuilder extends TestBlock.Builder implements BlockLoader.SortedSetOrdinalsBuilder {
        @Override
        public SortedSetOrdinalBuilder appendOrd(int value) {
            try {
                final BytesRef term = ordinals.lookupOrd(value);
                add(term);
            } catch (IOException e) {
                // The interface method declares no checked exception, so rethrow unchecked.
                throw new UncheckedIOException(e);
            }
            return this;
        }
    }
    return new SortedSetOrdinalBuilder();
}

@Override
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
return new AggregateMetricDoubleBlockBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
package org.elasticsearch.compute.lucene.read;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.SingletonOrdinalsBuilder;
import org.elasticsearch.index.mapper.BlockLoader;

public abstract class DelegatingBlockLoaderFactory implements BlockLoader.BlockFactory {
Expand Down Expand Up @@ -86,6 +86,11 @@ public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocVa
return new SingletonOrdinalsBuilder(factory, ordinals, count);
}

/**
 * Creates a {@code SortedSetOrdinalsBuilder} from this loader factory's {@code factory},
 * the given {@code ordinals} and {@code count}.
 * NOTE(review): the unqualified {@code SortedSetOrdinalsBuilder} constructed here is a
 * concrete class, not the {@code BlockLoader.SortedSetOrdinalsBuilder} interface —
 * presumably declared in this package; confirm.
 */
@Override
public BlockLoader.SortedSetOrdinalsBuilder sortedSetOrdinalsBuilder(SortedSetDocValues ordinals, int count) {
return new SortedSetOrdinalsBuilder(factory, ordinals, count);
}

@Override
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
return factory.newAggregateMetricDoubleBlockBuilder(count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
* 2.0.
*/

package org.elasticsearch.compute.data;
package org.elasticsearch.compute.lucene.read;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
Expand Down
Loading
Loading