Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ public class ValuesSourceReaderBenchmark {
"double",
"keyword",
"stored_keyword",
"3_stored_keywords" };
"3_stored_keywords",
"keyword_mv" };

private static final int BLOCK_LENGTH = 16 * 1024;
private static final int INDEX_SIZE = 10 * BLOCK_LENGTH;
Expand Down Expand Up @@ -332,7 +333,7 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
@Param({ "in_order", "shuffled" })
public String layout;

@Param({ "long", "keyword", "stored_keyword" })
@Param({ "long", "keyword", "stored_keyword", "keyword_mv" })
public String name;

private Directory directory;
Expand Down Expand Up @@ -398,6 +399,22 @@ public void benchmark() {
}
}
}
case "keyword_mv" -> {
BytesRef scratch = new BytesRef();
BytesRefBlock values = op.getOutput().<BytesRefBlock>getBlock(1);
for (int p = 0; p < values.getPositionCount(); p++) {
int count = values.getValueCount(p);
if (count > 0) {
int first = values.getFirstValueIndex(p);
for (int i = 0; i < count; i++) {
BytesRef r = values.getBytesRef(first + i, scratch);
r.offset++;
r.length--;
sum += Integer.parseInt(r.utf8ToString());
}
}
}
}
}
}
long expected = 0;
Expand All @@ -407,6 +424,16 @@ public void benchmark() {
expected += i % 1000;
}
break;
case "keyword_mv":
for (int i = 0; i < INDEX_SIZE; i++) {
int v1 = i % 1000;
expected += v1;
int v2 = i % 500;
if (v1 != v2) {
expected += v2;
}
}
break;
case "3_stored_keywords":
for (int i = 0; i < INDEX_SIZE; i++) {
expected += 3 * (i % 1000);
Expand Down Expand Up @@ -461,7 +488,9 @@ private void setupIndex() throws IOException {
new StoredField("double", (double) i),
new KeywordFieldMapper.KeywordField("keyword_1", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_2", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType)
new KeywordFieldMapper.KeywordField("keyword_3", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 1000), keywordFieldType),
new KeywordFieldMapper.KeywordField("keyword_mv", new BytesRef(c + i % 500), keywordFieldType)
)
);
if (i % COMMIT_INTERVAL == 0) {
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/131061.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131061
summary: Speed up reading multivalued keywords
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOExceptio
if (docs.count() == 1) {
return readSingleDoc(factory, docs.get(0));
}
try (BlockLoader.SingletonOrdinalsBuilder builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count())) {
for (int i = 0; i < docs.count(); i++) {
int doc = docs.get(i);
if (doc < ordinals.docID()) {
Expand Down Expand Up @@ -701,13 +701,29 @@ private static class Ordinals extends BlockDocValuesReader {

@Override
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
try (BytesRefBuilder builder = factory.bytesRefsFromDocValues(docs.count())) {
if (docs.count() == 1) {
return readSingleDoc(factory, docs.get(0));
}
try (var builder = factory.ordinalsBuilder(ordinals, docs.count())) {
for (int i = 0; i < docs.count(); i++) {
int doc = docs.get(i);
if (doc < ordinals.docID()) {
throw new IllegalStateException("docs within same block must be in order");
}
read(doc, builder);
if (ordinals.advanceExact(doc) == false) {
builder.appendNull();
continue;
}
int count = ordinals.docValueCount();
if (count == 1) {
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
} else {
builder.beginPositionEntry();
for (int c = 0; c < count; c++) {
builder.appendOrd(Math.toIntExact(ordinals.nextOrd()));
}
builder.endPositionEntry();
}
}
return builder.build();
}
Expand All @@ -718,6 +734,26 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
read(docId, (BytesRefBuilder) builder);
}

/**
 * Loads the values of a single document without going through the ordinals builder.
 * <ul>
 *   <li>no doc values for {@code docId}: a constant-null block;</li>
 *   <li>exactly one value: a constant block holding a private copy of the term;</li>
 *   <li>several values: a one-position block containing every term, in ordinal iteration order.</li>
 * </ul>
 *
 * @param factory factory used to create the resulting block
 * @param docId   the document to read
 * @return the block holding the document's values
 * @throws IOException if the underlying doc values cannot be read
 */
private BlockLoader.Block readSingleDoc(BlockFactory factory, int docId) throws IOException {
    if (false == ordinals.advanceExact(docId)) {
        return factory.constantNulls();
    }
    final int valueCount = ordinals.docValueCount();
    if (valueCount != 1) {
        try (var multiValued = factory.bytesRefsFromDocValues(valueCount)) {
            multiValued.beginPositionEntry();
            for (int remaining = valueCount; remaining > 0; remaining--) {
                final long ord = ordinals.nextOrd();
                multiValued.appendBytesRef(ordinals.lookupOrd(ord));
            }
            multiValued.endPositionEntry();
            return multiValued.build();
        }
    }
    // Lucene documents that the BytesRef returned by lookupOrd may be reused across calls,
    // so the constant block is handed its own copy.
    final BytesRef term = ordinals.lookupOrd(ordinals.nextOrd());
    return factory.constantBytes(BytesRef.deepCopyOf(term));
}

private void read(int docId, BytesRefBuilder builder) throws IOException {
if (false == ordinals.advanceExact(docId)) {
builder.appendNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,14 @@ interface BlockFactory {
Block constantBytes(BytesRef value);

/**
* Build a reader for reading keyword ordinals.
* Build a reader for reading {@link SortedDocValues}
*/
SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count);
OrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count);

/**
* Build a reader for reading {@link SortedSetDocValues}
*/
OrdinalsBuilder ordinalsBuilder(SortedSetDocValues ordinals, int count);

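// TODO(review): multi-valued ords now appear to be covered by ordinalsBuilder(SortedSetDocValues, int) above - confirm this TODO can be removed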

Expand Down Expand Up @@ -502,11 +507,11 @@ interface LongBuilder extends Builder {
LongBuilder appendLong(long value);
}

interface SingletonOrdinalsBuilder extends Builder {
interface OrdinalsBuilder extends Builder {
/**
* Appends an ordinal to the builder.
*/
SingletonOrdinalsBuilder appendOrd(int value);
OrdinalsBuilder appendOrd(int value);
}

interface AggregateMetricDoubleBuilder extends Builder {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
Expand Down Expand Up @@ -167,8 +168,8 @@ public BlockLoader.Block constantBytes(BytesRef value) {
}

@Override
public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) {
class SingletonOrdsBuilder extends TestBlock.Builder implements BlockLoader.SingletonOrdinalsBuilder {
public BlockLoader.OrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) {
class SingletonOrdsBuilder extends TestBlock.Builder implements BlockLoader.OrdinalsBuilder {
@Override
public SingletonOrdsBuilder appendOrd(int value) {
try {
Expand All @@ -182,6 +183,22 @@ public SingletonOrdsBuilder appendOrd(int value) {
return new SingletonOrdsBuilder();
}

/**
 * Test implementation of the sorted-set ordinals builder: every appended ordinal is
 * resolved through {@code ordinals.lookupOrd} right away and the resulting term is added
 * to the test block. The {@code count} hint is not used.
 */
@Override
public BlockLoader.OrdinalsBuilder ordinalsBuilder(SortedSetDocValues ordinals, int count) {
    class SortedSetOrdinalBuilder extends TestBlock.Builder implements BlockLoader.OrdinalsBuilder {
        @Override
        public SortedSetOrdinalBuilder appendOrd(int value) {
            try {
                BytesRef term = ordinals.lookupOrd(value);
                add(term);
            } catch (IOException ioe) {
                // appendOrd cannot declare a checked exception, so rethrow unchecked
                throw new UncheckedIOException(ioe);
            }
            return this;
        }
    }
    return new SortedSetOrdinalBuilder();
}

@Override
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
return new AggregateMetricDoubleBlockBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
package org.elasticsearch.compute.lucene.read;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.SingletonOrdinalsBuilder;
import org.elasticsearch.index.mapper.BlockLoader;

public abstract class DelegatingBlockLoaderFactory implements BlockLoader.BlockFactory {
Expand Down Expand Up @@ -82,10 +82,15 @@ public BlockLoader.Builder nulls(int expectedCount) {
}

@Override
public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) {
public BlockLoader.OrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) {
return new SingletonOrdinalsBuilder(factory, ordinals, count);
}

/**
 * Creates the ordinals builder used when reading {@link SortedSetDocValues}.
 *
 * @param ordinals the doc values the appended ordinals refer to
 * @param count    value forwarded to the builder's constructor
 * @return a new {@code SortedSetOrdinalsBuilder} constructed with this loader factory's {@code factory}
 */
@Override
public BlockLoader.OrdinalsBuilder ordinalsBuilder(SortedSetDocValues ordinals, int count) {
    final BlockLoader.OrdinalsBuilder sortedSetBuilder = new SortedSetOrdinalsBuilder(factory, ordinals, count);
    return sortedSetBuilder;
}

@Override
public BlockLoader.AggregateMetricDoubleBuilder aggregateMetricDoubleBuilder(int count) {
return factory.newAggregateMetricDoubleBlockBuilder(count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
* 2.0.
*/

package org.elasticsearch.compute.data;
package org.elasticsearch.compute.lucene.read;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
Expand All @@ -19,7 +25,7 @@
import java.io.UncheckedIOException;
import java.util.Arrays;

public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder {
public class SingletonOrdinalsBuilder implements BlockLoader.OrdinalsBuilder, Releasable, Block.Builder {
private final BlockFactory blockFactory;
private final SortedDocValues docValues;
private int minOrd = Integer.MAX_VALUE;
Expand Down
Loading
Loading