Skip to content

Commit 2b65b0f

Browse files
authored
Refactor LuceneQueryEvaluator to use Blocks instead of Vectors (#133246)
1 parent 5ae52bf commit 2b65b0f

File tree

6 files changed

+67
-59
lines changed

6 files changed

+67
-59
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.compute.data.DocVector;
2525
import org.elasticsearch.compute.data.IntVector;
2626
import org.elasticsearch.compute.data.Page;
27-
import org.elasticsearch.compute.data.Vector;
2827
import org.elasticsearch.core.Releasable;
2928
import org.elasticsearch.core.Releasables;
3029

@@ -44,12 +43,12 @@
4443
* It's much faster to push queries to the {@link LuceneSourceOperator} or the like, but sometimes this isn't possible. So
4544
* this class is here to save the day.
4645
*/
47-
public abstract class LuceneQueryEvaluator<T extends Vector.Builder> implements Releasable {
46+
public abstract class LuceneQueryEvaluator<T extends Block.Builder> implements Releasable {
4847

4948
public record ShardConfig(Query query, IndexSearcher searcher) {}
5049

5150
private final BlockFactory blockFactory;
52-
private final ShardConfig[] shards;
51+
protected final ShardConfig[] shards;
5352

5453
private final List<ShardState> perShardState;
5554

@@ -67,9 +66,9 @@ public Block executeQuery(Page page) {
6766
DocVector docs = (DocVector) block.asVector();
6867
try {
6968
if (docs.singleSegmentNonDecreasing()) {
70-
return evalSingleSegmentNonDecreasing(docs).asBlock();
69+
return evalSingleSegmentNonDecreasing(docs);
7170
} else {
72-
return evalSlow(docs).asBlock();
71+
return evalSlow(docs);
7372
}
7473
} catch (IOException e) {
7574
throw new UncheckedIOException(e);
@@ -106,15 +105,15 @@ public Block executeQuery(Page page) {
106105
* common.
107106
* </p>
108107
*/
109-
private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
108+
private Block evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
110109
ShardState shardState = shardState(docs.shards().getInt(0));
111110
SegmentState segmentState = shardState.segmentState(docs.segments().getInt(0));
112111
int min = docs.docs().getInt(0);
113112
int max = docs.docs().getInt(docs.getPositionCount() - 1);
114113
int length = max - min + 1;
115-
try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
114+
try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
116115
if (length == docs.getPositionCount() && length > 1) {
117-
return segmentState.scoreDense(scoreBuilder, min, max);
116+
return segmentState.scoreDense(scoreBuilder, min, max, docs.getPositionCount());
118117
}
119118
return segmentState.scoreSparse(scoreBuilder, docs.docs());
120119
}
@@ -134,13 +133,13 @@ private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException
134133
* the order that the {@link DocVector} came in.
135134
* </p>
136135
*/
137-
private Vector evalSlow(DocVector docs) throws IOException {
136+
private Block evalSlow(DocVector docs) throws IOException {
138137
int[] map = docs.shardSegmentDocMapForwards();
139138
// Clear any state flags from the previous run
140139
int prevShard = -1;
141140
int prevSegment = -1;
142141
SegmentState segmentState = null;
143-
try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
142+
try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
144143
for (int i = 0; i < docs.getPositionCount(); i++) {
145144
int shard = docs.shards().getInt(docs.shards().getInt(map[i]));
146145
int segment = docs.segments().getInt(map[i]);
@@ -156,7 +155,7 @@ private Vector evalSlow(DocVector docs) throws IOException {
156155
segmentState.scoreSingleDocWithScorer(scoreBuilder, docs.docs().getInt(map[i]));
157156
}
158157
}
159-
try (Vector outOfOrder = scoreBuilder.build()) {
158+
try (Block outOfOrder = scoreBuilder.build()) {
160159
return outOfOrder.filter(docs.shardSegmentDocMapBackwards());
161160
}
162161
}
@@ -247,9 +246,9 @@ private SegmentState(Weight weight, LeafReaderContext ctx) {
247246
* Score a range using the {@link BulkScorer}. This should be faster
248247
* than using {@link #scoreSparse} for dense doc ids.
249248
*/
250-
Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
249+
Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOException {
251250
if (noMatch) {
252-
return createNoMatchVector(blockFactory, max - min + 1);
251+
return createNoMatchBlock(blockFactory, max - min + 1);
253252
}
254253
if (bulkScorer == null || // The bulkScorer wasn't initialized
255254
Thread.currentThread() != bulkScorerThread // The bulkScorer was initialized on a different thread
@@ -258,19 +257,22 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
258257
bulkScorer = weight.bulkScorer(ctx);
259258
if (bulkScorer == null) {
260259
noMatch = true;
261-
return createNoMatchVector(blockFactory, max - min + 1);
260+
return createNoMatchBlock(blockFactory, positionCount);
262261
}
263262
}
264263
try (
265264
DenseCollector<T> collector = new DenseCollector<>(
266265
min,
267266
max,
268267
scoreBuilder,
268+
ctx,
269269
LuceneQueryEvaluator.this::appendNoMatch,
270-
LuceneQueryEvaluator.this::appendMatch
270+
LuceneQueryEvaluator.this::appendMatch,
271+
weight.getQuery()
271272
)
272273
) {
273274
bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1);
275+
collector.finish();
274276
return collector.build();
275277
}
276278
}
@@ -279,10 +281,10 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
279281
* Score a vector of doc ids using {@link Scorer}. If you have a dense range of
280282
* doc ids it'd be faster to use {@link #scoreDense}.
281283
*/
282-
Vector scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
284+
Block scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
283285
initScorer(docs.getInt(0));
284286
if (noMatch) {
285-
return createNoMatchVector(blockFactory, docs.getPositionCount());
287+
return createNoMatchBlock(blockFactory, docs.getPositionCount());
286288
}
287289
for (int i = 0; i < docs.getPositionCount(); i++) {
288290
scoreSingleDocWithScorer(scoreBuilder, docs.getInt(i));
@@ -326,11 +328,13 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
326328
* doc ids are sent to {@link LeafCollector#collect(int)} in ascending order
327329
* which isn't documented, but @jpountz swears is true.
328330
*/
329-
static class DenseCollector<U extends Vector.Builder> implements LeafCollector, Releasable {
331+
static class DenseCollector<U extends Block.Builder> implements LeafCollector, Releasable {
330332
private final U scoreBuilder;
331333
private final int max;
334+
private final LeafReaderContext leafReaderContext;
332335
private final Consumer<U> appendNoMatch;
333336
private final CheckedBiConsumer<U, Scorable, IOException> appendMatch;
337+
private final Query query;
334338

335339
private Scorable scorer;
336340
int next;
@@ -339,14 +343,18 @@ static class DenseCollector<U extends Vector.Builder> implements LeafCollector,
339343
int min,
340344
int max,
341345
U scoreBuilder,
346+
LeafReaderContext leafReaderContext,
342347
Consumer<U> appendNoMatch,
343-
CheckedBiConsumer<U, Scorable, IOException> appendMatch
348+
CheckedBiConsumer<U, Scorable, IOException> appendMatch,
349+
Query query
344350
) {
345351
this.scoreBuilder = scoreBuilder;
346352
this.max = max;
347353
next = min;
354+
this.leafReaderContext = leafReaderContext;
348355
this.appendNoMatch = appendNoMatch;
349356
this.appendMatch = appendMatch;
357+
this.query = query;
350358
}
351359

352360
@Override
@@ -362,7 +370,7 @@ public void collect(int doc) throws IOException {
362370
appendMatch.accept(scoreBuilder, scorer);
363371
}
364372

365-
public Vector build() {
373+
public Block build() {
366374
return scoreBuilder.build();
367375
}
368376

@@ -387,12 +395,12 @@ public void close() {
387395
/**
388396
* Creates a vector where all positions correspond to elements that don't match the query
389397
*/
390-
protected abstract Vector createNoMatchVector(BlockFactory blockFactory, int size);
398+
protected abstract Block createNoMatchBlock(BlockFactory blockFactory, int size);
391399

392400
/**
393401
* Creates the corresponding vector builder to store the results of evaluating the query
394402
*/
395-
protected abstract T createVectorBuilder(BlockFactory blockFactory, int size);
403+
protected abstract T createBlockBuilder(BlockFactory blockFactory, int size);
396404

397405
/**
398406
* Appends a matching result to a builder created by @link createVectorBuilder}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
import org.apache.lucene.search.ScoreMode;
1313
import org.elasticsearch.compute.data.Block;
1414
import org.elasticsearch.compute.data.BlockFactory;
15+
import org.elasticsearch.compute.data.BooleanBlock;
1516
import org.elasticsearch.compute.data.BooleanVector;
1617
import org.elasticsearch.compute.data.Page;
17-
import org.elasticsearch.compute.data.Vector;
1818
import org.elasticsearch.compute.operator.DriverContext;
1919
import org.elasticsearch.compute.operator.EvalOperator;
2020

@@ -26,9 +26,7 @@
2626
* a {@link BooleanVector}.
2727
* @see LuceneQueryScoreEvaluator
2828
*/
29-
public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanVector.Builder>
30-
implements
31-
EvalOperator.ExpressionEvaluator {
29+
public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanBlock.Builder> implements EvalOperator.ExpressionEvaluator {
3230

3331
LuceneQueryExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shards) {
3432
super(blockFactory, shards);
@@ -45,22 +43,22 @@ protected ScoreMode scoreMode() {
4543
}
4644

4745
@Override
48-
protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
49-
return blockFactory.newConstantBooleanVector(false, size);
46+
protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
47+
return blockFactory.newConstantBooleanBlockWith(false, size);
5048
}
5149

5250
@Override
53-
protected BooleanVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
54-
return blockFactory.newBooleanVectorFixedBuilder(size);
51+
protected BooleanBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
52+
return blockFactory.newBooleanBlockBuilder(size);
5553
}
5654

5755
@Override
58-
protected void appendNoMatch(BooleanVector.Builder builder) {
56+
protected void appendNoMatch(BooleanBlock.Builder builder) {
5957
builder.appendBoolean(false);
6058
}
6159

6260
@Override
63-
protected void appendMatch(BooleanVector.Builder builder, Scorable scorer) throws IOException {
61+
protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer) throws IOException {
6462
builder.appendBoolean(true);
6563
}
6664

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import org.elasticsearch.compute.data.DoubleBlock;
1515
import org.elasticsearch.compute.data.DoubleVector;
1616
import org.elasticsearch.compute.data.Page;
17-
import org.elasticsearch.compute.data.Vector;
1817
import org.elasticsearch.compute.operator.DriverContext;
1918
import org.elasticsearch.compute.operator.ScoreOperator;
2019

@@ -27,7 +26,7 @@
2726
* Elements that don't match will have a score of {@link #NO_MATCH_SCORE}.
2827
* @see LuceneQueryScoreEvaluator
2928
*/
30-
public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleVector.Builder> implements ScoreOperator.ExpressionScorer {
29+
public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleBlock.Builder> implements ScoreOperator.ExpressionScorer {
3130

3231
public static final double NO_MATCH_SCORE = 0.0;
3332

@@ -46,22 +45,22 @@ protected ScoreMode scoreMode() {
4645
}
4746

4847
@Override
49-
protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
50-
return blockFactory.newConstantDoubleVector(NO_MATCH_SCORE, size);
48+
protected DoubleBlock createNoMatchBlock(BlockFactory blockFactory, int size) {
49+
return blockFactory.newConstantDoubleBlockWith(NO_MATCH_SCORE, size);
5150
}
5251

5352
@Override
54-
protected DoubleVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
55-
return blockFactory.newDoubleVectorFixedBuilder(size);
53+
protected DoubleBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
54+
return blockFactory.newDoubleBlockBuilder(size);
5655
}
5756

5857
@Override
59-
protected void appendNoMatch(DoubleVector.Builder builder) {
58+
protected void appendNoMatch(DoubleBlock.Builder builder) {
6059
builder.appendDouble(NO_MATCH_SCORE);
6160
}
6261

6362
@Override
64-
protected void appendMatch(DoubleVector.Builder builder, Scorable scorer) throws IOException {
63+
protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer) throws IOException {
6564
builder.appendDouble(scorer.score());
6665
}
6766

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,13 @@
2525
import org.apache.lucene.util.BytesRef;
2626
import org.elasticsearch.common.unit.ByteSizeValue;
2727
import org.elasticsearch.compute.OperatorTests;
28+
import org.elasticsearch.compute.data.Block;
2829
import org.elasticsearch.compute.data.BlockFactory;
2930
import org.elasticsearch.compute.data.BytesRefBlock;
30-
import org.elasticsearch.compute.data.BytesRefVector;
3131
import org.elasticsearch.compute.data.DocBlock;
3232
import org.elasticsearch.compute.data.DoubleBlock;
3333
import org.elasticsearch.compute.data.ElementType;
3434
import org.elasticsearch.compute.data.Page;
35-
import org.elasticsearch.compute.data.Vector;
3635
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
3736
import org.elasticsearch.compute.operator.Driver;
3837
import org.elasticsearch.compute.operator.DriverContext;
@@ -59,7 +58,7 @@
5958
/**
6059
* Base class for testing Lucene query evaluators.
6160
*/
62-
public abstract class LuceneQueryEvaluatorTests<T extends Vector, U extends Vector.Builder> extends ComputeTestCase {
61+
public abstract class LuceneQueryEvaluatorTests<T extends Block, U extends Block.Builder> extends ComputeTestCase {
6362

6463
private static final String FIELD = "g";
6564

@@ -168,9 +167,9 @@ protected void assertTermsQuery(List<Page> results, Set<String> matching, int ex
168167
int matchCount = 0;
169168
for (Page page : results) {
170169
int initialBlockIndex = termsBlockIndex(page);
171-
BytesRefVector terms = page.<BytesRefBlock>getBlock(initialBlockIndex).asVector();
170+
BytesRefBlock terms = page.<BytesRefBlock>getBlock(initialBlockIndex);
172171
@SuppressWarnings("unchecked")
173-
T resultVector = (T) page.getBlock(resultsBlockIndex(page)).asVector();
172+
T resultVector = (T) page.getBlock(resultsBlockIndex(page));
174173
for (int i = 0; i < page.getPositionCount(); i++) {
175174
BytesRef termAtPosition = terms.getBytesRef(i, new BytesRef());
176175
boolean isMatch = matching.contains(termAtPosition.utf8ToString());

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,28 @@
99

1010
import org.apache.lucene.search.Scorable;
1111
import org.elasticsearch.compute.data.BlockFactory;
12-
import org.elasticsearch.compute.data.BooleanVector;
12+
import org.elasticsearch.compute.data.BooleanBlock;
1313
import org.elasticsearch.compute.data.Page;
1414
import org.elasticsearch.compute.lucene.LuceneQueryEvaluator.DenseCollector;
1515
import org.elasticsearch.compute.operator.EvalOperator;
1616
import org.elasticsearch.compute.operator.Operator;
1717

1818
import static org.hamcrest.Matchers.equalTo;
1919

20-
public class LuceneQueryExpressionEvaluatorTests extends LuceneQueryEvaluatorTests<BooleanVector, BooleanVector.Builder> {
20+
public class LuceneQueryExpressionEvaluatorTests extends LuceneQueryEvaluatorTests<BooleanBlock, BooleanBlock.Builder> {
2121

2222
private final boolean useScoring = randomBoolean();
2323

2424
@Override
25-
protected DenseCollector<BooleanVector.Builder> createDenseCollector(int min, int max) {
25+
protected DenseCollector<BooleanBlock.Builder> createDenseCollector(int min, int max) {
2626
return new LuceneQueryEvaluator.DenseCollector<>(
2727
min,
2828
max,
29-
blockFactory().newBooleanVectorFixedBuilder(max - min + 1),
29+
blockFactory().newBooleanBlockBuilder(max - min + 1),
30+
null,
3031
b -> b.appendBoolean(false),
31-
(b, s) -> b.appendBoolean(true)
32+
(b, s) -> b.appendBoolean(true),
33+
null
3234
);
3335
}
3436

@@ -54,12 +56,12 @@ protected int resultsBlockIndex(Page page) {
5456
}
5557

5658
@Override
57-
protected void assertCollectedResultMatch(BooleanVector resultVector, int position, boolean isMatch) {
59+
protected void assertCollectedResultMatch(BooleanBlock resultVector, int position, boolean isMatch) {
5860
assertThat(resultVector.getBoolean(position), equalTo(isMatch));
5961
}
6062

6163
@Override
62-
protected void assertTermResultMatch(BooleanVector resultVector, int position, boolean isMatch) {
64+
protected void assertTermResultMatch(BooleanBlock resultVector, int position, boolean isMatch) {
6365
assertThat(resultVector.getBoolean(position), equalTo(isMatch));
6466
}
6567
}

0 commit comments

Comments
 (0)