Skip to content

Commit 8c3d9e1

Browse files
authored
ESQL: Ignore multivalued key columns in lookup index on JOIN (#120726)
Fixes #118780. Second part of #120519. In the first PR, we avoided matching multivalued keys in the lookup when they came from the query. Now, we also avoid matching when the lookup index itself has multiple values in the key column.
1 parent 09148da commit 8c3d9e1

File tree

8 files changed

+161
-118
lines changed

8 files changed

+161
-118
lines changed

x-pack/plugin/esql/compute/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,5 @@
3535
exports org.elasticsearch.compute.operator.mvdedupe;
3636
exports org.elasticsearch.compute.aggregation.table;
3737
exports org.elasticsearch.compute.data.sort;
38+
exports org.elasticsearch.compute.querydsl.query;
3839
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
import org.apache.lucene.document.InetAddressPoint;
1111
import org.apache.lucene.geo.GeoEncodingUtils;
12+
import org.apache.lucene.search.BooleanClause;
13+
import org.apache.lucene.search.BooleanQuery;
14+
import org.apache.lucene.search.MatchAllDocsQuery;
1215
import org.apache.lucene.search.Query;
1316
import org.apache.lucene.util.BytesRef;
1417
import org.elasticsearch.common.geo.ShapeRelation;
@@ -20,6 +23,8 @@
2023
import org.elasticsearch.compute.data.FloatBlock;
2124
import org.elasticsearch.compute.data.IntBlock;
2225
import org.elasticsearch.compute.data.LongBlock;
26+
import org.elasticsearch.compute.operator.Warnings;
27+
import org.elasticsearch.compute.querydsl.query.SingleValueMatchQuery;
2328
import org.elasticsearch.core.Nullable;
2429
import org.elasticsearch.geometry.Geometry;
2530
import org.elasticsearch.geometry.Point;
@@ -30,6 +35,7 @@
3035
import org.elasticsearch.index.mapper.RangeFieldMapper;
3136
import org.elasticsearch.index.query.SearchExecutionContext;
3237

38+
import java.io.IOException;
3339
import java.util.ArrayList;
3440
import java.util.List;
3541
import java.util.function.IntFunction;
@@ -38,10 +44,14 @@
3844
* Generates a list of Lucene queries based on the input block.
3945
*/
4046
public abstract class QueryList {
47+
protected final SearchExecutionContext searchExecutionContext;
48+
protected final MappedFieldType field;
4149
protected final Block block;
4250
protected final boolean onlySingleValues;
4351

44-
protected QueryList(Block block, boolean onlySingleValues) {
52+
protected QueryList(MappedFieldType field, SearchExecutionContext searchExecutionContext, Block block, boolean onlySingleValues) {
53+
this.searchExecutionContext = searchExecutionContext;
54+
this.field = field;
4555
this.block = block;
4656
this.onlySingleValues = onlySingleValues;
4757
}
@@ -59,11 +69,52 @@ int getPositionCount() {
5969
*/
6070
public abstract QueryList onlySingleValues();
6171

72+
/**
 * Builds the query for the given position of the input block.
 * <p>
 * When {@code onlySingleValues} is set, a position whose value count is not exactly one
 * yields {@code null}; otherwise the query produced by {@link #doGetQuery} is additionally
 * passed through {@code wrapSingleValueQuery}. When {@code onlySingleValues} is not set,
 * the result of {@link #doGetQuery} is returned unchanged (which may be {@code null}).
 *
 * @param position the position in the input block to build a query for
 * @return the query for that position, or {@code null} if there is none
 */
final Query getQuery(int position) {
73+
final int valueCount = block.getValueCount(position);
74+
// In single-value mode, positions with zero or several values produce no query.
if (onlySingleValues && valueCount != 1) {
75+
return null;
76+
}
77+
final int firstValueIndex = block.getFirstValueIndex(position);
78+
79+
// The subclass builds the actual query from the values found at this position.
Query query = doGetQuery(position, firstValueIndex, valueCount);
80+
81+
// valueCount is exactly 1 whenever this branch is reached (see the guard above).
// NOTE(review): presumably doGetQuery never returns null for a single value — confirm for every subclass.
if (onlySingleValues) {
82+
query = wrapSingleValueQuery(query);
83+
}
84+
85+
return query;
86+
}
87+
6288
/**
6389
* Returns the query at the given position.
6490
*/
6591
@Nullable
66-
abstract Query getQuery(int position);
92+
abstract Query doGetQuery(int position, int firstValueIndex, int valueCount);
93+
94+
/**
 * Combines the given query with a {@link SingleValueMatchQuery} on {@code field}.
 * <p>
 * The single-value query is rewritten against the searcher of the search execution context.
 * If it rewrites to a {@link MatchAllDocsQuery}, the given query is returned as-is. Otherwise
 * both queries are added as {@code FILTER} clauses of a new {@link BooleanQuery}.
 *
 * @param query the query to wrap
 * @return {@code query} itself, or a boolean query filtering on both {@code query} and the single-value query
 */
private Query wrapSingleValueQuery(Query query) {
95+
SingleValueMatchQuery singleValueQuery = new SingleValueMatchQuery(
96+
searchExecutionContext.getForField(field, MappedFieldType.FielddataOperation.SEARCH),
97+
// Not emitting warnings for multivalued fields not matching
98+
Warnings.NOOP_WARNINGS
99+
);
100+
101+
// Start from the unrewritten query so that it is still used as the filter if rewriting fails.
Query rewrite = singleValueQuery;
102+
try {
103+
rewrite = singleValueQuery.rewrite(searchExecutionContext.searcher());
104+
if (rewrite instanceof MatchAllDocsQuery) {
105+
// nothing to filter
106+
return query;
107+
}
108+
} catch (IOException e) {
109+
// Ignored: "rewrite" keeps the unrewritten singleValueQuery assigned above, which is added as the filter below.
110+
// TODO: Should we do something with the exception?
111+
}
112+
113+
// Both clauses are FILTER clauses: each must match for a document to be returned.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
114+
builder.add(query, BooleanClause.Occur.FILTER);
115+
builder.add(rewrite, BooleanClause.Occur.FILTER);
116+
return builder.build();
117+
}
67118

68119
/**
69120
* Returns a list of term queries for the given field and the input block
@@ -146,8 +197,6 @@ public static QueryList geoShapeQueryList(MappedFieldType field, SearchExecution
146197
}
147198

148199
private static class TermQueryList extends QueryList {
149-
private final MappedFieldType field;
150-
private final SearchExecutionContext searchExecutionContext;
151200
private final IntFunction<Object> blockValueReader;
152201

153202
private TermQueryList(
@@ -157,9 +206,7 @@ private TermQueryList(
157206
boolean onlySingleValues,
158207
IntFunction<Object> blockValueReader
159208
) {
160-
super(block, onlySingleValues);
161-
this.field = field;
162-
this.searchExecutionContext = searchExecutionContext;
209+
super(field, searchExecutionContext, block, onlySingleValues);
163210
this.blockValueReader = blockValueReader;
164211
}
165212

@@ -169,19 +216,14 @@ public TermQueryList onlySingleValues() {
169216
}
170217

171218
@Override
172-
Query getQuery(int position) {
173-
final int count = block.getValueCount(position);
174-
if (onlySingleValues && count != 1) {
175-
return null;
176-
}
177-
final int first = block.getFirstValueIndex(position);
178-
return switch (count) {
219+
Query doGetQuery(int position, int firstValueIndex, int valueCount) {
220+
return switch (valueCount) {
179221
case 0 -> null;
180-
case 1 -> field.termQuery(blockValueReader.apply(first), searchExecutionContext);
222+
case 1 -> field.termQuery(blockValueReader.apply(firstValueIndex), searchExecutionContext);
181223
default -> {
182-
final List<Object> terms = new ArrayList<>(count);
183-
for (int i = 0; i < count; i++) {
184-
final Object value = blockValueReader.apply(first + i);
224+
final List<Object> terms = new ArrayList<>(valueCount);
225+
for (int i = 0; i < valueCount; i++) {
226+
final Object value = blockValueReader.apply(firstValueIndex + i);
185227
terms.add(value);
186228
}
187229
yield field.termsQuery(terms, searchExecutionContext);
@@ -192,8 +234,6 @@ Query getQuery(int position) {
192234

193235
private static class GeoShapeQueryList extends QueryList {
194236
private final BytesRef scratch = new BytesRef();
195-
private final MappedFieldType field;
196-
private final SearchExecutionContext searchExecutionContext;
197237
private final IntFunction<Geometry> blockValueReader;
198238
private final IntFunction<Query> shapeQuery;
199239

@@ -203,10 +243,8 @@ private GeoShapeQueryList(
203243
Block block,
204244
boolean onlySingleValues
205245
) {
206-
super(block, onlySingleValues);
246+
super(field, searchExecutionContext, block, onlySingleValues);
207247

208-
this.field = field;
209-
this.searchExecutionContext = searchExecutionContext;
210248
this.blockValueReader = blockToGeometry(block);
211249
this.shapeQuery = shapeQuery();
212250
}
@@ -217,15 +255,10 @@ public GeoShapeQueryList onlySingleValues() {
217255
}
218256

219257
@Override
220-
Query getQuery(int position) {
221-
final int count = block.getValueCount(position);
222-
if (onlySingleValues && count != 1) {
223-
return null;
224-
}
225-
final int first = block.getFirstValueIndex(position);
226-
return switch (count) {
258+
Query doGetQuery(int position, int firstValueIndex, int valueCount) {
259+
return switch (valueCount) {
227260
case 0 -> null;
228-
case 1 -> shapeQuery.apply(first);
261+
case 1 -> shapeQuery.apply(firstValueIndex);
229262
// TODO: support multiple values
230263
default -> throw new IllegalArgumentException("can't read multiple Geometry values from a single position");
231264
};
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
package org.elasticsearch.xpack.esql.querydsl.query;
8+
package org.elasticsearch.compute.querydsl.query;
99

1010
import org.apache.lucene.index.DocValues;
1111
import org.apache.lucene.index.LeafReaderContext;
@@ -39,7 +39,7 @@
3939
/**
4040
* Finds all fields with a single-value. If a field has a multi-value, it emits a {@link Warnings}.
4141
*/
42-
final class SingleValueMatchQuery extends Query {
42+
public final class SingleValueMatchQuery extends Query {
4343

4444
/**
4545
* Choose a big enough value so this approximation never drives the iteration.
@@ -52,7 +52,7 @@ final class SingleValueMatchQuery extends Query {
5252
private final IndexFieldData<?> fieldData;
5353
private final Warnings warnings;
5454

55-
SingleValueMatchQuery(IndexFieldData<?> fieldData, Warnings warnings) {
55+
public SingleValueMatchQuery(IndexFieldData<?> fieldData, Warnings warnings) {
5656
this.fieldData = fieldData;
5757
this.warnings = warnings;
5858
}

0 commit comments

Comments
 (0)