Skip to content

Commit 5406c66

Browse files
committed
ESQL: Push more ==s on text fields to lucene
If you do: ``` | WHERE text_field == "cat" ``` we can't push to the text field because its search index is for individual words. But most text fields have a `.keyword` sub field and we *can* query its index. EXCEPT! It's normal for these fields to have `ignore_above` in their mapping. In that case we don't push to the field. Very sad. With this change we can push down `==`, but only when the right hand side is shorter than the `ignore_above`. This has pretty much infinite speed gain. An example using a million documents: ``` Before: "took" : 391, After: "took" : 4, ``` But this is going from totally un-indexed linear scans to totally indexed. You can make the "Before" number as high as you want by loading more data.
1 parent d081100 commit 5406c66

File tree

38 files changed

+349
-44
lines changed

38 files changed

+349
-44
lines changed

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,21 @@ public boolean canUseSyntheticSourceDelegateForQuerying() {
984984
&& syntheticSourceDelegate.isIndexed();
985985
}
986986

987+
/**
988+
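* Returns true if an equality check against {@code str} can be pushed to the delegate sub-field: the delegate must exist, be indexed, have doc values, and {@code str} must be shorter than the delegate's {@code ignore_above}.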
989+
*/
990+
public boolean canUseSyntheticSourceDelegateForQueryingEquality(String str) {
991+
if (syntheticSourceDelegate == null
992+
// Can't push equality to an index if there isn't an index
993+
|| syntheticSourceDelegate.isIndexed() == false
994+
// ESQL needs doc values to push equality
995+
|| syntheticSourceDelegate.hasDocValues() == false) {
996+
return false;
997+
}
998+
// Can't push equality if the value we're checking for is so long that the delegate would have ignored it.
999+
return str.length() < syntheticSourceDelegate.ignoreAbove();
1000+
}
1001+
9871002
@Override
9881003
public BlockLoader blockLoader(BlockLoaderContext blContext) {
9891004
if (canUseSyntheticSourceDelegateForLoading()) {

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ public void testProfile() throws IOException {
313313
@SuppressWarnings("unchecked")
314314
List<Map<String, Object>> operators = (List<Map<String, Object>>) p.get("operators");
315315
for (Map<String, Object> o : operators) {
316-
sig.add(checkOperatorProfile(o));
316+
sig.add(checkOperatorProfile(o, "*:*"));
317317
}
318318
String description = p.get("description").toString();
319319
switch (description) {
@@ -411,6 +411,55 @@ public void testProfileParsing() throws IOException {
411411
}
412412
}
413413

414+
public void testPushEqualityOnDefaults() throws IOException {
415+
indexTimestampData(1);
416+
417+
RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | WHERE test == \"value1\"");
418+
builder.profile(true);
419+
Map<String, Object> result = runEsql(builder);
420+
assertResultMap(
421+
result,
422+
getResultMatcher(result).entry("profile", matchesMap().entry("drivers", instanceOf(List.class))),
423+
matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
424+
.item(matchesMap().entry("name", "test").entry("type", "text"))
425+
.item(matchesMap().entry("name", "test.keyword").entry("type", "keyword"))
426+
.item(matchesMap().entry("name", "value").entry("type", "long")),
427+
equalTo(List.of(List.of("2020-12-12T00:00:00.000Z", "value1", "value1", 1)))
428+
);
429+
430+
@SuppressWarnings("unchecked")
431+
List<Map<String, Object>> profiles = (List<Map<String, Object>>) ((Map<String, Object>) result.get("profile")).get("drivers");
432+
for (Map<String, Object> p : profiles) {
433+
fixTypesOnProfile(p);
434+
assertThat(p, commonProfile());
435+
List<String> sig = new ArrayList<>();
436+
@SuppressWarnings("unchecked")
437+
List<Map<String, Object>> operators = (List<Map<String, Object>>) p.get("operators");
438+
for (Map<String, Object> o : operators) {
439+
// The query here is the most important bit - we *do* push to lucene.
440+
sig.add(checkOperatorProfile(o, "test.keyword:value1"));
441+
}
442+
String description = p.get("description").toString();
443+
switch (description) {
444+
case "data" -> assertMap(
445+
sig,
446+
matchesList().item("LuceneSourceOperator")
447+
.item("ValuesSourceReaderOperator")
448+
.item("ProjectOperator")
449+
.item("ExchangeSinkOperator")
450+
);
451+
case "node_reduce" -> assertThat(
452+
sig,
453+
either(matchesList().item("ExchangeSourceOperator").item("ExchangeSinkOperator")).or(
454+
matchesList().item("ExchangeSourceOperator").item("AggregationOperator").item("ExchangeSinkOperator")
455+
)
456+
);
457+
case "final" -> assertMap(sig, matchesList().item("ExchangeSourceOperator").item("LimitOperator").item("OutputOperator"));
458+
default -> throw new IllegalArgumentException("can't match " + description);
459+
}
460+
}
461+
}
462+
414463
@SuppressWarnings("unchecked")
415464
public void assertProcessMetadataForNextNode(Map<String, Object> nodeMetadata, Set<String> expectedNamesForNodes, int seenNodes) {
416465
assertEquals("M", nodeMetadata.get("ph"));
@@ -521,7 +570,7 @@ public void testInlineStatsProfile() throws IOException {
521570
@SuppressWarnings("unchecked")
522571
List<Map<String, Object>> operators = (List<Map<String, Object>>) p.get("operators");
523572
for (Map<String, Object> o : operators) {
524-
sig.add(checkOperatorProfile(o));
573+
sig.add(checkOperatorProfile(o, "*:*"));
525574
}
526575
signatures.add(sig);
527576
}
@@ -673,7 +722,7 @@ private void fixTypesOnProfile(Map<String, Object> profile) {
673722
profile.put("took_nanos", ((Number) profile.get("took_nanos")).longValue());
674723
}
675724

676-
private String checkOperatorProfile(Map<String, Object> o) {
725+
private String checkOperatorProfile(Map<String, Object> o, String query) {
677726
String name = (String) o.get("operator");
678727
name = name.replaceAll("\\[.+", "");
679728
MapMatcher status = switch (name) {
@@ -687,7 +736,7 @@ private String checkOperatorProfile(Map<String, Object> o) {
687736
.entry("pages_emitted", greaterThan(0))
688737
.entry("rows_emitted", greaterThan(0))
689738
.entry("process_nanos", greaterThan(0))
690-
.entry("processed_queries", List.of("*:*"));
739+
.entry("processed_queries", List.of(query));
691740
case "ValuesSourceReaderOperator" -> basicProfile().entry("readers_built", matchesMap().extraOk());
692741
case "AggregationOperator" -> matchesMap().entry("pages_processed", greaterThan(0))
693742
.entry("rows_received", greaterThan(0))

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,11 @@ public byte[] max(String field, DataType dataType) {
293293
public boolean isSingleValue(String field) {
294294
return false;
295295
}
296+
297+
@Override
298+
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
299+
return false;
300+
}
296301
}
297302

298303
/**
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"properties" : {
3+
"emp_no" : {
4+
"type" : "integer"
5+
},
6+
"first_name" : {
7+
"type" : "keyword"
8+
},
9+
"gender" : {
10+
"type" : "text"
11+
},
12+
"languages" : {
13+
"type" : "byte"
14+
},
15+
"last_name" : {
16+
"type" : "keyword"
17+
},
18+
"salary" : {
19+
"type" : "integer"
20+
},
21+
"_meta_field": {
22+
"type" : "keyword"
23+
},
24+
"hire_date": {
25+
"type": "date"
26+
},
27+
"job": {
28+
"type": "text",
29+
"fields": {
30+
"raw": {
31+
"type": "keyword",
32+
"ignore_above": 4
33+
}
34+
}
35+
},
36+
"long_noidx": {
37+
"type": "long",
38+
"index": false,
39+
"doc_values": false
40+
}
41+
}
42+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/TranslationAware.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public interface TranslationAware {
3131
* <p>and <b>not</b> this:</p>
3232
* <p>{@code Query childQuery = child.asQuery(handler);}</p>
3333
*/
34-
Query asQuery(TranslatorHandler handler);
34+
Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler);
3535

3636
/**
3737
* Subinterface for expressions that can only process single values (and null out on MVs).

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
153153
}
154154

155155
@Override
156-
public Query asQuery(TranslatorHandler handler) {
156+
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
157157
return queryBuilder != null ? new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(handler);
158158
}
159159

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.index.query.QueryRewriteContext;
1414
import org.elasticsearch.index.query.Rewriteable;
1515
import org.elasticsearch.xpack.esql.core.util.Holder;
16+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
1617
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
1718
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
1819
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
@@ -76,7 +77,9 @@ public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOExc
7677
Holder<Boolean> updated = new Holder<>(false);
7778
LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> {
7879
QueryBuilder builder = f.queryBuilder(), initial = builder;
79-
builder = builder == null ? f.asQuery(TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() : builder;
80+
builder = builder == null
81+
? f.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
82+
: builder;
8083
try {
8184
builder = builder.rewrite(ctx);
8285
} catch (IOException e) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ip/CIDRMatch.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
184184
}
185185

186186
@Override
187-
public Query asQuery(TranslatorHandler handler) {
187+
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
188188
var fa = LucenePushdownPredicates.checkIsFieldAttribute(ipField);
189189
Check.isTrue(Expressions.foldable(matches), "Expected foldable matches, but got [{}]", matches);
190190

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/spatial/SpatialRelatesFunction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
183183
}
184184

185185
@Override
186-
public Query asQuery(TranslatorHandler handler) {
186+
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
187187
if (left().foldable()) {
188188
checkSpatialRelatesFunction(left(), queryRelation());
189189
return translate(handler, right(), left());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWith.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
144144
}
145145

146146
@Override
147-
public Query asQuery(TranslatorHandler handler) {
147+
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
148148
LucenePushdownPredicates.checkIsPushableAttribute(str);
149149
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
150150

0 commit comments

Comments
 (0)