Skip to content

Commit b527e4b

Browse files
authored
ESQL: Push more ==s on text fields to lucene (elastic#126641)
If you do: ``` | WHERE text_field == "cat" ``` we can't push to the text field because it's search index is for individual words. But most text fields have a `.keyword` sub field and we *can* query it's index. EXCEPT! It's normal for these fields to have `ignore_above` in their mapping. In that case we don't push to the field. Very sad. With this change we can push down `==`, but only when the right hand side is shorter than the `ignore_above`. This has pretty much infinite speed gain. An example using a million documents: ``` Before: "took" : 391, After: "took" : 4, ``` But this is going from totally un-indexed linear scans to totally indexed. You can make the "Before" number as high as you want by loading more data.
1 parent 09541c5 commit b527e4b

File tree

49 files changed

+747
-96
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+747
-96
lines changed

docs/changelog/126641.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126641
2+
summary: Push more `==`s on text fields to lucene
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,21 @@ public boolean canUseSyntheticSourceDelegateForQuerying() {
983983
&& syntheticSourceDelegate.isIndexed();
984984
}
985985

986+
/**
987+
* Returns true if the delegate sub-field can be used for querying only (ie. isIndexed must be true)
988+
*/
989+
public boolean canUseSyntheticSourceDelegateForQueryingEquality(String str) {
990+
if (syntheticSourceDelegate == null
991+
// Can't push equality to an index if there isn't an index
992+
|| syntheticSourceDelegate.isIndexed() == false
993+
// ESQL needs docs values to push equality
994+
|| syntheticSourceDelegate.hasDocValues() == false) {
995+
return false;
996+
}
997+
// Can't push equality if the field we're checking for is so big we'd ignore it.
998+
return str.length() <= syntheticSourceDelegate.ignoreAbove();
999+
}
1000+
9861001
@Override
9871002
public BlockLoader blockLoader(BlockLoaderContext blContext) {
9881003
if (canUseSyntheticSourceDelegateForLoading()) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.qa.single_node;
9+
10+
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
11+
12+
import org.elasticsearch.client.Request;
13+
import org.elasticsearch.client.Response;
14+
import org.elasticsearch.test.ListMatcher;
15+
import org.elasticsearch.test.MapMatcher;
16+
import org.elasticsearch.test.TestClustersThreadFilter;
17+
import org.elasticsearch.test.cluster.ElasticsearchCluster;
18+
import org.elasticsearch.test.rest.ESRestTestCase;
19+
import org.elasticsearch.xcontent.XContentType;
20+
import org.elasticsearch.xpack.esql.AssertWarnings;
21+
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
22+
import org.junit.ClassRule;
23+
24+
import java.io.IOException;
25+
import java.util.ArrayList;
26+
import java.util.List;
27+
import java.util.Map;
28+
import java.util.regex.Pattern;
29+
30+
import static org.elasticsearch.test.ListMatcher.matchesList;
31+
import static org.elasticsearch.test.MapMatcher.assertMap;
32+
import static org.elasticsearch.test.MapMatcher.matchesMap;
33+
import static org.elasticsearch.xpack.esql.EsqlTestUtils.entityToMap;
34+
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder;
35+
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsql;
36+
import static org.elasticsearch.xpack.esql.qa.single_node.RestEsqlIT.commonProfile;
37+
import static org.elasticsearch.xpack.esql.qa.single_node.RestEsqlIT.fixTypesOnProfile;
38+
import static org.hamcrest.Matchers.equalTo;
39+
import static org.hamcrest.Matchers.instanceOf;
40+
import static org.hamcrest.Matchers.startsWith;
41+
42+
/**
43+
* Tests for pushing queries to lucene.
44+
*/
45+
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
46+
public class PushQueriesIT extends ESRestTestCase {
47+
@ClassRule
48+
public static ElasticsearchCluster cluster = Clusters.testCluster();
49+
50+
public void testPushEqualityOnDefaults() throws IOException {
51+
String value = "v".repeat(between(0, 256));
52+
testPushQuery(value, """
53+
FROM test
54+
| WHERE test == "%value"
55+
""", "#test.keyword:%value -_ignored:test.keyword", false);
56+
}
57+
58+
public void testPushEqualityOnDefaultsTooBigToPush() throws IOException {
59+
String value = "a".repeat(between(257, 1000));
60+
testPushQuery(value, """
61+
FROM test
62+
| WHERE test == "%value"
63+
""", "*:*", true);
64+
}
65+
66+
public void testPushCaseInsensitiveEqualityOnDefaults() throws IOException {
67+
String value = "a".repeat(between(0, 256));
68+
testPushQuery(value, """
69+
FROM test
70+
| WHERE TO_LOWER(test) == "%value"
71+
""", "*:*", true);
72+
}
73+
74+
private void testPushQuery(String value, String esqlQuery, String luceneQuery, boolean filterInCompute) throws IOException {
75+
indexValue(value);
76+
77+
RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(
78+
esqlQuery.replaceAll("%value", value) + "\n| KEEP test"
79+
);
80+
builder.profile(true);
81+
Map<String, Object> result = runEsql(builder, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC);
82+
assertResultMap(
83+
result,
84+
getResultMatcher(result).entry(
85+
"profile",
86+
matchesMap().entry("drivers", instanceOf(List.class))
87+
.entry("planning", matchesMap().extraOk())
88+
.entry("query", matchesMap().extraOk())
89+
),
90+
matchesList().item(matchesMap().entry("name", "test").entry("type", "text")),
91+
equalTo(List.of(List.of(value)))
92+
);
93+
94+
@SuppressWarnings("unchecked")
95+
List<Map<String, Object>> profiles = (List<Map<String, Object>>) ((Map<String, Object>) result.get("profile")).get("drivers");
96+
for (Map<String, Object> p : profiles) {
97+
fixTypesOnProfile(p);
98+
assertThat(p, commonProfile());
99+
List<String> sig = new ArrayList<>();
100+
@SuppressWarnings("unchecked")
101+
List<Map<String, Object>> operators = (List<Map<String, Object>>) p.get("operators");
102+
for (Map<String, Object> o : operators) {
103+
sig.add(checkOperatorProfile(o, luceneQuery.replaceAll("%value", value)));
104+
}
105+
String description = p.get("description").toString();
106+
switch (description) {
107+
case "data" -> {
108+
ListMatcher matcher = matchesList().item("LuceneSourceOperator").item("ValuesSourceReaderOperator");
109+
if (filterInCompute) {
110+
matcher = matcher.item("FilterOperator").item("LimitOperator");
111+
}
112+
matcher = matcher.item("ProjectOperator").item("ExchangeSinkOperator");
113+
assertMap(sig, matcher);
114+
}
115+
case "node_reduce" -> assertMap(sig, matchesList().item("ExchangeSourceOperator").item("ExchangeSinkOperator"));
116+
case "final" -> assertMap(
117+
sig,
118+
matchesList().item("ExchangeSourceOperator").item("LimitOperator").item("ProjectOperator").item("OutputOperator")
119+
);
120+
default -> throw new IllegalArgumentException("can't match " + description);
121+
}
122+
}
123+
}
124+
125+
private void indexValue(String value) throws IOException {
126+
Request createIndex = new Request("PUT", "test");
127+
createIndex.setJsonEntity("""
128+
{
129+
"settings": {
130+
"index": {
131+
"number_of_shards": 1
132+
}
133+
}
134+
}""");
135+
Response createResponse = client().performRequest(createIndex);
136+
assertThat(
137+
entityToMap(createResponse.getEntity(), XContentType.JSON),
138+
matchesMap().entry("shards_acknowledged", true).entry("index", "test").entry("acknowledged", true)
139+
);
140+
141+
Request bulk = new Request("POST", "/_bulk");
142+
bulk.addParameter("refresh", "");
143+
bulk.setJsonEntity(String.format("""
144+
{"create":{"_index":"test"}}
145+
{"test":"%s"}
146+
""", value));
147+
Response bulkResponse = client().performRequest(bulk);
148+
assertThat(entityToMap(bulkResponse.getEntity(), XContentType.JSON), matchesMap().entry("errors", false).extraOk());
149+
}
150+
151+
private static final Pattern TO_NAME = Pattern.compile("\\[.+", Pattern.DOTALL);
152+
153+
private static String checkOperatorProfile(Map<String, Object> o, String query) {
154+
String name = (String) o.get("operator");
155+
name = TO_NAME.matcher(name).replaceAll("");
156+
if (name.equals("LuceneSourceOperator")) {
157+
MapMatcher expectedOp = matchesMap().entry("operator", startsWith(name))
158+
.entry("status", matchesMap().entry("processed_queries", List.of(query)).extraOk());
159+
assertMap(o, expectedOp);
160+
}
161+
return name;
162+
}
163+
164+
@Override
165+
protected String getTestRestCluster() {
166+
return cluster.getHttpAddresses();
167+
}
168+
}

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ public void testForceSleepsProfile() throws IOException {
648648
}
649649
}
650650

651-
private MapMatcher commonProfile() {
651+
static MapMatcher commonProfile() {
652652
return matchesMap() //
653653
.entry("description", any(String.class))
654654
.entry("cluster_name", any(String.class))
@@ -669,7 +669,7 @@ private MapMatcher commonProfile() {
669669
* come back as integers and sometimes longs. This just promotes
670670
* them to long every time.
671671
*/
672-
private void fixTypesOnProfile(Map<String, Object> profile) {
672+
static void fixTypesOnProfile(Map<String, Object> profile) {
673673
profile.put("iterations", ((Number) profile.get("iterations")).longValue());
674674
profile.put("cpu_nanos", ((Number) profile.get("cpu_nanos")).longValue());
675675
profile.put("took_nanos", ((Number) profile.get("took_nanos")).longValue());

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,8 @@ public static Map<String, Object> runEsqlAsync(RequestObjectBuilder requestObjec
12351235
return runEsqlAsync(requestObject, randomBoolean(), new AssertWarnings.NoWarnings());
12361236
}
12371237

1238-
static Map<String, Object> runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings, Mode mode) throws IOException {
1238+
public static Map<String, Object> runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings, Mode mode)
1239+
throws IOException {
12391240
if (mode == ASYNC) {
12401241
return runEsqlAsync(requestObject, randomBoolean(), assertWarnings);
12411242
} else {

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ public class CsvTestsDataLoader {
139139
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
140140
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
141141
private static final TestDataset LOGS = new TestDataset("logs");
142+
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
142143

143144
public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
144145
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -196,7 +197,8 @@ public class CsvTestsDataLoader {
196197
Map.entry(ADDRESSES.indexName, ADDRESSES),
197198
Map.entry(BOOKS.indexName, BOOKS),
198199
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
199-
Map.entry(LOGS.indexName, LOGS)
200+
Map.entry(LOGS.indexName, LOGS),
201+
Map.entry(MV_TEXT.indexName, MV_TEXT)
200202
);
201203

202204
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,11 @@ public byte[] max(String field, DataType dataType) {
294294
public boolean isSingleValue(String field) {
295295
return false;
296296
}
297+
298+
@Override
299+
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
300+
return false;
301+
}
297302
}
298303

299304
/**
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@timestamp:date ,message:text
2+
2023-10-23T13:55:01.543Z,[Connected to 10.1.0.1, Banana]
3+
2023-10-23T13:55:01.544Z,Connected to 10.1.0.1
4+
2023-10-23T13:55:01.545Z,[Connected to 10.1.0.1, More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100]
5+
2023-10-23T13:55:01.546Z,More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"properties" : {
3+
"emp_no" : {
4+
"type" : "integer"
5+
},
6+
"first_name" : {
7+
"type" : "keyword"
8+
},
9+
"gender" : {
10+
"type" : "text"
11+
},
12+
"languages" : {
13+
"type" : "byte"
14+
},
15+
"last_name" : {
16+
"type" : "keyword"
17+
},
18+
"salary" : {
19+
"type" : "integer"
20+
},
21+
"_meta_field": {
22+
"type" : "keyword"
23+
},
24+
"hire_date": {
25+
"type": "date"
26+
},
27+
"job": {
28+
"type": "text",
29+
"fields": {
30+
"raw": {
31+
"type": "keyword",
32+
"ignore_above": 4
33+
}
34+
}
35+
},
36+
"long_noidx": {
37+
"type": "long",
38+
"index": false,
39+
"doc_values": false
40+
}
41+
}
42+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"properties": {
3+
"@timestamp": {
4+
"type": "date"
5+
},
6+
"message": {
7+
"type": "text",
8+
"fields": {
9+
"raw": {
10+
"type": "keyword",
11+
"ignore_above": 100
12+
}
13+
}
14+
}
15+
}
16+
}

0 commit comments

Comments
 (0)