Skip to content

Commit dfbd526

Browse files
authored
Smarter field caps with subscribable listener (#116755) (#118063)
* Smarter field caps with subscribable listener (#116755) (cherry picked from commit 22f4a79) * Create the mapping explicitly, otherwise for 0 documents indices (#118015) the mapping will not contain the "value" field (cherry picked from commit 774c6ea)
1 parent 5afbfda commit dfbd526

File tree

8 files changed

+744
-108
lines changed

8 files changed

+744
-108
lines changed

docs/changelog/116755.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 116755
2+
summary: Smarter field caps with subscribable listener
3+
area: ES|QL
4+
type: enhancement
5+
issues: []
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.qa.multi_node;
9+
10+
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
11+
12+
import org.elasticsearch.test.TestClustersThreadFilter;
13+
import org.elasticsearch.test.cluster.ElasticsearchCluster;
14+
import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase;
15+
import org.junit.ClassRule;
16+
17+
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
18+
public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase {
19+
20+
@ClassRule
21+
public static ElasticsearchCluster cluster = Clusters.testCluster(ignored -> {});
22+
23+
@Override
24+
protected String getTestRestCluster() {
25+
return cluster.getHttpAddresses();
26+
}
27+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.qa.single_node;
9+
10+
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
11+
12+
import org.elasticsearch.test.TestClustersThreadFilter;
13+
import org.elasticsearch.test.cluster.ElasticsearchCluster;
14+
import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase;
15+
import org.junit.ClassRule;
16+
17+
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
18+
public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase {
19+
20+
@ClassRule
21+
public static ElasticsearchCluster cluster = Clusters.testCluster();
22+
23+
@Override
24+
protected String getTestRestCluster() {
25+
return cluster.getHttpAddresses();
26+
}
27+
}
Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.qa.rest;
9+
10+
import org.apache.http.util.EntityUtils;
11+
import org.elasticsearch.client.Request;
12+
import org.elasticsearch.client.Response;
13+
import org.elasticsearch.client.ResponseException;
14+
import org.elasticsearch.test.rest.ESRestTestCase;
15+
import org.elasticsearch.xcontent.XContentType;
16+
import org.elasticsearch.xpack.esql.AssertWarnings;
17+
import org.junit.After;
18+
import org.junit.Assert;
19+
20+
import java.io.IOException;
21+
import java.nio.charset.StandardCharsets;
22+
import java.util.List;
23+
import java.util.Locale;
24+
import java.util.Map;
25+
26+
import static org.elasticsearch.test.ListMatcher.matchesList;
27+
import static org.elasticsearch.test.MapMatcher.assertMap;
28+
import static org.elasticsearch.test.MapMatcher.matchesMap;
29+
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.entityToMap;
30+
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder;
31+
import static org.hamcrest.Matchers.allOf;
32+
import static org.hamcrest.Matchers.containsString;
33+
import static org.hamcrest.Matchers.equalTo;
34+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
35+
import static org.hamcrest.Matchers.hasSize;
36+
import static org.hamcrest.Matchers.instanceOf;
37+
import static org.hamcrest.Matchers.nullValue;
38+
39+
public abstract class RequestIndexFilteringTestCase extends ESRestTestCase {
40+
41+
@After
42+
public void wipeTestData() throws IOException {
43+
try {
44+
var response = client().performRequest(new Request("DELETE", "/test*"));
45+
assertEquals(200, response.getStatusLine().getStatusCode());
46+
} catch (ResponseException re) {
47+
assertEquals(404, re.getResponse().getStatusLine().getStatusCode());
48+
}
49+
}
50+
51+
public void testTimestampFilterFromQuery() throws IOException {
52+
int docsTest1 = 50;
53+
int docsTest2 = 30;
54+
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1");
55+
indexTimestampData(docsTest2, "test2", "2023-11-26", "id2");
56+
57+
// filter includes both indices in the result (all columns, all rows)
58+
RestEsqlTestCase.RequestObjectBuilder builder = timestampFilter("gte", "2023-01-01").query("FROM test*");
59+
Map<String, Object> result = runEsql(builder);
60+
assertMap(
61+
result,
62+
matchesMap().entry(
63+
"columns",
64+
matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
65+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
66+
.item(matchesMap().entry("name", "id2").entry("type", "integer"))
67+
.item(matchesMap().entry("name", "value").entry("type", "long"))
68+
).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1 + docsTest2))).entry("took", greaterThanOrEqualTo(0))
69+
);
70+
71+
// filter includes only test1. Columns from test2 are filtered out, as well (not only rows)!
72+
builder = timestampFilter("gte", "2024-01-01").query("FROM test*");
73+
assertMap(
74+
runEsql(builder),
75+
matchesMap().entry(
76+
"columns",
77+
matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
78+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
79+
.item(matchesMap().entry("name", "value").entry("type", "long"))
80+
).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0))
81+
);
82+
83+
// filter excludes both indices (no rows); the first analysis step fails because there are no columns, a second attempt succeeds
84+
// after eliminating the index filter. All columns are returned.
85+
builder = timestampFilter("gte", "2025-01-01").query("FROM test*");
86+
assertMap(
87+
runEsql(builder),
88+
matchesMap().entry(
89+
"columns",
90+
matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
91+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
92+
.item(matchesMap().entry("name", "id2").entry("type", "integer"))
93+
.item(matchesMap().entry("name", "value").entry("type", "long"))
94+
).entry("values", allOf(instanceOf(List.class), hasSize(0))).entry("took", greaterThanOrEqualTo(0))
95+
);
96+
}
97+
98+
public void testFieldExistsFilter_KeepWildcard() throws IOException {
99+
int docsTest1 = randomIntBetween(0, 10);
100+
int docsTest2 = randomIntBetween(0, 10);
101+
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1");
102+
indexTimestampData(docsTest2, "test2", "2023-11-26", "id2");
103+
104+
// filter includes only test1. Columns and rows of test2 are filtered out
105+
RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query("FROM test*");
106+
Map<String, Object> result = runEsql(builder);
107+
assertMap(
108+
result,
109+
matchesMap().entry(
110+
"columns",
111+
matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
112+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
113+
.item(matchesMap().entry("name", "value").entry("type", "long"))
114+
).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0))
115+
);
116+
117+
// filter includes only test1. Columns from test2 are filtered out, as well (not only rows)!
118+
builder = existsFilter("id1").query("FROM test* METADATA _index | KEEP _index, id*");
119+
result = runEsql(builder);
120+
assertMap(
121+
result,
122+
matchesMap().entry(
123+
"columns",
124+
matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword"))
125+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
126+
).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0))
127+
);
128+
@SuppressWarnings("unchecked")
129+
var values = (List<List<Object>>) result.get("values");
130+
for (List<Object> row : values) {
131+
assertThat(row.get(0), equalTo("test1"));
132+
assertThat(row.get(1), instanceOf(Integer.class));
133+
}
134+
}
135+
136+
public void testFieldExistsFilter_With_ExplicitUseOfDiscardedIndexFields() throws IOException {
137+
int docsTest1 = randomIntBetween(1, 5);
138+
int docsTest2 = randomIntBetween(0, 5);
139+
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1");
140+
indexTimestampData(docsTest2, "test2", "2023-11-26", "id2");
141+
142+
// test2 is explicitly used in a query with "SORT id2" even if the index filter should discard test2
143+
RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query(
144+
"FROM test* METADATA _index | SORT id2 | KEEP _index, id*"
145+
);
146+
Map<String, Object> result = runEsql(builder);
147+
assertMap(
148+
result,
149+
matchesMap().entry(
150+
"columns",
151+
matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword"))
152+
.item(matchesMap().entry("name", "id1").entry("type", "integer"))
153+
.item(matchesMap().entry("name", "id2").entry("type", "integer"))
154+
).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0))
155+
);
156+
@SuppressWarnings("unchecked")
157+
var values = (List<List<Object>>) result.get("values");
158+
for (List<Object> row : values) {
159+
assertThat(row.get(0), equalTo("test1"));
160+
assertThat(row.get(1), instanceOf(Integer.class));
161+
assertThat(row.get(2), nullValue());
162+
}
163+
}
164+
165+
public void testFieldNameTypo() throws IOException {
166+
int docsTest1 = randomIntBetween(0, 5);
167+
int docsTest2 = randomIntBetween(0, 5);
168+
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1");
169+
indexTimestampData(docsTest2, "test2", "2023-11-26", "id2");
170+
171+
// idx field name is explicitly used, though it doesn't exist in any of the indices. First test - without filter
172+
ResponseException e = expectThrows(
173+
ResponseException.class,
174+
() -> runEsql(requestObjectBuilder().query("FROM test* | WHERE idx == 123"))
175+
);
176+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
177+
assertThat(e.getMessage(), containsString("verification_exception"));
178+
assertThat(e.getMessage(), containsString("Found 1 problem"));
179+
assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]"));
180+
181+
e = expectThrows(ResponseException.class, () -> runEsql(requestObjectBuilder().query("FROM test1 | WHERE idx == 123")));
182+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
183+
assertThat(e.getMessage(), containsString("verification_exception"));
184+
assertThat(e.getMessage(), containsString("Found 1 problem"));
185+
assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]"));
186+
187+
e = expectThrows(
188+
ResponseException.class,
189+
() -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test* | WHERE idx == 123"))
190+
);
191+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
192+
assertThat(e.getMessage(), containsString("Found 1 problem"));
193+
assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]"));
194+
195+
e = expectThrows(
196+
ResponseException.class,
197+
() -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test2 | WHERE idx == 123"))
198+
);
199+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
200+
assertThat(e.getMessage(), containsString("Found 1 problem"));
201+
assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]"));
202+
}
203+
204+
public void testIndicesDontExist() throws IOException {
205+
int docsTest1 = 0; // we are interested only in the created index, not necessarily that it has data
206+
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1");
207+
208+
ResponseException e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo")));
209+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
210+
assertThat(e.getMessage(), containsString("verification_exception"));
211+
assertThat(e.getMessage(), containsString("Unknown index [foo]"));
212+
213+
e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo*")));
214+
assertEquals(400, e.getResponse().getStatusLine().getStatusCode());
215+
assertThat(e.getMessage(), containsString("verification_exception"));
216+
assertThat(e.getMessage(), containsString("Unknown index [foo*]"));
217+
218+
e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo,test1")));
219+
assertEquals(404, e.getResponse().getStatusLine().getStatusCode());
220+
assertThat(e.getMessage(), containsString("index_not_found_exception"));
221+
assertThat(e.getMessage(), containsString("no such index [foo]"));
222+
}
223+
224+
private static RestEsqlTestCase.RequestObjectBuilder timestampFilter(String op, String date) throws IOException {
225+
return requestObjectBuilder().filter(b -> {
226+
b.startObject("range");
227+
{
228+
b.startObject("@timestamp").field(op, date).endObject();
229+
}
230+
b.endObject();
231+
});
232+
}
233+
234+
private static RestEsqlTestCase.RequestObjectBuilder existsFilter(String field) throws IOException {
235+
return requestObjectBuilder().filter(b -> b.startObject("exists").field("field", field).endObject());
236+
}
237+
238+
public Map<String, Object> runEsql(RestEsqlTestCase.RequestObjectBuilder requestObject) throws IOException {
239+
return RestEsqlTestCase.runEsql(requestObject, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC);
240+
}
241+
242+
protected void indexTimestampData(int docs, String indexName, String date, String differentiatorFieldName) throws IOException {
243+
Request createIndex = new Request("PUT", indexName);
244+
createIndex.setJsonEntity("""
245+
{
246+
"settings": {
247+
"index": {
248+
"number_of_shards": 3
249+
}
250+
},
251+
"mappings": {
252+
"properties": {
253+
"@timestamp": {
254+
"type": "date"
255+
},
256+
"value": {
257+
"type": "long"
258+
},
259+
"%differentiator_field_name%": {
260+
"type": "integer"
261+
}
262+
}
263+
}
264+
}""".replace("%differentiator_field_name%", differentiatorFieldName));
265+
Response response = client().performRequest(createIndex);
266+
assertThat(
267+
entityToMap(response.getEntity(), XContentType.JSON),
268+
matchesMap().entry("shards_acknowledged", true).entry("index", indexName).entry("acknowledged", true)
269+
);
270+
271+
if (docs > 0) {
272+
StringBuilder b = new StringBuilder();
273+
for (int i = 0; i < docs; i++) {
274+
b.append(String.format(Locale.ROOT, """
275+
{"create":{"_index":"%s"}}
276+
{"@timestamp":"%s","value":%d,"%s":%d}
277+
""", indexName, date, i, differentiatorFieldName, i));
278+
}
279+
Request bulk = new Request("POST", "/_bulk");
280+
bulk.addParameter("refresh", "true");
281+
bulk.addParameter("filter_path", "errors");
282+
bulk.setJsonEntity(b.toString());
283+
response = client().performRequest(bulk);
284+
Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8));
285+
}
286+
}
287+
}

0 commit comments

Comments
 (0)