Skip to content

Commit b1cf59b

Browse files
authored
[8.19] Don't build can_match queries we can't push to data nodes (#130210) (#130386)
* Don't build can_match queries we can't push to data nodes (#130210) Passes the minimum transport version down to expressions when we convert them into queries that we'll use for can_match. Right now all this is used for is skipping the can_match from the wildcard like queries. The queries we make there aren't serializable. We'll fix that - but this should give us the levers that we need to do it in a backwards incompatible way. * Fix for backport * don't be backwards
1 parent f1835de commit b1cf59b

File tree

9 files changed

+322
-56
lines changed

9 files changed

+322
-56
lines changed

server/src/main/java/org/elasticsearch/index/query/AutomatonQueryBuilder.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public String fieldName() {
4848
return fieldName;
4949
}
5050

51+
public String description() {
52+
return description;
53+
}
54+
5155
@Override
5256
public String getWriteableName() {
5357
throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getWriteableName");

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java

Lines changed: 170 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.junit.rules.TestRule;
3030

3131
import java.io.IOException;
32+
import java.util.ArrayList;
3233
import java.util.List;
3334
import java.util.Map;
3435
import java.util.Set;
@@ -37,11 +38,15 @@
3738
import java.util.stream.Stream;
3839

3940
import static org.elasticsearch.test.MapMatcher.assertMap;
41+
import static org.elasticsearch.test.MapMatcher.matchesMap;
4042
import static org.elasticsearch.xpack.esql.ccq.Clusters.REMOTE_CLUSTER_NAME;
4143
import static org.hamcrest.Matchers.any;
44+
import static org.hamcrest.Matchers.anyOf;
4245
import static org.hamcrest.Matchers.equalTo;
4346
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
4447
import static org.hamcrest.Matchers.hasKey;
48+
import static org.hamcrest.Matchers.is;
49+
import static org.hamcrest.Matchers.nullValue;
4550

4651
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
4752
public class MultiClustersIT extends ESRestTestCase {
@@ -129,7 +134,7 @@ void indexDocs(RestClient client, String index, List<Doc> docs) throws IOExcepti
129134
}
130135

131136
private Map<String, Object> run(String query, boolean includeCCSMetadata) throws IOException {
132-
var queryBuilder = new RestEsqlTestCase.RequestObjectBuilder().query(query);
137+
var queryBuilder = new RestEsqlTestCase.RequestObjectBuilder().query(query).profile(true);
133138
if (includeCCSMetadata) {
134139
queryBuilder.includeCCSMetadata(true);
135140
}
@@ -158,12 +163,51 @@ private Map<String, Object> runEsql(RestEsqlTestCase.RequestObjectBuilder reques
158163
}
159164
}
160165

166+
private <C, V> void assertResultMapForLike(
167+
boolean includeCCSMetadata,
168+
Map<String, Object> result,
169+
C columns,
170+
V values,
171+
boolean remoteOnly,
172+
boolean requireLikeListCapability
173+
) throws IOException {
174+
List<String> requiredCapabilities = new ArrayList<>(List.of("like_on_index_fields"));
175+
if (requireLikeListCapability) {
176+
requiredCapabilities.add("like_list_on_index_fields");
177+
}
178+
// the feature is completely supported if both local and remote clusters support it
179+
boolean isSupported = clusterHasCapability("POST", "/_query", List.of(), requiredCapabilities).orElse(false);
180+
try (RestClient remoteClient = remoteClusterClient()) {
181+
isSupported = isSupported
182+
&& clusterHasCapability(remoteClient, "POST", "/_query", List.of(), requiredCapabilities).orElse(false);
183+
}
184+
185+
if (isSupported) {
186+
assertResultMap(includeCCSMetadata, result, columns, values, remoteOnly);
187+
} else {
188+
logger.info("--> skipping data check for like index test, cluster does not support like index feature");
189+
// just verify that we did not get a partial result
190+
var clusters = result.get("_clusters");
191+
var reason = "unexpected partial results" + (clusters != null ? ": _clusters=" + clusters : "");
192+
assertThat(reason, result.get("is_partial"), anyOf(nullValue(), is(false)));
193+
}
194+
}
195+
196+
private boolean capabilitiesSupportedNewAndOld(List<String> requiredCapabilities) throws IOException {
197+
boolean isSupported = clusterHasCapability("POST", "/_query", List.of(), requiredCapabilities).orElse(false);
198+
try (RestClient remoteClient = remoteClusterClient()) {
199+
isSupported = isSupported
200+
&& clusterHasCapability(remoteClient, "POST", "/_query", List.of(), requiredCapabilities).orElse(false);
201+
}
202+
return isSupported;
203+
}
204+
161205
private <C, V> void assertResultMap(boolean includeCCSMetadata, Map<String, Object> result, C columns, V values, boolean remoteOnly) {
162206
MapMatcher mapMatcher = getResultMatcher(
163207
ccsMetadataAvailable(),
164208
result.containsKey("is_partial"),
165209
result.containsKey("documents_found")
166-
);
210+
).extraOk();
167211
if (includeCCSMetadata) {
168212
mapMatcher = mapMatcher.entry("_clusters", any(Map.class));
169213
}
@@ -251,11 +295,13 @@ private void assertClusterDetailsMap(Map<String, Object> result, boolean remoteO
251295

252296
@SuppressWarnings("unchecked")
253297
Map<String, Object> remoteClusterShards = (Map<String, Object>) remoteCluster.get("_shards");
254-
assertThat(remoteClusterShards.keySet(), equalTo(Set.of("total", "successful", "skipped", "failed")));
255-
assertThat((Integer) remoteClusterShards.get("total"), greaterThanOrEqualTo(0));
256-
assertThat((Integer) remoteClusterShards.get("successful"), equalTo((Integer) remoteClusterShards.get("total")));
257-
assertThat((Integer) remoteClusterShards.get("skipped"), equalTo(0));
258-
assertThat((Integer) remoteClusterShards.get("failed"), equalTo(0));
298+
assertThat(
299+
remoteClusterShards,
300+
matchesMap().entry("total", greaterThanOrEqualTo(0))
301+
.entry("successful", remoteClusterShards.get("total"))
302+
.entry("skipped", greaterThanOrEqualTo(0))
303+
.entry("failed", 0)
304+
);
259305

260306
if (remoteOnly == false) {
261307
@SuppressWarnings("unchecked")
@@ -267,11 +313,13 @@ private void assertClusterDetailsMap(Map<String, Object> result, boolean remoteO
267313

268314
@SuppressWarnings("unchecked")
269315
Map<String, Object> localClusterShards = (Map<String, Object>) localCluster.get("_shards");
270-
assertThat(localClusterShards.keySet(), equalTo(Set.of("total", "successful", "skipped", "failed")));
271-
assertThat((Integer) localClusterShards.get("total"), greaterThanOrEqualTo(0));
272-
assertThat((Integer) localClusterShards.get("successful"), equalTo((Integer) localClusterShards.get("total")));
273-
assertThat((Integer) localClusterShards.get("skipped"), equalTo(0));
274-
assertThat((Integer) localClusterShards.get("failed"), equalTo(0));
316+
assertThat(
317+
localClusterShards,
318+
matchesMap().entry("total", greaterThanOrEqualTo(0))
319+
.entry("successful", localClusterShards.get("total"))
320+
.entry("skipped", greaterThanOrEqualTo(0))
321+
.entry("failed", 0)
322+
);
275323
}
276324
}
277325

@@ -371,6 +419,116 @@ public void testStats() throws IOException {
371419
assertThat(clusterData, hasKey("took"));
372420
}
373421

422+
public void testLikeIndex() throws Exception {
423+
424+
boolean includeCCSMetadata = includeCCSMetadata();
425+
Map<String, Object> result = run("""
426+
FROM test-local-index,*:test-remote-index METADATA _index
427+
| WHERE _index LIKE "*remote*"
428+
| STATS c = COUNT(*) BY _index
429+
| SORT _index ASC
430+
""", includeCCSMetadata);
431+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
432+
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
433+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
434+
}
435+
436+
public void testNotLikeIndex() throws Exception {
437+
boolean includeCCSMetadata = includeCCSMetadata();
438+
Map<String, Object> result = run("""
439+
FROM test-local-index,*:test-remote-index METADATA _index
440+
| WHERE _index NOT LIKE "*remote*"
441+
| STATS c = COUNT(*) BY _index
442+
| SORT _index ASC
443+
""", includeCCSMetadata);
444+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
445+
var values = List.of(List.of(localDocs.size(), localIndex));
446+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
447+
}
448+
449+
public void testLikeListIndex() throws Exception {
450+
List<String> requiredCapabilities = new ArrayList<>(List.of("like_list_on_index_fields"));
451+
// the feature is completely supported if both local and remote clusters support it
452+
if (capabilitiesSupportedNewAndOld(requiredCapabilities) == false) {
453+
logger.info("--> skipping testNotLikeListIndex, due to missing capability");
454+
return;
455+
}
456+
boolean includeCCSMetadata = includeCCSMetadata();
457+
Map<String, Object> result = run("""
458+
FROM test-local-index,*:test-remote-index METADATA _index
459+
| WHERE _index LIKE ("*remote*", "not-exist*")
460+
| STATS c = COUNT(*) BY _index
461+
| SORT _index ASC
462+
""", includeCCSMetadata);
463+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
464+
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
465+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
466+
}
467+
468+
public void testNotLikeListIndex() throws Exception {
469+
List<String> requiredCapabilities = new ArrayList<>(List.of("like_list_on_index_fields"));
470+
// the feature is completely supported if both local and remote clusters support it
471+
if (capabilitiesSupportedNewAndOld(requiredCapabilities) == false) {
472+
logger.info("--> skipping testNotLikeListIndex, due to missing capability");
473+
return;
474+
}
475+
boolean includeCCSMetadata = includeCCSMetadata();
476+
Map<String, Object> result = run("""
477+
FROM test-local-index,*:test-remote-index METADATA _index
478+
| WHERE _index NOT LIKE ("*remote*", "not-exist*")
479+
| STATS c = COUNT(*) BY _index
480+
| SORT _index ASC
481+
""", includeCCSMetadata);
482+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
483+
var values = List.of(List.of(localDocs.size(), localIndex));
484+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
485+
}
486+
487+
public void testNotLikeListKeyWord() throws Exception {
488+
List<String> requiredCapabilities = new ArrayList<>(List.of("like_list_on_index_fields"));
489+
// the feature is completely supported if both local and remote clusters support it
490+
if (capabilitiesSupportedNewAndOld(requiredCapabilities) == false) {
491+
logger.info("--> skipping testNotLikeListIndex, due to missing capability");
492+
return;
493+
}
494+
boolean includeCCSMetadata = includeCCSMetadata();
495+
Map<String, Object> result = run("""
496+
FROM test-local-index,*:test-remote-index METADATA _index
497+
| WHERE color NOT LIKE ("*blue*", "*red*")
498+
| STATS c = COUNT(*) BY _index
499+
| SORT _index ASC
500+
""", includeCCSMetadata);
501+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
502+
var values = List.of(List.of(localDocs.size(), localIndex));
503+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
504+
}
505+
506+
public void testRLikeIndex() throws Exception {
507+
boolean includeCCSMetadata = includeCCSMetadata();
508+
Map<String, Object> result = run("""
509+
FROM test-local-index,*:test-remote-index METADATA _index
510+
| WHERE _index RLIKE ".*remote.*"
511+
| STATS c = COUNT(*) BY _index
512+
| SORT _index ASC
513+
""", includeCCSMetadata);
514+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
515+
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
516+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
517+
}
518+
519+
public void testNotRLikeIndex() throws Exception {
520+
boolean includeCCSMetadata = includeCCSMetadata();
521+
Map<String, Object> result = run("""
522+
FROM test-local-index,*:test-remote-index METADATA _index
523+
| WHERE _index NOT RLIKE ".*remote.*"
524+
| STATS c = COUNT(*) BY _index
525+
| SORT _index ASC
526+
""", includeCCSMetadata);
527+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
528+
var values = List.of(List.of(localDocs.size(), localIndex));
529+
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
530+
}
531+
374532
private RestClient remoteClusterClient() throws IOException {
375533
var clusterHosts = parseClusterHosts(remoteCluster.getHttpAddresses());
376534
return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0]));

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLikeList.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,12 @@ protected WildcardLikeList replaceChild(Expression newLeft) {
9292
*/
9393
@Override
9494
public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
95-
return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO;
96-
95+
if (pushdownPredicates.minTransportVersion() == null) {
96+
return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO;
97+
} else {
98+
// The AutomatonQuery that we use right now isn't serializable.
99+
return Translatable.NO;
100+
}
97101
}
98102

99103
/**

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
package org.elasticsearch.xpack.esql.optimizer.rules.physical.local;
99

10+
import org.elasticsearch.TransportVersion;
11+
import org.elasticsearch.core.Nullable;
12+
import org.elasticsearch.index.query.QueryBuilder;
1013
import org.elasticsearch.xpack.esql.core.expression.Expression;
1114
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
1215
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
@@ -30,6 +33,24 @@
3033
* </ol>
3134
*/
3235
public interface LucenePushdownPredicates {
36+
/**
37+
* If we're extracting a query for {@code can_match} then this is the
38+
* minimum transport version in the cluster. Otherwise, this is {@code null}.
39+
* <p>
40+
* If this is not null {@link Expression}s should not claim to be
41+
* serializable unless their {@link QueryBuilder}
42+
* {@link QueryBuilder#getMinimalSupportedVersion supports} the version.
43+
* </p>
44+
* <p>
45+
* This is done on the coordinating node <strong>and</strong>. And for
46+
* cross cluster search this is done on the coordinating node on the
47+
* remote cluster. So! We actually <strong>have</strong> the minimum
48+
* cluster transport version.
49+
* </p>
50+
*/
51+
@Nullable
52+
TransportVersion minTransportVersion();
53+
3354
/**
3455
* For TEXT fields, we need to check if the field has a subfield of type KEYWORD that can be used instead.
3556
*/
@@ -101,29 +122,41 @@ static String pushableAttributeName(TypedAttribute attribute) {
101122
* In particular, it assumes TEXT fields have no exact subfields (underlying keyword field),
102123
* and that isAggregatable means indexed and has hasDocValues.
103124
*/
104-
LucenePushdownPredicates DEFAULT = new LucenePushdownPredicates() {
105-
@Override
106-
public boolean hasExactSubfield(FieldAttribute attr) {
107-
return false;
108-
}
125+
LucenePushdownPredicates DEFAULT = forCanMatch(null);
109126

110-
@Override
111-
public boolean isIndexedAndHasDocValues(FieldAttribute attr) {
112-
// Is the FieldType.isAggregatable() check correct here? In FieldType isAggregatable usually only means hasDocValues
113-
return attr.field().isAggregatable();
114-
}
127+
/**
128+
* A {@link LucenePushdownPredicates} for use with the {@code can_match} phase.
129+
*/
130+
static LucenePushdownPredicates forCanMatch(TransportVersion minTransportVersion) {
131+
return new LucenePushdownPredicates() {
132+
@Override
133+
public TransportVersion minTransportVersion() {
134+
return minTransportVersion;
135+
}
115136

116-
@Override
117-
public boolean isIndexed(FieldAttribute attr) {
118-
// TODO: This is the original behaviour, but is it correct? In FieldType isAggregatable usually only means hasDocValues
119-
return attr.field().isAggregatable();
120-
}
137+
@Override
138+
public boolean hasExactSubfield(FieldAttribute attr) {
139+
return false;
140+
}
121141

122-
@Override
123-
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
124-
return false;
125-
}
126-
};
142+
@Override
143+
public boolean isIndexedAndHasDocValues(FieldAttribute attr) {
144+
// Is the FieldType.isAggregatable() check correct here? In FieldType isAggregatable usually only means hasDocValues
145+
return attr.field().isAggregatable();
146+
}
147+
148+
@Override
149+
public boolean isIndexed(FieldAttribute attr) {
150+
// TODO: This is the original behaviour, but is it correct? In FieldType isAggregatable usually only means hasDocValues
151+
return attr.field().isAggregatable();
152+
}
153+
154+
@Override
155+
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
156+
return false;
157+
}
158+
};
159+
}
127160

128161
/**
129162
* If we have access to {@link SearchStats} over a collection of shards, we can make more fine-grained decisions about what can be
@@ -133,6 +166,11 @@ static LucenePushdownPredicates from(SearchStats stats) {
133166
// TODO: use FieldAttribute#fieldName, otherwise this doesn't apply to field attributes used for union types.
134167
// C.f. https://github.com/elastic/elasticsearch/issues/128905
135168
return new LucenePushdownPredicates() {
169+
@Override
170+
public TransportVersion minTransportVersion() {
171+
return null;
172+
}
173+
136174
@Override
137175
public boolean hasExactSubfield(FieldAttribute attr) {
138176
return stats.hasExactSubfield(new FieldAttribute.FieldName(attr.name()));

0 commit comments

Comments
 (0)