Skip to content

Commit c94c021

Browse files
pawankartik-elasticelasticsearchmachineastefanalex-spies
authored
ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497)
ES|QL index patterns validation: Ensure that the patterns in the query are syntactically and semantically valid --------- Co-authored-by: elasticsearchmachine <[email protected]> Co-authored-by: Andrei Stefan <[email protected]> Co-authored-by: Alexander Spies <[email protected]>
1 parent af735ac commit c94c021

File tree

3 files changed

+407
-81
lines changed

3 files changed

+407
-81
lines changed

docs/changelog/122497.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122497
2+
summary: Check if index patterns conform to valid format before validation
3+
area: CCS
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java

Lines changed: 191 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,20 @@
2020
import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext;
2121

2222
import java.util.ArrayList;
23+
import java.util.Arrays;
2324
import java.util.List;
2425

2526
import static org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.SelectorResolver.SELECTOR_SEPARATOR;
2627
import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR;
27-
import static org.elasticsearch.transport.RemoteClusterAware.isRemoteIndexName;
28+
import static org.elasticsearch.transport.RemoteClusterAware.splitIndexName;
2829
import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION;
2930
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
3031
import static org.elasticsearch.xpack.esql.parser.ParserUtils.source;
3132

3233
abstract class IdentifierBuilder extends AbstractBuilder {
3334

35+
private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern";
36+
3437
@Override
3538
public String visitIdentifier(IdentifierContext ctx) {
3639
return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER());
@@ -88,39 +91,21 @@ public String visitIndexPattern(List<EsqlBaseParser.IndexPatternContext> ctx) {
8891
String indexPattern = c.unquotedIndexString() != null ? c.unquotedIndexString().getText() : visitIndexString(c.indexString());
8992
String clusterString = visitClusterString(c.clusterString());
9093
String selectorString = visitSelectorString(c.selectorString());
91-
// skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
92-
// For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
93-
if (clusterString == null) {
94-
hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get());
95-
validateIndexPattern(indexPattern, c, hasSeenStar.get());
96-
// Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster
97-
// string is unset
98-
if (selectorString != null) {
99-
try {
100-
// Ensures that the selector provided is one of the valid kinds
101-
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexPattern, selectorString);
102-
} catch (InvalidIndexNameException e) {
103-
throw new ParsingException(e, source(c), e.getMessage());
104-
}
105-
}
106-
} else {
107-
validateClusterString(clusterString, c);
108-
// Do not allow selectors on remote cluster expressions until they are supported
109-
if (selectorString != null) {
110-
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexPattern, selectorString), c);
111-
}
112-
}
94+
95+
hasSeenStar.set(hasSeenStar.get() || indexPattern.contains(WILDCARD));
96+
validate(clusterString, indexPattern, selectorString, c, hasSeenStar.get());
11397
patterns.add(reassembleIndexName(clusterString, indexPattern, selectorString));
11498
});
11599
return Strings.collectionToDelimitedString(patterns, ",");
116100
}
117101

102+
private static void throwInvalidIndexNameException(String indexPattern, String message, EsqlBaseParser.IndexPatternContext ctx) {
103+
var ie = new InvalidIndexNameException(indexPattern, message);
104+
throw new ParsingException(ie, source(ctx), ie.getMessage());
105+
}
106+
118107
private static void throwOnMixingSelectorWithCluster(String indexPattern, EsqlBaseParser.IndexPatternContext c) {
119-
InvalidIndexNameException ie = new InvalidIndexNameException(
120-
indexPattern,
121-
"Selectors are not yet supported on remote cluster patterns"
122-
);
123-
throw new ParsingException(ie, source(c), ie.getMessage());
108+
throwInvalidIndexNameException(indexPattern, "Selectors are not yet supported on remote cluster patterns", c);
124109
}
125110

126111
private static String reassembleIndexName(String clusterString, String indexPattern, String selectorString) {
@@ -144,59 +129,196 @@ protected static void validateClusterString(String clusterString, EsqlBaseParser
144129
}
145130
}
146131

147-
private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
148-
// multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2"
149-
String[] indices = indexPattern.split(",");
150-
boolean hasExclusion = false;
151-
for (String index : indices) {
152-
// Strip spaces off first because validation checks are not written to handle them
153-
index = index.strip();
154-
if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster
155-
// Ensure that there are no selectors as they are not yet supported
156-
if (index.contains(SELECTOR_SEPARATOR)) {
157-
throwOnMixingSelectorWithCluster(index, ctx);
158-
}
159-
continue;
132+
/**
133+
* Takes the parsed constituent strings and validates them.
134+
* @param clusterString Name of the remote cluster. Can be null.
135+
* @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting,
136+
* e.g. {@code FROM """index*,-index1"""}.
137+
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
138+
* @param ctx Index Pattern Context for generating error messages with offsets.
139+
* @param hasSeenStar If we've seen an asterisk so far.
140+
*/
141+
private static void validate(
142+
String clusterString,
143+
String indexPattern,
144+
String selectorString,
145+
EsqlBaseParser.IndexPatternContext ctx,
146+
boolean hasSeenStar
147+
) {
148+
/*
149+
* At this point, only 3 formats are possible:
150+
* "index_pattern(s)",
151+
* remote:index_pattern, and,
152+
* index_pattern::selector_string.
153+
*
154+
* The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're
155+
* partially quoted. So if either of cluster string or selector string are present, there's no need
156+
* to split the pattern by comma since comma requires partial quoting.
157+
*/
158+
159+
String[] patterns;
160+
if (clusterString == null && selectorString == null) {
161+
// Pattern could be quoted or is singular like "index_name".
162+
patterns = indexPattern.split(",", -1);
163+
} else {
164+
// Either of cluster string or selector string is present. Pattern is unquoted.
165+
patterns = new String[] { indexPattern };
166+
}
167+
168+
patterns = Arrays.stream(patterns).map(String::strip).toArray(String[]::new);
169+
if (Arrays.stream(patterns).anyMatch(String::isBlank)) {
170+
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
171+
}
172+
173+
// Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"".
174+
if (patterns.length == 0) {
175+
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
176+
} else if (patterns.length == 1) {
177+
// Pattern is either an unquoted string or a quoted string with a single index (no comma sep).
178+
validateSingleIndexPattern(clusterString, patterns[0], selectorString, ctx, hasSeenStar);
179+
} else {
180+
/*
181+
* Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present,
182+
* cluster string and selector string cannot be present; they need to be attached within the quoting.
183+
* So we attempt to extract them later.
184+
*/
185+
for (String pattern : patterns) {
186+
validateSingleIndexPattern(null, pattern, null, ctx, hasSeenStar);
160187
}
188+
}
189+
}
190+
191+
/**
192+
* Validates the constituent strings. Will extract the cluster string and/or selector string from the index
193+
* name if clubbed together inside a quoted string.
194+
*
195+
* @param clusterString Name of the remote cluster. Can be null.
196+
* @param indexName Name of the index.
197+
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
198+
* @param ctx Index Pattern Context for generating error messages with offsets.
199+
* @param hasSeenStar If we've seen an asterisk so far.
200+
*/
201+
private static void validateSingleIndexPattern(
202+
String clusterString,
203+
String indexName,
204+
String selectorString,
205+
EsqlBaseParser.IndexPatternContext ctx,
206+
boolean hasSeenStar
207+
) {
208+
indexName = indexName.strip();
209+
210+
/*
211+
* Precedence:
212+
* 1. Cannot mix cluster and selector strings.
213+
* 2. Cluster string must be valid.
214+
* 3. Index name must be valid.
215+
* 4. Selector string must be valid.
216+
*
217+
* Since cluster string and/or selector string can be clubbed with the index name, we must try to
218+
* manually extract them before we attempt to do #2, #3, and #4.
219+
*/
220+
221+
// It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string.
222+
if (clusterString == null && selectorString == null) {
161223
try {
162-
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(index);
163-
if (splitPattern.v2() != null) {
164-
index = splitPattern.v1();
165-
}
166-
} catch (InvalidIndexNameException e) {
167-
// throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions
224+
var split = splitIndexName(indexName);
225+
clusterString = split[0];
226+
indexName = split[1];
227+
} catch (IllegalArgumentException e) {
168228
throw new ParsingException(e, source(ctx), e.getMessage());
169229
}
170-
hasSeenStar = index.contains(WILDCARD) || hasSeenStar;
171-
index = index.replace(WILDCARD, "").strip();
172-
if (index.isBlank()) {
173-
continue;
174-
}
175-
hasExclusion = index.startsWith(EXCLUSION);
176-
index = removeExclusion(index);
177-
String tempName;
178-
try {
179-
// remove the exclusion outside of <>, from index names with DateMath expression,
180-
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
181-
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
182-
} catch (ElasticsearchParseException e) {
183-
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
184-
throw new ParsingException(e, source(ctx), e.getMessage());
230+
}
231+
232+
// At the moment, selector strings for remote indices is not allowed.
233+
if (clusterString != null && selectorString != null) {
234+
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexName, selectorString), ctx);
235+
}
236+
237+
// Validation in the right precedence.
238+
if (clusterString != null) {
239+
clusterString = clusterString.strip();
240+
validateClusterString(clusterString, ctx);
241+
}
242+
243+
/*
244+
* It is possible for selector string to be attached to the index: "index_name::selector_string".
245+
* Try to extract the selector string.
246+
*/
247+
try {
248+
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(indexName);
249+
if (splitPattern.v2() != null) {
250+
// Cluster string too was clubbed with the index name like selector string.
251+
if (clusterString != null) {
252+
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, splitPattern.v1(), splitPattern.v2()), ctx);
253+
} else {
254+
// We've seen a selectorString. Use it.
255+
selectorString = splitPattern.v2();
256+
}
185257
}
186-
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
187-
index = tempName.equals(index) ? index : removeExclusion(tempName);
258+
259+
indexName = splitPattern.v1();
260+
} catch (InvalidIndexNameException e) {
261+
throw new ParsingException(e, source(ctx), e.getMessage());
262+
}
263+
264+
resolveAndValidateIndex(indexName, ctx, hasSeenStar);
265+
if (selectorString != null) {
266+
selectorString = selectorString.strip();
188267
try {
189-
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
268+
// Ensures that the selector provided is one of the valid kinds.
269+
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexName, selectorString);
190270
} catch (InvalidIndexNameException e) {
191-
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
192-
if (hasSeenStar && hasExclusion) {
193-
continue;
194-
}
195271
throw new ParsingException(e, source(ctx), e.getMessage());
196272
}
197273
}
198274
}
199275

276+
private static void resolveAndValidateIndex(String index, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
277+
// If index name is blank without any replacements, it was likely blank right from the beginning and is invalid.
278+
if (index.isBlank()) {
279+
throwInvalidIndexNameException(index, BLANK_INDEX_ERROR_MESSAGE, ctx);
280+
}
281+
282+
hasSeenStar = hasSeenStar || index.contains(WILDCARD);
283+
index = index.replace(WILDCARD, "").strip();
284+
if (index.isBlank()) {
285+
return;
286+
}
287+
var hasExclusion = index.startsWith(EXCLUSION);
288+
index = removeExclusion(index);
289+
String tempName;
290+
try {
291+
// remove the exclusion outside of <>, from index names with DateMath expression,
292+
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
293+
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
294+
} catch (ElasticsearchParseException e) {
295+
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
296+
throw new ParsingException(e, source(ctx), e.getMessage());
297+
}
298+
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
299+
index = tempName.equals(index) ? index : removeExclusion(tempName);
300+
try {
301+
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
302+
} catch (InvalidIndexNameException e) {
303+
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
304+
if (hasSeenStar && hasExclusion) {
305+
return;
306+
}
307+
308+
InvalidIndexNameException errorToThrow = e;
309+
/*
310+
* We only modify this particular message because it mentions '*' as an invalid char.
311+
* However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not
312+
* mislead the user by mentioning this char in the error message.
313+
*/
314+
if (e.getMessage().contains("must not contain the following characters")) {
315+
errorToThrow = new InvalidIndexNameException(index, e.getMessage().replace("'*',", ""));
316+
}
317+
318+
throw new ParsingException(errorToThrow, source(ctx), errorToThrow.getMessage());
319+
}
320+
}
321+
200322
private static String removeExclusion(String indexPattern) {
201323
return indexPattern.charAt(0) == EXCLUSION.charAt(0) ? indexPattern.substring(1) : indexPattern;
202324
}

0 commit comments

Comments
 (0)