Skip to content

Commit e867dcd

Browse files
[8.19] ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497) (#130021)
* ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497) ES|QL index patterns validation: Ensure that the patterns in the query are syntactically and semantically valid --------- Co-authored-by: elasticsearchmachine <[email protected]> Co-authored-by: Andrei Stefan <[email protected]> Co-authored-by: Alexander Spies <[email protected]> (cherry picked from commit c94c021) # Conflicts: # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java * Refactor dropping asterisk from invalid chars * Assert error message and drop invalid chars due to randomisation issue * Fix missing sort in `Strings#INVALID_FILENAME_CHARS` and assert invalid chars in error string
1 parent 4c591f5 commit e867dcd

File tree

4 files changed

+404
-77
lines changed

4 files changed

+404
-77
lines changed

docs/changelog/122497.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122497
2+
summary: Check if index patterns conform to valid format before validation
3+
area: CCS
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/common/Strings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ private static String changeFirstCharacterCase(String str, boolean capitalize) {
282282
static final Set<Character> INVALID_CHARS = Set.of('\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',');
283283

284284
public static final String INVALID_FILENAME_CHARS = INVALID_CHARS.stream()
285+
.sorted()
285286
.map(c -> "'" + c + "'")
286287
.collect(Collectors.joining(",", "[", "]"));
287288

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java

Lines changed: 192 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,22 @@
2020
import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext;
2121

2222
import java.util.ArrayList;
23+
import java.util.Arrays;
2324
import java.util.List;
2425

2526
import static org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.SelectorResolver.SELECTOR_SEPARATOR;
2627
import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR;
27-
import static org.elasticsearch.transport.RemoteClusterAware.isRemoteIndexName;
28+
import static org.elasticsearch.transport.RemoteClusterAware.splitIndexName;
2829
import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION;
2930
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
3031
import static org.elasticsearch.xpack.esql.parser.ParserUtils.source;
3132

3233
abstract class IdentifierBuilder extends AbstractBuilder {
3334

35+
private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern";
36+
37+
private static final String INVALID_ESQL_CHARS = Strings.INVALID_FILENAME_CHARS.replace("'*',", "");
38+
3439
@Override
3540
public String visitIdentifier(IdentifierContext ctx) {
3641
return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER());
@@ -88,39 +93,21 @@ public String visitIndexPattern(List<EsqlBaseParser.IndexPatternContext> ctx) {
8893
String indexPattern = c.unquotedIndexString() != null ? c.unquotedIndexString().getText() : visitIndexString(c.indexString());
8994
String clusterString = visitClusterString(c.clusterString());
9095
String selectorString = visitSelectorString(c.selectorString());
91-
// skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
92-
// For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
93-
if (clusterString == null) {
94-
hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get());
95-
validateIndexPattern(indexPattern, c, hasSeenStar.get());
96-
// Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster
97-
// string is unset
98-
if (selectorString != null) {
99-
try {
100-
// Ensures that the selector provided is one of the valid kinds
101-
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexPattern, selectorString);
102-
} catch (InvalidIndexNameException e) {
103-
throw new ParsingException(e, source(c), e.getMessage());
104-
}
105-
}
106-
} else {
107-
validateClusterString(clusterString, c);
108-
// Do not allow selectors on remote cluster expressions until they are supported
109-
if (selectorString != null) {
110-
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexPattern, selectorString), c);
111-
}
112-
}
96+
97+
hasSeenStar.set(hasSeenStar.get() || indexPattern.contains(WILDCARD));
98+
validate(clusterString, indexPattern, selectorString, c, hasSeenStar.get());
11399
patterns.add(reassembleIndexName(clusterString, indexPattern, selectorString));
114100
});
115101
return Strings.collectionToDelimitedString(patterns, ",");
116102
}
117103

104+
private static void throwInvalidIndexNameException(String indexPattern, String message, EsqlBaseParser.IndexPatternContext ctx) {
105+
var ie = new InvalidIndexNameException(indexPattern, message);
106+
throw new ParsingException(ie, source(ctx), ie.getMessage());
107+
}
108+
118109
private static void throwOnMixingSelectorWithCluster(String indexPattern, EsqlBaseParser.IndexPatternContext c) {
119-
InvalidIndexNameException ie = new InvalidIndexNameException(
120-
indexPattern,
121-
"Selectors are not yet supported on remote cluster patterns"
122-
);
123-
throw new ParsingException(ie, source(c), ie.getMessage());
110+
throwInvalidIndexNameException(indexPattern, "Selectors are not yet supported on remote cluster patterns", c);
124111
}
125112

126113
private static String reassembleIndexName(String clusterString, String indexPattern, String selectorString) {
@@ -144,59 +131,195 @@ protected static void validateClusterString(String clusterString, EsqlBaseParser
144131
}
145132
}
146133

147-
private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
148-
// multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2"
149-
String[] indices = indexPattern.split(",");
150-
boolean hasExclusion = false;
151-
for (String index : indices) {
152-
// Strip spaces off first because validation checks are not written to handle them
153-
index = index.strip();
154-
if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster
155-
// Ensure that there are no selectors as they are not yet supported
156-
if (index.contains(SELECTOR_SEPARATOR)) {
157-
throwOnMixingSelectorWithCluster(index, ctx);
158-
}
159-
continue;
134+
/**
135+
* Takes the parsed constituent strings and validates them.
136+
* @param clusterString Name of the remote cluster. Can be null.
137+
* @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting,
138+
* e.g. {@code FROM """index*,-index1"""}.
139+
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
140+
* @param ctx Index Pattern Context for generating error messages with offsets.
141+
* @param hasSeenStar If we've seen an asterisk so far.
142+
*/
143+
private static void validate(
144+
String clusterString,
145+
String indexPattern,
146+
String selectorString,
147+
EsqlBaseParser.IndexPatternContext ctx,
148+
boolean hasSeenStar
149+
) {
150+
/*
151+
* At this point, only 3 formats are possible:
152+
* "index_pattern(s)",
153+
* remote:index_pattern, and,
154+
* index_pattern::selector_string.
155+
*
156+
* The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're
157+
* partially quoted. So if either of cluster string or selector string are present, there's no need
158+
* to split the pattern by comma since comma requires partial quoting.
159+
*/
160+
161+
String[] patterns;
162+
if (clusterString == null && selectorString == null) {
163+
// Pattern could be quoted or is singular like "index_name".
164+
patterns = indexPattern.split(",", -1);
165+
} else {
166+
// Either of cluster string or selector string is present. Pattern is unquoted.
167+
patterns = new String[] { indexPattern };
168+
}
169+
170+
patterns = Arrays.stream(patterns).map(String::strip).toArray(String[]::new);
171+
if (Arrays.stream(patterns).anyMatch(String::isBlank)) {
172+
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
173+
}
174+
175+
// Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"".
176+
if (patterns.length == 0) {
177+
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
178+
} else if (patterns.length == 1) {
179+
// Pattern is either an unquoted string or a quoted string with a single index (no comma sep).
180+
validateSingleIndexPattern(clusterString, patterns[0], selectorString, ctx, hasSeenStar);
181+
} else {
182+
/*
183+
* Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present,
184+
* cluster string and selector string cannot be present; they need to be attached within the quoting.
185+
* So we attempt to extract them later.
186+
*/
187+
for (String pattern : patterns) {
188+
validateSingleIndexPattern(null, pattern, null, ctx, hasSeenStar);
160189
}
190+
}
191+
}
192+
193+
/**
194+
* Validates the constituent strings. Will extract the cluster string and/or selector string from the index
195+
* name if clubbed together inside a quoted string.
196+
*
197+
* @param clusterString Name of the remote cluster. Can be null.
198+
* @param indexName Name of the index.
199+
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
200+
* @param ctx Index Pattern Context for generating error messages with offsets.
201+
* @param hasSeenStar If we've seen an asterisk so far.
202+
*/
203+
private static void validateSingleIndexPattern(
204+
String clusterString,
205+
String indexName,
206+
String selectorString,
207+
EsqlBaseParser.IndexPatternContext ctx,
208+
boolean hasSeenStar
209+
) {
210+
indexName = indexName.strip();
211+
212+
/*
213+
* Precedence:
214+
* 1. Cannot mix cluster and selector strings.
215+
* 2. Cluster string must be valid.
216+
* 3. Index name must be valid.
217+
* 4. Selector string must be valid.
218+
*
219+
* Since cluster string and/or selector string can be clubbed with the index name, we must try to
220+
* manually extract them before we attempt to do #2, #3, and #4.
221+
*/
222+
223+
// It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string.
224+
if (clusterString == null && selectorString == null) {
161225
try {
162-
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(index);
163-
if (splitPattern.v2() != null) {
164-
index = splitPattern.v1();
165-
}
166-
} catch (InvalidIndexNameException e) {
167-
// throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions
226+
var split = splitIndexName(indexName);
227+
clusterString = split[0];
228+
indexName = split[1];
229+
} catch (IllegalArgumentException e) {
168230
throw new ParsingException(e, source(ctx), e.getMessage());
169231
}
170-
hasSeenStar = index.contains(WILDCARD) || hasSeenStar;
171-
index = index.replace(WILDCARD, "").strip();
172-
if (index.isBlank()) {
173-
continue;
174-
}
175-
hasExclusion = index.startsWith(EXCLUSION);
176-
index = removeExclusion(index);
177-
String tempName;
178-
try {
179-
// remove the exclusion outside of <>, from index names with DateMath expression,
180-
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
181-
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
182-
} catch (ElasticsearchParseException e) {
183-
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
184-
throw new ParsingException(e, source(ctx), e.getMessage());
232+
}
233+
234+
// At the moment, selector strings for remote indices is not allowed.
235+
if (clusterString != null && selectorString != null) {
236+
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexName, selectorString), ctx);
237+
}
238+
239+
// Validation in the right precedence.
240+
if (clusterString != null) {
241+
clusterString = clusterString.strip();
242+
validateClusterString(clusterString, ctx);
243+
}
244+
245+
/*
246+
* It is possible for selector string to be attached to the index: "index_name::selector_string".
247+
* Try to extract the selector string.
248+
*/
249+
try {
250+
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(indexName);
251+
if (splitPattern.v2() != null) {
252+
// Cluster string too was clubbed with the index name like selector string.
253+
if (clusterString != null) {
254+
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, splitPattern.v1(), splitPattern.v2()), ctx);
255+
} else {
256+
// We've seen a selectorString. Use it.
257+
selectorString = splitPattern.v2();
258+
}
185259
}
186-
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
187-
index = tempName.equals(index) ? index : removeExclusion(tempName);
260+
261+
indexName = splitPattern.v1();
262+
} catch (InvalidIndexNameException e) {
263+
throw new ParsingException(e, source(ctx), e.getMessage());
264+
}
265+
266+
resolveAndValidateIndex(indexName, ctx, hasSeenStar);
267+
if (selectorString != null) {
268+
selectorString = selectorString.strip();
188269
try {
189-
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
270+
// Ensures that the selector provided is one of the valid kinds.
271+
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexName, selectorString);
190272
} catch (InvalidIndexNameException e) {
191-
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
192-
if (hasSeenStar && hasExclusion) {
193-
continue;
194-
}
195273
throw new ParsingException(e, source(ctx), e.getMessage());
196274
}
197275
}
198276
}
199277

278+
private static void resolveAndValidateIndex(String index, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
279+
// If index name is blank without any replacements, it was likely blank right from the beginning and is invalid.
280+
if (index.isBlank()) {
281+
throwInvalidIndexNameException(index, BLANK_INDEX_ERROR_MESSAGE, ctx);
282+
}
283+
284+
hasSeenStar = hasSeenStar || index.contains(WILDCARD);
285+
index = index.replace(WILDCARD, "").strip();
286+
if (index.isBlank()) {
287+
return;
288+
}
289+
var hasExclusion = index.startsWith(EXCLUSION);
290+
index = removeExclusion(index);
291+
String tempName;
292+
try {
293+
// remove the exclusion outside of <>, from index names with DateMath expression,
294+
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
295+
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
296+
} catch (ElasticsearchParseException e) {
297+
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
298+
throw new ParsingException(e, source(ctx), e.getMessage());
299+
}
300+
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
301+
index = tempName.equals(index) ? index : removeExclusion(tempName);
302+
try {
303+
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
304+
} catch (InvalidIndexNameException e) {
305+
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
306+
if (hasSeenStar && hasExclusion) {
307+
return;
308+
}
309+
310+
/*
311+
* We only modify this particular message because it mentions '*' as an invalid char.
312+
* However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not
313+
* mislead the user by mentioning this char in the error message.
314+
*/
315+
if (e.getMessage().contains("must not contain the following characters")) {
316+
throwInvalidIndexNameException(index, "must not contain the following characters " + INVALID_ESQL_CHARS, ctx);
317+
}
318+
319+
throw new ParsingException(e, source(ctx), e.getMessage());
320+
}
321+
}
322+
200323
private static String removeExclusion(String indexPattern) {
201324
return indexPattern.charAt(0) == EXCLUSION.charAt(0) ? indexPattern.substring(1) : indexPattern;
202325
}

0 commit comments

Comments
 (0)