2020import org .elasticsearch .xpack .esql .parser .EsqlBaseParser .IndexStringContext ;
2121
2222import java .util .ArrayList ;
23+ import java .util .Arrays ;
2324import java .util .List ;
2425
2526import static org .elasticsearch .cluster .metadata .IndexNameExpressionResolver .SelectorResolver .SELECTOR_SEPARATOR ;
2627import static org .elasticsearch .transport .RemoteClusterAware .REMOTE_CLUSTER_INDEX_SEPARATOR ;
27- import static org .elasticsearch .transport .RemoteClusterAware .isRemoteIndexName ;
28+ import static org .elasticsearch .transport .RemoteClusterAware .splitIndexName ;
2829import static org .elasticsearch .xpack .esql .core .util .StringUtils .EXCLUSION ;
2930import static org .elasticsearch .xpack .esql .core .util .StringUtils .WILDCARD ;
3031import static org .elasticsearch .xpack .esql .parser .ParserUtils .source ;
3132
3233abstract class IdentifierBuilder extends AbstractBuilder {
3334
35+ private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern" ;
36+
37+ private static final String INVALID_ESQL_CHARS = Strings .INVALID_FILENAME_CHARS .replace ("'*'," , "" );
38+
3439 @ Override
3540 public String visitIdentifier (IdentifierContext ctx ) {
3641 return ctx == null ? null : unquoteIdentifier (ctx .QUOTED_IDENTIFIER (), ctx .UNQUOTED_IDENTIFIER ());
@@ -88,39 +93,21 @@ public String visitIndexPattern(List<EsqlBaseParser.IndexPatternContext> ctx) {
8893 String indexPattern = c .unquotedIndexString () != null ? c .unquotedIndexString ().getText () : visitIndexString (c .indexString ());
8994 String clusterString = visitClusterString (c .clusterString ());
9095 String selectorString = visitSelectorString (c .selectorString ());
91- // skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
92- // For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
93- if (clusterString == null ) {
94- hasSeenStar .set (indexPattern .contains (WILDCARD ) || hasSeenStar .get ());
95- validateIndexPattern (indexPattern , c , hasSeenStar .get ());
96- // Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster
97- // string is unset
98- if (selectorString != null ) {
99- try {
100- // Ensures that the selector provided is one of the valid kinds
101- IndexNameExpressionResolver .SelectorResolver .validateIndexSelectorString (indexPattern , selectorString );
102- } catch (InvalidIndexNameException e ) {
103- throw new ParsingException (e , source (c ), e .getMessage ());
104- }
105- }
106- } else {
107- validateClusterString (clusterString , c );
108- // Do not allow selectors on remote cluster expressions until they are supported
109- if (selectorString != null ) {
110- throwOnMixingSelectorWithCluster (reassembleIndexName (clusterString , indexPattern , selectorString ), c );
111- }
112- }
96+
97+ hasSeenStar .set (hasSeenStar .get () || indexPattern .contains (WILDCARD ));
98+ validate (clusterString , indexPattern , selectorString , c , hasSeenStar .get ());
11399 patterns .add (reassembleIndexName (clusterString , indexPattern , selectorString ));
114100 });
115101 return Strings .collectionToDelimitedString (patterns , "," );
116102 }
117103
104+ private static void throwInvalidIndexNameException (String indexPattern , String message , EsqlBaseParser .IndexPatternContext ctx ) {
105+ var ie = new InvalidIndexNameException (indexPattern , message );
106+ throw new ParsingException (ie , source (ctx ), ie .getMessage ());
107+ }
108+
118109 private static void throwOnMixingSelectorWithCluster (String indexPattern , EsqlBaseParser .IndexPatternContext c ) {
119- InvalidIndexNameException ie = new InvalidIndexNameException (
120- indexPattern ,
121- "Selectors are not yet supported on remote cluster patterns"
122- );
123- throw new ParsingException (ie , source (c ), ie .getMessage ());
110+ throwInvalidIndexNameException (indexPattern , "Selectors are not yet supported on remote cluster patterns" , c );
124111 }
125112
126113 private static String reassembleIndexName (String clusterString , String indexPattern , String selectorString ) {
@@ -144,59 +131,195 @@ protected static void validateClusterString(String clusterString, EsqlBaseParser
144131 }
145132 }
146133
147- private static void validateIndexPattern (String indexPattern , EsqlBaseParser .IndexPatternContext ctx , boolean hasSeenStar ) {
148- // multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2"
149- String [] indices = indexPattern .split ("," );
150- boolean hasExclusion = false ;
151- for (String index : indices ) {
152- // Strip spaces off first because validation checks are not written to handle them
153- index = index .strip ();
154- if (isRemoteIndexName (index )) { // skip the validation if there is remote cluster
155- // Ensure that there are no selectors as they are not yet supported
156- if (index .contains (SELECTOR_SEPARATOR )) {
157- throwOnMixingSelectorWithCluster (index , ctx );
158- }
159- continue ;
134+ /**
135+ * Takes the parsed constituent strings and validates them.
136+ * @param clusterString Name of the remote cluster. Can be null.
137+ * @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting,
138+ * e.g. {@code FROM """index*,-index1"""}.
139+ * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
140+ * @param ctx Index Pattern Context for generating error messages with offsets.
141+ * @param hasSeenStar If we've seen an asterisk so far.
142+ */
143+ private static void validate (
144+ String clusterString ,
145+ String indexPattern ,
146+ String selectorString ,
147+ EsqlBaseParser .IndexPatternContext ctx ,
148+ boolean hasSeenStar
149+ ) {
150+ /*
151+ * At this point, only 3 formats are possible:
152+ * "index_pattern(s)",
153+ * remote:index_pattern, and,
154+ * index_pattern::selector_string.
155+ *
156+ * The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're
157+ * partially quoted. So if either of cluster string or selector string are present, there's no need
158+ * to split the pattern by comma since comma requires partial quoting.
159+ */
160+
161+ String [] patterns ;
162+ if (clusterString == null && selectorString == null ) {
163+ // Pattern could be quoted or is singular like "index_name".
164+ patterns = indexPattern .split ("," , -1 );
165+ } else {
166+ // Either of cluster string or selector string is present. Pattern is unquoted.
167+ patterns = new String [] { indexPattern };
168+ }
169+
170+ patterns = Arrays .stream (patterns ).map (String ::strip ).toArray (String []::new );
171+ if (Arrays .stream (patterns ).anyMatch (String ::isBlank )) {
172+ throwInvalidIndexNameException (indexPattern , BLANK_INDEX_ERROR_MESSAGE , ctx );
173+ }
174+
175+ // Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"".
176+ if (patterns .length == 0 ) {
177+ throwInvalidIndexNameException (indexPattern , BLANK_INDEX_ERROR_MESSAGE , ctx );
178+ } else if (patterns .length == 1 ) {
179+ // Pattern is either an unquoted string or a quoted string with a single index (no comma sep).
180+ validateSingleIndexPattern (clusterString , patterns [0 ], selectorString , ctx , hasSeenStar );
181+ } else {
182+ /*
183+ * Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present,
184+ * cluster string and selector string cannot be present; they need to be attached within the quoting.
185+ * So we attempt to extract them later.
186+ */
187+ for (String pattern : patterns ) {
188+ validateSingleIndexPattern (null , pattern , null , ctx , hasSeenStar );
160189 }
190+ }
191+ }
192+
193+ /**
194+ * Validates the constituent strings. Will extract the cluster string and/or selector string from the index
195+ * name if clubbed together inside a quoted string.
196+ *
197+ * @param clusterString Name of the remote cluster. Can be null.
198+ * @param indexName Name of the index.
199+ * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
200+ * @param ctx Index Pattern Context for generating error messages with offsets.
201+ * @param hasSeenStar If we've seen an asterisk so far.
202+ */
203+ private static void validateSingleIndexPattern (
204+ String clusterString ,
205+ String indexName ,
206+ String selectorString ,
207+ EsqlBaseParser .IndexPatternContext ctx ,
208+ boolean hasSeenStar
209+ ) {
210+ indexName = indexName .strip ();
211+
212+ /*
213+ * Precedence:
214+ * 1. Cannot mix cluster and selector strings.
215+ * 2. Cluster string must be valid.
216+ * 3. Index name must be valid.
217+ * 4. Selector string must be valid.
218+ *
219+ * Since cluster string and/or selector string can be clubbed with the index name, we must try to
220+ * manually extract them before we attempt to do #2, #3, and #4.
221+ */
222+
223+ // It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string.
224+ if (clusterString == null && selectorString == null ) {
161225 try {
162- Tuple <String , String > splitPattern = IndexNameExpressionResolver .splitSelectorExpression (index );
163- if (splitPattern .v2 () != null ) {
164- index = splitPattern .v1 ();
165- }
166- } catch (InvalidIndexNameException e ) {
167- // throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions
226+ var split = splitIndexName (indexName );
227+ clusterString = split [0 ];
228+ indexName = split [1 ];
229+ } catch (IllegalArgumentException e ) {
168230 throw new ParsingException (e , source (ctx ), e .getMessage ());
169231 }
170- hasSeenStar = index .contains (WILDCARD ) || hasSeenStar ;
171- index = index .replace (WILDCARD , "" ).strip ();
172- if (index .isBlank ()) {
173- continue ;
174- }
175- hasExclusion = index .startsWith (EXCLUSION );
176- index = removeExclusion (index );
177- String tempName ;
178- try {
179- // remove the exclusion outside of <>, from index names with DateMath expression,
180- // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
181- tempName = IndexNameExpressionResolver .resolveDateMathExpression (index );
182- } catch (ElasticsearchParseException e ) {
183- // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
184- throw new ParsingException (e , source (ctx ), e .getMessage ());
232+ }
233+
234+ // At the moment, selector strings for remote indices is not allowed.
235+ if (clusterString != null && selectorString != null ) {
236+ throwOnMixingSelectorWithCluster (reassembleIndexName (clusterString , indexName , selectorString ), ctx );
237+ }
238+
239+ // Validation in the right precedence.
240+ if (clusterString != null ) {
241+ clusterString = clusterString .strip ();
242+ validateClusterString (clusterString , ctx );
243+ }
244+
245+ /*
246+ * It is possible for selector string to be attached to the index: "index_name::selector_string".
247+ * Try to extract the selector string.
248+ */
249+ try {
250+ Tuple <String , String > splitPattern = IndexNameExpressionResolver .splitSelectorExpression (indexName );
251+ if (splitPattern .v2 () != null ) {
252+ // Cluster string too was clubbed with the index name like selector string.
253+ if (clusterString != null ) {
254+ throwOnMixingSelectorWithCluster (reassembleIndexName (clusterString , splitPattern .v1 (), splitPattern .v2 ()), ctx );
255+ } else {
256+ // We've seen a selectorString. Use it.
257+ selectorString = splitPattern .v2 ();
258+ }
185259 }
186- hasExclusion = tempName .startsWith (EXCLUSION ) || hasExclusion ;
187- index = tempName .equals (index ) ? index : removeExclusion (tempName );
260+
261+ indexName = splitPattern .v1 ();
262+ } catch (InvalidIndexNameException e ) {
263+ throw new ParsingException (e , source (ctx ), e .getMessage ());
264+ }
265+
266+ resolveAndValidateIndex (indexName , ctx , hasSeenStar );
267+ if (selectorString != null ) {
268+ selectorString = selectorString .strip ();
188269 try {
189- MetadataCreateIndexService .validateIndexOrAliasName (index , InvalidIndexNameException ::new );
270+ // Ensures that the selector provided is one of the valid kinds.
271+ IndexNameExpressionResolver .SelectorResolver .validateIndexSelectorString (indexName , selectorString );
190272 } catch (InvalidIndexNameException e ) {
191- // ignore invalid index name if it has exclusions and there is an index with wildcard before it
192- if (hasSeenStar && hasExclusion ) {
193- continue ;
194- }
195273 throw new ParsingException (e , source (ctx ), e .getMessage ());
196274 }
197275 }
198276 }
199277
278+ private static void resolveAndValidateIndex (String index , EsqlBaseParser .IndexPatternContext ctx , boolean hasSeenStar ) {
279+ // If index name is blank without any replacements, it was likely blank right from the beginning and is invalid.
280+ if (index .isBlank ()) {
281+ throwInvalidIndexNameException (index , BLANK_INDEX_ERROR_MESSAGE , ctx );
282+ }
283+
284+ hasSeenStar = hasSeenStar || index .contains (WILDCARD );
285+ index = index .replace (WILDCARD , "" ).strip ();
286+ if (index .isBlank ()) {
287+ return ;
288+ }
289+ var hasExclusion = index .startsWith (EXCLUSION );
290+ index = removeExclusion (index );
291+ String tempName ;
292+ try {
293+ // remove the exclusion outside of <>, from index names with DateMath expression,
294+ // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
295+ tempName = IndexNameExpressionResolver .resolveDateMathExpression (index );
296+ } catch (ElasticsearchParseException e ) {
297+ // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
298+ throw new ParsingException (e , source (ctx ), e .getMessage ());
299+ }
300+ hasExclusion = tempName .startsWith (EXCLUSION ) || hasExclusion ;
301+ index = tempName .equals (index ) ? index : removeExclusion (tempName );
302+ try {
303+ MetadataCreateIndexService .validateIndexOrAliasName (index , InvalidIndexNameException ::new );
304+ } catch (InvalidIndexNameException e ) {
305+ // ignore invalid index name if it has exclusions and there is an index with wildcard before it
306+ if (hasSeenStar && hasExclusion ) {
307+ return ;
308+ }
309+
310+ /*
311+ * We only modify this particular message because it mentions '*' as an invalid char.
312+ * However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not
313+ * mislead the user by mentioning this char in the error message.
314+ */
315+ if (e .getMessage ().contains ("must not contain the following characters" )) {
316+ throwInvalidIndexNameException (index , "must not contain the following characters " + INVALID_ESQL_CHARS , ctx );
317+ }
318+
319+ throw new ParsingException (e , source (ctx ), e .getMessage ());
320+ }
321+ }
322+
200323 private static String removeExclusion (String indexPattern ) {
201324 return indexPattern .charAt (0 ) == EXCLUSION .charAt (0 ) ? indexPattern .substring (1 ) : indexPattern ;
202325 }
0 commit comments