Skip to content
Merged
5 changes: 5 additions & 0 deletions docs/changelog/120355.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 120355
summary: Ensure cluster string could be quoted
area: ES|QL
type: enhancement
issues: []
1 change: 1 addition & 0 deletions x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ indexPattern

clusterString
: UNQUOTED_SOURCE
| QUOTED_STRING
;
Comment on lines 144 to 147
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since they are the same, point clusterString to indexString:

clusterString
  : indexString
;

We could fully remove it but it's worth keeping the element in for future changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to avoid that. clusterString should not be equivalent to indexString.
For example clusterString should not allow : and :: from upcoming selector changes.
I imagine some day we might need to reflect that in grammar.


indexString
Expand Down

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,32 @@ protected static String quoteIdString(String unquotedString) {
return "`" + unquotedString.replace("`", "``") + "`";
}

@Override
public String visitClusterString(EsqlBaseParser.ClusterStringContext ctx) {
if (ctx == null) {
return null;
} else if (ctx.UNQUOTED_SOURCE() != null) {
return ctx.UNQUOTED_SOURCE().getText();
} else {
return unquote(ctx.QUOTED_STRING().getText());
}
Comment on lines +57 to +63
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment in the grammar - this method can then either be removed or delegate to visitIndexString.

}

@Override
public String visitIndexString(IndexStringContext ctx) {
TerminalNode n = ctx.UNQUOTED_SOURCE();
return n != null ? n.getText() : unquote(ctx.QUOTED_STRING().getText());
if (ctx.UNQUOTED_SOURCE() != null) {
return ctx.UNQUOTED_SOURCE().getText();
} else {
return unquote(ctx.QUOTED_STRING().getText());
}
}

public String visitIndexPattern(List<EsqlBaseParser.IndexPatternContext> ctx) {
List<String> patterns = new ArrayList<>(ctx.size());
Holder<Boolean> hasSeenStar = new Holder<>(false);
ctx.forEach(c -> {
String indexPattern = visitIndexString(c.indexString());
String clusterString = c.clusterString() != null ? c.clusterString().getText() : null;
String clusterString = visitClusterString(c.clusterString());
// skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
// For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
if (clusterString == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,55 @@ private void clustersAndIndices(String command, String indexString1, String inde
);
}

public void testValidQuotingFromIndexPattern() {
Copy link
Member

@fang-xing-esql fang-xing-esql Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The coverage looks pretty good to me; there is one negative case that I can think of. According to RemoteClusterAware.isRemoteIndexName, : is a valid character in the index pattern, where it separates the cluster name from the index name, but it is not a valid character in an index name. I wonder whether it is a valid character in a cluster name?

The following queries pass the grammar and the parser and then error out, which looks correct; however, the error messages are not very clear.

+ curl -u elastic:password -X POST 'localhost:9200/_query?format=txt&pretty' -H 'Content-Type: application/json' '-d
{
  "query": "FROM \"remot:e\":existing_index"
} 
'
{
  "error" : {
    "root_cause" : [
      {
        "type" : "no_such_remote_cluster_exception",
        "reason" : "no such remote cluster: [remot]"
      }
    ],
    "type" : "no_such_remote_cluster_exception",
    "reason" : "no such remote cluster: [remot]"
  },
  "status" : 404
}
+ curl -u elastic:password -X POST 'localhost:9200/_query?format=txt&pretty' -H 'Content-Type: application/json' '-d
{
  "query": "FROM \"remote:\":existing_index"
} 
'
{
  "error" : {
    "root_cause" : [
      {
        "type" : "invalid_index_name_exception",
        "reason" : "Invalid index name [remote::existing_index], Invalid usage of :: separator, [existing_index] is not a recognized selector",
        "index_uuid" : "_na_",
        "index" : "remote::existing_index"
      }
    ],
    "type" : "invalid_index_name_exception",
    "reason" : "Invalid index name [remote::existing_index], Invalid usage of :: separator, [existing_index] is not a recognized selector",
    "index_uuid" : "_na_",
    "index" : "remote::existing_index"
  },
  "status" : 400
}

Copy link
Contributor Author

@idegtiarenko idegtiarenko Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RemoteClusterAware.isRemoteIndexName, : is a valid character in the index pattern used to separate cluster and index name, it is not a valid character for index name, I wonder if it is a valid character for the cluster name?

I believe : cannot be used as a character in a cluster name.

According to

/**
* Split the index name into remote cluster alias and index name.
* The index expression is assumed to be individual index (no commas) but can contain `-`, wildcards,
* datemath, remote cluster name and any other syntax permissible in index expression component.
* There's no guarantee the components actually represent existing remote cluster or index, only
* rudimentary checks are done on the syntax.
*/
public static String[] splitIndexName(String indexExpression) {
if (indexExpression.isEmpty() || indexExpression.charAt(0) == '<' || indexExpression.startsWith("-<")) {
// This is date math, but even if it is not, the remote can't start with '<'.
// Thus, whatever it is, this is definitely not a remote index.
return new String[] { null, indexExpression };
}
int i = indexExpression.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR);
if (i == 0) {
throw new IllegalArgumentException("index name [" + indexExpression + "] is invalid because the remote part is empty");
}
if (i < 0 || indexExpression.startsWith(SelectorResolver.SELECTOR_SEPARATOR, i)) {
// Either no colon present, or the colon was a part of a selector separator (::)
return new String[] { null, indexExpression };
} else {
return new String[] { indexExpression.substring(0, i), indexExpression.substring(i + 1) };
}
}

We rely on finding the first : when splitting indexExpression into the cluster name and the index pattern. When multiple : are used, this leaves an index pattern that contains :, which is not permitted.

Also when registering a remote with : I am getting the following:

PUT http://localhost:9200/_cluster/settings
Content-Type: application/json

{
  "persistent" : {
    "cluster.remote.remote:1.seeds" : ["127.0.0.1:9301"]
  }
}
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "persistent setting [cluster.remote.remote:1.seeds], not recognized"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "persistent setting [cluster.remote.remote:1.seeds], not recognized"
  },
  "status": 400
}

I believe the above indicates that we do not support : in cluster names.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added basic cluster string validation in d58db78.

Please note that the index patterns below are not checked:

FROM remote:invalid:index
FROM remote:"invalid:index"

as we skip all validation when detecting remote:

if (clusterString == null) {
hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get());
validateIndexPattern(indexPattern, c, hasSeenStar.get());
} else {

if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster
continue;
}

var patterns = randomList(1, 5, () -> {
String pattern = randomIndexIdentifier();// index or alias
if (randomBoolean()) {// pattern
pattern += "*";
}
if (randomBoolean()) {// quoted
pattern = "\"" + pattern + "\"";
}
if (randomBoolean()) {// remote cluster
var cluster = randomIdentifier();
if (randomBoolean()) {// quoted
cluster = "\"" + cluster + "\"";
}
pattern = cluster + ":" + pattern;
}
if (pattern.contains(":") && pattern.contains("\"") == false) {// quote entire "cluster:index"
pattern = "\"" + pattern + "\"";
}
return pattern;
});

var plan = statement("FROM " + String.join(",", patterns));
var expected = String.join(",", patterns).replace("\"", "");

assertThat(plan, instanceOf(UnresolvedRelation.class));
assertThat(((UnresolvedRelation) plan).table().index(), equalTo(expected));
}

/**
 * Builds a random index name for the parser tests.
 *
 * The name consists of an optional leading {@code '.'} (added when {@code randomInt(9)} yields 0),
 * one character taken from the "valid first characters" set, and then a tail of characters taken
 * from the wider "valid characters" set. The tail length is a single draw of
 * {@code randomIntBetween(1, 100)}.
 *
 * @return the generated index name
 */
private static String randomIndexIdentifier() {
    // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params
    var validFirstCharacters = "abcdefghijklmnopqrstuvwxyz0123456789!'$^&";
    var validCharacters = validFirstCharacters + "+-_.";

    var index = new StringBuilder();
    if (randomInt(9) == 0) {// hidden index
        index.append('.');
    }
    index.append(randomCharacterFrom(validFirstCharacters));
    // Draw the tail length once. Calling randomIntBetween inside the loop condition would pick a new
    // bound on every iteration, so the loop would usually stop early and skew names toward short lengths.
    int tailLength = randomIntBetween(1, 100);
    for (int i = 0; i < tailLength; i++) {
        index.append(randomCharacterFrom(validCharacters));
    }
    return index.toString();
}

/**
 * Returns one character of {@code str}, at the position supplied by {@code randomInt}.
 *
 * @param str the string to pick a character from; the bound passed to {@code randomInt} is
 *            {@code str.length() - 1}, so this presumably expects a non-empty string (TODO confirm)
 * @return the character found at the chosen position
 */
private static char randomCharacterFrom(String str) {
    final int lastIndex = str.length() - 1;
    final int position = randomInt(lastIndex);
    return str.charAt(position);
}

public void testInvalidQuotingAsFromIndexPattern() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should also add tests for invalid quoting of the remote name itself for good measure.

expectError("FROM \"foo", ": token recognition error at: '\"foo'");
expectError("FROM \"foo | LIMIT 1", ": token recognition error at: '\"foo | LIMIT 1'");
Expand Down