Skip to content

Commit 22b1a43

Browse files
committed
ESQL: Catch parsing exception (elastic#124958)
When an invalid popMode occurs (due to an invalid query), ANTLR throws an out-of-channel exception, bypassing the existing checks. This PR extends the checks and properly reports the error back to the user Fix elastic#119025 (cherry picked from commit dc15462) # Conflicts: # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java
1 parent fda5483 commit 22b1a43

File tree

3 files changed

+1107
-73
lines changed

3 files changed

+1107
-73
lines changed

docs/changelog/124958.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 124958
2+
summary: Catch parsing exception
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 119025

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.antlr.v4.runtime.Recognizer;
1515
import org.antlr.v4.runtime.Token;
1616
import org.antlr.v4.runtime.TokenSource;
17+
import org.antlr.v4.runtime.VocabularyImpl;
1718
import org.antlr.v4.runtime.atn.PredictionMode;
1819
import org.elasticsearch.logging.LogManager;
1920
import org.elasticsearch.logging.Logger;
@@ -23,12 +24,15 @@
2324
import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;
2425

2526
import java.util.BitSet;
27+
import java.util.EmptyStackException;
28+
import java.util.Map;
2629
import java.util.function.BiFunction;
2730
import java.util.function.Function;
2831
import java.util.regex.Matcher;
2932
import java.util.regex.Pattern;
3033

3134
import static org.elasticsearch.xpack.esql.core.util.StringUtils.isInteger;
35+
import static org.elasticsearch.xpack.esql.parser.ParserUtils.nameOrPosition;
3236
import static org.elasticsearch.xpack.esql.parser.ParserUtils.source;
3337

3438
public class EsqlParser {
@@ -44,6 +48,45 @@ public class EsqlParser {
4448
*/
4549
public static final int MAX_LENGTH = 1_000_000;
4650

51+
private static void replaceSymbolWithLiteral(Map<String, String> symbolReplacements, String[] literalNames, String[] symbolicNames) {
52+
for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) {
53+
String symName = symbolicNames[i];
54+
if (symName != null) {
55+
String replacement = symbolReplacements.get(symName);
56+
if (replacement != null && literalNames[i] == null) {
57+
// literals are single quoted
58+
literalNames[i] = "'" + replacement + "'";
59+
replacements--;
60+
}
61+
}
62+
}
63+
}
64+
65+
/**
66+
* Add the literal name to a number of tokens that due to ANTLR internals/ATN
67+
* have their symbolic name returns instead during error reporting.
68+
* When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
69+
* (if set), otherwise falls back to the literal one and eventually uses the symbol name.
70+
* Since the Vocabulary is static and not pluggable, this code modifies the underlying
71+
* arrays by setting the literal string manually based on the token index.
72+
* This is needed since some symbols, especially around setting up the mode, end up losing
73+
* their literal representation.
74+
* NB: this code is highly dependent on the ANTLR internals and thus will likely break
75+
* during upgrades.
76+
* NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
77+
*/
78+
static {
79+
Map<String, String> symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "[");
80+
81+
// the vocabularies have the same content however are different instances
82+
// for extra reliability, perform the replacement for each map
83+
VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY;
84+
replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames());
85+
86+
VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY;
87+
replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames());
88+
}
89+
4790
private EsqlConfig config = new EsqlConfig();
4891

4992
public EsqlConfig config() {
@@ -111,6 +154,9 @@ private <T> T invokeParser(
111154
return result.apply(new AstBuilder(new ExpressionBuilder.ParsingContext(params, metrics)), tree);
112155
} catch (StackOverflowError e) {
113156
throw new ParsingException("ESQL statement is too large, causing stack overflow when generating the parsing tree: [{}]", query);
157+
// likely thrown by an invalid popMode (such as extra closing parenthesis)
158+
} catch (EmptyStackException ese) {
159+
throw new ParsingException("Invalid query [{}]", query);
114160
}
115161
}
116162

@@ -141,11 +187,14 @@ public void syntaxError(
141187
String message,
142188
RecognitionException e
143189
) {
144-
if (recognizer instanceof EsqlBaseParser parser && parser.isDevVersion() == false) {
145-
Matcher m = REPLACE_DEV.matcher(message);
146-
message = m.replaceAll(StringUtils.EMPTY);
147-
}
190+
if (recognizer instanceof EsqlBaseParser parser) {
191+
Matcher m;
148192

193+
if (parser.isDevVersion() == false) {
194+
m = REPLACE_DEV.matcher(message);
195+
message = m.replaceAll(StringUtils.EMPTY);
196+
}
197+
}
149198
throw new ParsingException(message, e, line, charPositionInLine);
150199
}
151200
};
@@ -172,7 +221,7 @@ private static class ParametrizedTokenSource extends DelegatingTokenSource {
172221
@Override
173222
public Token nextToken() {
174223
Token token = delegate.nextToken();
175-
if (token.getType() == EsqlBaseLexer.PARAM) {
224+
if (token.getType() == EsqlBaseLexer.PARAM || token.getType() == EsqlBaseLexer.DOUBLE_PARAMS) {
176225
checkAnonymousParam(token);
177226
if (param > params.size()) {
178227
throw new ParsingException(source(token), "Not enough actual parameters {}", params.size());
@@ -181,8 +230,9 @@ public Token nextToken() {
181230
param++;
182231
}
183232

184-
if (token.getType() == EsqlBaseLexer.NAMED_OR_POSITIONAL_PARAM) {
185-
if (isInteger(token.getText().substring(1))) {
233+
String nameOrPosition = nameOrPosition(token);
234+
if (nameOrPosition.isBlank() == false) {
235+
if (isInteger(nameOrPosition)) {
186236
checkPositionalParam(token);
187237
} else {
188238
checkNamedParam(token);

0 commit comments

Comments
 (0)