From 75d949fe325be849dbb061caf02b7ca4c33bdb55 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Sat, 15 Mar 2025 20:37:13 -0700 Subject: [PATCH] ESQL: Catch parsing exception (#124958) When an invalid popMode occurs (due to an invalid query), ANTLR throws an out-of-channel exception, bypassing the existing checks. This PR extends the checks and properly reports the error back to the user Fix #119025 (cherry picked from commit dc15462ca4f97cf622a284afa8ac0e797f02f8f1) # Conflicts: # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java --- docs/changelog/124958.yaml | 6 ++ .../xpack/esql/parser/EsqlParser.java | 56 +++++++++++++++++-- .../esql/parser/StatementParserTests.java | 7 +++ 3 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/124958.yaml diff --git a/docs/changelog/124958.yaml b/docs/changelog/124958.yaml new file mode 100644 index 0000000000000..be7f646b7dcae --- /dev/null +++ b/docs/changelog/124958.yaml @@ -0,0 +1,6 @@ +pr: 124958 +summary: Catch parsing exception +area: ES|QL +type: bug +issues: + - 119025 \ No newline at end of file diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java index c33cfdf7a320d..a8ee18d8b2777 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java @@ -14,6 +14,7 @@ import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.TokenSource; +import org.antlr.v4.runtime.VocabularyImpl; import org.antlr.v4.runtime.atn.PredictionMode; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -23,6 +24,8 @@ import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry; import java.util.BitSet; +import java.util.EmptyStackException; +import java.util.Map; import java.util.function.BiFunction; import java.util.function.Function; import java.util.regex.Matcher; @@ -45,6 +48,45 @@ public class EsqlParser { */ public static final int MAX_LENGTH = 1_000_000; + private static void replaceSymbolWithLiteral(Map symbolReplacements, String[] literalNames, String[] symbolicNames) { + for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) { + String symName = symbolicNames[i]; + if (symName != null) { + String replacement = symbolReplacements.get(symName); + if (replacement != null && literalNames[i] == null) { + // literals are single quoted + literalNames[i] = "'" + replacement + "'"; + replacements--; + } + } + } + } + + /** + * Add the literal name to a number of tokens that due to ANTLR internals/ATN + * have their symbolic name returns instead during error reporting. + * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName + * (if set), otherwise falls back to the literal one and eventually uses the symbol name. + * Since the Vocabulary is static and not pluggable, this code modifies the underlying + * arrays by setting the literal string manually based on the token index. + * This is needed since some symbols, especially around setting up the mode, end up losing + * their literal representation. + * NB: this code is highly dependent on the ANTLR internals and thus will likely break + * during upgrades. + * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration + */ + static { + Map symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "["); + + // the vocabularies have the same content however are different instances + // for extra reliability, perform the replacement for each map + VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY; + replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames()); + + VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY; + replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames()); + } + private EsqlConfig config = new EsqlConfig(); public EsqlConfig config() { @@ -112,6 +154,9 @@ private T invokeParser( return result.apply(new AstBuilder(new ExpressionBuilder.ParsingContext(params, metrics)), tree); } catch (StackOverflowError e) { throw new ParsingException("ESQL statement is too large, causing stack overflow when generating the parsing tree: [{}]", query); + // likely thrown by an invalid popMode (such as extra closing parenthesis) + } catch (EmptyStackException ese) { + throw new ParsingException("Invalid query [{}]", query); } } @@ -142,11 +187,14 @@ public void syntaxError( String message, RecognitionException e ) { - if (recognizer instanceof EsqlBaseParser parser && parser.isDevVersion() == false) { - Matcher m = REPLACE_DEV.matcher(message); - message = m.replaceAll(StringUtils.EMPTY); - } + if (recognizer instanceof EsqlBaseParser parser) { + Matcher m; + if (parser.isDevVersion() == false) { + m = REPLACE_DEV.matcher(message); + message = m.replaceAll(StringUtils.EMPTY); + } + } throw new ParsingException(message, e, line, charPositionInLine); } }; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 13549056b1a55..e15c41508b02d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -3751,4 +3751,11 @@ public void testInvalidDoubleParamsType() { ); } } + + public void testUnclosedParenthesis() { + String[] queries = { "row a = )", "row ]", "from source | eval x = [1,2,3]]" }; + for (String q : queries) { + expectError(q, "Invalid query"); + } + } }