From f7762528dca31beb14b41a838c671411467340c8 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Sun, 30 Nov 2025 19:32:41 -0800 Subject: [PATCH] Improve performance when parsing invalid queries Use BailErrorStrategy as recommended by the ANTLR team. --- .../io/trino/sql/jsonpath/PathParser.java | 11 +++-- .../java/io/trino/sql/parser/SqlParser.java | 40 ++++++++++--------- .../java/io/trino/type/TypeCalculation.java | 11 +++-- .../expression/ExpressionMappingParser.java | 11 +++-- .../expression/SparkExpressionParser.java | 11 +++-- 5 files changed, 49 insertions(+), 35 deletions(-) diff --git a/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java b/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java index 8da4a6a4db45..331434df6684 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java +++ b/core/trino-parser/src/main/java/io/trino/sql/jsonpath/PathParser.java @@ -19,16 +19,19 @@ import io.trino.sql.jsonpath.tree.PathNode; import io.trino.sql.parser.ParsingException; import io.trino.sql.tree.NodeLocation; +import org.antlr.v4.runtime.BailErrorStrategy; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonToken; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.DefaultErrorStrategy; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RecognitionException; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.misc.Pair; +import org.antlr.v4.runtime.misc.ParseCancellationException; import org.antlr.v4.runtime.tree.TerminalNode; import java.util.Arrays; @@ -95,20 +98,20 @@ public PathNode parseJsonPath(String path) lexer.addErrorListener(errorListener); parser.removeErrorListeners(); - parser.addErrorListener(errorListener); ParserRuleContext tree; try { // first, try parsing with potentially faster SLL mode parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + parser.setErrorHandler(new BailErrorStrategy()); tree = parser.path(); } - catch (ParsingException ex) { + catch (ParseCancellationException e) { // if we fail, parse with LL mode - tokenStream.seek(0); // rewind input stream parser.reset(); - parser.getInterpreter().setPredictionMode(PredictionMode.LL); + parser.setErrorHandler(new DefaultErrorStrategy()); + parser.addErrorListener(errorListener); tree = parser.path(); } diff --git a/core/trino-parser/src/main/java/io/trino/sql/parser/SqlParser.java b/core/trino-parser/src/main/java/io/trino/sql/parser/SqlParser.java index 127b287f3b6e..51b1d688a563 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/parser/SqlParser.java +++ b/core/trino-parser/src/main/java/io/trino/sql/parser/SqlParser.java @@ -25,6 +25,7 @@ import io.trino.sql.tree.RowPattern; import io.trino.sql.tree.Statement; import org.antlr.v4.runtime.ANTLRErrorListener; +import org.antlr.v4.runtime.BailErrorStrategy; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonToken; @@ -39,6 +40,7 @@ import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.Pair; +import org.antlr.v4.runtime.misc.ParseCancellationException; import org.antlr.v4.runtime.tree.TerminalNode; import java.util.Arrays; @@ -144,42 +146,27 @@ private Node invokeParser(String name, String sql, Optional locati SqlBaseParser parser = new SqlBaseParser(tokenStream); initializer.accept(lexer, parser); - // Override the default error strategy to not attempt inserting or deleting a token. - // Otherwise, it messes up error reporting - parser.setErrorHandler(new DefaultErrorStrategy() - { - @Override - public Token recoverInline(Parser recognizer) - throws RecognitionException - { - if (nextTokensContext == null) { - throw new InputMismatchException(recognizer); - } - throw new InputMismatchException(recognizer, nextTokensState, nextTokensContext); - } - }); - parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames()), parser)); lexer.removeErrorListeners(); lexer.addErrorListener(LEXER_ERROR_LISTENER); parser.removeErrorListeners(); - parser.addErrorListener(PARSER_ERROR_HANDLER); ParserRuleContext tree; try { try { // first, try parsing with potentially faster SLL mode parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + parser.setErrorHandler(new BailErrorStrategy()); tree = parseFunction.apply(parser); } - catch (ParsingException ex) { + catch (ParseCancellationException e) { // if we fail, parse with LL mode - tokenStream.seek(0); // rewind input stream parser.reset(); - parser.getInterpreter().setPredictionMode(PredictionMode.LL); + parser.setErrorHandler(new NonRecoveringErrorStrategy()); + parser.addErrorListener(PARSER_ERROR_HANDLER); tree = parseFunction.apply(parser); } } @@ -203,6 +190,21 @@ public Token recoverInline(Parser recognizer) } } + // Override the default error strategy to not attempt inserting or deleting a token. + // Otherwise, it messes up error reporting. + private static final class NonRecoveringErrorStrategy + extends DefaultErrorStrategy + { + @Override + public Token recoverInline(Parser recognizer) + { + if (nextTokensContext == null) { + throw new InputMismatchException(recognizer); + } + throw new InputMismatchException(recognizer, nextTokensState, nextTokensContext); + } + } + private static class PostProcessor extends SqlBaseBaseListener { diff --git a/core/trino-parser/src/main/java/io/trino/type/TypeCalculation.java b/core/trino-parser/src/main/java/io/trino/type/TypeCalculation.java index d8ecc734cf57..1aa3dd0d5379 100644 --- a/core/trino-parser/src/main/java/io/trino/type/TypeCalculation.java +++ b/core/trino-parser/src/main/java/io/trino/type/TypeCalculation.java @@ -27,13 +27,16 @@ import io.trino.sql.parser.ParsingException; import io.trino.sql.tree.NodeLocation; import org.antlr.v4.runtime.ANTLRErrorListener; +import org.antlr.v4.runtime.BailErrorStrategy; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.DefaultErrorStrategy; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RecognitionException; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.misc.ParseCancellationException; import java.math.BigInteger; import java.util.Map; @@ -85,20 +88,20 @@ private static ParserRuleContext parseTypeCalculation(String calculation) lexer.addErrorListener(ERROR_LISTENER); parser.removeErrorListeners(); - parser.addErrorListener(ERROR_LISTENER); ParserRuleContext tree; try { // first, try parsing with potentially faster SLL mode parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + parser.setErrorHandler(new BailErrorStrategy()); tree = parser.typeCalculation(); } - catch (ParsingException ex) { + catch (ParseCancellationException e) { // if we fail, parse with LL mode - tokenStream.seek(0); // rewind input stream parser.reset(); - parser.getInterpreter().setPredictionMode(PredictionMode.LL); + parser.setErrorHandler(new DefaultErrorStrategy()); + parser.addErrorListener(ERROR_LISTENER); tree = parser.typeCalculation(); } return tree; diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ExpressionMappingParser.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ExpressionMappingParser.java index f06e20be4a76..709c8e1b8d62 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ExpressionMappingParser.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/expression/ExpressionMappingParser.java @@ -15,13 +15,16 @@ import com.google.common.collect.ImmutableMap; import org.antlr.v4.runtime.ANTLRErrorListener; +import org.antlr.v4.runtime.BailErrorStrategy; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.DefaultErrorStrategy; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RecognitionException; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.misc.ParseCancellationException; import java.util.Map; import java.util.Set; @@ -69,20 +72,20 @@ public Object invokeParser(String input, Function