1414import org .antlr .v4 .runtime .Recognizer ;
1515import org .antlr .v4 .runtime .Token ;
1616import org .antlr .v4 .runtime .TokenSource ;
17+ import org .antlr .v4 .runtime .VocabularyImpl ;
1718import org .antlr .v4 .runtime .atn .PredictionMode ;
1819import org .elasticsearch .logging .LogManager ;
1920import org .elasticsearch .logging .Logger ;
2021import org .elasticsearch .xpack .esql .core .util .StringUtils ;
22+ import org .elasticsearch .xpack .esql .expression .function .EsqlFunctionRegistry ;
2123import org .elasticsearch .xpack .esql .plan .logical .LogicalPlan ;
24+ import org .elasticsearch .xpack .esql .telemetry .PlanTelemetry ;
2225
2326import java .util .BitSet ;
27+ import java .util .EmptyStackException ;
28+ import java .util .Map ;
2429import java .util .function .BiFunction ;
2530import java .util .function .Function ;
2631import java .util .regex .Matcher ;
2732import java .util .regex .Pattern ;
2833
2934import static org .elasticsearch .xpack .esql .core .util .StringUtils .isInteger ;
35+ import static org .elasticsearch .xpack .esql .parser .ParserUtils .nameOrPosition ;
3036import static org .elasticsearch .xpack .esql .parser .ParserUtils .source ;
3137
3238public class EsqlParser {
3339
3440 private static final Logger log = LogManager .getLogger (EsqlParser .class );
3541
42+ /**
43+ * Maximum number of characters in an ESQL query. Antlr may parse the entire
44+ * query into tokens to make the choices, buffering the world. There's a lot we
45+ * can do in the grammar to prevent that, but let's be paranoid and assume we'll
46+ * fail at preventing antlr from slurping in the world. Instead, let's make sure
47+ * that the world just isn't that big.
48+ */
49+ public static final int MAX_LENGTH = 1_000_000 ;
50+
51+ private static void replaceSymbolWithLiteral (Map <String , String > symbolReplacements , String [] literalNames , String [] symbolicNames ) {
52+ for (int i = 0 , replacements = symbolReplacements .size (); i < symbolicNames .length && replacements > 0 ; i ++) {
53+ String symName = symbolicNames [i ];
54+ if (symName != null ) {
55+ String replacement = symbolReplacements .get (symName );
56+ if (replacement != null && literalNames [i ] == null ) {
57+ // literals are single quoted
58+ literalNames [i ] = "'" + replacement + "'" ;
59+ replacements --;
60+ }
61+ }
62+ }
63+ }
64+
65+ /**
66+ * Add the literal name to a number of tokens that due to ANTLR internals/ATN
67+ * have their symbolic name returns instead during error reporting.
68+ * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
69+ * (if set), otherwise falls back to the literal one and eventually uses the symbol name.
70+ * Since the Vocabulary is static and not pluggable, this code modifies the underlying
71+ * arrays by setting the literal string manually based on the token index.
72+ * This is needed since some symbols, especially around setting up the mode, end up losing
73+ * their literal representation.
74+ * NB: this code is highly dependent on the ANTLR internals and thus will likely break
75+ * during upgrades.
76+ * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
77+ */
78+ static {
79+ Map <String , String > symbolReplacements = Map .of ("LP" , "(" , "OPENING_BRACKET" , "[" );
80+
81+ // the vocabularies have the same content however are different instances
82+ // for extra reliability, perform the replacement for each map
83+ VocabularyImpl parserVocab = (VocabularyImpl ) EsqlBaseParser .VOCABULARY ;
84+ replaceSymbolWithLiteral (symbolReplacements , parserVocab .getLiteralNames (), parserVocab .getSymbolicNames ());
85+
86+ VocabularyImpl lexerVocab = (VocabularyImpl ) EsqlBaseLexer .VOCABULARY ;
87+ replaceSymbolWithLiteral (symbolReplacements , lexerVocab .getLiteralNames (), lexerVocab .getSymbolicNames ());
88+ }
89+
3690 private EsqlConfig config = new EsqlConfig ();
3791
3892 public EsqlConfig config () {
@@ -43,25 +97,34 @@ public void setEsqlConfig(EsqlConfig config) {
4397 this .config = config ;
4498 }
4599
100+ // testing utility
46101 public LogicalPlan createStatement (String query ) {
47102 return createStatement (query , new QueryParams ());
48103 }
49104
105+ // testing utility
50106 public LogicalPlan createStatement (String query , QueryParams params ) {
107+ return createStatement (query , params , new PlanTelemetry (new EsqlFunctionRegistry ()));
108+ }
109+
110+ public LogicalPlan createStatement (String query , QueryParams params , PlanTelemetry metrics ) {
51111 if (log .isDebugEnabled ()) {
52112 log .debug ("Parsing as statement: {}" , query );
53113 }
54- return invokeParser (query , params , EsqlBaseParser ::singleStatement , AstBuilder ::plan );
114+ return invokeParser (query , params , metrics , EsqlBaseParser ::singleStatement , AstBuilder ::plan );
55115 }
56116
57117 private <T > T invokeParser (
58118 String query ,
59119 QueryParams params ,
120+ PlanTelemetry metrics ,
60121 Function <EsqlBaseParser , ParserRuleContext > parseFunction ,
61122 BiFunction <AstBuilder , ParserRuleContext , T > result
62123 ) {
124+ if (query .length () > MAX_LENGTH ) {
125+ throw new ParsingException ("ESQL statement is too large [{} characters > {}]" , query .length (), MAX_LENGTH );
126+ }
63127 try {
64- // new CaseChangingCharStream()
65128 EsqlBaseLexer lexer = new EsqlBaseLexer (CharStreams .fromString (query ));
66129
67130 lexer .removeErrorListeners ();
@@ -88,9 +151,12 @@ private <T> T invokeParser(
88151 log .trace ("Parse tree: {}" , tree .toStringTree ());
89152 }
90153
91- return result .apply (new AstBuilder (params ), tree );
154+ return result .apply (new AstBuilder (new ExpressionBuilder . ParsingContext ( params , metrics ) ), tree );
92155 } catch (StackOverflowError e ) {
93156 throw new ParsingException ("ESQL statement is too large, causing stack overflow when generating the parsing tree: [{}]" , query );
157+ // likely thrown by an invalid popMode (such as extra closing parenthesis)
158+ } catch (EmptyStackException ese ) {
159+ throw new ParsingException ("Invalid query [{}]" , query );
94160 }
95161 }
96162
@@ -121,11 +187,14 @@ public void syntaxError(
121187 String message ,
122188 RecognitionException e
123189 ) {
124- if (recognizer instanceof EsqlBaseParser parser && parser .isDevVersion () == false ) {
125- Matcher m = REPLACE_DEV .matcher (message );
126- message = m .replaceAll (StringUtils .EMPTY );
127- }
190+ if (recognizer instanceof EsqlBaseParser parser ) {
191+ Matcher m ;
128192
193+ if (parser .isDevVersion () == false ) {
194+ m = REPLACE_DEV .matcher (message );
195+ message = m .replaceAll (StringUtils .EMPTY );
196+ }
197+ }
129198 throw new ParsingException (message , e , line , charPositionInLine );
130199 }
131200 };
@@ -152,7 +221,7 @@ private static class ParametrizedTokenSource extends DelegatingTokenSource {
152221 @ Override
153222 public Token nextToken () {
154223 Token token = delegate .nextToken ();
155- if (token .getType () == EsqlBaseLexer .PARAM ) {
224+ if (token .getType () == EsqlBaseLexer .PARAM || token . getType () == EsqlBaseLexer . DOUBLE_PARAMS ) {
156225 checkAnonymousParam (token );
157226 if (param > params .size ()) {
158227 throw new ParsingException (source (token ), "Not enough actual parameters {}" , params .size ());
@@ -161,8 +230,9 @@ public Token nextToken() {
161230 param ++;
162231 }
163232
164- if (token .getType () == EsqlBaseLexer .NAMED_OR_POSITIONAL_PARAM ) {
165- if (isInteger (token .getText ().substring (1 ))) {
233+ String nameOrPosition = nameOrPosition (token );
234+ if (nameOrPosition .isBlank () == false ) {
235+ if (isInteger (nameOrPosition )) {
166236 checkPositionalParam (token );
167237 } else {
168238 checkNamedParam (token );
0 commit comments