1414import org .antlr .v4 .runtime .Recognizer ;
1515import org .antlr .v4 .runtime .Token ;
1616import org .antlr .v4 .runtime .TokenSource ;
17+ import org .antlr .v4 .runtime .VocabularyImpl ;
1718import org .antlr .v4 .runtime .atn .PredictionMode ;
1819import org .elasticsearch .logging .LogManager ;
1920import org .elasticsearch .logging .Logger ;
2324import org .elasticsearch .xpack .esql .telemetry .PlanTelemetry ;
2425
2526import java .util .BitSet ;
27+ import java .util .Map ;
2628import java .util .function .BiFunction ;
2729import java .util .function .Function ;
2830import java .util .regex .Matcher ;
@@ -45,6 +47,45 @@ public class EsqlParser {
4547 */
4648 public static final int MAX_LENGTH = 1_000_000 ;
4749
50+ private static void replaceSymbolWithLiteral (Map <String , String > symbolReplacements , String [] literalNames , String [] symbolicNames ) {
51+ for (int i = 0 , replacements = symbolReplacements .size (); i < symbolicNames .length && replacements > 0 ; i ++) {
52+ String symName = symbolicNames [i ];
53+ if (symName != null ) {
54+ String replacement = symbolReplacements .get (symName );
55+ if (replacement != null && literalNames [i ] == null ) {
56+ // literals are single quoted
57+ literalNames [i ] = "'" + replacement + "'" ;
58+ replacements --;
59+ }
60+ }
61+ }
62+ }
63+
64+ /**
65+ * Add the literal name to a number of tokens that due to ANTLR internals/ATN
66+ * have their symbolic name returns instead during error reporting.
67+ * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
68+ * (if set), otherwise falls back to the literal one and eventually uses the symbol name.
69+ * Since the Vocabulary is static and not pluggable, this code modifies the underlying
70+ * arrays by setting the literal string manually based on the token index.
71+ * This is needed since some symbols, especially around setting up the mode, end up losing
72+ * their literal representation.
73+ * NB: this code is highly dependent on the ANTLR internals and thus will likely break
74+ * during upgrades.
75+ * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
76+ */
77+ static {
78+ Map <String , String > symbolReplacements = Map .of ("LP" , "(" , "OPENING_BRACKET" , "[" );
79+
80+ // the vocabularies have the same content however are different instances
81+ // for extra reliability, perform the replacement for each map
82+ VocabularyImpl parserVocab = (VocabularyImpl ) EsqlBaseParser .VOCABULARY ;
83+ replaceSymbolWithLiteral (symbolReplacements , parserVocab .getLiteralNames (), parserVocab .getSymbolicNames ());
84+
85+ VocabularyImpl lexerVocab = (VocabularyImpl ) EsqlBaseLexer .VOCABULARY ;
86+ replaceSymbolWithLiteral (symbolReplacements , lexerVocab .getLiteralNames (), lexerVocab .getSymbolicNames ());
87+ }
88+
4889 private EsqlConfig config = new EsqlConfig ();
4990
5091 public EsqlConfig config () {
@@ -142,11 +183,14 @@ public void syntaxError(
142183 String message ,
143184 RecognitionException e
144185 ) {
145- if (recognizer instanceof EsqlBaseParser parser && parser .isDevVersion () == false ) {
146- Matcher m = REPLACE_DEV .matcher (message );
147- message = m .replaceAll (StringUtils .EMPTY );
148- }
186+ if (recognizer instanceof EsqlBaseParser parser ) {
187+ Matcher m ;
149188
189+ if (parser .isDevVersion () == false ) {
190+ m = REPLACE_DEV .matcher (message );
191+ message = m .replaceAll (StringUtils .EMPTY );
192+ }
193+ }
150194 throw new ParsingException (message , e , line , charPositionInLine );
151195 }
152196 };
0 commit comments