Skip to content

Commit 053a886

Browse files
costincbuescher
authored and committed
ESQL: add conditional runtime parsing through semantic predicates (elastic#111995)
Introducing conditional parsing (and versioning) in the grammar (both lexer and parser). That is, make sure that the underlying grammar gets parsed only if certain conditions are met, such as: - in-development code is picked up only in the snapshot branch - new commands are not available on older versions (clusters with old nodes) - deprecated grammar is no longer allowed once removed. The goal of this branch is to test the concept and see whether it works both on the backend (Java) and the front-end (JavaScript), since the underlying predicates (aka conditions) are language-specific.
1 parent 4cb0b68 commit 053a886

19 files changed

+2921
-2516
lines changed

x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4

Lines changed: 149 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,99 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
17
lexer grammar EsqlBaseLexer;
28

9+
@header {
10+
/*
11+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
12+
* or more contributor license agreements. Licensed under the Elastic License
13+
* 2.0; you may not use this file except in compliance with the Elastic License
14+
* 2.0.
15+
*/
16+
}
17+
18+
options {
19+
superClass=LexerConfig;
20+
caseInsensitive=true;
21+
}
22+
23+
/*
24+
* Before modifying this file, please read the guidelines below as changes here
25+
* have a significant impact on the ANTLR-generated code and its consumption upstream
26+
* (including Kibana).
27+
*
28+
* A. To add a development token (only available in snapshot/dev builds)
29+
*
30+
* Since the tokens/modes are in development, simply define them under the
31+
* "// in development section" and follow the section comments in that section.
32+
* That is, use the DEV_ prefix and the {isDevVersion()}? conditional.
33+
* They are defined at the end of the file, to minimize the impact on the existing
34+
* token types.
35+
*
36+
* B. To add a new (production-ready) token
37+
*
38+
* Be sure to go through step A (add a development token).
39+
* Make sure to remove the prefix and conditional before promoting the tokens in
40+
* production.
41+
* Since token types (numbers) are generated by ANTLR in a continuous fashion,
42+
* it is desirable to avoid changing these values hence where possible, add
43+
* them at the end of their respective section.
44+
* Note that the use of lexing modes prevents this since any addition to a mode
45+
* (regardless where it occurs) shifts all the declarations that follow in other modes.
46+
*
47+
* C. Renaming a token
48+
*
49+
* Avoid renaming the token. But if you really have to, please check with the
50+
* Kibana team as they might be using the generated ANTLR "dictionary".
51+
*
52+
* D. To remove a token
53+
*
54+
* If the tokens haven't made it to production (and make sure to double check),
55+
* simply remove them from the grammar.
56+
* If the tokens have been promoted to a release, check with the Kibana team about the impact
57+
* they have on the UI (auto-completion, etc...)
58+
*/
59+
360
DISSECT : 'dissect' -> pushMode(EXPRESSION_MODE);
461
DROP : 'drop' -> pushMode(PROJECT_MODE);
562
ENRICH : 'enrich' -> pushMode(ENRICH_MODE);
663
EVAL : 'eval' -> pushMode(EXPRESSION_MODE);
764
EXPLAIN : 'explain' -> pushMode(EXPLAIN_MODE);
865
FROM : 'from' -> pushMode(FROM_MODE);
966
GROK : 'grok' -> pushMode(EXPRESSION_MODE);
10-
INLINESTATS : 'inlinestats' -> pushMode(EXPRESSION_MODE);
1167
KEEP : 'keep' -> pushMode(PROJECT_MODE);
1268
LIMIT : 'limit' -> pushMode(EXPRESSION_MODE);
13-
LOOKUP : 'lookup' -> pushMode(LOOKUP_MODE);
1469
META : 'meta' -> pushMode(META_MODE);
15-
METRICS : 'metrics' -> pushMode(METRICS_MODE);
1670
MV_EXPAND : 'mv_expand' -> pushMode(MVEXPAND_MODE);
1771
RENAME : 'rename' -> pushMode(RENAME_MODE);
1872
ROW : 'row' -> pushMode(EXPRESSION_MODE);
1973
SHOW : 'show' -> pushMode(SHOW_MODE);
2074
SORT : 'sort' -> pushMode(EXPRESSION_MODE);
2175
STATS : 'stats' -> pushMode(EXPRESSION_MODE);
2276
WHERE : 'where' -> pushMode(EXPRESSION_MODE);
23-
MATCH : 'match' -> pushMode(EXPRESSION_MODE);
24-
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE);
77+
//
78+
// in development
79+
//
80+
// Before adding a new in-development command, guard it with the {isDevVersion()}? conditional to sandbox the behavior when running in production environments
81+
//
82+
// For example: to add myCommand use the following declaration:
83+
// DEV_MYCOMMAND : {isDevVersion()}? 'mycommand' -> ...
84+
//
85+
// Once the command has been stabilized, remove the DEV_ prefix and the {}? conditional and move the command to the
86+
// main section while preserving alphabetical order:
87+
// MYCOMMAND : 'mycommand' -> ...
88+
DEV_INLINESTATS : {isDevVersion()}? 'inlinestats' -> pushMode(EXPRESSION_MODE);
89+
DEV_LOOKUP : {isDevVersion()}? 'lookup' -> pushMode(LOOKUP_MODE);
90+
DEV_MATCH : {isDevVersion()}? 'match' -> pushMode(EXPRESSION_MODE);
91+
DEV_METRICS : {isDevVersion()}? 'metrics' -> pushMode(METRICS_MODE);
92+
93+
//
94+
// Catch-all for unrecognized commands - don't define any beyond this line
95+
//
96+
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE) ;
2597

2698
LINE_COMMENT
2799
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
@@ -35,27 +107,6 @@ WS
35107
: [ \r\n\t]+ -> channel(HIDDEN)
36108
;
37109

38-
// in 8.14 ` were not allowed
39-
// this has been relaxed in 8.15 since " is used for quoting
40-
fragment UNQUOTED_SOURCE_PART
41-
: ~[:"=|,[\]/ \t\r\n]
42-
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
43-
;
44-
45-
UNQUOTED_SOURCE
46-
: UNQUOTED_SOURCE_PART+
47-
;
48-
49-
//
50-
// Explain
51-
//
52-
mode EXPLAIN_MODE;
53-
EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
54-
EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
55-
EXPLAIN_WS : WS -> channel(HIDDEN);
56-
EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
57-
EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);
58-
59110
//
60111
// Expression - used by most commands
61112
//
@@ -68,7 +119,7 @@ fragment DIGIT
68119
;
69120

70121
fragment LETTER
71-
: [A-Za-z]
122+
: [a-z]
72123
;
73124

74125
fragment ESCAPE_SEQUENCE
@@ -80,7 +131,7 @@ fragment UNESCAPED_CHARS
80131
;
81132

82133
fragment EXPONENT
83-
: [Ee] [+-]? DIGIT+
134+
: [e] [+-]? DIGIT+
84135
;
85136

86137
fragment ASPERAND
@@ -136,7 +187,6 @@ IS: 'is';
136187
LAST : 'last';
137188
LIKE: 'like';
138189
LP : '(';
139-
MATCH_OPERATOR: 'match';
140190
NOT : 'not';
141191
NULL : 'null';
142192
NULLS : 'nulls';
@@ -160,6 +210,9 @@ ASTERISK : '*';
160210
SLASH : '/';
161211
PERCENT : '%';
162212

213+
// move it to the main section if the feature gets promoted
214+
DEV_MATCH_OP : {isDevVersion()}? DEV_MATCH -> type(DEV_MATCH);
215+
163216
NAMED_OR_POSITIONAL_PARAM
164217
: PARAM (LETTER | UNDERSCORE) UNQUOTED_ID_BODY*
165218
| PARAM DIGIT+
@@ -199,6 +252,17 @@ EXPR_MULTILINE_COMMENT
199252
EXPR_WS
200253
: WS -> channel(HIDDEN)
201254
;
255+
256+
//
257+
// Explain
258+
//
259+
mode EXPLAIN_MODE;
260+
EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
261+
EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
262+
EXPLAIN_WS : WS -> channel(HIDDEN);
263+
EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
264+
EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);
265+
202266
//
203267
// FROM command
204268
//
@@ -211,6 +275,17 @@ FROM_COMMA : COMMA -> type(COMMA);
211275
FROM_ASSIGN : ASSIGN -> type(ASSIGN);
212276
METADATA : 'metadata';
213277

278+
// in 8.14, backticks (`) were not allowed
279+
// this has been relaxed in 8.15 since " is used for quoting
280+
fragment UNQUOTED_SOURCE_PART
281+
: ~[:"=|,[\]/ \t\r\n]
282+
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
283+
;
284+
285+
UNQUOTED_SOURCE
286+
: UNQUOTED_SOURCE_PART+
287+
;
288+
214289
FROM_UNQUOTED_SOURCE : UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
215290
FROM_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
216291
@@ -348,50 +423,6 @@ ENRICH_FIELD_WS
348423
: WS -> channel(HIDDEN)
349424
;
350425

351-
// LOOKUP ON key
352-
mode LOOKUP_MODE;
353-
LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
354-
LOOKUP_COLON : COLON -> type(COLON);
355-
LOOKUP_COMMA : COMMA -> type(COMMA);
356-
LOOKUP_DOT: DOT -> type(DOT);
357-
LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);
358-
359-
LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
360-
LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
361-
362-
LOOKUP_LINE_COMMENT
363-
: LINE_COMMENT -> channel(HIDDEN)
364-
;
365-
366-
LOOKUP_MULTILINE_COMMENT
367-
: MULTILINE_COMMENT -> channel(HIDDEN)
368-
;
369-
370-
LOOKUP_WS
371-
: WS -> channel(HIDDEN)
372-
;
373-
374-
mode LOOKUP_FIELD_MODE;
375-
LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
376-
LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
377-
LOOKUP_FIELD_DOT: DOT -> type(DOT);
378-
379-
LOOKUP_FIELD_ID_PATTERN
380-
: ID_PATTERN -> type(ID_PATTERN)
381-
;
382-
383-
LOOKUP_FIELD_LINE_COMMENT
384-
: LINE_COMMENT -> channel(HIDDEN)
385-
;
386-
387-
LOOKUP_FIELD_MULTILINE_COMMENT
388-
: MULTILINE_COMMENT -> channel(HIDDEN)
389-
;
390-
391-
LOOKUP_FIELD_WS
392-
: WS -> channel(HIDDEN)
393-
;
394-
395426
mode MVEXPAND_MODE;
396427
MVEXPAND_PIPE : PIPE -> type(PIPE), popMode;
397428
MVEXPAND_DOT: DOT -> type(DOT);
@@ -477,6 +508,51 @@ SETTING_WS
477508
: WS -> channel(HIDDEN)
478509
;
479510

511+
//
512+
// LOOKUP ON key
513+
//
514+
mode LOOKUP_MODE;
515+
LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
516+
LOOKUP_COLON : COLON -> type(COLON);
517+
LOOKUP_COMMA : COMMA -> type(COMMA);
518+
LOOKUP_DOT: DOT -> type(DOT);
519+
LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);
520+
521+
LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
522+
LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
523+
524+
LOOKUP_LINE_COMMENT
525+
: LINE_COMMENT -> channel(HIDDEN)
526+
;
527+
528+
LOOKUP_MULTILINE_COMMENT
529+
: MULTILINE_COMMENT -> channel(HIDDEN)
530+
;
531+
532+
LOOKUP_WS
533+
: WS -> channel(HIDDEN)
534+
;
535+
536+
mode LOOKUP_FIELD_MODE;
537+
LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
538+
LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
539+
LOOKUP_FIELD_DOT: DOT -> type(DOT);
540+
541+
LOOKUP_FIELD_ID_PATTERN
542+
: ID_PATTERN -> type(ID_PATTERN)
543+
;
544+
545+
LOOKUP_FIELD_LINE_COMMENT
546+
: LINE_COMMENT -> channel(HIDDEN)
547+
;
548+
549+
LOOKUP_FIELD_MULTILINE_COMMENT
550+
: MULTILINE_COMMENT -> channel(HIDDEN)
551+
;
552+
553+
LOOKUP_FIELD_WS
554+
: WS -> channel(HIDDEN)
555+
;
480556

481557
//
482558
// METRICS command

0 commit comments

Comments
 (0)