Skip to content

Commit bc210c6

Browse files
h3n4l and claude authored
feat: add BigQuery parser to monorepo (#33)
This commit adds the BigQuery parser from the standalone bq-parser repository into the unified parser monorepo. Changes: - Added BigQuery grammar files (BigQueryLexer.g4, BigQueryParser.g4) - Added test infrastructure with 8 example SQL files - Created Makefile for building and testing the parser - Updated package names from 'parser' to 'bq' - Updated import paths to github.com/bytebase/parser/bq - Updated CI workflow to include bq in the test matrix - Generated parser files using ANTLR 4 with Go target - All 8 tests passing successfully 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude <[email protected]>
1 parent 62f17e5 commit bc210c6

20 files changed

+15839
-1
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
id: set-matrix
3535
run: |
3636
# List of all available parsers
37-
ALL_PARSERS="redshift postgresql cql snowflake tsql doris trino plsql googlesql mysql partiql tidb"
37+
ALL_PARSERS="redshift postgresql cql snowflake tsql doris trino plsql googlesql mysql partiql tidb bq"
3838
# Add more parsers here as they are added to the repository
3939
# ALL_PARSERS="redshift mysql postgresql"
4040

bq/BigQueryLexer.g4

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
lexer grammar BigQueryLexer;
2+
3+
// ARRAY and STRUCT included in the list of BQ keywords instead of here
4+
QUOTE : '\'' ;
5+
DQOUTE : '"';
6+
SEMI : ';';
7+
8+
/*
9+
* BigQuery Keywords:
10+
* Based off the list of BigQuery Reserved Keywords at:
11+
* https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical
12+
*/
13+
ALL : A L L ;
14+
AND : A N D ;
15+
ANY : A N Y ;
16+
ARRAY : A R R A Y ;
17+
AS : A S ;
18+
ASC : A S C ;
19+
ASSERT_ROWS_MODIFIED : A S S E R T [_] R O W S [_] M O D I F I E D ;
20+
AT : A T ;
21+
BETWEEN : B E T W E E N ;
22+
BY : B Y ;
23+
CASE : C A S E ;
24+
CAST : C A S T ;
25+
COLLATE : C O L L A T E ;
26+
CONTAINS : C O N T A I N S ;
27+
CREATE : C R E A T E ;
28+
CROSS : C R O S S ;
29+
CUBE : C U B E ;
30+
CURRENT : C U R R E N T ;
31+
DEFAULT : D E F A U L T ;
32+
DEFINE : D E F I N E ;
33+
DESC : D E S C ;
34+
DISTINCT : D I S T I N C T ;
35+
ELSE : E L S E ;
36+
END : E N D ;
37+
ENUM : E N U M ;
38+
ESCAPE : E S C A P E ;
39+
EXCEPT : E X C E P T ;
40+
EXCLUDE : E X C L U D E ;
41+
EXISTS : E X I S T S ;
42+
EXTRACT : E X T R A C T ;
43+
FALSE : F A L S E ;
44+
FETCH : F E T C H ;
45+
FOLLOWING : F O L L O W I N G ;
46+
FOR : F O R ;
47+
FROM : F R O M ;
48+
FULL : F U L L ;
49+
GROUP : G R O U P ;
50+
GROUPING : G R O U P I N G ;
51+
GROUPS : G R O U P S ;
52+
HASH : H A S H ;
53+
HAVING : H A V I N G ;
54+
IF : I F ;
55+
IGNORE : I G N O R E ;
56+
IN : I N ;
57+
INNER : I N N E R ;
58+
INTERSECT : I N T E R S E C T ;
59+
INTERVAL : I N T E R V A L ;
60+
INTO : I N T O ;
61+
IS : I S ;
62+
JOIN : J O I N ;
63+
LATERAL : L A T E R A L ;
64+
LEFT : L E F T ;
65+
LIKE : L I K E ;
66+
LIMIT : L I M I T ;
67+
LOOKUP : L O O K U P ;
68+
MERGE : M E R G E ;
69+
NATURAL : N A T U R A L ;
70+
NEW : N E W ;
71+
NO : N O ;
72+
NOT : N O T ;
73+
S_NULL : N U L L ;
74+
NULLS : N U L L S ;
75+
OF : O F ;
76+
OFFSET : O F F S E T;
77+
ON : O N ;
78+
OR : O R ;
79+
ORDER : O R D E R ;
80+
ORDINAL : O R D I N A L;
81+
OUTER : O U T E R ;
82+
OVER : O V E R ;
83+
PARTITION : P A R T I T I O N ;
84+
PRECEDING : P R E C E D I N G ;
85+
PROTO : P R O T O ;
86+
RANGE : R A N G E ;
87+
RECURSIVE : R E C U R S I V E ;
88+
REPLACE : R E P L A C E;
89+
RESPECT : R E S P E C T ;
90+
RIGHT : R I G H T ;
91+
ROLLUP : R O L L U P ;
92+
ROWS : R O W S ;
93+
SAFE_OFFSET : S A F E '_' O F F S E T ;
94+
SAFE_ORDINAL : S A F E '_' O R D I N A L ;
95+
SELECT : S E L E C T ;
96+
SET : S E T ;
97+
SOME : S O M E ;
98+
SSTRUCT : S T R U C T ;
99+
SYSTEM : S Y S T E M ;
100+
TABLESAMPLE : T A B L E S A M P L E ;
101+
THEN : T H E N ;
102+
TIME : T I M E ;
103+
TO : T O ;
104+
TREAT : T R E A T ;
105+
TRUE : T R U E ;
106+
UNBOUNDED : U N B O U N D E D ;
107+
UNION : U N I O N ;
108+
UNNEST : U N N E S T ;
109+
USING : U S I N G ;
110+
WHEN : W H E N ;
111+
WHERE : W H E R E ;
112+
WINDOW : W I N D O W ;
113+
WITH : W I T H ;
114+
WITHIN : W I T H I N ;
115+
116+
// Integer literal: optional sign followed by either a hexadecimal number
// (0x / 0X prefix plus one or more hex digits) or a run of decimal digits.
// The hex alternative must accept a-f/A-F; otherwise `0xFF` is split into
// INT `0` and ID `xFF`.
INT : ('+' | '-')? ('0' X [0-9a-fA-F]+ | DIGITS);
117+
// Floating-point literal. Three accepted shapes:
//   [+-]DIGITS '.' [DIGITS] [exponent]
//   [DIGITS] '.' DIGITS [exponent]
//   DIGITS exponent
// The exponent marker is case-insensitive (fragment E) and its sign is optional
// in every alternative, so `1.5e10` and `1E5` lex as a single FLOAT token
// instead of a number followed by an identifier.
FLOAT : ('+' | '-')? DIGITS '.' DIGITS? (E ('+' | '-')? DIGITS)?
118+
| DIGITS? '.' DIGITS (E ('+' | '-')? DIGITS)?
119+
| DIGITS E ('+' | '-')? DIGITS;
120+
DIGITS : DIGIT+ ;
121+
122+
// Whitespace
123+
WS : [ \t\r\n]+ -> channel(HIDDEN) ;
124+
// Comments
125+
// Single-line comment: starts with `--` or `#` and runs to the end of the line.
// The line terminator itself is not consumed; it is picked up by WS.
CMT : ('--' | '#') ~[\r\n]* -> channel(HIDDEN) ;
126+
M_CMT : '/*' .*? '*/' -> channel(HIDDEN) ;
127+
// Quoted String
128+
// Single- or double-quoted string literal.
// Inside the quotes a backslash always consumes the following character
// (escape sequence); any other character except the delimiter and backslash is
// taken literally. Excluding backslash from the literal set keeps the two
// alternatives disjoint, so `"a\\"` closes at its own quote instead of being
// extended to a later quote by longest-match.
QUOTED_STRING : '"' ('\\' . | ~["\\])* '"'
129+
| '\'' ('\\' . | ~['\\])* '\'' ;
130+
TRIPLE_QUOTED_STRING : QUOTE QUOTE QUOTE .*? ~'\\' QUOTE QUOTE QUOTE
131+
| DQOUTE DQOUTE DQOUTE .*? ~'\\' DQOUTE DQOUTE DQOUTE ;
132+
RAW_STRING : R (QUOTED_STRING | TRIPLE_QUOTED_STRING) ;
133+
BYTE_STRING : B (QUOTED_STRING | TRIPLE_QUOTED_STRING) ;
134+
RAW_BYTE_STRING : RB (QUOTED_STRING | TRIPLE_QUOTED_STRING) ;
135+
// ID regex
136+
// Backtick-quoted identifier (must be non-empty).
// Content is either a backslash escape (backslash + any character) or any
// character other than a backtick or backslash. The unescaped backtick must be
// excluded: with a greedy wildcard the token would extend to the last backtick
// in the input, merging `a`.`b` into a single identifier.
QUOTED_ID : BACKTICK ('\\' . | ~[`\\])+ BACKTICK ;
137+
ID : [a-zA-Z_][A-Za-z_0-9]* ;
138+
// Two-letter raw-bytes prefix (rb / br, any letter case), referenced by RAW_BYTE_STRING.
// NOTE(review): this is a full token rule declared after ID, and ID also matches
// "rb"/"br". ANTLR resolves equal-length matches in favour of the earlier rule, so a
// standalone RB token is presumably never emitted. Consider marking it `fragment` --
// confirm first that the parser grammar does not reference RB.
RB : [rR][bB] | [bB][rR] ;
139+
140+
DOT: '.';
141+
BACKTICK: '`';
142+
LR_BRACKET: '(';
143+
RR_BRACKET: ')';
144+
LSB: '[';
145+
RSB: ']';
146+
EQ: '=';
147+
LT: '<';
148+
GT: '>';
149+
DOUBLE_LT: '<<';
150+
DOUBLE_GT: '>>';
151+
LE: '<=';
152+
GE: '>=';
153+
NE: '!=';
154+
LTGT: '<>';
155+
BIT_AND: '&';
156+
BIT_OR: '|';
157+
STAR: '*';
158+
DIVIDE: '/';
159+
PLUS: '+';
160+
MINUS: '-';
161+
COMMA: ',';
162+
TILDE: '~';
163+
CARET: '^';
164+
165+
fragment DIGIT : [0-9] ;
166+
// Fragments for each letter of the alphabet. This is necessary because SQL keywords are case-insensitive.
167+
fragment A : [aA];
168+
fragment B : [bB];
169+
fragment C : [cC];
170+
fragment D : [dD];
171+
fragment E : [eE];
172+
fragment F : [fF];
173+
fragment G : [gG];
174+
fragment H : [hH];
175+
fragment I : [iI];
176+
fragment J : [jJ];
177+
fragment K : [kK];
178+
fragment L : [lL];
179+
fragment M : [mM];
180+
fragment N : [nN];
181+
fragment O : [oO];
182+
fragment P : [pP];
183+
fragment Q : [qQ];
184+
fragment R : [rR];
185+
fragment S : [sS];
186+
fragment T : [tT];
187+
fragment U : [uU];
188+
fragment V : [vV];
189+
fragment W : [wW];
190+
fragment X : [xX];
191+
fragment Y : [yY];
192+
fragment Z : [zZ];

0 commit comments

Comments
 (0)