Skip to content

Commit 2b00afd

Browse files
committed
BigQuery: Fix column identifier reserved keywords list
Parser currently uses the same `RESERVED_FOR_COLUMN_ALIAS` keywords list to avoid lookahead when parsing column identifiers. This assumed that all listed keywords were reservered across all dialects which isn't the case. So that the following valid BigQuery statement previously failed due to `OFFSET` being flagged as a keyword. ```sql SELECT 1, OFFSET FROM T ``` This updates the parser to support dialect specific `RESERVED_FOR_COLUMN_ALIAS` list
1 parent 74163b1 commit 2b00afd

File tree

5 files changed

+120
-34
lines changed

5 files changed

+120
-34
lines changed

src/dialect/bigquery.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,28 @@
1616
// under the License.
1717

1818
use crate::dialect::Dialect;
19+
use crate::keywords::Keyword;
20+
use crate::parser::Parser;
21+
22+
/// These keywords are disallowed as column identifiers. Such that
23+
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
24+
const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
25+
Keyword::WITH,
26+
Keyword::SELECT,
27+
Keyword::WHERE,
28+
Keyword::GROUP,
29+
Keyword::HAVING,
30+
Keyword::ORDER,
31+
Keyword::LATERAL,
32+
Keyword::LIMIT,
33+
Keyword::FETCH,
34+
Keyword::UNION,
35+
Keyword::EXCEPT,
36+
Keyword::INTERSECT,
37+
Keyword::FROM,
38+
Keyword::INTO,
39+
Keyword::END,
40+
];
1941

2042
/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
2143
#[derive(Debug, Default)]
@@ -87,4 +109,8 @@ impl Dialect for BigQueryDialect {
87109
fn supports_timestamp_versioning(&self) -> bool {
88110
true
89111
}
112+
113+
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
114+
!RESERVED_FOR_COLUMN_ALIAS.contains(kw)
115+
}
90116
}

src/dialect/mod.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,7 @@ pub trait Dialect: Debug + Any {
788788
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
789789
}
790790

791-
// Returns reserved keywords when looking to parse a [TableFactor].
791+
/// Returns reserved keywords when looking to parse a `TableFactor`.
792792
/// See [Self::supports_from_trailing_commas]
793793
fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
794794
keywords::RESERVED_FOR_TABLE_FACTOR
@@ -828,11 +828,17 @@ pub trait Dialect: Debug + Any {
828828
false
829829
}
830830

831+
/// Returns true if the specified keyword should be parsed as a column identifier.
832+
/// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
833+
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
834+
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
835+
}
836+
831837
/// Returns true if the specified keyword should be parsed as a select item alias.
832838
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
833839
/// to enable looking ahead if needed.
834-
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
835-
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
840+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
841+
explicit || self.is_column_alias(kw, parser)
836842
}
837843

838844
/// Returns true if the specified keyword should be parsed as a table factor alias.

src/parser/mod.rs

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3944,7 +3944,7 @@ impl<'a> Parser<'a> {
39443944
self.parse_comma_separated_with_trailing_commas(
39453945
|p| p.parse_select_item(),
39463946
trailing_commas,
3947-
None,
3947+
Self::is_reserved_for_column_alias,
39483948
)
39493949
}
39503950

@@ -3978,30 +3978,42 @@ impl<'a> Parser<'a> {
39783978
self.parse_comma_separated_with_trailing_commas(
39793979
Parser::parse_table_and_joins,
39803980
trailing_commas,
3981-
Some(self.dialect.get_reserved_keywords_for_table_factor()),
3981+
|kw, _parser| {
3982+
self.dialect
3983+
.get_reserved_keywords_for_table_factor()
3984+
.contains(kw)
3985+
},
39823986
)
39833987
}
39843988

39853989
/// Parse the comma of a comma-separated syntax element.
3990+
/// `R` is a predicate that should return true if the next
3991+
/// keyword is a reserved keyword.
39863992
/// Allows for control over trailing commas
3993+
///
39873994
/// Returns true if there is a next element
3988-
fn is_parse_comma_separated_end_with_trailing_commas(
3995+
fn is_parse_comma_separated_end_with_trailing_commas<R>(
39893996
&mut self,
39903997
trailing_commas: bool,
3991-
reserved_keywords: Option<&[Keyword]>,
3992-
) -> bool {
3993-
let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
3998+
is_reserved_keyword: &R,
3999+
) -> bool
4000+
where
4001+
R: Fn(&Keyword, &mut Parser) -> bool,
4002+
{
39944003
if !self.consume_token(&Token::Comma) {
39954004
true
39964005
} else if trailing_commas {
3997-
let token = self.peek_token().token;
3998-
match token {
3999-
Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true,
4006+
let token = self.next_token().token;
4007+
let is_end = match token {
4008+
Token::Word(ref kw) if is_reserved_keyword(&kw.keyword, self) => true,
40004009
Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
40014010
true
40024011
}
40034012
_ => false,
4004-
}
4013+
};
4014+
self.prev_token();
4015+
4016+
is_end
40054017
} else {
40064018
false
40074019
}
@@ -4010,34 +4022,44 @@ impl<'a> Parser<'a> {
40104022
/// Parse the comma of a comma-separated syntax element.
40114023
/// Returns true if there is a next element
40124024
fn is_parse_comma_separated_end(&mut self) -> bool {
4013-
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None)
4025+
self.is_parse_comma_separated_end_with_trailing_commas(
4026+
self.options.trailing_commas,
4027+
&Self::is_reserved_for_column_alias,
4028+
)
40144029
}
40154030

40164031
/// Parse a comma-separated list of 1+ items accepted by `F`
40174032
pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
40184033
where
40194034
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
40204035
{
4021-
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None)
4036+
self.parse_comma_separated_with_trailing_commas(
4037+
f,
4038+
self.options.trailing_commas,
4039+
Self::is_reserved_for_column_alias,
4040+
)
40224041
}
40234042

4024-
/// Parse a comma-separated list of 1+ items accepted by `F`
4025-
/// Allows for control over trailing commas
4026-
fn parse_comma_separated_with_trailing_commas<T, F>(
4043+
/// Parse a comma-separated list of 1+ items accepted by `F`.
4044+
/// `R` is a predicate that should return true if the next
4045+
/// keyword is a reserved keyword.
4046+
/// Allows for control over trailing commas.
4047+
fn parse_comma_separated_with_trailing_commas<T, F, R>(
40274048
&mut self,
40284049
mut f: F,
40294050
trailing_commas: bool,
4030-
reserved_keywords: Option<&[Keyword]>,
4051+
is_reserved_keyword: R,
40314052
) -> Result<Vec<T>, ParserError>
40324053
where
40334054
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
4055+
R: Fn(&Keyword, &mut Parser) -> bool,
40344056
{
40354057
let mut values = vec![];
40364058
loop {
40374059
values.push(f(self)?);
40384060
if self.is_parse_comma_separated_end_with_trailing_commas(
40394061
trailing_commas,
4040-
reserved_keywords,
4062+
&is_reserved_keyword,
40414063
) {
40424064
break;
40434065
}
@@ -4111,6 +4133,13 @@ impl<'a> Parser<'a> {
41114133
self.parse_comma_separated(f)
41124134
}
41134135

4136+
/// Default implementation of a predicate that returns true if
4137+
/// the specified keyword is reserved for column alias.
4138+
/// See [Dialect::is_column_alias]
4139+
fn is_reserved_for_column_alias(kw: &Keyword, parser: &mut Parser) -> bool {
4140+
!parser.dialect.is_column_alias(kw, parser)
4141+
}
4142+
41144143
/// Run a parser method `f`, reverting back to the current position if unsuccessful.
41154144
/// Returns `None` if `f` returns an error
41164145
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
@@ -9329,7 +9358,7 @@ impl<'a> Parser<'a> {
93299358
let cols = self.parse_comma_separated_with_trailing_commas(
93309359
Parser::parse_view_column,
93319360
self.dialect.supports_column_definition_trailing_commas(),
9332-
None,
9361+
Self::is_reserved_for_column_alias,
93339362
)?;
93349363
self.expect_token(&Token::RParen)?;
93359364
Ok(cols)

tests/sqlparser_bigquery.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,15 @@ fn parse_raw_literal() {
213213
);
214214
}
215215

216+
#[test]
217+
fn parse_big_query_non_reserved_column_alias() {
218+
let sql = r#"SELECT OFFSET, EXPLAIN, ANALYZE, SORT, TOP, VIEW FROM T"#;
219+
bigquery().verified_stmt(sql);
220+
221+
let sql = r#"SELECT 1 AS OFFSET, 2 AS EXPLAIN, 3 AS ANALYZE FROM T"#;
222+
bigquery().verified_stmt(sql);
223+
}
224+
216225
#[test]
217226
fn parse_delete_statement() {
218227
let sql = "DELETE \"table\" WHERE 1";

tests/sqlparser_common.rs

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,13 @@ fn parse_insert_default_values() {
253253

254254
#[test]
255255
fn parse_insert_select_returning() {
256-
verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
257-
let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
256+
// Dialects that support `RETURNING` as a column identifier do
257+
// not support this syntax.
258+
let dialects =
259+
all_dialects_where(|d| !d.is_column_alias(&Keyword::RETURNING, &mut Parser::new(d)));
260+
261+
dialects.verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
262+
let stmt = dialects.verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
258263
match stmt {
259264
Statement::Insert(Insert {
260265
returning: Some(ret),
@@ -6902,9 +6907,6 @@ fn parse_union_except_intersect_minus() {
69026907
verified_stmt("SELECT 1 EXCEPT SELECT 2");
69036908
verified_stmt("SELECT 1 EXCEPT ALL SELECT 2");
69046909
verified_stmt("SELECT 1 EXCEPT DISTINCT SELECT 1");
6905-
verified_stmt("SELECT 1 MINUS SELECT 2");
6906-
verified_stmt("SELECT 1 MINUS ALL SELECT 2");
6907-
verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
69086910
verified_stmt("SELECT 1 INTERSECT SELECT 2");
69096911
verified_stmt("SELECT 1 INTERSECT ALL SELECT 2");
69106912
verified_stmt("SELECT 1 INTERSECT DISTINCT SELECT 1");
@@ -6923,6 +6925,13 @@ fn parse_union_except_intersect_minus() {
69236925
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x");
69246926
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x");
69256927
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x");
6928+
6929+
// Dialects that support `MINUS` as column identifier
6930+
// do not support `MINUS` as a set operator.
6931+
let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::MINUS, &mut Parser::new(d)));
6932+
dialects.verified_stmt("SELECT 1 MINUS SELECT 2");
6933+
dialects.verified_stmt("SELECT 1 MINUS ALL SELECT 2");
6934+
dialects.verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
69266935
}
69276936

69286937
#[test]
@@ -7599,19 +7608,26 @@ fn parse_invalid_subquery_without_parens() {
75997608

76007609
#[test]
76017610
fn parse_offset() {
7611+
// Dialects that support `OFFSET` as column identifiers
7612+
// don't support this syntax.
7613+
let dialects =
7614+
all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut Parser::new(d)));
7615+
76027616
let expect = Some(Offset {
76037617
value: Expr::Value(number("2")),
76047618
rows: OffsetRows::Rows,
76057619
});
7606-
let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
7620+
let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
76077621
assert_eq!(ast.offset, expect);
7608-
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
7622+
let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
76097623
assert_eq!(ast.offset, expect);
7610-
let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
7624+
let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
76117625
assert_eq!(ast.offset, expect);
7612-
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
7626+
let ast =
7627+
dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
76137628
assert_eq!(ast.offset, expect);
7614-
let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
7629+
let ast =
7630+
dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
76157631
assert_eq!(ast.offset, expect);
76167632
match *ast.body {
76177633
SetExpr::Select(s) => match only(s.from).relation {
@@ -7622,23 +7638,23 @@ fn parse_offset() {
76227638
},
76237639
_ => panic!("Test broke"),
76247640
}
7625-
let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
7641+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS");
76267642
assert_eq!(
76277643
ast.offset,
76287644
Some(Offset {
76297645
value: Expr::Value(number("0")),
76307646
rows: OffsetRows::Rows,
76317647
})
76327648
);
7633-
let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
7649+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW");
76347650
assert_eq!(
76357651
ast.offset,
76367652
Some(Offset {
76377653
value: Expr::Value(number("1")),
76387654
rows: OffsetRows::Row,
76397655
})
76407656
);
7641-
let ast = verified_query("SELECT 'foo' OFFSET 1");
7657+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1");
76427658
assert_eq!(
76437659
ast.offset,
76447660
Some(Offset {

0 commit comments

Comments
 (0)