Skip to content

Commit af42d18

Browse files
Merge branch 'main' into mysql-index-hints
2 parents 7dd856c + fd6c98e commit af42d18

File tree

9 files changed

+176
-23
lines changed

9 files changed

+176
-23
lines changed

src/dialect/bigquery.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ impl Dialect for BigQueryDialect {
3131
true
3232
}
3333

34+
/// See <https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_statement>
35+
fn supports_column_definition_trailing_commas(&self) -> bool {
36+
true
37+
}
38+
3439
fn is_identifier_start(&self, ch: char) -> bool {
3540
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
3641
}

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,8 @@ impl Dialect for GenericDialect {
139139
fn supports_user_host_grantee(&self) -> bool {
140140
true
141141
}
142+
143+
fn supports_string_escape_constant(&self) -> bool {
144+
true
145+
}
142146
}

src/dialect/mod.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,11 +405,18 @@ pub trait Dialect: Debug + Any {
405405
}
406406

407407
/// Returns true if the dialect supports trailing commas in the `FROM` clause of a `SELECT` statement.
408-
/// /// Example: `SELECT 1 FROM T, U, LIMIT 1`
408+
/// Example: `SELECT 1 FROM T, U, LIMIT 1`
409409
fn supports_from_trailing_commas(&self) -> bool {
410410
false
411411
}
412412

413+
/// Returns true if the dialect supports trailing commas in the
414+
/// column definitions list of a `CREATE` statement.
415+
/// Example: `CREATE TABLE T (x INT, y TEXT,)`
416+
fn supports_column_definition_trailing_commas(&self) -> bool {
417+
false
418+
}
419+
413420
/// Returns true if the dialect supports double dot notation for object names
414421
///
415422
/// Example
@@ -843,6 +850,12 @@ pub trait Dialect: Debug + Any {
843850

844851
/// Returns true if the dialect supports the table hints in the `FROM` clause.
845852
fn supports_table_hints(&self) -> bool {
853+
false
854+
}
855+
/// Returns true if this dialect supports the E'...' syntax for string literals
856+
///
857+
/// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE>
858+
fn supports_string_escape_constant(&self) -> bool {
846859
false
847860
}
848861
}

src/dialect/postgresql.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ impl Dialect for PostgreSqlDialect {
245245
fn supports_nested_comments(&self) -> bool {
246246
true
247247
}
248+
249+
fn supports_string_escape_constant(&self) -> bool {
250+
true
251+
}
248252
}
249253

250254
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

src/dialect/redshift.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,8 @@ impl Dialect for RedshiftSqlDialect {
109109
fn supports_partiql(&self) -> bool {
110110
true
111111
}
112+
113+
fn supports_string_escape_constant(&self) -> bool {
114+
true
115+
}
112116
}

src/parser/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6718,7 +6718,11 @@ impl<'a> Parser<'a> {
67186718
return self.expected("',' or ')' after column definition", self.peek_token());
67196719
};
67206720

6721-
if rparen && (!comma || self.options.trailing_commas) {
6721+
if rparen
6722+
&& (!comma
6723+
|| self.dialect.supports_column_definition_trailing_commas()
6724+
|| self.options.trailing_commas)
6725+
{
67226726
let _ = self.consume_token(&Token::RParen);
67236727
break;
67246728
}
@@ -9356,7 +9360,11 @@ impl<'a> Parser<'a> {
93569360
self.next_token();
93579361
Ok(vec![])
93589362
} else {
9359-
let cols = self.parse_comma_separated(Parser::parse_view_column)?;
9363+
let cols = self.parse_comma_separated_with_trailing_commas(
9364+
Parser::parse_view_column,
9365+
self.dialect.supports_column_definition_trailing_commas(),
9366+
None,
9367+
)?;
93609368
self.expect_token(&Token::RParen)?;
93619369
Ok(cols)
93629370
}

src/test_utils.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use core::fmt::Debug;
3333

3434
use crate::dialect::*;
3535
use crate::parser::{Parser, ParserError};
36-
use crate::tokenizer::Tokenizer;
36+
use crate::tokenizer::{Token, Tokenizer};
3737
use crate::{ast::*, parser::ParserOptions};
3838

3939
#[cfg(test)]
@@ -237,6 +237,22 @@ impl TestedDialects {
237237
pub fn verified_expr(&self, sql: &str) -> Expr {
238238
self.expr_parses_to(sql, sql)
239239
}
240+
241+
/// Check that the tokenizer returns the expected tokens for the given SQL.
242+
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
243+
if self.dialects.is_empty() {
244+
panic!("No dialects to test");
245+
}
246+
247+
self.dialects.iter().for_each(|dialect| {
248+
let mut tokenizer = Tokenizer::new(&**dialect, sql);
249+
if let Some(options) = &self.options {
250+
tokenizer = tokenizer.with_unescape(options.unescape);
251+
}
252+
let tokens = tokenizer.tokenize().unwrap();
253+
assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
254+
});
255+
}
240256
}
241257

242258
/// Returns all available dialects.

src/tokenizer.rs

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -971,7 +971,10 @@ impl<'a> Tokenizer<'a> {
971971
match chars.peek() {
972972
Some('\'') => {
973973
// N'...' - a <national character string literal>
974-
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
974+
let backslash_escape =
975+
self.dialect.supports_string_literal_backslash_escape();
976+
let s =
977+
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
975978
Ok(Some(Token::NationalStringLiteral(s)))
976979
}
977980
_ => {
@@ -982,7 +985,7 @@ impl<'a> Tokenizer<'a> {
982985
}
983986
}
984987
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
985-
x @ 'e' | x @ 'E' => {
988+
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
986989
let starting_loc = chars.location();
987990
chars.next(); // consume, to check the next char
988991
match chars.peek() {
@@ -2155,6 +2158,7 @@ mod tests {
21552158
use crate::dialect::{
21562159
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
21572160
};
2161+
use crate::test_utils::all_dialects_where;
21582162
use core::fmt::Debug;
21592163

21602164
#[test]
@@ -3543,4 +3547,74 @@ mod tests {
35433547
];
35443548
compare(expected, tokens);
35453549
}
3550+
3551+
#[test]
3552+
fn test_national_strings_backslash_escape_not_supported() {
3553+
all_dialects_where(|dialect| !dialect.supports_string_literal_backslash_escape())
3554+
.tokenizes_to(
3555+
"select n'''''\\'",
3556+
vec![
3557+
Token::make_keyword("select"),
3558+
Token::Whitespace(Whitespace::Space),
3559+
Token::NationalStringLiteral("''\\".to_string()),
3560+
],
3561+
);
3562+
}
3563+
3564+
#[test]
3565+
fn test_national_strings_backslash_escape_supported() {
3566+
all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape())
3567+
.tokenizes_to(
3568+
"select n'''''\\''",
3569+
vec![
3570+
Token::make_keyword("select"),
3571+
Token::Whitespace(Whitespace::Space),
3572+
Token::NationalStringLiteral("'''".to_string()),
3573+
],
3574+
);
3575+
}
3576+
3577+
#[test]
3578+
fn test_string_escape_constant_not_supported() {
3579+
all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
3580+
"select e'...'",
3581+
vec![
3582+
Token::make_keyword("select"),
3583+
Token::Whitespace(Whitespace::Space),
3584+
Token::make_word("e", None),
3585+
Token::SingleQuotedString("...".to_string()),
3586+
],
3587+
);
3588+
3589+
all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
3590+
"select E'...'",
3591+
vec![
3592+
Token::make_keyword("select"),
3593+
Token::Whitespace(Whitespace::Space),
3594+
Token::make_word("E", None),
3595+
Token::SingleQuotedString("...".to_string()),
3596+
],
3597+
);
3598+
}
3599+
3600+
#[test]
3601+
fn test_string_escape_constant_supported() {
3602+
all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
3603+
"select e'\\''",
3604+
vec![
3605+
Token::make_keyword("select"),
3606+
Token::Whitespace(Whitespace::Space),
3607+
Token::EscapedStringLiteral("'".to_string()),
3608+
],
3609+
);
3610+
3611+
all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
3612+
"select E'\\''",
3613+
vec![
3614+
Token::make_keyword("select"),
3615+
Token::Whitespace(Whitespace::Space),
3616+
Token::EscapedStringLiteral("'".to_string()),
3617+
],
3618+
);
3619+
}
35463620
}

tests/sqlparser_common.rs

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10265,15 +10265,19 @@ fn parse_trailing_comma() {
1026510265
"Expected: column name or constraint definition, found: )".to_string()
1026610266
)
1026710267
);
10268+
10269+
let unsupported_dialects = all_dialects_where(|d| !d.supports_trailing_commas());
10270+
assert_eq!(
10271+
unsupported_dialects
10272+
.parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,")
10273+
.unwrap_err(),
10274+
ParserError::ParserError("Expected: an expression, found: EOF".to_string())
10275+
);
1026810276
}
1026910277

1027010278
#[test]
1027110279
fn parse_projection_trailing_comma() {
10272-
// Some dialects allow trailing commas only in the projection
10273-
let trailing_commas = TestedDialects::new(vec![
10274-
Box::new(SnowflakeDialect {}),
10275-
Box::new(BigQueryDialect {}),
10276-
]);
10280+
let trailing_commas = all_dialects_where(|d| d.supports_projection_trailing_commas());
1027710281

1027810282
trailing_commas.one_statement_parses_to(
1027910283
"SELECT album_id, name, FROM track",
@@ -10286,20 +10290,14 @@ fn parse_projection_trailing_comma() {
1028610290

1028710291
trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track");
1028810292

10293+
let unsupported_dialects = all_dialects_where(|d| {
10294+
!d.supports_projection_trailing_commas() && !d.supports_trailing_commas()
10295+
});
1028910296
assert_eq!(
10290-
trailing_commas
10291-
.parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,")
10297+
unsupported_dialects
10298+
.parse_sql_statements("SELECT album_id, name, FROM track")
1029210299
.unwrap_err(),
10293-
ParserError::ParserError("Expected: an expression, found: EOF".to_string())
10294-
);
10295-
10296-
assert_eq!(
10297-
trailing_commas
10298-
.parse_sql_statements("CREATE TABLE employees (name text, age int,)")
10299-
.unwrap_err(),
10300-
ParserError::ParserError(
10301-
"Expected: column name or constraint definition, found: )".to_string()
10302-
),
10300+
ParserError::ParserError("Expected an expression, found: FROM".to_string())
1030310301
);
1030410302
}
1030510303

@@ -13137,6 +13135,33 @@ fn parse_overlaps() {
1313713135
verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')");
1313813136
}
1313913137

13138+
#[test]
13139+
fn parse_column_definition_trailing_commas() {
13140+
let dialects = all_dialects_where(|d| d.supports_column_definition_trailing_commas());
13141+
13142+
dialects.one_statement_parses_to("CREATE TABLE T (x INT64,)", "CREATE TABLE T (x INT64)");
13143+
dialects.one_statement_parses_to(
13144+
"CREATE TABLE T (x INT64, y INT64, )",
13145+
"CREATE TABLE T (x INT64, y INT64)",
13146+
);
13147+
dialects.one_statement_parses_to(
13148+
"CREATE VIEW T (x, y, ) AS SELECT 1",
13149+
"CREATE VIEW T (x, y) AS SELECT 1",
13150+
);
13151+
13152+
let unsupported_dialects = all_dialects_where(|d| {
13153+
!d.supports_projection_trailing_commas() && !d.supports_trailing_commas()
13154+
});
13155+
assert_eq!(
13156+
unsupported_dialects
13157+
.parse_sql_statements("CREATE TABLE employees (name text, age int,)")
13158+
.unwrap_err(),
13159+
ParserError::ParserError(
13160+
"Expected: column name or constraint definition, found: )".to_string()
13161+
),
13162+
);
13163+
}
13164+
1314013165
#[test]
1314113166
fn test_trailing_commas_in_from() {
1314213167
let dialects = all_dialects_where(|d| d.supports_from_trailing_commas());

0 commit comments

Comments
 (0)