Skip to content

Commit 40020be

Browse files
committed
Squashed merge of expand-parse-without-semicolons
1 parent 3c61db5 commit 40020be

File tree

7 files changed

+839
-136
lines changed

7 files changed

+839
-136
lines changed

src/dialect/mod.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,8 +1047,14 @@ pub trait Dialect: Debug + Any {
10471047
/// Returns true if the specified keyword should be parsed as a table factor alias.
10481048
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
10491049
/// to enable looking ahead if needed.
1050-
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
1051-
explicit || self.is_table_alias(kw, parser)
1050+
///
1051+
/// When the dialect supports statements without a semicolon delimiter, only non-keyword words are parsed as aliases (even when `explicit` is true).
1052+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
1053+
if self.supports_statements_without_semicolon_delimiter() {
1054+
kw == &Keyword::NoKeyword
1055+
} else {
1056+
explicit || self.is_table_alias(kw, _parser)
1057+
}
10521058
}
10531059

10541060
/// Returns true if this dialect supports querying historical table data
@@ -1207,6 +1213,18 @@ pub trait Dialect: Debug + Any {
12071213
fn supports_semantic_view_table_factor(&self) -> bool {
12081214
false
12091215
}
1216+
1217+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
1218+
///
1219+
/// If this returns `true`, the following SQL will parse. If it returns `false`, the SQL will not parse.
1220+
///
1221+
/// ```sql
1222+
/// SELECT 1
1223+
/// SELECT 2
1224+
/// ```
1225+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1226+
false
1227+
}
12101228
}
12111229

12121230
/// This represents the operators for which precedence must be defined

src/dialect/mssql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Dialect for MsSqlDialect {
6767
}
6868

6969
fn supports_connect_by(&self) -> bool {
70-
true
70+
false
7171
}
7272

7373
fn supports_eq_alias_assignment(&self) -> bool {
@@ -123,6 +123,10 @@ impl Dialect for MsSqlDialect {
123123
true
124124
}
125125

126+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
127+
true
128+
}
129+
126130
/// See <https://learn.microsoft.com/en-us/sql/relational-databases/security/authentication-access/server-level-roles>
127131
fn get_reserved_grantees_types(&self) -> &[GranteesType] {
128132
&[GranteesType::Public]
@@ -284,6 +288,9 @@ impl MsSqlDialect {
284288
) -> Result<Vec<Statement>, ParserError> {
285289
let mut stmts = Vec::new();
286290
loop {
291+
while let Token::SemiColon = parser.peek_token_ref().token {
292+
parser.advance_token();
293+
}
287294
if let Token::EOF = parser.peek_token_ref().token {
288295
break;
289296
}

src/keywords.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
11201120
Keyword::ANTI,
11211121
Keyword::SEMI,
11221122
Keyword::RETURNING,
1123+
Keyword::RETURN,
11231124
Keyword::ASOF,
11241125
Keyword::MATCH_CONDITION,
11251126
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1145,6 +1146,11 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
11451146
Keyword::TABLESAMPLE,
11461147
Keyword::FROM,
11471148
Keyword::OPEN,
1149+
Keyword::INSERT,
1150+
Keyword::UPDATE,
1151+
Keyword::DELETE,
1152+
Keyword::EXEC,
1153+
Keyword::EXECUTE,
11481154
];
11491155

11501156
/// Can't be used as a column alias, so that `SELECT <expr> alias`
@@ -1174,6 +1180,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
11741180
Keyword::CLUSTER,
11751181
Keyword::DISTRIBUTE,
11761182
Keyword::RETURNING,
1183+
Keyword::RETURN,
11771184
// Reserved only as a column alias in the `SELECT` clause
11781185
Keyword::FROM,
11791186
Keyword::INTO,
@@ -1188,6 +1195,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
11881195
Keyword::LIMIT,
11891196
Keyword::HAVING,
11901197
Keyword::WHERE,
1198+
Keyword::RETURN,
11911199
];
11921200

11931201
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1198,4 +1206,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
11981206
Keyword::INTERVAL,
11991207
Keyword::STRUCT,
12001208
Keyword::TRIM,
1209+
Keyword::RETURN,
12011210
];

src/parser/mod.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,22 @@ impl ParserOptions {
271271
self.unescape = unescape;
272272
self
273273
}
274+
275+
/// Set if semicolon statement delimiters are required.
276+
///
277+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
278+
///
279+
/// ```sql
280+
/// SELECT 1
281+
/// SELECT 2
282+
/// ```
283+
pub fn with_require_semicolon_stmt_delimiter(
284+
mut self,
285+
require_semicolon_stmt_delimiter: bool,
286+
) -> Self {
287+
self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter;
288+
self
289+
}
274290
}
275291

276292
#[derive(Copy, Clone)]
@@ -367,7 +383,11 @@ impl<'a> Parser<'a> {
367383
state: ParserState::Normal,
368384
dialect,
369385
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
370-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
386+
options: ParserOptions::new()
387+
.with_trailing_commas(dialect.supports_trailing_commas())
388+
.with_require_semicolon_stmt_delimiter(
389+
!dialect.supports_statements_without_semicolon_delimiter(),
390+
),
371391
}
372392
}
373393

@@ -489,13 +509,18 @@ impl<'a> Parser<'a> {
489509

490510
match self.peek_token().token {
491511
Token::EOF => break,
492-
493512
// end of statement
494513
Token::Word(word) => {
495514
if expecting_statement_delimiter && word.keyword == Keyword::END {
496515
break;
497516
}
498517
}
518+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
519+
Token::Whitespace(Whitespace::Newline) => {
520+
if !self.options.require_semicolon_stmt_delimiter {
521+
expecting_statement_delimiter = false;
522+
}
523+
}
499524
_ => {}
500525
}
501526

@@ -505,7 +530,7 @@ impl<'a> Parser<'a> {
505530

506531
let statement = self.parse_statement()?;
507532
stmts.push(statement);
508-
expecting_statement_delimiter = true;
533+
expecting_statement_delimiter = self.options.require_semicolon_stmt_delimiter;
509534
}
510535
Ok(stmts)
511536
}
@@ -4615,6 +4640,14 @@ impl<'a> Parser<'a> {
46154640
return Ok(vec![]);
46164641
}
46174642

4643+
if end_token == Token::SemiColon && !self.options.require_semicolon_stmt_delimiter {
4644+
if let Token::Word(ref kw) = self.peek_token().token {
4645+
if kw.keyword != Keyword::NoKeyword {
4646+
return Ok(vec![]);
4647+
}
4648+
}
4649+
}
4650+
46184651
if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] {
46194652
let _ = self.consume_token(&Token::Comma);
46204653
return Ok(vec![]);
@@ -4632,6 +4665,9 @@ impl<'a> Parser<'a> {
46324665
) -> Result<Vec<Statement>, ParserError> {
46334666
let mut values = vec![];
46344667
loop {
4668+
// ignore empty statements (between successive statement delimiters)
4669+
while self.consume_token(&Token::SemiColon) {}
4670+
46354671
match &self.peek_nth_token_ref(0).token {
46364672
Token::EOF => break,
46374673
Token::Word(w) => {
@@ -4643,7 +4679,13 @@ impl<'a> Parser<'a> {
46434679
}
46444680

46454681
values.push(self.parse_statement()?);
4646-
self.expect_token(&Token::SemiColon)?;
4682+
4683+
if self.options.require_semicolon_stmt_delimiter {
4684+
self.expect_token(&Token::SemiColon)?;
4685+
}
4686+
4687+
// ignore empty statements (between successive statement delimiters)
4688+
while self.consume_token(&Token::SemiColon) {}
46474689
}
46484690
Ok(values)
46494691
}
@@ -17271,7 +17313,28 @@ impl<'a> Parser<'a> {
1727117313

1727217314
/// Parse [Statement::Return]
1727317315
fn parse_return(&mut self) -> Result<Statement, ParserError> {
17274-
match self.maybe_parse(|p| p.parse_expr())? {
17316+
let rs = self.maybe_parse(|p| {
17317+
let expr = p.parse_expr()?;
17318+
17319+
match &expr {
17320+
Expr::Value(_)
17321+
| Expr::Function(_)
17322+
| Expr::UnaryOp { .. }
17323+
| Expr::BinaryOp { .. }
17324+
| Expr::Case { .. }
17325+
| Expr::Cast { .. }
17326+
| Expr::Convert { .. }
17327+
| Expr::Subquery(_) => Ok(expr),
17328+
// todo: how to restrict to variables?
17329+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
17330+
_ => parser_err!(
17331+
"Non-returnable expression found following RETURN",
17332+
p.peek_token().span.start
17333+
),
17334+
}
17335+
})?;
17336+
17337+
match rs {
1727517338
Some(expr) => Ok(Statement::Return(ReturnStatement {
1727617339
value: Some(ReturnStatementValue::Expr(expr)),
1727717340
})),

src/test_utils.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#[cfg(not(feature = "std"))]
2626
use alloc::{
2727
boxed::Box,
28+
format,
2829
string::{String, ToString},
2930
vec,
3031
vec::Vec,
@@ -186,6 +187,32 @@ impl TestedDialects {
186187
statements
187188
}
188189

190+
/// The same as [`statements_parse_to`] but it will strip semicolons from the SQL text.
191+
pub fn statements_without_semicolons_parse_to(
192+
&self,
193+
sql: &str,
194+
canonical: &str,
195+
) -> Vec<Statement> {
196+
let sql_without_semicolons = sql.replace(";", " ");
197+
let statements = self
198+
.parse_sql_statements(&sql_without_semicolons)
199+
.expect(&sql_without_semicolons);
200+
if !canonical.is_empty() && sql != canonical {
201+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
202+
} else {
203+
assert_eq!(
204+
sql,
205+
statements
206+
.iter()
207+
// note: account for format_statement_list manually inserted semicolons
208+
.map(|s| s.to_string().trim_end_matches(";").to_string())
209+
.collect::<Vec<_>>()
210+
.join("; ")
211+
);
212+
}
213+
statements
214+
}
215+
189216
/// Ensures that `sql` parses as an [`Expr`], and that
190217
/// re-serializing the parse result produces canonical
191218
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -318,6 +345,43 @@ where
318345
all_dialects_where(|d| !except(d))
319346
}
320347

348+
/// Returns all dialects that don't support statements without semicolon delimiters.
349+
/// (i.e. dialects that require semicolon delimiters.)
350+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
351+
let tested_dialects =
352+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
353+
assert_ne!(tested_dialects.dialects.len(), 0);
354+
tested_dialects
355+
}
356+
357+
/// Returns all dialects that do support statements without semicolon delimiters.
358+
/// (i.e. dialects not requiring semicolon delimiters.)
359+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
360+
let tested_dialects =
361+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
362+
assert_ne!(tested_dialects.dialects.len(), 0);
363+
tested_dialects
364+
}
365+
366+
/// Asserts an error for `parse_sql_statements`:
367+
/// - "end of statement" for dialects that require semicolon delimiters
368+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
369+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
370+
assert_eq!(
371+
ParserError::ParserError(format!("Expected: end of statement, found: {found}")),
372+
all_dialects_requiring_semicolon_statement_delimiter()
373+
.parse_sql_statements(sql)
374+
.unwrap_err()
375+
);
376+
377+
assert_eq!(
378+
ParserError::ParserError(format!("Expected: an SQL statement, found: {found}")),
379+
all_dialects_not_requiring_semicolon_statement_delimiter()
380+
.parse_sql_statements(sql)
381+
.unwrap_err()
382+
);
383+
}
384+
321385
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
322386
assert_eq!(
323387
expected,

0 commit comments

Comments
 (0)