diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index 32ba7b32a..ec3c095be 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -33,4 +33,9 @@ impl Dialect for AnsiDialect { fn require_interval_qualifier(&self) -> bool { true } + + /// The SQL standard explicitly states that block comments nest. + fn supports_nested_comments(&self) -> bool { + true + } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index f5e70c309..bdac1f57b 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -94,4 +94,10 @@ impl Dialect for ClickHouseDialect { fn supports_group_by_with_modifier(&self) -> bool { true } + + /// Supported since 2020. + /// See + fn supports_nested_comments(&self) -> bool { + true + } } diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index a3476b1b8..4bb8c8d51 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -64,4 +64,9 @@ impl Dialect for DatabricksDialect { fn supports_struct_literal(&self) -> bool { true } + + /// See + fn supports_nested_comments(&self) -> bool { + true + } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8382a5344..54a158c1f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2419,7 +2419,7 @@ mod tests { use crate::dialect::{ BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, }; - use crate::test_utils::all_dialects_where; + use crate::test_utils::{all_dialects_except, all_dialects_where}; use core::fmt::Debug; #[test] @@ -3169,90 +3169,79 @@ mod tests { #[test] fn tokenize_nested_multiline_comment() { - let dialect = GenericDialect {}; - let test_cases = vec![ - ( - "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", - vec![ - Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), - )), - Token::Whitespace(Whitespace::Space), - Token::Div, - Token::Word(Word { - value: "comment".to_string(), - quote_style: None, - keyword: Keyword::COMMENT, - }), - Token::Mul, - Token::Div, - Token::Number("1".to_string(), false), - ], - ), - ( - "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", - vec![ - Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), - )), - Token::Number("1".to_string(), false), - ], - ), - ( - "SELECT 1/* a /* b */ c */0", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), - Token::Number("0".to_string(), false), - ], - ), - ]; + all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( + "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::MultiLineComment( + "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), + )), + Token::Whitespace(Whitespace::Space), + Token::Div, + Token::Word(Word { + value: "comment".to_string(), + quote_style: None, + keyword: Keyword::COMMENT, + }), + Token::Mul, + Token::Div, + Token::Number("1".to_string(), false), + ], + ); - for (sql, expected) in test_cases { - let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); - compare(expected, tokens); - } + all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( + "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::MultiLineComment( + "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), + )), + Token::Number("1".to_string(), false), + ], + ); + + all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( + "SELECT 1/* a /* b */ c */0", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), + Token::Number("0".to_string(), false), + ], + ); } #[test] fn tokenize_nested_multiline_comment_empty() { - let sql = "select 1/*/**/*/0"; - - let dialect = GenericDialect {}; - let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); - let expected = vec![ - Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), - Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), - Token::Number("0".to_string(), false), - ]; - - compare(expected, tokens); + all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( + "select 1/*/**/*/0", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), + Token::Number("0".to_string(), false), + ], + ); } #[test] fn tokenize_nested_comments_if_not_supported() { - let dialect = SQLiteDialect {}; - let sql = "SELECT 1/*/* nested comment */*/0"; - let tokens = Tokenizer::new(&dialect, sql).tokenize(); - let expected = vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "/* nested comment ".to_string(), - )), - Token::Mul, - Token::Div, - Token::Number("0".to_string(), false), - ]; - - compare(expected, tokens.unwrap()); + all_dialects_except(|d| d.supports_nested_comments()).tokenizes_to( + "SELECT 1/*/* nested comment */*/0", + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + Token::Whitespace(Whitespace::MultiLineComment( + "/* nested comment ".to_string(), + )), + Token::Mul, + Token::Div, + Token::Number("0".to_string(), false), + ], + ); } #[test]