Skip to content

Commit 5089fd5

Browse files
Add support for C-style comments
This commit adds support for the C-style comments (`/*! ... */`) used by MySQL. The tokenizer parses and consumes the optional version number that follows the `!` character.
1 parent f642dd5 commit 5089fd5

File tree

4 files changed

+62
-1
lines changed

4 files changed

+62
-1
lines changed

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
156156
true
157157
}
158158

159+
fn supports_c_style_comments(&self) -> bool {
160+
true
161+
}
162+
159163
fn supports_user_host_grantee(&self) -> bool {
160164
true
161165
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,11 @@ pub trait Dialect: Debug + Any {
898898
false
899899
}
900900

901+
/// Returns true if the dialect supports C-style comments of the form `/*! ... */` (e.g. MySQL), whose content is tokenized instead of being treated as a comment
902+
fn supports_c_style_comments(&self) -> bool {
903+
false
904+
}
905+
901906
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
902907
/// as an alias assignment operator, rather than a boolean expression.
903908
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
8484
true
8585
}
8686

87+
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
88+
fn supports_c_style_comments(&self) -> bool {
89+
true
90+
}
91+
8792
fn parse_infix(
8893
&self,
8994
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
21072107
) -> Result<Option<Token>, TokenizerError> {
21082108
let mut s = String::new();
21092109
let mut nested = 1;
2110+
let mut c_style_comments = false;
21102111
let supports_nested_comments = self.dialect.supports_nested_comments();
2111-
2112+
let supports_c_style_comments = self.dialect.supports_c_style_comments();
21122113
loop {
21132114
match chars.next() {
21142115
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -2117,10 +2118,21 @@ impl<'a> Tokenizer<'a> {
21172118
s.push('*');
21182119
nested += 1;
21192120
}
2121+
Some('!') if supports_c_style_comments => {
2122+
c_style_comments = true;
2123+
while let Some('0') | Some('1') | Some('2') | Some('3') | Some('4')
2124+
| Some('5') | Some('6') | Some('7') | Some('8') | Some('9') = chars.peek()
2125+
{
2126+
chars.next(); // consume the digit
2127+
}
2128+
}
21202129
Some('*') if matches!(chars.peek(), Some('/')) => {
21212130
chars.next(); // consume the '/'
21222131
nested -= 1;
21232132
if nested == 0 {
2133+
if c_style_comments {
2134+
break Ok(Some(Token::make_word(&s, None)));
2135+
}
21242136
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
21252137
}
21262138
s.push('*');
@@ -4070,4 +4082,39 @@ mod tests {
40704082
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
40714083
}
40724084
}
4085+
#[test]
4086+
fn tokenize_multiline_comment_with_c_style_comment() {
4087+
let sql = String::from("0/*!word*/1");
4088+
4089+
let dialect = MySqlDialect {};
4090+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4091+
let expected = vec![
4092+
Token::Number("0".to_string(), false),
4093+
Token::Word(Word {
4094+
value: "word".to_string(),
4095+
quote_style: None,
4096+
keyword: Keyword::NoKeyword,
4097+
}),
4098+
Token::Number("1".to_string(), false),
4099+
];
4100+
compare(expected, tokens);
4101+
}
4102+
4103+
#[test]
4104+
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
4105+
let sql = String::from("0/*!8000000word*/1");
4106+
4107+
let dialect = MySqlDialect {};
4108+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4109+
let expected = vec![
4110+
Token::Number("0".to_string(), false),
4111+
Token::Word(Word {
4112+
value: "word".to_string(),
4113+
quote_style: None,
4114+
keyword: Keyword::NoKeyword,
4115+
}),
4116+
Token::Number("1".to_string(), false),
4117+
];
4118+
compare(expected, tokens);
4119+
}
40734120
}

0 commit comments

Comments
 (0)