Skip to content

Commit 4123881

Browse files
Add support for C-style comments
This commit adds support for the C-style `/*! ... */` comments supported by MySQL. The tokenizer parses and consumes the optional version number that may follow the `!` character.
1 parent f642dd5 commit 4123881

File tree

4 files changed

+78
-1
lines changed

4 files changed

+78
-1
lines changed

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
156156
true
157157
}
158158

159+
/// The generic dialect opts in to C-style `/*! ... */` comments.
fn supports_c_style_comments(&self) -> bool {
160+
true
161+
}
162+
159163
/// Always `true` for the generic dialect.
/// NOTE(review): presumably enables `user@host`-style grantees; confirm against the
/// `Dialect` trait documentation, which is not visible in this hunk.
fn supports_user_host_grantee(&self) -> bool {
160164
true
161165
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,11 @@ pub trait Dialect: Debug + Any {
898898
false
899899
}
900900

901+
/// Returns true if the dialect supports C-style comments of the form `/*! ... */`,
/// optionally with a version number directly after the `!` (e.g. `/*!80000 ... */`).
/// For such dialects the tokenizer emits the comment body as a token instead of
/// treating it as a whitespace comment. Defaults to `false`.
902+
fn supports_c_style_comments(&self) -> bool {
903+
false
904+
}
905+
901906
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
902907
/// as an alias assignment operator, rather than a boolean expression.
903908
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
8484
true
8585
}
8686

87+
/// MySQL supports C-style `/*! ... */` comments.
///
/// See <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
88+
fn supports_c_style_comments(&self) -> bool {
89+
true
90+
}
91+
8792
fn parse_infix(
8893
&self,
8994
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
21072107
) -> Result<Option<Token>, TokenizerError> {
21082108
let mut s = String::new();
21092109
let mut nested = 1;
2110+
let mut c_style_comments = false;
21102111
let supports_nested_comments = self.dialect.supports_nested_comments();
2111-
2112+
let supports_c_style_comments = self.dialect.supports_c_style_comments();
21122113
loop {
21132114
match chars.next() {
21142115
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -2117,10 +2118,37 @@ impl<'a> Tokenizer<'a> {
21172118
s.push('*');
21182119
nested += 1;
21192120
}
2121+
Some('!') if supports_c_style_comments => {
2122+
c_style_comments = true;
2123+
while let Some('0') | Some('1') | Some('2') | Some('3') | Some('4')
2124+
| Some('5') | Some('6') | Some('7') | Some('8') | Some('9') = chars.peek()
2125+
{
2126+
chars.next(); // consume the digit
2127+
}
2128+
}
2129+
// consume all leading whitespaces until the '*/' character if in a C-style comment
2130+
Some(ch) if ch.is_whitespace() && c_style_comments => {
2131+
let mut ws_count = 0;
2132+
while let Some(&c) = chars.peek() {
2133+
if c.is_whitespace() {
2134+
ws_count += 1;
2135+
} else if c == '*' && chars.peek() == Some(&'/') {
2136+
for _ in 0..ws_count {
2137+
chars.next();
2138+
}
2139+
break;
2140+
} else {
2141+
break;
2142+
}
2143+
}
2144+
}
21202145
Some('*') if matches!(chars.peek(), Some('/')) => {
21212146
chars.next(); // consume the '/'
21222147
nested -= 1;
21232148
if nested == 0 {
2149+
if c_style_comments {
2150+
break Ok(Some(Token::make_word(&s, None)));
2151+
}
21242152
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
21252153
}
21262154
s.push('*');
@@ -4070,4 +4098,39 @@ mod tests {
40704098
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
40714099
}
40724100
}
4101+
/// Tokenizes `0/*! word */1` with the MySQL dialect and expects the body of the
/// `/*! ... */` comment to surface as the unquoted, non-keyword word `word` between
/// the number tokens `0` and `1`; the spaces around `word` are not part of the token.
#[test]
4102+
fn tokenize_multiline_comment_with_c_style_comment() {
4103+
let sql = String::from("0/*! word */1");
4104+
4105+
let dialect = MySqlDialect {};
4106+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4107+
let expected = vec![
4108+
Token::Number("0".to_string(), false),
4109+
Token::Word(Word {
4110+
value: "word".to_string(),
4111+
quote_style: None,
4112+
keyword: Keyword::NoKeyword,
4113+
}),
4114+
Token::Number("1".to_string(), false),
4115+
];
4116+
compare(expected, tokens);
4117+
}
4118+
4119+
/// Same scenario as the unversioned `/*! word */` case, but with a version number
/// directly after the `!` (`/*!8000000 word */`). The expected tokens are identical:
/// the digits of the version number must not appear in the output.
#[test]
4120+
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
4121+
let sql = String::from("0/*!8000000 word */1");
4122+
4123+
let dialect = MySqlDialect {};
4124+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4125+
let expected = vec![
4126+
Token::Number("0".to_string(), false),
4127+
Token::Word(Word {
4128+
value: "word".to_string(),
4129+
quote_style: None,
4130+
keyword: Keyword::NoKeyword,
4131+
}),
4132+
Token::Number("1".to_string(), false),
4133+
];
4134+
compare(expected, tokens);
4135+
}
40734136
}

0 commit comments

Comments
 (0)