@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
2107
2107
) -> Result < Option < Token > , TokenizerError > {
2108
2108
let mut s = String :: new ( ) ;
2109
2109
let mut nested = 1 ;
2110
+ let mut c_style_comments = false ;
2110
2111
let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2111
-
2112
+ let supports_c_style_comments = self . dialect . supports_c_style_comments ( ) ;
2112
2113
loop {
2113
2114
match chars. next ( ) {
2114
2115
Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -2117,10 +2118,37 @@ impl<'a> Tokenizer<'a> {
2117
2118
s. push ( '*' ) ;
2118
2119
nested += 1 ;
2119
2120
}
2121
+ Some ( '!' ) if supports_c_style_comments => {
2122
+ c_style_comments = true ;
2123
+ while let Some ( '0' ) | Some ( '1' ) | Some ( '2' ) | Some ( '3' ) | Some ( '4' )
2124
+ | Some ( '5' ) | Some ( '6' ) | Some ( '7' ) | Some ( '8' ) | Some ( '9' ) = chars. peek ( )
2125
+ {
2126
+ chars. next ( ) ; // consume the digit
2127
+ }
2128
+ }
2129
+ // consume all leading whitespaces until the '*/' character if in a C-style comment
2130
+ Some ( ch) if ch. is_whitespace ( ) && c_style_comments => {
2131
+ let mut ws_count = 0 ;
2132
+ while let Some ( & c) = chars. peek ( ) {
2133
+ if c. is_whitespace ( ) {
2134
+ ws_count += 1 ;
2135
+ } else if c == '*' && chars. peek ( ) == Some ( & '/' ) {
2136
+ for _ in 0 ..ws_count {
2137
+ chars. next ( ) ;
2138
+ }
2139
+ break ;
2140
+ } else {
2141
+ break ;
2142
+ }
2143
+ }
2144
+ }
2120
2145
Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
2121
2146
chars. next ( ) ; // consume the '/'
2122
2147
nested -= 1 ;
2123
2148
if nested == 0 {
2149
+ if c_style_comments {
2150
+ break Ok ( Some ( Token :: make_word ( & s, None ) ) ) ;
2151
+ }
2124
2152
break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
2125
2153
}
2126
2154
s. push ( '*' ) ;
@@ -4070,4 +4098,39 @@ mod tests {
4070
4098
panic ! ( "Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}" ) ;
4071
4099
}
4072
4100
}
4101
/// Tokenizes `0/*! word */1` with the MySQL dialect and expects the
/// `/*! ... */` section to come back as the bare word `word`, sitting
/// between the two surrounding number tokens.
#[test]
fn tokenize_multiline_comment_with_c_style_comment() {
    let query = "0/*! word */1".to_owned();
    let mysql = MySqlDialect {};

    let actual = Tokenizer::new(&mysql, &query).tokenize().unwrap();

    // The content of the comment, as the tokenizer is expected to surface it.
    let inner_word = Token::Word(Word {
        value: String::from("word"),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number(String::from("0"), false),
        inner_word,
        Token::Number(String::from("1"), false),
    ];

    compare(expected, actual);
}
4118
+
4119
/// Tokenizes `0/*!8000000 word */1` with the MySQL dialect and expects the
/// digits following `/*!` to be dropped, leaving only the word `word`
/// between the two surrounding number tokens.
#[test]
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
    let query = "0/*!8000000 word */1".to_owned();
    let mysql = MySqlDialect {};

    let actual = Tokenizer::new(&mysql, &query).tokenize().unwrap();

    // Only the word survives; the version digits must not appear in any token.
    let inner_word = Token::Word(Word {
        value: String::from("word"),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number(String::from("0"), false),
        inner_word,
        Token::Number(String::from("1"), false),
    ];

    compare(expected, actual);
}
4073
4136
}
0 commit comments