-
Notifications
You must be signed in to change notification settings - Fork 653
Add support for C-style comments #2034
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> { | |
) -> Result<Option<Token>, TokenizerError> { | ||
let mut s = String::new(); | ||
let mut nested = 1; | ||
let mut c_style_comments = false; | ||
let supports_nested_comments = self.dialect.supports_nested_comments(); | ||
|
||
let supports_c_style_comments = self.dialect.supports_c_style_comments(); | ||
loop { | ||
match chars.next() { | ||
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => { | ||
|
@@ -2117,10 +2118,40 @@ impl<'a> Tokenizer<'a> { | |
s.push('*'); | ||
nested += 1; | ||
} | ||
Some('!') if supports_c_style_comments => { | ||
c_style_comments = true; | ||
// consume the optional version digits and whitespace | ||
while let Some(&c) = chars.peek() { | ||
if c.is_ascii_digit() || c.is_whitespace() { | ||
chars.next(); | ||
} else { | ||
break; | ||
} | ||
} | ||
} | ||
// consume all leading whitespaces until the '*/' character if in a C-style comment | ||
Some(ch) if ch.is_whitespace() && c_style_comments => { | ||
let mut tmp_s = String::new(); | ||
while let Some(c) = chars.next() { | ||
if c.is_whitespace() { | ||
tmp_s.push(c); | ||
} else if c == '*' && chars.peek() == Some(&'/') { | ||
chars.next(); // consume the '/' | ||
return Ok(Some(Token::make_word(&s, None))); | ||
} else { | ||
tmp_s.push(c); | ||
s.push_str(&tmp_s); | ||
break; | ||
} | ||
} | ||
} | ||
Some('*') if matches!(chars.peek(), Some('/')) => { | ||
chars.next(); // consume the '/' | ||
nested -= 1; | ||
if nested == 0 { | ||
if c_style_comments { | ||
break Ok(Some(Token::make_word(&s, None))); | ||
} | ||
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); | ||
} | ||
s.push('*'); | ||
|
@@ -4070,4 +4101,39 @@ mod tests { | |
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); | ||
} | ||
} | ||
#[test] | ||
fn tokenize_multiline_comment_with_c_style_comment() { | ||
let sql = String::from("0/*! word */1"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looking at their docs, I'm wondering if/how we support these examples? SELECT /*! STRAIGHT_JOIN */ col1 FROM table1,table2
/*!50110 KEY_BLOCK_SIZE=1024 */
SELECT /*! BKA(t1) */ FROM T There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @iffyio - Parsing the C-style comment keeps sqlparser from discarding its contents as if it were a normal comment. Support for each hint will have to be added on a case-by-case basis. For example #2033 - MySQL adds a C-style comment if you run SHOW CREATE TABLE:
Without the current patch, the invisible keyword will be discarded. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, so to clarify, I'm rather wondering about the parser behavior for hints that aren't single words, e.g. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @iffyio thanks for flagging this. I have fixed the issue and now we properly return individual tokens inside a C-style hint comment. |
||
|
||
let dialect = MySqlDialect {}; | ||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); | ||
let expected = vec![ | ||
Token::Number("0".to_string(), false), | ||
Token::Word(Word { | ||
value: "word".to_string(), | ||
quote_style: None, | ||
keyword: Keyword::NoKeyword, | ||
}), | ||
Token::Number("1".to_string(), false), | ||
]; | ||
compare(expected, tokens); | ||
} | ||
|
||
// Tokenizes `0/*!8000000 word */1` with `MySqlDialect` and expects exactly three | ||
// tokens: the number 0, the unquoted word `word`, and the number 1. The `/*!`, | ||
// the version digits `8000000`, the surrounding whitespace, and the closing `*/` | ||
// are expected to produce no tokens of their own. | ||
#[test] | ||
fn tokenize_multiline_comment_with_c_style_comment_and_version() { | ||
let sql = String::from("0/*!8000000 word */1"); | ||
|
||
let dialect = MySqlDialect {}; | ||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); | ||
let expected = vec![ | ||
Token::Number("0".to_string(), false), | ||
Token::Word(Word { | ||
value: "word".to_string(), | ||
quote_style: None, | ||
keyword: Keyword::NoKeyword, | ||
}), | ||
Token::Number("1".to_string(), false), | ||
]; | ||
compare(expected, tokens); | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.