From f5f916bafaf24430affb2a0be4431bd63e688fce Mon Sep 17 00:00:00 2001
From: Hans Ott
Date: Wed, 8 Jan 2025 09:46:03 +0100
Subject: [PATCH] Start new line if `\r` and dialect is postgres

Currently the tokenizer throws an error for

```sql
insert into cats_2 (petname) values ('foo'),--\r(version()||'\n');
```

This is because Postgres treats a lone `\r` as a newline character, see
https://github.com/postgres/postgres/blob/master/src/backend/parser/scan.l

> In order to make the world safe for Windows and Mac clients as well as
> Unix ones, we accept either \n or \r as a newline.
> A DOS-style \r\n sequence will be seen as two successive newlines, but
> that doesn't cause any problems.

> non_newline [^\n\r]
> comment ("--"{non_newline}*)

Let's make sure we start a new line if we encounter a `\r` when
tokenizing a comment.
---
 src/tokenizer.rs | 63 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9103f22ea..54f3d3316 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1390,11 +1390,17 @@ impl<'a> Tokenizer<'a> {
 
     // Consume characters until newline
     fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
-        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
+        let mut comment = peeking_take_while(chars, |ch| match ch {
+            '\n' => false, // Always stop at \n
+            '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres
+            _ => true, // Keep consuming for other characters
+        });
+
         if let Some(ch) = chars.next() {
-            assert_eq!(ch, '\n');
+            assert!(ch == '\n' || ch == '\r');
             comment.push(ch);
         }
+
         comment
     }
 
@@ -2425,17 +2431,62 @@ mod tests {
 
     #[test]
     fn tokenize_comment() {
-        let sql = String::from("0--this is a comment\n1");
+        let test_cases = vec![
+            (
+                String::from("0--this is a comment\n1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\n".to_string(),
+                    }),
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+            (
+                String::from("0--this is a comment\r1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\r1".to_string(),
+                    }),
+                ],
+            ),
+            (
+                String::from("0--this is a comment\r\n1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\r\n".to_string(),
+                    }),
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+        ];
 
         let dialect = GenericDialect {};
+
+        for (sql, expected) in test_cases {
+            let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+            compare(expected, tokens);
+        }
+    }
+
+    #[test]
+    fn tokenize_comment_postgres() {
+        let sql = String::from("1--\r0");
+
+        let dialect = PostgreSqlDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
-            Token::Number("0".to_string(), false),
+            Token::Number("1".to_string(), false),
             Token::Whitespace(Whitespace::SingleLineComment {
                 prefix: "--".to_string(),
-                comment: "this is a comment\n".to_string(),
+                comment: "\r".to_string(),
             }),
-            Token::Number("1".to_string(), false),
+            Token::Number("0".to_string(), false),
         ];
         compare(expected, tokens);
     }
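
For context, a minimal standalone sketch (not part of the patch; it assumes the
crate's public `Tokenizer::new(&dialect, &sql).tokenize()` API and module paths
as exercised by the tests above) showing the statement from the commit message
tokenizing cleanly under the Postgres dialect after this change:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    // The statement from the commit message; \r and \n are the actual
    // control characters. Before this patch the single-line comment ran
    // past the \r and up to the \n inside '\n', swallowing the opening
    // quote and leaving an unterminated string literal.
    let sql = "insert into cats_2 (petname) values ('foo'),--\r(version()||'\n');";

    let dialect = PostgreSqlDialect {};
    // With this patch the comment token ends at the \r, so tokenizing
    // resumes with (version()||'\n'); as the next "line".
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
    for token in tokens {
        println!("{token:?}");
    }
}
```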