Commit 612a2da

fix(tokenizer): fix token col attribute when there is leading whitespace after a newline (#5094)

1 parent 3bcf989

3 files changed, 12 insertions(+), 2 deletions(-)

sqlglot/tokens.py

Lines changed: 1 addition & 1 deletion
@@ -1109,7 +1109,7 @@ def _advance(self, i: int = 1, alnum: bool = False) -> None:
         if self.WHITE_SPACE.get(self._char) is TokenType.BREAK:
             # Ensures we don't count an extra line if we get a \r\n line break sequence
             if not (self._char == "\r" and self._peek == "\n"):
-                self._col = 1
+                self._col = i
                 self._line += 1
         else:
             self._col += i
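
Why i and not 1: _advance() can step over more than one character at a time
(the else branch does self._col += i for the same reason), e.g. when the
scanner consumes a run of leading whitespace in a single advance. When such a
multi-character step starts on a line break, the landing position is column i
of the new line, so hard-resetting the column to 1 silently dropped the
leading whitespace. A minimal, self-contained sketch of the bookkeeping (an
illustrative model with a hypothetical helper name, not the actual Tokenizer
code):

    def advance_col(col: int, line: int, char: str, i: int) -> tuple[int, int]:
        """(col, line) after advancing i characters past the current char."""
        if char == "\n":
            # Land on the i-th character of the next line. Returning
            # (1, line + 1) here, as the old code did, loses i - 1 columns.
            return i, line + 1
        return col + i, line

    # One advance over "\n" plus four leading spaces (i = 5) lands at column 5:
    assert advance_col(col=8, line=1, char="\n", i=5) == (5, 2)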

sqlglotrs/src/tokenizer.rs

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ impl<'a> TokenizerState<'a> {
         if Some(&self.token_types.break_) == self.settings.white_space.get(&self.current_char) {
             // Ensures we don't count an extra line if we get a \r\n line break sequence.
             if !(self.current_char == '\r' && self.peek_char == '\n') {
-                self.column = 1;
+                self.column = i as usize;
                 self.line += 1;
             }
         } else {
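
sqlglotrs is the Rust port of the tokenizer and mirrors the Python
implementation, so the same one-line fix is applied here; the only difference
is the i as usize cast to match the type of the column field.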

tests/test_tokens.py

Lines changed: 10 additions & 0 deletions
@@ -67,9 +67,19 @@ def test_token_line_col(self):
         tokens = Tokenizer().tokenize("SELECT\r\n  1,\r\n  2")
 
         self.assertEqual(tokens[0].line, 1)
+        self.assertEqual(tokens[0].col, 6)
         self.assertEqual(tokens[1].line, 2)
+        self.assertEqual(tokens[1].col, 3)
         self.assertEqual(tokens[2].line, 2)
+        self.assertEqual(tokens[2].col, 4)
         self.assertEqual(tokens[3].line, 3)
+        self.assertEqual(tokens[3].col, 3)
+
+        tokens = Tokenizer().tokenize("  SELECT\n    100")
+        self.assertEqual(tokens[0].line, 1)
+        self.assertEqual(tokens[0].col, 8)
+        self.assertEqual(tokens[1].line, 2)
+        self.assertEqual(tokens[1].col, 7)
 
     def test_crlf(self):
         tokens = Tokenizer().tokenize("SELECT a\r\nFROM b")
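
The second tokenize() call is the regression test for this commit: with the
old reset-to-1 behavior, tokens that follow leading indentation reported a
column that was too small. The assertions also pin down the semantics of
Token.col as the column of the token's last character. A quick way to check
the fix by hand, using the same public API the tests exercise:

    from sqlglot.tokens import Tokenizer

    for token in Tokenizer().tokenize("  SELECT\n    100"):
        print(token.token_type, token.line, token.col)

    # With this fix, prints something like:
    #   TokenType.SELECT 1 8
    #   TokenType.NUMBER 2 7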
