@@ -215,6 +215,7 @@ tokenizeriter_next(tokenizeriterobject *it)
215215
216216 const char * line_start = ISSTRINGLIT (type ) ? it -> tok -> multi_line_start : it -> tok -> line_start ;
217217 PyObject * line = NULL ;
218+ int line_changed = 1 ;
218219 if (it -> tok -> tok_extra_tokens && is_trailing_token ) {
219220 line = PyUnicode_FromString ("" );
220221 } else {
@@ -229,12 +230,11 @@ tokenizeriter_next(tokenizeriterobject *it)
229230 Py_XDECREF (it -> last_line );
230231 line = PyUnicode_DecodeUTF8 (line_start , size , "replace" );
231232 it -> last_line = line ;
232- if (it -> tok -> lineno != it -> last_end_lineno ) {
233- it -> byte_col_offset_diff = 0 ;
234- }
233+ it -> byte_col_offset_diff = 0 ;
235234 } else {
236235 // Line hasn't changed so we reuse the cached one.
237236 line = it -> last_line ;
237+ line_changed = 0 ;
238238 }
239239 }
240240 if (line == NULL ) {
@@ -252,7 +252,13 @@ tokenizeriter_next(tokenizeriterobject *it)
252252 Py_ssize_t byte_offset = -1 ;
253253 if (token .start != NULL && token .start >= line_start ) {
254254 byte_offset = token .start - line_start ;
255- col_offset = byte_offset - it -> byte_col_offset_diff ;
255+ if (line_changed ) {
256+ col_offset = _PyPegen_byte_offset_to_character_offset_line (line , 0 , byte_offset );
257+ it -> byte_col_offset_diff = byte_offset - col_offset ;
258+ }
259+ else {
260+ col_offset = byte_offset - it -> byte_col_offset_diff ;
261+ }
256262 }
257263 if (token .end != NULL && token .end >= it -> tok -> line_start ) {
258264 Py_ssize_t end_byte_offset = token .end - it -> tok -> line_start ;
0 commit comments