@@ -214,6 +214,7 @@ tokenizeriter_next(tokenizeriterobject *it)
214214
215215 const char * line_start = ISSTRINGLIT (type ) ? it -> tok -> multi_line_start : it -> tok -> line_start ;
216216 PyObject * line = NULL ;
217+ int line_changed = 1 ;
217218 if (it -> tok -> tok_extra_tokens && is_trailing_token ) {
218219 line = PyUnicode_FromString ("" );
219220 } else {
@@ -228,12 +229,11 @@ tokenizeriter_next(tokenizeriterobject *it)
228229 Py_XDECREF (it -> last_line );
229230 line = PyUnicode_DecodeUTF8 (line_start , size , "replace" );
230231 it -> last_line = line ;
231- if (it -> tok -> lineno != it -> last_end_lineno ) {
232- it -> byte_col_offset_diff = 0 ;
233- }
232+ it -> byte_col_offset_diff = 0 ;
234233 } else {
235234 // Line hasn't changed so we reuse the cached one.
236235 line = it -> last_line ;
236+ line_changed = 0 ;
237237 }
238238 }
239239 if (line == NULL ) {
@@ -251,7 +251,13 @@ tokenizeriter_next(tokenizeriterobject *it)
251251 Py_ssize_t byte_offset = -1 ;
252252 if (token .start != NULL && token .start >= line_start ) {
253253 byte_offset = token .start - line_start ;
254- col_offset = byte_offset - it -> byte_col_offset_diff ;
254+ if (line_changed ) {
255+ col_offset = _PyPegen_byte_offset_to_character_offset_line (line , 0 , byte_offset );
256+ it -> byte_col_offset_diff = byte_offset - col_offset ;
257+ }
258+ else {
259+ col_offset = byte_offset - it -> byte_col_offset_diff ;
260+ }
255261 }
256262 if (token .end != NULL && token .end >= it -> tok -> line_start ) {
257263 Py_ssize_t end_byte_offset = token .end - it -> tok -> line_start ;
0 commit comments