@@ -140,21 +140,18 @@ _create_dummy_identifier(Parser *p)
140140}
141141
142142static inline Py_ssize_t
143- byte_offset_to_character_offset (PyObject * line , int col_offset )
143+ byte_offset_to_character_offset (PyObject * line , Py_ssize_t col_offset )
144144{
145145 const char * str = PyUnicode_AsUTF8 (line );
146146 if (!str ) {
147147 return 0 ;
148148 }
149+ assert (col_offset >= 0 && (unsigned long )col_offset <= strlen (str ));
149150 PyObject * text = PyUnicode_DecodeUTF8 (str , col_offset , "replace" );
150151 if (!text ) {
151152 return 0 ;
152153 }
153154 Py_ssize_t size = PyUnicode_GET_LENGTH (text );
154- str = PyUnicode_AsUTF8 (text );
155- if (str != NULL && (int )strlen (str ) == col_offset ) {
156- size = strlen (str );
157- }
158155 Py_DECREF (text );
159156 return size ;
160157}
@@ -366,7 +363,7 @@ void *
366363_PyPegen_raise_error (Parser * p , PyObject * errtype , const char * errmsg , ...)
367364{
368365 Token * t = p -> known_err_token != NULL ? p -> known_err_token : p -> tokens [p -> fill - 1 ];
369- int col_offset ;
366+ Py_ssize_t col_offset ;
370367 if (t -> col_offset == -1 ) {
371368 col_offset = Py_SAFE_DOWNCAST (p -> tok -> cur - p -> tok -> buf ,
372369 intptr_t , int );
@@ -386,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
386383
387384void *
388385_PyPegen_raise_error_known_location (Parser * p , PyObject * errtype ,
389- int lineno , int col_offset ,
386+ Py_ssize_t lineno , Py_ssize_t col_offset ,
390387 const char * errmsg , va_list va )
391388{
392389 PyObject * value = NULL ;
@@ -406,16 +403,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
406403
407404 if (!error_line ) {
408405 Py_ssize_t size = p -> tok -> inp - p -> tok -> buf ;
409- if (size && p -> tok -> buf [size - 1 ] == '\n' ) {
410- size -- ;
411- }
412406 error_line = PyUnicode_DecodeUTF8 (p -> tok -> buf , size , "replace" );
413407 if (!error_line ) {
414408 goto error ;
415409 }
416410 }
417411
418- Py_ssize_t col_number = byte_offset_to_character_offset (error_line , col_offset );
412+ Py_ssize_t col_number = col_offset ;
413+
414+ if (p -> tok -> encoding != NULL ) {
415+ col_number = byte_offset_to_character_offset (error_line , col_offset );
416+ }
419417
420418 tmp = Py_BuildValue ("(OiiN)" , p -> tok -> filename , lineno , col_number , error_line );
421419 if (!tmp ) {
0 commit comments