@@ -676,6 +676,60 @@ wrong_exception_type(PyObject *exc)
676676    PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeTranslateError)
677677
678678
679+ // --- codecs handlers: utilities --------------------------------------------- 
680+ 
681+ /* 
682+  * Return the number of characters (including special prefixes) 
683+  * needed to represent 'ch' by _codec_handler_write_unicode_hex(). 
684+  */ 
685+ static  inline  Py_ssize_t 
686+ _codec_handler_unicode_hex_width (Py_UCS4  ch )
687+ {
688+     if  (ch  >= 0x10000 ) {
689+         // format: '\\' + 'U' + 8 hex digits 
690+         return  1  +  1  +  8 ;
691+     }
692+     else  if  (ch  >= 0x100 ) {
693+         // format: '\\' + 'u' + 4 hex digits 
694+         return  1  +  1  +  4 ;
695+     }
696+     else  {
697+         // format: '\\' + 'x' + 2 hex digits 
698+         return  1  +  1  +  2 ;
699+     }
700+ }
701+ 
702+ 
703+ /* 
704+  * Write the hexadecimal representation of 'ch' to the buffer pointed by 'p' 
705+  * using 2, 4, or 8 characters prefixed by '\x', '\u', or '\U' respectively. 
706+  */ 
707+ static  inline  void 
708+ _codec_handler_write_unicode_hex (Py_UCS1  * * p , Py_UCS4  ch )
709+ {
710+     * (* p )++  =  '\\' ;
711+     if  (ch  >= 0x10000 ) {
712+         * (* p )++  =  'U' ;
713+         * (* p )++  =  Py_hexdigits [(ch  >> 28 ) &  0xf ];
714+         * (* p )++  =  Py_hexdigits [(ch  >> 24 ) &  0xf ];
715+         * (* p )++  =  Py_hexdigits [(ch  >> 20 ) &  0xf ];
716+         * (* p )++  =  Py_hexdigits [(ch  >> 16 ) &  0xf ];
717+         * (* p )++  =  Py_hexdigits [(ch  >> 12 ) &  0xf ];
718+         * (* p )++  =  Py_hexdigits [(ch  >> 8 ) &  0xf ];
719+     }
720+     else  if  (ch  >= 0x100 ) {
721+         * (* p )++  =  'u' ;
722+         * (* p )++  =  Py_hexdigits [(ch  >> 12 ) &  0xf ];
723+         * (* p )++  =  Py_hexdigits [(ch  >> 8 ) &  0xf ];
724+     }
725+     else  {
726+         * (* p )++  =  'x' ;
727+     }
728+     * (* p )++  =  Py_hexdigits [(ch  >> 4 ) &  0xf ];
729+     * (* p )++  =  Py_hexdigits [ch  &  0xf ];
730+ }
731+ 
732+ 
679733// --- handler: 'strict' ------------------------------------------------------ 
680734
681735PyObject  * PyCodec_StrictErrors (PyObject  * exc )
@@ -942,17 +996,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
942996
943997    Py_ssize_t  ressize  =  0 ;
944998    for  (Py_ssize_t  i  =  start ; i  <  end ; ++ i ) {
945-         /* object is guaranteed to be "ready" */ 
946999        Py_UCS4  c  =  PyUnicode_READ_CHAR (obj , i );
947-         if  (c  >= 0x10000 ) {
948-             ressize  +=  1  +  1  +  8 ;
949-         }
950-         else  if  (c  >= 0x100 ) {
951-             ressize  +=  1  +  1  +  4 ;
952-         }
953-         else  {
954-             ressize  +=  1  +  1  +  2 ;
955-         }
1000+         ressize  +=  _codec_handler_unicode_hex_width (c );
9561001    }
9571002    PyObject  * res  =  PyUnicode_New (ressize , 127 );
9581003    if  (res  ==  NULL ) {
@@ -962,26 +1007,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
9621007    Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
9631008    for  (Py_ssize_t  i  =  start ; i  <  end ; ++ i ) {
9641009        Py_UCS4  c  =  PyUnicode_READ_CHAR (obj , i );
965-         * outp ++  =  '\\' ;
966-         if  (c  >= 0x00010000 ) {
967-             * outp ++  =  'U' ;
968-             * outp ++  =  Py_hexdigits [(c  >> 28 ) &  0xf ];
969-             * outp ++  =  Py_hexdigits [(c  >> 24 ) &  0xf ];
970-             * outp ++  =  Py_hexdigits [(c  >> 20 ) &  0xf ];
971-             * outp ++  =  Py_hexdigits [(c  >> 16 ) &  0xf ];
972-             * outp ++  =  Py_hexdigits [(c  >> 12 ) &  0xf ];
973-             * outp ++  =  Py_hexdigits [(c  >> 8 ) &  0xf ];
974-         }
975-         else  if  (c  >= 0x100 ) {
976-             * outp ++  =  'u' ;
977-             * outp ++  =  Py_hexdigits [(c  >> 12 ) &  0xf ];
978-             * outp ++  =  Py_hexdigits [(c  >> 8 ) &  0xf ];
979-         }
980-         else  {
981-             * outp ++  =  'x' ;
982-         }
983-         * outp ++  =  Py_hexdigits [(c  >> 4 ) &  0xf ];
984-         * outp ++  =  Py_hexdigits [c  &  0xf ];
1010+         _codec_handler_write_unicode_hex (& outp , c );
9851011    }
9861012    assert (_PyUnicode_CheckConsistency (res , 1 ));
9871013    Py_DECREF (obj );
@@ -1012,82 +1038,55 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
10121038        return  NULL ;
10131039    }
10141040
1015-     char  buffer [256 ]; /* NAME_MAXLEN */ 
1016-     Py_ssize_t  i  =  start , ressize  =  0 , replsize ;
1017-     for  (; i  <  end ; ++ i ) {
1018-         // If 'c' is recognized by getname(), the corresponding replacement 
1019-         // is '\\' + 'U' + '{' + NAME + '}', namely 1 + 1 + 1 + len(NAME) + 1 
1020-         // characters. Otherwise, the replacement is obtained similarly as 
1021-         // in PyCodec_BackslashReplaceErrors(). 
1022-         Py_UCS4  c  =  PyUnicode_READ_CHAR (obj , i );
1041+     char  buffer [256 ]; /* NAME_MAXLEN in unicodename_db.h */ 
1042+     Py_ssize_t  imax  =  start , ressize  =  0 , replsize ;
1043+     for  (; imax  <  end ; ++ imax ) {
1044+         Py_UCS4  c  =  PyUnicode_READ_CHAR (obj , imax );
10231045        if  (ucnhash_capi -> getname (c , buffer , sizeof (buffer ), 1 )) {
1024-             // failures of 'getname()' are ignored by the handler 
1025-             replsize  =  1  +  1  +  1  +  (int )strlen (buffer ) +  1 ;
1026-         }
1027-         else  if  (c  >= 0x10000 ) {
1028-             replsize  =  1  +  1  +  8 ;
1029-         }
1030-         else  if  (c  >= 0x100 ) {
1031-             replsize  =  1  +  1  +  4 ;
1046+             // If 'c' is recognized by getname(), the corresponding replacement 
1047+             // is '\\' + 'N' + '{' + NAME + '}', i.e. 1 + 1 + 1 + len(NAME) + 1 
1048+             // characters. Failures of 'getname()' are ignored by the handler. 
1049+             replsize  =  1  +  1  +  1  +  strlen (buffer ) +  1 ;
10321050        }
10331051        else  {
1034-             replsize  =  1   +   1   +   2 ;
1052+             replsize  =  _codec_handler_unicode_hex_width ( c ) ;
10351053        }
10361054        if  (ressize  >  PY_SSIZE_T_MAX  -  replsize ) {
10371055            break ;
10381056        }
10391057        ressize  +=  replsize ;
10401058    }
10411059
1042-     end  =  i ;
10431060    PyObject  * res  =  PyUnicode_New (ressize , 127 );
10441061    if  (res  ==  NULL ) {
10451062        Py_DECREF (obj );
10461063        return  NULL ;
10471064    }
10481065
10491066    Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
1050-     for  (Py_ssize_t  i  =  start ; i  <  end ; ++ i ) {
1067+     for  (Py_ssize_t  i  =  start ; i  <  imax ; ++ i ) {
10511068        Py_UCS4  c  =  PyUnicode_READ_CHAR (obj , i );
1052-         * outp ++  =  '\\' ;
10531069        if  (ucnhash_capi -> getname (c , buffer , sizeof (buffer ), 1 )) {
1054-             // failures of 'getname()' are ignored by the handler 
1070+             * outp ++   =   '\\' ; 
10551071            * outp ++  =  'N' ;
10561072            * outp ++  =  '{' ;
10571073            (void )strcpy ((char  * )outp , buffer );
10581074            outp  +=  strlen (buffer );
10591075            * outp ++  =  '}' ;
1060-             continue ;
1061-         }
1062- 
1063-         if  (c  >= 0x00010000 ) {
1064-             * outp ++  =  'U' ;
1065-             * outp ++  =  Py_hexdigits [(c  >> 28 ) &  0xf ];
1066-             * outp ++  =  Py_hexdigits [(c  >> 24 ) &  0xf ];
1067-             * outp ++  =  Py_hexdigits [(c  >> 20 ) &  0xf ];
1068-             * outp ++  =  Py_hexdigits [(c  >> 16 ) &  0xf ];
1069-             * outp ++  =  Py_hexdigits [(c  >> 12 ) &  0xf ];
1070-             * outp ++  =  Py_hexdigits [(c  >> 8 ) &  0xf ];
1071-         }
1072-         else  if  (c  >= 0x100 ) {
1073-             * outp ++  =  'u' ;
1074-             * outp ++  =  Py_hexdigits [(c  >> 12 ) &  0xf ];
1075-             * outp ++  =  Py_hexdigits [(c  >> 8 ) &  0xf ];
10761076        }
10771077        else  {
1078-             * outp ++   =   'x' ;
1078+             _codec_handler_write_unicode_hex ( & outp ,  c ) ;
10791079        }
1080-         * outp ++  =  Py_hexdigits [(c  >> 4 ) &  0xf ];
1081-         * outp ++  =  Py_hexdigits [c  &  0xf ];
10821080    }
10831081
10841082    assert (outp  ==  PyUnicode_1BYTE_DATA (res ) +  ressize );
10851083    assert (_PyUnicode_CheckConsistency (res , 1 ));
1086-     PyObject  * restuple  =  Py_BuildValue ("(Nn)" , res , end );
1084+     PyObject  * restuple  =  Py_BuildValue ("(Nn)" , res , imax );
10871085    Py_DECREF (obj );
10881086    return  restuple ;
10891087}
10901088
1089+ 
10911090#define  ENC_UNKNOWN      -1
10921091#define  ENC_UTF8         0
10931092#define  ENC_UTF16BE      1
0 commit comments