@@ -1095,7 +1095,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
10951095#define ENC_UTF32LE 4
10961096
10971097static int
1098- get_standard_encoding (const char * encoding , int * bytelength )
1098+ get_standard_encoding_impl (const char * encoding , int * bytelength )
10991099{
11001100 if (Py_TOLOWER (encoding [0 ]) == 'u' &&
11011101 Py_TOLOWER (encoding [1 ]) == 't' &&
@@ -1153,172 +1153,212 @@ get_standard_encoding(const char *encoding, int *bytelength)
11531153 return ENC_UNKNOWN ;
11541154}
11551155
1156- /* This handler is declared static until someone demonstrates
1157- a need to call it directly. */
1156+
1157+ static int
1158+ get_standard_encoding (PyObject * encoding , int * code , int * bytelength )
1159+ {
1160+ const char * encoding_cstr = PyUnicode_AsUTF8 (encoding );
1161+ if (encoding_cstr == NULL ) {
1162+ return -1 ;
1163+ }
1164+ * code = get_standard_encoding_impl (encoding_cstr , bytelength );
1165+ return 0 ;
1166+ }
1167+
1168+
1169+ // --- handler: 'surrogatepass' -----------------------------------------------
1170+
11581171static PyObject *
1159- PyCodec_SurrogatePassErrors (PyObject * exc )
1172+ _PyCodec_SurrogatePassUnicodeEncodeError (PyObject * exc )
11601173{
1161- PyObject * restuple ;
1162- PyObject * object ;
1163- PyObject * encode ;
1164- const char * encoding ;
1165- int code ;
1166- int bytelength ;
1167- Py_ssize_t i ;
1168- Py_ssize_t start ;
1169- Py_ssize_t end ;
1170- PyObject * res ;
1174+ PyObject * encoding = PyUnicodeEncodeError_GetEncoding (exc );
1175+ if (encoding == NULL ) {
1176+ return NULL ;
1177+ }
1178+ int code , bytelength ;
1179+ int rc = get_standard_encoding (encoding , & code , & bytelength );
1180+ Py_DECREF (encoding );
1181+ if (rc < 0 ) {
1182+ return NULL ;
1183+ }
1184+ if (code == ENC_UNKNOWN ) {
1185+ goto bail ;
1186+ }
11711187
1172- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
1173- unsigned char * outp ;
1174- if (PyUnicodeEncodeError_GetStart (exc , & start ))
1175- return NULL ;
1176- if (PyUnicodeEncodeError_GetEnd (exc , & end ))
1177- return NULL ;
1178- if (!(object = PyUnicodeEncodeError_GetObject (exc )))
1179- return NULL ;
1180- if (!(encode = PyUnicodeEncodeError_GetEncoding (exc ))) {
1181- Py_DECREF (object );
1182- return NULL ;
1183- }
1184- if (!(encoding = PyUnicode_AsUTF8 (encode ))) {
1185- Py_DECREF (object );
1186- Py_DECREF (encode );
1187- return NULL ;
1188- }
1189- code = get_standard_encoding (encoding , & bytelength );
1190- Py_DECREF (encode );
1191- if (code == ENC_UNKNOWN ) {
1192- /* Not supported, fail with original exception */
1193- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1194- Py_DECREF (object );
1195- return NULL ;
1196- }
1188+ PyObject * obj ;
1189+ Py_ssize_t objlen , start , end , slen ;
1190+ if (_PyUnicodeError_GetParams (exc ,
1191+ & obj , & objlen ,
1192+ & start , & end , & slen , false) < 0 )
1193+ {
1194+ return NULL ;
1195+ }
11971196
1198- if (end - start > PY_SSIZE_T_MAX / bytelength )
1199- end = start + PY_SSIZE_T_MAX / bytelength ;
1200- res = PyBytes_FromStringAndSize (NULL , bytelength * (end - start ));
1201- if (!res ) {
1202- Py_DECREF (object );
1203- return NULL ;
1197+ if (slen > PY_SSIZE_T_MAX / bytelength ) {
1198+ end = start + PY_SSIZE_T_MAX / bytelength ;
1199+ end = Py_MIN (end , objlen );
1200+ slen = Py_MAX (0 , end - start );
1201+ }
1202+
1203+ PyObject * res = PyBytes_FromStringAndSize (NULL , bytelength * slen );
1204+ if (res == NULL ) {
1205+ Py_DECREF (obj );
1206+ return NULL ;
1207+ }
1208+
1209+ unsigned char * outp = (unsigned char * )PyBytes_AsString (res );
1210+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
1211+ /* object is guaranteed to be "ready" */
1212+ Py_UCS4 ch = PyUnicode_READ_CHAR (obj , i );
1213+ if (!Py_UNICODE_IS_SURROGATE (ch )) {
1214+ /* Not a surrogate, fail with original exception */
1215+ Py_DECREF (obj );
1216+ Py_DECREF (res );
1217+ goto bail ;
12041218 }
1205- outp = (unsigned char * )PyBytes_AsString (res );
1206- for (i = start ; i < end ; i ++ ) {
1207- /* object is guaranteed to be "ready" */
1208- Py_UCS4 ch = PyUnicode_READ_CHAR (object , i );
1209- if (!Py_UNICODE_IS_SURROGATE (ch )) {
1210- /* Not a surrogate, fail with original exception */
1211- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1212- Py_DECREF (res );
1213- Py_DECREF (object );
1214- return NULL ;
1215- }
1216- switch (code ) {
1217- case ENC_UTF8 :
1219+ switch (code ) {
1220+ case ENC_UTF8 : {
12181221 * outp ++ = (unsigned char )(0xe0 | (ch >> 12 ));
12191222 * outp ++ = (unsigned char )(0x80 | ((ch >> 6 ) & 0x3f ));
12201223 * outp ++ = (unsigned char )(0x80 | (ch & 0x3f ));
12211224 break ;
1222- case ENC_UTF16LE :
1223- * outp ++ = (unsigned char ) ch ;
1225+ }
1226+ case ENC_UTF16LE : {
1227+ * outp ++ = (unsigned char )ch ;
12241228 * outp ++ = (unsigned char )(ch >> 8 );
12251229 break ;
1226- case ENC_UTF16BE :
1230+ }
1231+ case ENC_UTF16BE : {
12271232 * outp ++ = (unsigned char )(ch >> 8 );
1228- * outp ++ = (unsigned char ) ch ;
1233+ * outp ++ = (unsigned char )ch ;
12291234 break ;
1230- case ENC_UTF32LE :
1231- * outp ++ = (unsigned char ) ch ;
1235+ }
1236+ case ENC_UTF32LE : {
1237+ * outp ++ = (unsigned char )ch ;
12321238 * outp ++ = (unsigned char )(ch >> 8 );
12331239 * outp ++ = (unsigned char )(ch >> 16 );
12341240 * outp ++ = (unsigned char )(ch >> 24 );
12351241 break ;
1236- case ENC_UTF32BE :
1242+ }
1243+ case ENC_UTF32BE : {
12371244 * outp ++ = (unsigned char )(ch >> 24 );
12381245 * outp ++ = (unsigned char )(ch >> 16 );
12391246 * outp ++ = (unsigned char )(ch >> 8 );
1240- * outp ++ = (unsigned char ) ch ;
1247+ * outp ++ = (unsigned char )ch ;
12411248 break ;
12421249 }
12431250 }
1244- restuple = Py_BuildValue ("(On)" , res , end );
1245- Py_DECREF (res );
1246- Py_DECREF (object );
1247- return restuple ;
12481251 }
1249- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
1250- const unsigned char * p ;
1251- Py_UCS4 ch = 0 ;
1252- if (PyUnicodeDecodeError_GetStart (exc , & start ))
1253- return NULL ;
1254- if (PyUnicodeDecodeError_GetEnd (exc , & end ))
1255- return NULL ;
1256- if (!(object = PyUnicodeDecodeError_GetObject (exc )))
1257- return NULL ;
1258- p = (const unsigned char * )PyBytes_AS_STRING (object );
1259- if (!(encode = PyUnicodeDecodeError_GetEncoding (exc ))) {
1260- Py_DECREF (object );
1261- return NULL ;
1262- }
1263- if (!(encoding = PyUnicode_AsUTF8 (encode ))) {
1264- Py_DECREF (object );
1265- Py_DECREF (encode );
1266- return NULL ;
1267- }
1268- code = get_standard_encoding (encoding , & bytelength );
1269- Py_DECREF (encode );
1270- if (code == ENC_UNKNOWN ) {
1271- /* Not supported, fail with original exception */
1272- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1273- Py_DECREF (object );
1274- return NULL ;
1275- }
12761252
1277- /* Try decoding a single surrogate character. If
1278- there are more, let the codec call us again. */
1279- p += start ;
1280- if (PyBytes_GET_SIZE (object ) - start >= bytelength ) {
1281- switch (code ) {
1282- case ENC_UTF8 :
1253+ Py_DECREF (obj );
1254+ PyObject * restuple = Py_BuildValue ("(Nn)" , res , end );
1255+ return restuple ;
1256+
1257+ bail :
1258+ PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1259+ return NULL ;
1260+ }
1261+
1262+
1263+ static PyObject *
1264+ _PyCodec_SurrogatePassUnicodeDecodeError (PyObject * exc )
1265+ {
1266+ PyObject * encoding = PyUnicodeDecodeError_GetEncoding (exc );
1267+ if (encoding == NULL ) {
1268+ return NULL ;
1269+ }
1270+ int code , bytelength ;
1271+ int rc = get_standard_encoding (encoding , & code , & bytelength );
1272+ Py_DECREF (encoding );
1273+ if (rc < 0 ) {
1274+ return NULL ;
1275+ }
1276+ if (code == ENC_UNKNOWN ) {
1277+ goto bail ;
1278+ }
1279+
1280+ PyObject * obj ;
1281+ Py_ssize_t objlen , start , end , slen ;
1282+ if (_PyUnicodeError_GetParams (exc ,
1283+ & obj , & objlen ,
1284+ & start , & end , & slen , true) < 0 )
1285+ {
1286+ return NULL ;
1287+ }
1288+
1289+ /* Try decoding a single surrogate character. If
1290+ there are more, let the codec call us again. */
1291+ Py_UCS4 ch = 0 ;
1292+ const unsigned char * p = (const unsigned char * )PyBytes_AS_STRING (obj );
1293+ p += start ;
1294+
1295+ if (objlen - start >= bytelength ) {
1296+ switch (code ) {
1297+ case ENC_UTF8 : {
12831298 if ((p [0 ] & 0xf0 ) == 0xe0 &&
12841299 (p [1 ] & 0xc0 ) == 0x80 &&
1285- (p [2 ] & 0xc0 ) == 0x80 ) {
1300+ (p [2 ] & 0xc0 ) == 0x80 )
1301+ {
12861302 /* it's a three-byte code */
1287- ch = ((p [0 ] & 0x0f ) << 12 ) + ((p [1 ] & 0x3f ) << 6 ) + (p [2 ] & 0x3f );
1303+ ch = ((p [0 ] & 0x0f ) << 12 ) +
1304+ ((p [1 ] & 0x3f ) << 6 ) +
1305+ (p [2 ] & 0x3f );
12881306 }
12891307 break ;
1290- case ENC_UTF16LE :
1308+ }
1309+ case ENC_UTF16LE : {
12911310 ch = p [1 ] << 8 | p [0 ];
12921311 break ;
1293- case ENC_UTF16BE :
1312+ }
1313+ case ENC_UTF16BE : {
12941314 ch = p [0 ] << 8 | p [1 ];
12951315 break ;
1296- case ENC_UTF32LE :
1316+ }
1317+ case ENC_UTF32LE : {
12971318 ch = (p [3 ] << 24 ) | (p [2 ] << 16 ) | (p [1 ] << 8 ) | p [0 ];
12981319 break ;
1299- case ENC_UTF32BE :
1320+ }
1321+ case ENC_UTF32BE : {
13001322 ch = (p [0 ] << 24 ) | (p [1 ] << 16 ) | (p [2 ] << 8 ) | p [3 ];
13011323 break ;
13021324 }
13031325 }
1326+ }
1327+ Py_DECREF (obj );
1328+ if (!Py_UNICODE_IS_SURROGATE (ch )) {
1329+ goto bail ;
1330+ }
13041331
1305- Py_DECREF (object );
1306- if (!Py_UNICODE_IS_SURROGATE (ch )) {
1307- /* it's not a surrogate - fail */
1308- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1309- return NULL ;
1310- }
1311- res = PyUnicode_FromOrdinal (ch );
1312- if (res == NULL )
1313- return NULL ;
1314- return Py_BuildValue ("(Nn)" , res , start + bytelength );
1332+ PyObject * res = PyUnicode_FromOrdinal (ch );
1333+ if (res == NULL ) {
1334+ return NULL ;
1335+ }
1336+ return Py_BuildValue ("(Nn)" , res , start + bytelength );
1337+
1338+ bail :
1339+ PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1340+ return NULL ;
1341+ }
1342+
1343+
1344+ /* This handler is declared static until someone demonstrates
1345+ a need to call it directly. */
1346+ static PyObject *
1347+ PyCodec_SurrogatePassErrors (PyObject * exc )
1348+ {
1349+ if (_PyIsUnicodeEncodeError (exc )) {
1350+ return _PyCodec_SurrogatePassUnicodeEncodeError (exc );
1351+ }
1352+ else if (_PyIsUnicodeDecodeError (exc )) {
1353+ return _PyCodec_SurrogatePassUnicodeDecodeError (exc );
13151354 }
13161355 else {
13171356 wrong_exception_type (exc );
13181357 return NULL ;
13191358 }
13201359}
13211360
1361+
13221362static PyObject *
13231363PyCodec_SurrogateEscapeErrors (PyObject * exc )
13241364{
@@ -1438,11 +1478,13 @@ namereplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14381478}
14391479
14401480
1441- static PyObject * surrogatepass_errors (PyObject * self , PyObject * exc )
1481+ static inline PyObject *
1482+ surrogatepass_errors (PyObject * Py_UNUSED (self ), PyObject * exc )
14421483{
14431484 return PyCodec_SurrogatePassErrors (exc );
14441485}
14451486
1487+
14461488static PyObject * surrogateescape_errors (PyObject * self , PyObject * exc )
14471489{
14481490 return PyCodec_SurrogateEscapeErrors (exc );
0 commit comments