Skip to content

Commit 1c9e55a

Browse files
Review
1 parent 4e12b9e commit 1c9e55a

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

Lib/encodings/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,18 @@
2626
2727
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
2828
29-
"""
29+
"""#"
3030

3131
import codecs
32-
from _codecs import _normalize_encoding
3332
import sys
33+
from _codecs import _normalize_encoding
3434
from . import aliases
3535

3636
_cache = {}
3737
_unknown = '--unknown--'
3838
_import_tail = ['*']
3939
_aliases = aliases.aliases
4040

41-
4241
class CodecRegistryError(LookupError, SystemError):
4342
pass
4443

Lib/test/test_codecs.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3900,7 +3900,6 @@ def test_encodings_normalize_encoding(self):
39003900
self.assertEqual(normalize('utf_8'), 'utf_8')
39013901
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
39023902
self.assertEqual(normalize('utf 8'), 'utf_8')
3903-
39043903
# encodings.normalize_encoding() doesn't convert
39053904
# characters to lower case.
39063905
self.assertEqual(normalize('UTF 8'), 'UTF_8')

Modules/_codecsmodule.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,30 +1037,41 @@ static PyObject *
10371037
_codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
10381038
/*[clinic end generated code: output=d27465d81e361f8e input=3ff3f4d64995b988]*/
10391039
{
1040-
const char *cstr = PyUnicode_AsUTF8(encoding);
1040+
Py_ssize_t len;
1041+
const char *cstr = PyUnicode_AsUTF8AndSize(encoding, &len);
10411042
if (cstr == NULL) {
10421043
return NULL;
10431044
}
10441045

1045-
size_t len = strlen(cstr);
10461046
if (len > PY_SSIZE_T_MAX) {
10471047
PyErr_SetString(PyExc_OverflowError, "encoding is too large");
10481048
return NULL;
10491049
}
10501050

1051+
PyUnicodeWriter *writer = PyUnicodeWriter_Create(len + 1);
1052+
if (writer == NULL) {
1053+
return NULL;
1054+
}
1055+
10511056
char *normalized = PyMem_Malloc(len + 1);
10521057
if (normalized == NULL) {
1058+
PyUnicodeWriter_Discard(writer);
10531059
return PyErr_NoMemory();
10541060
}
10551061

10561062
if (!_Py_normalize_encoding(cstr, normalized, len + 1, 0)) {
10571063
PyMem_Free(normalized);
1064+
PyUnicodeWriter_Discard(writer);
10581065
return NULL;
10591066
}
10601067

1061-
PyObject *v = PyUnicode_FromString(normalized);
1068+
if (PyUnicodeWriter_WriteUTF8(writer, normalized, (Py_ssize_t)strlen(normalized)) < 0) {
1069+
PyUnicodeWriter_Discard(writer);
1070+
PyMem_Free(normalized);
1071+
return NULL;
1072+
}
10621073
PyMem_Free(normalized);
1063-
return v;
1074+
return PyUnicodeWriter_Finish(writer);
10641075
}
10651076

10661077
/* --- Module API --------------------------------------------------------- */

0 commit comments

Comments
 (0)