Skip to content

Commit ce1b747

Browse files
authored
gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (#137415)
Fix the name of the Python encoding in Unicode errors of the code page codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8", which are not valid Python codec names.
1 parent c17f378 commit ce1b747

File tree

4 files changed

+5
-6
lines changed

4 files changed

+5
-6
lines changed

Lib/test/test_codecs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3293,7 +3293,7 @@ def test_code_page_name(self):
32933293
codecs.code_page_encode, 932, '\xff')
32943294
self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
32953295
codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
3296-
self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
3296+
self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
32973297
codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
32983298

32993299
def check_decode(self, cp, tests):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix name of the Python encoding in Unicode errors of the code page codec:
2+
use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not
3+
valid Python codec names. Patch by Victor Stinner.

Objects/unicodeobject.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7684,10 +7684,6 @@ code_page_name(UINT code_page, PyObject **obj)
76847684
*obj = NULL;
76857685
if (code_page == CP_ACP)
76867686
return "mbcs";
7687-
if (code_page == CP_UTF7)
7688-
return "CP_UTF7";
7689-
if (code_page == CP_UTF8)
7690-
return "CP_UTF8";
76917687

76927688
*obj = PyBytes_FromFormat("cp%u", code_page);
76937689
if (*obj == NULL)

Python/codecs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength)
12041204
}
12051205
}
12061206
}
1207-
else if (strcmp(encoding, "CP_UTF8") == 0) {
1207+
else if (strcmp(encoding, "cp65001") == 0) {
12081208
*bytelength = 3;
12091209
return ENC_UTF8;
12101210
}

0 commit comments

Comments
 (0)