Skip to content

Commit 1c981ff

Browse files
Commit
1 parent 1b8dcda commit 1c981ff

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

Lib/test/test_c_locale_coercion.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,31 @@ def test_PYTHONCOERCECLOCALE_set_to_one(self):
478478
text=True)
479479
self.assertEqual(cmd.stdout.rstrip(), loc)
480480

481+
def test_unsupported_locale_fallback_to_utf8(self):
    # For each locale below, start a child interpreter with LC_ALL set to
    # that locale and PYTHONUTF8=0, then check what it prints for
    # sys.getfilesystemencoding(): "ascii" for the "C" locale and "utf-8"
    # for every other entry.
    #
    # NOTE(review): the expected values look platform dependent -- the "C"
    # case assumes an ASCII C locale, and the other cases presumably need
    # those locales to be installed on the host. Confirm against the
    # platforms this test runs on.
    locale_names = [
        "zh_TW.euctw",
        "hy_AM.armscii8",
        "ka_GE.georgianps",
        "C",
    ]

    # The child command is the same for every locale, so build it once.
    command = [
        sys.executable,
        "-c",
        "import sys; print(sys.getfilesystemencoding())",
    ]

    # Named "loc" rather than "locale" so the loop variable cannot shadow
    # a ``locale`` module import, should this file have one.
    for loc in locale_names:
        with self.subTest(locale=loc):
            env = dict(os.environ, LC_ALL=loc, PYTHONUTF8="0")

            result = subprocess.run(
                command,
                env=env,
                capture_output=True,
                text=True,
                timeout=10)

            # Pass the child's stderr as the failure message; otherwise a
            # failed startup is reported only as a non-zero exit status.
            self.assertEqual(result.returncode, 0, result.stderr)

            expected = "ascii" if loc == "C" else "utf-8"
            self.assertEqual(result.stdout.strip(), expected)
505+
481506

482507
def tearDownModule():
483508
support.reap_children()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fall back to UTF-8 if an unsupported locale is provided.

Objects/unicodeobject.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16455,10 +16455,23 @@ config_get_codec_name(wchar_t **config_encoding)
1645516455

1645616456
PyObject *name_obj = NULL;
1645716457
PyObject *codec = _PyCodec_Lookup(encoding);
16458-
PyMem_RawFree(encoding);
1645916458

16460-
if (!codec)
16461-
goto error;
16459+
if (!codec) { // Fallback to UTF-8 if the codec is not found
16460+
PyMem_RawFree(encoding);
16461+
PyErr_Clear();
16462+
16463+
wchar_t *utf8_encoding = _PyMem_RawWcsdup(L"utf-8");
16464+
if (utf8_encoding == NULL) {
16465+
PyErr_NoMemory();
16466+
return -1;
16467+
}
16468+
16469+
PyMem_RawFree(*config_encoding);
16470+
*config_encoding = utf8_encoding;
16471+
return 0;
16472+
}
16473+
16474+
PyMem_RawFree(encoding);
1646216475

1646316476
name_obj = PyObject_GetAttrString(codec, "name");
1646416477
Py_CLEAR(codec);

0 commit comments

Comments
 (0)