diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index 340bec3c71b68f..5e6215d8ab0863 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -478,6 +478,29 @@ def test_PYTHONCOERCECLOCALE_set_to_one(self):
                              text=True)
         self.assertEqual(cmd.stdout.rstrip(), loc)
 
+    def test_unsupported_locale_fallback_to_utf8(self):
+        # Locales whose encodings are not expected to have a Python codec;
+        # the interpreter must still start successfully with them.
+        locales = [
+            "zh_TW.euctw",
+            "hy_AM.armscii8",
+            "ka_GE.georgianps",
+        ]
+
+        for loc in locales:
+            with self.subTest(locale=loc):
+                env = dict(os.environ, LC_ALL=loc, PYTHONUTF8="0")
+
+                result = subprocess.run(
+                    [sys.executable, "-c", ""],
+                    env=env,
+                    capture_output=True,
+                    text=True,
+                    timeout=support.SHORT_TIMEOUT)
+
+                # Show the child's stderr if startup failed.
+                self.assertEqual(result.returncode, 0, result.stderr)
+
 
 def tearDownModule():
     support.reap_children()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst
new file mode 100644
index 00000000000000..dbe58abffda459
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst
@@ -0,0 +1 @@
+Fall back to UTF-8 if an unsupported locale is provided.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 11ba147a744692..47bb9e5b8dc1b8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -16457,8 +16457,24 @@ config_get_codec_name(wchar_t **config_encoding)
     PyObject *codec = _PyCodec_Lookup(encoding);
     PyMem_RawFree(encoding);
 
-    if (!codec)
-        goto error;
+    if (!codec) {
+        // Fall back to UTF-8 only when the codec is unknown; any other
+        // failure (e.g. MemoryError) must still be reported.
+        if (!PyErr_ExceptionMatches(PyExc_LookupError)) {
+            goto error;
+        }
+        PyErr_Clear();
+
+        wchar_t *utf8_encoding = _PyMem_RawWcsdup(L"utf-8");
+        if (utf8_encoding == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        PyMem_RawFree(*config_encoding);
+        *config_encoding = utf8_encoding;
+        return 0;
+    }
 
     name_obj = PyObject_GetAttrString(codec, "name");
     Py_CLEAR(codec);