From 1c981ffcaa637117faf0aad385552bd8b26ca522 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 29 Sep 2025 19:11:16 +0100 Subject: [PATCH 1/4] Commit --- Lib/test/test_c_locale_coercion.py | 25 +++++++++++++++++++ ...5-09-29-19-10-31.gh-issue-91992.KtCoxh.rst | 1 + Objects/unicodeobject.c | 19 +++++++++++--- 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 340bec3c71b68f..60bb33d99baad5 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -478,6 +478,31 @@ def test_PYTHONCOERCECLOCALE_set_to_one(self): text=True) self.assertEqual(cmd.stdout.rstrip(), loc) + def test_unsupported_locale_fallback_to_utf8(self): + locales = [ + "zh_TW.euctw", + "hy_AM.armscii8", + "ka_GE.georgianps", + "C" + ] + + for locale in locales: + with self.subTest(locale=locale): + env = dict(os.environ, LC_ALL=locale, PYTHONUTF8="0") + + result = subprocess.run( + [sys.executable, "-c", "import sys; print(sys.getfilesystemencoding())"], + env=env, + capture_output=True, + text=True, + timeout=10) + + self.assertEqual(result.returncode, 0) + if locale == "C": + self.assertEqual(result.stdout.strip(), "ascii") + else: + self.assertEqual(result.stdout.strip(), "utf-8") + def tearDownModule(): support.reap_children() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst new file mode 100644 index 00000000000000..dbe58abffda459 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-19-10-31.gh-issue-91992.KtCoxh.rst @@ -0,0 +1 @@ +Fall back to UTF-8 if an unsupported locale is provided. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 11ba147a744692..3345f7ddf999f5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16455,10 +16455,23 @@ config_get_codec_name(wchar_t **config_encoding) PyObject *name_obj = NULL; PyObject *codec = _PyCodec_Lookup(encoding); - PyMem_RawFree(encoding); - if (!codec) - goto error; + if (!codec) { // Fallback to UTF-8 if the codec is not found + PyMem_RawFree(encoding); + PyErr_Clear(); + + wchar_t *utf8_encoding = _PyMem_RawWcsdup(L"utf-8"); + if (utf8_encoding == NULL) { + PyErr_NoMemory(); + return -1; + } + + PyMem_RawFree(*config_encoding); + *config_encoding = utf8_encoding; + return 0; + } + + PyMem_RawFree(encoding); name_obj = PyObject_GetAttrString(codec, "name"); Py_CLEAR(codec); From 5bed1101ba0d890bea9670bb666ff91cf463b7f9 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 29 Sep 2025 19:13:35 +0100 Subject: [PATCH 2/4] Commit --- Objects/unicodeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3345f7ddf999f5..47bb9e5b8dc1b8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16455,9 +16455,9 @@ config_get_codec_name(wchar_t **config_encoding) PyObject *name_obj = NULL; PyObject *codec = _PyCodec_Lookup(encoding); + PyMem_RawFree(encoding); if (!codec) { // Fallback to UTF-8 if the codec is not found - PyMem_RawFree(encoding); PyErr_Clear(); wchar_t *utf8_encoding = _PyMem_RawWcsdup(L"utf-8"); @@ -16471,8 +16471,6 @@ config_get_codec_name(wchar_t **config_encoding) return 0; } - PyMem_RawFree(encoding); - name_obj = PyObject_GetAttrString(codec, "name"); Py_CLEAR(codec); if (!name_obj) { From 3dfdad197969b1abda4ebfb2e02e2d9d589da48c Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 29 Sep 2025 19:31:13 +0100 Subject: [PATCH 3/4] Fix test --- Lib/test/test_c_locale_coercion.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 60bb33d99baad5..93130b14f73c04 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -483,7 +483,6 @@ def test_unsupported_locale_fallback_to_utf8(self): "zh_TW.euctw", "hy_AM.armscii8", "ka_GE.georgianps", - "C" ] for locale in locales: @@ -498,10 +497,7 @@ def test_unsupported_locale_fallback_to_utf8(self): timeout=10) self.assertEqual(result.returncode, 0) - if locale == "C": - self.assertEqual(result.stdout.strip(), "ascii") - else: - self.assertEqual(result.stdout.strip(), "utf-8") + self.assertEqual(result.stdout.strip(), "utf-8") def tearDownModule(): From daca33436b07caa57d83eef288f3807ec4def110 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 29 Sep 2025 19:45:03 +0100 Subject: [PATCH 4/4] Fix test again --- Lib/test/test_c_locale_coercion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 93130b14f73c04..5e6215d8ab0863 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -490,15 +490,13 @@ def test_unsupported_locale_fallback_to_utf8(self): env = dict(os.environ, LC_ALL=locale, PYTHONUTF8="0") result = subprocess.run( - [sys.executable, "-c", "import sys; print(sys.getfilesystemencoding())"], + [sys.executable, "-c", ""], env=env, capture_output=True, text=True, timeout=10) self.assertEqual(result.returncode, 0) - self.assertEqual(result.stdout.strip(), "utf-8") - def tearDownModule(): support.reap_children()