diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index d48ea04077f366..0800b3e5677c93 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and functions: If *locale* is a pair, it is converted to a locale name using the locale aliasing engine. The language code has the same format as a :ref:`locale name `, - but without encoding and ``@``-modifier. + but without encoding. The language code and encoding can be ``None``. If *locale* is omitted or ``None``, the current setting for *category* is @@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and functions: specified in the :envvar:`LANG` environment variable). If the locale is not changed thereafter, using multithreading should not cause problems. + .. versionchanged:: next + Support language codes with ``@``-modifiers. + .. function:: localeconv() @@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception and functions: values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`. The language code has the same format as a :ref:`locale name `, - but without encoding and ``@``-modifier. + but without encoding. The language code and encoding may be ``None`` if their values cannot be determined. The "C" locale is represented as ``(None, None)``. + .. versionchanged:: next + ``@``-modifiers are no longer silently removed, but included in + the language code. + .. function:: getpreferredencoding(do_setlocale=True) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 252d8966b7450f..407606da961c16 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -274,6 +274,15 @@ http.cookies (Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.) +locale +------ + +* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers. + ``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`, + but included in the language code. 
+ (Contributed by Serhiy Storchaka in :gh:`137729`.) + + math ---- diff --git a/Lib/locale.py b/Lib/locale.py index 0bde7ed51c66c1..37cafb4a601b3c 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -375,12 +375,14 @@ def _replace_encoding(code, encoding): def _append_modifier(code, modifier): if modifier == 'euro': if '.' not in code: - return code + '.ISO8859-15' + # Linux appears to require keeping the "@euro" modifier in place, + # even when using the ".ISO8859-15" encoding. + return code + '.ISO8859-15@euro' _, _, encoding = code.partition('.') - if encoding in ('ISO8859-15', 'UTF-8'): + if encoding == 'UTF-8': return code if encoding == 'ISO8859-1': - return _replace_encoding(code, 'ISO8859-15') + code = _replace_encoding(code, 'ISO8859-15') return code + '@' + modifier def normalize(localename): @@ -485,13 +487,18 @@ def _parse_localename(localename): # Deal with locale modifiers code, modifier = code.split('@', 1) if modifier == 'euro' and '.' not in code: - # Assume Latin-9 for @euro locales. This is bogus, - # since some systems may use other encodings for these - # locales. Also, we ignore other modifiers. - return code, 'iso-8859-15' + # Assume ISO8859-15 for @euro locales. Do note that some systems + # may use other encodings for these locales, so this may not always + # be correct. + return code + '@euro', 'ISO8859-15' + else: + modifier = '' if '.' in code: - return tuple(code.split('.')[:2]) + code, encoding = code.split('.')[:2] + if modifier: + code += '@' + modifier + return code, encoding elif code == 'C': return None, None elif code == 'UTF-8': @@ -516,7 +523,14 @@ def _build_localename(localetuple): if encoding is None: return language else: - return language + '.' + encoding + if '@' in language: + language, modifier = language.split('@', 1) + else: + modifier = '' + localename = language + '.' 
+ encoding + if modifier: + localename += '@' + modifier + return localename except (TypeError, ValueError): raise TypeError('Locale must be None, a string, or an iterable of ' 'two strings -- language code, encoding.') from None @@ -888,6 +902,12 @@ def getpreferredencoding(do_setlocale=True): # SS 2025-06-10: # Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist # on all platforms. +# +# SS 2025-07-30: +# Remove conflicts with GNU libc. +# +# removed 'el_gr@euro' +# removed 'uz_uz@cyrillic' locale_alias = { 'a3': 'az_AZ.KOI8-C', @@ -1021,7 +1041,6 @@ def getpreferredencoding(do_setlocale=True): 'el': 'el_GR.ISO8859-7', 'el_cy': 'el_CY.ISO8859-7', 'el_gr': 'el_GR.ISO8859-7', - 'el_gr@euro': 'el_GR.ISO8859-15', 'en': 'en_US.ISO8859-1', 'en_ag': 'en_AG.UTF-8', 'en_au': 'en_AU.ISO8859-1', @@ -1456,7 +1475,6 @@ def getpreferredencoding(do_setlocale=True): 'ur_pk': 'ur_PK.CP1256', 'uz': 'uz_UZ.UTF-8', 'uz_uz': 'uz_UZ.UTF-8', - 'uz_uz@cyrillic': 'uz_UZ.UTF-8', 've': 've_ZA.UTF-8', 've_za': 've_ZA.UTF-8', 'vi': 'vi_VN.TCVN', diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 698e137e3e8abd..01b1e754d04219 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -1,4 +1,5 @@ from decimal import Decimal +from test import support from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper from test.support.warnings_helper import check_warnings from test.support.import_helper import ensure_lazy_imports, import_fresh_module @@ -425,8 +426,8 @@ def test_hyphenated_encoding(self): self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2') def test_euro_modifier(self): - self.check('de_DE@euro', 'de_DE.ISO8859-15') - self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15') + self.check('de_DE@euro', 'de_DE.ISO8859-15@euro') + self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro') self.check('de_DE.utf8@euro', 'de_DE.UTF-8') def test_latin_modifier(self): @@ -534,6 +535,105 @@ def 
test_setlocale_long_encoding(self): with self.assertRaises(locale.Error): locale.setlocale(locale.LC_ALL, loc2) + @support.subTests('localename,localetuple', [ + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')), + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')), + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')), + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')), + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', None)), + ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')), + ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')), + ('el_GR.ISO8859-7@euro', ('el_GR@euro', None)), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)), + ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')), + ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')), + ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)), + ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')), + ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')), + ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)), + ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')), + ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')), + ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)), + ('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')), + ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')), + ('be_BY.UTF-8@latin', ('be_BY@latin', None)), + ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')), + ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')), + ('sr_RS.UTF-8@latin', ('sr_RS@latin', None)), + 
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')), + ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')), + ('ug_CN.UTF-8@latin', ('ug_CN@latin', None)), + ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')), + ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')), + ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)), + ]) + def test_setlocale_with_modifier(self, localename, localetuple): + try: + locale.setlocale(locale.LC_CTYPE, localename) + except locale.Error as exc: + self.skipTest(str(exc)) + loc = locale.setlocale(locale.LC_CTYPE, localetuple) + self.assertEqual(loc, localename) + + loctuple = locale.getlocale(locale.LC_CTYPE) + loc = locale.setlocale(locale.LC_CTYPE, loctuple) + self.assertEqual(loc, localename) + + @support.subTests('localename,localetuple', [ + ('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')), + ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')), + ('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')), + ('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')), + ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')), + ('de_DE@euro', ('de_DE@euro', 'ISO8859-15')), + ('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')), + ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')), + ('el_GR@euro', ('el_GR@euro', 'ISO8859-7')), + ('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')), + ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')), + ('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')), + ('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')), + ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')), + ('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')), + ('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')), + ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')), + ('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')), + ('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')), + ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')), + ('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')), + ('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')), + 
+ ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')), + ('be_BY@latin', ('be_BY@latin', 'UTF-8')), + ('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')), + ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')), + ('sr_RS@latin', ('sr_RS@latin', 'UTF-8')), + ('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')), + ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')), + ('ug_CN@latin', ('ug_CN@latin', 'UTF-8')), + ('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')), + ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')), + ('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')), + ]) + def test_getlocale_with_modifier(self, localename, localetuple): + try: + locale.setlocale(locale.LC_CTYPE, localename) + except locale.Error as exc: + self.skipTest(str(exc)) + loctuple = locale.getlocale(locale.LC_CTYPE) + self.assertEqual(loctuple, localetuple) + + locale.setlocale(locale.LC_CTYPE, loctuple) + self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple) + class TestMiscellaneous(unittest.TestCase): def test_defaults_UTF8(self): diff --git a/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst new file mode 100644 index 00000000000000..b324a42c7f869e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst @@ -0,0 +1,3 @@ +:func:`locale.setlocale` now supports language codes with ``@``-modifiers. +``@``-modifiers are no longer silently removed in :func:`locale.getlocale`, +but included in the language code. diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index 02af1caff7d499..7f001abc09745d 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -44,6 +44,13 @@ def parse(filename): # Ignore one letter locale mappings (except for 'c') if len(locale) == 1 and locale != 'c': continue + if '@' in locale and '@' not in alias: + # Do not simply remove the "@euro" modifier. 
+ # Glibc generates separate locales with the "@euro" modifier, and + # does not always generate a locale without it with the same encoding. + # It can also affect collation. + if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'): + alias += '@euro' # Normalize encoding, if given if '.' in locale: lang, encoding = locale.split('.')[:2] @@ -51,6 +58,10 @@ def parse(filename): encoding = encoding.replace('_', '') locale = lang + '.' + encoding data[locale] = alias + # Conflict with glibc. + data.pop('el_gr@euro', None) + data.pop('uz_uz@cyrillic', None) + data.pop('uz_uz.utf8@cyrillic', None) return data def parse_glibc_supported(filename): @@ -81,7 +92,7 @@ def parse_glibc_supported(filename): # Add an encoding to alias alias, _, modifier = alias.partition('@') alias = _locale._replace_encoding(alias, alias_encoding) - if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'): + if modifier: alias += '@' + modifier data[locale] = alias return data