Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Doc/library/locale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and functions:
If *locale* is a pair, it is converted to a locale name using
the locale aliasing engine.
The language code has the same format as a :ref:`locale name <locale_name>`,
but without encoding and ``@``-modifier.
but without encoding.
The language code and encoding can be ``None``.

If *locale* is omitted or ``None``, the current setting for *category* is
Expand All @@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and functions:
specified in the :envvar:`LANG` environment variable). If the locale is not
changed thereafter, using multithreading should not cause problems.

.. versionchanged:: next
Support language codes with ``@``-modifiers.


.. function:: localeconv()

Expand Down Expand Up @@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception and functions:
values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`.

The language code has the same format as a :ref:`locale name <locale_name>`,
but without encoding and ``@``-modifier.
but without encoding.
The language code and encoding may be ``None`` if their values cannot be
determined.
The "C" locale is represented as ``(None, None)``.

.. versionchanged:: next
``@``-modifiers are no longer silently removed, but are included in
the language code.


.. function:: getpreferredencoding(do_setlocale=True)

Expand Down
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,15 @@ http.client
(Contributed by Alexander Enrique Urieles Nieto in :gh:`131724`.)


locale
------

* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
but are included in the language code.
(Contributed by Serhiy Storchaka in :gh:`137729`.)


math
----

Expand Down
40 changes: 29 additions & 11 deletions Lib/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,14 @@ def _replace_encoding(code, encoding):
def _append_modifier(code, modifier):
if modifier == 'euro':
if '.' not in code:
return code + '.ISO8859-15'
# Linux appears to require keeping the "@euro" modifier in place,
# even when using the ".ISO8859-15" encoding.
return code + '.ISO8859-15@euro'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment to this section stating that Linux appears to require keeping the "@euro" modifier in place, even when using the ".ISO8859-15" encoding.

_, _, encoding = code.partition('.')
if encoding in ('ISO8859-15', 'UTF-8'):
if encoding == 'UTF-8':
return code
if encoding == 'ISO8859-1':
return _replace_encoding(code, 'ISO8859-15')
code = _replace_encoding(code, 'ISO8859-15')
return code + '@' + modifier

def normalize(localename):
Expand Down Expand Up @@ -485,13 +487,18 @@ def _parse_localename(localename):
# Deal with locale modifiers
code, modifier = code.split('@', 1)
if modifier == 'euro' and '.' not in code:
# Assume Latin-9 for @euro locales. This is bogus,
# since some systems may use other encodings for these
# locales. Also, we ignore other modifiers.
return code, 'iso-8859-15'
# Assume ISO8859-15 for @euro locales. Do note that some systems
# may use other encodings for these locales, so this may not always
# be correct.
return code + '@euro', 'ISO8859-15'
else:
modifier = ''

if '.' in code:
return tuple(code.split('.')[:2])
code, encoding = code.split('.')[:2]
if modifier:
code += '@' + modifier
return code, encoding
elif code == 'C':
return None, None
elif code == 'UTF-8':
Expand All @@ -516,7 +523,14 @@ def _build_localename(localetuple):
if encoding is None:
return language
else:
return language + '.' + encoding
if '@' in language:
language, modifier = language.split('@', 1)
else:
modifier = ''
localename = language + '.' + encoding
if modifier:
localename += '@' + modifier
return localename
except (TypeError, ValueError):
raise TypeError('Locale must be None, a string, or an iterable of '
'two strings -- language code, encoding.') from None
Expand Down Expand Up @@ -888,6 +902,12 @@ def getpreferredencoding(do_setlocale=True):
# SS 2025-06-10:
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
# on all platforms.
#
# SS 2025-07-30:
# Remove conflicts with GNU libc.
#
# removed 'el_gr@euro'
# removed 'uz_uz@cyrillic'

locale_alias = {
'a3': 'az_AZ.KOI8-C',
Expand Down Expand Up @@ -1021,7 +1041,6 @@ def getpreferredencoding(do_setlocale=True):
'el': 'el_GR.ISO8859-7',
'el_cy': 'el_CY.ISO8859-7',
'el_gr': 'el_GR.ISO8859-7',
'el_gr@euro': 'el_GR.ISO8859-15',
'en': 'en_US.ISO8859-1',
'en_ag': 'en_AG.UTF-8',
'en_au': 'en_AU.ISO8859-1',
Expand Down Expand Up @@ -1456,7 +1475,6 @@ def getpreferredencoding(do_setlocale=True):
'ur_pk': 'ur_PK.CP1256',
'uz': 'uz_UZ.UTF-8',
'uz_uz': 'uz_UZ.UTF-8',
'uz_uz@cyrillic': 'uz_UZ.UTF-8',
've': 've_ZA.UTF-8',
've_za': 've_ZA.UTF-8',
'vi': 'vi_VN.TCVN',
Expand Down
104 changes: 102 additions & 2 deletions Lib/test/test_locale.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from decimal import Decimal
from test import support
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
from test.support.warnings_helper import check_warnings
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
Expand Down Expand Up @@ -425,8 +426,8 @@ def test_hyphenated_encoding(self):
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')

def test_euro_modifier(self):
self.check('de_DE@euro', 'de_DE.ISO8859-15')
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')

def test_latin_modifier(self):
Expand Down Expand Up @@ -534,6 +535,105 @@ def test_setlocale_long_encoding(self):
with self.assertRaises(locale.Error):
locale.setlocale(locale.LC_ALL, loc2)

@support.subTests('localename,localetuple', [
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
('be_BY.UTF-8@latin', ('be_BY@latin', None)),
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
])
# Check that setlocale() accepts a (language code, encoding) pair whose
# language code carries an "@modifier", and that it returns the expected
# full locale name.
def test_setlocale_with_modifier(self, localename, localetuple):
try:
# Probe with the full locale name first; the case is skipped when
# setlocale() rejects it by raising locale.Error.
locale.setlocale(locale.LC_CTYPE, localename)
except locale.Error as exc:
self.skipTest(str(exc))
# Setting the locale from the tuple form must yield the same locale name.
loc = locale.setlocale(locale.LC_CTYPE, localetuple)
self.assertEqual(loc, localename)

# Round trip: the tuple reported by getlocale() must select the same
# locale name when passed back to setlocale().
loctuple = locale.getlocale(locale.LC_CTYPE)
loc = locale.setlocale(locale.LC_CTYPE, loctuple)
self.assertEqual(loc, localename)

@support.subTests('localename,localetuple', [
('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
('be_BY@latin', ('be_BY@latin', 'UTF-8')),
('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
])
# Check that getlocale() reports the "@modifier" as part of the language
# code for a locale that was set by name.
def test_getlocale_with_modifier(self, localename, localetuple):
try:
# The case is skipped when setlocale() rejects the locale name by
# raising locale.Error.
locale.setlocale(locale.LC_CTYPE, localename)
except locale.Error as exc:
self.skipTest(str(exc))
loctuple = locale.getlocale(locale.LC_CTYPE)
self.assertEqual(loctuple, localetuple)

# Round trip: feeding the reported tuple back into setlocale() must leave
# getlocale() returning the same tuple.
locale.setlocale(locale.LC_CTYPE, loctuple)
self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)


class TestMiscellaneous(unittest.TestCase):
def test_defaults_UTF8(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`locale.setlocale` now supports language codes with ``@``-modifiers.
``@``-modifiers are no longer silently removed in :func:`locale.getlocale`,
but are included in the language code.
13 changes: 12 additions & 1 deletion Tools/i18n/makelocalealias.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,24 @@ def parse(filename):
# Ignore one letter locale mappings (except for 'c')
if len(locale) == 1 and locale != 'c':
continue
if '@' in locale and '@' not in alias:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment explaining why this is done (to make glibc on Linux happy)

# Do not simply remove the "@euro" modifier.
# Glibc generates separate locales with the "@euro" modifier, and
# does not always generate a locale without it with the same encoding.
# It can also affect collation.
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
alias += '@euro'
# Normalize encoding, if given
if '.' in locale:
lang, encoding = locale.split('.')[:2]
encoding = encoding.replace('-', '')
encoding = encoding.replace('_', '')
locale = lang + '.' + encoding
data[locale] = alias
# Conflict with glibc.
data.pop('el_gr@euro', None)
data.pop('uz_uz@cyrillic', None)
data.pop('uz_uz.utf8@cyrillic', None)
return data

def parse_glibc_supported(filename):
Expand Down Expand Up @@ -81,7 +92,7 @@ def parse_glibc_supported(filename):
# Add an encoding to alias
alias, _, modifier = alias.partition('@')
alias = _locale._replace_encoding(alias, alias_encoding)
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
if modifier:
alias += '@' + modifier
data[locale] = alias
return data
Expand Down
Loading