Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions Lib/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,12 @@ def _replace_encoding(code, encoding):
def _append_modifier(code, modifier):
if modifier == 'euro':
if '.' not in code:
return code + '.ISO8859-15'
return code + '.ISO8859-15@euro'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment to this section stating that Linux appears to require keeping the "@euro" modifier in place, even when using the ".ISO8859-15" encoding.

_, _, encoding = code.partition('.')
if encoding in ('ISO8859-15', 'UTF-8'):
if encoding == 'UTF-8':
return code
if encoding == 'ISO8859-1':
return _replace_encoding(code, 'ISO8859-15')
code = _replace_encoding(code, 'ISO8859-15')
return code + '@' + modifier

def normalize(localename):
Expand Down Expand Up @@ -487,11 +487,16 @@ def _parse_localename(localename):
if modifier == 'euro' and '.' not in code:
# Assume Latin-9 for @euro locales. This is bogus,
# since some systems may use other encodings for these
Copy link
Member

@malemburg malemburg Aug 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rewrite the comment to:

# Assume ISO8859-15 for @euro locales. Do note that some systems
# may use other encodings for these locales, so this may not always
# be correct.

# locales. Also, we ignore other modifiers.
return code, 'iso-8859-15'
# locales.
return code + '@euro', 'ISO8859-15'
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be dead code, because normalize() above already adds the encoding.

else:
modifier = ''

if '.' in code:
return tuple(code.split('.')[:2])
code, encoding = code.split('.')[:2]
if modifier:
code += '@' + modifier
return code, encoding
elif code == 'C':
return None, None
elif code == 'UTF-8':
Expand All @@ -516,7 +521,14 @@ def _build_localename(localetuple):
if encoding is None:
return language
else:
return language + '.' + encoding
if '@' in language:
language, modifier = language.split('@', 1)
else:
modifier = ''
localename = language + '.' + encoding
if modifier:
localename += '@' + modifier
return localename
except (TypeError, ValueError):
raise TypeError('Locale must be None, a string, or an iterable of '
'two strings -- language code, encoding.') from None
Expand Down Expand Up @@ -888,6 +900,12 @@ def getpreferredencoding(do_setlocale=True):
# SS 2025-06-10:
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
# on all platforms.
#
# SS 2025-07-30:
# Remove conflicts with GNU libc.
#
# removed 'el_gr@euro'
# removed 'uz_uz@cyrillic'

locale_alias = {
'a3': 'az_AZ.KOI8-C',
Expand Down Expand Up @@ -1021,7 +1039,6 @@ def getpreferredencoding(do_setlocale=True):
'el': 'el_GR.ISO8859-7',
'el_cy': 'el_CY.ISO8859-7',
'el_gr': 'el_GR.ISO8859-7',
'el_gr@euro': 'el_GR.ISO8859-15',
'en': 'en_US.ISO8859-1',
'en_ag': 'en_AG.UTF-8',
'en_au': 'en_AU.ISO8859-1',
Expand Down Expand Up @@ -1456,7 +1473,6 @@ def getpreferredencoding(do_setlocale=True):
'ur_pk': 'ur_PK.CP1256',
'uz': 'uz_UZ.UTF-8',
'uz_uz': 'uz_UZ.UTF-8',
'uz_uz@cyrillic': 'uz_UZ.UTF-8',
've': 've_ZA.UTF-8',
've_za': 've_ZA.UTF-8',
'vi': 'vi_VN.TCVN',
Expand Down
159 changes: 136 additions & 23 deletions Lib/test/test_locale.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from decimal import Decimal
from test import support
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
from test.support.warnings_helper import check_warnings
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
from unittest import mock
import unittest
import locale
import os
import sys
import codecs

Expand Down Expand Up @@ -424,8 +426,8 @@ def test_hyphenated_encoding(self):
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')

def test_euro_modifier(self):
self.check('de_DE@euro', 'de_DE.ISO8859-15')
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')

def test_latin_modifier(self):
Expand Down Expand Up @@ -486,6 +488,138 @@ def test_japanese(self):
self.check('jp_jp', 'ja_JP.eucJP')


class TestRealLocales(unittest.TestCase):
    """Round-trip checks that run against locales installed on the host.

    Each test changes the LC_CTYPE locale; setUp() registers a cleanup that
    puts back the value that was active before the test started.  Tests
    skip themselves when the host rejects the locale they need.
    """

    locale_type = locale.LC_CTYPE

    def setUp(self):
        # Calling setlocale() without a new value only queries the setting.
        saved = locale.setlocale(locale.LC_CTYPE)
        self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved)

    def _activate_or_skip(self, localename):
        """Make *localename* the LC_CTYPE locale, or skip the current test."""
        crashers = ('ks_IN.UTF-8@devanagari', 'sd_IN.UTF-8@devanagari')
        if os.name == 'nt' and localename in crashers:
            self.skipTest('gh-137273: crashes on Windows')
        try:
            locale.setlocale(locale.LC_CTYPE, localename)
        except locale.Error as exc:
            # The host rejected this locale.
            self.skipTest(str(exc))

    def test_getsetlocale_issue1813(self):
        # Issue #1813: setting and getting the locale under a Turkish locale
        try:
            locale.setlocale(locale.LC_CTYPE, 'tr_TR')
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs Turkish locale')

        current = locale.getlocale(locale.LC_CTYPE)
        if verbose:
            print('testing with %a' % (current,), end=' ', flush=True)

        try:
            locale.setlocale(locale.LC_CTYPE, current)
        except locale.Error as exc:
            # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE)
            # and the tr_TR locale on Windows. getlocale() builds a locale
            # which is not recognized by setlocale().
            self.skipTest(f"setlocale(LC_CTYPE, {current!r}) failed: {exc!r}")

        self.assertEqual(current, locale.getlocale(locale.LC_CTYPE))

    @support.subTests('localename,localetuple', [
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
        ('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
        ('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
        ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
        ('be_BY.UTF-8@latin', ('be_BY@latin', None)),
        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
        ('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
        ('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
    ])
    def test_setlocale_with_modifier(self, localename, localetuple):
        self._activate_or_skip(localename)

        # Setting the locale from the tuple form must yield the full name.
        from_tuple = locale.setlocale(locale.LC_CTYPE, localetuple)
        self.assertEqual(from_tuple, localename)

        # Whatever getlocale() reports must be accepted back by setlocale()
        # and still resolve to the same full name.
        reported = locale.getlocale(locale.LC_CTYPE)
        from_reported = locale.setlocale(locale.LC_CTYPE, reported)
        self.assertEqual(from_reported, localename)

    @support.subTests('localename,localetuple', [
        ('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
        ('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
        ('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
        ('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
        ('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
        ('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
        ('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
        ('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
        ('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
        ('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
        ('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
        ('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
        ('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
        ('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
        ('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
        ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
        ('be_BY@latin', ('be_BY@latin', 'UTF-8')),
        ('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
        ('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
        ('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
        ('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
        ('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
        ('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
    ])
    def test_getlocale_with_modifier(self, localename, localetuple):
        self._activate_or_skip(localename)

        # getlocale() must report the expected (language@modifier, encoding).
        reported = locale.getlocale(locale.LC_CTYPE)
        self.assertEqual(reported, localetuple)

        # Feeding that tuple back in must not change what is reported.
        locale.setlocale(locale.LC_CTYPE, reported)
        self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)


class TestMiscellaneous(unittest.TestCase):
def test_defaults_UTF8(self):
# Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
Expand Down Expand Up @@ -552,27 +686,6 @@ def test_setlocale_category(self):
# crasher from bug #7419
self.assertRaises(locale.Error, locale.setlocale, 12345)

def test_getsetlocale_issue1813(self):
    # Issue #1813: setting and getting the locale under a Turkish locale
    category = locale.LC_CTYPE

    # Remember the current setting (setlocale() without a value only
    # queries it) and restore it when the test finishes.
    previous = locale.setlocale(category)
    self.addCleanup(locale.setlocale, category, previous)

    try:
        locale.setlocale(category, 'tr_TR')
    except locale.Error:
        # Unsupported locale on this system
        self.skipTest('test needs Turkish locale')

    reported = locale.getlocale(category)
    if verbose:
        print('testing with %a' % (reported,), end=' ', flush=True)

    try:
        locale.setlocale(category, reported)
    except locale.Error as exc:
        # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE)
        # and the tr_TR locale on Windows. getlocale() builds a locale
        # which is not recognized by setlocale().
        self.skipTest(f"setlocale(LC_CTYPE, {reported!r}) failed: {exc!r}")

    self.assertEqual(reported, locale.getlocale(category))

def test_invalid_locale_format_in_localetuple(self):
with self.assertRaises(TypeError):
locale.setlocale(locale.LC_ALL, b'fi_FI')
Expand Down
9 changes: 8 additions & 1 deletion Tools/i18n/makelocalealias.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,20 @@ def parse(filename):
# Ignore one letter locale mappings (except for 'c')
if len(locale) == 1 and locale != 'c':
continue
if '@' in locale and '@' not in alias:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment explaining why this is done (to make glibc on Linux happy)

if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
alias += '@euro'
# Normalize encoding, if given
if '.' in locale:
lang, encoding = locale.split('.')[:2]
encoding = encoding.replace('-', '')
encoding = encoding.replace('_', '')
locale = lang + '.' + encoding
data[locale] = alias
# Conflict with GNU libc
data.pop('el_gr@euro', None)
data.pop('uz_uz@cyrillic', None)
data.pop('uz_uz.utf8@cyrillic', None)
return data

def parse_glibc_supported(filename):
Expand Down Expand Up @@ -81,7 +88,7 @@ def parse_glibc_supported(filename):
# Add an encoding to alias
alias, _, modifier = alias.partition('@')
alias = _locale._replace_encoding(alias, alias_encoding)
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
if modifier:
alias += '@' + modifier
data[locale] = alias
return data
Expand Down
Loading