-
-
Notifications
You must be signed in to change notification settings - Fork 33.1k
gh-137729: Fix support for locales with @-modifiers #137253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
853cfbd
abcbb93
e0df7e0
1e65e5b
4954f97
1096f32
073572e
8aaf1e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -375,12 +375,12 @@ def _replace_encoding(code, encoding): | |
def _append_modifier(code, modifier): | ||
if modifier == 'euro': | ||
if '.' not in code: | ||
return code + '.ISO8859-15' | ||
return code + '.ISO8859-15@euro' | ||
_, _, encoding = code.partition('.') | ||
if encoding in ('ISO8859-15', 'UTF-8'): | ||
if encoding == 'UTF-8': | ||
return code | ||
if encoding == 'ISO8859-1': | ||
return _replace_encoding(code, 'ISO8859-15') | ||
code = _replace_encoding(code, 'ISO8859-15') | ||
return code + '@' + modifier | ||
|
||
def normalize(localename): | ||
|
@@ -487,11 +487,16 @@ def _parse_localename(localename): | |
if modifier == 'euro' and '.' not in code: | ||
# Assume Latin-9 for @euro locales. This is bogus, | ||
# since some systems may use other encodings for these | ||
|
||
# locales. Also, we ignore other modifiers. | ||
return code, 'iso-8859-15' | ||
# locales. | ||
return code + '@euro', 'ISO8859-15' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may be dead code, because |
||
else: | ||
modifier = '' | ||
|
||
if '.' in code: | ||
return tuple(code.split('.')[:2]) | ||
code, encoding = code.split('.')[:2] | ||
if modifier: | ||
code += '@' + modifier | ||
return code, encoding | ||
elif code == 'C': | ||
return None, None | ||
elif code == 'UTF-8': | ||
|
@@ -516,7 +521,14 @@ def _build_localename(localetuple): | |
if encoding is None: | ||
return language | ||
else: | ||
return language + '.' + encoding | ||
if '@' in language: | ||
language, modifier = language.split('@', 1) | ||
else: | ||
modifier = '' | ||
localename = language + '.' + encoding | ||
if modifier: | ||
localename += '@' + modifier | ||
return localename | ||
except (TypeError, ValueError): | ||
raise TypeError('Locale must be None, a string, or an iterable of ' | ||
'two strings -- language code, encoding.') from None | ||
|
@@ -888,6 +900,12 @@ def getpreferredencoding(do_setlocale=True): | |
# SS 2025-06-10: | ||
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist | ||
# on all platforms. | ||
# | ||
# SS 2025-07-30: | ||
# Remove conflicts with GNU libc. | ||
# | ||
# removed 'el_gr@euro' | ||
# removed 'uz_uz@cyrillic' | ||
|
||
locale_alias = { | ||
'a3': 'az_AZ.KOI8-C', | ||
|
@@ -1021,7 +1039,6 @@ def getpreferredencoding(do_setlocale=True): | |
'el': 'el_GR.ISO8859-7', | ||
'el_cy': 'el_CY.ISO8859-7', | ||
'el_gr': 'el_GR.ISO8859-7', | ||
'el_gr@euro': 'el_GR.ISO8859-15', | ||
'en': 'en_US.ISO8859-1', | ||
'en_ag': 'en_AG.UTF-8', | ||
'en_au': 'en_AU.ISO8859-1', | ||
|
@@ -1456,7 +1473,6 @@ def getpreferredencoding(do_setlocale=True): | |
'ur_pk': 'ur_PK.CP1256', | ||
'uz': 'uz_UZ.UTF-8', | ||
'uz_uz': 'uz_UZ.UTF-8', | ||
'uz_uz@cyrillic': 'uz_UZ.UTF-8', | ||
've': 've_ZA.UTF-8', | ||
've_za': 've_ZA.UTF-8', | ||
'vi': 'vi_VN.TCVN', | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,13 +44,20 @@ def parse(filename): | |
# Ignore one letter locale mappings (except for 'c') | ||
if len(locale) == 1 and locale != 'c': | ||
continue | ||
if '@' in locale and '@' not in alias: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add a comment explaining why this is done (to make glibc on Linux happy). |
||
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'): | ||
alias += '@euro' | ||
# Normalize encoding, if given | ||
if '.' in locale: | ||
lang, encoding = locale.split('.')[:2] | ||
encoding = encoding.replace('-', '') | ||
encoding = encoding.replace('_', '') | ||
locale = lang + '.' + encoding | ||
data[locale] = alias | ||
# Conflict with GNU libc | ||
data.pop('el_gr@euro', None) | ||
data.pop('uz_uz@cyrillic', None) | ||
data.pop('uz_uz.utf8@cyrillic', None) | ||
return data | ||
|
||
def parse_glibc_supported(filename): | ||
|
@@ -81,7 +88,7 @@ def parse_glibc_supported(filename): | |
# Add an encoding to alias | ||
alias, _, modifier = alias.partition('@') | ||
alias = _locale._replace_encoding(alias, alias_encoding) | ||
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'): | ||
if modifier: | ||
alias += '@' + modifier | ||
data[locale] = alias | ||
return data | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a comment to this section stating that Linux appears to require keeping the "@euro" modifier in place, even when using the ".ISO8859-15" encoding.