Skip to content

Commit 75486c9

Browse files
authored
Upgrade to CLDR 45 (#1077)
* Upgrade to CLDR 45
* Handle 'localeRules="nonlikelyScript"' for parent locales

Locales of the form 'lang_Script' where 'Script' is not the likely script for 'lang' should have 'root' as their parent locale. For example, the parent of 'az_Arab' should not be computed as 'az' by truncating from the end, but should be 'root' instead, as 'Arab' is not the likely script for 'az'. The list of such locales was previously specified using an explicit 'locales' attribute. It is now handled dynamically using the new 'localeRules' attribute.
1 parent 3edf772 commit 75486c9

File tree

5 files changed

+51
-7
lines changed

5 files changed

+51
-7
lines changed

babel/localedata.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,27 @@ def locale_identifiers() -> list[str]:
9595
]
9696

9797

98+
def _is_non_likely_script(name: str) -> bool:
    """Return whether the locale is of the form ``lang_Script``,
    and the script is not the likely script for the language.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localeRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier to inspect, e.g. ``"az_Arab"``
    :return: ``True`` only when *name* consists of exactly a language and a
             script, and that script differs from the script found in the
             language's likely-subtags entry; ``False`` in every other case,
             including identifiers that cannot be parsed and languages that
             have no likely-subtags entry
    """
    from babel.core import get_global, parse_locale

    try:
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        return False

    # Only the bare ``lang_Script`` shape is covered by the rule; anything
    # carrying a territory, variant or modifier is left to the caller.
    if not (lang and script) or territory or variant or rest:
        return False

    likely_subtag = get_global('likely_subtags').get(lang)
    if not likely_subtag:
        # Without a likely-subtags entry the likely script is unknown, so the
        # locale cannot be classified as "non-likely". Report False instead of
        # passing None on to parse_locale().
        return False

    _, _, likely_script, *_ = parse_locale(likely_subtag)
    return script != likely_script
117+
118+
98119
def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
99120
"""Load the locale data for the given locale.
100121
@@ -132,8 +153,11 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str
132153
from babel.core import get_global
133154
parent = get_global('parent_exceptions').get(name)
134155
if not parent:
135-
parts = name.split('_')
136-
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
156+
if _is_non_likely_script(name):
157+
parent = 'root'
158+
else:
159+
parts = name.split('_')
160+
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
137161
data = load(parent).copy()
138162
filename = resolve_locale_filename(name)
139163
with open(filename, 'rb') as fileobj:

scripts/download_import_cldr.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
import zipfile
1010
from urllib.request import urlretrieve
1111

12-
URL = 'https://unicode.org/Public/cldr/44/cldr-common-44.0.zip'
13-
FILENAME = 'cldr-common-44.0.zip'
14-
# Via https://unicode.org/Public/cldr/44/hashes/SHASUM512
15-
FILESUM = 'f2cd8733948caf308d6e39eae21724da7f29f528f8969d456514e1e84ecd5f1e6936d0460414a968888bb1b597bc1ee723950ea47df5cba21a02bb14f96d18b6'
12+
URL = 'https://unicode.org/Public/cldr/45/cldr-common-45.0.zip'
13+
FILENAME = 'cldr-common-45.0.zip'
14+
# Via https://unicode.org/Public/cldr/45/hashes/SHASUM512.txt
15+
FILESUM = '638123882bd29911fc9492ec152926572fec48eb6c1f5dd706aee3e59cad8be4963a334bb7a09a645dbedc3356f60ef7ac2ef7ab4ccf2c8926b547782175603c'
1616
BLKSIZE = 131072
1717

1818

scripts/import_cldr.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,11 @@ def parse_global(srcdir, sup):
315315

316316
for paternity in parentBlock.findall('./parentLocale'):
317317
parent = paternity.attrib['parent']
318+
if parent == 'root':
319+
# Since CLDR-45, the 'root' parent locale uses 'localeRules="nonlikelyScript"' instead of
320+
# 'locales'. This special case is handled in babel when loading locale data
321+
# (https://cldr.unicode.org/index/downloads/cldr-45#h.5rbkhkncdqi9)
322+
continue
318323
for child in paternity.attrib['locales'].split():
319324
parent_exceptions[child] = parent
320325

tests/test_localedata.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,21 @@ def test_load():
6363
assert localedata.load('en_US') is localedata.load('en_US')
6464

6565

66+
def test_load_inheritance(monkeypatch):
    """Check which parent locales ``localedata.load`` pulls in.

    The keys of the locale-data cache, in insertion order, record the chain of
    locales that were loaded on the way to the requested one.
    """
    # Give the module an empty cache for the duration of the test rather than
    # clearing the shared one; monkeypatch restores the original afterwards.
    # NOTE(review): relies on load() looking up the module-level ``_cache``
    # name at call time -- confirm against babel/localedata.py.
    monkeypatch.setattr(localedata, '_cache', {})
    localedata.load('hi_Latn')
    # Must not be ['root', 'hi_Latn'] even though 'hi_Latn' matches the 'lang_Script'
    # form used by 'nonlikelyScript'. This is because 'hi_Latn' has an explicit parent locale 'en_IN'.
    assert list(localedata._cache) == ['root', 'en', 'en_001', 'en_IN', 'hi_Latn']

    monkeypatch.setattr(localedata, '_cache', {})
    localedata.load('az_Arab')
    # Must not include 'az' as 'Arab' is not a likely script for 'az'.
    assert list(localedata._cache) == ['root', 'az_Arab']
79+
80+
6681
def test_merge():
6782
d = {1: 'foo', 3: 'baz'}
6883
localedata.merge(d, {1: 'Foo', 2: 'Bar'})

tests/test_numbers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def test_list_currencies():
250250

251251
assert list_currencies(locale='pa_Arab') == {'PKR', 'INR', 'EUR'}
252252

253-
assert len(list_currencies()) == 305
253+
assert len(list_currencies()) == 306
254254

255255

256256
def test_validate_currency():

0 commit comments

Comments
 (0)