Skip to content

Commit e705df9

Browse files
committed
feat: extend CLDR plurals definitions
Cover all languages we support in base with CLDR plurals where available. This brings country-specific language variants in sync with the main language. Fixes translate/translate#5565
1 parent e6efadf commit e705df9

File tree

3 files changed

+117
-41
lines changed

3 files changed

+117
-41
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv
1111
./scripts/list-diff.py
1212
pre-commit run --files PLURALS_DIFF.md || true
1313

14-
cldr.csv: modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json scripts/export-cldr.py
14+
cldr.csv: modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json scripts/export-cldr.py languages.csv
1515
./scripts/export-cldr.py
1616

1717
rtl.csv: modules/cldr-json/cldr-json/cldr-misc-full/main/*/layout.json scripts/export-cldr-orientation.py languages.csv

cldr.csv

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,78 +5,128 @@ am,Amharic,2,n > 1
55
an,Aragonese,2,n != 1
66
ar,Arabic,6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
77
ar_001,Modern Standard Arabic,6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
8+
ar_BH,Arabic (Bahrain),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
9+
ar_DZ,Arabic (Algeria),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
10+
ar_EG,Arabic (Egypt),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
11+
ar_KW,Arabic (Kuwait),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
12+
ar_LY,Arabic (Libya),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
13+
ar_MA,Arabic (Morocco),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
14+
ar_SA,Arabic (Saudi Arabia),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
15+
ar_XB,Arabic (XB pseudolocale),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
16+
ar_YE,Arabic (Yemen),6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
817
ars,Najdi Arabic,6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n % 100 >= 3 && n % 100 <= 10) ? 3 : ((n % 100 >= 11 && n % 100 <= 99) ? 4 : 5))))
918
as,Assamese,2,n > 1
1019
asa,Asu,2,n != 1
1120
ast,Asturian,2,n != 1
1221
az,Azerbaijani,2,n != 1
1322
bal,Baluchi,2,n != 1
1423
be,Belarusian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
24+
be_Latn,Belarusian (Latin script),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
1525
bem,Bemba,2,n != 1
1626
bez,Bena,2,n != 1
1727
bg,Bulgarian,2,n != 1
1828
bho,Bhojpuri,2,n > 1
1929
blo,Anii,3,(n == 0) ? 0 : ((n == 1) ? 1 : 2)
2030
bm,Bambara,1,0
2131
bn,Bangla,2,n > 1
32+
bn_BD,Bengali (Bangladesh),2,n > 1
33+
bn_IN,Bengali (India),2,n > 1
2234
bo,Tibetan,1,0
35+
bo_CN,Tibetan (China),1,0
2336
br,Breton,5,(n % 10 == 1 && n % 100 != 11 && n % 100 != 71 && n % 100 != 91) ? 0 : ((n % 10 == 2 && n % 100 != 12 && n % 100 != 72 && n % 100 != 92) ? 1 : ((((n % 10 == 3 || n % 10 == 4) || n % 10 == 9) && (n % 100 < 10 || n % 100 > 19) && (n % 100 < 70 || n % 100 > 79) && (n % 100 < 90 || n % 100 > 99)) ? 2 : ((n != 0 && n % 1000000 == 0) ? 3 : 4)))
2437
brx,Bodo,2,n != 1
2538
bs,Bosnian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
39+
bs_Cyrl,Bosnian (Cyrillic script),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
40+
bs_Latn,Bosnian (Latin script),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
2641
ca,Catalan,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
42+
ca_AD,Catalan (Andorra),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
2743
ce,Chechen,2,n != 1
2844
ceb,Cebuano,2,n != 1 && n != 2 && n != 3 && (n % 10 == 4 || n % 10 == 6 || n % 10 == 9)
2945
cgg,Chiga,2,n != 1
3046
chr,Cherokee,2,n != 1
3147
ckb,Central Kurdish,2,n != 1
48+
ckb_IQ,"Kurdish (Central, Iraq)",2,n != 1
49+
ckb_IR,"Kurdish (Central, Iran)",2,n != 1
3250
cs,Czech,3,(n == 1) ? 0 : ((n >= 2 && n <= 4) ? 1 : 2)
3351
csw,Swampy Cree,2,n > 1
3452
cy,Welsh,6,(n == 0) ? 0 : ((n == 1) ? 1 : ((n == 2) ? 2 : ((n == 3) ? 3 : ((n == 6) ? 4 : 5))))
3553
da,Danish,2,n != 1
3654
de,German,2,n != 1
37-
de_AT,Austrian German,2,n != 1
38-
de_CH,Swiss High German,2,n != 1
55+
de_1901,German (old spelling),2,n != 1
56+
de_AT,German (Austria),2,n != 1
57+
de_CH,German (Switzerland),2,n != 1
58+
de_LU,German (Luxembourg),2,n != 1
3959
doi,Dogri,2,n > 1
4060
dsb,Lower Sorbian,4,(n % 100 == 1) ? 0 : ((n % 100 == 2) ? 1 : ((n % 100 == 3 || n % 100 == 4) ? 2 : 3))
4161
dv,Divehi,2,n != 1
4262
dz,Dzongkha,1,0
4363
ee,Ewe,2,n != 1
4464
el,Greek,2,n != 1
4565
en,English,2,n != 1
46-
en_AU,Australian English,2,n != 1
47-
en_CA,Canadian English,2,n != 1
48-
en_GB,British English,2,n != 1
49-
en_US,American English,2,n != 1
66+
en_AU,English (Australia),2,n != 1
67+
en_CA,English (Canada),2,n != 1
68+
en_GB,English (United Kingdom),2,n != 1
69+
en_IE,English (Ireland),2,n != 1
70+
en_IN,English (India),2,n != 1
71+
en_NZ,English (New Zealand),2,n != 1
72+
en_PH,English (Philippines),2,n != 1
73+
en_Shaw,English (Shavian),2,n != 1
74+
en_Shaw_GB,"English (Shavian script, United Kingdom)",2,n != 1
75+
en_Shaw_US,"English (Shavian script, United States)",2,n != 1
76+
en_US,English (United States),2,n != 1
77+
en_XA,English (XA pseudolocale),2,n != 1
78+
en_ZA,English (South Africa),2,n != 1
79+
en_devel,English (Developer),2,n != 1
5080
eo,Esperanto,2,n != 1
5181
es,Spanish,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
5282
es_419,Latin American Spanish,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
53-
es_ES,European Spanish,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
54-
es_MX,Mexican Spanish,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
83+
es_AR,Spanish (Argentina),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
84+
es_BO,Spanish (Bolivia),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
85+
es_CL,Spanish (Chile),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
86+
es_CO,Spanish (Colombia),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
87+
es_CR,Spanish (Costa Rica),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
88+
es_CU,Spanish (Cuba),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
89+
es_DO,Spanish (Dominican Republic),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
90+
es_EC,Spanish (Ecuador),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
91+
es_MX,Spanish (Mexico),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
92+
es_NI,Spanish (Nicaragua),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
93+
es_PA,Spanish (Panama),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
94+
es_PE,Spanish (Peru),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
95+
es_PR,Spanish (Puerto Rico),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
96+
es_SV,Spanish (El Salvador),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
97+
es_US,Spanish (American),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
98+
es_UY,Spanish (Uruguay),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
99+
es_VE,Spanish (Venezuela),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
55100
et,Estonian,2,n != 1
56101
eu,Basque,2,n != 1
57102
fa,Persian,2,n > 1
58-
fa_AF,Dari,2,n > 1
59103
ff,Fula,2,n > 1
60104
fi,Finnish,2,n != 1
61105
fil,Filipino,2,n != 1 && n != 2 && n != 3 && (n % 10 == 4 || n % 10 == 6 || n % 10 == 9)
62106
fo,Faroese,2,n != 1
63107
fr,French,3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
64-
fr_CA,Canadian French,3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
65-
fr_CH,Swiss French,3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
108+
fr_AG,French (Antigua and Barbuda),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
109+
fr_BE,French (Belgium),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
110+
fr_CA,French (Canada),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
111+
fr_CH,French (Switzerland),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
112+
fr_LU,French (Luxembourg),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
113+
fr_SN,French (Senegal),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
66114
fur,Friulian,2,n != 1
67115
fy,Western Frisian,2,n != 1
68116
ga,Irish,5,(n == 1) ? 0 : ((n == 2) ? 1 : ((n >= 3 && n <= 6) ? 2 : ((n >= 7 && n <= 10) ? 3 : 4)))
69117
gd,Scottish Gaelic,4,(n == 1 || n == 11) ? 0 : ((n == 2 || n == 12) ? 1 : ((n >= 3 && n <= 10 || n >= 13 && n <= 19) ? 2 : 3))
70118
gl,Galician,2,n != 1
71119
gsw,Swiss German,2,n != 1
72120
gu,Gujarati,2,n > 1
121+
gu_IN,Gujarati (India),2,n > 1
73122
guw,Gun,2,n > 1
74123
gv,Manx,4,(n % 10 == 1) ? 0 : ((n % 10 == 2) ? 1 : ((n % 100 == 0 || n % 100 == 20 || n % 100 == 40 || n % 100 == 60 || n % 100 == 80) ? 2 : 3))
75124
ha,Hausa,2,n != 1
76125
haw,Hawaiian,2,n != 1
77126
he,Hebrew,3,(n == 1) ? 0 : ((n == 2) ? 1 : 2)
127+
he_IL,Hebrew (Israel),3,(n == 1) ? 0 : ((n == 2) ? 1 : 2)
78128
hi,Hindi,2,n > 1
79-
hi_Latn,Hindi (Latin),2,n > 1
129+
hi_Latn,Hindi (Latin script),2,n > 1
80130
hnj,Hmong Njua,1,0
81131
hr,Croatian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
82132
hsb,Upper Sorbian,4,(n % 100 == 1) ? 0 : ((n % 100 == 2) ? 1 : ((n % 100 == 3 || n % 100 == 4) ? 2 : 3))
@@ -89,8 +139,11 @@ ii,Sichuan Yi,1,0
89139
io,Ido,2,n != 1
90140
is,Icelandic,2,n % 10 != 1 || n % 100 == 11
91141
it,Italian,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
142+
it_CH,Italian (Switzerland),3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
92143
iu,Inuktitut,3,(n == 1) ? 0 : ((n == 2) ? 1 : 2)
144+
iu_Latn,Inuktitut (Latin script),3,(n == 1) ? 0 : ((n == 2) ? 1 : 2)
93145
ja,Japanese,1,0
146+
ja_KS,Japanese (Kansai),1,0
94147
jbo,Lojban,1,0
95148
jgo,Ngomba,2,n != 1
96149
jmc,Machame,2,n != 1
@@ -103,6 +156,7 @@ kcg,Tyap,2,n != 1
103156
kde,Makonde,1,0
104157
kea,Kabuverdianu,1,0
105158
kk,Kazakh,2,n != 1
159+
kk_Latn,Kazakh (Latin script),2,n != 1
106160
kkj,Kako,2,n != 1
107161
kl,Kalaallisut,2,n != 1
108162
km,Khmer,1,0
@@ -130,18 +184,22 @@ mgo,Metaʼ,2,n != 1
130184
mk,Macedonian,2,n % 10 != 1 || n % 100 == 11
131185
ml,Malayalam,2,n != 1
132186
mn,Mongolian,2,n != 1
187+
mn_Cyrl,Mongolian (Cyrillic script),2,n != 1
188+
mn_Mong,Mongolian (Traditional script),2,n != 1
133189
mo,Moldavian,3,(n == 1) ? 0 : ((n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2)
134190
mr,Marathi,2,n != 1
135191
ms,Malay,1,0
192+
ms_Arab,Malay (Jawi),1,0
136193
mt,Maltese,5,(n == 1) ? 0 : ((n == 2) ? 1 : ((n == 0 || n % 100 >= 3 && n % 100 <= 10) ? 2 : ((n % 100 >= 11 && n % 100 <= 19) ? 3 : 4)))
137194
my,Burmese,1,0
138195
nah,Nahuatl,2,n != 1
139196
naq,Nama,3,(n == 1) ? 0 : ((n == 2) ? 1 : 2)
140197
nb,Norwegian Bokmål,2,n != 1
198+
nb_NO,Norwegian Bokmål,2,n != 1
141199
nd,North Ndebele,2,n != 1
142200
ne,Nepali,2,n != 1
143201
nl,Dutch,2,n != 1
144-
nl_BE,Flemish,2,n != 1
202+
nl_BE,Dutch (Belgium),2,n != 1
145203
nn,Norwegian Nynorsk,2,n != 1
146204
nnh,Ngiemboon,2,n != 1
147205
no,Norwegian,2,n != 1
@@ -155,19 +213,24 @@ or,Odia,2,n != 1
155213
os,Ossetic,2,n != 1
156214
osa,Osage,1,0
157215
pa,Punjabi,2,n > 1
216+
pa_PK,Punjabi (Pakistan),2,n > 1
158217
pap,Papiamento,2,n != 1
159218
pcm,Nigerian Pidgin,2,n > 1
160219
pl,Polish,3,(n == 1) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
161220
prg,Prussian,3,(n % 10 == 0 || n % 100 >= 11 && n % 100 <= 19) ? 0 : ((n % 10 == 1 && n % 100 != 11) ? 1 : 2)
162221
ps,Pashto,2,n != 1
163-
pt,Portuguese,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
164-
pt_BR,Brazilian Portuguese,3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
222+
pt,Portuguese,3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
223+
pt_AO,Portuguese (Angola),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
224+
pt_BR,Portuguese (Brazil),3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
225+
pt_BR@formal,"Portuguese (Brazil, formal)",3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
226+
pt_BR@informal,"Portuguese (Brazil, informal)",3,(n == 0 || n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
165227
pt_PT,European Portuguese,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
166228
rm,Romansh,2,n != 1
167229
ro,Romanian,3,(n == 1) ? 0 : ((n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2)
168230
ro_MD,Moldavian,3,(n == 1) ? 0 : ((n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2)
169231
rof,Rombo,2,n != 1
170232
ru,Russian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
233+
ru_UA,Russian (Ukraine),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
171234
rwk,Rwa,2,n != 1
172235
sah,Yakut,1,0
173236
saq,Samburu,2,n != 1
@@ -194,16 +257,19 @@ sn,Shona,2,n != 1
194257
so,Somali,2,n != 1
195258
sq,Albanian,2,n != 1
196259
sr,Serbian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
197-
sr_ME,Montenegrin,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
260+
sr_Cyrl,Serbian (Cyrillic script),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
261+
sr_Latn,Serbian (Latin script),3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
198262
ss,Swati,2,n != 1
199263
ssy,Saho,2,n != 1
200264
st,Southern Sotho,2,n != 1
201265
su,Sundanese,1,0
202266
sv,Swedish,2,n != 1
203267
sw,Swahili,2,n != 1
204-
sw_CD,Congo Swahili,2,n != 1
268+
sw_CD,Swahili (Congo),2,n != 1
269+
sw_TZ,Swahili (Tanzania),2,n != 1
205270
syr,Syriac,2,n != 1
206271
ta,Tamil,2,n != 1
272+
ta_LK,Tamil (Sri Lanka),2,n != 1
207273
te,Telugu,2,n != 1
208274
teo,Teso,2,n != 1
209275
th,Thai,1,0
@@ -220,7 +286,10 @@ tzm,Central Atlas Tamazight,2,n >= 2 && (n < 11 || n > 99)
220286
ug,Uyghur,2,n != 1
221287
uk,Ukrainian,3,(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : 2)
222288
ur,Urdu,2,n != 1
289+
ur_IN,Urdu (India),2,n != 1
290+
ur_PK,Urdu (Pakistan),2,n != 1
223291
uz,Uzbek,2,n != 1
292+
uz_Latn,Uzbek (Latin script),2,n != 1
224293
ve,Venda,2,n != 1
225294
vec,Venetian,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
226295
vi,Vietnamese,1,0
@@ -234,7 +303,12 @@ xog,Soga,2,n != 1
234303
yi,Yiddish,2,n != 1
235304
yo,Yoruba,1,0
236305
yue,Cantonese,1,0
306+
yue_Hans,Cantonese (Simplified Han script),1,0
307+
yue_Hant,Cantonese (Traditional Han script),1,0
237308
zh,Chinese,1,0
238-
zh_Hans,Simplified Chinese,1,0
239-
zh_Hant,Traditional Chinese,1,0
309+
zh_Hans,Chinese (Simplified Han script),1,0
310+
zh_Hans_SG,"Chinese (Simplified Han script, Singapore)",1,0
311+
zh_Hant,Chinese (Traditional Han script),1,0
312+
zh_Hant_HK,"Chinese (Traditional Han script, Hong Kong)",1,0
313+
zh_Latn,Chinese (Hanyu Pinyin),1,0
240314
zu,Zulu,2,n > 1

scripts/export-cldr.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,14 @@
66

77
from __future__ import annotations
88

9+
import csv
910
import json
1011
import re
1112
from typing import Literal, TypedDict
1213

1314
MAPPINGS = {
1415
"ar_001": "ar",
15-
"de_AT": "de",
16-
"de_CH": "de",
17-
"en_AU": "en",
18-
"en_CA": "en",
19-
"en_GB": "en",
20-
"en_US": "en",
2116
"es_419": "es",
22-
"es_ES": "es",
23-
"es_MX": "es",
24-
"fa_AF": "fa",
25-
"fr_CA": "fr",
26-
"fr_CH": "fr",
27-
"hi_Latn": "hi",
28-
"nl_BE": "nl",
29-
"pt_BR": "pt",
30-
"pt": "pt_PT",
31-
"ro_MD": "ro",
32-
"sr_ME": "sr",
33-
"sw_CD": "sw",
34-
"zh_Hans": "zh",
35-
"zh_Hant": "zh",
3617
}
3718

3819
SIMPLIFICATIONS = {
@@ -41,6 +22,11 @@
4122
"(n == 0 || n == 1) && n != 0": "n == 1",
4223
}
4324

25+
with open("languages.csv") as csvfile:
26+
reader = csv.reader(csvfile, delimiter=",")
27+
next(reader)
28+
ALL_LANGUAGE_CODES = {lang[0]: lang for lang in reader}
29+
4430

4531
def map_code(code: str) -> str:
4632
return code.replace("-", "_")
@@ -276,16 +262,32 @@ class LanguageDict(TypedDict, total=False):
276262
LANGUAGES[code]["plurals"] = len(cleaned_up_formula)
277263
LANGUAGES[code]["formula"] = merge_formulas(cleaned_up_formula)
278264

279-
# Add aliases
265+
# Map some plurals to alternate names
280266
for new, old in MAPPINGS.items():
281267
for key in ("plurals", "formula"):
268+
if existing := LANGUAGES[new].get(key):
269+
raise ValueError(f"{new} already has {key}: {existing}")
282270
LANGUAGES[new][key] = LANGUAGES[old][key]
283271

284272
# Remove the languages for which we don't have plurals
285273
for code in sorted(LANGUAGES.keys()):
286274
if "plurals" not in LANGUAGES[code]:
287275
del LANGUAGES[code]
288276

277+
# Add aliases to the base language
278+
for code, existing in ALL_LANGUAGE_CODES.items():
279+
# Skip existing and base codes
280+
if "_" not in code or code in LANGUAGES:
281+
continue
282+
base = code.split("_", 1)[0]
283+
if data := LANGUAGES.get(base):
284+
LANGUAGES[code] = {
285+
"name": existing[1],
286+
"plurals": data["plurals"],
287+
"formula": data["formula"],
288+
}
289+
290+
289291
# Remove languages we do not want
290292
del LANGUAGES["und"] # Unknown language
291293

0 commit comments

Comments
 (0)