Skip to content

Commit 1f6d2ff

Browse files
committed
feat: add list of case-insensitive languages
This is automatically generated from CLDR. Needed for features like WeblateOrg/weblate#9450
1 parent 9352783 commit 1f6d2ff

File tree

5 files changed

+392
-1
lines changed

5 files changed

+392
-1
lines changed

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
all: weblate_language_data/languages.py weblate_language_data/plural_tags.py PLURALS_DIFF.md $(wildcard weblate_language_data/locale/*/LC_MESSAGES/django.po) $(filter-out $(patsubst modules/cldr-json/cldr-json/cldr-localenames-full/main/%/languages.json,languages-po/%.po,$(wildcard modules/cldr-json/cldr-json/cldr-localenames-full/main/*/languages.json)),languages-po/en.po)
66

7-
weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv rtl.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
7+
weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv rtl.csv case-insensitive.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
88
./scripts/generate-language-data
99

1010
PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv scripts/list-diff
@@ -17,6 +17,9 @@ cldr.csv: modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json module
1717
rtl.csv: modules/cldr-json/cldr-json/cldr-misc-full/main/*/layout.json scripts/export-cldr-orientation languages.csv
1818
./scripts/export-cldr-orientation
1919

20+
case-insensitive.csv: modules/cldr-json/cldr-json/cldr-core/scriptMetadata.json modules/cldr-json/cldr-json/cldr-core/supplemental/languageData.json scripts/export-cldr-case languages.csv
21+
./scripts/export-cldr-case
22+
2023
qt.csv: modules/qttools/src/linguist/shared/numerus.cpp scripts/export-qt languages.csv
2124
./scripts/export-qt
2225

case-insensitive.csv

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
code,
2+
aeb,
3+
am,
4+
anp,
5+
ar,
6+
arq,
7+
ars,
8+
arz,
9+
as,
10+
awa,
11+
az,
12+
bal,
13+
bej,
14+
bgc,
15+
bgn,
16+
bhb,
17+
bhi,
18+
bho,
19+
bjj,
20+
bm,
21+
bn,
22+
bo,
23+
bqi,
24+
bra,
25+
brh,
26+
brx,
27+
byn,
28+
cdo_Hans,
29+
cdo_Hant,
30+
ckb,
31+
cpx_Hans,
32+
cpx_Hant,
33+
cr,
34+
csw,
35+
dcc,
36+
doi,
37+
dv,
38+
dz,
39+
en_Shaw,
40+
fa,
41+
gan,
42+
gan_Hans,
43+
gan_Hant,
44+
gbm,
45+
glk,
46+
gon,
47+
gu,
48+
ha,
49+
hak,
50+
hak_Hans,
51+
hak_Hant,
52+
haz,
53+
he,
54+
hi,
55+
hnd,
56+
hne,
57+
hnj,
58+
hno,
59+
hoc,
60+
hoj,
61+
hsn,
62+
ii,
63+
iu,
64+
ja,
65+
jpr,
66+
jrb,
67+
ka,
68+
kfr,
69+
kfy,
70+
khn,
71+
kk,
72+
km,
73+
kn,
74+
ko,
75+
kok,
76+
kru,
77+
ks,
78+
ku,
79+
kxm,
80+
ky,
81+
lad,
82+
lki,
83+
lmn,
84+
lo,
85+
lrc,
86+
lus,
87+
luz,
88+
mag,
89+
mai,
90+
man,
91+
mfa,
92+
ml,
93+
mn_Mong,
94+
mni,
95+
mnw,
96+
mr,
97+
ms,
98+
ms_Arab,
99+
mtr,
100+
mwr,
101+
my,
102+
mzn,
103+
nan,
104+
nan_Hant,
105+
ne,
106+
new,
107+
nod,
108+
noe,
109+
nqo,
110+
oj,
111+
or,
112+
pa,
113+
ps,
114+
raj,
115+
rhg,
116+
rif,
117+
rkt,
118+
rmt,
119+
sat,
120+
sck,
121+
sd,
122+
sdh,
123+
shi,
124+
shn,
125+
si,
126+
skr,
127+
sou,
128+
swb,
129+
swv,
130+
syl,
131+
ta,
132+
tcy,
133+
te,
134+
tg,
135+
th,
136+
ti,
137+
tig,
138+
tk,
139+
tly,
140+
tsj,
141+
tts,
142+
tzm,
143+
ug,
144+
unr,
145+
ur,
146+
uz,
147+
vai,
148+
wal,
149+
wbq,
150+
wbr,
151+
wtm,
152+
wuu,
153+
wuu_Hans,
154+
wuu_Hant,
155+
xnr,
156+
yi,
157+
yue,
158+
yue_Hans,
159+
yue_Hant,
160+
zgh,
161+
zh,
162+
zh_Hans,
163+
zh_Hant,

scripts/export-cldr-case

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
#! /usr/bin/env python3

# Copyright © Michal Čihař <[email protected]>
#
# SPDX-License-Identifier: MIT

"""
Regenerate case-insensitive.csv from CLDR script metadata.

The language codes listed in languages.csv are checked against the CLDR
script metadata. Codes whose script has no upper/lower case distinction are
merged into the codes already present in case-insensitive.csv, and the file
is rewritten in sorted order. Existing entries are never removed.
"""

import csv
import json

LANGUAGES_CSV = "languages.csv"
CASE_INSENSITIVE_CSV = "case-insensitive.csv"
SCRIPT_METADATA_JSON = "modules/cldr-json/cldr-json/cldr-core/scriptMetadata.json"
LANGUAGE_DATA_JSON = (
    "modules/cldr-json/cldr-json/cldr-core/supplemental/languageData.json"
)


def read_codes(filename):
    """
    Return the set of first-column values of a CSV file.

    The first row is treated as a header and skipped. Empty rows (for
    example a trailing blank line) are ignored instead of raising
    IndexError, and an empty file yields an empty set.
    """
    # newline="" is what the csv module expects for files passed to csv.reader
    with open(filename, encoding="utf-8", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        next(reader, None)
        return {row[0] for row in reader if row}


def find_case_insensitive(codes, scripts, language_data):
    """
    Return the codes that should be listed as case-insensitive.

    codes is an iterable of language codes such as "cs", "zh_Hans" or
    "pt_BR". scripts maps a script name to a dict with a "hasCase" key
    (the "scriptMetadata" object). language_data maps a base language code
    to a dict that may carry a "_scripts" list (the "languageData" object).

    - When the part after the first underscore is a known script, the full
      code is reported if that script does not have case.
    - Otherwise (no suffix, or the suffix is not a script name) the base
      language is reported if any of the scripts listed for it does not
      have case. Note that the base code is reported here, not the full one.

    Languages missing from language_data, entries without "_scripts", and
    scripts missing from the metadata are skipped rather than raising
    KeyError.
    """

    def lacks_case(script_name):
        return script_name in scripts and scripts[script_name]["hasCase"] != "YES"

    found = set()
    for code in codes:
        base, _, suffix = code.partition("_")
        if suffix and suffix in scripts:
            # Explicit script in the code, e.g. "zh_Hans"
            if lacks_case(suffix):
                found.add(code)
            continue
        # No usable script in the code; fall back to the scripts CLDR
        # lists for the base language.
        language_scripts = language_data.get(base, {}).get("_scripts", ())
        if any(lacks_case(name) for name in language_scripts):
            found.add(base)
    return found


def main():
    """Read the inputs, compute the list and rewrite case-insensitive.csv."""
    language_codes = read_codes(LANGUAGES_CSV)

    # The output file doubles as an input so that existing entries are kept.
    # It may not exist on the very first run, start from scratch then.
    try:
        case_insensitive_codes = read_codes(CASE_INSENSITIVE_CSV)
    except FileNotFoundError:
        case_insensitive_codes = set()

    # Load CLDR
    with open(SCRIPT_METADATA_JSON, encoding="utf-8") as handle:
        scripts = json.load(handle)["scriptMetadata"]
    with open(LANGUAGE_DATA_JSON, encoding="utf-8") as handle:
        language_data = json.load(handle)["supplemental"]["languageData"]

    case_insensitive_codes |= find_case_insensitive(
        language_codes, scripts, language_data
    )

    with open(CASE_INSENSITIVE_CSV, "w", encoding="utf-8") as handle:
        handle.write("code,\n")
        for code in sorted(case_insensitive_codes):
            handle.write(f"{code},\n")


if __name__ == "__main__":
    main()

scripts/generate-language-data

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ with open("rtl.csv") as csvfile:
150150
RTLS = list(reader)
151151
RTL_CODES = {lang[0] for lang in RTLS}
152152

153+
# Read case-insensitive languages
154+
with open("case-insensitive.csv") as csvfile:
155+
reader = csv.reader(csvfile, delimiter=",")
156+
next(reader)
157+
CASES = list(reader)
158+
CASE_INSENSITIVE_CODES = {lang[0] for lang in CASES}
159+
153160
# Write language definitions
154161
with open("weblate_language_data/languages.py", "w") as output:
155162
output.write(HEADER)
@@ -207,6 +214,14 @@ with open("weblate_language_data/rtl.py", "w") as output:
207214
output.write(f' "{code}",\n')
208215
output.write("}\n")
209216

# Write the sorted case-insensitive language codes as a Python set literal
# into a generated module, preceded by the shared HEADER.
with open("weblate_language_data/case_insensitive.py", "w") as output:
    output.write(HEADER)
    output.write("# List of case-insensitive languages\n")
    output.write("CASE_INSENSITIVE_LANGS: set[str] = {\n")
    for code in sorted(CASE_INSENSITIVE_CODES):
        output.write(f' "{code}",\n')
    output.write("}\n")
224+
210225
# Generate same check blacklist
211226
words = set()
212227

0 commit comments

Comments
 (0)