Skip to content

Commit c1ecbf4

Browse files
authored
CLDR-17600 BRS 7 No2 Update language names (#3680)
1 parent dd4a0bd commit c1ecbf4

File tree

6 files changed

+155
-362
lines changed

6 files changed

+155
-362
lines changed

common/supplemental/attributeValueValidity.xml

Lines changed: 80 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -7,105 +7,88 @@
77
<metadata>
88
<validity>
99
<!-- BCP 47 contains many more language codes than we are interested in maintaining as a part of the CLDR -->
10-
<!-- This $language list contains ONLY those languages that are in the CLDR locales' IDs (after maximizing with LikelySubtags) -->
10+
<!-- This $language list contains ONLY those languages that are in the TC locales' IDs (after maximizing with LikelySubtags) -->
1111
<variable id='$language' type='choice'>
12-
aa ab af agq ak am an ann apc ar arn as asa ast az
13-
ba bal bas be bem bew bez bg bgc bgn bho blo blt bm bn bo br brx bs bss byn
14-
ca cad cch ccp ce ceb cgg cho chr cic ckb co cs csw cu cv cy
15-
da dav de dje doi dsb dua dv dyo dz
16-
ebu ee el en eo es et eu ewo
17-
fa ff fi fil fo fr frr fur fy
18-
ga gaa gd gez gl gn gsw gu guz gv
19-
ha haw he hi hnj hr hsb hu hy
20-
ia id ie ig ii io is it iu
21-
ja jbo jgo jmc jv
22-
ka kab kaj kam kcg kde kea ken kgp khq ki kk kkj kl kln km kn ko kok kpe ks ksb ksf ksh ku kw kxv ky
23-
la lag lb lg lij lkt lld lmo ln lo lrc lt ltg lu luo luy lv
24-
mai mas mdf mer mfe mg mgh mgo mhn mi mic mk ml mn mni moh mr ms mt mua mus my myv mzn
25-
naq nb nd nds ne nl nmg nn nnh no nqo nr nso nus nv ny nyn
26-
oc om or os osa
27-
pa pap pcm pis pl prg ps pt
28-
qu quc
29-
raj rhg rif rm rn ro rof ru rw rwk
30-
sa sah saq sat sbp sc scn sd sdh se seh ses sg shi shn si sid sk skr sl sma smj smn sms sn so sq sr ss ssy st su sv sw syr szl
31-
ta te teo tg th ti tig tk tn to tok tpi tr trv trw ts tt twq tyv tzm
32-
ug uk ur uz
33-
vai ve vec vi vmw vo vun
34-
wa wae wal wbp wo
35-
xh xnr xog
36-
yav yi yo yrl yue
37-
za zgh zh zu
12+
af am ar as az
13+
be bg bgc bho bn brx bs
14+
ca ceb cs cv cy
15+
da de doi
16+
el en es et eu
17+
fa fi fil fr
18+
ga gd gl gu
19+
ha he hi hr hu hy
20+
id ig is it
21+
ja jv
22+
ka kk km kn ko kok ks ky
23+
lo lt lv
24+
mai mi mk ml mn mni mr ms my
25+
ne nl nn no
26+
or
27+
pa pcm pl ps pt
28+
raj ro ru
29+
sa sat sd si sk sl so sq sr su sv sw
30+
ta te tg th ti tk tr tt
31+
uk ur uz
32+
vi
33+
wo
34+
xh
35+
yo yue
36+
zh zu
3837
</variable>
39-
<!-- The following are exceptional cases.
40-
In v44 this contains a copy of the previous $language list to make review of CLDR-16789 easier;
41-
this should be rationalized later.
42-
-->
43-
<variable id='$languageExceptions' type='choice'>
44-
af agq ak am ann apc ar as asa ast az
45-
bas be bem bez bg bgc bho bm bn bo br brx bs
46-
ca ccp ce ceb cgg chr ckb cs cu cv cy
47-
da dav de dje doi dsb dua dyo dz
48-
ebu ee el en eo es et eu ewo
49-
fa ff fi fil fo fr frr fur fy
50-
ga gd gl gsw gu guz gv
51-
ha haw he hi hr hsb hu hy
52-
ia id ig ii is it
53-
ja jgo jmc jv
54-
ka kab kam kde kea kgp khq ki kk kkj kl kln km kn ko kok ks ksb ksf ksh ku kw ky
55-
lag lb lg lij lkt lmo ln lo lrc lt lu luo luy lv
56-
mai mas mer mfe mg mgh mgo mi mk ml mn mni mr ms mt mua my mzn
57-
naq nb nd nds ne nl nmg nn nnh no nus nyn
58-
om or os
59-
pa pap pcm pis pl prg ps pt
60-
qu
61-
raj rif rm rn ro rof
62-
und
63-
ru rw rwk
64-
sa sah saq sat sbp sc sd se seh ses sg shi si sk sl smn sms sn so sq sr su sv sw
65-
ta te teo tg th ti tk to tok tr tt twq tzm
66-
ug uk ur uz
67-
vai vec vi vo vun
68-
wae wo
69-
xh xog
70-
yav yi yo yrl yue
71-
zgh zh zu
72-
73-
mul root zxx
74-
ab ace ada ady ain ale alt an anp arn arp ars atj av awa ay
75-
ba ban bi bin bla bug byn
76-
cay ch chk chm cho chp chy clc co crg crj crk crl crm crr csw
77-
dak dar dgr dv dzg
78-
efi eka
79-
fj fon frc
80-
gaa gez gil gn gor gwi
81-
hai hax hil hmn ht hup hur hz
82-
iba ibb ikt ilo inh io iu
83-
jbo
84-
kac kaj kbd kcg kfo kha kj kmb kpe kr krc krl kru kum kv kwk
85-
la lad lez li lil lou loz lsm lua lun lus
86-
mad mag mak mdf men mh mic min moe moh mos mus mwl myv
87-
na nap new ng nia niu nog nqo nr nso nv ny
88-
oc ojb ojc ojs ojw oka
89-
pag pam pau pqm
90-
rap rar rhg rup
91-
sad sba scn sco shn slh sm snk srn ss st str suk swb syr
92-
tce tem tet tgx tht tig tlh tli tn tpi trv ts ttm tum tvl ty tyv
93-
udm umb
94-
ve
95-
wa wal war wuu
96-
xal
97-
ybb
98-
zun zza
38+
<!-- The following are non-TC locales that have reached at least Basic, plus specials mul root zxx und -->
39+
<variable id='$languageNonTcGeqBasic' type='choice'>
40+
ast
41+
blo br
42+
chr csw
43+
dsb
44+
eo
45+
ff fo fy
46+
hsb
47+
ia ie
48+
kea kgp ku kxv
49+
lb lij lmo
50+
mt
51+
nds nqo
52+
oc
53+
prg
54+
qu
55+
rm
56+
sah sc syr szl
57+
to
58+
ug
59+
vec vmw
60+
xnr
61+
yrl
62+
za
63+
mul root zxx und
9964
</variable>
100-
<!-- The following are not in modern coverage: -->
101-
<variable id='$oldLanguages' type='choice'>
102-
aa
103-
crs
104-
gan
105-
hak hsn
106-
nan
107-
quc
108-
sma smj sms ssy
65+
<!-- The following are non-TC languages that have not yet reached Basic -->
66+
<variable id='$languageNonTcLtBasic' type='choice'>
67+
aa ab agq ak an ann apc arn asa
68+
ba bal bas bem bew bez bgn blt bm bo bss byn
69+
cad cch ccp ce cgg cho cic ckb co cu
70+
dav dje dua dv dyo dz
71+
ebu ee ewo
72+
frr fur
73+
gaa gez gn gsw guz gv
74+
haw hnj
75+
ii io iu
76+
jbo jgo jmc
77+
kab kaj kam kcg kde ken khq ki kkj kl kln kpe ksb ksf ksh kw
78+
la lag lg lkt lld ln lrc ltg lu luo luy
79+
mas mdf mer mfe mhn mg mgh mgo mic moh mua mus myv mzn
80+
naq nb nd nmg nnh nr nso nus nv ny nyn om
81+
os osa
82+
pap pis
83+
quc
84+
rhg rif rn rof rw rwk
85+
saq sbp scn sdh se seh ses sg shi shn sid skr sma smj smn sms sn ss ssy st
86+
teo tig tn tok tpi trv trw ts twq tyv tzm
87+
vai ve vo vun
88+
wa wae wal wbp
89+
xog
90+
yav yi
91+
zgh
10992
</variable>
11093
<variable id='$scriptNonUnicode' type='choice'>Afak Aran Blis Cirt Cyrs Egyd Egyh Geok Inds Jurc Kitl Kpel Latf Latg Loma Maya Moon
11194
Nkgb Phlv Roro Sara Syre Syrj Syrn Teng Visp Wole
@@ -458,4 +441,4 @@
458441
<attributeValues dtds='keyboard3' elements='version' attributes='number' type='TODO'></attributeValues>
459442
</validity>
460443
</metadata>
461-
</supplementalData>
444+
</supplementalData>

common/supplemental/coverageLevels.xml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,7 @@ For terms of use, see http://www.unicode.org/copyright.html
107107
<coverageVariable key="%language60_TD" value="(shu|dzg|kbl|mde|mua|sba)"/>
108108
<!-- See CLDR-16673: All basic+ locales (per coverageLevels.txt) MUST have their language's name at least at level 80 (modern), except for those on exception list -->
109109
<!-- Can use MinimizeRegex.java to "unpack" the compressed lists that are painful to edit -->
110-
<coverageVariable key="%language80" value="(ace|ada|ady|ain|ale|alt|anp|arn|arp|ars|ast|atj|awa|ban|bho|bin|bla|brx|bug|byn|cay|ceb|chk|chm|cho|chp|chr|chy|clc|crg|crj|crk|crl|crm|crr|csw|dak|dar|dgr|doi|dsb|dzg|efi|eka|fil|fon|frc|gaa|gez|gil|gor|gwi|hai|hax|hil|hmn|hsb|hup|hur|iba|ibb|ikt|ilo|inh|jbo|kac|kaj|kbd|kcg|kea|kfo|kgp|kha|kmb|kok|kpe|krc|krl|kru|kum|kwk|lad|lez|lil|lou|loz|lsm|lua|lun|lus|mad|mag|mai|mak|mdf|men|mic|min|mni|moe|moh|mos|mul|mus|mwl|myv|nap|new|nia|niu|nog|nqo|nso|ojb|ojc|ojs|ojw|oka|pag|pam|pap|pau|pcm|pqm|rap|rar|rhg|rup|sad|sah|sat|sba|scn|sco|shn|slh|snk|srn|str|suk|swb|syr|tce|tem|tet|tgx|tht|tig|tlh|tli|tpi|trv|ttm|tum|tvl|tyv|udm|umb|wal|war|wuu|xal|ybb|yrl|yue|zun|zxx|zza|ab|af|am|an|ar|as|av|ay|az|ba|be|bg|bi|bn|br|bs|ca|ch|co|cs|cv|cy|da|de|dv|el|en|es|et|eu|fa|fi|fj|fo|fr|fy|ga|gd|gl|gn|gu|ha|he|hi|hr|ht|hu|hy|hz|ia|id|ig|io|is|it|iu|ja|jv|ka|kj|kk|km|kn|ko|kr|ks|ku|kv|ky|la|lb|li|lo|lt|lv|mh|mi|mk|ml|mn|mr|ms|mt|my|na|ne|ng|nl|nn|no|nr|nv|ny|oc|or|pa|pl|ps|pt|qu|rm|ro|ru|sa|sc|sd|si|sk|sl|sm|so|sq|sr|ss|st|su|sv|sw|ta|te|tg|th|ti|tk|tn|to|tr|ts|tt|ty|ug|uk|ur|uz|ve|vi|wa|wo|xh|yo|zh|zu)"/>
111-
<coverageVariable key="%languagecomp" value="(gan|hak|hsn|nan)"/> <!-- not currently used, just for reference: the only valid language codes that are not in modern coverage -->
110+
<coverageVariable key="%language80" value="(af|am|ar|as|az|be|bg|bgc|bho|bn|brx|bs|ca|ceb|cs|cv|cy|da|de|doi|el|en|es|et|eu|fa|fi|fil|fr|ga|gd|gl|gu|ha|he|hi|hr|hu|hy|id|ig|is|it|ja|jv|ka|kk|km|kn|ko|kok|ks|ky|lo|lt|lv|mai|mi|mk|ml|mn|mni|mr|ms|my|ne|nl|nn|no|or|pa|pcm|pl|ps|pt|raj|ro|ru|sa|sat|sd|si|sk|sl|so|sq|sr|su|sv|sw|ta|te|tg|th|ti|tk|tr|tt|uk|ur|uz|vi|wo|xh|yo|yue|zh|zu|ast|blo|br|chr|csw|dsb|eo|ff|fo|fy|hsb|ia|ie|kea|kgp|ku|kxv|lb|lij|lmo|mt|nds|nqo|oc|prg|qu|rm|sah|sc|syr|szl|to|ug|vec|vmw|xnr|yrl|za|mul|root|zxx|und)"/>
112111
<coverageVariable key="%lbTypes80" value="(strict|normal|loose)"/>
113112
<coverageVariable key="%lwTypes" value="(normal|breakall|keepall|phrase)"/>
114113
<coverageVariable key="%m0Types80" value="(bgn|prprname|ungegn)"/>
@@ -1007,4 +1006,4 @@ For terms of use, see http://www.unicode.org/copyright.html
10071006
<pathMatch id="annotations1" match="annotations/annotation[@cp='%anyAttribute'][@type='%anyAttribute']"/>
10081007
<pathMatch id="annotations2" match="annotations/annotation[@cp='%anyAttribute']"/>
10091008
</coverageLevels>
1010-
</supplementalData>
1009+
</supplementalData>

tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,8 @@ private void makeStuffSafe() {
13481348
}
13491349
}
13501350
CLDRScriptCodes = newScripts.build();
1351+
CLDRLanguageCodes = CldrUtility.protectCollection(CLDRLanguageCodes);
1352+
languageNonTcLtBasic = CldrUtility.protectCollection(languageNonTcLtBasic);
13511353
}
13521354

13531355
/**
@@ -2144,13 +2146,21 @@ private boolean handleMetadata(String level2, String value, XPathValue parts) {
21442146
String level3 = parts.getElement(3);
21452147
if (level3.equals("variable")) {
21462148
Map<String, String> attributes = parts.getAttributes(-1);
2147-
validityInfo.put(attributes.get("id"), Row.of(attributes.get("type"), value));
2148-
String idString = attributes.get("id");
2149-
if (("$language".equals(idString)
2150-
|| "$languageExceptions".equals(attributes.get("id")))
2151-
&& "choice".equals(attributes.get("type"))) {
2152-
String[] validCodeArray = value.trim().split("\\s+");
2153-
CLDRLanguageCodes.addAll(Arrays.asList(validCodeArray));
2149+
final String idString = attributes.get("id");
2150+
final String typeString = attributes.get("type");
2151+
validityInfo.put(idString, Row.of(typeString, value));
2152+
if ("choice".equals(typeString)) {
2153+
if ("$language".equals(idString)
2154+
|| "$languageNonTcGeqBasic".equals(idString)) {
2155+
String[] validCodeArray = value.trim().split("\\s+");
2156+
CLDRLanguageCodes.addAll(Arrays.asList(validCodeArray));
2157+
}
2158+
if ("$languageNonTcLtBasic".equals(idString)) { // not yet basic
2159+
String[] validCodeArray = value.trim().split("\\s+");
2160+
final List<String> asList = Arrays.asList(validCodeArray);
2161+
languageNonTcLtBasic.addAll(asList);
2162+
CLDRLanguageCodes.addAll(asList);
2163+
}
21542164
}
21552165
return true;
21562166
} else if (level3.equals("attributeValues")) {
@@ -2495,6 +2505,8 @@ public int parseIntegerOrNull(String attributeValue) {
24952505
public Map<CLDRLocale, CLDRLocale> baseToDefaultContent; // wo -> wo_Arab_SN
24962506
public Map<CLDRLocale, CLDRLocale> defaultContentToBase; // wo_Arab_SN -> wo
24972507
private Set<String> CLDRLanguageCodes = new TreeSet<>();
2508+
private Set<String> languageNonTcLtBasic = new TreeSet<>();
2509+
24982510
private Set<String> CLDRScriptCodes;
24992511

25002512
/**
@@ -4793,10 +4805,21 @@ public Map<String, R2<String, String>> getValidityInfo() {
47934805
return validityInfo;
47944806
}
47954807

4808+
/** All CLDR language codes: TC languages, non-TC languages at Basic or better, and non-TC languages below Basic (grandfathered or Core). */
47964809
public Set<String> getCLDRLanguageCodes() {
47974810
return CLDRLanguageCodes;
47984811
}
47994812

4813+
/** TC languages plus non-TC languages at Basic or better, i.e. all CLDR language codes except the non-TC languages below Basic. */
4814+
public Set<String> getLanguageTcOrBasic() {
4815+
return Sets.difference(CLDRLanguageCodes, languageNonTcLtBasic);
4816+
}
4817+
4818+
/** Non-TC languages that are below Basic (grandfathered or Core). */
4819+
public Set<String> getLanguageNonTcLtBasic() {
4820+
return languageNonTcLtBasic;
4821+
}
4822+
48004823
public boolean isCLDRLanguageCode(String code) {
48014824
return CLDRLanguageCodes.contains(code);
48024825
}

0 commit comments

Comments
 (0)