Skip to content

Commit 44c96ed

Browse files
authored
CLDR-17857 coverage for scripts, update 'bal' coverage (#4567)
1 parent cef1690 commit 44c96ed

File tree

3 files changed

+200
-51
lines changed

3 files changed

+200
-51
lines changed

common/supplemental/coverageLevels.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ For terms of use, see http://www.unicode.org/copyright.html
107107
<coverageVariable key="%language60_TD" value="(shu|dzg|kbl|mde|mua|sba)"/>
108108
<!-- See CLDR-16673: All basic+ locales (per coverageLevels.txt) MUST have their language's name at least at level 80 (modern), except for those on exception list -->
109109
<!-- Can use MinimizeRegex.java to "unpack" the compressed lists that are painful to edit -->
110-
<coverageVariable key="%language80" value="(af|ak|am|ar|as|az|be|bg|bgc|bho|bn|brx|bs|ca|ceb|cs|cv|cy|da|de|doi|ee|el|en|es|et|eu|fa|fi|fil|fr|ga|gaa|gd|gl|gu|ha|he|hi|hr|hu|hy|id|ig|ii|is|it|ja|jv|ka|kk|km|kn|ko|kok|ks|ky|lo|lt|lv|mai|mi|mk|ml|mn|mni|mr|ms|my|ne|nl|nn|no|nso|om|or|pa|pcm|pl|ps|pt|raj|ro|ru|rw|sa|sat|sd|si|sk|sl|so|sq|sr|st|su|sv|sw|ta|te|tg|th|ti|tk|tn|tr|tt|uk|ur|uz|vi|wo|xh|yo|yue|zh|zu|ast|blo|br|chr|csw|dsb|eo|ff|fo|fy|hsb|ia|ie|kea|kgp|ku|kxv|lb|lij|lmo|mt|nds|nqo|oc|prg|qu|rm|sah|sc|syr|szl|to|ug|vec|vmw|xnr|yrl|za|mul|root|zxx|und)"/>
110+
<coverageVariable key="%language80" value="(af|ak|am|ar|as|az|bal|be|bg|bgc|bho|bn|brx|bs|ca|ceb|cs|cv|cy|da|de|doi|ee|el|en|es|et|eu|fa|fi|fil|fr|ga|gaa|gd|gl|gu|ha|he|hi|hr|hu|hy|id|ig|ii|is|it|ja|jv|ka|kk|km|kn|ko|kok|ks|ky|lo|lt|lv|mai|mi|mk|ml|mn|mni|mr|ms|my|ne|nl|nn|no|nso|om|or|pa|pcm|pl|ps|pt|raj|ro|ru|rw|sa|sat|sd|si|sk|sl|so|sq|sr|st|su|sv|sw|ta|te|tg|th|ti|tk|tn|tr|tt|uk|ur|uz|vi|wo|xh|yo|yue|zh|zu|ast|blo|br|chr|csw|dsb|eo|ff|fo|fy|hsb|ia|ie|kea|kgp|ku|kxv|lb|lij|lmo|mt|nds|nqo|oc|prg|qu|rm|sah|sc|syr|szl|to|ug|vec|vmw|xnr|yrl|za|mul|root|zxx|und)"/>
111111
<coverageVariable key="%lbTypes80" value="(strict|normal|loose)"/>
112112
<coverageVariable key="%lwTypes" value="(normal|breakall|keepall|phrase)"/>
113113
<coverageVariable key="%m0Types80" value="(bgn|prprname|ungegn)"/>

tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.concurrent.Callable;
1414
import java.util.concurrent.ConcurrentHashMap;
1515
import java.util.concurrent.ExecutionException;
16+
import org.unicode.cldr.tool.LikelySubtags;
1617

1718
/**
1819
* This class implements a CLDR UTS#35 compliant locale. It differs from ICU and Java locales in
@@ -708,4 +709,15 @@ public static CLDRLocale getInstance(String lang, String script, String region)
708709
public String getRegion() {
709710
return getCountry();
710711
}
712+
713+
private String getMaximalLocaleString() {
714+
return new LikelySubtags().maximize(getBaseName());
715+
}
716+
717+
/** Get the maximized version of this locale, or null if LikelySubtags.maximize() returns null for it. */
718+
public CLDRLocale getMaximal() {
719+
final String max = getMaximalLocaleString();
720+
if (max == null) return null;
721+
return getInstance(max);
722+
}
711723
}
Lines changed: 187 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.unicode.cldr.util;
22

33
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertNotNull;
45
import static org.junit.jupiter.api.Assertions.assertTrue;
56
import static org.junit.jupiter.api.Assumptions.assumeTrue;
67

@@ -10,28 +11,22 @@
1011
import java.util.Map;
1112
import java.util.Set;
1213
import java.util.TreeSet;
14+
import java.util.function.Supplier;
15+
import java.util.stream.Collectors;
1316
import org.junit.jupiter.api.Assertions;
14-
import org.junit.jupiter.api.BeforeAll;
1517
import org.junit.jupiter.api.Test;
18+
import org.opentest4j.MultipleFailuresError;
1619
import org.unicode.cldr.icu.LDMLConstants;
1720
import org.unicode.cldr.test.CoverageLevel2;
1821
import org.unicode.cldr.tool.ToolConstants;
19-
import org.unicode.cldr.util.StandardCodes.CodeType;
2022

2123
public class TestCoverageLevel2 {
2224

2325
final int ITERATIONS = 100000; // keep this low for normal testing
2426

25-
private static SupplementalDataInfo sdi;
26-
27-
@BeforeAll
28-
private static void setup() {
29-
sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
30-
CoverageLevel2 c = CoverageLevel2.getInstance(sdi, "fr_CA");
31-
}
32-
3327
@Test
3428
public void TestCoveragePerf() {
29+
final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
3530
for (int i = 0; i < ITERATIONS; i++) {
3631
CoverageLevel2 c = CoverageLevel2.getInstance(sdi, "fr_CA");
3732
assertEquals(
@@ -42,69 +37,211 @@ public void TestCoveragePerf() {
4237
}
4338

4439
@Test
45-
public void TestPriorBasicLanguage() throws IOException {
46-
// Fail if the language name is at above this level
47-
final Level failIfAbove = Level.MODERN;
48-
40+
public void TestPriorBasicCoverage() throws IOException {
4941
// we need the CLDR Archive dir for this.
5042
assumeTrue(TestCLDRPaths.canUseArchiveDirectory());
5143

5244
// Previous CLDR version
5345
final VersionInfo prev = ToolConstants.previousVersion();
46+
5447
// read coverageLevels.txt from the *previous* version
5548
final CalculatedCoverageLevels prevCovLevel = CalculatedCoverageLevels.forVersion(prev);
56-
// Our xpath: the language leaf
57-
final XPathParts xpp =
58-
XPathParts.getFrozenInstance("//ldml/localeDisplayNames/languages/language")
59-
.cloneAsThawed();
60-
// CLDR English File
61-
final CLDRFile english = CLDRConfig.getInstance().getEnglish();
62-
63-
// Result: locales not in en.xml
64-
final Set<String> notInEnglish = new TreeSet<>();
65-
// Result: locales not in coverage
66-
final Set<String> notInCoverage = new TreeSet<>();
6749

6850
final Set<String> localesToCheck =
6951
SupplementalDataInfo.getInstance().getLanguageTcOrBasic();
52+
7053
final Map<String, CoverageLevel2> covs = new HashMap<>();
7154

55+
final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
7256
for (final String lang : localesToCheck) {
7357
covs.put(lang, CoverageLevel2.getInstance(sdi, lang));
7458
}
7559

76-
for (final String lang : StandardCodes.make().getAvailableCodes(CodeType.language)) {
77-
if (prevCovLevel.isLocaleAtLeastBasic(lang)) {
78-
xpp.setAttribute(-1, LDMLConstants.TYPE, lang);
79-
final String xpath = xpp.toString();
80-
81-
if (!english.isHere(xpath.toString())) {
82-
// fail if not in English
83-
notInEnglish.add(lang);
84-
}
85-
86-
if (covs.values().stream()
87-
.anyMatch((cov) -> cov.getLevel(xpath.toString()).isAbove(failIfAbove))) {
88-
// fail if level > failIfAbove for any of those locales
89-
notInCoverage.add(lang);
90-
}
60+
// CLDR English File
61+
final CLDRFile english = CLDRConfig.getInstance().getEnglish();
62+
63+
Assertions.assertAll(
64+
() -> checkLanguageCoverage(prevCovLevel, covs, english),
65+
() -> checkScriptCoverage(prevCovLevel, covs, english),
66+
// region coverage elsewhere?
67+
() -> checkVariantCoverage(prevCovLevel, covs, english));
68+
}
69+
70+
private void checkLanguageCoverage(
71+
final CalculatedCoverageLevels prevCovLevel,
72+
final Map<String, CoverageLevel2> covs,
73+
final CLDRFile english)
74+
throws MultipleFailuresError {
75+
76+
// configuration
77+
final Level failIfAbove = Level.MODERN;
78+
final String XPATH = "//ldml/localeDisplayNames/languages/language";
79+
80+
// all languages of previous coverage levels at basic+
81+
final Set<String> typesAtBasic =
82+
prevCovLevel.levels.keySet().stream()
83+
.filter(l -> prevCovLevel.isLocaleAtLeastBasic(l))
84+
.map(l -> CLDRLocale.getInstance(l).getLanguage())
85+
.collect(Collectors.toSet());
86+
87+
assertMissingCoverage(covs, english, failIfAbove, XPATH, typesAtBasic);
88+
}
89+
90+
private void checkScriptCoverage(
91+
final CalculatedCoverageLevels prevCovLevel,
92+
final Map<String, CoverageLevel2> covs,
93+
final CLDRFile english)
94+
throws MultipleFailuresError {
95+
96+
// configuration
97+
final Level failIfAbove = Level.MODERN;
98+
final String XPATH = "//ldml/localeDisplayNames/scripts/script";
99+
100+
// all scripts of previous coverage levels at basic+
101+
final Set<String> typesAtBasic =
102+
prevCovLevel.levels.keySet().stream()
103+
.filter(l -> prevCovLevel.isLocaleAtLeastBasic(l))
104+
.map(l -> CLDRLocale.getInstance(l))
105+
.map(
106+
l -> {
107+
final CLDRLocale max = l.getMaximal();
108+
assertNotNull(max, () -> "Max locale for " + l);
109+
final String script = max.getScript();
110+
assertNotNull(
111+
script,
112+
() -> "Script for " + max + " which is max for " + l);
113+
return script;
114+
})
115+
.collect(Collectors.toSet());
116+
117+
assertMissingCoverage(covs, english, failIfAbove, XPATH, typesAtBasic);
118+
}
119+
120+
private void checkVariantCoverage(
121+
final CalculatedCoverageLevels prevCovLevel,
122+
final Map<String, CoverageLevel2> covs,
123+
final CLDRFile english)
124+
throws MultipleFailuresError {
125+
126+
// configuration
127+
final Level failIfAbove = Level.MODERN;
128+
final String XPATH = "//ldml/localeDisplayNames/variants/variant";
129+
130+
// We need all locales for looking for variants
131+
final Set<CLDRLocale> allLocales =
132+
CLDRConfig.getInstance().getFullCldrFactory().getAvailableCLDRLocales();
133+
134+
// get all of the "raw" locales mentioned in coverage
135+
final Set<CLDRLocale> localesAtBasic =
136+
prevCovLevel.levels.keySet().stream()
137+
.filter(l -> prevCovLevel.isLocaleAtLeastBasic(l))
138+
.map(l -> CLDRLocale.getInstance(l))
139+
.collect(Collectors.toSet());
140+
141+
final Set<CLDRLocale> localesWithVariant =
142+
allLocales.stream()
143+
.filter(l -> !l.getVariant().isEmpty())
144+
.collect(Collectors.toSet());
145+
final Set<CLDRLocale> variantLocalesInCoverage =
146+
localesWithVariant.stream()
147+
.filter(l -> localesAtBasic.stream().anyMatch(p -> l.childOf(p)))
148+
.collect(Collectors.toSet());
149+
final Set<String> typesAtBasic =
150+
variantLocalesInCoverage.stream()
151+
.map(l -> l.getVariant())
152+
.collect(Collectors.toSet());
153+
154+
assertMissingCoverage(covs, english, failIfAbove, XPATH, typesAtBasic);
155+
}
156+
157+
/**
158+
* Given types (script code, etc) at basic, check coverage and English inclusion.
159+
*
160+
* @param xpath XPath in question - a thawed copy of it will be mutated.
161+
*/
162+
private void assertMissingCoverage(
163+
final Map<String, CoverageLevel2> covs,
164+
final CLDRFile english,
165+
final Level failIfAbove,
166+
final String xpath,
167+
final Set<String> typesAtBasic)
168+
throws MultipleFailuresError {
169+
// Our xpath: the leaf node
170+
final XPathParts xpp = XPathParts.getFrozenInstance(xpath).cloneAsThawed();
171+
172+
// Result: types not in en.xml
173+
final Set<String> notInEnglish = new TreeSet<>();
174+
// Result: types not in coverage
175+
final Set<String> notInCoverage = new TreeSet<>();
176+
177+
collectMissingTypes(
178+
covs, english, failIfAbove, xpp, notInEnglish, notInCoverage, typesAtBasic);
179+
180+
assertMissingCoverage(failIfAbove, notInEnglish, notInCoverage, xpp);
181+
}
182+
183+
/**
184+
* Given types (script code, etc) at basic, collect those missing from English or whose coverage level is above the threshold.
185+
*
186+
* @param xpp XPath in question - this will be mutated.
187+
*/
188+
private void collectMissingTypes(
189+
final Map<String, CoverageLevel2> covs,
190+
final CLDRFile english,
191+
final Level failIfAbove,
192+
final XPathParts xpp,
193+
final Set<String> notInEnglish,
194+
final Set<String> notInCoverage,
195+
final Set<String> typesAtBasic) {
196+
for (final String type : typesAtBasic) {
197+
xpp.setAttribute(-1, LDMLConstants.TYPE, type);
198+
final String xpath = xpp.toString();
199+
200+
if (!english.isHere(xpath)) {
201+
// fail if not in English
202+
notInEnglish.add(type);
203+
}
204+
205+
if (covs.values().stream()
206+
.anyMatch((cov) -> cov.getLevel(xpath).isAbove(failIfAbove))) {
207+
// fail if level > failIfAbove for any of those locales
208+
notInCoverage.add(type);
91209
}
92210
}
211+
}
93212

213+
/** Bring the bad news (if any). Reporting factored out here. */
214+
private void assertMissingCoverage(
215+
final Level failIfAbove,
216+
final Set<String> notInEnglish,
217+
final Set<String> notInCoverage,
218+
final XPathParts xpp)
219+
throws MultipleFailuresError {
220+
// given xpp is scripts/script or languages/language, etc.
221+
final String plural = xpp.getElement(-2); // the plural form of what we're looking for
94222
Assertions.assertAll(
95223
() ->
96224
assertTrue(
97225
notInEnglish.isEmpty(),
98226
() ->
99-
"en.xml is missing translations for these languages' names:"
100-
+ notInEnglish.toString()),
101-
() ->
102-
assertTrue(
103-
notInCoverage.isEmpty(),
104-
() ->
105-
"coverageLevels.xml has a coverage level >"
106-
+ failIfAbove
107-
+ " for these language's names:"
108-
+ notInCoverage.toString()));
227+
String.format(
228+
"en.xml missing these %s: %s",
229+
plural, notInEnglish.toString())),
230+
() -> {
231+
final Supplier<String> formatter =
232+
() ->
233+
String.format(
234+
"coverageLevels.xml has level > %s for these %s: %s",
235+
failIfAbove, plural, notInCoverage.toString());
236+
if (!notInCoverage.isEmpty() && plural.equals("variants")) {
237+
// TODO CLDR-18481 need logKnownIssue in JUnit
238+
// if (logKnownIssue("CLDR-18480", formatter.get())) {
239+
// return;
240+
// }
241+
System.err.println("CLDR-18400: known issue: " + formatter.get());
242+
return;
243+
}
244+
assertTrue(notInCoverage.isEmpty(), formatter);
245+
});
109246
}
110247
}

0 commit comments

Comments
 (0)