Skip to content

Commit c0001a2

Browse files
authored
CLDR-7428 Freeze collators; new class CollatorHelper (#4207)
1 parent 163a3df commit c0001a2

39 files changed

+138
-261
lines changed

tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java

Lines changed: 0 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,15 @@
77

88
import com.ibm.icu.dev.test.TestFmwk.TestGroup;
99
import com.ibm.icu.dev.test.TestLog;
10-
import com.ibm.icu.text.Collator;
11-
import com.ibm.icu.text.RuleBasedCollator;
1210
import java.io.File;
1311
import java.io.PrintWriter;
1412
import java.sql.SQLException;
1513
import java.util.logging.Logger;
1614
import org.unicode.cldr.test.CheckCLDR;
1715
import org.unicode.cldr.util.CLDRConfig;
1816
import org.unicode.cldr.util.CLDRConfig.Environment;
19-
import org.unicode.cldr.util.CLDRFile;
2017
import org.unicode.cldr.util.CLDRPaths;
2118
import org.unicode.cldr.util.CldrUtility;
22-
import org.unicode.cldr.util.Factory;
23-
import org.unicode.cldr.util.StandardCodes;
24-
import org.unicode.cldr.util.SupplementalDataInfo;
2519
import org.unicode.cldr.web.CLDRProgressIndicator;
2620
import org.unicode.cldr.web.DBUtils;
2721
import org.unicode.cldr.web.SurveyLog;
@@ -128,13 +122,6 @@ public TestAll() {
128122
public static class WebTestInfo {
129123
private static WebTestInfo INSTANCE = null;
130124

131-
private SupplementalDataInfo supplementalDataInfo;
132-
private StandardCodes sc;
133-
private Factory cldrFactory;
134-
private CLDRFile english;
135-
private CLDRFile root;
136-
private RuleBasedCollator col;
137-
138125
public static WebTestInfo getInstance() {
139126
synchronized (WebTestInfo.class) {
140127
if (INSTANCE == null) {
@@ -145,62 +132,6 @@ public static WebTestInfo getInstance() {
145132
}
146133

147134
private WebTestInfo() {}
148-
149-
public SupplementalDataInfo getSupplementalDataInfo() {
150-
synchronized (this) {
151-
if (supplementalDataInfo == null) {
152-
supplementalDataInfo =
153-
SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
154-
}
155-
}
156-
return supplementalDataInfo;
157-
}
158-
159-
public StandardCodes getStandardCodes() {
160-
synchronized (this) {
161-
if (sc == null) {
162-
sc = StandardCodes.make();
163-
}
164-
}
165-
return sc;
166-
}
167-
168-
public Factory getCldrFactory() {
169-
synchronized (this) {
170-
if (cldrFactory == null) {
171-
cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
172-
}
173-
}
174-
return cldrFactory;
175-
}
176-
177-
public CLDRFile getEnglish() {
178-
synchronized (this) {
179-
if (english == null) {
180-
english = getCldrFactory().make("en", true);
181-
}
182-
}
183-
return english;
184-
}
185-
186-
public CLDRFile getRoot() {
187-
synchronized (this) {
188-
if (root == null) {
189-
root = getCldrFactory().make("root", true);
190-
}
191-
}
192-
return root;
193-
}
194-
195-
public Collator getCollator() {
196-
synchronized (this) {
197-
if (col == null) {
198-
col = (RuleBasedCollator) Collator.getInstance();
199-
col.setNumericCollation(true);
200-
}
201-
}
202-
return col;
203-
}
204135
}
205136

206137
static boolean dbSetup = false;

tools/cldr-code/src/main/java/org/unicode/cldr/draft/Misc.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.unicode.cldr.util.CLDRFile;
3737
import org.unicode.cldr.util.CLDRFile.WinningChoice;
3838
import org.unicode.cldr.util.CLDRPaths;
39+
import org.unicode.cldr.util.CollatorHelper;
3940
import org.unicode.cldr.util.Factory;
4041
import org.unicode.cldr.util.LanguageTagParser;
4142
import org.unicode.cldr.util.LocaleIDParser;
@@ -143,7 +144,9 @@ private static void showDefaultContent(String... strings) {
143144

144145
private static void showSortKey() {
145146
String[] tests = "a ä A ぁ あ ァ ァ ア ア ㋐".split(" ");
146-
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
147+
// TODO: freeze the Collator; problematic since changed in innermost for loop below
148+
// Reference: https://unicode-org.atlassian.net/browse/CLDR-7428
149+
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
147150
c.setStrength(RuleBasedCollator.QUATERNARY);
148151
c.setCaseLevel(true);
149152
c.setHiraganaQuaternary(true);
@@ -319,7 +322,7 @@ private static void showExemplarSize() {
319322
String[] locales =
320323
"en ru nl en-GB fr de it pl pt-BR es tr th ja zh-CN zh-TW ko ar bg sr uk ca hr cs da fil fi hu id lv lt no pt-PT ro sk sl es-419 sv vi el iw fa hi am af et is ms sw zu bn mr ta eu fr-CA gl zh-HK ur gu kn ml te"
321324
.split(" ");
322-
Set<String> nameAndInfo = new TreeSet<>(info.getCollator());
325+
Set<String> nameAndInfo = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR);
323326
for (String localeCode : locales) {
324327
String baseLanguage = ltp.set(localeCode).getLanguage();
325328
R2<List<String>, String> temp = lang2replacement.get(baseLanguage);

tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
import com.ibm.icu.lang.UCharacter;
1313
import com.ibm.icu.lang.UProperty;
1414
import com.ibm.icu.text.Collator;
15-
import com.ibm.icu.text.RuleBasedCollator;
1615
import com.ibm.icu.text.Transliterator;
17-
import com.ibm.icu.util.ULocale;
1816
import com.ibm.icu.util.UResourceBundle;
1917
import java.io.BufferedReader;
2018
import java.io.File;
@@ -35,6 +33,7 @@
3533
import org.unicode.cldr.util.CLDRFile;
3634
import org.unicode.cldr.util.CLDRPaths;
3735
import org.unicode.cldr.util.CldrUtility;
36+
import org.unicode.cldr.util.CollatorHelper;
3837
import org.unicode.cldr.util.PathUtilities;
3938
import org.unicode.cldr.util.PatternCache;
4039
import org.unicode.cldr.util.SimpleFactory;
@@ -411,8 +410,7 @@ static void testProps() {
411410
{UProperty.DOUBLE_START, UProperty.DOUBLE_START},
412411
{UProperty.STRING_START, UProperty.STRING_LIMIT},
413412
};
414-
Collator col = Collator.getInstance(ULocale.ROOT);
415-
((RuleBasedCollator) col).setNumericCollation(true);
413+
Collator col = CollatorHelper.ROOT_NUMERIC;
416414
Map<String, Set<String>> alpha = new TreeMap<>(col);
417415

418416
for (int range = 0; range < ranges.length; ++range) {
@@ -465,12 +463,6 @@ static void testProps() {
465463
}
466464
out.println("</table></td></tr>");
467465
}
468-
Collator c = Collator.getInstance(ULocale.ENGLISH);
469-
((RuleBasedCollator) c).setNumericCollation(true);
470-
471-
// int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
472-
// return UCharacter.getPropertyValueName(propEnum,enumValue, (int)nameChoice);
473-
474466
}
475467

476468
private static String getName(int index, String valueName, String shortValueName) {

tools/cldr-code/src/main/java/org/unicode/cldr/icu/LDMLComparator.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import com.ibm.icu.text.Collator;
1414
import com.ibm.icu.text.DecimalFormat;
1515
import com.ibm.icu.text.Normalizer;
16-
import com.ibm.icu.text.RuleBasedCollator;
1716
import com.ibm.icu.util.ULocale;
1817
import java.io.File;
1918
import java.io.FileOutputStream;
@@ -28,6 +27,7 @@
2827
import java.util.TreeMap;
2928
import java.util.TreeSet;
3029
import java.util.Vector;
30+
import org.unicode.cldr.util.CollatorHelper;
3131
import org.unicode.cldr.util.LDMLUtilities;
3232
import org.w3c.dom.Document;
3333
import org.w3c.dom.NamedNodeMap;
@@ -114,12 +114,7 @@ public static void main(String[] args) {
114114
}
115115

116116
static Collator getDefaultCollation() {
117-
// if (DEFAULT_COLLATION != null) return DEFAULT_COLLATION;
118-
RuleBasedCollator temp = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
119-
temp.setStrength(Collator.IDENTICAL);
120-
temp.setNumericCollation(true);
121-
// DEFAULT_COLLATION = temp;
122-
return temp;
117+
return CollatorHelper.ROOT_NUMERIC_IDENTICAL;
123118
}
124119

125120
Hashtable<String, String> optionTable = new Hashtable<>();

tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.unicode.cldr.util.CLDRPaths;
5656
import org.unicode.cldr.util.CLDRTool;
5757
import org.unicode.cldr.util.CldrUtility;
58+
import org.unicode.cldr.util.CollatorHelper;
5859
import org.unicode.cldr.util.DateTimeCanonicalizer;
5960
import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType;
6061
import org.unicode.cldr.util.DowngradePaths;
@@ -3309,8 +3310,7 @@ private static int stepsFromRoot(String origLoc) {
33093310

33103311
/** Internal */
33113312
public static void testJavaSemantics() {
3312-
Collator caseInsensitive = Collator.getInstance(ULocale.ROOT);
3313-
caseInsensitive.setStrength(Collator.SECONDARY);
3313+
Collator caseInsensitive = CollatorHelper.ROOT_SECONDARY;
33143314
Set<String> setWithCaseInsensitive = new TreeSet<>(caseInsensitive);
33153315
setWithCaseInsensitive.addAll(Arrays.asList(new String[] {"a", "b", "c"}));
33163316
Set<String> plainSet = new TreeSet<>();

tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,6 @@ private void addCollator(Map<String, Data> data, String type, RuleBasedCollator
258258
dataItem.collator = col;
259259
}
260260

261-
// RuleBasedCollator ROOT = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
262-
263261
private class Subchart extends Chart {
264262
private static final String HIGH_COLLATION_PRIMARY = "\uFFFF";
265263
String title;

tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import com.google.common.collect.ImmutableSet;
44
import com.google.common.collect.ImmutableSet.Builder;
55
import com.google.common.collect.Multimap;
6-
import com.ibm.icu.text.Collator;
7-
import com.ibm.icu.util.ULocale;
86
import java.io.IOException;
97
import java.util.Collection;
108
import java.util.Comparator;
@@ -76,8 +74,6 @@ public String getExplanation() {
7674
+ "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n";
7775
}
7876

79-
Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);
80-
8177
@Override
8278
public void writeContents(FormattedFileWriter pw) throws IOException {
8379

@@ -112,7 +108,9 @@ private void show(Multimap<String, String> lg, String parent, TablePrinter table
112108
new Comparator<Pair<String, String>>() {
113109
@Override
114110
public int compare(Pair<String, String> o1, Pair<String, String> o2) {
115-
int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
111+
int diff =
112+
CollatorHelper.ROOT_COLLATOR.compare(
113+
o1.getFirst(), o2.getFirst());
116114
if (diff != 0) {
117115
return diff;
118116
}

tools/cldr-code/src/main/java/org/unicode/cldr/tool/CheckAnnotations.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@
44
import java.util.TreeSet;
55
import org.unicode.cldr.util.Annotations;
66
import org.unicode.cldr.util.Annotations.AnnotationSet;
7-
import org.unicode.cldr.util.CLDRConfig;
7+
import org.unicode.cldr.util.CollatorHelper;
88

99
public class CheckAnnotations {
1010
public static void main(String[] args) {
1111
AnnotationSet data = Annotations.getDataSet("en");
12-
CLDRConfig config = CLDRConfig.getInstance();
13-
// UnicodeMap<Annotations> data2 = Annotations.getData("de");
14-
Set<String> sorted = new TreeSet<>(config.getCollator());
12+
Set<String> sorted = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR);
1513

1614
int i = 0;
1715
boolean needMore = true;

tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
package org.unicode.cldr.tool;
22

33
import com.ibm.icu.dev.util.UOption;
4-
import com.ibm.icu.text.Collator;
5-
import com.ibm.icu.text.RuleBasedCollator;
6-
import com.ibm.icu.util.ULocale;
74
import java.util.HashSet;
85
import java.util.Iterator;
96
import java.util.Set;
@@ -30,12 +27,6 @@ public class CompareData {
3027

3128
String[] directoryList = {"main", "collation", "segmentations"};
3229

33-
static RuleBasedCollator uca = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
34-
35-
{
36-
uca.setNumericCollation(true);
37-
}
38-
3930
static PrettyPath prettyPathMaker = new PrettyPath();
4031
static CLDRFile english;
4132
static Set<String> locales;

tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import com.google.common.base.Splitter;
55
import com.google.common.collect.ImmutableSet;
66
import com.google.common.collect.Sets;
7-
import com.ibm.icu.text.Collator;
87
import java.io.BufferedReader;
98
import java.io.File;
109
import java.io.IOException;
@@ -16,6 +15,7 @@
1615
import org.unicode.cldr.util.CLDRConfig;
1716
import org.unicode.cldr.util.CLDRFile;
1817
import org.unicode.cldr.util.CLDRPaths;
18+
import org.unicode.cldr.util.CollatorHelper;
1919
import org.unicode.cldr.util.Emoji;
2020
import org.unicode.cldr.util.Factory;
2121
import org.unicode.cldr.util.SimpleFactory;
@@ -29,11 +29,12 @@ public class CompareEmoji {
2929
static final Factory FACTORY_DERIVED = SimpleFactory.make(paths, ".*");
3030

3131
private static final Joiner BAR_JOINER = Joiner.on(" | ");
32-
private static final Collator collator = CLDRConfig.getInstance().getCollator();
32+
3333
private static final String base =
3434
"/Users/markdavis/github/private/DATA/cldr-private/emoji_diff/";
3535
private static final Set<String> sorted =
36-
ImmutableSet.copyOf(Emoji.getAllRgi().addAllTo(new TreeSet<>(collator)));
36+
ImmutableSet.copyOf(
37+
Emoji.getAllRgi().addAllTo(new TreeSet<>(CollatorHelper.EMOJI_COLLATOR)));
3738

3839
enum Status {
3940
regular,
@@ -155,7 +156,7 @@ public static Map<String, Set<String>> loadItems(
155156
continue;
156157
}
157158
String key = split[0];
158-
Set<String> values = new TreeSet<>(collator);
159+
Set<String> values = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR);
159160
for (int i = 1; i < split.length; ++i) {
160161
values.add(split[i]);
161162
}

0 commit comments

Comments
 (0)