66import com .google .common .collect .ImmutableSet ;
77import com .google .common .collect .ImmutableSortedSet ;
88import com .google .common .collect .Multimap ;
9+ import com .google .common .collect .Ordering ;
910import com .google .common .collect .TreeMultimap ;
1011import com .ibm .icu .impl .Row ;
1112import com .ibm .icu .impl .Row .R3 ;
2930import java .util .TreeMap ;
3031import java .util .TreeSet ;
3132import java .util .regex .Pattern ;
33+ import java .util .stream .Collectors ;
34+ import java .util .stream .Stream ;
3235import org .unicode .cldr .test .CoverageLevel2 ;
3336import org .unicode .cldr .util .Annotations ;
3437import org .unicode .cldr .util .Annotations .AnnotationSet ;
3538import org .unicode .cldr .util .CLDRConfig ;
3639import org .unicode .cldr .util .CLDRFile ;
3740import org .unicode .cldr .util .CLDRPaths ;
3841import org .unicode .cldr .util .CldrUtility ;
42+ import org .unicode .cldr .util .CollatorHelper ;
3943import org .unicode .cldr .util .Emoji ;
4044import org .unicode .cldr .util .Factory ;
4145import org .unicode .cldr .util .Level ;
@@ -281,34 +285,41 @@ public void TestCategories() {
281285 }
282286 }
283287
288+ // TODO CLDR-16947 - this test should migrate into
289+ // CheckDisplayCollisions-run-against-derived-annotations (see issue)
290+ // TODO CLDR-19189
284291 public void TestUniqueness () {
285- // if (logKnownIssue(
286- // "CLDR-16947", "skip duplicate TestUniqueness in favor of
287- // CheckDisplayCollisions")) {
288- // return;
289- // }
290292 Set <String > locales = new TreeSet <>();
291293 locales .add ("en" );
292294 if (!TEST_ONLY_ENGLISH_UNIQUENESS ) {
293295 locales .addAll (Annotations .getAvailable ());
294296 locales .remove ("root" );
295297 }
296298 /*
297- * Note: "problems" here is a work-around for what appears to be a deficiency
298- * in the function sourceLocation, involving the call stack. Seemingly sourceLocation
299- * can't handle the "->" notation used for parallelStream().forEach() if
300- * uniquePerLocale calls errln directly.
299+ * "problems" is here to collect and sort issues in parallel,
300+ * and avoid issues calling errln() from a lambda.
301301 */
302- Set <String > problems = new HashSet <>();
303- locales .parallelStream ().forEach (locale -> uniquePerLocale (locale , problems ));
302+ Set <String > problems =
303+ locales .parallelStream ()
304+ .flatMap (locale -> uniquePerLocale (locale ))
305+ .collect (Collectors .toCollection (() -> new TreeSet <>()));
304306 if (!problems .isEmpty ()) {
307+ if (logKnownIssue (
308+ "CLDR-19189" ,
309+ "cased collision in annotations:\n " + String .join ("\n " , problems ))) {
310+ return ;
311+ }
305312 problems .forEach (s -> errln (s ));
306313 }
307314 }
308315
309- private void uniquePerLocale (String locale , Set <String > problems ) {
316+ private Stream <String > uniquePerLocale (String locale ) {
317+ Set <String > problems = new TreeSet <>();
310318 logln ("uniqueness: " + locale );
311- Multimap <String , String > nameToEmoji = TreeMultimap .create ();
319+ // use a case insensitive collator
320+ // 'value' is originalName -> emoji
321+ Multimap <String , Pair <String , String >> nameToEmoji =
322+ TreeMultimap .create (CollatorHelper .CASE_FOLDED , Ordering .natural ());
312323 AnnotationSet data = Annotations .getDataSet (locale );
313324 for (String emoji : Emoji .getAllRgi ()) {
314325 String name = data .getShortName (emoji );
@@ -319,37 +330,42 @@ private void uniquePerLocale(String locale, Set<String> problems) {
319330 throw new IllegalArgumentException (
320331 CldrUtility .INHERITANCE_MARKER + " in name of " + emoji + " in " + locale );
321332 }
322- nameToEmoji .put (name , emoji );
333+ nameToEmoji .put (name , Pair . of ( name , emoji ) );
323334 }
324335 Multimap <String , String > duplicateNameToEmoji = null ;
325- for (Entry <String , Collection <String >> entry : nameToEmoji .asMap ().entrySet ()) {
336+ for (Entry <String , Collection <Pair <String , String >>> entry :
337+ nameToEmoji .asMap ().entrySet ()) {
326338 String name = entry .getKey ();
327- Collection <String > emojis = entry .getValue ();
339+ final Collection <Pair <String , String >> emojis = entry .getValue ();
340+ if (duplicateNameToEmoji == null ) {
341+ duplicateNameToEmoji = TreeMultimap .create ();
342+ }
328343 if (emojis .size () > 1 ) {
329- synchronized (problems ) {
330- if (problems .add (
331- "Duplicate name in "
332- + locale
333- + ": “"
334- + name
335- + "” for "
336- + Joiner .on (" & " ).join (emojis ))) {
337- int debug = 0 ;
344+ final String prefix = "Duplicate name in " + locale + ": “" + name + "” for " ;
345+ final StringBuilder remainder = new StringBuilder ();
346+ for (final Pair <String , String > emoji : emojis ) {
347+ duplicateNameToEmoji .put (emoji .getFirst (), emoji .getSecond ());
348+ if (remainder .length () > 0 ) { // ampersand after the first item
349+ remainder .append (" & " );
350+ }
351+ remainder .append ("“" ).append (emoji .getSecond ()).append ("”" );
352+ if (!emoji .getFirst ().equals (name )) {
353+ // case-insensitive collision, so note that
354+ remainder .append ("(≈“" + emoji .getFirst () + "”) " );
338355 }
339356 }
340- if (duplicateNameToEmoji == null ) {
341- duplicateNameToEmoji = TreeMultimap .create ();
342- }
343- duplicateNameToEmoji .putAll (name , emojis );
357+ problems .add (prefix + remainder .toString ());
344358 }
345359 }
346360 if (isVerbose () && duplicateNameToEmoji != null && !duplicateNameToEmoji .isEmpty ()) {
361+ // TODO CLDR-16947: the following will print out in an interleaved way due to threading.
347362 System .out .println ("\n Collisions" );
348363 for (Entry <String , String > entry : duplicateNameToEmoji .entries ()) {
349364 String emoji = entry .getValue ();
350365 System .out .println (locale + "\t " + eng .getShortName (emoji ) + "\t " + emoji );
351366 }
352367 }
368+ return problems .stream ();
353369 }
354370
355371 public void testAnnotationPaths () {
0 commit comments