5
5
import com .google .common .collect .ComparisonChain ;
6
6
import com .google .common .collect .ImmutableList ;
7
7
import com .google .common .collect .ImmutableMap ;
8
- import com .google .common .collect .ImmutableSet ;
9
8
import com .google .common .collect .Sets ;
10
9
import com .ibm .icu .impl .Relation ;
11
10
import com .ibm .icu .impl .Row ;
42
41
import org .unicode .cldr .util .Containment ;
43
42
import org .unicode .cldr .util .Counter ;
44
43
import org .unicode .cldr .util .Factory ;
44
+ import org .unicode .cldr .util .Iso639Data ;
45
45
import org .unicode .cldr .util .LanguageTagParser ;
46
46
import org .unicode .cldr .util .LocaleNames ;
47
47
import org .unicode .cldr .util .LocaleScriptInfo ;
@@ -83,13 +83,8 @@ public enum LocaleOverride {
83
83
private static final Map <String , Status > SCRIPT_CODE_TO_STATUS =
84
84
Validity .getInstance ().getCodeToStatus (LstrType .script );
85
85
86
- private static final String TEMP_UNKNOWN_REGION = "XZ" ;
87
-
88
- private static final String DEBUG_ADD_KEY = "und_Latn_ZA" ;
89
-
90
86
private static final double MIN_UNOFFICIAL_LANGUAGE_SIZE = 10000000 ;
91
87
private static final double MIN_UNOFFICIAL_LANGUAGE_PROPORTION = 0.20 ;
92
- private static final double MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE = 100000 ;
93
88
94
89
/** When a language is not official, scale it down. */
95
90
private static final double UNOFFICIAL_SCALE_DOWN = 0.2 ;
@@ -153,8 +148,6 @@ private static Set<String> parse(String[] args) {
153
148
private static boolean DEBUG ;
154
149
private static Map <String , LstrType > WATCH_PAIRS = null ;
155
150
156
- private static final boolean SHOW_OVERRIDES = true ;
157
-
158
151
static final Map <String , LSRSource > silData = LangTagsData .getJsonData ();
159
152
160
153
public static void main (String [] args ) throws IOException {
@@ -217,7 +210,6 @@ public static void main(String[] args) throws IOException {
217
210
throw new IllegalArgumentException ();
218
211
}
219
212
220
- Set <String > newAdditions = new TreeSet <>();
221
213
Set <String > newMissing = new TreeSet <>();
222
214
223
215
// Check against last version
@@ -291,12 +283,6 @@ public static void main(String[] args) throws IOException {
291
283
System .out .println ("Keeping macroregions used in cldr " + cldrContainerToLanguages );
292
284
}
293
285
294
- private static final List <String > KEEP_TARGETS =
295
- DROP_HARDCODED ? List .of () : List .of ("und_Arab_PK" , "und_Latn_ET" );
296
-
297
- private static final ImmutableSet <String > deprecatedISONotInLST =
298
- DROP_HARDCODED ? ImmutableSet .of () : ImmutableSet .of ("scc" , "scr" );
299
-
300
286
/**
301
287
* This is the simplest way to override, by supplying the max value. It gets a very low weight,
302
288
* so doesn't override any stronger value.
@@ -469,7 +455,6 @@ public static void main(String[] args) throws IOException {
469
455
{"rhg_Arab" , "rhg_Arab_MM" },
470
456
{"und_Arab_MM" , "rhg_Arab_MM" },
471
457
{"sd_IN" , "sd_Deva_IN" }, // preferred in CLDR
472
- // { "sd_Deva", "sd_Deva_IN"},
473
458
{"und_Cpmn" , "und_Cpmn_CY" },
474
459
{"oc_ES" , "oc_Latn_ES" },
475
460
{"os" , "os_Cyrl_GE" },
@@ -517,15 +502,6 @@ public static void main(String[] args) throws IOException {
517
502
{"ko" , "Kore" }, // Korean (North Korea)
518
503
{"ko_KR" , "Kore" }, // Korean (South Korea)
519
504
{"ja" , "Jpan" }, // Special script for japan
520
-
521
- // {"chk", "Latn"}, // Chuukese (Micronesia)
522
- // {"fil", "Latn"}, // Filipino (Philippines)"
523
- // {"pap", "Latn"}, // Papiamento (Netherlands Antilles)
524
- // {"pau", "Latn"}, // Palauan (Palau)
525
- // {"su", "Latn"}, // Sundanese (Indonesia)
526
- // {"tet", "Latn"}, // Tetum (East Timor)
527
- // {"tk", "Latn"}, // Turkmen (Turkmenistan)
528
- // {"ty", "Latn"}, // Tahitian (French Polynesia)
529
505
// {LocaleNames.UND, "Latn"}, // Ultimate fallback
530
506
};
531
507
@@ -544,21 +520,6 @@ public static void main(String[] args) throws IOException {
544
520
}
545
521
}
546
522
547
- private static Map <String , String > FALLBACK_SCRIPTS ;
548
-
549
- static {
550
- LanguageTagParser additionLtp = new LanguageTagParser ();
551
- Map <String , String > _FALLBACK_SCRIPTS = new TreeMap <>();
552
- for (String addition : MAX_ADDITIONS ) {
553
- additionLtp .set (addition );
554
- String lan = additionLtp .getLanguage ();
555
- _FALLBACK_SCRIPTS .put (lan , additionLtp .getScript ());
556
- }
557
- FALLBACK_SCRIPTS = ImmutableMap .copyOf (_FALLBACK_SCRIPTS );
558
- }
559
-
560
- private static int errorCount ;
561
-
562
523
/**
563
524
* Debugging function that returns false if the flag is false, otherwise returns true if the
564
525
* WATCH is null or the locales don't match the WATCH.
@@ -679,8 +640,6 @@ public static String getNameSafe(String oldValue) {
679
640
private static OutputStyle OUTPUT_STYLE =
680
641
OutputStyle .valueOf (CldrUtility .getProperty ("OutputStyle" , "XML" , "XML" ).toUpperCase ());
681
642
682
- private static final String TAG_SEPARATOR = OUTPUT_STYLE == OutputStyle .C_ALT ? "-" : "_" ;
683
-
684
643
private static final Joiner JOIN_SPACE = Joiner .on (' ' );
685
644
private static final Joiner JOIN_UBAR = Joiner .on ('_' );
686
645
@@ -728,21 +687,6 @@ private static Map<String, String> generatePopulationData(Map<String, String> to
728
687
729
688
if (data .getOfficialStatus () == OfficialStatus .unknown ) {
730
689
final String locale = writtenLanguage + "_" + region ;
731
- // if (literatePopulation >= minimalLiteratePopulation) {
732
- // // ok, skip
733
- // } else if (literatePopulation >=
734
- // MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE
735
- // && cldrLocales.contains(locale)) {
736
- // // ok, skip
737
- // } else {
738
- // // if (SHOW_ADD)
739
- // // System.out.println("Skipping:\t" + writtenLanguage
740
- // + "\t" + region + "\t"
741
- // // + english.nameGetter().getName(locale)
742
- // // + "\t-- too small:\t" +
743
- // number.format(literatePopulation));
744
- // // continue;
745
- // }
746
690
order *= UNOFFICIAL_SCALE_DOWN ;
747
691
if (watching (SHOW_POP , writtenLanguage ))
748
692
System .out .println (
@@ -800,22 +744,6 @@ private static Map<String, String> generatePopulationData(Map<String, String> to
800
744
}
801
745
}
802
746
803
- // Old code for getting language to script, adding XZ, which converts to ZZ. Replaced by use
804
- // of SIL data
805
-
806
- // for (Entry<String, Collection<String>> entry :
807
- // DeriveScripts.getLanguageToScript().asMap().entrySet()) {
808
- // String language = entry.getKey();
809
- // final Collection<String> values = entry.getValue();
810
- // if (values.size() != 1) {
811
- // continue; // skip, no either way
812
- // }
813
- // Set<R3<Double, String, String>> old = maxData.languages.get(language);
814
- // if (!maxData.languages.containsKey(language)) {
815
- // maxData.add(language, values.iterator().next(), TEMP_UNKNOWN_REGION, 1.0);
816
- // }
817
- // }
818
-
819
747
// add others, with English default
820
748
for (String region : otherTerritories ) {
821
749
if (!LocaleValidator .ALLOW_IN_LIKELY .isAllowed (LstrType .region , region , null , null )) {
@@ -846,10 +774,6 @@ private static Map<String, String> generatePopulationData(Map<String, String> to
846
774
continue ;
847
775
}
848
776
849
- if (deprecatedISONotInLST .contains (badLanguage )) {
850
- continue ;
851
- }
852
-
853
777
if (LANGUAGE_CODE_TO_STATUS .get (badLanguage ) != Validity .Status .regular ) {
854
778
if (!LocaleValidator .ALLOW_IN_LIKELY .isAllowed (
855
779
LstrType .language , badLanguage , null , null )) {
@@ -1071,6 +995,10 @@ private static Map<String, String> generatePopulationData(Map<String, String> to
1071
995
System .out .println (JOIN_LS .join ("Failure in ScriptMetaData: " + ltp , errors ));
1072
996
continue ;
1073
997
}
998
+ if (isLanguageCollection (likelyLanguage )) {
999
+ // Dropping language collections
1000
+ continue ;
1001
+ }
1074
1002
final String result = likelyLanguage + "_" + script + "_" + originCountry ;
1075
1003
add ("und_" + script , result , toMaximized , "S->LR•" , LocaleOverride .KEEP_EXISTING );
1076
1004
add (likelyLanguage , result , toMaximized , "L->SR•" , LocaleOverride .KEEP_EXISTING );
@@ -1758,7 +1686,7 @@ private static File printLikelySubtags(Map<String, String> fluffup) throws IOExc
1758
1686
for (Entry <String , LSRSource > entry : silData .entrySet ()) {
1759
1687
CLDRLocale source = CLDRLocale .getInstance (entry .getKey ());
1760
1688
String lang = source .getLanguage ();
1761
- if (!fluffup .containsKey (lang )) {
1689
+ if (!fluffup .containsKey (lang ) && ! isLanguageCollection ( lang ) ) {
1762
1690
silMap .put (entry .getKey (), entry .getValue ().getLsrString ());
1763
1691
if (!entry .getValue ().getSources ().isEmpty ()) {
1764
1692
silOrigins .put (entry .getKey (), entry .getValue ().getSourceString ());
@@ -1841,4 +1769,10 @@ public static void printLine(
1841
1769
}
1842
1770
}
1843
1771
}
1772
+
1773
+ // Check if the language code is a collection of languages (ISO 639-5). Otherwise its probably
1774
+ // an individual one or maybe a macrolanguage.
1775
+ private static Boolean isLanguageCollection (String language ) {
1776
+ return Iso639Data .getHierarchy (language ) != null ;
1777
+ }
1844
1778
}
0 commit comments