Skip to content

Commit 0402317

Browse files
committed
Gunjala_Gondi: override ID usage in code until CLDR update
1 parent 10efac2 commit 0402317

File tree

3 files changed

+44
-25
lines changed

3 files changed

+44
-25
lines changed

unicodetools/data/security/dev/IdentifierType.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# IdentifierType.txt
2-
# Date: 2025-08-01, 18:11:44 GMT
2+
# Date: 2025-08-04, 21:58:43 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1787,12 +1787,6 @@ ABF0..ABF9 ; Limited_Use # 5.2 [10] MEETEI MAYEK DIGIT Z
17871787
1145F ; Limited_Use # 12.0 NEWA LETTER VEDIC ANUSVARA
17881788
11460..11461 ; Limited_Use # 13.0 [2] NEWA SIGN JIHVAMULIYA..NEWA SIGN UPADHMANIYA
17891789
11AB0..11ABF ; Limited_Use # 14.0 [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
1790-
11D60..11D65 ; Limited_Use # 11.0 [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU
1791-
11D67..11D68 ; Limited_Use # 11.0 [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI
1792-
11D6A..11D8E ; Limited_Use # 11.0 [37] GUNJALA GONDI LETTER OO..GUNJALA GONDI VOWEL SIGN UU
1793-
11D90..11D91 ; Limited_Use # 11.0 [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
1794-
11D93..11D98 ; Limited_Use # 11.0 [6] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI OM
1795-
11DA0..11DA9 ; Limited_Use # 11.0 [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
17961790
11FB0 ; Limited_Use # 13.0 LISU LETTER YHA
17971791
16800..16A38 ; Limited_Use # 6.0 [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
17981792
16F00..16F44 ; Limited_Use # 6.1 [69] MIAO LETTER PA..MIAO LETTER HHA
@@ -1810,7 +1804,7 @@ ABF0..ABF9 ; Limited_Use # 5.2 [10] MEETEI MAYEK DIGIT Z
18101804
1E94B ; Limited_Use # 12.0 ADLAM NASALIZATION MARK
18111805
1E950..1E959 ; Limited_Use # 9.0 [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
18121806

1813-
# Total code points: 5107
1807+
# Total code points: 5044
18141808

18151809
# Identifier_Type: Limited_Use Uncommon_Use
18161810

@@ -3945,6 +3939,12 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE
39453939
11D3C..11D3D ; Exclusion # 10.0 [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
39463940
11D3F..11D47 ; Exclusion # 10.0 [9] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI RA-KARA
39473941
11D50..11D59 ; Exclusion # 10.0 [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
3942+
11D60..11D65 ; Exclusion # 11.0 [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU
3943+
11D67..11D68 ; Exclusion # 11.0 [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI
3944+
11D6A..11D8E ; Exclusion # 11.0 [37] GUNJALA GONDI LETTER OO..GUNJALA GONDI VOWEL SIGN UU
3945+
11D90..11D91 ; Exclusion # 11.0 [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
3946+
11D93..11D98 ; Exclusion # 11.0 [6] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI OM
3947+
11DA0..11DA9 ; Exclusion # 11.0 [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
39483948
11DB0..11DDB ; Exclusion # 17.0 [44] TOLONG SIKI LETTER I..TOLONG SIKI UNGGA
39493949
11DE0..11DE9 ; Exclusion # 17.0 [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE
39503950
11EE0..11EF6 ; Exclusion # 11.0 [23] MAKASAR LETTER KA..MAKASAR VOWEL SIGN O
@@ -4019,7 +4019,7 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE
40194019
1E800..1E8C4 ; Exclusion # 7.0 [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
40204020
1E8D0..1E8D6 ; Exclusion # 7.0 [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
40214021

4022-
# Total code points: 20799
4022+
# Total code points: 20862
40234023

40244024
# Identifier_Type: Exclusion Not_XID
40254025

unicodetools/data/security/dev/data/source/removals.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,7 +1215,3 @@ AB66..AB67; Uncommon_Use
12151215
# U+16FF0 and U+16FF1 VIETNAMESE ALTERNATE READING MARK CA and NHAY from Recommended to Obsolete.
12161216
08C9; Technical
12171217
16FF0..16FF1; Obsolete
1218-
1219-
# Unicode 17.0
1220-
# UTC-184-A76 ... Derive the Identifier_Type values for Gunjala Gondi characters from the UAX31 classification of the script as specified.
1221-
\p{Script=Gunjala_Gondi}; Exclusion

unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,6 @@ private void loadFileData() throws IOException {
439439
sources =
440440
VersionedProperty.parseUnicodeSet(
441441
codelist, VersionedSymbolTable.forDevelopment());
442-
if (sources.contains("ᢰ")) {
443-
int x = 0;
444-
}
445442
} else {
446443
final String[] codes = Utility.split(codelist, ' ');
447444
for (final String code : codes) {
@@ -513,12 +510,8 @@ private void loadFileData() throws IOException {
513510
UnicodeSet hasRecommendedScript = new UnicodeSet();
514511
Set<String> scripts = LATEST.load(UcdProperty.Script).values();
515512
for (final String script : scripts) {
516-
String shortName = UcdPropertyValues.Script_Values.forName(script).getShortName();
517-
Info scriptInfo = ScriptMetadata.getInfo(shortName);
518-
if (scriptInfo == null) {
519-
System.out.println("No script metadata info for: " + script);
520-
}
521-
if (scriptInfo != null && scriptInfo.idUsage == IdUsage.RECOMMENDED) {
513+
IdUsage idUsage = getScriptUsage(script);
514+
if (idUsage == IdUsage.RECOMMENDED) {
522515
final UnicodeSet us = ScriptInfo.IDENTIFIER_INFO.getSetWith(script);
523516
if (us != null) {
524517
hasRecommendedScript.addAll(us);
@@ -528,9 +521,7 @@ private void loadFileData() throws IOException {
528521
hasRecommendedScript.freeze();
529522

530523
for (final String script : scripts) {
531-
String shortName = UcdPropertyValues.Script_Values.forName(script).getShortName();
532-
Info scriptInfo = ScriptMetadata.getInfo(shortName);
533-
final IdUsage idUsage = scriptInfo != null ? scriptInfo.idUsage : IdUsage.EXCLUSION;
524+
final IdUsage idUsage = getScriptUsage(script);
534525
IdentifierInfo.Identifier_Type status;
535526
switch (idUsage) {
536527
// case ASPIRATIONAL:
@@ -673,6 +664,38 @@ private void loadFileData() throws IOException {
673664

674665
}
675666

667+
private IdUsage getScriptUsage(String longScriptName) {
668+
String shortName = UcdPropertyValues.Script_Values.forName(longScriptName).getShortName();
669+
Info scriptInfo = ScriptMetadata.getInfo(shortName);
670+
IdUsage idUsage;
671+
if (scriptInfo == null) {
672+
System.out.println("No script metadata info for: " + longScriptName);
673+
idUsage = IdUsage.EXCLUSION;
674+
} else {
675+
idUsage = scriptInfo.idUsage;
676+
}
677+
// Sometimes UAX #31 and CLDR script metadata are updated but the Unicode Tools still
678+
// depend on an older version.
679+
// We temporarily override ID Usage values here.
680+
// See https://github.com/unicode-org/unicodetools/pull/1185 for an example.
681+
switch (longScriptName) {
682+
case "Gunjala_Gondi":
683+
// [184-C33] Consensus: Change the Identifier_Type values for
684+
// Gunjala Gondi characters (sc=Gong) from Limited_Use to Excluded,
685+
// to match the UAX31 classification of the script.
686+
// For Unicode Version 17.0. See L2/25-183 item 6.4.
687+
// [184-A76] Action Item for Josh Hadley, PAG:
688+
// Derive the Identifier_Type values for Gunjala Gondi characters from
689+
// the UAX31 classification of the script as specified.
690+
// For Unicode Version 17.0. See L2/25-183 item 6.4.
691+
System.out.println(
692+
"TODO: Replace Gunjala_Gondi=Excluded override with CLDR update");
693+
return IdUsage.EXCLUSION;
694+
default:
695+
return idUsage;
696+
}
697+
}
698+
676699
private void addToRemovalSets(
677700
String codepoint, final IdentifierInfo.Identifier_Type identifierType) {
678701
Set<Identifier_Type> oldSet = identifierTypesMap.get(codepoint);

0 commit comments

Comments
 (0)