Skip to content

Commit 426a044

Browse files
authored
Promote the sources to Normative and the other UAX60 properties to Provisional; introduce ExtraAliases for historical Nüshu data. (#1233)
1 parent 6342504 commit 426a044

File tree

9 files changed

+143
-25
lines changed

9 files changed

+143
-25
lines changed

unicodetools/data/ucd/dev/PropertyAliases.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# PropertyAliases-17.0.0.txt
2-
# Date: 2025-04-25, 14:00:52 GMT
1+
# PropertyAliases-18.0.0.txt
2+
# Date: 2025-11-11, 02:15:48 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -110,6 +110,8 @@ kEH_Desc ; kEH_Desc
110110
kEH_HG ; kEH_HG
111111
kEH_IFAO ; kEH_IFAO
112112
kEH_JSesh ; kEH_JSesh
113+
kNSHU_DubenSrc ; kNSHU_DubenSrc
114+
kTGT_MergedSrc ; kTGT_MergedSrc
113115
na ; Name
114116
na1 ; Unicode_1_Name
115117
Name_Alias ; Name_Alias
@@ -226,6 +228,6 @@ XO_NFKC ; Expands_On_NFKC
226228
XO_NFKD ; Expands_On_NFKD
227229

228230
# ================================================
229-
# Total: 145
231+
# Total: 147
230232

231233
# EOF

unicodetools/data/ucd/dev/PropertyValueAliases.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# PropertyValueAliases-18.0.0.txt
2-
# Date: 2025-09-11, 13:53:01 GMT
2+
# Date: 2025-11-11, 02:15:48 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1725,6 +1725,14 @@ kEH_NoRotate; Y ; Yes ; T
17251725

17261726
# @missing: 0000..10FFFF; kMandarin; <none>
17271727

1728+
# kNSHU_DubenSrc (kNSHU_DubenSrc)
1729+
1730+
# @missing: 0000..10FFFF; kNSHU_DubenSrc; <none>
1731+
1732+
# kTGT_MergedSrc (kTGT_MergedSrc)
1733+
1734+
# @missing: 0000..10FFFF; kTGT_MergedSrc; <none>
1735+
17281736
# kTotalStrokes (cjkTotalStrokes)
17291737

17301738
# @missing: 0000..10FFFF; kTotalStrokes; <none>

unicodetools/src/main/java/org/unicode/props/GenerateEnums.java

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ static class PropName implements Comparable<PropName> {
8080
final String shortName;
8181
final String longName;
8282
final List<String> others;
83+
List<String> extra;
8384
final DerivedPropertyStatus status;
8485
final Map<String, PropName> subnames = new TreeMap<String, PropName>();
8586

@@ -511,7 +512,8 @@ public static void writeMainUcdFile() throws IOException {
511512
classItem,
512513
cardinality,
513514
pname.shortName,
514-
pname.others);
515+
pname.others,
516+
pname.extra);
515517
output.print(",\n");
516518
}
517519
}
@@ -536,6 +538,21 @@ public static void writeMainUcdFile() throws IOException {
536538
+ " private UcdProperty(PropertyType type,\n"
537539
+ " DerivedPropertyStatus status,\n"
538540
+ " String shortName,\n"
541+
+ " String[] otherNames,\n"
542+
+ " String[] extraNames) {\n"
543+
+ " this.type = type;\n"
544+
+ " this.status = status;\n"
545+
+ " names = new PropertyNames<UcdProperty>(UcdProperty.class, this, shortName, otherNames, extraNames);\n"
546+
+ " name2enum = null;\n"
547+
+ " enums = null;\n"
548+
+ " enumClass = null;\n"
549+
+ " cardinality = ValueCardinality.Singleton;\n"
550+
+ " }\n"
551+
+ "\n"
552+
+ " \n"
553+
+ " private UcdProperty(PropertyType type,\n"
554+
+ " DerivedPropertyStatus status,\n"
555+
+ " String shortName,\n"
539556
+ " String... otherNames) {\n"
540557
+ " this.type = type;\n"
541558
+ " this.status = status;\n"
@@ -619,7 +636,8 @@ public static void writeOtherNames(
619636
String classItem,
620637
ValueCardinality cardinality,
621638
String shortName,
622-
List<String> otherNames) {
639+
List<String> otherNames,
640+
List<String> extraNames) {
623641
output.print("(");
624642
// if (shortName != null) {
625643
output.print(type);
@@ -635,8 +653,20 @@ public static void writeOtherNames(
635653
: "ValueCardinality." + cardinality.toString()));
636654
}
637655
output.print(", \"" + shortName + "\"");
638-
for (final String otherName : otherNames) {
639-
output.print(", \"" + otherName + "\"");
656+
if (extraNames == null) {
657+
for (final String otherName : otherNames) {
658+
output.print(", \"" + otherName + "\"");
659+
}
660+
} else {
661+
output.print(", new String[]{");
662+
for (final String otherName : otherNames) {
663+
output.print("\"" + otherName + "\", ");
664+
}
665+
output.print("}, new String[]{");
666+
for (final String extraName : extraNames) {
667+
output.print("\"" + extraName + "\", ");
668+
}
669+
output.print("}");
640670
}
641671
output.print(")");
642672
}
@@ -676,16 +706,41 @@ public static void addPropertyAliases(
676706
}
677707
var status = fileStatus;
678708
if (status == null) {
679-
status = DerivedPropertyStatus.valueOf(parts[parts.length - 1]);
709+
if (!parts[parts.length - 1].equals("ExtraAliases")) {
710+
status = DerivedPropertyStatus.valueOf(parts[parts.length - 1]);
711+
}
680712
parts = Arrays.copyOf(parts, parts.length - 1);
681713
}
682-
final PropName propName = new PropName(type, status, parts);
683-
values.put(
684-
propName,
685-
propName.longName.equals("Age")
686-
? new TreeSet<>(ARRAY_SORT)
687-
: new LinkedHashSet<>());
688-
System.out.println(propName);
714+
if (status == null) {
715+
for (var key : values.keySet()) {
716+
if (key.shortName.equals(parts[0]) || key.longName.equals(parts[1])) {
717+
if (!(key.shortName.equals(parts[0]) && key.longName.equals(parts[1]))) {
718+
throw new IllegalArgumentException(
719+
"Partial match for (short, long) on ExtraAliases: "
720+
+ key.shortName
721+
+ " "
722+
+ key.longName);
723+
}
724+
if (key.status != DerivedPropertyStatus.Approved) {
725+
throw new IllegalArgumentException(
726+
"ExtraAliases for unapproved property "
727+
+ key.shortName
728+
+ " "
729+
+ key.longName);
730+
}
731+
key.extra = Arrays.asList(parts).subList(2, parts.length);
732+
break;
733+
}
734+
}
735+
} else {
736+
final PropName propName = new PropName(type, status, parts);
737+
values.put(
738+
propName,
739+
propName.longName.equals("Age")
740+
? new TreeSet<>(ARRAY_SORT)
741+
: new LinkedHashSet<>());
742+
System.out.println(propName);
743+
}
689744
// if (!Locations.contains(propName.longName)) {
690745
// System.out.println("Missing file: " + propName.longName);
691746
// }

unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,14 @@ private String resolveValue(String rawValue, int codepoint) {
855855
}
856856
}
857857

858+
public List<String> getApprovedNameAliases() {
859+
var result = new ArrayList<String>();
860+
result.add(prop.getShortName());
861+
result.add(prop.getNames().getLongName());
862+
result.addAll(prop.getNames().getOtherNames());
863+
return result;
864+
}
865+
858866
@Override
859867
public List<String> _getNameAliases(List result) {
860868
result.addAll(prop.getNames().getAllNames());

unicodetools/src/main/java/org/unicode/props/PropertyNames.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,22 @@ public interface Named {
3131
private final NameMatcher name2enum;
3232
private final String shortName;
3333
private final List<String> otherNames;
34+
private List<String> extraNames = null;
3435
private final T enumItem;
3536

37+
public PropertyNames(
38+
Class<T> classItem,
39+
T enumItem,
40+
String shortName,
41+
String[] otherNames,
42+
String[] extraNames) {
43+
this(classItem, enumItem, shortName, otherNames);
44+
this.extraNames = Arrays.asList(extraNames);
45+
for (final String extra : extraNames) {
46+
this.name2enum.put(extra, enumItem);
47+
}
48+
}
49+
3650
public PropertyNames(Class<T> classItem, T enumItem, String shortName, String... otherNames) {
3751
this.enumItem = enumItem;
3852
this.shortName = shortName == null ? enumItem.toString() : shortName;
@@ -58,6 +72,10 @@ public String getShortName() {
5872
return shortName;
5973
}
6074

75+
public String getLongName() {
76+
return enumItem.toString();
77+
}
78+
6179
public List<String> getOtherNames() {
6280
return otherNames;
6381
}
@@ -67,6 +85,9 @@ public List<String> getAllNames() {
6785
result.add(shortName); // UCD code expects the first name to be the short one
6886
result.add(enumItem.toString());
6987
result.addAll(otherNames);
88+
if (extraNames != null) {
89+
result.addAll(extraNames);
90+
}
7091
return ImmutableList.copyOf(result);
7192
}
7293

unicodetools/src/main/java/org/unicode/props/UcdProperty.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -444,12 +444,15 @@ public enum UcdProperty {
444444
kMorohashi(PropertyType.Miscellaneous, DerivedPropertyStatus.Provisional, "cjkMorohashi"),
445445
kNSHU_DubenSrc(
446446
PropertyType.Miscellaneous,
447-
DerivedPropertyStatus.UCDNonProperty,
447+
DerivedPropertyStatus.Approved,
448448
"kNSHU_DubenSrc",
449-
"kSrc_NushuDuben"),
449+
new String[] {},
450+
new String[] {
451+
"kSrc_NushuDuben",
452+
}),
450453
kNSHU_Reading(
451454
PropertyType.Miscellaneous,
452-
DerivedPropertyStatus.UCDNonProperty,
455+
DerivedPropertyStatus.Provisional,
453456
"kNSHU_Reading",
454457
"kReading"),
455458
kNelson(
@@ -538,8 +541,7 @@ public enum UcdProperty {
538541
null,
539542
ValueCardinality.Unordered,
540543
"cjkTGHZ2013"),
541-
kTGT_MergedSrc(
542-
PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, "kTGT_MergedSrc"),
544+
kTGT_MergedSrc(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "kTGT_MergedSrc"),
543545
kTGT_Numeric(PropertyType.Miscellaneous, DerivedPropertyStatus.Provisional, "kTGT_Numeric"),
544546
kTGT_RSUnicode(
545547
PropertyType.Miscellaneous,
@@ -1011,6 +1013,23 @@ public enum UcdProperty {
10111013
private final Class enumClass;
10121014
private final ValueCardinality cardinality;
10131015

1016+
/**
 * Creates a property that has extra names in addition to its other names.
 *
 * <p>The property gets no enum information ({@code name2enum}, {@code enums} and {@code
 * enumClass} are null) and its cardinality is {@link ValueCardinality#Singleton}.
 *
 * @param type the property type
 * @param status the property status
 * @param shortName the short name, passed to {@link PropertyNames}
 * @param otherNames the other names, passed to {@link PropertyNames}
 * @param extraNames the extra names, passed to {@link PropertyNames}
 */
private UcdProperty(
        PropertyType type,
        DerivedPropertyStatus status,
        String shortName,
        String[] otherNames,
        String[] extraNames) {
    this.type = type;
    this.status = status;
    // `this` is handed to PropertyNames, so type and status are assigned first.
    this.names =
            new PropertyNames<UcdProperty>(
                    UcdProperty.class, this, shortName, otherNames, extraNames);
    this.cardinality = ValueCardinality.Singleton;
    this.enumClass = null;
    this.enums = null;
    this.name2enum = null;
}
1032+
10141033
private UcdProperty(
10151034
PropertyType type,
10161035
DerivedPropertyStatus status,

unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,11 @@ public static void generateAliasFile(String filename) throws IOException {
869869
final String propAlias = it.next();
870870

871871
final UnicodeProperty up = ups.getProperty(propAlias);
872-
final List<String> aliases = up.getNameAliases();
872+
final List<String> aliases =
873+
up instanceof IndexUnicodeProperties.IndexUnicodeProperty
874+
? ((IndexUnicodeProperties.IndexUnicodeProperty) up)
875+
.getApprovedNameAliases()
876+
: up.getNameAliases();
873877
String firstAlias = aliases.get(0).toString();
874878
if (firstAlias.isEmpty()) {
875879
throw new IllegalArgumentException("Internal error");

unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ public String _getValue(int codepoint) {
312312
add(iup.getProperty("kMandarin"));
313313
add(iup.getProperty("kTotalStrokes"));
314314
add(iup.getProperty("kUnihanCore2020"));
315+
add(iup.getProperty("kTGT_MergedSrc"));
316+
add(iup.getProperty("kNSHU_DubenSrc"));
315317
add(iup.getProperty("kEH_Cat"));
316318
add(iup.getProperty("kEH_Desc"));
317319
add(iup.getProperty("kEH_HG"));

unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ cjkTayNumeric ; kTayNumeric ; Provisional
190190
# change the tag kRSTUnicode to kTGT_RSUnicode.
191191
# For Unicode Version 17.0. See L2/25-087 item 1.9.
192192
# (Changed between 17 alpha and beta.)
193-
kTGT_MergedSrc ; kTGT_MergedSrc ; UCDNonProperty
194193
kTGT_RSUnicode ; kTGT_RSUnicode ; kRSTUnicode ; UCDNonProperty
195194

196195
# [185-C37] Consensus: Add a new provisional property, kTGT_Numeric to the
@@ -204,8 +203,8 @@ kTGT_Numeric ; kTGT_Numeric ; Provisional
204203
# change the tag kSrc_NushuDuben to kNSHU_DubenSrc, and change the tag kReading to kNSHU_Reading.
205204
# For Unicode Version 17.0. See L2/25-087 item 1.9.
206205
# (Changed between 17 alpha and beta.)
207-
kNSHU_DubenSrc ; kNSHU_DubenSrc ; kSrc_NushuDuben ; UCDNonProperty
208-
kNSHU_Reading ; kNSHU_Reading ; kReading ; UCDNonProperty
206+
kNSHU_DubenSrc ; kNSHU_DubenSrc ; kSrc_NushuDuben ; ExtraAliases
207+
kNSHU_Reading ; kNSHU_Reading ; kReading ; Provisional
209208

210209
kEH_Func ; kEH_Func ; Provisional
211210
kEH_FVal ; kEH_FVal ; Provisional

0 commit comments

Comments
 (0)