Skip to content

Commit 0839ff3

Browse files
committed
Make Math_Class multivalued to honestly show its multiple values of old. (Not changing the current draft to make it multivalued, though it sounds like we should).
1 parent 4a3f1da commit 0839ff3

File tree

5 files changed

+29
-38
lines changed

5 files changed

+29
-38
lines changed

unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,31 +1537,17 @@ private static void parseFields(
15371537
}
15381538
}
15391539
if ((propInfo.property == UcdProperty.Math_Entity_Name
1540-
|| propInfo.property == UcdProperty.Math_Entity_Set)
1540+
|| propInfo.property == UcdProperty.Math_Entity_Set
1541+
|| propInfo.property == UcdProperty.Math_Class_Ex)
15411542
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
15421543
< 0) {
1543-
merger = IndexUnicodeProperties.MULTIVALUED_JOINER;
1544+
merger = new PropertyUtilities.RedundancyIgnoringMultivaluedJoiner();
15441545
}
15451546
if (propInfo.property == UcdProperty.Math_Descriptive_Comments
15461547
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
15471548
< 0) {
15481549
merger = new PropertyUtilities.NullIgnorer();
15491550
}
1550-
if (propInfo.property == UcdProperty.Math_Class_Ex
1551-
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
1552-
< 0) {
1553-
merger = new PropertyUtilities.RedundancyIgnorer();
1554-
}
1555-
if (propInfo.property == UcdProperty.Math_Class_Ex
1556-
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_6_0) < 0
1557-
&& (line.getRange().start == 0x2020 || line.getRange().start == 0x2021)
1558-
&& line.getRange().end == line.getRange().start
1559-
&& value.equals("N")) {
1560-
// MathClassEx-11 had conflicting assignments for these two characters. Instead
1561-
// of making Math_Class multivalued, keep the one that stayed (R), and discard
1562-
// the N.
1563-
value = "R";
1564-
}
15651551
if (propInfo.property == UcdProperty.Math_Class_Ex
15661552
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_6_1) < 0
15671553
&& value.isEmpty()) {
@@ -1630,6 +1616,7 @@ private static void parseSimpleFieldFile(
16301616
propInfo.property, defaultValue, "hardcoded", false, version);
16311617
}
16321618
}
1619+
Merge<String> merger = null;
16331620
if (line.getParts().length == 3 && propInfo.property == UcdProperty.Block) {
16341621
// The old Blocks files had First; Last; Block.
16351622
IntRange range = new IntRange();
@@ -1709,21 +1696,7 @@ private static void parseSimpleFieldFile(
17091696
continue;
17101697
} else if (propInfo.property == UcdProperty.Math_Class
17111698
&& version.compareTo(VersionInfo.UNICODE_6_0) < 0) {
1712-
// MathClass-11 had conflicting assignments for these two characters. Instead
1713-
// of making Math_Class multivalued, keep the one that stayed (R), and discard
1714-
// the N.
1715-
if ((line.getRange().start == 0x2020 || line.getRange().start == 0x2021)
1716-
&& line.getRange().start == line.getRange().end
1717-
&& line.getParts()[1].equals("N")) {
1718-
continue;
1719-
}
1720-
// MathClass-9 had the same problem for U+0021 ! as well.
1721-
if (version.compareTo(VersionInfo.UNICODE_5_1) < 0
1722-
&& line.getRange().start == 0x0021
1723-
&& line.getRange().start == line.getRange().end
1724-
&& line.getParts()[1].equals("P")) {
1725-
continue;
1726-
}
1699+
merger = new PropertyUtilities.RedundancyIgnoringMultivaluedJoiner();
17271700
// MathClass-11 had a line without a value, 21EA..21F3;
17281701
if (line.getParts()[1].isEmpty()) {
17291702
line.getParts()[1] = "None";
@@ -1739,7 +1712,7 @@ private static void parseSimpleFieldFile(
17391712
line.getMissingSet(),
17401713
line.getRange(),
17411714
line.getParts()[1],
1742-
null,
1715+
merger,
17431716
false,
17441717
nextVersion);
17451718
} else {

unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import com.ibm.icu.text.UnicodeSet;
66
import java.util.Collection;
77
import java.util.Map;
8+
import java.util.Set;
9+
810
import org.unicode.text.utility.Utility;
911

1012
public class PropertyUtilities {
@@ -66,6 +68,20 @@ public String merge(String first, String second) {
6668
}
6769
}
6870

71+
public static final class RedundancyIgnoringMultivaluedJoiner implements Merge<String> {
72+
public RedundancyIgnoringMultivaluedJoiner() {}
73+
74+
@Override
75+
public String merge(String first, String second) {
76+
final Set<String> oldValues = Set.of(first.split("\\|"));
77+
if (second == null || oldValues.contains(second)) {
78+
return first;
79+
} else {
80+
return first + "|" + second;
81+
}
82+
}
83+
}
84+
6985
static final <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
7086
final V oldValue = map.get(key);
7187
if (oldValue != null) {

unicodetools/src/main/java/org/unicode/props/UcdProperty.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -735,13 +735,13 @@ public enum UcdProperty {
735735
PropertyType.Enumerated,
736736
DerivedPropertyStatus.NonUCDProperty,
737737
Math_Class_Values.class,
738-
null,
738+
ValueCardinality.Ordered,
739739
"Math_Class"),
740740
Math_Class_Ex(
741741
PropertyType.Enumerated,
742742
DerivedPropertyStatus.NonUCDNonProperty,
743743
Math_Class_Ex_Values.class,
744-
null,
744+
ValueCardinality.Ordered,
745745
"Math_Class_Ex"),
746746
NFC_Quick_Check(
747747
PropertyType.Enumerated,

unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,5 +251,7 @@ Confusable_MA ; SINGLE_VALUED ; $codePoints
251251
#Emoji_Modifier ; SINGLE_VALUED ; <enum>
252252
#Emoji_Modifier_Base ; SINGLE_VALUED ; <enum>
253253

254+
Math_Class ; ORDERED ; .*
255+
Math_Class_Ex ; ORDERED ; .*
254256
Math_Entity_Name ; MULTI_VALUED ; .*
255257
Math_Entity_Set ; MULTI_VALUED ; .*

unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -547,9 +547,9 @@ math/*/MathClassEx ; Math_Entity_Name ; 2
547547
math/*/MathClassEx ; Math_Entity_Set ; 3
548548
math/*/MathClassEx ; Math_Descriptive_Comments ; 4
549549
# Field 2 used to be the character itself.
550-
math/*/MathClassEx ; Math_Entity_Name ; 3 ; v16.0
551-
math/*/MathClassEx ; Math_Entity_Set ; 4 ; v16.0
552-
math/*/MathClassEx ; Math_Descriptive_Comments ; 5 ; v16.0
550+
math/*/MathClassEx ; Math_Entity_Name ; 3 ; v17.0
551+
math/*/MathClassEx ; Math_Entity_Set ; 4 ; v17.0
552+
math/*/MathClassEx ; Math_Descriptive_Comments ; 5 ; v17.0
553553

554554
FileType ; Unikemet ; PropertyValue
555555
Unikemet ; kEH_Cat

0 commit comments

Comments
 (0)