unicode-org
diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
Lines changed: 59 additions & 1 deletion
diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java b/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java
Lines changed: 47 additions & 0 deletions
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java b/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java
Lines changed: 25 additions & 2 deletions
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Lines changed: 30 additions & 0 deletions
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
Lines changed: 90 additions & 0 deletions
@@ -666,6 +666,18 @@ static void parseSourceFile(
                             && (propInfo = propInfoSet.iterator().next()).special
                                     == SpecialProperty.None
                             && propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) == 1) {
+                        if (fileName.equals("math/*/MathClass")
+                                && indexUnicodeProperties.ucdVersion.compareTo(
+                                                VersionInfo.UNICODE_6_3)
+                                        <= 0) {
+                            parser =
+                                    parser.withLinePreprocessor(
+                                            s ->
+                                                    s.startsWith("1D455=210E;")
+                                                                    || s.equals("code point;class")
+                                                            ? "#" + s
+                                                            : s);
+                        }
                         parseSimpleFieldFile(
                                 parser.withMissing(true),
                                 propInfo,
@@ -674,6 +686,23 @@ static void parseSourceFile(
                                         ? null
                                         : nextProperties.getProperty(propInfo.property));
                     } else {
+                        if (fileName.equals("math/*/MathClassEx")
+                                && indexUnicodeProperties.ucdVersion.compareTo(
+                                                VersionInfo.UNICODE_6_3)
+                                        <= 0) {
+                            // Old versions of MathClassEx had a malformed range and a line that
+                            // should have been commented out.  Search for those specifically and
+                            // fix them; we don’t want to generally allow a new range syntax.
+                            parser =
+                                    parser.withLinePreprocessor(
+                                            s ->
+                                                    s.startsWith("FE61-FE68;")
+                                                            ? s.replaceFirst(
+                                                                    "FE61-FE68;", "FE61..FE68;")
+                                                            : s.startsWith("1D455=210E;")
+                                                                    ? "#" + s
+                                                                    : s);
+                        }
                         parseFieldFile(
                                 parser.withMissing(true),
                                 indexUnicodeProperties,
@@ -1510,6 +1539,27 @@ private static void parseFields(
                         value = "No";
                     }
                 }
+                if ((propInfo.property == UcdProperty.Math_Entity_Name
+                                || propInfo.property == UcdProperty.Math_Entity_Set
+                                || propInfo.property == UcdProperty.Math_Class_Ex)
+                        && indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
+                                < 0) {
+                    merger = new PropertyUtilities.RedundancyIgnoringMultivaluedJoiner();
+                }
+                if (propInfo.property == UcdProperty.Math_Descriptive_Comments
+                        && indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
+                                < 0) {
+                    merger = new PropertyUtilities.NullIgnorer();
+                }
+                if (propInfo.property == UcdProperty.Math_Class_Ex
+                        && indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_6_1) < 0
+                        && value.isEmpty()) {
+                    // MathClassEx-12 has
+                    // 27CA;;;;;;VERTICAL BAR WITH HORIZONTAL STROKE
+                    // MathClassEx-11 has
+                    // 21EA..21F3;;⇪..⇳;;;; 21EA-21F3 are keyboard
+                    value = "None";
+                }
                 propInfo.put(
                         data,
                         line.getMissingSet(),
@@ -1569,6 +1619,7 @@ private static void parseSimpleFieldFile(
                                 propInfo.property, defaultValue, "hardcoded", false, version);
                     }
                 }
+                Merge<String> merger = null;
                 if (line.getParts().length == 3 && propInfo.property == UcdProperty.Block) {
                     // The old Blocks files had First; Last; Block.
                     IntRange range = new IntRange();
@@ -1646,6 +1697,13 @@ private static void parseSimpleFieldFile(
                         }
                     }
                     continue;
+                } else if (propInfo.property == UcdProperty.Math_Class
+                        && version.compareTo(VersionInfo.UNICODE_6_0) < 0) {
+                    merger = new PropertyUtilities.RedundancyIgnoringMultivaluedJoiner();
+                    // MathClass-11 had a line without a value, 21EA..21F3;
+                    if (line.getParts()[1].isEmpty()) {
+                        line.getParts()[1] = "None";
+                    }
                 } else if (line.getParts().length != 2
                         && version.compareTo(VersionInfo.UNICODE_3_0_1) > 0) {
                     // Unicode 3.0 and earlier had name comments as an extra field.
@@ -1657,7 +1715,7 @@ private static void parseSimpleFieldFile(
                         line.getMissingSet(),
                         line.getRange(),
                         line.getParts()[1],
-                        null,
+                        merger,
                         false,
                         nextVersion);
             } else {
 
@@ -1,9 +1,11 @@
 package org.unicode.props;
 
+import com.google.common.base.Objects;
 import com.ibm.icu.impl.UnicodeMap;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.Collection;
 import java.util.Map;
+import java.util.Set;
 import org.unicode.text.utility.Utility;
 
 public class PropertyUtilities {
@@ -37,6 +39,51 @@ public String merge(String first, String second) {
         }
     }
 
+    /**
+     * Merge strategy that accepts only a {@code null} incoming value: the first value is kept in
+     * that case, and any non-null incoming value is rejected with an exception.
+     */
+    public static final class NullIgnorer implements Merge<String> {
+        public NullIgnorer() {}
+
+        /**
+         * @param first the value reported as "old" in the error message
+         * @param second the value reported as "new" in the error message
+         * @return {@code first}, when {@code second} is {@code null}
+         * @throws UnicodePropertyException when {@code second} is not {@code null}
+         */
+        @Override
+        public String merge(String first, String second) {
+            if (second != null) {
+                throw new UnicodePropertyException(
+                        "Key already present in UnicodeMap:\told: " + first + ",\tnew: " + second);
+            }
+            return first;
+        }
+    }
+
+    /**
+     * Merge strategy that tolerates re-assignment of an identical value (including both values
+     * being {@code null}) and rejects any differing value with an exception.
+     */
+    public static final class RedundancyIgnorer implements Merge<String> {
+        public RedundancyIgnorer() {}
+
+        /**
+         * @param first the value reported as "old" in the error message
+         * @param second the value reported as "new" in the error message
+         * @return {@code first}, when both values are equal
+         * @throws UnicodePropertyException when the two values differ
+         */
+        @Override
+        public String merge(String first, String second) {
+            final boolean redundant = Objects.equal(first, second);
+            if (!redundant) {
+                throw new UnicodePropertyException(
+                        "Key already present in UnicodeMap:\told: " + first + ",\tnew: " + second);
+            }
+            return first;
+        }
+    }
+
+    /**
+     * Merge strategy for multivalued properties whose values are joined with {@code "|"}: an
+     * incoming value is appended to the existing value unless it is {@code null} or already one
+     * of the {@code "|"}-separated components of the existing value.
+     */
+    public static final class RedundancyIgnoringMultivaluedJoiner implements Merge<String> {
+        public RedundancyIgnoringMultivaluedJoiner() {}
+
+        /**
+         * @param first the existing, possibly {@code "|"}-joined, value; may be {@code null}
+         * @param second the incoming value; may be {@code null}
+         * @return {@code second} if {@code first} is {@code null}; {@code first} if {@code second}
+         *     is {@code null} or equals one of the components of {@code first}; otherwise {@code
+         *     first + "|" + second}
+         */
+        @Override
+        public String merge(String first, String second) {
+            if (first == null) {
+                return second;
+            }
+            if (second == null) {
+                return first;
+            }
+            // Deliberately a linear scan rather than Set.of(...): Set.of throws
+            // IllegalArgumentException on duplicate elements, which would turn an existing value
+            // with a repeated component (e.g. "a|b|a|b", the result of merging "a|b" onto "a|b")
+            // into a crash on the next merge.
+            for (final String oldValue : first.split("\\|")) {
+                if (oldValue.equals(second)) {
+                    return first;
+                }
+            }
+            return first + "|" + second;
+        }
+    }
+
     static final <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
         final V oldValue = map.get(key);
         if (oldValue != null) {
 
@@ -4,6 +4,7 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.function.Function;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.unicode.cldr.util.RegexUtilities;
@@ -85,18 +86,21 @@ public enum Contents {
         private final ArrayList<String> partsList = new ArrayList<>();
         private String[] parts = null;
         private final IntRange intRange = new IntRange();
+        private final Function<String, String> linePreprocessor;
 
+        // Stores the iteration settings and builds a matcher (on empty input) from splitPattern.
+        // linePreprocessor may be null; hasNext() only applies it when it is non-null.
         UcdLine(
                 Pattern splitPattern,
                 boolean withRange,
                 boolean withMissing,
                 Iterator<String> rawLines,
-                UcdFileStats stats) {
+                UcdFileStats stats,
+                Function<String, String> linePreprocessor) {
             splitter = splitPattern.matcher("");
             this.withRange = withRange;
             this.withMissing = withMissing;
             this.rawLines = rawLines;
             this.stats = stats;
+            this.linePreprocessor = linePreprocessor;
         }
 
         @Override
@@ -117,6 +121,9 @@ public boolean hasNext() {
                             || line.startsWith(">>>>>>>")) {
                         line2 = "";
                     }
+                    if (linePreprocessor != null) {
+                        line2 = linePreprocessor.apply(line2);
+                    }
                     ++stats.lineCount;
                     final int hashPos = line2.indexOf('#');
                     if (hashPos >= 0) {
@@ -223,6 +230,7 @@ public UnicodeSet getMissingSet() {
     private boolean withTabs = false;
     private boolean withRange = true;
     private boolean withMissing = false;
+    private Function<String, String> linePreprocessor;
     private final Iterable<String> rawLines;
     private final UcdFileStats stats = new UcdFileStats();
 
@@ -245,10 +253,25 @@ public UcdLineParser withMissing(boolean m) {
         return this;
     }
 
+    /**
+     * Sets a line preprocessor to which the line is fed before removing comments, splitting
+     * fields, and decoding ranges. This makes it possible to correct lines with ill-formed
+     * ranges. For corrections affecting only subsequent fields rather than the range, prefer
+     * handling in the parse* functions in PropertyParsingInfo.
+     *
+     * @param f the function applied to each line
+     * @return this parser
+     */
+    public UcdLineParser withLinePreprocessor(Function<String, String> f) {
+        this.linePreprocessor = f;
+        return this;
+    }
+
+    // Returns a new UcdLine over rawLines.iterator(), splitting on TAB when withTabs is set and
+    // on SEMICOLON otherwise. The parser's stats object and its linePreprocessor (null if none
+    // was set) are handed to the new iterator.
     @Override
     public Iterator<UcdLine> iterator() {
         return new UcdLine(
-                withTabs ? TAB : SEMICOLON, withRange, withMissing, rawLines.iterator(), stats);
+                withTabs ? TAB : SEMICOLON,
+                withRange,
+                withMissing,
+                rawLines.iterator(),
+                stats,
+                linePreprocessor);
     }
 
     public int getLineCount() {
 
@@ -26,6 +26,8 @@
 import org.unicode.props.UcdPropertyValues.Joining_Group_Values;
 import org.unicode.props.UcdPropertyValues.Joining_Type_Values;
 import org.unicode.props.UcdPropertyValues.Line_Break_Values;
+import org.unicode.props.UcdPropertyValues.Math_Class_Ex_Values;
+import org.unicode.props.UcdPropertyValues.Math_Class_Values;
 import org.unicode.props.UcdPropertyValues.NFC_Quick_Check_Values;
 import org.unicode.props.UcdPropertyValues.NFD_Quick_Check_Values;
 import org.unicode.props.UcdPropertyValues.NFKC_Quick_Check_Values;
@@ -124,6 +126,22 @@ public enum UcdProperty {
     Emoji_SB(PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, "ESB"),
     ISO_Comment(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "isc"),
     Jamo_Short_Name(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "JSN"),
+    Math_Descriptive_Comments(
+            PropertyType.Miscellaneous,
+            DerivedPropertyStatus.NonUCDNonProperty,
+            "Math_Descriptive_Comments"),
+    Math_Entity_Name(
+            PropertyType.Miscellaneous,
+            DerivedPropertyStatus.NonUCDNonProperty,
+            null,
+            ValueCardinality.Unordered,
+            "Math_Entity_Name"),
+    Math_Entity_Set(
+            PropertyType.Miscellaneous,
+            DerivedPropertyStatus.NonUCDNonProperty,
+            null,
+            ValueCardinality.Unordered,
+            "Math_Entity_Set"),
     Name(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "na"),
     Name_Alias(
             PropertyType.Miscellaneous,
@@ -713,6 +731,18 @@ public enum UcdProperty {
             Line_Break_Values.class,
             null,
             "lb"),
+    Math_Class(
+            PropertyType.Enumerated,
+            DerivedPropertyStatus.NonUCDProperty,
+            Math_Class_Values.class,
+            ValueCardinality.Ordered,
+            "Math_Class"),
+    Math_Class_Ex(
+            PropertyType.Enumerated,
+            DerivedPropertyStatus.NonUCDNonProperty,
+            Math_Class_Ex_Values.class,
+            ValueCardinality.Ordered,
+            "Math_Class_Ex"),
     NFC_Quick_Check(
             PropertyType.Enumerated,
             DerivedPropertyStatus.Approved,
 
@@ -1817,6 +1817,96 @@ public static Line_Break_Values forName(String name) {
     }
 
     // Lowercase_Mapping
+    /** Values of the Math_Class property; each constant has a short name and optional aliases. */
+    public enum Math_Class_Values implements Named {
+        None("None"),
+        Normal("N"),
+        Alphabetic("A"),
+        Binary("B"),
+        Closing("C"),
+        Diacritic("D"),
+        Fence("F"),
+        Glyph_Part("G"),
+        Invisible("I"),
+        Large("L"),
+        Opening("O"),
+        Punctuation("P"),
+        Relation("R", "R?"),
+        Space("S"),
+        Unary("U"),
+        Vary("V"),
+        Special("X");
+
+        /** Name lookup table for this enum, obtained from PropertyNames. */
+        private static final NameMatcher<Math_Class_Values> NAME_MATCHER =
+                PropertyNames.getNameToEnums(Math_Class_Values.class);
+
+        private final PropertyNames<Math_Class_Values> names;
+
+        Math_Class_Values(String shortName, String... aliases) {
+            this.names =
+                    new PropertyNames<>(Math_Class_Values.class, this, shortName, aliases);
+        }
+
+        /** Returns the value that NAME_MATCHER associates with {@code name}. */
+        public static Math_Class_Values forName(String name) {
+            return NAME_MATCHER.get(name);
+        }
+
+        @Override
+        public PropertyNames<Math_Class_Values> getNames() {
+            return names;
+        }
+
+        @Override
+        public String getShortName() {
+            return names.getShortName();
+        }
+    }
+
+    /** Values of the Math_Class_Ex property; each constant has a short name and optional aliases. */
+    public enum Math_Class_Ex_Values implements Named {
+        None("None"),
+        Normal("N"),
+        Alphabetic("A"),
+        Binary("B"),
+        Closing("C"),
+        Diacritic("D"),
+        Fence("F"),
+        Glyph_Part("G"),
+        Large("L"),
+        Opening("O"),
+        Punctuation("P"),
+        Relation("R", "R?"),
+        Space("S"),
+        Unary("U"),
+        Vary("V"),
+        Special("X");
+
+        /** Name lookup table for this enum, obtained from PropertyNames. */
+        private static final NameMatcher<Math_Class_Ex_Values> NAME_MATCHER =
+                PropertyNames.getNameToEnums(Math_Class_Ex_Values.class);
+
+        private final PropertyNames<Math_Class_Ex_Values> names;
+
+        Math_Class_Ex_Values(String shortName, String... aliases) {
+            this.names =
+                    new PropertyNames<>(Math_Class_Ex_Values.class, this, shortName, aliases);
+        }
+
+        /** Returns the value that NAME_MATCHER associates with {@code name}. */
+        public static Math_Class_Ex_Values forName(String name) {
+            return NAME_MATCHER.get(name);
+        }
+
+        @Override
+        public PropertyNames<Math_Class_Ex_Values> getNames() {
+            return names;
+        }
+
+        @Override
+        public String getShortName() {
+            return names.getShortName();
+        }
+    }
+
+    // Math_Descriptive_Comments
+    // Math_Entity_Name
+    // Math_Entity_Set
     // Name
     // Name_Alias
     // Named_Sequences