From de44ea74b6cd0416c7951da4f81c972aca9637a7 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 19:36:31 +0100
Subject: [PATCH 01/14] =?UTF-8?q?Don=E2=80=99t=20invert=20twice=20on=20com?=
 =?UTF-8?q?parison=20queries,=20add=20support=20for=20null=20queries,=20al?=
 =?UTF-8?q?ign=20identity=20queries=20with=20the=20draft?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../org/unicode/jsp/UnicodeSetUtilities.java  | 10 +++++++-
 .../org/unicode/jsptest/TestUnicodeSet.java   | 23 +++++++++++++++++--
 docs/help/list-unicodeset.md                  |  4 ++--
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
index 34eed8b30d..dd2075df27 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
@@ -250,13 +250,16 @@ private boolean applyPropertyAlias0(
             }
             UnicodeProperty otherProperty = null;
             boolean testCp = false;
+            boolean testNone = false;
             if (trimmedPropertyValue.length() > 1
                     && trimmedPropertyValue.startsWith("@")
                     && trimmedPropertyValue.endsWith("@")) {
                 String otherPropName =
                         trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
-                if ("cp".equalsIgnoreCase(otherPropName)) {
+                if (UnicodeProperty.equalNames("code point", otherPropName)) {
                     testCp = true;
+                } else if (UnicodeProperty.equalNames("none", otherPropName)) {
+                    testNone = true;
                 } else {
                     otherProperty = factory.getProperty(otherPropName);
                 }
@@ -270,8 +273,12 @@ private boolean applyPropertyAlias0(
                         if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
                             set.add(i);
                         }
+                        invert = false;
                     }
+                } else if (testNone) {
+                    set = prop.getSet(UnicodeProperty.NULL_MATCHER);
                 } else if (otherProperty != null) {
+                    System.err.println(otherProperty + ", " + invert);
                     set = new UnicodeSet();
                     for (int i = 0; i <= 0x10FFFF; ++i) {
                         String v1 = prop.getValue(i);
@@ -279,6 +286,7 @@ private boolean applyPropertyAlias0(
                         if (invert != UnicodeProperty.equals(v1, v2)) {
                             set.add(i);
                         }
+                        invert = false;
                     }
                 } else if (patternMatcher == null) {
                     if (!isValid(prop, propertyValue)) {
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index a68acbc108..eabddd810c 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -141,6 +141,25 @@ public void TestPretty() {
         logln(derived);
     }
 
+    @Test
+    public void TestInteriorlyNegatedComparison() {
+        checkProperties("\\p{Uppercase≠@Changes_When_Lowercased@}", "[𝕬-𝖅]");
+        checkSetsEqual(
+                "\\p{Uppercase≠@Changes_When_Lowercased@}",
+                "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]");
+    }
+
+    @Test
+    public void TestIdentityQuery() {
+        checkSetsEqual("\\p{NFKC_Casefold=@codepoint@}", "\\P{Changes_When_NFKC_Casefolded}");
+        checkSetsEqual("\\p{NFKC_Casefold=@Code_Point@}", "\\P{Changes_When_NFKC_Casefolded}");
+    }
+
+    @Test
+    public void TestNullQuery() {
+        checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=None}");
+    }
+
     //    public void TestAExemplars() {
     //        checkProperties("[:exemplars_en:]", "[a]", "[\u0350]");
     //    }
@@ -380,7 +399,7 @@ public void TestGC() {
     public void TestNF() {
         for (String nf : new String[] {"d", "c", "kd", "kc"}) {
             checkSetsEqual("[:isnf" + nf + ":]", "[:nf" + nf + "qc!=N:]");
-            checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@cp@:]");
+            checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@code point@:]");
         }
     }
 
@@ -479,7 +498,7 @@ public void TestSetSyntax() {
         checkProperties("\\p{isNFC}", "[:ASCII:]", "[\u212B]");
         checkProperties("[:isNFC=no:]", "[\u212B]", "[:ASCII:]");
         checkProperties("[:dt!=none:]&[:toNFD=/^\\p{ccc:0}/:]", "[\u00A0]", "[\u0340]");
-        checkProperties("[:toLowercase!=@cp@:]", "[A-Z\u00C0]", "[abc]");
+        checkProperties("[:toLowercase!=@code point@:]", "[A-Z\u00C0]", "[abc]");
         checkProperties("[:toNfkc!=@toNfc@:]", "[\\u00A0]", "[abc]");
 
         String trans1 = Common.NFKC_CF.transform("\u2065");
diff --git a/docs/help/list-unicodeset.md b/docs/help/list-unicodeset.md
index 1d9f22ff82..9afa8c3a27 100644
--- a/docs/help/list-unicodeset.md
+++ b/docs/help/list-unicodeset.md
@@ -113,7 +113,7 @@ There is a special property "cp" that returns the code point itself. For
 example:
 
 *   Find the characters whose lowercase is different:
-    [`\p{toLowercase!=@cp@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40cp%40%7D&g=)
+    [`\p{toLowercase!=@code point@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40code%20point%40%7D&g=)
 
 ## **Available Properties**
 
@@ -157,7 +157,7 @@ then set the Group By box to the property name.
     1.  uca (the primary UCA weight -- after the CLDR transforms),
     2.  uca2 (the primary and secondary weights)
 
-Normally, \\p{isX} is equivalent to `\p{toX=@cp@}`. There are some exceptions and
+Normally, \\p{isX} is equivalent to `\p{toX=@code point@}`. There are some exceptions and
 missing cases.
 
 Note: The Unassigned, Surrogate, and Private Use code points are skipped in the

From 73485b303bcd94377b43fa8e1d91b0c07ccde4f9 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 19:41:55 +0100
Subject: [PATCH 02/14] more tests

---
 .../test/java/org/unicode/jsptest/TestUnicodeSet.java    | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index eabddd810c..15da98ca8a 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -144,6 +144,10 @@ public void TestPretty() {
     @Test
     public void TestInteriorlyNegatedComparison() {
         checkProperties("\\p{Uppercase≠@Changes_When_Lowercased@}", "[𝕬-𝖅]");
+        checkSetsEqual(
+                "\\p{Uppercase≠@Changes_When_Lowercased@}",
+                "\\P{Uppercase=@Changes_When_Lowercased@}");
+
         checkSetsEqual(
                 "\\p{Uppercase≠@Changes_When_Lowercased@}",
                 "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]");
@@ -151,13 +155,14 @@ public void TestInteriorlyNegatedComparison() {
 
     @Test
     public void TestIdentityQuery() {
-        checkSetsEqual("\\p{NFKC_Casefold=@codepoint@}", "\\P{Changes_When_NFKC_Casefolded}");
-        checkSetsEqual("\\p{NFKC_Casefold=@Code_Point@}", "\\P{Changes_When_NFKC_Casefolded}");
+        checkSetsEqual("\\p{NFKC_Casefold=@code point@}", "\\P{Changes_When_NFKC_Casefolded}");
+        checkSetsEqual("\\p{NFKC_Casefold≠@Code_Point@}", "\\p{Changes_When_NFKC_Casefolded}");
     }
 
     @Test
     public void TestNullQuery() {
         checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=None}");
+        checkSetsEqual("\\p{Bidi_Paired_Bracket≠@None@}", "\\p{Bidi_Paired_Bracket_Type≠None}");
     }
 
     //    public void TestAExemplars() {

From 99d5625723d058d958ebf195da9b62ceec6d1a27 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 19:59:51 +0100
Subject: [PATCH 03/14] LM3 is

---
 .../src/test/java/org/unicode/jsptest/TestUnicodeSet.java  | 4 ++--
 .../src/main/java/org/unicode/props/UnicodeProperty.java   | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index 15da98ca8a..fe817fa9d5 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -149,7 +149,7 @@ public void TestInteriorlyNegatedComparison() {
                 "\\P{Uppercase=@Changes_When_Lowercased@}");
 
         checkSetsEqual(
-                "\\p{Uppercase≠@Changes_When_Lowercased@}",
+                "\\p{Is_Uppercase≠@Changes_When_Lowercased@}",
                 "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]");
     }
 
@@ -161,7 +161,7 @@ public void TestIdentityQuery() {
 
     @Test
     public void TestNullQuery() {
-        checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=None}");
+        checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=Is_None}");
         checkSetsEqual("\\p{Bidi_Paired_Bracket≠@None@}", "\\p{Bidi_Paired_Bracket_Type≠None}");
     }
 
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 208d98f974..0169761b3c 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -691,8 +691,7 @@ public static int compareNames(String a, String b) {
         return toSkeleton(a).compareTo(toSkeleton(b));
     }
 
-    /** Utility for managing property & non-string value aliases */
-    // TODO account for special names, tibetan, hangul
+    /** Returns a representative of the equivalence class of source under UAX44-LM3. */
     public static String toSkeleton(String source) {
         if (source == null) return null;
         StringBuffer skeletonBuffer = new StringBuffer();
@@ -713,6 +712,10 @@ public static String toSkeleton(String source) {
                 }
             }
         }
+        while (skeletonBuffer.subSequence(0, 2).equals("is")) {
+            gotOne = true;
+            skeletonBuffer.delete(0, 2);
+        }
         if (!gotOne) return source; // avoid string creation
         return skeletonBuffer.toString();
     }

From 2bb2c23c9a41a023f7a8c4f6aa2297778ac755dc Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 20:06:59 +0100
Subject: [PATCH 04/14] comments

---
 .../src/main/java/org/unicode/jsp/UnicodeSetUtilities.java      | 2 ++
 .../src/main/java/org/unicode/props/UnicodeProperty.java        | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
index dd2075df27..15b726d4d6 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
@@ -264,6 +264,8 @@ private boolean applyPropertyAlias0(
                     otherProperty = factory.getProperty(otherPropName);
                 }
             }
+            // TODO(egg): Name and Name_Alias require special handling (UAX44-LM2), and
+            // treating Name_Alias as aliases for Name.
             boolean isAge = UnicodeProperty.equalNames("age", propertyName);
             if (prop != null) {
                 UnicodeSet set;
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 0169761b3c..1e1f755497 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -720,7 +720,7 @@ public static String toSkeleton(String source) {
         return skeletonBuffer.toString();
     }
 
-    // get the name skeleton
+    /** Returns a representative of the equivalence class of source under UAX44-LM2. */
     public static String toNameSkeleton(String source) {
         if (source == null) return null;
         StringBuffer result = new StringBuffer();

From ae2750c6a432288d50437e1937ee568c4946828d Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 20:52:22 +0100
Subject: [PATCH 05/14] out of bounds

---
 .../src/main/java/org/unicode/props/UnicodeProperty.java        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 1e1f755497..ee1ef259b9 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -712,7 +712,7 @@ public static String toSkeleton(String source) {
                 }
             }
         }
-        while (skeletonBuffer.subSequence(0, 2).equals("is")) {
+        while (skeletonBuffer.length() >= 2 && skeletonBuffer.subSequence(0, 2).equals("is")) {
             gotOne = true;
             skeletonBuffer.delete(0, 2);
         }

From 244f2e26c3aaf8408d28f337ff1ab16a726720ec Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 20:59:51 +0100
Subject: [PATCH 06/14] Check lb=@none@ (though that should probably be an
 error).

---
 .../src/test/java/org/unicode/jsptest/TestUnicodeSet.java   | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index fe817fa9d5..a47d607897 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -161,6 +161,12 @@ public void TestIdentityQuery() {
 
     @Test
     public void TestNullQuery() {
+        // Check that we are not falling into the trap described in
+        // https://www.unicode.org/reports/tr44/#UAX44-LM3.
+        checkProperties("\\p{lb=IS}", "[,.:;]");
+        // TODO(egg): This should perhaps be an error. But if it is not an error, it
+        // should be empty.
+        checkSetsEqual("\\p{lb=@none@}", "[]");
         checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=Is_None}");
         checkSetsEqual("\\p{Bidi_Paired_Bracket≠@None@}", "\\p{Bidi_Paired_Bracket_Type≠None}");
     }

From 30bca05aa4cd44ed3c3e228838f68a95a4c5cefe Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Tue, 11 Mar 2025 21:16:05 +0100
Subject: [PATCH 07/14] Millionfold falsification

---
 .../src/main/java/org/unicode/jsp/UnicodeSetUtilities.java    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
index 15b726d4d6..e4f323a3db 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
@@ -275,8 +275,8 @@ private boolean applyPropertyAlias0(
                         if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
                             set.add(i);
                         }
-                        invert = false;
                     }
+                    invert = false;
                 } else if (testNone) {
                     set = prop.getSet(UnicodeProperty.NULL_MATCHER);
                 } else if (otherProperty != null) {
@@ -288,8 +288,8 @@ private boolean applyPropertyAlias0(
                         if (invert != UnicodeProperty.equals(v1, v2)) {
                             set.add(i);
                         }
-                        invert = false;
                     }
+                    invert = false;
                 } else if (patternMatcher == null) {
                     if (!isValid(prop, propertyValue)) {
                         throw new IllegalArgumentException(

From 1c157fb06ee7e5fe811ff0cbc160beab2c3b11ab Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 12 Mar 2025 04:40:45 +0100
Subject: [PATCH 08/14] =?UTF-8?q?Need=20to=20figure=20out=20how=20to=20mak?=
 =?UTF-8?q?e=20Name=5FAlias=20behave=20as=20an=20alias=20for=20Name?=
 =?UTF-8?q?=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../org/unicode/jsptest/TestUnicodeSet.java   | 29 ++++++++++++++
 .../unicode/props/IndexUnicodeProperties.java |  1 +
 .../unicode/props/PropertyParsingInfo.java    |  4 +-
 .../org/unicode/props/UnicodeProperty.java    | 39 ++++++++++++++++---
 4 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index a47d607897..87324472c5 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -37,6 +37,8 @@
 import org.unicode.jsp.UnicodeSetUtilities;
 import org.unicode.jsp.UnicodeUtilities;
 import org.unicode.jsp.XPropertyFactory;
+import org.unicode.props.IndexUnicodeProperties;
+import org.unicode.props.UcdProperty;
 import org.unicode.props.UnicodeProperty;
 
 public class TestUnicodeSet extends TestFmwk2 {
@@ -153,6 +155,33 @@ public void TestInteriorlyNegatedComparison() {
                 "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]");
     }
 
+    @Test
+    public void TestNameMatching() {
+        // UAX44-LM2 for both Name and Name_Alias.
+        checkSetsEqual("\\p{Name=NO-BREAK SPACE}", "[\\xA0]");
+        checkSetsEqual("\\p{Name=no break space}", "[\\xA0]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG O-E}", "[\\u1180]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG OE}", "[\\u116C]");
+        checkSetsEqual("\\p{Name=MARCHEN LETTER -A}", "[\\x{11C88}]");
+        checkSetsEqual("\\p{Name=MARCHEN LETTER A}", "[\\x{11C8F}]");
+        checkSetsEqual("\\p{Name=TIBETAN MARK TSA -PHRU}", "[\\u0F39]");
+        checkSetsEqual("\\p{Name=TIBETAN MARK TSA PHRU}", "[]");
+        checkSetsEqual("\\p{Name=TIBETAN MARK BKA- SHOG YIG MGO}", "[\\u0F0A]");
+        checkSetsEqual("\\p{Name=TIBETAN MARK BKA SHOG YIG MGO}", "[]");
+        checkSetsEqual("\\p{Name_Alias=newline}", "[\\x0A]");
+        checkSetsEqual("\\p{Name_Alias=NEW LINE}", "[\\x0A]");
+    }
+
+    @Test
+    public void TestNameAliases() {
+        // Name_Alias values behave as aliases for Name, but not vice-versa.
+        checkSetsEqual("\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}", "[︘]");
+        checkSetsEqual("\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}", "[︘]");
+        checkSetsEqual("\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}", "[]");
+        checkSetsEqual("\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}", "[︘]");
+        checkProperties("\\p{Name_Alias=@none@}", "[a-z]");
+    }
+
     @Test
     public void TestIdentityQuery() {
         checkSetsEqual("\\p{NFKC_Casefold=@code point@}", "\\P{Changes_When_NFKC_Casefolded}");
diff --git a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
index bdbd14e582..b9c68d61d8 100644
--- a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
+++ b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
@@ -196,6 +196,7 @@ public Map<UcdProperty, Long> getCacheFileSize() {
     static final Transform<String, String> fromNumericPinyin =
             Transliterator.getInstance("NumericPinyin-Latin;nfc");
 
+    static final Merge<String> MULTIVALUED_JOINER = new PropertyUtilities.Joiner("|");
     static final Merge<String> ALPHABETIC_JOINER =
             new Merge<String>() {
                 TreeSet<String> sorted = new TreeSet<String>();
diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
index 5e60f04271..5b08551d36 100644
--- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
+++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java
@@ -884,7 +884,7 @@ private static void parsePropertyValueFile(
                                         && indexUnicodeProperties.ucdVersion.compareTo(
                                                         VersionInfo.UNICODE_4_0)
                                                 <= 0
-                                ? new PropertyUtilities.Joiner("|")
+                                ? IndexUnicodeProperties.MULTIVALUED_JOINER
                                 : null;
                 final var originalMultivaluedSplit = propInfo.multivaluedSplit;
                 // The first version of kPrimaryNumeric had spaces in values.
@@ -995,7 +995,7 @@ private static void parseNameAliasesFile(
                     indexUnicodeProperties,
                     nextProperties,
                     propInfoSet,
-                    IndexUnicodeProperties.ALPHABETIC_JOINER,
+                    IndexUnicodeProperties.MULTIVALUED_JOINER,
                     false);
         }
     }
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index ee1ef259b9..10c8d03241 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -448,7 +448,11 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
                             ? NULL_MATCHER
                             : new SimpleMatcher(
                                     propertyValue,
-                                    isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
+                                    getName().equals("Name") || getName().equals("Name_Alias")
+                                            ? CHARACTER_NAME_COMPARATOR
+                                            : isType(STRING_OR_MISC_MASK)
+                                                    ? null
+                                                    : PROPERTY_COMPARATOR),
                     result);
         }
     }
@@ -720,8 +724,25 @@ public static String toSkeleton(String source) {
         return skeletonBuffer.toString();
     }
 
-    /** Returns a representative of the equivalence class of source under UAX44-LM2. */
-    public static String toNameSkeleton(String source) {
+    public static final Comparator<String> CHARACTER_NAME_COMPARATOR =
+            new Comparator<String>() {
+                @Override
+                public int compare(String o1, String o2) {
+                    return compareCharacterNames(o1, o2);
+                }
+            };
+
+    public static int compareCharacterNames(String a, String b) {
+        if (a == b) return 0;
+        if (a == null) return -1;
+        if (b == null) return 1;
+        return toNameSkeleton(a, false).compareTo(toNameSkeleton(b, false));
+    }
+
+    /** Returns a representative of the equivalence class of source under UAX44-LM2.
+     * If validate=true, checks that source contains only characters allowed in character names.
+     */
+    public static String toNameSkeleton(String source, boolean validate) {
         if (source == null) return null;
         StringBuffer result = new StringBuffer();
         // remove spaces, medial '-'
@@ -741,18 +762,26 @@ public static String toNameSkeleton(String source) {
                         || (i == source.length() - 2
                                 && source.charAt(i - 1) == 'O'
                                 && source.charAt(i + 1) == 'E')) {
-                    System.out.println("****** EXCEPTION " + source);
+                    if (validate) {
+                        System.out.println("****** EXCEPTION " + source);
+                    }
                     result.append(ch);
                 }
                 // otherwise don't copy
-            } else {
+            } else if (validate) {
                 throw new IllegalArgumentException(
                         "Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
+            } else if (ch != '_') {
+                result.append(Character.toUpperCase(ch));
             }
         }
         return result.toString();
     }
 
+    public static String toNameSkeleton(String source) {
+        return toNameSkeleton(source, true);
+    }
+
     /**
      * These routines use the Java functions, because they only need to act on ASCII Changes space,
      * - into _, inserts _ between lower and UPPER.

From 29bc3191b74a30d857a63995644f316dcb554690 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Thu, 13 Mar 2025 00:07:45 +0100
Subject: [PATCH 09/14] =?UTF-8?q?Name=5FAlias=20as=20a=20Name=20alias;=20f?=
 =?UTF-8?q?ailing=20test=20(on=20ne=20fait=20pas=20d=E2=80=99omelette=20sa?=
 =?UTF-8?q?ns=20casser=20des=20=C5=93ufs)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../org/unicode/jsp/UnicodeSetUtilities.java  | 11 ++++++++++
 .../org/unicode/jsptest/TestUnicodeSet.java   | 21 +++++++++++++++----
 .../unicode/props/IndexUnicodeProperties.java |  6 +++++-
 .../org/unicode/props/UnicodeProperty.java    |  5 +++--
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
index 98266713ec..6e3e40f969 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
@@ -14,6 +14,8 @@
 import java.util.Map;
 import java.util.regex.Pattern;
 import org.unicode.cldr.util.MultiComparator;
+import org.unicode.props.IndexUnicodeProperties;
+import org.unicode.props.UcdProperty;
 import org.unicode.props.UcdPropertyValues;
 import org.unicode.props.UnicodeProperty;
 import org.unicode.props.UnicodeProperty.PatternMatcher;
@@ -340,6 +342,15 @@ private boolean applyPropertyAlias0(
                             }
                         }
                         set = prop.getSet(propertyValue);
+                        if (set.isEmpty()
+                                && prop instanceof IndexUnicodeProperties.IndexUnicodeProperty
+                                && prop.getName().equals("Name")) {
+                            set =
+                                    ((IndexUnicodeProperties.IndexUnicodeProperty) prop)
+                                            .getFactory()
+                                            .getProperty(UcdProperty.Name_Alias)
+                                            .getSet(propertyValue);
+                        }
                     }
                 } else if (isAge) {
                     set = new UnicodeSet();
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index e527ea970f..30e168c44d 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -182,6 +182,8 @@ public void TestNameMatching() {
         checkSetsEqual("\\p{Name=no break space}", "[\\xA0]");
         checkSetsEqual("\\p{Name=HANGUL JUNGSEONG O-E}", "[\\u1180]");
         checkSetsEqual("\\p{Name=HANGUL JUNGSEONG OE}", "[\\u116C]");
+        checkSetsEqual("\\p{Name=Hangul jungseong o-e}", "[\\u1180]");
+        checkSetsEqual("\\p{Name=Hangul jungseong oe}", "[\\u116C]");
         checkSetsEqual("\\p{Name=MARCHEN LETTER -A}", "[\\x{11C88}]");
         checkSetsEqual("\\p{Name=MARCHEN LETTER A}", "[\\x{11C8F}]");
         checkSetsEqual("\\p{Name=TIBETAN MARK TSA -PHRU}", "[\\u0F39]");
@@ -190,15 +192,26 @@ public void TestNameMatching() {
         checkSetsEqual("\\p{Name=TIBETAN MARK BKA SHOG YIG MGO}", "[]");
         checkSetsEqual("\\p{Name_Alias=newline}", "[\\x0A]");
         checkSetsEqual("\\p{Name_Alias=NEW LINE}", "[\\x0A]");
+        // The medial hyphen is only significant in HANGUL JUNGSEONG O-E, not in arbitrary O-E/OE.
+        checkSetsEqual("\\p{Name=twoemdash}", "⸺");
+        checkSetsEqual("\\p{Name=SeeNoEvil_Monkey}", "🙈");
+        checkSetsEqual("\\p{Name=BALLET S-H-O-E-S}", "🩰");
+        checkSetsEqual("[\\p{Name=LATIN SMALL LIGATURE O-E}uf]", "[œuf]");
     }
 
     @Test
     public void TestNameAliases() {
         // Name_Alias values behave as aliases for Name, but not vice-versa.
-        checkSetsEqual("\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}", "[︘]");
-        checkSetsEqual("\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}", "[︘]");
-        checkSetsEqual("\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}", "[]");
-        checkSetsEqual("\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}", "[︘]");
+        checkSetsEqual(
+                "\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}", "[︘]");
+        checkSetsEqual(
+                "\\p{Name=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}", "[︘]");
+        checkSetsEqual(
+                "\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}",
+                "[]");
+        checkSetsEqual(
+                "\\p{Name_Alias=PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}",
+                "[︘]");
         checkProperties("\\p{Name_Alias=@none@}", "[a-z]");
     }
 
diff --git a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
index 107cffab33..241cb7b921 100644
--- a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
+++ b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java
@@ -685,7 +685,7 @@ public VersionInfo getUcdVersion() {
     //        .get(toSkeleton(propertyAlias));
     //    }
 
-    class IndexUnicodeProperty extends UnicodeProperty.BaseProperty {
+    public class IndexUnicodeProperty extends UnicodeProperty.BaseProperty {
 
         private final UcdProperty prop;
         private final Map<String, PropertyNames> stringToNamedEnum;
@@ -725,6 +725,10 @@ class IndexUnicodeProperty extends UnicodeProperty.BaseProperty {
             }
         }
 
+        public IndexUnicodeProperties getFactory() {
+            return IndexUnicodeProperties.this;
+        }
+
         @Override
         public boolean isTrivial() {
             return _getRawUnicodeMap().isEmpty()
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 10c8d03241..f07557b749 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -739,8 +739,9 @@ public static int compareCharacterNames(String a, String b) {
         return toNameSkeleton(a, false).compareTo(toNameSkeleton(b, false));
     }
 
-    /** Returns a representative of the equivalence class of source under UAX44-LM2.
-     * If validate=true, checks that source contains only characters allowed in character names.
+    /**
+     * Returns a representative of the equivalence class of source under UAX44-LM2. If
+     * validate=true, throws IllegalArgumentException on characters not allowed in character names.
      */
     public static String toNameSkeleton(String source, boolean validate) {
         if (source == null) return null;

From 939c80b23dd8feed8e4bbb558e280878e642b1da Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Thu, 13 Mar 2025 00:50:00 +0100
Subject: [PATCH 10/14] Put Humpty Dumpty together again.

---
 .../org/unicode/jsptest/TestUnicodeSet.java   |  7 ++++
 .../org/unicode/props/UnicodeProperty.java    | 41 +++++++++++++++----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index 30e168c44d..a87f07ef89 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -184,7 +184,14 @@ public void TestNameMatching() {
         checkSetsEqual("\\p{Name=HANGUL JUNGSEONG OE}", "[\\u116C]");
         checkSetsEqual("\\p{Name=Hangul jungseong o-e}", "[\\u1180]");
         checkSetsEqual("\\p{Name=Hangul jungseong oe}", "[\\u116C]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG O -E}", "[\\u1180]");
+        checkSetsEqual("\\p{Name= HANGUL JUNGSEONG O-E }", "[\\u1180]");
+        checkSetsEqual("\\p{Name=_HANGUL_JUNGSEONG_O-E_}", "[\\u1180]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG O-EO}", "[\\u117F]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG OE O}", "[\\u117F]");
+        checkSetsEqual("\\p{Name=HANGUL JUNGSEONG O -EO}", "[]");
         checkSetsEqual("\\p{Name=MARCHEN LETTER -A}", "[\\x{11C88}]");
+        checkSetsEqual("\\p{Name=MARCHEN_LETTER_-A}", "[\\x{11C88}]");
         checkSetsEqual("\\p{Name=MARCHEN LETTER A}", "[\\x{11C8F}]");
         checkSetsEqual("\\p{Name=TIBETAN MARK TSA -PHRU}", "[\\u0F39]");
         checkSetsEqual("\\p{Name=TIBETAN MARK TSA PHRU}", "[]");
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index f07557b749..43602878a3 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -750,19 +750,44 @@ public static String toNameSkeleton(String source, boolean validate) {
         // we can do this with char, since no surrogates are involved
         for (int i = 0; i < source.length(); ++i) {
             char ch = source.charAt(i);
+            final char uppercase = Character.toUpperCase(ch);
+            if (validate && uppercase != ch) {
+                throw new IllegalArgumentException(
+                        "Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
+            }
+            ch = uppercase;
             if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') {
                 result.append(ch);
             } else if (ch == ' ') {
                 // don't copy ever
             } else if (ch == '-') {
-                // only copy non-medials AND trailing O-E
-                if (0 == i
-                        || i == source.length() - 1
-                        || source.charAt(i - 1) == ' '
-                        || source.charAt(i + 1) == ' '
-                        || (i == source.length() - 2
-                                && source.charAt(i - 1) == 'O'
-                                && source.charAt(i + 1) == 'E')) {
+                // Only copy a hyphen-minus if it is non-medial, or if it is
+                // the hyphen in U+1180 HANGUL JUNGSEONG O-E.
+                boolean medial;
+                if (0 == i || i == source.length() - 1) {
+                    medial = false; // Name-initial or name-final.
+                } else {
+                    final char preceding = Character.toUpperCase(source.charAt(i - 1));
+                    final char following = Character.toUpperCase(source.charAt(i + 1));
+                    medial =
+                            (('0' <= preceding && preceding <= '9')
+                                            || ('A' <= preceding && preceding <= 'Z'))
+                                    && (('0' <= following && following <= '9')
+                                            || ('A' <= following && following <= 'Z'));
+                }
+                boolean is1180 = false;
+                if (medial
+                        && i <= source.length() - 2
+                        && Character.toUpperCase(source.charAt(i + 1)) == 'E'
+                        && result.toString().equals("HANGULJUNGSEONGO")) {
+                    is1180 = true;
+                    for (int j = i + 2; j < source.length(); ++j) {
+                        if (source.charAt(j) != ' ' && source.charAt(j) != '_') {
+                            is1180 = false;
+                        }
+                    }
+                }
+                if (!medial || is1180) {
                     if (validate) {
                         System.out.println("****** EXCEPTION " + source);
                     }

From ed26f6206c4c0aa3859efff35051dc65503d4f4e Mon Sep 17 00:00:00 2001
From: Robin Leroy <eggrobin@unicode.org>
Date: Thu, 13 Mar 2025 19:23:07 +0100
Subject: [PATCH 11/14] uppercase once

Co-authored-by: Markus Scherer <markus.icu@gmail.com>
---
 .../src/main/java/org/unicode/props/UnicodeProperty.java        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 43602878a3..23974c19e9 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -798,7 +798,7 @@ public static String toNameSkeleton(String source, boolean validate) {
                 throw new IllegalArgumentException(
                         "Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
             } else if (ch != '_') {
-                result.append(Character.toUpperCase(ch));
+                result.append(ch);
             }
         }
         return result.toString();

From 2890745b9b3a444fb541a7267e776568a3ac01ed Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Thu, 13 Mar 2025 19:38:24 +0100
Subject: [PATCH 12/14] =?UTF-8?q?Don=E2=80=99t=20yell=20about=20non-medial?=
 =?UTF-8?q?=20hyphens=20nor=20the=20hyphen=20in=20U+1180.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/main/java/org/unicode/props/UnicodeProperty.java       | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 23974c19e9..a15233ad9e 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -788,9 +788,6 @@ public static String toNameSkeleton(String source, boolean validate) {
                     }
                 }
                 if (!medial || is1180) {
-                    if (validate) {
-                        System.out.println("****** EXCEPTION " + source);
-                    }
                     result.append(ch);
                 }
                 // otherwise don't copy

From a91b6cb1ac91fce23e2c2aa230caf97d1c40bf4c Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Thu, 13 Mar 2025 19:39:22 +0100
Subject: [PATCH 13/14] s/ff/ild/g

---
 .../src/main/java/org/unicode/props/UnicodeProperty.java        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index a15233ad9e..e6fe2984d7 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -745,7 +745,7 @@ public static int compareCharacterNames(String a, String b) {
      */
     public static String toNameSkeleton(String source, boolean validate) {
         if (source == null) return null;
-        StringBuffer result = new StringBuffer();
+        StringBuilder result = new StringBuilder();
         // remove spaces, medial '-'
         // we can do this with char, since no surrogates are involved
         for (int i = 0; i < source.length(); ++i) {

From bcc8e29def9c7c45de35bcb71e88539552c59c6d Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Thu, 13 Mar 2025 19:43:16 +0100
Subject: [PATCH 14/14] isLetterOrDigit

---
 .../src/main/java/org/unicode/props/UnicodeProperty.java  | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index e6fe2984d7..e54761b123 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -767,13 +767,9 @@ public static String toNameSkeleton(String source, boolean validate) {
                 if (0 == i || i == source.length() - 1) {
                     medial = false; // Name-initial or name-final.
                 } else {
-                    final char preceding = Character.toUpperCase(source.charAt(i - 1));
-                    final char following = Character.toUpperCase(source.charAt(i + 1));
                     medial =
-                            (('0' <= preceding && preceding <= '9')
-                                            || ('A' <= preceding && preceding <= 'Z'))
-                                    && (('0' <= following && following <= '9')
-                                            || ('A' <= following && following <= 'Z'));
+                            Character.isLetterOrDigit(source.charAt(i - 1))
+                                    && Character.isLetterOrDigit(source.charAt(i + 1));
                 }
                 boolean is1180 = false;
                 if (medial