Don’t invert twice on comparison queries (#1057)

eggrobin · web-flow · commit c60a326cf106 · 2025-03-12T19:31:51.000+01:00
diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java
@@ -250,17 +250,22 @@ private boolean applyPropertyAlias0(
             }
             UnicodeProperty otherProperty = null;
             boolean testCp = false;
+            boolean testNone = false;
             if (trimmedPropertyValue.length() > 1
                     && trimmedPropertyValue.startsWith("@")
                     && trimmedPropertyValue.endsWith("@")) {
                 String otherPropName =
                         trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
-                if ("cp".equalsIgnoreCase(otherPropName)) {
+                if (UnicodeProperty.equalNames("code point", otherPropName)) {
                     testCp = true;
+                } else if (UnicodeProperty.equalNames("none", otherPropName)) {
+                    testNone = true;
                 } else {
                     otherProperty = factory.getProperty(otherPropName);
                 }
             }
+            // TODO(egg): Name and Name_Alias require special handling: loose matching per
+            // UAX44-LM2, and treating Name_Alias values as aliases for Name.
             boolean isAge = UnicodeProperty.equalNames("age", propertyName);
             if (prop != null) {
                 UnicodeSet set;
@@ -271,7 +276,11 @@ private boolean applyPropertyAlias0(
                             set.add(i);
                         }
                     }
+                    invert = false;
+                } else if (testNone) {
+                    set = prop.getSet(UnicodeProperty.NULL_MATCHER);
                 } else if (otherProperty != null) {
+                    System.err.println(otherProperty + ", " + invert);
                     set = new UnicodeSet();
                     for (int i = 0; i <= 0x10FFFF; ++i) {
                         String v1 = prop.getValue(i);
@@ -280,6 +289,7 @@ private boolean applyPropertyAlias0(
                             set.add(i);
                         }
                     }
+                    invert = false;
                 } else if (patternMatcher == null) {
                     if (!isValid(prop, propertyValue)) {
                         throw new IllegalArgumentException(
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -141,6 +141,36 @@ public void TestPretty() {
         logln(derived);
     }
 
+    @Test
+    public void TestInteriorlyNegatedComparison() {
+        checkProperties("\\p{Uppercase≠@Changes_When_Lowercased@}", "[𝕬-𝖅]");
+        checkSetsEqual(
+                "\\p{Uppercase≠@Changes_When_Lowercased@}",
+                "\\P{Uppercase=@Changes_When_Lowercased@}");
+
+        checkSetsEqual(
+                "\\p{Is_Uppercase≠@Changes_When_Lowercased@}",
+                "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]");
+    }
+
+    @Test
+    public void TestIdentityQuery() {
+        checkSetsEqual("\\p{NFKC_Casefold=@code point@}", "\\P{Changes_When_NFKC_Casefolded}");
+        checkSetsEqual("\\p{NFKC_Casefold≠@Code_Point@}", "\\p{Changes_When_NFKC_Casefolded}");
+    }
+
+    @Test
+    public void TestNullQuery() {
+        // Check that we are not falling into the trap described in
+        // https://www.unicode.org/reports/tr44/#UAX44-LM3.
+        checkProperties("\\p{lb=IS}", "[,.:;]");
+        // TODO(egg): This should perhaps be an error. But if it is not an error, it
+        // should be empty.
+        checkSetsEqual("\\p{lb=@none@}", "[]");
+        checkSetsEqual("\\p{Bidi_Paired_Bracket=@none@}", "\\p{Bidi_Paired_Bracket_Type=Is_None}");
+        checkSetsEqual("\\p{Bidi_Paired_Bracket≠@None@}", "\\p{Bidi_Paired_Bracket_Type≠None}");
+    }
+
     //    public void TestAExemplars() {
     //        checkProperties("[:exemplars_en:]", "[a]", "[\u0350]");
     //    }
@@ -380,7 +410,7 @@ public void TestGC() {
     public void TestNF() {
         for (String nf : new String[] {"d", "c", "kd", "kc"}) {
             checkSetsEqual("[:isnf" + nf + ":]", "[:nf" + nf + "qc!=N:]");
-            checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@cp@:]");
+            checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@code point@:]");
         }
     }
 
@@ -479,7 +509,7 @@ public void TestSetSyntax() {
         checkProperties("\\p{isNFC}", "[:ASCII:]", "[\u212B]");
         checkProperties("[:isNFC=no:]", "[\u212B]", "[:ASCII:]");
         checkProperties("[:dt!=none:]&[:toNFD=/^\\p{ccc:0}/:]", "[\u00A0]", "[\u0340]");
-        checkProperties("[:toLowercase!=@cp@:]", "[A-Z\u00C0]", "[abc]");
+        checkProperties("[:toLowercase!=@code point@:]", "[A-Z\u00C0]", "[abc]");
         checkProperties("[:toNfkc!=@toNfc@:]", "[\\u00A0]", "[abc]");
 
         String trans1 = Common.NFKC_CF.transform("\u2065");
diff --git a/docs/help/list-unicodeset.md b/docs/help/list-unicodeset.md
@@ -113,7 +113,7 @@ There is a special property "cp" that returns the code point itself. For
 example:
 
 *   Find the characters whose lowercase is different:
-    [`\p{toLowercase!=@cp@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40cp%40%7D&g=)
+    [`\p{toLowercase!=@code point@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40code%20point%40%7D&g=)
 
 ## **Available Properties**
 
@@ -157,7 +157,7 @@ then set the Group By box to the property name.
     1.  uca (the primary UCA weight -- after the CLDR transforms),
     2.  uca2 (the primary and secondary weights)
 
-Normally, \\p{isX} is equivalent to `\p{toX=@cp@}`. There are some exceptions and
+Normally, `\p{isX}` is equivalent to `\p{toX=@code point@}`. There are some exceptions and
 missing cases.
 
 Note: The Unassigned, Surrogate, and Private Use code points are skipped in the
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -691,8 +691,7 @@ public static int compareNames(String a, String b) {
         return toSkeleton(a).compareTo(toSkeleton(b));
     }
 
-    /** Utility for managing property & non-string value aliases */
-    // TODO account for special names, tibetan, hangul
+    /** Returns a representative of the equivalence class of source under UAX44-LM3. */
     public static String toSkeleton(String source) {
         if (source == null) return null;
         StringBuffer skeletonBuffer = new StringBuffer();
@@ -713,11 +712,15 @@ public static String toSkeleton(String source) {
                 }
             }
         }
+        while (skeletonBuffer.length() >= 2 && skeletonBuffer.subSequence(0, 2).equals("is")) {
+            gotOne = true;
+            skeletonBuffer.delete(0, 2);
+        }
         if (!gotOne) return source; // avoid string creation
         return skeletonBuffer.toString();
     }
 
-    // get the name skeleton
+    /** Returns a representative of the equivalence class of source under UAX44-LM2. */
     public static String toNameSkeleton(String source) {
         if (source == null) return null;
         StringBuffer result = new StringBuffer();

Original file line number	Diff line number	Diff line change
`@@ -691,8 +691,7 @@ public static int compareNames(String a, String b) {`
`691`	`691`	`return toSkeleton(a).compareTo(toSkeleton(b));`
`692`	`692`	`}`
`693`	`693`
`694`		`- /** Utility for managing property & non-string value aliases */`
`695`		`- // TODO account for special names, tibetan, hangul`
	`694`	`+ /** Returns a representative of the equivalence class of source under UAX44-LM3. */`
`696`	`695`	`public static String toSkeleton(String source) {`
`697`	`696`	`if (source == null) return null;`
`698`	`697`	`StringBuffer skeletonBuffer = new StringBuffer();`
`@@ -713,11 +712,15 @@ public static String toSkeleton(String source) {`
`713`	`712`	`}`
`714`	`713`	`}`
`715`	`714`	`}`
	`715`	`+ while (skeletonBuffer.length() >= 2 && skeletonBuffer.subSequence(0, 2).equals("is")) {`
	`716`	`+ gotOne = true;`
	`717`	`+ skeletonBuffer.delete(0, 2);`
	`718`	`+ }`
`716`	`719`	`if (!gotOne) return source; // avoid string creation`
`717`	`720`	`return skeletonBuffer.toString();`
`718`	`721`	`}`
`719`	`722`
`720`		`- // get the name skeleton`
	`723`	`+ /** Returns a representative of the equivalence class of source under UAX44-LM2. */`
`721`	`724`	`public static String toNameSkeleton(String source) {`
`722`	`725`	`if (source == null) return null;`
`723`	`726`	`StringBuffer result = new StringBuffer();`