Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,16 @@ private boolean applyPropertyAlias0(
}
UnicodeProperty otherProperty = null;
boolean testCp = false;
boolean testNone = false;
if (trimmedPropertyValue.length() > 1
&& trimmedPropertyValue.startsWith("@")
&& trimmedPropertyValue.endsWith("@")) {
String otherPropName =
trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
if ("cp".equalsIgnoreCase(otherPropName)) {
if (UnicodeProperty.equalNames("code point", otherPropName)) {
testCp = true;
} else if (UnicodeProperty.equalNames("none", otherPropName)) {
testNone = true;
} else {
otherProperty = factory.getProperty(otherPropName);
}
Expand All @@ -270,15 +273,20 @@ private boolean applyPropertyAlias0(
if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
set.add(i);
}
invert = false;
}
} else if (testNone) {
set = prop.getSet(UnicodeProperty.NULL_MATCHER);
} else if (otherProperty != null) {
System.err.println(otherProperty + ", " + invert);
set = new UnicodeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
String v1 = prop.getValue(i);
String v2 = otherProperty.getValue(i);
if (invert != UnicodeProperty.equals(v1, v2)) {
set.add(i);
}
invert = false;
}
} else if (patternMatcher == null) {
if (!isValid(prop, propertyValue)) {
Expand Down
23 changes: 21 additions & 2 deletions UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,25 @@ public void TestPretty() {
logln(derived);
}

/**
 * Exercises the {@code ≠} comparison of one property against another property named with the
 * {@code @...@} syntax.
 *
 * <p>The expected set used in the equality check is the symmetric difference of Uppercase and
 * Changes_When_Lowercased: their union minus their intersection.
 */
@Test
public void TestInteriorlyNegatedComparison() {
    final String negatedComparison = "\\p{Uppercase≠@Changes_When_Lowercased@}";
    final String symmetricDifference =
            "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]"
                    + "-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]";

    // Spot check against a fixed range first, then compare against the full expected set.
    checkProperties(negatedComparison, "[𝕬-𝖅]");
    checkSetsEqual(negatedComparison, symmetricDifference);
}

/**
 * Compares NFKC_Casefold against the code point itself, using two spellings of the
 * code-point pseudo-property between {@code @} signs.
 *
 * <p>Both spellings are expected to produce the complement of Changes_When_NFKC_Casefolded.
 */
@Test
public void TestIdentityQuery() {
    final String unchangedByCasefold = "\\P{Changes_When_NFKC_Casefolded}";
    // Presumably both spellings resolve to the same pseudo-property via loose name matching.
    for (String spelling : new String[] {"codepoint", "Code_Point"}) {
        checkSetsEqual("\\p{NFKC_Casefold=@" + spelling + "@}", unchangedByCasefold);
    }
}

/**
 * Checks that comparing Bidi_Paired_Bracket with the {@code @none@} pseudo-value yields the
 * same set as Bidi_Paired_Bracket_Type=None.
 */
@Test
public void TestNullQuery() {
    final String noPairedBracket = "\\p{Bidi_Paired_Bracket=@none@}";
    final String bracketTypeNone = "\\p{Bidi_Paired_Bracket_Type=None}";
    checkSetsEqual(noPairedBracket, bracketTypeNone);
}

// public void TestAExemplars() {
// checkProperties("[:exemplars_en:]", "[a]", "[\u0350]");
// }
Expand Down Expand Up @@ -380,7 +399,7 @@ public void TestGC() {
public void TestNF() {
// For each normalization form suffix, the [:isnfX:] set is compared with equivalent
// formulations built from the quick-check property and from the toNFX mapping.
for (String nf : new String[] {"d", "c", "kd", "kc"}) {
// isNFX versus "quick check value is not N".
checkSetsEqual("[:isnf" + nf + ":]", "[:nf" + nf + "qc!=N:]");
// isNFX versus "toNFX maps the code point to itself".
// NOTE(review): the next two calls differ only in the pseudo-property spelling
// ("cp" vs "code point"); they look like the before/after forms of a single
// assertion — confirm whether both are meant to remain.
checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@cp@:]");
checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@code point@:]");
}
}

Expand Down Expand Up @@ -479,7 +498,7 @@ public void TestSetSyntax() {
checkProperties("\\p{isNFC}", "[:ASCII:]", "[\u212B]");
checkProperties("[:isNFC=no:]", "[\u212B]", "[:ASCII:]");
checkProperties("[:dt!=none:]&[:toNFD=/^\\p{ccc:0}/:]", "[\u00A0]", "[\u0340]");
checkProperties("[:toLowercase!=@cp@:]", "[A-Z\u00C0]", "[abc]");
checkProperties("[:toLowercase!=@code point@:]", "[A-Z\u00C0]", "[abc]");
checkProperties("[:toNfkc!=@toNfc@:]", "[\\u00A0]", "[abc]");

String trans1 = Common.NFKC_CF.transform("\u2065");
Expand Down
4 changes: 2 additions & 2 deletions docs/help/list-unicodeset.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ There is a special property "code point" that returns the code point itself. For
example:

* Find the characters whose lowercase is different:
[`\p{toLowercase!=@cp@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40cp%40%7D&g=)
[`\p{toLowercase!=@code point@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40code%20point%40%7D&g=)

## **Available Properties**

Expand Down Expand Up @@ -157,7 +157,7 @@ then set the Group By box to the property name.
1. uca (the primary UCA weight -- after the CLDR transforms),
2. uca2 (the primary and secondary weights)

Normally, \\p{isX} is equivalent to `\p{toX=@cp@}`. There are some exceptions and
Normally, \\p{isX} is equivalent to `\p{toX=@code point@}`. There are some exceptions and
missing cases.

Note: The Unassigned, Surrogate, and Private Use code points are skipped in the
Expand Down