Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,22 @@ private boolean applyPropertyAlias0(
}
UnicodeProperty otherProperty = null;
boolean testCp = false;
boolean testNone = false;
if (trimmedPropertyValue.length() > 1
&& trimmedPropertyValue.startsWith("@")
&& trimmedPropertyValue.endsWith("@")) {
String otherPropName =
trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
if ("cp".equalsIgnoreCase(otherPropName)) {
if (UnicodeProperty.equalNames("code point", otherPropName)) {
testCp = true;
} else if (UnicodeProperty.equalNames("none", otherPropName)) {
testNone = true;
} else {
otherProperty = factory.getProperty(otherPropName);
}
}
// TODO(egg): Name and Name_Alias require special handling (UAX44-LM2), as well as
// treating Name_Alias values as aliases for Name.
boolean isAge = UnicodeProperty.equalNames("age", propertyName);
if (prop != null) {
UnicodeSet set;
Expand All @@ -270,15 +275,20 @@ private boolean applyPropertyAlias0(
if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
set.add(i);
}
invert = false;
}
} else if (testNone) {
set = prop.getSet(UnicodeProperty.NULL_MATCHER);
} else if (otherProperty != null) {
System.err.println(otherProperty + ", " + invert);
set = new UnicodeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
String v1 = prop.getValue(i);
String v2 = otherProperty.getValue(i);
if (invert != UnicodeProperty.equals(v1, v2)) {
set.add(i);
}
invert = false;
}
} else if (patternMatcher == null) {
if (!isValid(prop, propertyValue)) {
Expand Down
28 changes: 26 additions & 2 deletions UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,30 @@ public void TestPretty() {
logln(derived);
}

/**
 * Exercises property-comparison queries (a value of the form {@code @Other_Property@}) in which
 * the negation is written inside the query with {@code ≠}. The interiorly negated query is
 * compared with the exteriorly negated {@code \P{…=…}} spelling, and the {@code Is_}-prefixed
 * spelling is compared with an explicitly constructed set.
 */
@Test
public void TestInteriorlyNegatedComparison() {
    final String interiorNegation = "\\p{Uppercase≠@Changes_When_Lowercased@}";
    final String exteriorNegation = "\\P{Uppercase=@Changes_When_Lowercased@}";

    // checkProperties is defined elsewhere in this class; presumably the second argument is a
    // set the query result must contain -- confirm against the helper.
    checkProperties(interiorNegation, "[𝕬-𝖅]");
    checkSetsEqual(interiorNegation, exteriorNegation);

    // Union of the two binary properties minus their intersection.
    final String unionMinusIntersection =
            "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]";
    final String isPrefixedQuery = "\\p{Is_Uppercase≠@Changes_When_Lowercased@}";
    checkSetsEqual(isPrefixedQuery, unionMinusIntersection);
}

/**
 * Exercises the identity query keyword in two spellings ({@code @code point@} and
 * {@code @Code_Point@}) by comparing NFKC_Casefold identity queries with the
 * Changes_When_NFKC_Casefolded binary property.
 */
@Test
public void TestIdentityQuery() {
    // NFKC_Casefold equal to the code point itself vs. the negated binary property.
    final String casefoldIsIdentity = "\\p{NFKC_Casefold=@code point@}";
    final String notChanged = "\\P{Changes_When_NFKC_Casefolded}";
    checkSetsEqual(casefoldIsIdentity, notChanged);

    // NFKC_Casefold different from the code point vs. the binary property.
    final String casefoldIsNotIdentity = "\\p{NFKC_Casefold≠@Code_Point@}";
    final String changed = "\\p{Changes_When_NFKC_Casefolded}";
    checkSetsEqual(casefoldIsNotIdentity, changed);
}

/**
 * Exercises the null query keyword in two spellings ({@code @none@} and {@code @None@}) by
 * comparing Bidi_Paired_Bracket null queries with queries on Bidi_Paired_Bracket_Type.
 */
@Test
public void TestNullQuery() {
    // Bidi_Paired_Bracket = @none@ vs. Bidi_Paired_Bracket_Type = Is_None.
    final String bracketIsNull = "\\p{Bidi_Paired_Bracket=@none@}";
    final String bracketTypeIsNone = "\\p{Bidi_Paired_Bracket_Type=Is_None}";
    checkSetsEqual(bracketIsNull, bracketTypeIsNone);

    // Bidi_Paired_Bracket ≠ @None@ vs. Bidi_Paired_Bracket_Type ≠ None.
    final String bracketIsNotNull = "\\p{Bidi_Paired_Bracket≠@None@}";
    final String bracketTypeIsNotNone = "\\p{Bidi_Paired_Bracket_Type≠None}";
    checkSetsEqual(bracketIsNotNull, bracketTypeIsNotNone);
}

// public void TestAExemplars() {
// checkProperties("[:exemplars_en:]", "[a]", "[\u0350]");
// }
Expand Down Expand Up @@ -380,7 +404,7 @@ public void TestGC() {
public void TestNF() {
// For each normalization-form suffix (d, c, kd, kc), compare the [:isnfX:] set with other
// formulations using checkSetsEqual (a helper defined elsewhere in this class).
for (String nf : new String[] {"d", "c", "kd", "kc"}) {
// Quick-check formulation: the nfXqc property value is anything other than N.
checkSetsEqual("[:isnf" + nf + ":]", "[:nf" + nf + "qc!=N:]");
// Identity formulation: tonfX compared with the code point itself, written once with the
// "cp" keyword and once with the "code point" keyword.
checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@cp@:]");
checkSetsEqual("[:isnf" + nf + ":]", "[:tonf" + nf + "=@code point@:]");
}
}

Expand Down Expand Up @@ -479,7 +503,7 @@ public void TestSetSyntax() {
checkProperties("\\p{isNFC}", "[:ASCII:]", "[\u212B]");
checkProperties("[:isNFC=no:]", "[\u212B]", "[:ASCII:]");
checkProperties("[:dt!=none:]&[:toNFD=/^\\p{ccc:0}/:]", "[\u00A0]", "[\u0340]");
checkProperties("[:toLowercase!=@cp@:]", "[A-Z\u00C0]", "[abc]");
checkProperties("[:toLowercase!=@code point@:]", "[A-Z\u00C0]", "[abc]");
checkProperties("[:toNfkc!=@toNfc@:]", "[\\u00A0]", "[abc]");

String trans1 = Common.NFKC_CF.transform("\u2065");
Expand Down
4 changes: 2 additions & 2 deletions docs/help/list-unicodeset.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ There is a special property "cp" that returns the code point itself. For
example:

* Find the characters whose lowercase is different:
[`\p{toLowercase!=@cp@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40cp%40%7D&g=)
[`\p{toLowercase!=@code point@}`](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BtoLowercase!%3D%40code%20point%40%7D&g=)

## **Available Properties**

Expand Down Expand Up @@ -157,7 +157,7 @@ then set the Group By box to the property name.
1. uca (the primary UCA weight -- after the CLDR transforms),
2. uca2 (the primary and secondary weights)

Normally, \\p{isX} is equivalent to `\p{toX=@cp@}`. There are some exceptions and
Normally, \\p{isX} is equivalent to `\p{toX=@code point@}`. There are some exceptions and
missing cases.

Note: The Unassigned, Surrogate, and Private Use code points are skipped in the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,7 @@ public static int compareNames(String a, String b) {
return toSkeleton(a).compareTo(toSkeleton(b));
}

/** Utility for managing property & non-string value aliases */
// TODO account for special names, tibetan, hangul
/** Returns a representative of the equivalence class of source under UAX44-LM3. */
public static String toSkeleton(String source) {
if (source == null) return null;
StringBuffer skeletonBuffer = new StringBuffer();
Expand All @@ -713,11 +712,15 @@ public static String toSkeleton(String source) {
}
}
}
while (skeletonBuffer.subSequence(0, 2).equals("is")) {
gotOne = true;
skeletonBuffer.delete(0, 2);
}
if (!gotOne) return source; // avoid string creation
return skeletonBuffer.toString();
}

// get the name skeleton
/** Returns a representative of the equivalence class of source under UAX44-LM2. */
public static String toNameSkeleton(String source) {
if (source == null) return null;
StringBuffer result = new StringBuffer();
Expand Down
Loading