@@ -2044,18 +2044,25 @@ void UnicodeSetTest::TestLookupSymbolTable() {
2044
2044
symbols.add (u' {' , UnicodeSet (u" [{leftCurlyBracket}]" , errorCode));
2045
2045
symbols.add (u' }' , UnicodeSet (u" [{rightCurlyBracket}]" , errorCode));
2046
2046
symbols.add (u' $' , UnicodeSet (u" [{dollarSign}]" , errorCode));
2047
+ symbols.add (u' :' , UnicodeSet (u" [{colon}]" , errorCode));
2048
+ symbols.add (u' \\ ' , UnicodeSet (u" [{reverseSolidus}]" , errorCode));
2049
+ symbols.add (u' p' , UnicodeSet (u" [{latinSmallLetterP}]" , errorCode));
2047
2050
for (const auto &[expression, expectedErrorCode, expectedPattern, expectedRegeneratedPattern,
2048
2051
expectedLookups, variables] : std::vector<TestCase>{
2049
2052
{u" -" , U_ZERO_ERROR, u" [{hyphenMinus}]" , u" [{hyphenMinus}]" },
2050
2053
{u" 0" , U_ZERO_ERROR, u" [a-z]" , u" [a-z]" },
2051
2054
// The hyphen no longer works as set difference.
2052
2055
{u" [0-1]" , U_ZERO_ERROR, u" [[a-z][{hyphenMinus}][bc]]" , u" [a-z{hyphenMinus}]" },
2053
2056
{u" [!-0]" , U_ZERO_ERROR, u" [![{hyphenMinus}][a-z]]" , u" [!a-z{hyphenMinus}]" },
2057
+ // An initial HYPHEN-MINUS is still treated as a literal '-', but a final one is treated
2058
+ // as a set.
2059
+ {u" [-1]" , U_ZERO_ERROR, uR"( [\-[bc]])" , uR"( [\-bc])" },
2060
+ {u" [1-]" , U_ZERO_ERROR, u" [[bc][{hyphenMinus}]]" , u" [bc{hyphenMinus}]" },
2054
2061
// String literals no longer work.
2055
2062
{uR"( [!-/{0}])" , U_ZERO_ERROR,
2056
2063
u" [![{hyphenMinus}]/[{leftCurlyBracket}][a-z][{rightCurlyBracket}]]" ,
2057
2064
u" [!/a-z{hyphenMinus}{leftCurlyBracket}{rightCurlyBracket}]" },
2058
- // The ampersand no longer works as set difference .
2065
+ // The ampersand no longer works as set intersection .
2059
2066
{uR"( [ 2 & 1 ])" , U_ZERO_ERROR, u" [[: Co :][{ampersand}][bc]]" ,
2060
2067
u" [bc---{ampersand}]" },
2061
2068
// Complementing still works.
@@ -2070,6 +2077,9 @@ void UnicodeSetTest::TestLookupSymbolTable() {
2070
2077
{uR"( [ \[ ])" , U_ZERO_ERROR, uR"( [[{leftSquareBracket}]])" , uR"( [{leftSquareBracket}])" },
2071
2078
// Anchors are gone.
2072
2079
{uR"( [$])" , U_ZERO_ERROR, uR"( [[{dollarSign}]])" , uR"( [{dollarSign}])" },
2080
+ // Property queries are unaffected.
2081
+ {u" [:Co:]" , U_ZERO_ERROR, u" [:Co:]" , u" [\uE000 -\uF8FF\U000F0000 -\U000FFFFD\U00100000 -\U0010FFFD ]" },
2082
+ {uR"( \p{Co})" , U_ZERO_ERROR, uR"( \p{Co})" , u" [\uE000 -\uF8FF\U000F0000 -\U000FFFFD\U00100000 -\U0010FFFD ]" },
2073
2083
}) {
2074
2084
UnicodeString actual;
2075
2085
UErrorCode errorCode = U_ZERO_ERROR;
@@ -2094,6 +2104,7 @@ void UnicodeSetTest::TestLookupSymbolTable() {
2094
2104
for (const auto &[expression, expectedErrorCode, expectedPattern, expectedRegeneratedPattern,
2095
2105
expectedLookups, variables] : std::vector<TestCase>{
2096
2106
{u" ]" , U_ZERO_ERROR, u" [{rightSquareBracket}]" , u" [{rightSquareBracket}]" },
2107
+ {u" [:Co:]" , U_ZERO_ERROR, u" [:Co:]" , u" [\uE000 -\uF8FF\U000F0000 -\U000FFFFD\U00100000 -\U0010FFFD ]" },
2097
2108
{u" []" , U_MALFORMED_SET, u" [{rightSquareBracket}]" , u" [{rightSquareBracket}]" },
2098
2109
}) {
2099
2110
UnicodeString actual;
0 commit comments