Skip to content

Commit dce38b5

Browse files
committed
ICU-23179 Test more edge cases when mapping syntax characters to sets
1 parent 71b9755 commit dce38b5

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

icu4c/source/test/intltest/usettest.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2044,18 +2044,25 @@ void UnicodeSetTest::TestLookupSymbolTable() {
20442044
symbols.add(u'{', UnicodeSet(u"[{leftCurlyBracket}]", errorCode));
20452045
symbols.add(u'}', UnicodeSet(u"[{rightCurlyBracket}]", errorCode));
20462046
symbols.add(u'$', UnicodeSet(u"[{dollarSign}]", errorCode));
2047+
symbols.add(u':', UnicodeSet(u"[{colon}]", errorCode));
2048+
symbols.add(u'\\', UnicodeSet(u"[{reverseSolidus}]", errorCode));
2049+
symbols.add(u'p', UnicodeSet(u"[{latinSmallLetterP}]", errorCode));
20472050
for (const auto &[expression, expectedErrorCode, expectedPattern, expectedRegeneratedPattern,
20482051
expectedLookups, variables] : std::vector<TestCase>{
20492052
{u"-", U_ZERO_ERROR, u"[{hyphenMinus}]", u"[{hyphenMinus}]"},
20502053
{u"0", U_ZERO_ERROR, u"[a-z]", u"[a-z]"},
20512054
// The hyphen no longer works as set difference.
20522055
{u"[0-1]", U_ZERO_ERROR, u"[[a-z][{hyphenMinus}][bc]]", u"[a-z{hyphenMinus}]"},
20532056
{u"[!-0]", U_ZERO_ERROR, u"[![{hyphenMinus}][a-z]]", u"[!a-z{hyphenMinus}]"},
2057+
// An initial HYPHEN-MINUS is still treated as a literal '-', but a final one is treated
2058+
// as a set.
2059+
{u"[-1]", U_ZERO_ERROR, uR"([\-[bc]])", uR"([\-bc])"},
2060+
{u"[1-]", U_ZERO_ERROR, u"[[bc][{hyphenMinus}]]", u"[bc{hyphenMinus}]"},
20542061
// String literals no longer work.
20552062
{uR"([!-/{0}])", U_ZERO_ERROR,
20562063
u"[![{hyphenMinus}]/[{leftCurlyBracket}][a-z][{rightCurlyBracket}]]",
20572064
u"[!/a-z{hyphenMinus}{leftCurlyBracket}{rightCurlyBracket}]"},
2058-
// The ampersand no longer works as set difference.
2065+
// The ampersand no longer works as set intersection.
20592066
{uR"([ 2 & 1 ])", U_ZERO_ERROR, u"[[: Co :][{ampersand}][bc]]",
20602067
u"[bc-󰀀-󿿽􀀀-􏿽{ampersand}]"},
20612068
// Complementing still works.
@@ -2070,6 +2077,9 @@ void UnicodeSetTest::TestLookupSymbolTable() {
20702077
{uR"([ \[ ])", U_ZERO_ERROR, uR"([[{leftSquareBracket}]])", uR"([{leftSquareBracket}])"},
20712078
// Anchors are gone.
20722079
{uR"([$])", U_ZERO_ERROR, uR"([[{dollarSign}]])", uR"([{dollarSign}])"},
2080+
// Property queries are unaffected.
2081+
{u"[:Co:]", U_ZERO_ERROR, u"[:Co:]", u"[\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD]"},
2082+
{uR"(\p{Co})", U_ZERO_ERROR, uR"(\p{Co})", u"[\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD]"},
20732083
}) {
20742084
UnicodeString actual;
20752085
UErrorCode errorCode = U_ZERO_ERROR;
@@ -2094,6 +2104,7 @@ void UnicodeSetTest::TestLookupSymbolTable() {
20942104
for (const auto &[expression, expectedErrorCode, expectedPattern, expectedRegeneratedPattern,
20952105
expectedLookups, variables] : std::vector<TestCase>{
20962106
{u"]", U_ZERO_ERROR, u"[{rightSquareBracket}]", u"[{rightSquareBracket}]"},
2107+
{u"[:Co:]", U_ZERO_ERROR, u"[:Co:]", u"[\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD]"},
20972108
{u"[]", U_MALFORMED_SET, u"[{rightSquareBracket}]", u"[{rightSquareBracket}]"},
20982109
}) {
20992110
UnicodeString actual;

0 commit comments

Comments
 (0)