Skip to content

Commit 4e9c96d

Browse files
committed
ICU-23307 Space-sensitive UnicodeSet string literals
1 parent 148f5ca commit 4e9c96d

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

icu4c/source/common/uniset_props.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,9 @@ class UnicodeSet::Lexer {
538538
UChar32 next;
539539
int32_t codePointCount = 0;
540540
while (!chars_.atEnd() && U_SUCCESS(errorCode)) {
541-
// TODO(egg): Propose making this space-sensitive.
542541
const RuleCharacterIterator::Pos beforeNext = getPos();
543-
next = chars_.next(charsOptions_ & ~RuleCharacterIterator::PARSE_ESCAPES,
542+
next = chars_.next(charsOptions_ & ~(RuleCharacterIterator::PARSE_ESCAPES |
543+
RuleCharacterIterator::SKIP_WHITESPACE),
544544
unusedEscaped, errorCode);
545545
if (next == u'\\') {
546546
if (chars_.next(charsOptions_ & ~(RuleCharacterIterator::PARSE_ESCAPES |

icu4c/source/test/intltest/usettest.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4633,8 +4633,8 @@ void UnicodeSetTest::TestToPatternOutput() {
46334633
{u"[ - - ]", uR"([\-])"},
46344634
{u"[ - _ - ]", uR"([\-_])"},
46354635
{u"[ - + - ]", uR"([+\-])"},
4636-
{u"[ { Z e i c h e n k e t t e } Zeichenmenge ]", u"[Zceg-imn{Zeichenkette}]"},
4637-
{uR"([ { \x5A e i c h e n k e t t e } \x5Aeichenmenge ])", u"[Zceg-imn{Zeichenkette}]"},
4636+
{u"[ { Z e i c h e n k e t t e } Zeichenmenge ]", uR"([Zceg-imn{\ Z\ e\ i\ c\ h\ e\ n\ k\ e\ t\ t\ e\ }])"},
4637+
{uR"([ { \x5A e i c h e n k e t t e } \x5Aeichenmenge ])", uR"([Zceg-imn{\ Z\ e\ i\ c\ h\ e\ n\ k\ e\ t\ t\ e\ }])"},
46384638
{u"[$d-za-c]", uR"([\$a-z])"},
46394639
{u"[a-c$d-z]", uR"([\$a-z])"},
46404640
{uR"([\uFFFFa-z])", uR"([a-z\uFFFF])"},
@@ -4659,10 +4659,10 @@ void UnicodeSetTest::TestToPatternOutput() {
46594659
{uR"([ ^ [ \u0000-b d-\U0010FFFF ] ])", uR"([^[^c]])"},
46604660
{u"[$[]]", uR"([\$[]])"},
46614661
// Spaces within a string-literal are significant: they are kept (and escaped in the output) even when the syntax is preserved.
4662-
{u"[ {Z e i c h e n k e t t e } [] Zeichenmenge ]", u"[{Zeichenkette}[]Zeichenmenge]"},
4662+
{u"[ { Z e i c h e n k e t t e } [] Zeichenmenge ]", uR"([{\ Z\ e\ i\ c\ h\ e\ n\ k\ e\ t\ t\ e\ }[]Zeichenmenge])"},
46634663
// Escapes are removed even when the syntax is preserved.
46644664
{uR"([ { \x5A e i c h e n k e t t e } [] \x5Aeichenmenge ])",
4665-
u"[{Zeichenkette}[]Zeichenmenge]"},
4665+
uR"([{\ Z\ e\ i\ c\ h\ e\ n\ k\ e\ t\ t\ e\ }[]Zeichenmenge])"},
46664666
// In ICU 78 and earlier, a named-element was a nested set, so it was preserved and
46674667
// caused the syntax to be preserved. Now it is treated like an escape.
46684668
{uR"([ \N{LATIN CAPITAL LETTER Z}eichenmenge ])", uR"([Zceg-imn])"},

0 commit comments

Comments
 (0)