Skip to content

Commit 376da67

Browse files
committed
ICU-22953 MF2: Allow unpaired surrogates in text and quoted literals
See unicode-org/message-format-wg#906
1 parent 842899d commit 376da67

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ static bool isContentChar(UChar32 c) {
121121
|| inRange(c, 0x002F, 0x003F) // Omit '@'
122122
|| inRange(c, 0x0041, 0x005B) // Omit '\'
123123
|| inRange(c, 0x005D, 0x007A) // Omit { | }
124-
|| inRange(c, 0x007E, 0xD7FF) // Omit surrogates
125-
|| inRange(c, 0xE000, 0x10FFFF);
124+
|| inRange(c, 0x007E, 0x2FFF) // Omit IDEOGRAPHIC_SPACE
125+
|| inRange(c, 0x3001, 0x10FFFF); // Allowing surrogates is intentional
126126
}
127127

128128
// See `s` in the MessageFormat 2 grammar

icu4c/source/test/intltest/messageformat2test.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec,
3333
TESTCASE_AUTO(testFormatterAPI);
3434
TESTCASE_AUTO(testHighLoneSurrogate);
3535
TESTCASE_AUTO(testLowLoneSurrogate);
36+
TESTCASE_AUTO(testLoneSurrogateInQuotedLiteral);
3637
TESTCASE_AUTO(dataDrivenTests);
3738
TESTCASE_AUTO_END;
3839
}
@@ -350,7 +351,8 @@ void TestMessageFormat2::testHighLoneSurrogate() {
350351
.setPattern(loneSurrogate, pe, errorCode)
351352
.build(errorCode);
352353
UnicodeString result = msgfmt1.formatToString({}, errorCode);
353-
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testHighLoneSurrogate");
354+
assertEquals("testHighLoneSurrogate", loneSurrogate, result);
355+
errorCode.errIfFailureAndReset("testHighLoneSurrogate");
354356
}
355357

356358
// ICU-22890: a lone surrogate caused an infinite loop
@@ -364,7 +366,25 @@ void TestMessageFormat2::testLowLoneSurrogate() {
364366
.setPattern(loneSurrogate, pe, errorCode)
365367
.build(errorCode);
366368
UnicodeString result = msgfmt2.formatToString({}, errorCode);
367-
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testLowLoneSurrogate");
369+
assertEquals("testLowLoneSurrogate", loneSurrogate, result);
370+
errorCode.errIfFailureAndReset("testLowLoneSurrogate");
371+
}
372+
373+
// ICU-22953: checks that a pattern whose quoted literal contains an unpaired
// surrogate (U+DC02) builds and formats without error, yielding the surrogate itself.
void TestMessageFormat2::testLoneSurrogateInQuotedLiteral() {
374+
IcuTestErrorCode errorCode(*this, "testLoneSurrogateInQuotedLiteral");
375+
UParseError pe = { 0, 0, {0}, {0} };
376+
// Pattern under test: {|\udc02|} -- a quoted literal whose only content is the lone surrogate U+DC02
377+
UnicodeString literal("{|");
378+
literal += 0xdc02;
379+
literal += "|}";
380+
// Expected output is just the surrogate code unit, without the surrounding {| and |}
UnicodeString expectedResult({0xdc02, 0});
381+
icu::message2::MessageFormatter msgfmt2 =
382+
icu::message2::MessageFormatter::Builder(errorCode)
383+
.setPattern(literal, pe, errorCode)
384+
.build(errorCode);
385+
// Format with an empty argument map
UnicodeString result = msgfmt2.formatToString({}, errorCode);
386+
assertEquals("testLoneSurrogateInQuotedLiteral", expectedResult, result);
387+
// Report any failure recorded in errorCode during build/format, then reset it
errorCode.errIfFailureAndReset("testLoneSurrogateInQuotedLiteral");
368388
}
369389

370390
void TestMessageFormat2::dataDrivenTests() {

icu4c/source/test/intltest/messageformat2test.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ class TestMessageFormat2: public IntlTest {
9191

9292
void testHighLoneSurrogate(void);
9393
void testLowLoneSurrogate(void);
94+
void testLoneSurrogateInQuotedLiteral(void);
9495
}; // class TestMessageFormat2
9596

9697
U_NAMESPACE_BEGIN

0 commit comments

Comments
 (0)