@@ -125,7 +125,13 @@ static bool isContentChar(UChar32 c) {
125125 || inRange (c, 0xE000 , 0x10FFFF );
126126}
127127
128- // See `s` in the MessageFormat 2 grammar
128+ // See `bidi` in the MF2 grammar
129+ static bool isBidi (UChar32 c) {
130+ return (c == 0x061C || c == 0x200E || c == 0x200F ||
131+ inRange (c, 0x2066 , 0x2069 ));
132+ }
133+
134+ // See `ws` in the MessageFormat 2 grammar
129135inline bool isWhitespace (UChar32 c) {
130136 switch (c) {
131137 case SPACE:
@@ -153,8 +159,8 @@ static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); }
153159
154160static bool isNameStart (UChar32 c) {
155161 return isAlpha (c) || c == UNDERSCORE || inRange (c, 0x00C0 , 0x00D6 ) || inRange (c, 0x00D8 , 0x00F6 ) ||
156- inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x1FFF ) ||
157- inRange (c, 0x200C , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
162+ inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x061B ) ||
163+ inRange (c, 0x061D , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
158164 inRange (c, 0x3001 , 0xD7FF ) || inRange (c, 0xF900 , 0xFDCF ) || inRange (c, 0xFDF0 , 0xFFFD ) ||
159165 inRange (c, 0x10000 , 0xEFFFF );
160166}
@@ -347,7 +353,7 @@ option, or the optional space before an attribute.
347353 No pre, no post.
348354 A message may end with whitespace, so `index` may equal `len()` on exit.
349355*/
350- void Parser::parseWhitespaceMaybeRequired ( bool required, UErrorCode& errorCode) {
356+ void Parser::parseRequiredWS ( UErrorCode& errorCode) {
351357 bool sawWhitespace = false ;
352358
353359 // The loop exits either when we consume all the input,
@@ -358,7 +364,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
358364 // If whitespace isn't required -- or if we saw it already --
359365 // then the caller is responsible for checking this case and
360366 // setting an error if necessary.
361- if (!required || sawWhitespace) {
367+ if (sawWhitespace) {
362368 // Not an error.
363369 return ;
364370 }
@@ -380,24 +386,51 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
380386 }
381387 }
382388
383- if (!sawWhitespace && required ) {
389+ if (!sawWhitespace) {
384390 ERROR (errorCode);
385391 }
386392}
387393
394+ void Parser::parseOptionalBidi () {
395+ while (true ) {
396+ if (!inBounds ()) {
397+ return ;
398+ }
399+ if (isBidi (peek ())) {
400+ next ();
401+ } else {
402+ break ;
403+ }
404+ }
405+ }
406+
388407/*
389- No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
408+ No pre, no post, because a message may end with whitespace
409+ Matches `s` in the MF2 grammar
390410*/
391411void Parser::parseRequiredWhitespace (UErrorCode& errorCode) {
392- parseWhitespaceMaybeRequired (true , errorCode);
412+ parseOptionalBidi ();
413+ parseRequiredWS (errorCode);
414+ parseOptionalWhitespace ();
393415 normalizedInput += SPACE;
394416}
395417
396418/*
397419 No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
398420*/
399- void Parser::parseOptionalWhitespace (UErrorCode& errorCode) {
400- parseWhitespaceMaybeRequired (false , errorCode);
421+ void Parser::parseOptionalWhitespace () {
422+ while (true ) {
423+ if (!inBounds ()) {
424+ return ;
425+ }
426+ auto cp = peek ();
427+ if (isWhitespace (cp) || isBidi (cp)) {
428+ maybeAdvanceLine ();
429+ next ();
430+ } else {
431+ break ;
432+ }
433+ }
401434}
402435
403436// Consumes a single character, signaling an error if `peek()` != `c`
@@ -442,11 +475,11 @@ void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode)
442475*/
443476void Parser::parseTokenWithWhitespace (const std::u16string_view& token, UErrorCode& errorCode) {
444477 // No need for error check or bounds check before parseOptionalWhitespace
445- parseOptionalWhitespace (errorCode );
478+ parseOptionalWhitespace ();
446479 // Establish precondition
447480 CHECK_BOUNDS (errorCode);
448481 parseToken (token, errorCode);
449- parseOptionalWhitespace (errorCode );
482+ parseOptionalWhitespace ();
450483 // Guarantee postcondition
451484 CHECK_BOUNDS (errorCode);
452485}
@@ -458,12 +491,12 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
458491 then consumes optional whitespace again
459492*/
460493void Parser::parseTokenWithWhitespace (UChar32 c, UErrorCode& errorCode) {
461- // No need for error check or bounds check before parseOptionalWhitespace(errorCode )
462- parseOptionalWhitespace (errorCode );
494+ // No need for error check or bounds check before parseOptionalWhitespace()
495+ parseOptionalWhitespace ();
463496 // Establish precondition
464497 CHECK_BOUNDS (errorCode);
465498 parseToken (c, errorCode);
466- parseOptionalWhitespace (errorCode );
499+ parseOptionalWhitespace ();
467500 // Guarantee postcondition
468501 CHECK_BOUNDS (errorCode);
469502}
@@ -482,11 +515,17 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
482515
483516 U_ASSERT (inBounds ());
484517
485- if (!isNameStart (peek ())) {
518+ if (!( isNameStart (peek ()) || isBidi ( peek () ))) {
486519 ERROR (errorCode);
487520 return name;
488521 }
489522
523+ // name = [bidi] name-start *name-char [bidi]
524+
525+ // [bidi]
526+ parseOptionalBidi ();
527+
528+ // name-start *name-char
490529 while (isNameChar (peek ())) {
491530 UChar32 c = peek ();
492531 name += c;
@@ -497,6 +536,10 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
497536 break ;
498537 }
499538 }
539+
540+ // [bidi]
541+ parseOptionalBidi ();
542+
500543 return name;
501544}
502545
@@ -845,7 +888,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
845888 // about whether whitespace precedes another
846889 // attribute, or the '=' sign
847890 int32_t savedIndex = index;
848- parseOptionalWhitespace (errorCode );
891+ parseOptionalWhitespace ();
849892
850893 Operand rand;
851894 if (peek () == EQUALS) {
@@ -1131,7 +1174,7 @@ the comment in `parseOptions()` for details.
11311174 // (the character is either the required space before an annotation, or optional
11321175 // trailing space after the literal or variable). It's still ambiguous which
11331176 // one does apply.
1134- parseOptionalWhitespace (status );
1177+ parseOptionalWhitespace ();
11351178 // Restore precondition
11361179 CHECK_BOUNDS (status);
11371180
@@ -1202,7 +1245,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
12021245 // Parse opening brace
12031246 parseToken (LEFT_CURLY_BRACE, status);
12041247 // Optional whitespace after opening brace
1205- parseOptionalWhitespace (status );
1248+ parseOptionalWhitespace ();
12061249
12071250 Expression::Builder exprBuilder (status);
12081251 // Restore precondition
@@ -1245,7 +1288,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
12451288
12461289 // Parse optional space
12471290 // (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
1248- parseOptionalWhitespace (status );
1291+ parseOptionalWhitespace ();
12491292
12501293 // Either an operand or operator (or both) must have been set already,
12511294 // so there can't be an error
@@ -1321,7 +1364,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
13211364 CHECK_BOUNDS (status);
13221365
13231366 parseToken (ID_INPUT, status);
1324- parseOptionalWhitespace (status );
1367+ parseOptionalWhitespace ();
13251368
13261369 // Restore precondition before calling parseExpression()
13271370 CHECK_BOUNDS (status);
@@ -1382,7 +1425,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
13821425 // Avoid looping infinitely
13831426 CHECK_ERROR (status);
13841427
1385- parseOptionalWhitespace (status );
1428+ parseOptionalWhitespace ();
13861429 // Restore precondition
13871430 CHECK_BOUNDS (status);
13881431 }
@@ -1492,8 +1535,8 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
14921535
14931536 // We've seen at least one whitespace-key pair, so now we can parse
14941537 // *(s key) [s]
1495- while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ())) { // Try to recover from errors
1496- bool wasWhitespace = isWhitespace (peek ());
1538+ while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ()) || isBidi ( peek ())) {
1539+ bool wasWhitespace = isWhitespace (peek ()) || isBidi ( peek ()) ;
14971540 parseRequiredWhitespace (status);
14981541 if (!wasWhitespace) {
14991542 // Avoid infinite loop when parsing something like:
@@ -1551,7 +1594,7 @@ Markup Parser::parseMarkup(UErrorCode& status) {
15511594 // Consume the '{'
15521595 next ();
15531596 normalizedInput += LEFT_CURLY_BRACE;
1554- parseOptionalWhitespace (status );
1597+ parseOptionalWhitespace ();
15551598 bool closing = false ;
15561599 switch (peek ()) {
15571600 case NUMBER_SIGN: {
@@ -1578,19 +1621,19 @@ Markup Parser::parseMarkup(UErrorCode& status) {
15781621
15791622 // Parse the options, which must begin with a ' '
15801623 // if present
1581- if (inBounds () && isWhitespace (peek ())) {
1624+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
15821625 OptionAdder<Markup::Builder> optionAdder (builder);
15831626 parseOptions (optionAdder, status);
15841627 }
15851628
15861629 // Parse the attributes, which also must begin
15871630 // with a ' '
1588- if (inBounds () && isWhitespace (peek ())) {
1631+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
15891632 AttributeAdder<Markup::Builder> attrAdder (builder);
15901633 parseAttributes (attrAdder, status);
15911634 }
15921635
1593- parseOptionalWhitespace (status );
1636+ parseOptionalWhitespace ();
15941637
15951638 bool standalone = false ;
15961639 // Check if this is a standalone or not
@@ -1638,7 +1681,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
16381681 isMarkup = true ;
16391682 break ;
16401683 }
1641- if (!isWhitespace (c)) {
1684+ if (!( isWhitespace (c) || isBidi (c))) {
16421685 break ;
16431686 }
16441687 tempIndex++;
@@ -1738,8 +1781,7 @@ void Parser::parseSelectors(UErrorCode& status) {
17381781 // "Backtracking" is required here. It's not clear if whitespace is
17391782 // (`[s]` selector) or (`[s]` variant)
17401783 while (isWhitespace (peek ()) || peek () == DOLLAR) {
1741- int32_t whitespaceStart = index;
1742- parseRequiredWhitespace (status);
1784+ parseOptionalWhitespace ();
17431785 // Restore precondition
17441786 CHECK_BOUNDS (status);
17451787 if (peek () != DOLLAR) {
@@ -1771,24 +1813,9 @@ void Parser::parseSelectors(UErrorCode& status) {
17711813 } \
17721814
17731815 // Parse variants
1774- // matcher = match-statement s variant *(o variant)
1775-
1776- // Parse first variant
1777- parseRequiredWhitespace (status);
1778- if (!inBounds ()) {
1779- ERROR (status);
1780- return ;
1781- }
1782- parseVariant (status);
1783- if (!inBounds ()) {
1784- // Not an error; there might be only one variant
1785- return ;
1786- }
1787-
1788- while (isWhitespace (peek ()) || isKeyStart (peek ())) {
1789- parseOptionalWhitespace (status);
1790- // Restore the precondition.
1791- // Trailing whitespace is allowed.
1816+ while (isWhitespace (peek ()) || isBidi (peek ()) || isKeyStart (peek ())) {
1817+ // Trailing whitespace is allowed
1818+ parseOptionalWhitespace ();
17921819 if (!inBounds ()) {
17931820 return ;
17941821 }
@@ -1874,7 +1901,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
18741901 bool complex = false ;
18751902 // First, "look ahead" to determine if this is a simple or complex
18761903 // message. To do that, check the first non-whitespace character.
1877- while (inBounds (index) && isWhitespace (peek ())) {
1904+ while (inBounds (index) && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
18781905 next ();
18791906 }
18801907
@@ -1894,10 +1921,10 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
18941921 // Message can be empty, so we need to only look ahead
18951922 // if we know it's non-empty
18961923 if (complex ) {
1897- parseOptionalWhitespace (status );
1924+ parseOptionalWhitespace ();
18981925 parseDeclarations (status);
18991926 parseBody (status);
1900- parseOptionalWhitespace (status );
1927+ parseOptionalWhitespace ();
19011928 } else {
19021929 // Simple message
19031930 // For normalization, quote the pattern
0 commit comments