@@ -125,7 +125,13 @@ static bool isContentChar(UChar32 c) {
125125 || inRange (c, 0xE000 , 0x10FFFF );
126126}
127127
128- // See `s` in the MessageFormat 2 grammar
128+ // See `bidi` in the MF2 grammar
129+ static bool isBidi (UChar32 c) {
130+ return (c == 0x061C || c == 0x200E || c == 0x200F ||
131+ inRange (c, 0x2066 , 0x2069 ));
132+ }
133+
134+ // See `ws` in the MessageFormat 2 grammar
129135inline bool isWhitespace (UChar32 c) {
130136 switch (c) {
131137 case SPACE:
@@ -153,8 +159,8 @@ static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); }
153159
154160static bool isNameStart (UChar32 c) {
155161 return isAlpha (c) || c == UNDERSCORE || inRange (c, 0x00C0 , 0x00D6 ) || inRange (c, 0x00D8 , 0x00F6 ) ||
156- inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x1FFF ) ||
157- inRange (c, 0x200C , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
162+ inRange (c, 0x00F8 , 0x02FF ) || inRange (c, 0x0370 , 0x037D ) || inRange (c, 0x037F , 0x061B ) ||
163+ inRange (c, 0x061D , 0x200D ) || inRange (c, 0x2070 , 0x218F ) || inRange (c, 0x2C00 , 0x2FEF ) ||
158164 inRange (c, 0x3001 , 0xD7FF ) || inRange (c, 0xF900 , 0xFDCF ) || inRange (c, 0xFDF0 , 0xFFFD ) ||
159165 inRange (c, 0x10000 , 0xEFFFF );
160166}
@@ -347,7 +353,7 @@ option, or the optional space before an attribute.
347353 No pre, no post.
348354 A message may end with whitespace, so `index` may equal `len()` on exit.
349355*/
350- void Parser::parseWhitespaceMaybeRequired ( bool required, UErrorCode& errorCode) {
356+ void Parser::parseRequiredWS ( UErrorCode& errorCode) {
351357 bool sawWhitespace = false ;
352358
353359 // The loop exits either when we consume all the input,
@@ -358,7 +364,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
358364 // If whitespace isn't required -- or if we saw it already --
359365 // then the caller is responsible for checking this case and
360366 // setting an error if necessary.
361- if (!required || sawWhitespace) {
367+ if (sawWhitespace) {
362368 // Not an error.
363369 return ;
364370 }
@@ -380,24 +386,51 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
380386 }
381387 }
382388
383- if (!sawWhitespace && required ) {
389+ if (!sawWhitespace) {
384390 ERROR (errorCode);
385391 }
386392}
387393
394+ void Parser::parseOptionalBidi () {
395+ while (true ) {
396+ if (!inBounds ()) {
397+ return ;
398+ }
399+ if (isBidi (peek ())) {
400+ next ();
401+ } else {
402+ break ;
403+ }
404+ }
405+ }
406+
388407/*
389- No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
408+ No pre, no post, because a message may end with whitespace
409+ Matches `s` in the MF2 grammar
390410*/
391411void Parser::parseRequiredWhitespace (UErrorCode& errorCode) {
392- parseWhitespaceMaybeRequired (true , errorCode);
412+ parseOptionalBidi ();
413+ parseRequiredWS (errorCode);
414+ parseOptionalWhitespace ();
393415 normalizedInput += SPACE;
394416}
395417
396418/*
397419 No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
398420*/
399- void Parser::parseOptionalWhitespace (UErrorCode& errorCode) {
400- parseWhitespaceMaybeRequired (false , errorCode);
421+ void Parser::parseOptionalWhitespace () {
422+ while (true ) {
423+ if (!inBounds ()) {
424+ return ;
425+ }
426+ auto cp = peek ();
427+ if (isWhitespace (cp) || isBidi (cp)) {
428+ maybeAdvanceLine ();
429+ next ();
430+ } else {
431+ break ;
432+ }
433+ }
401434}
402435
403436// Consumes a single character, signaling an error if `peek()` != `c`
@@ -442,11 +475,11 @@ void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode)
442475*/
443476void Parser::parseTokenWithWhitespace (const std::u16string_view& token, UErrorCode& errorCode) {
444477 // No need for error check or bounds check before parseOptionalWhitespace
445- parseOptionalWhitespace (errorCode );
478+ parseOptionalWhitespace ();
446479 // Establish precondition
447480 CHECK_BOUNDS (errorCode);
448481 parseToken (token, errorCode);
449- parseOptionalWhitespace (errorCode );
482+ parseOptionalWhitespace ();
450483 // Guarantee postcondition
451484 CHECK_BOUNDS (errorCode);
452485}
@@ -458,12 +491,12 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
458491 then consumes optional whitespace again
459492*/
460493void Parser::parseTokenWithWhitespace (UChar32 c, UErrorCode& errorCode) {
461- // No need for error check or bounds check before parseOptionalWhitespace(errorCode )
462- parseOptionalWhitespace (errorCode );
494+ // No need for error check or bounds check before parseOptionalWhitespace()
495+ parseOptionalWhitespace ();
463496 // Establish precondition
464497 CHECK_BOUNDS (errorCode);
465498 parseToken (c, errorCode);
466- parseOptionalWhitespace (errorCode );
499+ parseOptionalWhitespace ();
467500 // Guarantee postcondition
468501 CHECK_BOUNDS (errorCode);
469502}
@@ -482,11 +515,17 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
482515
483516 U_ASSERT (inBounds ());
484517
485- if (!isNameStart (peek ())) {
518+ if (!( isNameStart (peek ()) || isBidi ( peek () ))) {
486519 ERROR (errorCode);
487520 return name;
488521 }
489522
523+ // name = [bidi] name-start *name-char [bidi]
524+
525+ // [bidi]
526+ parseOptionalBidi ();
527+
528+ // name-start *name-char
490529 while (isNameChar (peek ())) {
491530 UChar32 c = peek ();
492531 name += c;
@@ -497,6 +536,10 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
497536 break ;
498537 }
499538 }
539+
540+ // [bidi]
541+ parseOptionalBidi ();
542+
500543 return name;
501544}
502545
@@ -853,7 +896,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
853896 // about whether whitespace precedes another
854897 // attribute, or the '=' sign
855898 int32_t savedIndex = index;
856- parseOptionalWhitespace (errorCode );
899+ parseOptionalWhitespace ();
857900
858901 Operand rand;
859902 if (peek () == EQUALS) {
@@ -1149,7 +1192,7 @@ the comment in `parseOptions()` for details.
11491192 // (the character is either the required space before an annotation, or optional
11501193 // trailing space after the literal or variable). It's still ambiguous which
11511194 // one does apply.
1152- parseOptionalWhitespace (status );
1195+ parseOptionalWhitespace ();
11531196 // Restore precondition
11541197 CHECK_BOUNDS (status);
11551198
@@ -1220,7 +1263,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
12201263 // Parse opening brace
12211264 parseToken (LEFT_CURLY_BRACE, status);
12221265 // Optional whitespace after opening brace
1223- parseOptionalWhitespace (status );
1266+ parseOptionalWhitespace ();
12241267
12251268 Expression::Builder exprBuilder (status);
12261269 // Restore precondition
@@ -1263,7 +1306,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
12631306
12641307 // Parse optional space
12651308 // (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
1266- parseOptionalWhitespace (status );
1309+ parseOptionalWhitespace ();
12671310
12681311 // Either an operand or operator (or both) must have been set already,
12691312 // so there can't be an error
@@ -1339,7 +1382,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
13391382 CHECK_BOUNDS (status);
13401383
13411384 parseToken (ID_INPUT, status);
1342- parseOptionalWhitespace (status );
1385+ parseOptionalWhitespace ();
13431386
13441387 // Restore precondition before calling parseExpression()
13451388 CHECK_BOUNDS (status);
@@ -1400,7 +1443,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
14001443 // Avoid looping infinitely
14011444 CHECK_ERROR (status);
14021445
1403- parseOptionalWhitespace (status );
1446+ parseOptionalWhitespace ();
14041447 // Restore precondition
14051448 CHECK_BOUNDS (status);
14061449 }
@@ -1510,8 +1553,8 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
15101553
15111554 // We've seen at least one whitespace-key pair, so now we can parse
15121555 // *(s key) [s]
1513- while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ())) { // Try to recover from errors
1514- bool wasWhitespace = isWhitespace (peek ());
1556+ while (peek () != LEFT_CURLY_BRACE || isWhitespace (peek ()) || isBidi ( peek ())) {
1557+ bool wasWhitespace = isWhitespace (peek ()) || isBidi ( peek ()) ;
15151558 parseRequiredWhitespace (status);
15161559 if (!wasWhitespace) {
15171560 // Avoid infinite loop when parsing something like:
@@ -1569,7 +1612,7 @@ Markup Parser::parseMarkup(UErrorCode& status) {
15691612 // Consume the '{'
15701613 next ();
15711614 normalizedInput += LEFT_CURLY_BRACE;
1572- parseOptionalWhitespace (status );
1615+ parseOptionalWhitespace ();
15731616 bool closing = false ;
15741617 switch (peek ()) {
15751618 case NUMBER_SIGN: {
@@ -1596,19 +1639,19 @@ Markup Parser::parseMarkup(UErrorCode& status) {
15961639
15971640 // Parse the options, which must begin with a ' '
15981641 // if present
1599- if (inBounds () && isWhitespace (peek ())) {
1642+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
16001643 OptionAdder<Markup::Builder> optionAdder (builder);
16011644 parseOptions (optionAdder, status);
16021645 }
16031646
16041647 // Parse the attributes, which also must begin
16051648 // with a ' '
1606- if (inBounds () && isWhitespace (peek ())) {
1649+ if (inBounds () && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
16071650 AttributeAdder<Markup::Builder> attrAdder (builder);
16081651 parseAttributes (attrAdder, status);
16091652 }
16101653
1611- parseOptionalWhitespace (status );
1654+ parseOptionalWhitespace ();
16121655
16131656 bool standalone = false ;
16141657 // Check if this is a standalone or not
@@ -1656,7 +1699,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
16561699 isMarkup = true ;
16571700 break ;
16581701 }
1659- if (!isWhitespace (c)) {
1702+ if (!( isWhitespace (c) || isBidi (c))) {
16601703 break ;
16611704 }
16621705 tempIndex++;
@@ -1740,7 +1783,7 @@ void Parser::parseSelectors(UErrorCode& status) {
17401783 // "Backtracking" is required here. It's not clear if whitespace is
17411784 // (`[s]` selector) or (`[s]` variant)
17421785 while (isWhitespace (peek ()) || peek () == LEFT_CURLY_BRACE) {
1743- parseOptionalWhitespace (status );
1786+ parseOptionalWhitespace ();
17441787 // Restore precondition
17451788 CHECK_BOUNDS (status);
17461789 if (peek () != LEFT_CURLY_BRACE) {
@@ -1770,9 +1813,9 @@ void Parser::parseSelectors(UErrorCode& status) {
17701813 } \
17711814
17721815 // Parse variants
1773- while (isWhitespace (peek ()) || isKeyStart (peek ())) {
1816+ while (isWhitespace (peek ()) || isBidi ( peek ()) || isKeyStart (peek ())) {
17741817 // Trailing whitespace is allowed
1775- parseOptionalWhitespace (status );
1818+ parseOptionalWhitespace ();
17761819 if (!inBounds ()) {
17771820 return ;
17781821 }
@@ -1871,7 +1914,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
18711914 bool complex = false ;
18721915 // First, "look ahead" to determine if this is a simple or complex
18731916 // message. To do that, check the first non-whitespace character.
1874- while (inBounds (index) && isWhitespace (peek ())) {
1917+ while (inBounds (index) && ( isWhitespace (peek ()) || isBidi ( peek () ))) {
18751918 next ();
18761919 }
18771920
@@ -1891,10 +1934,10 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
18911934 // Message can be empty, so we need to only look ahead
18921935 // if we know it's non-empty
18931936 if (complex ) {
1894- parseOptionalWhitespace (status );
1937+ parseOptionalWhitespace ();
18951938 parseDeclarations (status);
18961939 parseBody (status);
1897- parseOptionalWhitespace (status );
1940+ parseOptionalWhitespace ();
18981941 } else {
18991942 // Simple message
19001943 // For normalization, quote the pattern
0 commit comments