Skip to content

Commit 2fa9f9c

Browse files
committed
Implement bidi default strategy and update tests from unicode-org/message-format-wg#917
1 parent 61caad9 commit 2fa9f9c

10 files changed

+196
-38
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,7 @@ FunctionOptions MessageFormatter::resolveOptions(Environment& env,
267267
return FunctionOptions(std::move(*optionsVector), status);
268268
}
269269

270-
static UBiDiDirection getBiDiDirection(const Locale& locale,
271-
const UnicodeString& s) {
272-
if (s.isEmpty()) {
273-
return locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
274-
}
270+
static UBiDiDirection getBiDiDirection(const UnicodeString& s) {
275271
if (s == u"ltr") {
276272
return UBIDI_LTR;
277273
}
@@ -281,7 +277,7 @@ static UBiDiDirection getBiDiDirection(const Locale& locale,
281277
if (s == u"auto") {
282278
return UBIDI_MIXED;
283279
}
284-
return UBIDI_NEUTRAL;
280+
return UBIDI_MIXED; // stands in for "unknown"
285281
}
286282

287283
FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const {
@@ -305,8 +301,7 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
305301
localeToUse = locale;
306302
}
307303
}
308-
UBiDiDirection dir = getBiDiDirection(localeToUse,
309-
options.getStringFunctionOption(UnicodeString("u:dir")));
304+
UBiDiDirection dir = getBiDiDirection(options.getStringFunctionOption(UnicodeString("u:dir")));
310305
UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id"));
311306

312307
return FunctionContext(localeToUse, dir, id);
@@ -510,6 +505,47 @@ void MessageFormatter::validateUOptionsOnMarkup(MessageContext& context,
510505
}
511506
}
512507

508+
/*
 * Applies the "default" bidi isolation strategy of the MF2 specification to
 * the formatted result of a single expression.
 *
 * dir: the directionality of the formatted value.
 * fmt: the formatted value; it is modified in place.
 *
 * Returns a reference to `fmt`.
 */
UnicodeString& MessageFormatter::bidiIsolate(UBiDiDirection dir, UnicodeString& fmt) const {
    // Strategy 'none': hand the formatted string back untouched.
    if (bidiIsolationStrategy == U_MF_BIDI_NONE) {
        return fmt;
    }

    // Step 1: msgdir, the directionality of the whole message. It is derived
    // from the formatter's locale, so in this implementation it is always
    // either LTR or RTL.
    const bool msgIsRtl = locale.isRightToLeft();

    // Steps 2i and 2ii: fmt and dir are supplied by the caller.

    if (dir == UBIDI_LTR) {
        // Step 2iii(a): an LTR value inside an LTR message needs no isolation.
        if (!msgIsRtl) {
            return fmt;
        }
        // Step 2iii(b): otherwise open with U+2066 LEFT-TO-RIGHT ISOLATE.
        fmt.insert(0, LRI_CHAR);
    } else if (dir == UBIDI_RTL) {
        // Step 2iv(a): open with U+2067 RIGHT-TO-LEFT ISOLATE.
        fmt.insert(0, RLI_CHAR);
    } else {
        // Step 2v(a): any other direction opens with U+2068 FIRST STRONG ISOLATE.
        fmt.insert(0, FSI_CHAR);
    }

    // Each isolated value is closed with U+2069 POP DIRECTIONAL ISOLATE.
    fmt.insert(fmt.length(), PDI_CHAR);
    return fmt;
}
513549

514550
// Formats each text and expression part of a pattern, appending the results to `result`
515551
void MessageFormatter::formatPattern(MessageContext& context,
@@ -539,6 +575,8 @@ void MessageFormatter::formatPattern(MessageContext& context,
539575

540576
// See comment in matchSelectorKeys()
541577
bool badSelectOption = !checkSelectOption(*val);
578+
UnicodeString fmt = val->formatToString(status);
579+
result += bidiIsolate(val->getDirection(), fmt);
542580
result += val->formatToString(status);
543581

544582
if (badSelectOption) {

icu4c/source/i18n/messageformat2_evaluation.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ BaseValue::BaseValue(const Locale& loc, const UnicodeString& fb, const Formattab
3333
fallback += LEFT_CURLY_BRACE;
3434
fallback += fb;
3535
fallback += RIGHT_CURLY_BRACE;
36+
dir = UBIDI_MIXED; // represents 'unknown'
3637
}
3738

3839
/* static */ BaseValue* BaseValue::create(const Locale& locale,
@@ -43,18 +44,16 @@ BaseValue::BaseValue(const Locale& loc, const UnicodeString& fb, const Formattab
4344
return message2::create<BaseValue>(BaseValue(locale, fallback, source, wasCreatedFromLiteral), errorCode);
4445
}
4546

46-
extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&);
47+
extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&);
4748

4849
UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const {
49-
return formattableToString(locale,
50-
UBIDI_NEUTRAL,
51-
operand,
52-
errorCode);
50+
return formattableToString(locale, operand, errorCode);
5351
}
5452

5553
BaseValue& BaseValue::operator=(BaseValue&& other) noexcept {
5654
operand = std::move(other.operand);
5755
opts = std::move(other.opts);
56+
dir = other.dir;
5857
locale = other.locale;
5958
fallback = other.fallback;
6059
fromLiteral = other.fromLiteral;

icu4c/source/i18n/messageformat2_formattable.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#if !UCONFIG_NO_MF2
1111

1212
#include "unicode/messageformat2_formattable.h"
13+
#include "unicode/messageformat2.h"
1314
#include "unicode/smpdtfmt.h"
1415
#include "unicode/ubidi.h"
1516
#include "messageformat2_allocation.h"
@@ -212,35 +213,39 @@ namespace message2 {
212213
df->format(dateInfo.date, result, nullptr, errorCode);
213214
}
214215

215-
static UnicodeString& handleBiDi(const Locale& locale,
216+
/*
217+
static UnicodeString& handleBiDi(MessageFormatter::UMFBidiIsolationStrategy strategy,
218+
const Locale& locale,
216219
UBiDiDirection dir,
217220
UnicodeString& result) {
218-
switch (dir) {
219-
case UBIDI_LTR:
220-
if (locale.isRightToLeft()) {
221-
result.insert(0, LRI_CHAR);
221+
if (strategy == MessageFormatter::U_MF_BIDI_DEFAULT) {
222+
switch (dir) {
223+
case UBIDI_LTR:
224+
if (locale.isRightToLeft()) {
225+
result.insert(0, LRI_CHAR);
226+
result.insert(result.length(), PDI_CHAR);
227+
}
228+
break;
229+
case UBIDI_RTL:
230+
result.insert(0, RLI_CHAR);
222231
result.insert(result.length(), PDI_CHAR);
232+
break;
233+
case UBIDI_NEUTRAL:
234+
// Do nothing
235+
break;
236+
case UBIDI_MIXED:
237+
// mixed = auto
238+
result.insert(0, FSI_CHAR);
239+
result.insert(result.length(), PDI_CHAR);
240+
break;
223241
}
224-
break;
225-
case UBIDI_RTL:
226-
result.insert(0, RLI_CHAR);
227-
result.insert(result.length(), PDI_CHAR);
228-
break;
229-
case UBIDI_NEUTRAL:
230-
// Do nothing
231-
break;
232-
case UBIDI_MIXED:
233-
// mixed = auto
234-
result.insert(0, FSI_CHAR);
235-
result.insert(result.length(), PDI_CHAR);
236-
break;
237242
}
238243
239244
return result;
240245
}
246+
*/
241247

242248
UnicodeString formattableToString(const Locale& locale,
243-
UBiDiDirection dir,
244249
const Formattable& toFormat,
245250
UErrorCode& status) {
246251
EMPTY_ON_ERROR(status);
@@ -303,7 +308,7 @@ namespace message2 {
303308
}
304309
}
305310

306-
return handleBiDi(locale, dir, result);
311+
return result;
307312
}
308313

309314
} // namespace message2

icu4c/source/i18n/messageformat2_formatter.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ namespace message2 {
9292
return *this;
9393
}
9494

95+
// Records the requested bidi isolation strategy on this builder and returns
// the builder itself so that setter calls can be chained.
MessageFormatter::Builder& MessageFormatter::Builder::setBidiIsolationStrategy(
        MessageFormatter::UMFBidiIsolationStrategy strategy) {
    this->bidiIsolationStrategy = strategy;
    return *this;
}
101+
95102
/*
96103
This build() method is non-destructive, which entails the risk that
97104
its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
@@ -158,6 +165,7 @@ namespace message2 {
158165

159166
normalizedInput = builder.normalizedInput;
160167
signalErrors = builder.signalErrors;
168+
bidiIsolationStrategy = builder.bidiIsolationStrategy;
161169

162170
// Build data model
163171
// First, check that there is a data model
@@ -205,6 +213,7 @@ namespace message2 {
205213
dataModel = std::move(other.dataModel);
206214
normalizedInput = std::move(other.normalizedInput);
207215
signalErrors = other.signalErrors;
216+
bidiIsolationStrategy = other.bidiIsolationStrategy;
208217
errors = other.errors;
209218
other.errors = nullptr;
210219
return *this;

icu4c/source/i18n/messageformat2_function_registry.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,7 @@ StandardFunctions::NumberValue::NumberValue(const Number& parent,
721721
opts = options.mergeOptions(arg.getResolvedOptions(), errorCode);
722722
operand = arg.getOperand();
723723
functionName = UnicodeString(parent.isInteger ? "integer" : "number");
724+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
724725

725726
number::LocalizedNumberFormatter realFormatter;
726727
realFormatter = formatterForOptions(parent, locale, opts, errorCode);
@@ -1060,6 +1061,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type,
10601061
functionName = functions::DATETIME;
10611062
break;
10621063
}
1064+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
10631065

10641066
const Formattable* source = &operand;
10651067

@@ -1421,7 +1423,6 @@ StandardFunctions::String::string(UErrorCode& success) {
14211423
}
14221424

14231425
extern UnicodeString formattableToString(const Locale&,
1424-
const UBiDiDirection,
14251426
const Formattable&,
14261427
UErrorCode&);
14271428

@@ -1454,9 +1455,10 @@ StandardFunctions::StringValue::StringValue(const FunctionContext& context,
14541455
CHECK_ERROR(status);
14551456
operand = val.getOperand();
14561457
functionName = UnicodeString("string");
1458+
dir = context.getDirection();
14571459
// No options
14581460
// Convert to string
1459-
formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status);
1461+
formattedString = formattableToString(context.getLocale(), operand, status);
14601462
}
14611463

14621464
void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys,

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1973,7 +1973,7 @@ void Parser::parseSelectors(UErrorCode& status) {
19731973
// Parse selectors
19741974
// "Backtracking" is required here. It's not clear if whitespace is
19751975
// (`[s]` selector) or (`[s]` variant)
1976-
while (isWhitespace(peek()) || peek() == DOLLAR) {
1976+
while (isWhitespace(peek()) || isBidiControl(peek()) || peek() == DOLLAR) {
19771977
int32_t whitespaceStart = index;
19781978
parseRequiredWhitespace(status);
19791979
// Restore precondition

icu4c/source/i18n/unicode/messageformat2.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,31 @@ namespace message2 {
168168
U_MF_STRICT
169169
} UMFErrorHandlingBehavior;
170170

171+
/**
172+
* Used in conjunction with the
173+
* MessageFormatter::Builder::setBidiIsolationStrategy() method.
174+
*
175+
* @internal ICU 77 technology preview
176+
* @deprecated This API is for technology preview only.
177+
*/
178+
typedef enum UMFBidiIsolationStrategy {
179+
/**
180+
* Do not perform bidi isolation (default)
181+
*
182+
* @internal ICU 77 technology preview
183+
* @deprecated This API is for technology preview only.
184+
*/
185+
U_MF_BIDI_NONE = 0,
186+
/**
187+
* Perform bidi isolation using the "default" strategy
188+
* described in the MF2 specification
189+
* https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text
190+
*
191+
* @internal ICU 77 technology preview
192+
* @deprecated This API is for technology preview only.
193+
*/
194+
U_MF_BIDI_DEFAULT
195+
} UMFBidiIsolationStrategy;
171196
/**
172197
* The mutable Builder class allows each part of the MessageFormatter to be initialized
173198
* separately; calling its `build()` method yields an immutable MessageFormatter.
@@ -197,6 +222,9 @@ namespace message2 {
197222
const MFFunctionRegistry* customMFFunctionRegistry;
198223
// Error behavior; see comment in `MessageFormatter` class
199224
bool signalErrors = false;
225+
// Bidi isolation strategy
226+
MessageFormatter::UMFBidiIsolationStrategy
227+
bidiIsolationStrategy = U_MF_BIDI_NONE;
200228

201229
void clearState();
202230
public:
@@ -280,6 +308,27 @@ namespace message2 {
280308
* @deprecated This API is for technology preview only.
281309
*/
282310
Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
311+
/**
312+
* Set the bidi isolation behavior for this formatter.
313+
*
314+
* "None" means that no bidi isolation will be performed.
315+
* "Default" means that the default bidi isolation strategy
316+
* as described in the MF2 specification
317+
* ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text )
318+
* will be applied.
319+
*
320+
* @param strategy An enum with type UMFBidiIsolationStrategy;
321+
* if strategy == U_MF_BIDI_NONE, then the behavior is "None".
322+
* If strategy == U_MF_BIDI_DEFAULT, then the behavior is "Default".
323+
*
324+
* The default is "None".
325+
*
326+
* @return A reference to the builder.
327+
*
328+
* @internal ICU 77 technology preview
329+
* @deprecated This API is for technology preview only.
330+
*/
331+
Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy);
283332
/**
284333
* Constructs a new immutable MessageFormatter using the pattern or data model
285334
* that was previously set, and the locale (if it was previously set)
@@ -358,6 +407,7 @@ namespace message2 {
358407

359408
// Formatting methods
360409
[[nodiscard]] InternalValue evalLiteral(const UnicodeString&, const data_model::Literal&, UErrorCode&) const;
410+
[[nodiscard]] UnicodeString& bidiIsolate(UBiDiDirection dir, UnicodeString&) const;
361411
void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
362412
FunctionContext makeFunctionContext(const FunctionOptions&) const;
363413
[[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&,
@@ -437,6 +487,8 @@ namespace message2 {
437487
// The default is false.
438488
bool signalErrors = false;
439489

490+
// Bidi isolation strategy.
491+
UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_NONE;
440492
}; // class MessageFormatter
441493

442494
} // namespace message2

icu4c/source/i18n/unicode/messageformat2_function_registry.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,17 @@ namespace message2 {
369369
* @deprecated This API is for technology preview only.
370370
*/
371371
virtual const FunctionOptions& getResolvedOptions() const { return opts; }
372+
/**
373+
* Returns the directionality of this value, i.e. the directionality
374+
* that its formatted result should have.
375+
*
376+
* @return A UBiDiDirection indicating the directionality that
377+
* the formatted result of this value should have.
378+
*
379+
* @internal ICU 77 technology preview
380+
* @deprecated This API is for technology preview only.
381+
*/
382+
virtual UBiDiDirection getDirection() const { return dir; }
372383
/**
373384
* Returns true if this value supports selection. The default method
374385
* returns false. The method must be overridden for values that support
@@ -501,6 +512,14 @@ namespace message2 {
501512
* @deprecated This API is for technology preview only.
502513
*/
503514
UnicodeString fallback;
515+
private:
516+
/*
517+
* Directionality that this value should be formatted with.
518+
*
519+
* @internal ICU 77 technology preview
520+
* @deprecated This API is for technology preview only.
521+
*/
522+
UBiDiDirection dir;
504523
}; // class FunctionValue
505524

506525
} // namespace message2

0 commit comments

Comments
 (0)