Skip to content

Commit e12866f

Browse files
committed
Implement bidi default strategy and update tests from unicode-org/message-format-wg#917
1 parent f19b35e commit e12866f

13 files changed

+256
-113
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,7 @@ FunctionOptions MessageFormatter::resolveOptions(Environment& env,
197197
return FunctionOptions(std::move(*optionsVector), status);
198198
}
199199

200-
static UBiDiDirection getBiDiDirection(const Locale& locale,
201-
const UnicodeString& s) {
202-
if (s.isEmpty()) {
203-
return locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
204-
}
200+
static UBiDiDirection getBiDiDirection(const UnicodeString& s) {
205201
if (s == u"ltr") {
206202
return UBIDI_LTR;
207203
}
@@ -211,7 +207,7 @@ static UBiDiDirection getBiDiDirection(const Locale& locale,
211207
if (s == u"auto") {
212208
return UBIDI_MIXED;
213209
}
214-
return UBIDI_NEUTRAL;
210+
return UBIDI_MIXED; // stands in for "unknown"
215211
}
216212

217213
FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const {
@@ -235,8 +231,7 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
235231
localeToUse = locale;
236232
}
237233
}
238-
UBiDiDirection dir = getBiDiDirection(localeToUse,
239-
options.getStringFunctionOption(UnicodeString("u:dir")));
234+
UBiDiDirection dir = getBiDiDirection(options.getStringFunctionOption(UnicodeString("u:dir")));
240235
UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id"));
241236

242237
return FunctionContext(localeToUse, dir, id);
@@ -360,6 +355,48 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
360355
}
361356
}
362357

358+
UnicodeString& MessageFormatter::bidiIsolate(UBiDiDirection dir, UnicodeString& fmt) const {
359+
// If strategy is 'none', just return the string
360+
if (bidiIsolationStrategy == U_MF_BIDI_NONE) {
361+
return fmt;
362+
}
363+
364+
/* 1. Let msgdir be the directionality of the whole message, one of « 'LTR', 'RTL', 'unknown' ». These correspond to the message having left-to-right directionality, right-to-left directionality, and to the message's directionality not being known. */
365+
bool isLtr = !locale.isRightToLeft();
366+
367+
// 2i Let fmt be the formatted string representation of the resolved value of exp.
368+
// (Passed as argument)
369+
370+
// 2ii Let dir be the directionality of fmt, one of « 'LTR', 'RTL', 'unknown' », with the same meanings as for msgdir
371+
// (Passed as argument)
372+
373+
// 2iii. If dir is 'LTR'
374+
switch (dir) {
375+
case UBIDI_LTR:
376+
if (isLtr) {
377+
// 2iii(a). If msgdir is 'LTR', then in the formatted output, let fmt be itself
378+
return fmt;
379+
}
380+
// 2iii(b) Else, in the formatted output, prefix fmt with U+2066 LEFT-TO-RIGHT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
381+
fmt.insert(0, LRI_CHAR);
382+
fmt.insert(fmt.length(), PDI_CHAR);
383+
break;
384+
// 2iv. Else, if dir is 'RTL':
385+
case UBIDI_RTL:
386+
// 2iv(a). In the formatted output, prefix fmt with U+2067 RIGHT-TO-LEFT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
387+
fmt.insert(0, RLI_CHAR);
388+
fmt.insert(fmt.length(), PDI_CHAR);
389+
break;
390+
// 2v. Else:
391+
default:
392+
// 2v(a). In the formatted output, prefix fmt with U+2068 FIRST STRONG ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
393+
fmt.insert(0, FSI_CHAR);
394+
fmt.insert(fmt.length(), PDI_CHAR);
395+
break;
396+
}
397+
return fmt;
398+
}
399+
363400
// Formats each text and expression part of a pattern, appending the results to `result`
364401
void MessageFormatter::formatPattern(MessageContext& context,
365402
Environment& globalEnv,
@@ -385,7 +422,8 @@ void MessageFormatter::formatPattern(MessageContext& context,
385422
const FunctionValue* val = partVal.getValue(status);
386423
// Shouldn't be null or a fallback
387424
U_ASSERT(U_SUCCESS(status));
388-
result += val->formatToString(status);
425+
UnicodeString fmt = val->formatToString(status);
426+
result += bidiIsolate(val->getDirection(), fmt);
389427
// Handle formatting errors. `formatToString()` can't take a context and thus can't
390428
// register an error directly
391429
if (status == U_MF_FORMATTING_ERROR) {
@@ -415,7 +453,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, Environment& en
415453
// 2. For each expression exp of the message's selectors
416454
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
417455
// 2i. Let rv be the resolved value of exp.
418-
InternalValue& rv = evalExpression(env, selectors[i], context, status);
456+
InternalValue& rv = evalOperand(env, Operand(selectors[i]), context, status);
419457
if (rv.isSelectable()) {
420458
// 2ii. If selection is supported for rv:
421459
// (True if this code has been reached)

icu4c/source/i18n/messageformat2_evaluation.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ using namespace data_model;
2727
BaseValue::BaseValue(const Locale& loc, const Formattable& source)
2828
: locale(loc) {
2929
operand = source;
30+
dir = UBIDI_MIXED; // represents 'unknown'
3031
}
3132

3233
/* static */ BaseValue* BaseValue::create(const Locale& locale,
@@ -35,18 +36,16 @@ BaseValue::BaseValue(const Locale& loc, const Formattable& source)
3536
return message2::create<BaseValue>(BaseValue(locale, source), errorCode);
3637
}
3738

38-
extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&);
39+
extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&);
3940

4041
UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const {
41-
return formattableToString(locale,
42-
UBIDI_NEUTRAL,
43-
operand,
44-
errorCode);
42+
return formattableToString(locale, operand, errorCode);
4543
}
4644

4745
BaseValue& BaseValue::operator=(BaseValue&& other) noexcept {
4846
operand = std::move(other.operand);
4947
opts = std::move(other.opts);
48+
dir = other.dir;
5049
locale = other.locale;
5150

5251
return *this;

icu4c/source/i18n/messageformat2_formattable.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#if !UCONFIG_NO_MF2
99

1010
#include "unicode/messageformat2_formattable.h"
11+
#include "unicode/messageformat2.h"
1112
#include "unicode/smpdtfmt.h"
1213
#include "unicode/ubidi.h"
1314
#include "messageformat2_allocation.h"
@@ -186,35 +187,39 @@ namespace message2 {
186187
df->format(date, result, 0, errorCode);
187188
}
188189

189-
static UnicodeString& handleBiDi(const Locale& locale,
190+
/*
191+
static UnicodeString& handleBiDi(MessageFormatter::UMFBidiIsolationStrategy strategy,
192+
const Locale& locale,
190193
UBiDiDirection dir,
191194
UnicodeString& result) {
192-
switch (dir) {
193-
case UBIDI_LTR:
194-
if (locale.isRightToLeft()) {
195-
result.insert(0, LRI_CHAR);
195+
if (strategy == MessageFormatter::U_MF_BIDI_DEFAULT) {
196+
switch (dir) {
197+
case UBIDI_LTR:
198+
if (locale.isRightToLeft()) {
199+
result.insert(0, LRI_CHAR);
200+
result.insert(result.length(), PDI_CHAR);
201+
}
202+
break;
203+
case UBIDI_RTL:
204+
result.insert(0, RLI_CHAR);
196205
result.insert(result.length(), PDI_CHAR);
206+
break;
207+
case UBIDI_NEUTRAL:
208+
// Do nothing
209+
break;
210+
case UBIDI_MIXED:
211+
// mixed = auto
212+
result.insert(0, FSI_CHAR);
213+
result.insert(result.length(), PDI_CHAR);
214+
break;
197215
}
198-
break;
199-
case UBIDI_RTL:
200-
result.insert(0, RLI_CHAR);
201-
result.insert(result.length(), PDI_CHAR);
202-
break;
203-
case UBIDI_NEUTRAL:
204-
// Do nothing
205-
break;
206-
case UBIDI_MIXED:
207-
// mixed = auto
208-
result.insert(0, FSI_CHAR);
209-
result.insert(result.length(), PDI_CHAR);
210-
break;
211216
}
212217
213218
return result;
214219
}
220+
*/
215221

216222
UnicodeString formattableToString(const Locale& locale,
217-
UBiDiDirection dir,
218223
const Formattable& toFormat,
219224
UErrorCode& status) {
220225
EMPTY_ON_ERROR(status);
@@ -277,7 +282,7 @@ namespace message2 {
277282
}
278283
}
279284

280-
return handleBiDi(locale, dir, result);
285+
return result;
281286
}
282287

283288
} // namespace message2

icu4c/source/i18n/messageformat2_formatter.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ namespace message2 {
9090
return *this;
9191
}
9292

93+
MessageFormatter::Builder&
94+
MessageFormatter::Builder::setBidiIsolationStrategy(
95+
MessageFormatter::UMFBidiIsolationStrategy strategy) {
96+
bidiIsolationStrategy = strategy;
97+
return *this;
98+
}
99+
93100
/*
94101
This build() method is non-destructive, which entails the risk that
95102
its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
@@ -147,6 +154,7 @@ namespace message2 {
147154

148155
normalizedInput = builder.normalizedInput;
149156
signalErrors = builder.signalErrors;
157+
bidiIsolationStrategy = builder.bidiIsolationStrategy;
150158

151159
// Build data model
152160
// First, check that there is a data model
@@ -194,6 +202,7 @@ namespace message2 {
194202
dataModel = std::move(other.dataModel);
195203
normalizedInput = std::move(other.normalizedInput);
196204
signalErrors = other.signalErrors;
205+
bidiIsolationStrategy = other.bidiIsolationStrategy;
197206
errors = other.errors;
198207
other.errors = nullptr;
199208
return *this;

icu4c/source/i18n/messageformat2_function_registry.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ StandardFunctions::NumberValue::NumberValue(const Number& parent,
540540
locale = context.getLocale();
541541
opts = options.mergeOptions(arg.getResolvedOptions(), errorCode);
542542
operand = arg.getOperand();
543+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
543544

544545
number::LocalizedNumberFormatter realFormatter;
545546
realFormatter = formatterForOptions(parent, locale, opts, errorCode);
@@ -785,6 +786,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type,
785786
const Locale& locale = context.getLocale();
786787
operand = val.getOperand();
787788
opts = options.mergeOptions(val.getResolvedOptions(), errorCode);
789+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
788790

789791
const Formattable* source = &operand;
790792

@@ -1039,7 +1041,6 @@ StandardFunctions::String::string(UErrorCode& success) {
10391041
}
10401042

10411043
extern UnicodeString formattableToString(const Locale&,
1042-
const UBiDiDirection,
10431044
const Formattable&,
10441045
UErrorCode&);
10451046

@@ -1071,9 +1072,10 @@ StandardFunctions::StringValue::StringValue(const FunctionContext& context,
10711072
UErrorCode& status) {
10721073
CHECK_ERROR(status);
10731074
operand = val.getOperand();
1075+
dir = context.getDirection();
10741076
// No options
10751077
// Convert to string
1076-
formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status);
1078+
formattedString = formattableToString(context.getLocale(), operand, status);
10771079
}
10781080

10791081
void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys,

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1972,7 +1972,7 @@ void Parser::parseSelectors(UErrorCode& status) {
19721972
// Parse selectors
19731973
// "Backtracking" is required here. It's not clear whether the whitespace is part of
19741974
// (`[s]` selector) or (`[s]` variant)
1975-
while (isWhitespace(peek()) || peek() == DOLLAR) {
1975+
while (isWhitespace(peek()) || isBidiControl(peek()) || peek() == DOLLAR) {
19761976
int32_t whitespaceStart = index;
19771977
parseRequiredWhitespace(status);
19781978
// Restore precondition

icu4c/source/i18n/unicode/messageformat2.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,31 @@ namespace message2 {
165165
U_MF_STRICT
166166
} UMFErrorHandlingBehavior;
167167

168+
/**
169+
* Used in conjunction with the
170+
* MessageFormatter::Builder::setBidiIsolationStrategy() method.
171+
*
172+
* @internal ICU 77 technology preview
173+
* @deprecated This API is for technology preview only.
174+
*/
175+
typedef enum UMFBidiIsolationStrategy {
176+
/**
177+
* Do not perform bidi isolation (default)
178+
*
179+
* @internal ICU 77 technology preview
180+
* @deprecated This API is for technology preview only.
181+
*/
182+
U_MF_BIDI_NONE = 0,
183+
/**
184+
* Perform bidi isolation using the "default" strategy
185+
* described in the MF2 specification
186+
* https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text
187+
*
188+
* @internal ICU 77 technology preview
189+
* @deprecated This API is for technology preview only.
190+
*/
191+
U_MF_BIDI_DEFAULT
192+
} UMFBidiIsolationStrategy;
168193
/**
169194
* The mutable Builder class allows each part of the MessageFormatter to be initialized
170195
* separately; calling its `build()` method yields an immutable MessageFormatter.
@@ -194,6 +219,9 @@ namespace message2 {
194219
const MFFunctionRegistry* customMFFunctionRegistry;
195220
// Error behavior; see comment in `MessageFormatter` class
196221
bool signalErrors = false;
222+
// Bidi isolation strategy
223+
MessageFormatter::UMFBidiIsolationStrategy
224+
bidiIsolationStrategy = U_MF_BIDI_NONE;
197225

198226
void clearState();
199227
public:
@@ -277,6 +305,27 @@ namespace message2 {
277305
* @deprecated This API is for technology preview only.
278306
*/
279307
Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
308+
/**
309+
* Set the bidi isolation behavior for this formatter.
310+
*
311+
* "None" means that no bidi isolation will be performed.
312+
* "Default" means that the default bidi isolation strategy
313+
* as described in the MF2 specification
314+
* ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text )
315+
* will be applied.
316+
*
317+
* @param strategy An enum with type UMFBidiIsolationStrategy;
318+
* if strategy == U_MF_BIDI_NONE, then the behavior is "None".
319+
* If strategy == U_MF_BIDI_DEFAULT, then the behavior is "Default".
320+
*
321+
* The default is "None".
322+
*
323+
* @return A reference to the builder.
324+
*
325+
* @internal ICU 77 technology preview
326+
* @deprecated This API is for technology preview only.
327+
*/
328+
Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy);
280329
/**
281330
* Constructs a new immutable MessageFormatter using the pattern or data model
282331
* that was previously set, and the locale (if it was previously set)
@@ -351,6 +400,7 @@ namespace message2 {
351400

352401
// Formatting methods
353402
[[nodiscard]] InternalValue evalLiteral(const data_model::Literal&, UErrorCode&) const;
403+
[[nodiscard]] UnicodeString& bidiIsolate(UBiDiDirection dir, UnicodeString&) const;
354404
void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
355405
FunctionContext makeFunctionContext(const FunctionOptions&) const;
356406
[[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&,
@@ -426,6 +476,9 @@ namespace message2 {
426476
// formatting methods return best-effort output.
427477
// The default is false.
428478
bool signalErrors = false;
479+
480+
// Bidi isolation strategy.
481+
UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_NONE;
429482
}; // class MessageFormatter
430483

431484
} // namespace message2

0 commit comments

Comments
 (0)