Skip to content

Commit 4dfb267

Browse files
committed
Implement bidi default strategy and update tests from unicode-org/message-format-wg#917
1 parent e620409 commit 4dfb267

12 files changed

+255
-111
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,7 @@ FunctionOptions MessageFormatter::resolveOptions(Environment& env,
251251
return FunctionOptions(std::move(*optionsVector), status);
252252
}
253253

254-
static UBiDiDirection getBiDiDirection(const Locale& locale,
255-
const UnicodeString& s) {
256-
if (s.isEmpty()) {
257-
return locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
258-
}
254+
static UBiDiDirection getBiDiDirection(const UnicodeString& s) {
259255
if (s == u"ltr") {
260256
return UBIDI_LTR;
261257
}
@@ -265,7 +261,7 @@ static UBiDiDirection getBiDiDirection(const Locale& locale,
265261
if (s == u"auto") {
266262
return UBIDI_MIXED;
267263
}
268-
return UBIDI_NEUTRAL;
264+
return UBIDI_MIXED; // stands in for "unknown"
269265
}
270266

271267
FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const {
@@ -289,8 +285,7 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
289285
localeToUse = locale;
290286
}
291287
}
292-
UBiDiDirection dir = getBiDiDirection(localeToUse,
293-
options.getStringFunctionOption(UnicodeString("u:dir")));
288+
UBiDiDirection dir = getBiDiDirection(options.getStringFunctionOption(UnicodeString("u:dir")));
294289
UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id"));
295290

296291
return FunctionContext(localeToUse, dir, id);
@@ -415,6 +410,48 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
415410
}
416411
}
417412

413+
// Applies this formatter's bidi isolation strategy to `fmt`, the formatted
// string for a single expression, modifying it in place.
//
// dir: the directionality of `fmt`. UBIDI_LTR and UBIDI_RTL are handled
//      explicitly; every other value is treated as 'unknown'.
// fmt: the formatted string; may be wrapped in isolate control characters.
//
// Returns a reference to `fmt`.
//
// The numbered steps in the comments below follow the "default" bidi
// strategy in the MessageFormat 2 specification.
UnicodeString& MessageFormatter::bidiIsolate(UBiDiDirection dir, UnicodeString& fmt) const {
    // With the 'none' strategy the string is handed back untouched.
    if (bidiIsolationStrategy == U_MF_BIDI_NONE) {
        return fmt;
    }

    // Step 1: msgdir is the directionality of the whole message, one of
    // « 'LTR', 'RTL', 'unknown' ». Here it is derived from the formatter's
    // locale, so only the LTR/RTL distinction is made.
    const bool messageIsRtl = locale.isRightToLeft();

    // Step 2i (fmt is the formatted string of the resolved value) and
    // step 2ii (dir is the directionality of fmt): both arrive as arguments.

    // Prefixes fmt with the given isolate character and postfixes it with
    // U+2069 POP DIRECTIONAL ISOLATE.
    const auto isolateWith = [&fmt](auto opener) -> UnicodeString& {
        fmt.insert(0, opener);
        fmt.insert(fmt.length(), PDI_CHAR);
        return fmt;
    };

    // Step 2iii: dir is 'LTR'.
    if (dir == UBIDI_LTR) {
        if (!messageIsRtl) {
            // 2iii(a): msgdir is also 'LTR', so fmt is emitted as itself.
            return fmt;
        }
        // 2iii(b): otherwise wrap in U+2066 LEFT-TO-RIGHT ISOLATE ... PDI.
        return isolateWith(LRI_CHAR);
    }

    // Step 2iv: dir is 'RTL'.
    if (dir == UBIDI_RTL) {
        // 2iv(a): wrap in U+2067 RIGHT-TO-LEFT ISOLATE ... PDI.
        return isolateWith(RLI_CHAR);
    }

    // Step 2v: any other directionality.
    // 2v(a): wrap in U+2068 FIRST STRONG ISOLATE ... PDI.
    return isolateWith(FSI_CHAR);
}
454+
418455
// Formats each text and expression part of a pattern, appending the results to `result`
419456
void MessageFormatter::formatPattern(MessageContext& context,
420457
Environment& globalEnv,
@@ -440,7 +477,8 @@ void MessageFormatter::formatPattern(MessageContext& context,
440477
const FunctionValue* val = partVal.getValue(status);
441478
// Shouldn't be null or a fallback
442479
U_ASSERT(U_SUCCESS(status));
443-
result += val->formatToString(status);
480+
UnicodeString fmt = val->formatToString(status);
481+
result += bidiIsolate(val->getDirection(), fmt);
444482
// Handle formatting errors. `formatToString()` can't take a context and thus can't
445483
// register an error directly
446484
if (status == U_MF_FORMATTING_ERROR) {

icu4c/source/i18n/messageformat2_evaluation.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ BaseValue::BaseValue(const Locale& loc, const UnicodeString& fb, const Formattab
3232
fallback += LEFT_CURLY_BRACE;
3333
fallback += fb;
3434
fallback += RIGHT_CURLY_BRACE;
35+
dir = UBIDI_MIXED; // represents 'unknown'
3536
}
3637

3738
/* static */ BaseValue* BaseValue::create(const Locale& locale,
@@ -41,18 +42,16 @@ BaseValue::BaseValue(const Locale& loc, const UnicodeString& fb, const Formattab
4142
return message2::create<BaseValue>(BaseValue(locale, fallback, source), errorCode);
4243
}
4344

44-
extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&);
45+
extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&);
4546

4647
UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const {
47-
return formattableToString(locale,
48-
UBIDI_NEUTRAL,
49-
operand,
50-
errorCode);
48+
return formattableToString(locale, operand, errorCode);
5149
}
5250

5351
BaseValue& BaseValue::operator=(BaseValue&& other) noexcept {
5452
operand = std::move(other.operand);
5553
opts = std::move(other.opts);
54+
dir = other.dir;
5655
locale = other.locale;
5756
fallback = other.fallback;
5857

icu4c/source/i18n/messageformat2_formattable.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#if !UCONFIG_NO_MF2
1111

1212
#include "unicode/messageformat2_formattable.h"
13+
#include "unicode/messageformat2.h"
1314
#include "unicode/smpdtfmt.h"
1415
#include "unicode/ubidi.h"
1516
#include "messageformat2_allocation.h"
@@ -188,35 +189,39 @@ namespace message2 {
188189
df->format(date, result, 0, errorCode);
189190
}
190191

191-
static UnicodeString& handleBiDi(const Locale& locale,
192+
/*
193+
static UnicodeString& handleBiDi(MessageFormatter::UMFBidiIsolationStrategy strategy,
194+
const Locale& locale,
192195
UBiDiDirection dir,
193196
UnicodeString& result) {
194-
switch (dir) {
195-
case UBIDI_LTR:
196-
if (locale.isRightToLeft()) {
197-
result.insert(0, LRI_CHAR);
197+
if (strategy == MessageFormatter::U_MF_BIDI_DEFAULT) {
198+
switch (dir) {
199+
case UBIDI_LTR:
200+
if (locale.isRightToLeft()) {
201+
result.insert(0, LRI_CHAR);
202+
result.insert(result.length(), PDI_CHAR);
203+
}
204+
break;
205+
case UBIDI_RTL:
206+
result.insert(0, RLI_CHAR);
198207
result.insert(result.length(), PDI_CHAR);
208+
break;
209+
case UBIDI_NEUTRAL:
210+
// Do nothing
211+
break;
212+
case UBIDI_MIXED:
213+
// mixed = auto
214+
result.insert(0, FSI_CHAR);
215+
result.insert(result.length(), PDI_CHAR);
216+
break;
199217
}
200-
break;
201-
case UBIDI_RTL:
202-
result.insert(0, RLI_CHAR);
203-
result.insert(result.length(), PDI_CHAR);
204-
break;
205-
case UBIDI_NEUTRAL:
206-
// Do nothing
207-
break;
208-
case UBIDI_MIXED:
209-
// mixed = auto
210-
result.insert(0, FSI_CHAR);
211-
result.insert(result.length(), PDI_CHAR);
212-
break;
213218
}
214219
215220
return result;
216221
}
222+
*/
217223

218224
UnicodeString formattableToString(const Locale& locale,
219-
UBiDiDirection dir,
220225
const Formattable& toFormat,
221226
UErrorCode& status) {
222227
EMPTY_ON_ERROR(status);
@@ -279,7 +284,7 @@ namespace message2 {
279284
}
280285
}
281286

282-
return handleBiDi(locale, dir, result);
287+
return result;
283288
}
284289

285290
} // namespace message2

icu4c/source/i18n/messageformat2_formatter.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ namespace message2 {
9292
return *this;
9393
}
9494

95+
// Stores the requested bidi isolation strategy in this builder and
// returns a reference to the builder.
MessageFormatter::Builder& MessageFormatter::Builder::setBidiIsolationStrategy(
        MessageFormatter::UMFBidiIsolationStrategy strategy) {
    this->bidiIsolationStrategy = strategy;
    return *this;
}
101+
95102
/*
96103
This build() method is non-destructive, which entails the risk that
97104
its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
@@ -158,6 +165,7 @@ namespace message2 {
158165

159166
normalizedInput = builder.normalizedInput;
160167
signalErrors = builder.signalErrors;
168+
bidiIsolationStrategy = builder.bidiIsolationStrategy;
161169

162170
// Build data model
163171
// First, check that there is a data model
@@ -205,6 +213,7 @@ namespace message2 {
205213
dataModel = std::move(other.dataModel);
206214
normalizedInput = std::move(other.normalizedInput);
207215
signalErrors = other.signalErrors;
216+
bidiIsolationStrategy = other.bidiIsolationStrategy;
208217
errors = other.errors;
209218
other.errors = nullptr;
210219
return *this;

icu4c/source/i18n/messageformat2_function_registry.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ StandardFunctions::NumberValue::NumberValue(const Number& parent,
589589
opts = options.mergeOptions(arg.getResolvedOptions(), errorCode);
590590
operand = arg.getOperand();
591591
functionName = UnicodeString(parent.isInteger ? "integer" : "number");
592+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
592593

593594
number::LocalizedNumberFormatter realFormatter;
594595
realFormatter = formatterForOptions(parent, locale, opts, errorCode);
@@ -860,6 +861,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type,
860861
functionName = UnicodeString("time");
861862
break;
862863
}
864+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
863865

864866
const Formattable* source = &operand;
865867

@@ -1112,7 +1114,6 @@ StandardFunctions::String::string(UErrorCode& success) {
11121114
}
11131115

11141116
extern UnicodeString formattableToString(const Locale&,
1115-
const UBiDiDirection,
11161117
const Formattable&,
11171118
UErrorCode&);
11181119

@@ -1145,9 +1146,10 @@ StandardFunctions::StringValue::StringValue(const FunctionContext& context,
11451146
CHECK_ERROR(status);
11461147
operand = val.getOperand();
11471148
functionName = UnicodeString("string");
1149+
dir = context.getDirection();
11481150
// No options
11491151
// Convert to string
1150-
formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status);
1152+
formattedString = formattableToString(context.getLocale(), operand, status);
11511153
}
11521154

11531155
void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys,

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2021,7 +2021,7 @@ void Parser::parseSelectors(UErrorCode& status) {
20212021
// Parse selectors
20222022
// "Backtracking" is required here. It's not clear if whitespace is
20232023
// (`[s]` selector) or (`[s]` variant)
2024-
while (isWhitespace(peek()) || peek() == DOLLAR) {
2024+
while (isWhitespace(peek()) || isBidiControl(peek()) || peek() == DOLLAR) {
20252025
int32_t whitespaceStart = index;
20262026
parseRequiredWhitespace(status);
20272027
// Restore precondition

icu4c/source/i18n/unicode/messageformat2.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,31 @@ namespace message2 {
168168
U_MF_STRICT
169169
} UMFErrorHandlingBehavior;
170170

171+
/**
172+
* Used in conjunction with the
173+
* MessageFormatter::Builder::setBidiIsolationStrategy() method.
174+
*
175+
* @internal ICU 77 technology preview
176+
* @deprecated This API is for technology preview only.
177+
*/
178+
typedef enum UMFBidiIsolationStrategy {
179+
/**
180+
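* Do not perform bidi isolation. This is the setting in effect when no
* strategy has been set explicitly; it is not the same as the "default"
* strategy of the MF2 specification, which is U_MF_BIDI_DEFAULT.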
181+
*
182+
* @internal ICU 77 technology preview
183+
* @deprecated This API is for technology preview only.
184+
*/
185+
U_MF_BIDI_NONE = 0,
186+
/**
187+
* Perform bidi isolation using the "default" strategy
188+
* described in the MF2 specification
189+
* https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text
190+
*
191+
* @internal ICU 77 technology preview
192+
* @deprecated This API is for technology preview only.
193+
*/
194+
U_MF_BIDI_DEFAULT
195+
} UMFBidiIsolationStrategy;
171196
/**
172197
* The mutable Builder class allows each part of the MessageFormatter to be initialized
173198
* separately; calling its `build()` method yields an immutable MessageFormatter.
@@ -197,6 +222,9 @@ namespace message2 {
197222
const MFFunctionRegistry* customMFFunctionRegistry;
198223
// Error behavior; see comment in `MessageFormatter` class
199224
bool signalErrors = false;
225+
// Bidi isolation strategy
226+
MessageFormatter::UMFBidiIsolationStrategy
227+
bidiIsolationStrategy = U_MF_BIDI_NONE;
200228

201229
void clearState();
202230
public:
@@ -280,6 +308,27 @@ namespace message2 {
280308
* @deprecated This API is for technology preview only.
281309
*/
282310
Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
311+
/**
312+
* Set the bidi isolation behavior for this formatter.
313+
*
314+
* "None" means that no bidi isolation will be performed.
315+
* "Default" means that the default bidi isolation strategy
316+
* as described in the MF2 specification
317+
* ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text )
318+
* will be applied.
319+
*
320+
* @param strategy An enum with type UMFBidiIsolationStrategy;
321+
* if strategy == U_MF_BIDI_NONE, then the behavior is "None".
322+
* If strategy == U_MF_BIDI_DEFAULT, then the behavior is "Default".
323+
*
324+
* The default is "None".
325+
*
326+
* @return A reference to the builder.
327+
*
328+
* @internal ICU 77 technology preview
329+
* @deprecated This API is for technology preview only.
330+
*/
331+
Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy);
283332
/**
284333
* Constructs a new immutable MessageFormatter using the pattern or data model
285334
* that was previously set, and the locale (if it was previously set)
@@ -356,6 +405,7 @@ namespace message2 {
356405

357406
// Formatting methods
358407
[[nodiscard]] InternalValue evalLiteral(const UnicodeString&, const data_model::Literal&, UErrorCode&) const;
408+
[[nodiscard]] UnicodeString& bidiIsolate(UBiDiDirection dir, UnicodeString&) const;
359409
void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
360410
FunctionContext makeFunctionContext(const FunctionOptions&) const;
361411
[[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&,
@@ -433,6 +483,8 @@ namespace message2 {
433483
// The default is false.
434484
bool signalErrors = false;
435485

486+
// Bidi isolation strategy.
487+
UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_NONE;
436488
}; // class MessageFormatter
437489

438490
} // namespace message2

icu4c/source/i18n/unicode/messageformat2_function_registry.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,17 @@ namespace message2 {
369369
* @deprecated This API is for technology preview only.
370370
*/
371371
virtual const FunctionOptions& getResolvedOptions() const { return opts; }
372+
/**
373+
* Returns the directionality of this value, i.e. the directionality
374+
* that its formatted result should have.
375+
*
376+
* @return A UBiDiDirection indicating the directionality that
377+
* the formatted result of this value should have.
378+
*
379+
* @internal ICU 77 technology preview
380+
* @deprecated This API is for technology preview only.
381+
*/
382+
virtual UBiDiDirection getDirection() const { return dir; }
372383
/**
373384
* Returns true if this value supports selection. The default method
374385
* returns false. The method must be overridden for values that support
@@ -484,6 +495,14 @@ namespace message2 {
484495
* @deprecated This API is for technology preview only.
485496
*/
486497
UnicodeString fallback;
498+
private:
499+
/**
500+
* Directionality that this value should be formatted with.
501+
*
502+
* @internal ICU 77 technology preview
503+
* @deprecated This API is for technology preview only.
504+
*/
505+
UBiDiDirection dir;
487506
}; // class FunctionValue
488507

489508
} // namespace message2

0 commit comments

Comments
 (0)