Skip to content

Commit 80ba818

Browse files
committed
Implement bidi default strategy and update tests from unicode-org/message-format-wg#917
1 parent a75d490 commit 80ba818

12 files changed

+254
-112
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -250,11 +250,7 @@ FunctionOptions MessageFormatter::resolveOptions(Environment& env,
250250
return FunctionOptions(std::move(*optionsVector), status);
251251
}
252252

253-
static UBiDiDirection getBiDiDirection(const Locale& locale,
254-
const UnicodeString& s) {
255-
if (s.isEmpty()) {
256-
return locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
257-
}
253+
static UBiDiDirection getBiDiDirection(const UnicodeString& s) {
258254
if (s == u"ltr") {
259255
return UBIDI_LTR;
260256
}
@@ -264,7 +260,7 @@ static UBiDiDirection getBiDiDirection(const Locale& locale,
264260
if (s == u"auto") {
265261
return UBIDI_MIXED;
266262
}
267-
return UBIDI_NEUTRAL;
263+
return UBIDI_MIXED; // stands in for "unknown"
268264
}
269265

270266
FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const {
@@ -288,8 +284,7 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
288284
localeToUse = locale;
289285
}
290286
}
291-
UBiDiDirection dir = getBiDiDirection(localeToUse,
292-
options.getStringFunctionOption(UnicodeString("u:dir")));
287+
UBiDiDirection dir = getBiDiDirection(options.getStringFunctionOption(UnicodeString("u:dir")));
293288
UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id"));
294289

295290
return FunctionContext(localeToUse, dir, id);
@@ -414,6 +409,48 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt
414409
}
415410
}
416411

412+
UnicodeString& MessageFormatter::bidiIsolate(UBiDiDirection dir, UnicodeString& fmt) const {
413+
// If strategy is 'none', just return the string
414+
if (bidiIsolationStrategy == U_MF_BIDI_NONE) {
415+
return fmt;
416+
}
417+
418+
/* 1. Let msgdir be the directionality of the whole message, one of « 'LTR', 'RTL', 'unknown' ». These correspond to the message having left-to-right directionality, right-to-left directionality, and to the message's directionality not being known. */
419+
bool isLtr = !locale.isRightToLeft();
420+
421+
// 2i Let fmt be the formatted string representation of the resolved value of exp.
422+
// (Passed as argument)
423+
424+
// 2ii Let dir be the directionality of fmt, one of « 'LTR', 'RTL', 'unknown' », with the same meanings as for msgdir
425+
// (Passed as argument)
426+
427+
// 2iii. If dir is 'LTR'
428+
switch (dir) {
429+
case UBIDI_LTR:
430+
if (isLtr) {
431+
// 2iii(a). If msgdir is 'LTR', then in the formatted output, let fmt be itself
432+
return fmt;
433+
}
434+
// 2iii(b) Else, in the formatted output, prefix fmt with U+2066 LEFT-TO-RIGHT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
435+
fmt.insert(0, LRI_CHAR);
436+
fmt.insert(fmt.length(), PDI_CHAR);
437+
break;
438+
// 2iv. Else, if dir is 'RTL':
439+
case UBIDI_RTL:
440+
// 2iv(a). In the formatted output, prefix fmt with U+2067 RIGHT-TO-LEFT ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
441+
fmt.insert(0, RLI_CHAR);
442+
fmt.insert(fmt.length(), PDI_CHAR);
443+
break;
444+
// 2v. Else:
445+
default:
446+
// 2v(a). In the formatted output, prefix fmt with U+2068 FIRST STRONG ISOLATE and postfix it with U+2069 POP DIRECTIONAL ISOLATE.
447+
fmt.insert(0, FSI_CHAR);
448+
fmt.insert(fmt.length(), PDI_CHAR);
449+
break;
450+
}
451+
return fmt;
452+
}
453+
417454
// Formats each text and expression part of a pattern, appending the results to `result`
418455
void MessageFormatter::formatPattern(MessageContext& context,
419456
Environment& globalEnv,
@@ -439,7 +476,8 @@ void MessageFormatter::formatPattern(MessageContext& context,
439476
const FunctionValue* val = partVal.getValue(status);
440477
// Shouldn't be null or a fallback
441478
U_ASSERT(U_SUCCESS(status));
442-
result += val->formatToString(status);
479+
UnicodeString fmt = val->formatToString(status);
480+
result += bidiIsolate(val->getDirection(), fmt);
443481
// Handle formatting errors. `formatToString()` can't take a context and thus can't
444482
// register an error directly
445483
if (status == U_MF_FORMATTING_ERROR) {

icu4c/source/i18n/messageformat2_evaluation.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ using namespace data_model;
2929
BaseValue::BaseValue(const Locale& loc, const Formattable& source)
3030
: locale(loc) {
3131
operand = source;
32+
dir = UBIDI_MIXED; // represents 'unknown'
3233
}
3334

3435
/* static */ BaseValue* BaseValue::create(const Locale& locale,
@@ -37,18 +38,16 @@ BaseValue::BaseValue(const Locale& loc, const Formattable& source)
3738
return message2::create<BaseValue>(BaseValue(locale, source), errorCode);
3839
}
3940

40-
extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&);
41+
extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&);
4142

4243
UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const {
43-
return formattableToString(locale,
44-
UBIDI_NEUTRAL,
45-
operand,
46-
errorCode);
44+
return formattableToString(locale, operand, errorCode);
4745
}
4846

4947
BaseValue& BaseValue::operator=(BaseValue&& other) noexcept {
5048
operand = std::move(other.operand);
5149
opts = std::move(other.opts);
50+
dir = other.dir;
5251
locale = other.locale;
5352

5453
return *this;

icu4c/source/i18n/messageformat2_formattable.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#if !UCONFIG_NO_MF2
1111

1212
#include "unicode/messageformat2_formattable.h"
13+
#include "unicode/messageformat2.h"
1314
#include "unicode/smpdtfmt.h"
1415
#include "unicode/ubidi.h"
1516
#include "messageformat2_allocation.h"
@@ -188,35 +189,39 @@ namespace message2 {
188189
df->format(date, result, 0, errorCode);
189190
}
190191

191-
static UnicodeString& handleBiDi(const Locale& locale,
192+
/*
193+
static UnicodeString& handleBiDi(MessageFormatter::UMFBidiIsolationStrategy strategy,
194+
const Locale& locale,
192195
UBiDiDirection dir,
193196
UnicodeString& result) {
194-
switch (dir) {
195-
case UBIDI_LTR:
196-
if (locale.isRightToLeft()) {
197-
result.insert(0, LRI_CHAR);
197+
if (strategy == MessageFormatter::U_MF_BIDI_DEFAULT) {
198+
switch (dir) {
199+
case UBIDI_LTR:
200+
if (locale.isRightToLeft()) {
201+
result.insert(0, LRI_CHAR);
202+
result.insert(result.length(), PDI_CHAR);
203+
}
204+
break;
205+
case UBIDI_RTL:
206+
result.insert(0, RLI_CHAR);
198207
result.insert(result.length(), PDI_CHAR);
208+
break;
209+
case UBIDI_NEUTRAL:
210+
// Do nothing
211+
break;
212+
case UBIDI_MIXED:
213+
// mixed = auto
214+
result.insert(0, FSI_CHAR);
215+
result.insert(result.length(), PDI_CHAR);
216+
break;
199217
}
200-
break;
201-
case UBIDI_RTL:
202-
result.insert(0, RLI_CHAR);
203-
result.insert(result.length(), PDI_CHAR);
204-
break;
205-
case UBIDI_NEUTRAL:
206-
// Do nothing
207-
break;
208-
case UBIDI_MIXED:
209-
// mixed = auto
210-
result.insert(0, FSI_CHAR);
211-
result.insert(result.length(), PDI_CHAR);
212-
break;
213218
}
214219
215220
return result;
216221
}
222+
*/
217223

218224
UnicodeString formattableToString(const Locale& locale,
219-
UBiDiDirection dir,
220225
const Formattable& toFormat,
221226
UErrorCode& status) {
222227
EMPTY_ON_ERROR(status);
@@ -279,7 +284,7 @@ namespace message2 {
279284
}
280285
}
281286

282-
return handleBiDi(locale, dir, result);
287+
return result;
283288
}
284289

285290
} // namespace message2

icu4c/source/i18n/messageformat2_formatter.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ namespace message2 {
9292
return *this;
9393
}
9494

95+
MessageFormatter::Builder&
96+
MessageFormatter::Builder::setBidiIsolationStrategy(
97+
MessageFormatter::UMFBidiIsolationStrategy strategy) {
98+
bidiIsolationStrategy = strategy;
99+
return *this;
100+
}
101+
95102
/*
96103
This build() method is non-destructive, which entails the risk that
97104
its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
@@ -158,6 +165,7 @@ namespace message2 {
158165

159166
normalizedInput = builder.normalizedInput;
160167
signalErrors = builder.signalErrors;
168+
bidiIsolationStrategy = builder.bidiIsolationStrategy;
161169

162170
// Build data model
163171
// First, check that there is a data model
@@ -205,6 +213,7 @@ namespace message2 {
205213
dataModel = std::move(other.dataModel);
206214
normalizedInput = std::move(other.normalizedInput);
207215
signalErrors = other.signalErrors;
216+
bidiIsolationStrategy = other.bidiIsolationStrategy;
208217
errors = other.errors;
209218
other.errors = nullptr;
210219
return *this;

icu4c/source/i18n/messageformat2_function_registry.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ StandardFunctions::NumberValue::NumberValue(const Number& parent,
589589
opts = options.mergeOptions(arg.getResolvedOptions(), errorCode);
590590
operand = arg.getOperand();
591591
functionName = UnicodeString(parent.isInteger ? "integer" : "number");
592+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
592593

593594
number::LocalizedNumberFormatter realFormatter;
594595
realFormatter = formatterForOptions(parent, locale, opts, errorCode);
@@ -860,6 +861,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type,
860861
functionName = UnicodeString("time");
861862
break;
862863
}
864+
dir = locale.isRightToLeft() ? UBIDI_RTL : UBIDI_LTR;
863865

864866
const Formattable* source = &operand;
865867

@@ -1112,7 +1114,6 @@ StandardFunctions::String::string(UErrorCode& success) {
11121114
}
11131115

11141116
extern UnicodeString formattableToString(const Locale&,
1115-
const UBiDiDirection,
11161117
const Formattable&,
11171118
UErrorCode&);
11181119

@@ -1145,9 +1146,10 @@ StandardFunctions::StringValue::StringValue(const FunctionContext& context,
11451146
CHECK_ERROR(status);
11461147
operand = val.getOperand();
11471148
functionName = UnicodeString("string");
1149+
dir = context.getDirection();
11481150
// No options
11491151
// Convert to string
1150-
formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status);
1152+
formattedString = formattableToString(context.getLocale(), operand, status);
11511153
}
11521154

11531155
void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys,

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2021,7 +2021,7 @@ void Parser::parseSelectors(UErrorCode& status) {
20212021
// Parse selectors
20222022
// "Backtracking" is required here. It's not clear if whitespace is
20232023
// (`[s]` selector) or (`[s]` variant)
2024-
while (isWhitespace(peek()) || peek() == DOLLAR) {
2024+
while (isWhitespace(peek()) || isBidiControl(peek()) || peek() == DOLLAR) {
20252025
int32_t whitespaceStart = index;
20262026
parseRequiredWhitespace(status);
20272027
// Restore precondition

icu4c/source/i18n/unicode/messageformat2.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,31 @@ namespace message2 {
168168
U_MF_STRICT
169169
} UMFErrorHandlingBehavior;
170170

171+
/**
172+
* Used in conjunction with the
173+
* MessageFormatter::Builder::setBidiIsolationStrategy() method.
174+
*
175+
* @internal ICU 77 technology preview
176+
* @deprecated This API is for technology preview only.
177+
*/
178+
typedef enum UMFBidiIsolationStrategy {
179+
/**
180+
* Do not perform bidi isolation (default)
181+
*
182+
* @internal ICU 77 technology preview
183+
* @deprecated This API is for technology preview only.
184+
*/
185+
U_MF_BIDI_NONE = 0,
186+
/**
187+
* Perform bidi isolation using the "default" strategy
188+
* described in the MF2 specification
189+
* https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text
190+
*
191+
* @internal ICU 77 technology preview
192+
* @deprecated This API is for technology preview only.
193+
*/
194+
U_MF_BIDI_DEFAULT
195+
} UMFBidiIsolationStrategy;
171196
/**
172197
* The mutable Builder class allows each part of the MessageFormatter to be initialized
173198
* separately; calling its `build()` method yields an immutable MessageFormatter.
@@ -197,6 +222,9 @@ namespace message2 {
197222
const MFFunctionRegistry* customMFFunctionRegistry;
198223
// Error behavior; see comment in `MessageFormatter` class
199224
bool signalErrors = false;
225+
// Bidi isolation strategy
226+
MessageFormatter::UMFBidiIsolationStrategy
227+
bidiIsolationStrategy = U_MF_BIDI_NONE;
200228

201229
void clearState();
202230
public:
@@ -280,6 +308,27 @@ namespace message2 {
280308
* @deprecated This API is for technology preview only.
281309
*/
282310
Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
311+
/**
312+
* Set the bidi isolation behavior for this formatter.
313+
*
314+
* "None" means that no bidi isolation will be performed.
315+
* "Default" means that the default bidi isolation strategy
316+
* as described in the MF2 specification
317+
* ( https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#handling-bidirectional-text )
318+
* will be applied.
319+
*
320+
* @param strategy An enum with type UMFBidiIsolationStrategy;
321+
* if strategy == U_MF_BIDI_NONE, then the behavior is "None".
322+
* If strategy == U_MF_BIDI_DEFAULT, then the behavior is "Default".
323+
*
324+
* The default is "None".
325+
*
326+
* @return A reference to the builder.
327+
*
328+
* @internal ICU 77 technology preview
329+
* @deprecated This API is for technology preview only.
330+
*/
331+
Builder& setBidiIsolationStrategy(UMFBidiIsolationStrategy strategy);
283332
/**
284333
* Constructs a new immutable MessageFormatter using the pattern or data model
285334
* that was previously set, and the locale (if it was previously set)
@@ -356,6 +405,7 @@ namespace message2 {
356405

357406
// Formatting methods
358407
[[nodiscard]] InternalValue evalLiteral(const UnicodeString&, const data_model::Literal&, UErrorCode&) const;
408+
[[nodiscard]] UnicodeString& bidiIsolate(UBiDiDirection dir, UnicodeString&) const;
359409
void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
360410
FunctionContext makeFunctionContext(const FunctionOptions&) const;
361411
[[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&,
@@ -433,6 +483,8 @@ namespace message2 {
433483
// The default is false.
434484
bool signalErrors = false;
435485

486+
// Bidi isolation strategy.
487+
UMFBidiIsolationStrategy bidiIsolationStrategy = U_MF_BIDI_NONE;
436488
}; // class MessageFormatter
437489

438490
} // namespace message2

icu4c/source/i18n/unicode/messageformat2_function_registry.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,17 @@ namespace message2 {
369369
* @deprecated This API is for technology preview only.
370370
*/
371371
virtual const FunctionOptions& getResolvedOptions() const { return opts; }
372+
/**
373+
* Returns the directionality of this value, i.e. the directionality
374+
* that its formatted result should have.
375+
*
376+
* @return A UBiDiDirection indicating the directionality that
377+
* the formatted result of this value should have.
378+
*
379+
* @internal ICU 77 technology preview
380+
* @deprecated This API is for technology preview only.
381+
*/
382+
virtual UBiDiDirection getDirection() const { return dir; }
372383
/**
373384
* Returns true if this value supports selection. The default method
374385
* returns false. The method must be overridden for values that support
@@ -467,7 +478,13 @@ namespace message2 {
467478
*/
468479
UnicodeString functionName;
469480
private:
470-
481+
/*
482+
* Directionality that this value should be formatted with.
483+
*
484+
* @internal ICU 77 technology preview
485+
* @deprecated This API is for technology preview only.
486+
*/
487+
UBiDiDirection dir;
471488
}; // class FunctionValue
472489

473490
} // namespace message2

0 commit comments

Comments
 (0)