Skip to content

Commit ecb67cf

Browse files
committed
ICU-22908 MF2: Update spec tests and update implementation for recent spec changes
Updating the spec tests requires two implementation changes: * Match on variables rather than expressions -- see unicode-org/message-format-wg#877 * Require attribute values to be literals (if present) -- see unicode-org/message-format-wg#894
1 parent 7459872 commit ecb67cf

29 files changed

+505
-480
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, Environment& en
409409
CHECK_ERROR(status);
410410
U_ASSERT(!dataModel.hasPattern());
411411

412-
const Expression* selectors = dataModel.getSelectorsInternal();
412+
const VariableName* selectors = dataModel.getSelectorsInternal();
413413
// 1. Let res be a new empty list of resolved values that support selection.
414414
// (Implicit, since `res` is an out-parameter)
415415
// 2. For each expression exp of the message's selectors

icu4c/source/i18n/messageformat2_checker.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -205,18 +205,14 @@ void Checker::checkVariants(UErrorCode& status) {
205205
}
206206
}
207207

208-
void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
208+
void Checker::requireAnnotated(const TypeEnvironment& t,
209+
const VariableName& selectorVar,
210+
UErrorCode& status) {
209211
CHECK_ERROR(status);
210212

211-
if (selectorExpr.isFunctionCall()) {
213+
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
212214
return; // No error
213215
}
214-
const Operand& rand = selectorExpr.getOperand();
215-
if (rand.isVariable()) {
216-
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
217-
return; // No error
218-
}
219-
}
220216
// If this code is reached, an error was detected
221217
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
222218
}
@@ -226,7 +222,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
226222

227223
// Check each selector; if it's not annotated, emit a
228224
// "missing selector annotation" error
229-
const Expression* selectors = dataModel.getSelectorsInternal();
225+
const VariableName* selectors = dataModel.getSelectorsInternal();
230226
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
231227
requireAnnotated(t, selectors[i], status);
232228
}

icu4c/source/i18n/messageformat2_checker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ namespace message2 {
6464
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
6565
private:
6666

67-
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
67+
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
6868
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
6969
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
7070
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);

icu4c/source/i18n/messageformat2_data_model.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -691,9 +691,9 @@ Matcher::Matcher(const Matcher& other) {
691691
numSelectors = other.numSelectors;
692692
numVariants = other.numVariants;
693693
UErrorCode localErrorCode = U_ZERO_ERROR;
694-
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
695-
numSelectors,
696-
localErrorCode));
694+
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
695+
numSelectors,
696+
localErrorCode));
697697
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
698698
numVariants,
699699
localErrorCode));
@@ -702,7 +702,7 @@ Matcher::Matcher(const Matcher& other) {
702702
}
703703
}
704704

705-
Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
705+
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
706706
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}
707707

708708
Matcher::~Matcher() {}
@@ -724,7 +724,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
724724
return bindings.getAlias();
725725
}
726726

727-
const Expression* MFDataModel::getSelectorsInternal() const {
727+
const VariableName* MFDataModel::getSelectorsInternal() const {
728728
U_ASSERT(!bogus);
729729
U_ASSERT(!hasPattern());
730730
return std::get_if<Matcher>(&body)->selectors.getAlias();
@@ -786,15 +786,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
786786
return *this;
787787
}
788788

789-
/*
790-
selector must be non-null
791-
*/
792-
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
789+
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
790+
UErrorCode& status) {
793791
THIS_ON_ERROR(status);
794792

795793
buildSelectorsMessage(status);
796794
U_ASSERT(selectors != nullptr);
797-
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
795+
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);
798796

799797
return *this;
800798
}
@@ -830,11 +828,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
830828
if (other.hasPattern()) {
831829
body = *std::get_if<Pattern>(&other.body);
832830
} else {
833-
const Expression* otherSelectors = other.getSelectorsInternal();
831+
const VariableName* otherSelectors = other.getSelectorsInternal();
834832
const Variant* otherVariants = other.getVariantsInternal();
835833
int32_t numSelectors = other.numSelectors();
836834
int32_t numVariants = other.numVariants();
837-
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
835+
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
838836
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
839837
if (U_FAILURE(localErrorCode)) {
840838
bogus = true;
@@ -863,7 +861,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
863861
int32_t numVariants = builder.variants->size();
864862
int32_t numSelectors = builder.selectors->size();
865863
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
866-
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
864+
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
865+
errorCode),
866+
errorCode);
867867
if (U_FAILURE(errorCode)) {
868868
bogus = true;
869869
return;

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -510,21 +510,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
510510
VariableName result;
511511

512512
U_ASSERT(inBounds());
513-
// If the '$' is missing, we don't want a binding
514-
// for this variable to be created.
515-
bool valid = peek() == DOLLAR;
513+
516514
parseToken(DOLLAR, errorCode);
517515
if (!inBounds()) {
518516
ERROR(errorCode);
519517
return result;
520518
}
521-
UnicodeString varName = parseName(errorCode);
522-
// Set the name to "" if the variable wasn't
523-
// declared correctly
524-
if (!valid) {
525-
varName.remove();
526-
}
527-
return VariableName(varName);
519+
return VariableName(parseName(errorCode));
528520
}
529521

530522
/*
@@ -861,27 +853,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
861853
parseTokenWithWhitespace(EQUALS, errorCode);
862854

863855
UnicodeString rhsStr;
864-
// Parse RHS, which is either a literal or variable
865-
switch (peek()) {
866-
case DOLLAR: {
867-
rand = Operand(parseVariableName(errorCode));
868-
break;
869-
}
870-
default: {
871-
// Must be a literal
872-
rand = Operand(parseLiteral(errorCode));
873-
break;
874-
}
875-
}
876-
U_ASSERT(!rand.isNull());
856+
// Parse RHS, which must be a literal
857+
// attribute = "@" identifier [o "=" o literal]
858+
rand = Operand(parseLiteral(errorCode));
877859
} else {
878860
// attribute -> "@" identifier [[s] "=" [s]]
879861
// Use null operand, which `rand` is already set to
880862
// "Backtrack" by restoring the whitespace (if there was any)
881863
index = savedIndex;
882864
}
883865

884-
attrAdder.addAttribute(lhs, std::move(rand), errorCode);
866+
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
885867
}
886868

887869
/*
@@ -1720,6 +1702,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
17201702
return result.build(status);
17211703
}
17221704

1705+
void Parser::parseVariant(UErrorCode& status) {
1706+
CHECK_ERROR(status);
1707+
1708+
// At least one key is required
1709+
SelectorKeys keyList(parseNonEmptyKeys(status));
1710+
1711+
// parseNonEmptyKeys() consumes any trailing whitespace,
1712+
// so the pattern can be consumed next.
1713+
1714+
// Restore precondition before calling parsePattern()
1715+
// (which must return a non-null value)
1716+
CHECK_BOUNDS(status);
1717+
Pattern rhs = parseQuotedPattern(status);
1718+
1719+
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1720+
}
17231721

17241722
/*
17251723
Consume a `selectors` (matching the nonterminal in the grammar),
@@ -1739,22 +1737,25 @@ void Parser::parseSelectors(UErrorCode& status) {
17391737
// Parse selectors
17401738
// "Backtracking" is required here. It's not clear if whitespace is
17411739
// (`[s]` selector) or (`[s]` variant)
1742-
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
1743-
parseOptionalWhitespace(status);
1740+
while (isWhitespace(peek()) || peek() == DOLLAR) {
1741+
int32_t whitespaceStart = index;
1742+
parseRequiredWhitespace(status);
17441743
// Restore precondition
17451744
CHECK_BOUNDS(status);
1746-
if (peek() != LEFT_CURLY_BRACE) {
1745+
if (peek() != DOLLAR) {
17471746
// This is not necessarily an error, but rather,
17481747
// means the whitespace we parsed was the optional
17491748
// whitespace preceding the first variant, not the
1750-
// optional whitespace preceding a subsequent expression.
1749+
// required whitespace preceding a subsequent variable.
1750+
// In that case, "push back" the whitespace.
1751+
normalizedInput.truncate(normalizedInput.length() - 1);
1752+
index = whitespaceStart;
17511753
break;
17521754
}
1753-
Expression expression;
1754-
expression = parseExpression(status);
1755+
VariableName var = parseVariableName(status);
17551756
empty = false;
17561757

1757-
dataModel.addSelector(std::move(expression), status);
1758+
dataModel.addSelector(std::move(var), status);
17581759
CHECK_ERROR(status);
17591760
}
17601761

@@ -1770,27 +1771,29 @@ void Parser::parseSelectors(UErrorCode& status) {
17701771
} \
17711772

17721773
// Parse variants
1774+
// matcher = match-statement s variant *(o variant)
1775+
1776+
// Parse first variant
1777+
parseRequiredWhitespace(status);
1778+
if (!inBounds()) {
1779+
ERROR(status);
1780+
return;
1781+
}
1782+
parseVariant(status);
1783+
if (!inBounds()) {
1784+
// Not an error; there might be only one variant
1785+
return;
1786+
}
1787+
17731788
while (isWhitespace(peek()) || isKeyStart(peek())) {
1774-
// Trailing whitespace is allowed
17751789
parseOptionalWhitespace(status);
1790+
// Restore the precondition.
1791+
// Trailing whitespace is allowed.
17761792
if (!inBounds()) {
17771793
return;
17781794
}
17791795

1780-
// At least one key is required
1781-
SelectorKeys keyList(parseNonEmptyKeys(status));
1782-
1783-
CHECK_ERROR(status);
1784-
1785-
// parseNonEmptyKeys() consumes any trailing whitespace,
1786-
// so the pattern can be consumed next.
1787-
1788-
// Restore precondition before calling parsePattern()
1789-
// (which must return a non-null value)
1790-
CHECK_BOUNDS(status);
1791-
Pattern rhs = parseQuotedPattern(status);
1792-
1793-
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1796+
parseVariant(status);
17941797

17951798
// Restore the precondition, *without* erroring out if we've
17961799
// reached the end of input. That's because it's valid for the

icu4c/source/i18n/messageformat2_parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ namespace message2 {
100100
void parseUnsupportedStatement(UErrorCode&);
101101
void parseLocalDeclaration(UErrorCode&);
102102
void parseInputDeclaration(UErrorCode&);
103-
void parseSelectors(UErrorCode&);
103+
void parseSelectors(UErrorCode&);
104+
void parseVariant(UErrorCode&);
104105

105106
void parseWhitespaceMaybeRequired(bool, UErrorCode&);
106107
void parseRequiredWhitespace(UErrorCode&);

icu4c/source/i18n/messageformat2_serializer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,18 +244,20 @@ void Serializer::serializeDeclarations() {
244244

245245
void Serializer::serializeSelectors() {
246246
U_ASSERT(!dataModel.hasPattern());
247-
const Expression* selectors = dataModel.getSelectorsInternal();
247+
const VariableName* selectors = dataModel.getSelectorsInternal();
248248

249249
emit(ID_MATCH);
250250
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
251-
// No whitespace needed here -- see `selectors` in the grammar
251+
whitespace();
252+
emit(DOLLAR);
252253
emit(selectors[i]);
253254
}
254255
}
255256

256257
void Serializer::serializeVariants() {
257258
U_ASSERT(!dataModel.hasPattern());
258259
const Variant* variants = dataModel.getVariantsInternal();
260+
whitespace();
259261
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
260262
const Variant& v = variants[i];
261263
emit(v.getKeys());

icu4c/source/i18n/unicode/messageformat2_data_model.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2211,16 +2211,16 @@ namespace message2 {
22112211

22122212
friend class MFDataModel;
22132213

2214-
Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv);
2214+
Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv);
22152215
Matcher() {}
22162216

22172217
// A Matcher may have numSelectors=0 and numVariants=0
22182218
// (this is a data model error, but it's representable).
22192219
// So we have to keep a separate flag to track failed copies.
22202220
bool bogus = false;
22212221

2222-
// The expressions that are being matched on.
2223-
LocalArray<Expression> selectors;
2222+
// The variables that are being matched on.
2223+
LocalArray<VariableName> selectors;
22242224
// The number of selectors
22252225
int32_t numSelectors = 0;
22262226
// The list of `when` clauses (case arms).
@@ -2328,13 +2328,13 @@ namespace message2 {
23282328
* @internal ICU 75 technology preview
23292329
* @deprecated This API is for technology preview only.
23302330
*/
2331-
const std::vector<Expression> getSelectors() const {
2331+
const std::vector<VariableName> getSelectors() const {
23322332
if (std::holds_alternative<Pattern>(body)) {
23332333
return {};
23342334
}
23352335
const Matcher* match = std::get_if<Matcher>(&body);
23362336
// match must be non-null, given the previous check
2337-
return toStdVector<Expression>(match->selectors.getAlias(), match->numSelectors);
2337+
return toStdVector<VariableName>(match->selectors.getAlias(), match->numSelectors);
23382338
}
23392339
/**
23402340
* Accesses the variants. Returns an empty vector if this is a pattern message.
@@ -2462,17 +2462,17 @@ namespace message2 {
24622462
*/
24632463
Builder& addBinding(Binding&& b, UErrorCode& status);
24642464
/**
2465-
* Adds a selector expression. Copies `expression`.
2465+
* Adds a selector variable.
24662466
* If a pattern was previously set, clears the pattern.
24672467
*
2468-
* @param selector Expression to add as a selector. Passed by move.
2468+
* @param selector Variable to add as a selector. Passed by move.
24692469
* @param errorCode Input/output error code
24702470
* @return A reference to the builder.
24712471
*
24722472
* @internal ICU 75 technology preview
24732473
* @deprecated This API is for technology preview only.
24742474
*/
2475-
Builder& addSelector(Expression&& selector, UErrorCode& errorCode) noexcept;
2475+
Builder& addSelector(VariableName&& selector, UErrorCode& errorCode);
24762476
/**
24772477
* Adds a single variant.
24782478
* If a pattern was previously set using `setPattern()`, clears the pattern.
@@ -2564,7 +2564,7 @@ namespace message2 {
25642564
int32_t bindingsLen = 0;
25652565

25662566
const Binding* getLocalVariablesInternal() const;
2567-
const Expression* getSelectorsInternal() const;
2567+
const VariableName* getSelectorsInternal() const;
25682568
const Variant* getVariantsInternal() const;
25692569

25702570
int32_t numSelectors() const {

0 commit comments

Comments
 (0)