Skip to content

Commit fd779e9

Browse files
committed
ICU-22908 MF2: Update spec tests and update implementation for recent spec changes
Updating the spec tests requires two implementation changes: * Match on variables rather than expressions -- see unicode-org/message-format-wg#877 * Require attribute values to be literals (if present) -- see unicode-org/message-format-wg#894
1 parent 2f348f4 commit fd779e9

31 files changed

+542
-492
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,13 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
328328
CHECK_ERROR(status);
329329
U_ASSERT(!dataModel.hasPattern());
330330

331-
const Expression* selectors = dataModel.getSelectorsInternal();
331+
const VariableName* selectors = dataModel.getSelectorsInternal();
332332
// 1. Let res be a new empty list of resolved values that support selection.
333333
// (Implicit, since `res` is an out-parameter)
334334
// 2. For each expression exp of the message's selectors
335335
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
336336
// 2i. Let rv be the resolved value of exp.
337-
ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
337+
ResolvedSelector rv = formatSelector(env, selectors[i], context, status);
338338
if (rv.hasSelector()) {
339339
// 2ii. If selection is supported for rv:
340340
// (True if this code has been reached)
@@ -606,7 +606,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo
606606

607607

608608
// Evaluate the operand
609-
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
609+
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
610+
const Operand& rand,
611+
MessageContext& context,
612+
UErrorCode &status) const {
610613
if (U_FAILURE(status)) {
611614
return {};
612615
}
@@ -620,7 +623,13 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons
620623
}
621624

622625
// Must be variable
623-
const VariableName& var = rand.asVariable();
626+
return resolveVariables(env, rand.asVariable(), context, status);
627+
}
628+
629+
ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
630+
const VariableName& var,
631+
MessageContext& context,
632+
UErrorCode &status) const {
624633
// Resolve the variable
625634
if (env.has(var)) {
626635
const Closure& referent = env.lookup(var);
@@ -683,13 +692,16 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
683692
}
684693
}
685694

686-
ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
695+
ResolvedSelector MessageFormatter::formatSelector(const Environment& globalEnv,
696+
const VariableName& var,
697+
MessageContext& context,
698+
UErrorCode &status) const {
687699
if (U_FAILURE(status)) {
688700
return {};
689701
}
690702

691703
// Resolve expression to determine if it's a function call
692-
ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
704+
ResolvedSelector exprResult = resolveVariables(globalEnv, var, context, status);
693705

694706
DynamicErrors& err = context.getErrors();
695707

icu4c/source/i18n/messageformat2_checker.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -205,18 +205,14 @@ void Checker::checkVariants(UErrorCode& status) {
205205
}
206206
}
207207

208-
void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
208+
void Checker::requireAnnotated(const TypeEnvironment& t,
209+
const VariableName& selectorVar,
210+
UErrorCode& status) {
209211
CHECK_ERROR(status);
210212

211-
if (selectorExpr.isFunctionCall()) {
213+
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
212214
return; // No error
213215
}
214-
const Operand& rand = selectorExpr.getOperand();
215-
if (rand.isVariable()) {
216-
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
217-
return; // No error
218-
}
219-
}
220216
// If this code is reached, an error was detected
221217
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
222218
}
@@ -226,7 +222,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
226222

227223
// Check each selector; if it's not annotated, emit a
228224
// "missing selector annotation" error
229-
const Expression* selectors = dataModel.getSelectorsInternal();
225+
const VariableName* selectors = dataModel.getSelectorsInternal();
230226
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
231227
requireAnnotated(t, selectors[i], status);
232228
}

icu4c/source/i18n/messageformat2_checker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ namespace message2 {
6464
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
6565
private:
6666

67-
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
67+
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
6868
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
6969
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
7070
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);

icu4c/source/i18n/messageformat2_data_model.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -691,9 +691,9 @@ Matcher::Matcher(const Matcher& other) {
691691
numSelectors = other.numSelectors;
692692
numVariants = other.numVariants;
693693
UErrorCode localErrorCode = U_ZERO_ERROR;
694-
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
695-
numSelectors,
696-
localErrorCode));
694+
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
695+
numSelectors,
696+
localErrorCode));
697697
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
698698
numVariants,
699699
localErrorCode));
@@ -702,7 +702,7 @@ Matcher::Matcher(const Matcher& other) {
702702
}
703703
}
704704

705-
Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
705+
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
706706
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}
707707

708708
Matcher::~Matcher() {}
@@ -724,7 +724,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
724724
return bindings.getAlias();
725725
}
726726

727-
const Expression* MFDataModel::getSelectorsInternal() const {
727+
const VariableName* MFDataModel::getSelectorsInternal() const {
728728
U_ASSERT(!bogus);
729729
U_ASSERT(!hasPattern());
730730
return std::get_if<Matcher>(&body)->selectors.getAlias();
@@ -786,15 +786,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
786786
return *this;
787787
}
788788

789-
/*
790-
selector must be non-null
791-
*/
792-
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
789+
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
790+
UErrorCode& status) {
793791
THIS_ON_ERROR(status);
794792

795793
buildSelectorsMessage(status);
796794
U_ASSERT(selectors != nullptr);
797-
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
795+
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);
798796

799797
return *this;
800798
}
@@ -830,11 +828,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
830828
if (other.hasPattern()) {
831829
body = *std::get_if<Pattern>(&other.body);
832830
} else {
833-
const Expression* otherSelectors = other.getSelectorsInternal();
831+
const VariableName* otherSelectors = other.getSelectorsInternal();
834832
const Variant* otherVariants = other.getVariantsInternal();
835833
int32_t numSelectors = other.numSelectors();
836834
int32_t numVariants = other.numVariants();
837-
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
835+
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
838836
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
839837
if (U_FAILURE(localErrorCode)) {
840838
bogus = true;
@@ -863,7 +861,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
863861
int32_t numVariants = builder.variants->size();
864862
int32_t numSelectors = builder.selectors->size();
865863
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
866-
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
864+
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
865+
errorCode),
866+
errorCode);
867867
if (U_FAILURE(errorCode)) {
868868
bogus = true;
869869
return;

icu4c/source/i18n/messageformat2_parser.cpp

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -510,21 +510,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
510510
VariableName result;
511511

512512
U_ASSERT(inBounds());
513-
// If the '$' is missing, we don't want a binding
514-
// for this variable to be created.
515-
bool valid = peek() == DOLLAR;
513+
516514
parseToken(DOLLAR, errorCode);
517515
if (!inBounds()) {
518516
ERROR(errorCode);
519517
return result;
520518
}
521-
UnicodeString varName = parseName(errorCode);
522-
// Set the name to "" if the variable wasn't
523-
// declared correctly
524-
if (!valid) {
525-
varName.remove();
526-
}
527-
return VariableName(varName);
519+
return VariableName(parseName(errorCode));
528520
}
529521

530522
/*
@@ -861,27 +853,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
861853
parseTokenWithWhitespace(EQUALS, errorCode);
862854

863855
UnicodeString rhsStr;
864-
// Parse RHS, which is either a literal or variable
865-
switch (peek()) {
866-
case DOLLAR: {
867-
rand = Operand(parseVariableName(errorCode));
868-
break;
869-
}
870-
default: {
871-
// Must be a literal
872-
rand = Operand(parseLiteral(errorCode));
873-
break;
874-
}
875-
}
876-
U_ASSERT(!rand.isNull());
856+
// Parse RHS, which must be a literal
857+
// attribute = "@" identifier [o "=" o literal]
858+
rand = Operand(parseLiteral(errorCode));
877859
} else {
878860
// attribute -> "@" identifier [[s] "=" [s]]
879861
// Use null operand, which `rand` is already set to
880862
// "Backtrack" by restoring the whitespace (if there was any)
881863
index = savedIndex;
882864
}
883865

884-
attrAdder.addAttribute(lhs, std::move(rand), errorCode);
866+
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
885867
}
886868

887869
/*
@@ -1720,6 +1702,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
17201702
return result.build(status);
17211703
}
17221704

1705+
void Parser::parseVariant(UErrorCode& status) {
1706+
CHECK_ERROR(status);
1707+
1708+
// At least one key is required
1709+
SelectorKeys keyList(parseNonEmptyKeys(status));
1710+
1711+
// parseNonEmptyKeys() consumes any trailing whitespace,
1712+
// so the pattern can be consumed next.
1713+
1714+
// Restore precondition before calling parsePattern()
1715+
// (which must return a non-null value)
1716+
CHECK_BOUNDS(status);
1717+
Pattern rhs = parseQuotedPattern(status);
1718+
1719+
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1720+
}
17231721

17241722
/*
17251723
Consume a `selectors` (matching the nonterminal in the grammar),
@@ -1739,22 +1737,25 @@ void Parser::parseSelectors(UErrorCode& status) {
17391737
// Parse selectors
17401738
// "Backtracking" is required here. It's not clear if whitespace is
17411739
// (`[s]` selector) or (`[s]` variant)
1742-
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
1743-
parseOptionalWhitespace(status);
1740+
while (isWhitespace(peek()) || peek() == DOLLAR) {
1741+
int32_t whitespaceStart = index;
1742+
parseRequiredWhitespace(status);
17441743
// Restore precondition
17451744
CHECK_BOUNDS(status);
1746-
if (peek() != LEFT_CURLY_BRACE) {
1745+
if (peek() != DOLLAR) {
17471746
// This is not necessarily an error, but rather,
17481747
// means the whitespace we parsed was the optional
17491748
// whitespace preceding the first variant, not the
1750-
// optional whitespace preceding a subsequent expression.
1749+
// required whitespace preceding a subsequent variable.
1750+
// In that case, "push back" the whitespace.
1751+
normalizedInput.truncate(normalizedInput.length() - 1);
1752+
index = whitespaceStart;
17511753
break;
17521754
}
1753-
Expression expression;
1754-
expression = parseExpression(status);
1755+
VariableName var = parseVariableName(status);
17551756
empty = false;
17561757

1757-
dataModel.addSelector(std::move(expression), status);
1758+
dataModel.addSelector(std::move(var), status);
17581759
CHECK_ERROR(status);
17591760
}
17601761

@@ -1770,27 +1771,29 @@ void Parser::parseSelectors(UErrorCode& status) {
17701771
} \
17711772

17721773
// Parse variants
1774+
// matcher = match-statement s variant *(o variant)
1775+
1776+
// Parse first variant
1777+
parseRequiredWhitespace(status);
1778+
if (!inBounds()) {
1779+
ERROR(status);
1780+
return;
1781+
}
1782+
parseVariant(status);
1783+
if (!inBounds()) {
1784+
// Not an error; there might be only one variant
1785+
return;
1786+
}
1787+
17731788
while (isWhitespace(peek()) || isKeyStart(peek())) {
1774-
// Trailing whitespace is allowed
17751789
parseOptionalWhitespace(status);
1790+
// Restore the precondition.
1791+
// Trailing whitespace is allowed.
17761792
if (!inBounds()) {
17771793
return;
17781794
}
17791795

1780-
// At least one key is required
1781-
SelectorKeys keyList(parseNonEmptyKeys(status));
1782-
1783-
CHECK_ERROR(status);
1784-
1785-
// parseNonEmptyKeys() consumes any trailing whitespace,
1786-
// so the pattern can be consumed next.
1787-
1788-
// Restore precondition before calling parsePattern()
1789-
// (which must return a non-null value)
1790-
CHECK_BOUNDS(status);
1791-
Pattern rhs = parseQuotedPattern(status);
1792-
1793-
dataModel.addVariant(std::move(keyList), std::move(rhs), status);
1796+
parseVariant(status);
17941797

17951798
// Restore the precondition, *without* erroring out if we've
17961799
// reached the end of input. That's because it's valid for the

icu4c/source/i18n/messageformat2_parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ namespace message2 {
100100
void parseUnsupportedStatement(UErrorCode&);
101101
void parseLocalDeclaration(UErrorCode&);
102102
void parseInputDeclaration(UErrorCode&);
103-
void parseSelectors(UErrorCode&);
103+
void parseSelectors(UErrorCode&);
104+
void parseVariant(UErrorCode&);
104105

105106
void parseWhitespaceMaybeRequired(bool, UErrorCode&);
106107
void parseRequiredWhitespace(UErrorCode&);

icu4c/source/i18n/messageformat2_serializer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,18 +244,20 @@ void Serializer::serializeDeclarations() {
244244

245245
void Serializer::serializeSelectors() {
246246
U_ASSERT(!dataModel.hasPattern());
247-
const Expression* selectors = dataModel.getSelectorsInternal();
247+
const VariableName* selectors = dataModel.getSelectorsInternal();
248248

249249
emit(ID_MATCH);
250250
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
251-
// No whitespace needed here -- see `selectors` in the grammar
251+
whitespace();
252+
emit(DOLLAR);
252253
emit(selectors[i]);
253254
}
254255
}
255256

256257
void Serializer::serializeVariants() {
257258
U_ASSERT(!dataModel.hasPattern());
258259
const Variant* variants = dataModel.getVariantsInternal();
260+
whitespace();
259261
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
260262
const Variant& v = variants[i];
261263
emit(v.getKeys());

0 commit comments

Comments
 (0)