diff --git a/inflection/src/inflection/message2/MF2Factory.cpp b/inflection/src/inflection/message2/MF2Factory.cpp
new file mode 100644
index 00000000..ceea90e9
--- /dev/null
+++ b/inflection/src/inflection/message2/MF2Factory.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+#include "inflection/message2/MF2Factory.hpp"
+
+#include "inflection/dialog/InflectableStringConcept.hpp"
+#include "inflection/dialog/LocalizedCommonConceptFactoryProvider.hpp"
+#include "inflection/dialog/SemanticFeatureModel.hpp"
+#include "inflection/dialog/SpeakableString.hpp"
+#include "inflection/lang/features/LanguageGrammarFeatures.hpp"
+#include "inflection/util/ULocale.hpp"
+
+#include
+#include
+#include
+#include
+
+#include <memory>
+
+using U_ICU_NAMESPACE::Locale;
+using U_ICU_NAMESPACE::UnicodeString;
+using U_ICU_NAMESPACE::message2::Formatter;
+using U_ICU_NAMESPACE::message2::Formattable;
+using U_ICU_NAMESPACE::message2::FormattedValue;
+using U_ICU_NAMESPACE::message2::FormattedPlaceholder;
+using U_ICU_NAMESPACE::message2::FormatterFactory;
+using U_ICU_NAMESPACE::message2::FunctionOptions;
+using U_ICU_NAMESPACE::message2::FunctionOptionsMap;
+using U_ICU_NAMESPACE::message2::MessageArguments;
+using U_ICU_NAMESPACE::message2::MessageFormatter;
+using U_ICU_NAMESPACE::message2::MFFunctionRegistry;
+using U_ICU_NAMESPACE::message2::Selector;
+using U_ICU_NAMESPACE::message2::SelectorFactory;
+
+namespace inflection::message2 {
+
+// Builds InflectionFormatter objects for the locale passed to createFormatter.
+class InflectionFormatterFactory : public FormatterFactory {
+ public:
+    Formatter* createFormatter(const Locale&, UErrorCode&) override;
+};
+
+// Builds InflectionSelector objects for the locale passed to createSelector.
+class InflectionSelectorFactory : public SelectorFactory {
+ public:
+    Selector* createSelector(const Locale&, UErrorCode&) const override;
+};
+
+icu::message2::FormatterFactory* MF2Factory::CreateFormatterFactory() {
+    return new InflectionFormatterFactory();
+}
+
+icu::message2::SelectorFactory* MF2Factory::CreateSelectorFactory() {
+    return new InflectionSelectorFactory();
+}
+
+// Returns the semantic feature model of the default common concept factory
+// for the given locale. Only used inside this file, hence internal linkage.
+static const inflection::dialog::SemanticFeatureModel* GetSemanticFeatureModel(
+    const Locale& locale) {
+    return ::inflection::dialog::LocalizedCommonConceptFactoryProvider
+        ::getDefaultCommonConceptFactoryProvider()
+        ->getCommonConceptFactory(
+            inflection::util::ULocale(locale.getName()))
+        ->getSemanticFeatureModel();
+}
+
+// Formatter that inflects a string argument; each option whose name is a
+// feature of the model is applied as a constraint.
+class InflectionFormatter : public Formatter {
+ public:
+    FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& options,
+                                UErrorCode& errorCode) const override;
+    explicit InflectionFormatter(
+        const inflection::dialog::SemanticFeatureModel* model)
+        : model(model) {
+    }
+ private:
+    // Borrowed pointer; this class never deletes it.
+    const ::inflection::dialog::SemanticFeatureModel* model;
+};
+
+Formatter* InflectionFormatterFactory::createFormatter(
+    const Locale& locale, UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+
+    Formatter* result = new InflectionFormatter(GetSemanticFeatureModel(locale));
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return result;
+}
+
+// Formats `arg`: a string argument is inflected with the given options,
+// any other type is appended via Formattable::getString().
+FormattedPlaceholder InflectionFormatter::format(
+    FormattedPlaceholder&& arg, FunctionOptions&& options,
+    UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+
+    // Argument must be present
+    if (!arg.canFormat()) {
+        errorCode = U_MF_FORMATTING_ERROR;
+        return FormattedPlaceholder("inflection");
+    }
+
+    // Assumes the argument is not-yet-formatted
+    const Formattable& toFormat = arg.asFormattable();
+    UnicodeString result;
+
+    switch (toFormat.getType()) {
+        case UFMT_STRING: {
+            inflection::dialog::SpeakableString input(
+                toFormat.getString(errorCode));
+            inflection::dialog::InflectableStringConcept stringConcept(
+                model, input);
+            for (const auto& [key, value] : options.getOptions()) {
+                // Options that are not features of the model are skipped.
+                auto constraint = model->getFeature(key);
+                if (constraint != nullptr) {
+                    stringConcept.putConstraint(*constraint,
+                                                value.getString(errorCode));
+                }
+            }
+            // toSpeakableString() hands back a heap object that the caller
+            // must release; own it here so it is freed on every path.
+            const std::unique_ptr<const inflection::dialog::SpeakableString>
+                inflected(stringConcept.toSpeakableString());
+            if (inflected != nullptr) {
+                result += inflected->getPrint();
+            }
+            break;
+        }
+        default: {
+            result += toFormat.getString(errorCode);
+            break;
+        }
+    }
+
+    return FormattedPlaceholder(arg, FormattedValue(std::move(result)));
+}
+
+// Selector that compares the value of the feature named by the "select"
+// option against the candidate keys.
+class InflectionSelector : public Selector {
+ public:
+    void selectKey(FormattedPlaceholder &&arg, FunctionOptions &&options,
+                   const UnicodeString *keys, int32_t keysLen,
+                   UnicodeString *prefs, int32_t &prefsLen,
+                   UErrorCode &errorCode) const override;
+
+    explicit InflectionSelector(
+        const inflection::dialog::SemanticFeatureModel* model)
+        : model(model) {
+    }
+
+ private:
+    // Borrowed pointer; this class never deletes it.
+    const ::inflection::dialog::SemanticFeatureModel* model;
+};
+
+Selector* InflectionSelectorFactory::createSelector(
+    const Locale& locale, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+
+    Selector* result = new InflectionSelector(GetSemanticFeatureModel(locale));
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return result;
+}
+
+// Writes into prefs every key equal to the printed value of the feature named
+// by the "select" option, after applying the other options as constraints.
+// A string argument without a "select" option sets U_MF_SELECTOR_ERROR;
+// a non-string argument selects nothing.
+void InflectionSelector::selectKey(
+    FormattedPlaceholder &&arg, FunctionOptions &&options,
+    const UnicodeString *keys, int32_t keysLen,
+    UnicodeString *prefs, int32_t &prefsLen, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return; }
+    // Argument must be present
+    if (!arg.canFormat()) {
+        errorCode = U_MF_SELECTOR_ERROR;
+        return;
+    }
+
+    // Assumes the argument is not-yet-formatted
+    const Formattable& toFormat = arg.asFormattable();
+    prefsLen = 0;
+    if (toFormat.getType() != UFMT_STRING) {
+        return;
+    }
+
+    // Fetch the options once and reuse them for the lookup and the loop.
+    const auto opt = options.getOptions();
+    const auto selectOption = opt.find(u"select");
+    if (selectOption == opt.end()) {
+        errorCode = U_MF_SELECTOR_ERROR;
+        return;
+    }
+
+    inflection::dialog::SpeakableString input(toFormat.getString(errorCode));
+    inflection::dialog::InflectableStringConcept stringConcept(model, input);
+    for (const auto& [key, value] : opt) {
+        auto constraint = model->getFeature(key);
+        if (constraint != nullptr) {
+            stringConcept.putConstraint(*constraint,
+                                        value.getString(errorCode));
+        }
+    }
+
+    UnicodeString feature;
+    auto selectedFeature =
+        model->getFeature(selectOption->second.getString(errorCode));
+    if (selectedFeature != nullptr) {
+        // getFeatureValue() hands back a heap object (or null) that the
+        // caller must release; own it here so it is freed on every path.
+        const std::unique_ptr<const inflection::dialog::SpeakableString>
+            featureValue(stringConcept.getFeatureValue(*selectedFeature));
+        if (featureValue != nullptr) {
+            feature = featureValue->getPrint();
+        }
+    }
+    for (int32_t i = 0; i < keysLen; i++) {
+        if (feature == keys[i]) {
+            prefs[prefsLen++] = keys[i];
+        }
+    }
+}
+
+}  // namespace inflection::message2
diff --git a/inflection/src/inflection/message2/MF2Factory.hpp b/inflection/src/inflection/message2/MF2Factory.hpp
new file mode 100644
index 00000000..49266b3d
--- /dev/null
+++ b/inflection/src/inflection/message2/MF2Factory.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+#pragma once
+
+#include
+// INFLECTION_CLASS_API
+
+#include
+
+namespace U_ICU_NAMESPACE::message2 {
+class FormatterFactory;
+class SelectorFactory;
+}  // namespace U_ICU_NAMESPACE::message2
+
+namespace inflection::message2 {
+
+/**
+ * MF2Factory provides factory methods that create custom formatter and selector
+ * factories for use with the icu::message2 library.
+ * The intended usage is to create an icu::message2::FormatterFactory
+ * or icu::message2::SelectorFactory and use it with
+ * icu::message2::MFFunctionRegistry::Builder
+ * to construct a custom function registry to build MessageFormatter
+ * with MessageFormatter::Builder.
+ *
+ * For example:
+ *
+ * auto customRegistry = icu::message2::MFFunctionRegistry::Builder(errorCode)
+ *     .adoptFormatter(FunctionName("inflection"),
+ *                     MF2Factory::CreateFormatterFactory(), errorCode)
+ *     .adoptSelector(FunctionName("inflection"),
+ *                    MF2Factory::CreateSelectorFactory(), errorCode)
+ *     .build();
+ *
+ * UParseError pe;
+ * auto mf1 = icu::message2::MessageFormatter::Builder(errorCode)
+ *     .setFunctionRegistry(customRegistry)
+ *     .setLocale(Locale::forLanguageTag("es-MX", errorCode))
+ *     .setPattern("Location is {$name :inflection hello=world \
+ *                  definiteness=definite number=plural \
+ *                  gender=feminine}",
+ *                 pe, errorCode)
+ *     .build(errorCode);
+ *
+ * auto mf2 = icu::message2::MessageFormatter::Builder(errorCode)
+ *     .setFunctionRegistry(customRegistry)
+ *     .setLocale(Locale::forLanguageTag("es-MX", errorCode))
+ *     .setPattern(".local $gender = {$name :inflection \
+ *                  select=gender} \
+ *                  .local $number = {$name :inflection \
+ *                  select=number} \
+ *                  .match $gender $number \
+ *                  feminine singular {{Feminine Singular {$name}}}\
+ *                  masculine singular {{Masculine Singular {$name}}}\
+ *                  * * {{other {$name} }}\n",
+ *                 pe, errorCode)
+ *     .build(errorCode);
+ */
+class INFLECTION_CLASS_API MF2Factory {
+ public:
+    /**
+     * Create an implementation of icu::message2::FormatterFactory*, based on the
+     * inflection library, which can be passed to
+     * icu::message2::MFFunctionRegistry::Builder::adoptFormatter
+     * to register a custom formatter factory.
+     */
+    static icu::message2::FormatterFactory* CreateFormatterFactory();
+
+    /**
+     * Create an implementation of icu::message2::SelectorFactory*, based on the
+     * inflection library, which can be passed to
+     * icu::message2::MFFunctionRegistry::Builder::adoptSelector
+     * to register a custom selector factory.
+     */
+    static icu::message2::SelectorFactory* CreateSelectorFactory();
+};
+
+}  // namespace inflection::message2
diff --git a/inflection/test/src/inflection/message2/MF2FactoryTest.cpp b/inflection/test/src/inflection/message2/MF2FactoryTest.cpp
new file mode 100644
index 00000000..b5512338
--- /dev/null
+++ b/inflection/test/src/inflection/message2/MF2FactoryTest.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+#include "catch2/catch_test_macros.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "util/TestUtils.hpp"
+#include "util/XMLUtils.hpp"
+#include
+#include
+
+#include "unicode/locid.h"
+#include "unicode/messageformat2.h"
+#include "unicode/messageformat2_function_registry.h"
+#include "unicode/messageformat2_formattable.h"
+
+using icu::Locale;
+using icu::UnicodeString;
+using icu::message2::Formattable;
+using icu::message2::data_model::FunctionName;
+using icu::message2::MFFunctionRegistry;
+using icu::message2::MessageArguments;
+using icu::message2::MessageFormatter;
+
+
+TEST_CASE("MF2Factory#testBasic")
+{
+    UErrorCode errorCode = U_ZERO_ERROR;
+    auto functionName = FunctionName("inflection");
+    MFFunctionRegistry::Builder(errorCode)
+        .adoptFormatter(
+            functionName,
+            inflection::message2::MF2Factory::CreateFormatterFactory(),
+            errorCode)
+        .adoptSelector(
+            functionName,
+            inflection::message2::MF2Factory::CreateSelectorFactory(),
+            errorCode)
+        .build();
+    REQUIRE(U_SUCCESS(errorCode));
+}
+
+TEST_CASE("MF2Factory#testCreateFormatter")
+{
+    UErrorCode errorCode = U_ZERO_ERROR;
+    auto functionName = FunctionName("inflection");
+    auto customRegistry = MFFunctionRegistry::Builder(errorCode)
+        .adoptFormatter(
+            functionName,
+            inflection::message2::MF2Factory::CreateFormatterFactory(),
+            errorCode)
+        .build();
+    REQUIRE(U_SUCCESS(errorCode));
+
+    UParseError pe;
+    MessageFormatter mf = MessageFormatter::Builder(errorCode)
+        .setFunctionRegistry(customRegistry)
+        .setLocale(Locale::forLanguageTag("es-MX", errorCode))
+        .setPattern("\
+Foo {$name :inflection hello=world definiteness=definite \
+ number=plural gender=feminine} Bar",
+        pe, errorCode)
+        .build(errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+
+    std::map<UnicodeString, Formattable> arguments;
+    arguments["name"]= Formattable("gato");
+
+    UnicodeString ret = mf.formatToString(
+        MessageArguments(arguments, errorCode), errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+    REQUIRE(ret == u"Foo las gatas Bar");
+}
+
+TEST_CASE("MF2Factory#testCreateSelector")
+{
+    UErrorCode errorCode = U_ZERO_ERROR;
+    auto functionName = FunctionName("inflection");
+    auto customRegistry = MFFunctionRegistry::Builder(errorCode)
+        .adoptSelector(
+            functionName,
+            inflection::message2::MF2Factory::CreateSelectorFactory(),
+            errorCode)
+        .build();
+    REQUIRE(U_SUCCESS(errorCode));
+
+    UParseError pe;
+    MessageFormatter mf = MessageFormatter::Builder(errorCode)
+        .setFunctionRegistry(customRegistry)
+        .setLocale(Locale::forLanguageTag("es-MX", errorCode))
+        .setPattern("\
+.local $var1 = {$name :inflection select=gender} \
+.local $var2 = {$name :inflection select=number} \
+.match $var1 $var2\
+ masculine 2 {{{$name} is Masculine & 2}} \
+ feminine singular {{{$name} is Feminine & Singular}} \
+ foo 4 {{{$name} is Foo & 4}} \
+ masculine singular {{{$name} is Masculine & Singular}} \
+ hello singular {{{$name} is Hello & Singular}} \
+ * * {{{$name} is other}}\n",
+        pe, errorCode)
+        .build(errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+
+    std::map<UnicodeString, Formattable> arguments;
+    arguments["name"]= Formattable("gato");
+
+    UnicodeString ret = mf.formatToString(
+        MessageArguments(arguments, errorCode), errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+    REQUIRE(ret == u"gato is Masculine & Singular");
+}
+
+// Test the case in de
+// Bank
+TEST_CASE("MF2Factory#testSelectorWithoutQuote")
+{
+    UErrorCode errorCode = U_ZERO_ERROR;
+    auto functionName = FunctionName("inflection");
+    auto customRegistry = MFFunctionRegistry::Builder(errorCode)
+        .adoptSelector(
+            functionName,
+            inflection::message2::MF2Factory::CreateSelectorFactory(),
+            errorCode)
+        .build();
+    REQUIRE(U_SUCCESS(errorCode));
+
+    UParseError pe;
+    MessageFormatter mf = MessageFormatter::Builder(errorCode)
+        .setFunctionRegistry(customRegistry)
+        .setLocale(Locale::forLanguageTag("de", errorCode))
+        .setPattern("\
+.local $indefArticle = {$name :inflection number=singular\
+ case=accusative select=indefArticle} \
+.match $indefArticle \
+eine {{The indefArticle of '{$name}' is 'eine'.}} \
+* {{The indefArticle of '{$name}' is something else.}}\n",
+        pe, errorCode)
+        .build(errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+
+    std::map<UnicodeString, Formattable> arguments;
+    arguments["name"]= Formattable("Bank");
+
+    UnicodeString ret = mf.formatToString(MessageArguments(arguments, errorCode), errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+    REQUIRE(ret == u"The indefArticle of 'Bank' is 'eine'.");
+}
+
+// Test the case in de
+// Fundort
+TEST_CASE("MF2Factory#testSelectorWithQuote")
+{
+    UErrorCode errorCode = U_ZERO_ERROR;
+    auto functionName = FunctionName("inflection");
+    auto customRegistry = MFFunctionRegistry::Builder(errorCode)
+        .adoptSelector(
+            functionName,
+            inflection::message2::MF2Factory::CreateSelectorFactory(),
+            errorCode)
+        .build();
+    REQUIRE(U_SUCCESS(errorCode));
+
+    UParseError pe;
+    MessageFormatter mf = MessageFormatter::Builder(errorCode)
+        .setFunctionRegistry(customRegistry)
+        .setLocale(Locale::forLanguageTag("de", errorCode))
+        .setPattern("\
+.local $withDefArticleInPreposition = {$name \
+:inflection number=singular case=dative \
+select=withDefArticleInPreposition} \
+.match $withDefArticleInPreposition \
+|im Fundort| {{The withDefArticleInPreposition of '{$name}' is 'im Fundort'.}} \
+* {{The withDefArticleInPreposition of '{$name}' is\
+* something else.}}\n",
+        pe, errorCode)
+        .build(errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+
+    std::map<UnicodeString, Formattable> arguments;
+    arguments["name"]= Formattable("Fundort");
+
+    UnicodeString ret = mf.formatToString(
+        MessageArguments(arguments, errorCode), errorCode);
+    REQUIRE(U_SUCCESS(errorCode));
+    REQUIRE(ret ==
+        u"The withDefArticleInPreposition of 'Fundort' is 'im Fundort'.");
+}