diff --git a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes index fe535cd8..eab34d01 100644 --- a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes +++ b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes @@ -15,6 +15,7 @@ dictionary_ru.lst filter=lfs diff=lfs merge=lfs -text dictionary_sv.lst filter=lfs diff=lfs merge=lfs -text dictionary_tr.lst filter=lfs diff=lfs merge=lfs -text inflectional_ar.xml filter=lfs diff=lfs merge=lfs -text +dictionary_ml.lst filter=lfs diff=lfs merge=lfs -text inflectional_da.xml filter=lfs diff=lfs merge=lfs -text inflectional_de.xml filter=lfs diff=lfs merge=lfs -text inflectional_en.xml filter=lfs diff=lfs merge=lfs -text @@ -27,5 +28,6 @@ inflectional_nb.xml filter=lfs diff=lfs merge=lfs -text inflectional_nl.xml filter=lfs diff=lfs merge=lfs -text inflectional_pt.xml filter=lfs diff=lfs merge=lfs -text inflectional_ru.xml filter=lfs diff=lfs merge=lfs -text +inflectional_ml.xml filter=lfs diff=lfs merge=lfs -text inflectional_sv.xml filter=lfs diff=lfs merge=lfs -text inflectional_tr.xml filter=lfs diff=lfs merge=lfs -text diff --git a/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst new file mode 100644 index 00000000..fd511ba1 --- /dev/null +++ b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a772581bf6b41c099bfd146e9baaae6a02feea179ffb545d33aef55f9fda16a +size 53959170 diff --git a/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml new file mode 100644 index 00000000..05699ac1 --- /dev/null +++ b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1
+oid sha256:4b94140bfac794d2ccf811447e6c142daf88d65692ddf704d71289c5bfa06ddc
+size 617521
diff --git a/inflection/resources/org/unicode/inflection/features/grammar.xml b/inflection/resources/org/unicode/inflection/features/grammar.xml
index 6a620220..06ff0d8c 100644
--- a/inflection/resources/org/unicode/inflection/features/grammar.xml
+++ b/inflection/resources/org/unicode/inflection/features/grammar.xml
@@ -1624,6 +1624,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv
new file mode 100644
index 00000000..a066cb93
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv
@@ -0,0 +1,39 @@
+എനിക്ക്,first,singular,dative
+ഞാൻ,first,singular,nominative
+എന്നെ,first,singular,accusative
+എൻ്റെ,first,singular,genitive,dependency=dependent
+എൻ്റെത്,first,singular,genitive,dependency=independent
+നമുക്ക്,first,plural,dative
+ഞങ്ങൾ,first,plural,nominative
+ഞങ്ങളെ,first,plural,accusative
+ഞങ്ങളുടെ,first,plural,genitive,dependency=dependent
+ഞങ്ങളുടേതു്,first,plural,genitive,dependency=independent
+നമ്മുടെ,first,plural,genitive,dependency=dependent
+നമ്മുടേതു്,first,plural,genitive,dependency=independent
+നിനക്ക്,second,singular,dative,dependency=nonhonorific
+നീ,second,singular,nominative,dependency=nonhonorific
+നിന്നെ,second,singular,accusative,dependency=nonhonorific
+നിന്റെ,second,singular,genitive,dependency=dependent,dependency=nonhonorific
+നിന്റേതു്,second,singular,genitive,dependency=independent,dependency=nonhonorific
+നിങ്ങൾക്ക്,second,plural,dative,dependency=honorific
+നിങ്ങൾ,second,plural,nominative,dependency=honorific
+നിങ്ങളെ,second,plural,accusative,dependency=honorific
+നിങ്ങളുടെ,second,plural,genitive,dependency=dependent,dependency=honorific
+നിങ്ങളുടേതു്,second,plural,genitive,dependency=independent,dependency=honorific
+അവൻ,third,singular,nominative,masculine
+അവനെ,third,singular,accusative,masculine
+അവൻ്റെ,third,singular,genitive,dependency=dependent,masculine
+അവൻ്റെത്,third,singular,genitive,dependency=independent,masculine
+അവൾ,third,singular,nominative,feminine
+അവളെ,third,singular,accusative,feminine
+അവളുടെ,third,singular,genitive,dependency=dependent,feminine
+അവളുടേതു്,third,singular,genitive,dependency=independent,feminine
+അത്,third,singular,nominative,neuter
+അതിനെ,third,singular,accusative,neuter
+അതിന്റെ,third,singular,genitive,dependency=dependent,neuter
+അതിന്റേതു്,third,singular,genitive,dependency=independent,neuter
+അവർ,third,plural,nominative
+അവരെ,third,plural,accusative
+അവരുടെ,third,plural,genitive,dependency=dependent
+അവരുടേതു്,third,plural,genitive,dependency=independent
+
diff --git a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
index 6815591d..43741bca 100644
--- a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
+++ b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
@@ -15,6 +15,7 @@ locale.group.it=it_IT,it_CH
 locale.group.ja=ja_JP
 locale.group.ko=ko_KR
+locale.group.ml=ml_IN
 locale.group.ms=ms_MY
 locale.group.nb=nb_NO
 locale.group.nl=nl_NL,nl_BE
 locale.group.pt=pt_BR,pt_PT
diff --git a/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties
new file mode 100644
index 00000000..879ad816
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties
@@ -0,0 +1,5 @@
+#
+# Copyright 2025 Unicode Incorporated and others. All rights reserved.
+#
+tokenizer.implementation.class=DefaultTokenizer
+
diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp
new file mode 100644
index 00000000..319a863a
--- /dev/null
+++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#include
+
+namespace inflection::dialog::language {
+
+// Forwards the locale to the shared implementation; nothing Malayalam-specific is added.
+MlCommonConceptFactory::MlCommonConceptFactory(const ::inflection::util::ULocale& language)
+    : super(language)
+{
+}
+
+MlCommonConceptFactory::~MlCommonConceptFactory() = default;
+
+} // namespace inflection::dialog::language
diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
new file mode 100644
index 00000000..5cc63231
--- /dev/null
+++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+
+/**
+ * Common concept factory for Malayalam. It inherits every behaviour from
+ * CommonConceptFactoryImpl and only supplies the constructor and destructor.
+ */
+class inflection::dialog::language::MlCommonConceptFactory
+    : public CommonConceptFactoryImpl
+{
+public:
+    using super = CommonConceptFactoryImpl;
+
+    explicit MlCommonConceptFactory(const ::inflection::util::ULocale& language);
+    ~MlCommonConceptFactory() override;
+};
diff --git a/inflection/src/inflection/dialog/language/fwd.hpp b/inflection/src/inflection/dialog/language/fwd.hpp
index 6429ca3a..8dbefaee 100644
--- a/inflection/src/inflection/dialog/language/fwd.hpp
+++ b/inflection/src/inflection/dialog/language/fwd.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017-2024 Apple Inc. All rights reserved.
+ * Copyright 2017-2025 Apple Inc. All rights reserved.
*/ // Forward declarations for inflection.dialog.language #pragma once @@ -28,6 +28,7 @@ namespace inflection class JaCommonConceptFactory; class KoCommonConceptFactory; class KoCommonConceptFactory_KoAndList; + class MlCommonConceptFactory; class MsCommonConceptFactory; class NbCommonConceptFactory; class NlCommonConceptFactory; diff --git a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp index ecb31303..faff5658 100644 --- a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp +++ b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2017-2024 Apple Inc. All rights reserved. + * Copyright 2017-2025 Apple Inc. All rights reserved. */ #include @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ static const ::std::map<::inflection::util::ULocale, addSemanticFeatures>& GRAMM {::inflection::util::LocaleUtils::HINDI(), &HiGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::ITALIAN(), &ItGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::KOREAN(), &KoGrammarSynthesizer::addSemanticFeatures}, + {::inflection::util::LocaleUtils::MALAYALAM(), &MlGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::NORWEGIAN(), &NbGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::DUTCH(), &NlGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::PORTUGUESE(), &PtGrammarSynthesizer::addSemanticFeatures}, diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp new file mode 100644 index 00000000..bb4bb985 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp @@ -0,0 +1,25 @@ +/* + * Copyright 2025 Apple Inc. 
All rights reserved. + */ +#include + +#include +#include +#include +#include +#include +#include + +namespace inflection::grammar::synthesis { + +void MlGrammarSynthesizer::addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& featureModel) +{ + featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::NUMBER, new MlGrammarSynthesizer_CountLookupFunction()); + featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::GENDER, new MlGrammarSynthesizer_GenderLookupFunction()); + featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::CASE, new MlGrammarSynthesizer_CaseLookupFunction()); + + featureModel.setDefaultDisplayFunction(new MlGrammarSynthesizer_MlDisplayFunction(featureModel)); +} + +} // namespace inflection::grammar::synthesis + diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp new file mode 100644 index 00000000..8992c108 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp @@ -0,0 +1,17 @@ +/* + * Copyright 2025 Apple Inc. All rights reserved. + */ +#pragma once + +#include +#include +#include + +class inflection::grammar::synthesis::MlGrammarSynthesizer final +{ +public: + static void addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& featureModel); +private: + MlGrammarSynthesizer() = delete; +}; + diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp new file mode 100644 index 00000000..181b954a --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2025 Apple Inc. All rights reserved. 
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+MlGrammarSynthesizer_CaseLookupFunction::MlGrammarSynthesizer_CaseLookupFunction()
+    : super()
+{
+    // No file needed
+}
+
+/**
+ * Guesses the grammatical case of the display string from its ending.
+ * Only the genitive is recognised; every other input yields nullptr.
+ */
+inflection::dialog::SpeakableString* MlGrammarSynthesizer_CaseLookupFunction::getFeatureValue(const ::inflection::dialog::DisplayValue& displayValue, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& /*constraints*/) const
+{
+    // Malayalam script has no letter case, so the text is matched as is.
+    const auto& displayString = displayValue.getDisplayString();
+    // Genitive endings are written with the dependent vowel sign U, as in അവളുടെ (this also covers
+    // യുടെ); "nte" is spelled with either NA or chillu N before the virama, as in അവൻ്റെ.
+    if (displayString.ends_with(u"ുടെ")
+        || displayString.ends_with(u"ന്റെ")
+        || displayString.ends_with(u"ൻ്റെ")) {
+        return new ::inflection::dialog::SpeakableString(GrammemeConstants::CASE_GENITIVE());
+    }
+    return nullptr;
+}
+
+} // namespace inflection::grammar::synthesis
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp
new file mode 100644
index 00000000..3eca6b41
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * Default feature function that guesses the grammatical case of a Malayalam
+ * phrase from its suffix. Only the genitive is detected.
+ */
+class inflection::grammar::synthesis::MlGrammarSynthesizer_CaseLookupFunction
+    : public ::inflection::dialog::DefaultFeatureFunction
+{
+public:
+    typedef ::inflection::dialog::DefaultFeatureFunction super;
+
+public:
+    /** Returns a newly allocated genitive value, or nullptr when no case can be determined. */
+    ::inflection::dialog::SpeakableString* getFeatureValue(const ::inflection::dialog::DisplayValue& displayValue, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints) const override;
+
+    MlGrammarSynthesizer_CaseLookupFunction();
+    MlGrammarSynthesizer_CaseLookupFunction(const MlGrammarSynthesizer_CaseLookupFunction&) = delete;
+    MlGrammarSynthesizer_CaseLookupFunction& operator=(const MlGrammarSynthesizer_CaseLookupFunction&) = delete;
+};
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.cpp
new file mode 100644
index 00000000..095cfca3
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+MlGrammarSynthesizer_CountLookupFunction::MlGrammarSynthesizer_CountLookupFunction()
+    : super(::inflection::util::LocaleUtils::MALAYALAM(),
+            {GrammemeConstants::NUMBER_SINGULAR(), GrammemeConstants::NUMBER_PLURAL()},
+            {GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_VERB()})
+    , tokenizer(::inflection::tokenizer::TokenizerFactory::createTokenizer(::inflection::util::LocaleUtils::MALAYALAM()))
+    , dictionary(getDictionary())
+{
+    ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {u"noun"}));
+}
+
+MlGrammarSynthesizer_CountLookupFunction::~MlGrammarSynthesizer_CountLookupFunction() = default;
+
+// Resolves the number of the whole text first, then of its dictionary nouns,
+// and finally of the token that precedes the end of the token chain.
+::std::u16string MlGrammarSynthesizer_CountLookupFunction::determine(const ::std::u16string& word) const
+{
+    auto direct = super::determine(word);
+    if (word.empty() || !direct.empty()) {
+        return direct;
+    }
+    const ::std::unique_ptr<::inflection::tokenizer::TokenChain> chain(npc(npc(tokenizer.get())->createTokenChain(word)));
+
+    // Keep the number of the last dictionary noun that resolves.
+    ::std::u16string lastNounNumber;
+    for (const auto& token : *chain) {
+        if (dynamic_cast(&token) == nullptr
+            || !dictionary.hasAllProperties(token.getCleanValue(), nounProperty)) {
+            continue;
+        }
+        const auto candidate = super::determine(token.getValue());
+        if (!candidate.empty()) {
+            lastNounNumber = candidate;
+        }
+    }
+    if (!lastNounNumber.empty()) {
+        return lastNounNumber;
+    }
+
+    // Nothing resolved: fall back to the token just before the end of the chain.
+    return super::determine(npc(npc(chain->getEnd())->getPrevious())->getValue());
+}
+
+} // namespace inflection::grammar::synthesis
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.hpp
new file mode 100644
index 00000000..738f1b4a
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+
+/**
+ * Looks up the grammatical number (singular or plural) of Malayalam text,
+ * falling back to the individual tokens when the whole text is unknown.
+ */
+class inflection::grammar::synthesis::MlGrammarSynthesizer_CountLookupFunction
+    : public ::inflection::dialog::DictionaryLookupFunction
+{
+public:
+    using super = ::inflection::dialog::DictionaryLookupFunction;
+
+    MlGrammarSynthesizer_CountLookupFunction();
+    ~MlGrammarSynthesizer_CountLookupFunction() override;
+    MlGrammarSynthesizer_CountLookupFunction(const MlGrammarSynthesizer_CountLookupFunction&) = delete;
+    MlGrammarSynthesizer_CountLookupFunction& operator=(const MlGrammarSynthesizer_CountLookupFunction&) = delete;
+
+    ::std::u16string determine(const ::std::u16string& word) const override;
+
+private:
+    const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer;
+    const ::inflection::dictionary::DictionaryMetaData& dictionary;
+    int64_t nounProperty {};
+};
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp
new file mode 100644
index 00000000..2fe55cd1
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+MlGrammarSynthesizer_GenderLookupFunction::MlGrammarSynthesizer_GenderLookupFunction()
+    : super(::inflection::util::LocaleUtils::MALAYALAM(), {u"masculine", u"feminine"})
+    , tokenizer(::inflection::tokenizer::TokenizerFactory::createTokenizer(::inflection::util::LocaleUtils::MALAYALAM()))
+    , dictionary(getDictionary())
+{
+    ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {u"noun"}));
+}
+
+MlGrammarSynthesizer_GenderLookupFunction::~MlGrammarSynthesizer_GenderLookupFunction() = default;
+
+// NOTE(review): vowel sign I alone is a very broad feminine cue (കുട്ടി ends in it too) - confirm.
+static const ::std::set<::std::u16string_view>& FEMININE_SUFFIXES()
+{
+    static auto FEMININE_SUFFIXES_ = new ::std::set<::std::u16string_view>({
+        u"ി" // vowel sign I
+        , u"ാളി" // leading space removed: the suffix is compared against the end of a single token
+    });
+    return *npc(FEMININE_SUFFIXES_);
+}
+
+static const ::std::set<::std::u16string_view>& MASCULINE_SUFFIXES()
+{
+    static auto MASCULINE_SUFFIXES_ = new ::std::set<::std::u16string_view>({
+        u"ൻ" // chillu N, as in അവൻ
+        , u"ർ" // chillu RR
+    });
+    return *npc(MASCULINE_SUFFIXES_);
+}
+
+// Lookup order: the whole text, the first dictionary noun, the first word token,
+// the ending of the first token after the head, and finally masculine as the default.
+::std::u16string MlGrammarSynthesizer_GenderLookupFunction::determine(const ::std::u16string& word) const
+{
+    if (word.empty()) {
+        return {};
+    }
+    auto out = super::determine(word);
+    if (!out.empty()) {
+        return out;
+    }
+    ::std::unique_ptr<::inflection::tokenizer::TokenChain> tokenChain(npc(npc(tokenizer.get())->createTokenChain(word)));
+    for (auto token = tokenChain->begin(); token != tokenChain->end(); ++token) {
+        if (dynamic_cast(token.get()) != nullptr && dictionary.hasAllProperties(token->getCleanValue(), nounProperty)) {
+            out = super::determine(token->getValue());
+            break;
+        }
+    }
+    if (out.empty()) {
+        for (auto token = tokenChain->begin(); token != tokenChain->end(); ++token) {
+            if (dynamic_cast(token.get()) != nullptr) {
+                out = super::determine(token->getValue());
+                break;
+            }
+        }
+    }
+    if (out.empty()) {
+        const auto& stringToken = npc(npc(tokenChain->getHead())->getNext())->getCleanValue();
+        for (const auto& suffix : MASCULINE_SUFFIXES()) {
+            if (stringToken.ends_with(suffix)) {
+                out = GrammemeConstants::GENDER_MASCULINE();
+                break;
+            }
+        }
+        if (out.empty()) {
+            for (const auto& suffix : FEMININE_SUFFIXES()) {
+                if (stringToken.ends_with(suffix)) {
+                    out = GrammemeConstants::GENDER_FEMININE();
+                    break;
+                }
+            }
+        }
+    }
+    if (out.empty()) {
+        out = GrammemeConstants::GENDER_MASCULINE();
+    }
+    return out;
+}
+
+} // namespace inflection::grammar::synthesis
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp
new file mode 100644
index 00000000..6ab744f9
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * Looks up the grammatical gender of Malayalam text from the dictionary and,
+ * failing that, from a small set of word endings. Defaults to masculine.
+ */
+class inflection::grammar::synthesis::MlGrammarSynthesizer_GenderLookupFunction
+    : public ::inflection::dialog::DictionaryLookupFunction
+{
+public:
+    typedef ::inflection::dialog::DictionaryLookupFunction super;
+
+private:
+    const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer;
+    const ::inflection::dictionary::DictionaryMetaData& dictionary;
+    int64_t nounProperty { };
+public:
+    ::std::u16string determine(const ::std::u16string& word) const override;
+
+    MlGrammarSynthesizer_GenderLookupFunction();
+    ~MlGrammarSynthesizer_GenderLookupFunction() override;
+    MlGrammarSynthesizer_GenderLookupFunction(const MlGrammarSynthesizer_GenderLookupFunction&) = delete;
+    MlGrammarSynthesizer_GenderLookupFunction& operator=(const MlGrammarSynthesizer_GenderLookupFunction&) = delete;
+};
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp
new file mode 100644
index 00000000..e2607342
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+static constexpr auto ADJECTIVAL = u"adjectival";
+static constexpr auto PREDICATIVE = u"predicative";
+
+// NOTE(review): MlGrammarSynthesizer.hpp as added by this change does not declare FINAL_VOWELS() - confirm it exists.
+MlGrammarSynthesizer_MlDisplayFunction::MlGrammarSynthesizer_MlDisplayFunction(const ::inflection::dialog::SemanticFeatureModel& model)
+    : super()
+    , caseFeature(*npc(model.getFeature(GrammemeConstants::CASE)))
+    , adjectivalFeature(*npc(model.getFeature(ADJECTIVAL)))
+    , particleMap({
+        {GrammemeConstants::CASE_ACCUSATIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"യെ", u"നെ"}},
+        {GrammemeConstants::CASE_DATIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"ക്ക്", u"നു"}},
+        // Same shape as the other cases; the inflection fixture expects കുട്ടി -> കുട്ടിയുടെ.
+        {GrammemeConstants::CASE_GENITIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"യുടെ", u"ന്റെ"}},
+        {GrammemeConstants::CASE_LOCATIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"യിൽ", u"ൽ"}},
+        {GrammemeConstants::CASE_ABLATIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"യിൽനിന്ന്", u"ൽനിന്ന്"}},
+        {GrammemeConstants::CASE_INSTRUMENTAL(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"ഉപയോഗിച്ച്", u"കൊണ്ട്"}},
+        {GrammemeConstants::CASE_VOCATIVE(), {MlGrammarSynthesizer::FINAL_VOWELS(), u"ആ", u"േ"}},
+        // NOTE(review): "ഇാണ്" places a vowel sign after an independent vowel - verify the intended form.
+        {PREDICATIVE, {MlGrammarSynthesizer::FINAL_VOWELS(), u"ആണ്", u"ഇാണ്"}}
+    }) {}
+
+MlGrammarSynthesizer_MlDisplayFunction::~MlGrammarSynthesizer_MlDisplayFunction() = default;
+
+// Appends the suffix selected by the case (or, failing that, adjectival) constraint to the best
+// display value. Returns nullptr if there is no value, no suffix result, or no match without guessing.
+::inflection::dialog::DisplayValue*
+MlGrammarSynthesizer_MlDisplayFunction::getDisplayValue(
+    const dialog::SemanticFeatureModel_DisplayData& displayData,
+    const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints,
+    bool enableInflectionGuess) const
+{
+    const auto displayValue = GrammarSynthesizerUtil::getTheBestDisplayValue(displayData, constraints);
+    if (displayValue == nullptr) {
+        return nullptr;
+    }
+
+    std::u16string featureString = GrammarSynthesizerUtil::getFeatureValue(constraints, caseFeature);
+    if (featureString.empty()) {
+        featureString = GrammarSynthesizerUtil::getFeatureValue(constraints, adjectivalFeature);
+    }
+
+    auto particleResolver = particleMap.find(featureString);
+    std::u16string displayString(displayValue->getDisplayString());
+    if (particleResolver != particleMap.end() && !displayString.empty()) {
+        auto result = particleResolver->second.switchParticleValue(displayString, enableInflectionGuess);
+        if (result.has_value()) {
+            displayString = result.value();
+        } else {
+            return nullptr;
+        }
+    } else if (!enableInflectionGuess) {
+        return nullptr;
+    }
+    return new ::inflection::dialog::DisplayValue(displayString, constraints);
+}
+
+} // namespace inflection::grammar::synthesis
+
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp
new file mode 100644
index 00000000..bc1489c8
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2025 Apple Inc. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * Default display function for Malayalam: picks the best display value and
+ * appends the suffix that matches the requested case or adjectival constraint.
+ */
+class inflection::grammar::synthesis::MlGrammarSynthesizer_MlDisplayFunction : public virtual ::inflection::dialog::DefaultDisplayFunction {
+public:
+    typedef ::inflection::dialog::DefaultDisplayFunction super;
+
+private:
+    const ::inflection::dialog::SemanticFeature& caseFeature;
+    const ::inflection::dialog::SemanticFeature& adjectivalFeature;
+
+    std::map particleMap;
+
+public:
+    ::inflection::dialog::DisplayValue* getDisplayValue(
+        const dialog::SemanticFeatureModel_DisplayData& displayData,
+        const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints,
+        bool enableInflectionGuess) const override;
+
+    explicit MlGrammarSynthesizer_MlDisplayFunction(const ::inflection::dialog::SemanticFeatureModel& model);
+    ~MlGrammarSynthesizer_MlDisplayFunction() override;
+
+    MlGrammarSynthesizer_MlDisplayFunction(const MlGrammarSynthesizer_MlDisplayFunction&) = delete;
+    MlGrammarSynthesizer_MlDisplayFunction& operator=(const MlGrammarSynthesizer_MlDisplayFunction&) = delete;
+
+private:
+    friend class MlGrammarSynthesizer;
+};
+
diff --git a/inflection/src/inflection/grammar/synthesis/fwd.hpp b/inflection/src/inflection/grammar/synthesis/fwd.hpp
index 0693277e..e0487394 100644
--- a/inflection/src/inflection/grammar/synthesis/fwd.hpp
+++ b/inflection/src/inflection/grammar/synthesis/fwd.hpp
@@ -69,6 +69,11 @@ namespace inflection
             class NbGrammarSynthesizer;
             class NbGrammarSynthesizer_ArticleLookupFunction;
             class NbGrammarSynthesizer_NbDisplayFunction;
+            class MlGrammarSynthesizer;
+            class MlGrammarSynthesizer_CountLookupFunction;
+            class MlGrammarSynthesizer_GenderLookupFunction;
+            class MlGrammarSynthesizer_CaseLookupFunction;
+            class MlGrammarSynthesizer_MlDisplayFunction;
             class NlGrammarSynthesizer;
             class NlGrammarSynthesizer_ArticleLookupFunction;
             class
NlGrammarSynthesizer_DefinitenessLookupFunction; diff --git a/inflection/src/inflection/util/LocaleUtils.cpp b/inflection/src/inflection/util/LocaleUtils.cpp index 0a5cdc9f..8238bec9 100644 --- a/inflection/src/inflection/util/LocaleUtils.cpp +++ b/inflection/src/inflection/util/LocaleUtils.cpp @@ -407,6 +407,18 @@ const ULocale& LocaleUtils::MALAYSIA() return *npc(MALAYSIA_); } +const ULocale& LocaleUtils::MALAYALAM() +{ + static auto MALAYALAM_ = new ULocale("ml"); + return *npc(MALAYALAM_); +} + +const ULocale& LocaleUtils::INDIA_MALAYALAM() +{ + static auto INDIA_MALAYALAM_ = new ULocale("ml", "IN"); + return *npc(INDIA_MALAYALAM_); +} + const ULocale& LocaleUtils::NORWEGIAN() { static auto NORWEGIAN_ = new ULocale("nb"); diff --git a/inflection/src/inflection/util/LocaleUtils.hpp b/inflection/src/inflection/util/LocaleUtils.hpp index e5fa8582..ac4ec784 100644 --- a/inflection/src/inflection/util/LocaleUtils.hpp +++ b/inflection/src/inflection/util/LocaleUtils.hpp @@ -376,6 +376,14 @@ class INFLECTION_CLASS_API inflection::util::LocaleUtils final * ms_MY: Malay (Malaysia) */ static const ::inflection::util::ULocale& MALAYSIA(); + /** + * ml: Malayalam + */ + static const ::inflection::util::ULocale& MALAYALAM(); + /** + * ml_IN: Malayalam (India) + */ + static const ::inflection::util::ULocale& INDIA_MALAYALAM(); /** * nb: Norwegian Bokmål */ diff --git a/inflection/test/resources/inflection/dialog/inflection/ml.xml b/inflection/test/resources/inflection/dialog/inflection/ml.xml new file mode 100644 index 00000000..e954b384 --- /dev/null +++ b/inflection/test/resources/inflection/dialog/inflection/ml.xml @@ -0,0 +1,104 @@ + + + + +ഞാൻഞാൻ +ഞങ്ങൾഞങ്ങൾ +നീനീ +നിങ്ങൾനിങ്ങൾ +അവൾഅവൾ +അവൻഅവൻ +അത്അത് + + +കുട്ടികുട്ടികൾ +പുസ്തകങ്ങൾപുസ്തകം +മരംമരങ്ങൾ +കഥകൾകഥ + + +നല്ലനല്ല +നല്ലനല്ല + + +ചോദിക്കുകചോദിക്കുന്നു +ചോദിക്കുകചോദിച്ചു +ചോദിക്കുകചോദിക്കും +വരികവരുന്നു + + +കുട്ടികുട്ടി +കുട്ടികുട്ടിയെ +കുട്ടികുട്ടിക്ക് +കുട്ടികുട്ടിയുടെ +കുട്ടികുട്ടിയില് +കുട്ടികുട്ടിയാല് + + 
+അവൻഅവൻ +അവൾഅവൾ + + +പുതിയപുതിയ +പുതിയപുതിയ + + +മരം +മരങ്ങൾ + + +അവൾ +അവൻ +അത് + + +സൂപ്പർസൂപ്പർ +ഫേസ്ബുക്ക്ഫേസ്ബുക്കുകൾ + + +വളപ്പുറത്തെ ലൈറ്റ് +വളപ്പുറത്തെ ലൈറ്റുകൾ +തോട്ടത്തിലെ ലൈറ്റുകൾ + + +വരികവരുന്നു +വരികവന്നു +ചോദിക്കുകചോദിക്കും + + +ചോദിക്കുന്നുചോദിക്കുന്നു +ചോദിക്കുന്നുചോദിക്കുന്നു +വരുന്നുവരുന്നു +വരുന്നുവരുന്നു +ആകുന്നുആകുന്നു +ആകുന്നുആകുന്നു + + +കിലോമീറ്റർകിലോമീറ്റർ +കിലോമീറ്റർകിലോമീറ്ററുകൾ + + +കപ്പ്കപ്പുകൾ +പൂച്ചപൂച്ചകൾ + + +അവൻ ഓടുന്നു +അവൾ ഓടുന്നു + + +അവൻഅവനെ +അവൾഅവളെ +അവൻഅവന് + + +ക്യാമ്പസ് ലൈറ്റ് +ക്യാമ്പസ് ലൈറ്റുകൾ + + +ക്യാമ്പസ് ലൈറ്റ് +ക്യാമ്പസ് ലൈറ്റുകൾ +തോട്ടത്തിലെ ലൈറ്റുകൾ + + diff --git a/inflection/test/resources/inflection/dialog/pronoun/ml.xml b/inflection/test/resources/inflection/dialog/pronoun/ml.xml new file mode 100644 index 00000000..6e32fb1f --- /dev/null +++ b/inflection/test/resources/inflection/dialog/pronoun/ml.xml @@ -0,0 +1,59 @@ + + + + + അവൻ + + + ഞാൻ + എനിക്ക് + എന്റെ + + + നാം + ഞങ്ങൾ + നമുക്ക് + ഞങ്ങൾക്ക് + നമ്മുടെ + ഞങ്ങളുടെ + + + നീ + താങ്കൾ + നിനക്ക് + താങ്കൾക്ക് + നിന്റെ + താങ്കളുടെ + + + നിങ്ങൾ + നിങ്ങൾക്ക് + നിങ്ങളുടെ + + + അവൻ + അവനെ + അവന്റെ + + + അവൾ + അവളെ + അവളുടെ + + + അത് + അത് + അതിന്റേത് + + + അവർ + അവരെ + അവരുടെ + + + ഞാൻഅവൻ + ഞാൻഅവൾ + ഞാൻതാങ്കൾ + diff --git a/inflection/test/src/inflection/util/LocaleUtilsTest.cpp b/inflection/test/src/inflection/util/LocaleUtilsTest.cpp index 884d558c..420476d1 100644 --- a/inflection/test/src/inflection/util/LocaleUtilsTest.cpp +++ b/inflection/test/src/inflection/util/LocaleUtilsTest.cpp @@ -96,6 +96,7 @@ TEST_CASE("LocaleUtilsTest#testCoverage") inflection::util::LocaleUtils::KOREAN(), inflection::util::LocaleUtils::LITHUANIAN(), inflection::util::LocaleUtils::MALAY(), + inflection::util::LocaleUtils::MALAYALAM(), inflection::util::LocaleUtils::NORWEGIAN(), inflection::util::LocaleUtils::DUTCH(), inflection::util::LocaleUtils::POLISH(), @@ -142,6 +143,7 @@ TEST_CASE("LocaleUtilsTest#testCoverage") inflection::util::LocaleUtils::FRANCE(), 
inflection::util::LocaleUtils::SWITZERLAND_FRENCH(), inflection::util::LocaleUtils::INDIA_HINDI(), + inflection::util::LocaleUtils::INDIA_MALAYALAM(), inflection::util::LocaleUtils::CROATIA(), inflection::util::LocaleUtils::ISRAEL(), inflection::util::LocaleUtils::HUNGARY(),