diff --git a/documents/how_to_add_new_language.md b/documents/how_to_add_new_language.md index b0ff36d3..5813dfb7 100644 --- a/documents/how_to_add_new_language.md +++ b/documents/how_to_add_new_language.md @@ -10,6 +10,9 @@ The following steps with help you identify files that need to be added or change NOTE: Take a look at [PR #40](https://github.com/unicode-org/inflection/pull/40) and [PR #111](https://github.com/unicode-org/inflection/pull/111) for example on how to add initial language support based on dictionary lookup only. In general, to bootstrap your progress look for grammatically similar language that's already supported, e.g. if you are adding Serbian look for existing Russian implementation. This will help you find most of the files you need to add/change and will speed up implementation of the rules and lexicons. +We recommend you spend around a week researching the language and its different components before you begin modifying and adding the files below. Look at all the files in the project, such as tokenizers, configuration files, grammar files, and the different lookup functions, to see what you need. This will save you a lot of time in the end. We strongly suggest you steer clear of hardcoded logic and rely on dictionary lookup instead. Look at all the grammemes, tokenizer logic, and multi-word phrase handling. + +Before you add new language support, go to the README.md in the inflection subfolder (inflection/inflection/README.md), build the project, and make sure all the tests pass on your computer. ## Mark your language as supported * UPDATE: inflection/src/inflection/util/LocaleUtils.hpp @@ -29,13 +32,13 @@ TODO: We need to expand what each of these do. 
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.hpp * ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.cpp * ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp -* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp +* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.cpp * UPDATE: inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp * UPDATE: inflection/src/inflection/grammar/synthesis/fwd.hpp ## Add language specific properties for lists, quantities and related topics * ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp -* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp +* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.cpp * UPDATE: inflection/src/inflection/dialog/language/fwd.hpp ## Define and create lexion diff --git a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes index fe535cd8..9dc3b5af 100644 --- a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes +++ b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes @@ -8,6 +8,7 @@ dictionary_he.lst filter=lfs diff=lfs merge=lfs -text dictionary_hi.lst filter=lfs diff=lfs merge=lfs -text dictionary_it.lst filter=lfs diff=lfs merge=lfs -text dictionary_ko.lst filter=lfs diff=lfs merge=lfs -text +dictionary_ml.lst filter=lfs diff=lfs merge=lfs -text dictionary_nb.lst filter=lfs diff=lfs merge=lfs -text dictionary_nl.lst filter=lfs diff=lfs merge=lfs -text dictionary_pt.lst filter=lfs diff=lfs merge=lfs -text @@ -23,6 +24,7 @@ inflectional_fr.xml filter=lfs diff=lfs merge=lfs -text inflectional_he.xml filter=lfs diff=lfs merge=lfs -text inflectional_hi.xml filter=lfs diff=lfs merge=lfs -text inflectional_it.xml 
filter=lfs diff=lfs merge=lfs -text +inflectional_ml.xml filter=lfs diff=lfs merge=lfs -text inflectional_nb.xml filter=lfs diff=lfs merge=lfs -text inflectional_nl.xml filter=lfs diff=lfs merge=lfs -text inflectional_pt.xml filter=lfs diff=lfs merge=lfs -text diff --git a/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst new file mode 100644 index 00000000..16334d78 --- /dev/null +++ b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0c250408b3b33d34c649abdc9fb38f4038eb8c79376457927d6875c6b2d2b1 +size 53959671 diff --git a/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml new file mode 100644 index 00000000..07c896d8 --- /dev/null +++ b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d37a014f1784bde85ab6d5e2c274a9509a535a1a4d070d86cc506c09bcd4813 +size 617609 diff --git a/inflection/resources/org/unicode/inflection/features/grammar.xml b/inflection/resources/org/unicode/inflection/features/grammar.xml index 6a620220..ba7ddccd 100644 --- a/inflection/resources/org/unicode/inflection/features/grammar.xml +++ b/inflection/resources/org/unicode/inflection/features/grammar.xml @@ -1624,6 +1624,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv new file mode 100644 index 00000000..d4150315 --- /dev/null +++ 
b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv @@ -0,0 +1,83 @@ +അവൻ,third,singular,nominative,masculine,personal,distal +എനിക്ക്,first,singular,dative,personal +ഞാൻ,first,singular,nominative,exclusive,personal +എന്നെ,first,singular,accusative,exclusive,personal +എന്റെ,first,singular,genitive,determination=dependent,exclusive,personal +എന്റേത്,first,singular,genitive,determination=independent,exclusive,personal +നമ്മെ,first,plural,accusative,inclusive,personal +നമുക്ക്,first,plural,dative,inclusive,personal +ഞങ്ങൾ,first,plural,nominative,exclusive,personal +ഞങ്ങളെ,first,plural,accusative,exclusive,personal +ഞങ്ങൾക്ക്,first,plural,dative,exclusive,personal +ഞങ്ങളുടെ,first,plural,genitive,exclusive,determination=dependent,personal +ഞങ്ങളുടേത്,first,plural,genitive,exclusive,determination=independent,personal +നമ്മുടെ,first,plural,genitive,inclusive,determination=dependent,personal +നമ്മുടേതു്,first,plural,genitive,inclusive,determination=independent,personal +നിനക്ക്,second,singular,dative,informal,personal +നീ,second,singular,nominative,informal,personal +നിനെ,second,singular,accusative,informal,personal +നിന്റെ,second,singular,genitive,informal,determination=dependent,personal +നിന്റേതു്,second,singular,genitive,informal,determination=independent,personal +താങ്കൾ,second,singular,nominative,formal,personal +താങ്കളെ,second,singular,accusative,formal,personal +താങ്കൾക്ക്,second,singular,dative,formal,personal +താങ്കളുടെ,second,singular,genitive,formal,determination=dependent,personal +താങ്കളുടേതു്,second,singular,genitive,formal,determination=independent,personal +നിങ്ങൾ,second,plural,nominative,formal,personal +നിങ്ങളെ,second,plural,accusative,formal,personal +നിങ്ങൾക്ക്,second,plural,dative,formal,personal +നിങ്ങളുടെ,second,plural,genitive,formal,determination=dependent,personal +നിങ്ങളുടേതു്,second,plural,genitive,formal,determination=independent,personal +അവനെ,third,singular,accusative,masculine,personal,distal 
+അവന്റെ,third,singular,genitive,masculine,determination=dependent,personal,distal +അവന്റെത്,third,singular,genitive,masculine,determination=independent,personal,distal +അവൾ,third,singular,nominative,feminine,personal,distal +അവളെ,third,singular,accusative,feminine,personal,distal +അവളുടെ,third,singular,genitive,feminine,determination=dependent,personal,distal +അവളുടേതു്,third,singular,genitive,feminine,determination=independent,personal,distal +അത്,third,singular,nominative,neuter,personal,distal +അതിനെ,third,singular,accusative,neuter,personal,distal +അതിന്റെ,third,singular,genitive,neuter,determination=dependent,personal,distal +അതിന്റേതു്,third,singular,genitive,neuter,determination=independent,personal,distal +അവർ,third,plural,nominative,personal,distal +അവരെ,third,plural,accusative,personal,distal +അവരുടെ,third,plural,genitive,determination=dependent,personal,distal +അവരുടേതു്,third,plural,genitive,determination=independent,personal,distal +എന്നിൽ,first,singular,locative,personal +എന്നാൽ,first,singular,instrumental,personal +എന്നോടു്,first,singular,sociative,personal +ഞങ്ങളിലു്,first,plural,locative,exclusive,personal +ഞങ്ങളാൽ,first,plural,instrumental,exclusive,personal +ഞങ്ങളോടു്,first,plural,sociative,exclusive,personal +നിനിൽ,second,singular,locative,informal,personal +നിനാൽ,second,singular,instrumental,informal,personal +നിനോടു്,second,singular,sociative,informal,personal +താങ്കളിൽ,second,singular,locative,formal,personal +താങ്കളാൽ,second,singular,instrumental,formal,personal +താങ്കളോടു്,second,singular,sociative,formal,personal +നിങ്ങളിൽ,second,plural,locative,formal,personal +നിങ്ങളാൽ,second,plural,instrumental,formal,personal +നിങ്ങളോടു്,second,plural,sociative,formal,personal +അവനിൽ,third,singular,locative,masculine,personal,distal +അവനാൽ,third,singular,instrumental,masculine,personal,distal +അവനോടു്,third,singular,sociative,masculine,personal,distal +അവളിൽ,third,singular,locative,feminine,personal,distal 
+അവളാൽ,third,singular,instrumental,feminine,personal,distal +അവളോടു്,third,singular,sociative,feminine,personal,distal +അതിൽ,third,singular,locative,neuter,personal,distal +അതാൽ,third,singular,instrumental,neuter,personal,distal +അതോടു്,third,singular,sociative,neuter,personal,distal +അവരിൽ,third,plural,locative,personal,distal +അവരാൽ,third,plural,instrumental,personal,distal +അവരോടു്,third,plural,sociative,personal,distal +താൻ,third,singular,nominative,reflexive,personal +തങ്ങൾ,third,plural,nominative,formal,reflexive,personal +ഇവൻ,third,singular,nominative,masculine,proximal,personal +ഇവൾ,third,singular,nominative,feminine,proximal,personal +ഇത്,third,singular,nominative,neuter,proximal,personal +ഇവർ,third,plural,nominative,proximal,personal +എവൻ,third,singular,nominative,masculine,interrogative +എവൾ,third,singular,nominative,feminine,interrogative +എവർ,third,plural,nominative,interrogative +ഏത്,third,singular,nominative,neuter,interrogative +നാം,first,plural,nominative,inclusive,personal diff --git a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties index 6815591d..43741bca 100644 --- a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties +++ b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties @@ -15,6 +15,7 @@ locale.group.it=it_IT,it_CH locale.group.ja=ja_JP locale.group.ko=ko_KR locale.group.ms=ms_MY +locale.group.ml=ml_IN locale.group.nb=nb_NO locale.group.nl=nl_NL,nl_BE locale.group.pt=pt_BR,pt_PT diff --git a/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties new file mode 100644 index 00000000..03f49492 --- /dev/null +++ b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties @@ -0,0 +1,7 @@ +# +# Copyright 2025 Unicode Incorporated and others. All rights reserved. 
+# +tokenizer.implementation.class=DefaultTokenizer +tokenizer.nonDecompound.file=/org/unicode/inflection/tokenizer/ml/nondecompound.tok +tokenizer.decompound=(ശ്രീ)(.+?)(ഗുരു|സര്‍ക്കാര്‍)$|^(.+?)(ഗുരു|സര്‍ക്കാര്‍)$|^(.+?)(ഉണ്ട്|ആണ്|ഇല്ല)$|^(.+?)(ഒടൊപ്പം|ഉടൻ|ഓടെ|ഓട്|ഒപ്പം|തന്നെ|പോലും|പോലെ|ഉം|യ്)$|^(.+?)(കളുടെ|ങ്ങളുടെ|ത്തിന്റെ|ൻ്റെ|ന്റെ|യുടേ|യുടെ|യാൽ|യിൽ|ഇൽ|ല്|ൽ|ക്ക്|മാർ|ങ്ങൾ|കൾ|നെ|യെ) + diff --git a/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok b/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok new file mode 100644 index 00000000..c62b299c --- /dev/null +++ b/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok @@ -0,0 +1,35 @@ +അമ്മ +അച്ഛൻ +അച്ഛി +അമ്മൻ +മകൻ +മകൾ +കുട്ടി +കുട്ടികൾ +ആൺകുട്ടി +ആൺകുട്ടികൾ +പെൺകുട്ടി +പെൺകുട്ടികൾ +കഥ +ചിത്രം +ചിത്രങ്ങൾ +ഗ്രന്ഥം +ഗ്രന്ഥങ്ങൾ +മക്കൾ +ഞാൻ +നീ +നിങ്ങൾ +അവൻ +അവൾ +അവ +അവർ +ഇത് +അത് +ഇവ +അവ +ശ്രീ +നാരായണ +ഗുരു +കേരളം +സര്‍ക്കാര്‍ +കേരളസര്‍ക്കാര്‍ diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp new file mode 100644 index 00000000..0626c420 --- /dev/null +++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp @@ -0,0 +1,60 @@ +/* + * Copyright 2025 Unicode Incorporated and others. All rights reserved. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::dialog::language {
+
+// OR list: builds the list via the base class and, when it is non-null, calls setBeforeLast() with the Malayalam conjunction.
+::inflection::dialog::SemanticConceptList* MlCommonConceptFactory::createOrList(
+    const ::std::vector& concepts) const
+{
+    auto list = super::createOrList(concepts);
+    if (list) {
+        list->setBeforeLast(::inflection::dialog::SpeakableString(u" അല്ലെങ്കിൽ "));
+    }
+    return list;
+}
+
+// AND list: builds the list via the base class and, when it is non-null, calls setBeforeLast() and setItemDelimiter(", ").
+::inflection::dialog::SemanticConceptList* MlCommonConceptFactory::createAndList(
+    const ::std::vector& concepts) const
+{
+    auto list = super::createAndList(concepts);
+    if (list) {
+        list->setBeforeLast(::inflection::dialog::SpeakableString(u"യും "));
+        list->setItemDelimiter(::inflection::dialog::SpeakableString(u", "));
+    }
+    return list;
+}
+
+// Joins number and noun phrase with one space: noun phrase first when countType is ONE, number first otherwise; measureWord is unused.
+::inflection::dialog::SpeakableString
+MlCommonConceptFactory::quantifiedJoin(const ::inflection::dialog::SpeakableString& formattedNumber,
+                                       const ::inflection::dialog::SpeakableString& nounPhrase,
+                                       const ::std::u16string& /*measureWord*/,
+                                       Plurality::Rule countType) const
+{
+    ::inflection::dialog::SpeakableString space(u" ");
+    if (countType == Plurality::Rule::ONE) {
+        return nounPhrase + space + formattedNumber;
+    }
+    return formattedNumber + space + nounPhrase;
+}
+
+// Forwards all arguments unchanged to the base-class quantifyType(); no Malayalam-specific handling yet.
+::inflection::dialog::SpeakableString
+MlCommonConceptFactory::quantifyType(const ::inflection::dialog::SpeakableString& formattedNumber,
+                                     const ::inflection::dialog::SemanticFeatureConceptBase& semanticConcept,
+                                     bool useDefault,
+                                     ::inflection::dialog::Plurality::Rule countType) const
+{
+    return super::quantifyType(formattedNumber, semanticConcept, useDefault, countType);
+}
+
+} // namespace inflection::dialog::language
diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
new file mode
100644
index 00000000..ee9e9f42
--- /dev/null
+++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace inflection::dialog::language {
+
+/**
+ * Malayalam common-concept factory: overrides list creation and quantity joining
+ * on top of CommonConceptFactoryImpl.
+ */
+class MlCommonConceptFactory : public CommonConceptFactoryImpl {
+    using super = CommonConceptFactoryImpl;
+
+public:
+    // Both special members are defined inline so that no out-of-line definitions
+    // are required in MlCommonConceptFactory.cpp for the class to link.
+    explicit MlCommonConceptFactory(const ::inflection::util::ULocale& language) : super(language) {}
+    ~MlCommonConceptFactory() override = default;
+
+    // Malayalam-specific conjunction handling
+    ::inflection::dialog::SemanticConceptList* createOrList(
+        const ::std::vector& concepts) const override;
+
+    ::inflection::dialog::SemanticConceptList* createAndList(
+        const ::std::vector& concepts) const override;
+
+protected:
+    // Joins an already formatted number with a noun phrase.
+    ::inflection::dialog::SpeakableString quantifiedJoin(
+        const ::inflection::dialog::SpeakableString& formattedNumber,
+        const ::inflection::dialog::SpeakableString& nounPhrase,
+        const ::std::u16string& measureWord,
+        ::inflection::dialog::Plurality::Rule countType) const override;
+
+    // Currently forwards to the base-class implementation.
+    ::inflection::dialog::SpeakableString quantifyType(
+        const ::inflection::dialog::SpeakableString& formattedNumber,
+        const ::inflection::dialog::SemanticFeatureConceptBase& semanticConcept,
+        bool useDefault,
+        ::inflection::dialog::Plurality::Rule countType) const override;
+};
+
+} // namespace inflection::dialog::language
diff --git a/inflection/src/inflection/dialog/language/fwd.hpp b/inflection/src/inflection/dialog/language/fwd.hpp
index 6429ca3a..8dbefaee 100644
--- a/inflection/src/inflection/dialog/language/fwd.hpp
+++ b/inflection/src/inflection/dialog/language/fwd.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017-2024 Apple Inc. All rights reserved.
+ * Copyright 2017-2025 Apple Inc. All rights reserved.
*/ // Forward declarations for inflection.dialog.language #pragma once @@ -28,6 +28,7 @@ namespace inflection class JaCommonConceptFactory; class KoCommonConceptFactory; class KoCommonConceptFactory_KoAndList; + class MlCommonConceptFactory; class MsCommonConceptFactory; class NbCommonConceptFactory; class NlCommonConceptFactory; diff --git a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp index ecb31303..faff5658 100644 --- a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp +++ b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2017-2024 Apple Inc. All rights reserved. + * Copyright 2017-2025 Apple Inc. All rights reserved. */ #include @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ static const ::std::map<::inflection::util::ULocale, addSemanticFeatures>& GRAMM {::inflection::util::LocaleUtils::HINDI(), &HiGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::ITALIAN(), &ItGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::KOREAN(), &KoGrammarSynthesizer::addSemanticFeatures}, + {::inflection::util::LocaleUtils::MALAYALAM(), &MlGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::NORWEGIAN(), &NbGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::DUTCH(), &NlGrammarSynthesizer::addSemanticFeatures}, {::inflection::util::LocaleUtils::PORTUGUESE(), &PtGrammarSynthesizer::addSemanticFeatures}, diff --git a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp index 52d1f31f..68ef6b23 100644 --- a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp +++ b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp @@ -159,6 +159,12 @@ const 
::std::u16string& GrammemeConstants::CASE_PREPOSITIONAL() return *npc(CASE_PREPOSITIONAL_); } +const ::std::u16string& GrammemeConstants::CASE_SOCIATIVE() +{ + static auto CASE_SOCIATIVE_ = new ::std::u16string(u"sociative"); + return *npc(CASE_SOCIATIVE_); +} + const ::std::u16string& GrammemeConstants::CASE_TRANSLATIVE() { static auto CASE_TRANSLATIVE_ = new ::std::u16string(u"translative"); diff --git a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp index 4010c1ba..dcd61fc7 100644 --- a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp +++ b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp @@ -42,6 +42,7 @@ class inflection::grammar::synthesis::GrammemeConstants final static const ::std::u16string& CASE_OBLIQUE(); static const ::std::u16string& CASE_PARTITIVE(); static const ::std::u16string& CASE_PREPOSITIONAL(); + static const ::std::u16string& CASE_SOCIATIVE(); static const ::std::u16string& CASE_TRANSLATIVE(); static const ::std::u16string& CASE_VOCATIVE(); diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp new file mode 100644 index 00000000..759d4cb2 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp @@ -0,0 +1,157 @@ +/* + * Copyright 2025 Unicode Incorporated and others. All rights reserved. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+// File-local grammeme string for the subjunctive mood; getMood() compares against it.
+    static constexpr auto MOOD_SUBJUNCTIVE = u"subjunctive";
+// Registers the default NUMBER, GENDER and CASE feature functions and the Malayalam display function on featureModel.
+void MlGrammarSynthesizer::addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& featureModel)
+{
+    featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::NUMBER,
+        new MlGrammarSynthesizer_NumberLookupFunction());
+    featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::GENDER,
+        new MlGrammarSynthesizer_GenderLookupFunction());
+    featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::CASE,
+        new MlGrammarSynthesizer_CaseLookupFunction());
+
+    featureModel.setDefaultDisplayFunction(new MlGrammarSynthesizer_MlDisplayFunction(featureModel));
+}
+// Maps a number grammeme string to Number; a null pointer or an unrecognized value yields Number::undefined.
+MlGrammarSynthesizer::Number MlGrammarSynthesizer::getNumber(const ::std::u16string* value) {
+    if (!value) return Number::undefined;
+    if (*value == GrammemeConstants::NUMBER_SINGULAR()) return Number::singular;
+    if (*value == GrammemeConstants::NUMBER_PLURAL()) return Number::plural;
+    return Number::undefined;
+}
+// Maps a case grammeme string to Case; null or unrecognized values yield Case::undefined. NOTE(review): CASE_SOCIATIVE has no branch here.
+MlGrammarSynthesizer::Case MlGrammarSynthesizer::getCase(const ::std::u16string* value) {
+    if (!value) return Case::undefined;
+    if (*value == GrammemeConstants::CASE_NOMINATIVE()) return Case::nominative;
+    if (*value == GrammemeConstants::CASE_ACCUSATIVE()) return Case::accusative;
+    if (*value == GrammemeConstants::CASE_DATIVE()) return Case::dative;
+    if (*value == GrammemeConstants::CASE_GENITIVE()) return Case::genitive;
+    if (*value == GrammemeConstants::CASE_INSTRUMENTAL()) return Case::instrumental;
+    if (*value == GrammemeConstants::CASE_LOCATIVE()) return Case::locative;
+    return Case::undefined;
+}
+// Maps a person grammeme string to Person; a null pointer or an unrecognized value yields Person::undefined.
+MlGrammarSynthesizer::Person MlGrammarSynthesizer::getPerson(const ::std::u16string* value) {
+    if (!value) return Person::undefined;
+    if (*value == GrammemeConstants::PERSON_FIRST()) return Person::first;
+    if (*value == GrammemeConstants::PERSON_SECOND()) return Person::second;
+    if (*value == GrammemeConstants::PERSON_THIRD()) return Person::third;
+    return Person::undefined;
+}
+// Maps a tense grammeme string to Tense; a null pointer or an unrecognized value yields Tense::undefined.
+MlGrammarSynthesizer::Tense MlGrammarSynthesizer::getTense(const ::std::u16string* value) {
+    if (!value) return Tense::undefined;
+    if (*value == GrammemeConstants::TENSE_PAST()) return Tense::past;
+    if (*value == GrammemeConstants::TENSE_PRESENT()) return Tense::present;
+    if (*value == GrammemeConstants::TENSE_FUTURE()) return Tense::future;
+    return Tense::undefined;
+}
+// Maps a mood grammeme string to Mood; a null pointer or an unrecognized value yields Mood::undefined.
+MlGrammarSynthesizer::Mood MlGrammarSynthesizer::getMood(const ::std::u16string* value) {
+    if (!value) return Mood::undefined;
+    if (*value == GrammemeConstants::MOOD_INDICATIVE()) return Mood::indicative;
+    if (*value == GrammemeConstants::MOOD_IMPERATIVE()) return Mood::imperative;
+    if (*value == MOOD_SUBJUNCTIVE) return Mood::subjunctive;
+    return Mood::undefined;
+}
+// Packs a noun key: case in bits 0-7, number in bits 8-15.
+MlGrammarSynthesizer::LookupKey MlGrammarSynthesizer::makeLookupKey(Number num, Case kase) {
+    return (static_cast(kase) & 0xFF)
+         | ((static_cast(num) & 0xFF) << 8);
+}
+// Packs a verb key: person in bits 0-7, number in bits 8-15, tense in bits 24-27, mood in bits 28-31.
+MlGrammarSynthesizer::LookupKey MlGrammarSynthesizer::makeVerbLookupKey(Person person, Number num, Tense tense, Mood mood) {
+    return (static_cast(person) & 0xFF)
+         | ((static_cast(num) & 0xFF) << 8)
+         | ((static_cast(tense) & 0x0F) << 24)
+         | ((static_cast(mood) & 0x0F) << 28);
+}
+// The three *FromConstraint helpers below take a reference and forward its address to the matching get*() parser.
+MlGrammarSynthesizer::Person MlGrammarSynthesizer::personFromConstraint(const ::std::u16string& val) {
+    return getPerson(&val);
+}
+
+MlGrammarSynthesizer::Number MlGrammarSynthesizer::numberFromConstraint(const ::std::u16string& val) {
+    return getNumber(&val);
+}
+
+MlGrammarSynthesizer::Case MlGrammarSynthesizer::caseFromConstraint(const ::std::u16string& val) {
+    return getCase(&val);
+}
+// Builds a verb key from constraint strings: each category takes the first value that parses; unparsed categories stay undefined.
+MlGrammarSynthesizer::LookupKey MlGrammarSynthesizer::buildVerbSuffixKey(const std::vector<::std::u16string>& constraintValues) {
+    Person person = Person::undefined;
+    Number num = Number::undefined;
+    Tense tense = Tense::undefined;
+    Mood mood = Mood::undefined;
+
+    for (const auto& val : constraintValues) {
+        if (person == Person::undefined) person = personFromConstraint(val);
+        if (num == Number::undefined) num = numberFromConstraint(val);
+        if (tense == Tense::undefined) tense = getTense(&val);
+        if (mood == Mood::undefined) mood = getMood(&val);
+    }
+
+    return makeVerbLookupKey(person, num, tense, mood);
+}
+// Noun suffixes keyed by makeLookupKey(number, case); only nominative, genitive and dative have entries.
+const std::map MlGrammarSynthesizer::malayalamSuffixMap = {
+    {makeLookupKey(Number::singular, Case::nominative), u""},
+    {makeLookupKey(Number::plural, Case::nominative), u"കൾ"},
+    {makeLookupKey(Number::singular, Case::genitive), u"യുടെ"},
+    {makeLookupKey(Number::plural, Case::genitive), u"കളുടെ"},
+    {makeLookupKey(Number::singular, Case::dative), u"ക്ക്"},
+    {makeLookupKey(Number::plural, Case::dative), u"കൾക്ക്"},
+};
+// Verb suffixes keyed by makeVerbLookupKey(person, number, tense, mood); only indicative-mood keys have entries.
+const std::map MlGrammarSynthesizer::malayalamVerbSuffixMap = {
+    {makeVerbLookupKey(Person::first, Number::singular, Tense::past, Mood::indicative), u"ച്ചു"},
+    {makeVerbLookupKey(Person::first, Number::plural, Tense::past, Mood::indicative), u"ഞ്ഞു"},
+    {makeVerbLookupKey(Person::second, Number::singular, Tense::past, Mood::indicative), u"ച്ചു"},
+    {makeVerbLookupKey(Person::second, Number::plural, Tense::past, Mood::indicative), u"ന്നു"},
+    {makeVerbLookupKey(Person::third, Number::singular, Tense::past, Mood::indicative), u"ച്ചു"},
+    {makeVerbLookupKey(Person::third, Number::plural, Tense::past, Mood::indicative), u"ന്നു"},
+
+    {makeVerbLookupKey(Person::first, Number::singular, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+    {makeVerbLookupKey(Person::first, Number::plural, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+    {makeVerbLookupKey(Person::second, Number::singular, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+    {makeVerbLookupKey(Person::second, Number::plural, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+    {makeVerbLookupKey(Person::third, Number::singular, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+    {makeVerbLookupKey(Person::third, Number::plural, Tense::present, Mood::indicative), u"ിക്കുന്നു"},
+
+    {makeVerbLookupKey(Person::first, Number::singular, Tense::future, Mood::indicative), u" ചെയ്യും"},
+    {makeVerbLookupKey(Person::first, Number::plural, Tense::future, Mood::indicative), u" ചെയ്യും"},
+    {makeVerbLookupKey(Person::second, Number::singular, Tense::future, Mood::indicative), u" ചെയ്യും"},
+    {makeVerbLookupKey(Person::second, Number::plural, Tense::future, Mood::indicative), u" ചെയ്യും"},
+    {makeVerbLookupKey(Person::third, Number::singular, Tense::future, Mood::indicative), u" ചെയ്യും"},
+    {makeVerbLookupKey(Person::third, Number::plural, Tense::future, Mood::indicative), u" ചെയ്യും"},
+};
+// Returns the noun suffix stored for key, or a reference to a function-local empty string when the key is absent.
+const std::u16string& MlGrammarSynthesizer::getSuffix(LookupKey key) {
+    static const std::u16string empty = u"";
+    auto it = malayalamSuffixMap.find(key);
+    return it != malayalamSuffixMap.end() ? it->second : empty;
+}
+// Returns the verb suffix stored for key, or a reference to a function-local empty string when the key is absent.
+const std::u16string& MlGrammarSynthesizer::getVerbSuffix(LookupKey key) {
+    static const std::u16string empty = u"";
+    auto it = malayalamVerbSuffixMap.find(key);
+    return it != malayalamVerbSuffixMap.end() ? it->second : empty;
+}
+
+} // namespace inflection::grammar::synthesis
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp
new file mode 100644
index 00000000..9017796b
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.hpp
@@ -0,0 +1,86 @@
+/*
+* Copyright 2025 Unicode Incorporated and others. All rights reserved.
+*/
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+// Malayalam grammar helpers: grammeme enums, lookup-key packing and suffix tables. All members are static.
+class inflection::grammar::synthesis::MlGrammarSynthesizer final
+{
+public:
+    static void addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& model);
+    // Grammatical number; getNumber() returns undefined for a null pointer or an unrecognized value.
+    enum class Number {
+        undefined,
+        singular,
+        plural
+    };
+    static Number getNumber(const ::std::u16string* value);
+    // Grammatical case; getCase() returns undefined for a null pointer or an unrecognized value.
+    enum class Case {
+        undefined,
+        nominative,
+        accusative,
+        dative,
+        genitive,
+        instrumental,
+        locative
+    };
+    static Case getCase(const ::std::u16string* value);
+    // Grammatical person; getPerson() returns undefined for a null pointer or an unrecognized value.
+    enum class Person {
+        undefined,
+        first,
+        second,
+        third
+    };
+    static Person getPerson(const ::std::u16string* value);
+    // Verb tense; getTense() returns undefined for a null pointer or an unrecognized value.
+    enum class Tense {
+        undefined,
+        past,
+        present,
+        future
+    };
+    static Tense getTense(const ::std::u16string* value);
+    // Verb mood; getMood() returns undefined for a null pointer or an unrecognized value.
+    enum class Mood {
+        undefined,
+        indicative,
+        imperative,
+        subjunctive
+    };
+    static Mood getMood(const ::std::u16string* value);
+    // 32-bit key type used to index the two suffix tables declared below.
+    typedef uint32_t LookupKey;
+    static LookupKey makeLookupKey(Number num, Case kase);
+    static LookupKey makeVerbLookupKey(Person person, Number num, Tense tense, Mood mood);
+    // Reference-taking wrappers around getPerson() / getNumber() / getCase().
+    static Person personFromConstraint(const ::std::u16string& val);
+    static Number numberFromConstraint(const ::std::u16string& val);
+    static Case caseFromConstraint(const ::std::u16string& val);
+    // Builds a verb lookup key from a list of constraint value strings.
+    static LookupKey buildVerbSuffixKey(const std::vector<::std::u16string>& constraintValues);
+    // Suffix tables keyed by LookupKey; their contents are defined in MlGrammarSynthesizer.cpp.
+    static const std::map malayalamSuffixMap;
+    static const std::map malayalamVerbSuffixMap;
+    // Table lookups; the definitions return an empty string when the key is absent.
+    static const ::std::u16string& getSuffix(LookupKey key);
+    static const ::std::u16string& getVerbSuffix(LookupKey key);
+
+private:
+    MlGrammarSynthesizer() = delete;
+
+public:
+    static constexpr auto NOUN_CASE = u"case";
+    static constexpr auto NOUN_NUMBER = u"number";
+    static constexpr auto VERB_PERSON = u"person";
+    static constexpr auto VERB_NUMBER = u"verbNumber";
+    static constexpr auto VERB_TENSE = u"tense";
+    static constexpr auto VERB_MOOD = u"mood";
+};
diff --git
a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp
new file mode 100644
index 00000000..8eb24e80
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.cpp
@@ -0,0 +1,68 @@
+/*
+* Copyright 2025 Unicode Incorporated and others. All rights reserved.
+*/
+#include "MlGrammarSynthesizer_CaseLookupFunction.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+// Constructor: initialize the member table here (no static locals).
+// getFeatureValue() tests the entries in this order and stops at the first match, so an
+// entry that ends with an earlier entry is never selected itself; that is harmless only
+// while both entries map to the same case.
+MlGrammarSynthesizer_CaseLookupFunction::MlGrammarSynthesizer_CaseLookupFunction()
+    : m_suffixToCase_{
+        { u"ന്റെ", GrammemeConstants::CASE_GENITIVE() },
+        { u"യുടെ", GrammemeConstants::CASE_GENITIVE() },
+        { u"ഉടെ", GrammemeConstants::CASE_GENITIVE() },
+        { u"ആയുടെ", GrammemeConstants::CASE_GENITIVE() },
+        { u"ഉടേതു്", GrammemeConstants::CASE_GENITIVE() },
+        { u"ഉടേതു", GrammemeConstants::CASE_GENITIVE() },
+        { u"ഉടെത്", GrammemeConstants::CASE_GENITIVE() },
+        { u"നെ", GrammemeConstants::CASE_ACCUSATIVE() },
+        { u"ക്ക്", GrammemeConstants::CASE_DATIVE() },
+        { u"യ്ക്ക്", GrammemeConstants::CASE_DATIVE() },
+        { u"യിൽ", GrammemeConstants::CASE_LOCATIVE() },
+        { u"ഇൽ", GrammemeConstants::CASE_LOCATIVE() },
+        { u"ആൽ", GrammemeConstants::CASE_INSTRUMENTAL() },
+        { u"വഴി", GrammemeConstants::CASE_INSTRUMENTAL() },
+        { u"ഓടെ", GrammemeConstants::CASE_SOCIATIVE() }
+    }
+{
+}
+
+/**
+ * Derives a case grammeme from the ending of the display string. The constraints argument is not used.
+ *
+ * @param displayValue Its display string is lowercased with the Malayalam locale and compared against the suffix table.
+ * @return A newly allocated SpeakableString holding the case of the first matching suffix, or nullptr if none matches.
+ */
+::inflection::dialog::SpeakableString* MlGrammarSynthesizer_CaseLookupFunction::getFeatureValue(
+    const ::inflection::dialog::DisplayValue& displayValue,
+    const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& /*constraints*/) const
+{
+    std::u16string displayString;
+    ::inflection::util::StringViewUtils::lowercase(
+        &displayString,
+        displayValue.getDisplayString(),
+        ::inflection::util::LocaleUtils::MALAYALAM());
+
+    // Use std::u16string::ends_with with a u16string_view suffix
+    for (const auto& pair : m_suffixToCase_) {
+        const std::u16string_view suffix = pair.first;
+        const std::u16string& caseGrammeme = pair.second;
+        if (displayString.ends_with(suffix)) {
+            return new ::inflection::dialog::SpeakableString(caseGrammeme);
+        }
+    }
+
+    return nullptr;
+}
+
+} // namespace inflection::grammar::synthesis
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp
new file mode 100644
index 00000000..bacc176b
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp
@@ -0,0 +1,43 @@
+/*
+* Copyright 2025 Unicode Incorporated and others. All rights reserved.
+*/
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace inflection::grammar::synthesis {
+
+/**
+ * Feature function that reports a case grammeme for a Malayalam display string,
+ * based on an ordered table of word-final suffixes. Non-copyable.
+ */
+class MlGrammarSynthesizer_CaseLookupFunction
+    : public ::inflection::dialog::DefaultFeatureFunction
+{
+public:
+    typedef ::inflection::dialog::DefaultFeatureFunction super;
+
+public:
+    MlGrammarSynthesizer_CaseLookupFunction();
+    ~MlGrammarSynthesizer_CaseLookupFunction() override = default;
+
+    MlGrammarSynthesizer_CaseLookupFunction(const MlGrammarSynthesizer_CaseLookupFunction&) = delete;
+    MlGrammarSynthesizer_CaseLookupFunction& operator=(const MlGrammarSynthesizer_CaseLookupFunction&) = delete;
+
+    ::inflection::dialog::SpeakableString* getFeatureValue(
+        const ::inflection::dialog::DisplayValue& displayValue,
+        const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints) const override;
+
+private:
+    const std::vector> m_suffixToCase_;
+};
+
+} // namespace inflection::grammar::synthesis
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp
b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp new file mode 100644 index 00000000..dc078f29 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.cpp @@ -0,0 +1,144 @@ +/* + * Copyright 2025 Apple Inc. All rights reserved. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace inflection::grammar::synthesis { + +static bool ends_with(const std::u16string& str, const std::u16string_view& suffix) { + if (suffix.size() > str.size()) return false; + return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin()); +} + +MlGrammarSynthesizer_GenderLookupFunction::MlGrammarSynthesizer_GenderLookupFunction() + : super(::inflection::util::LocaleUtils::MALAYALAM(), + {GrammemeConstants::GENDER_MASCULINE(), + GrammemeConstants::GENDER_FEMININE(), + GrammemeConstants::GENDER_NEUTER()}, + {GrammemeConstants::POS_NOUN(), + GrammemeConstants::POS_PRONOUN()}) + , tokenizer(::inflection::tokenizer::TokenizerFactory::createTokenizer(::inflection::util::LocaleUtils::MALAYALAM())) + , dictionary(getDictionary()) +{ + ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {u"noun"})); +} + +MlGrammarSynthesizer_GenderLookupFunction::~MlGrammarSynthesizer_GenderLookupFunction() +{ + +} + +static const ::std::set<::std::u16string_view>& FEMININE_SUFFIXES() +{ + static auto FEMININE_SUFFIXES_ = new ::std::set<::std::u16string_view>({ + u"ി" // e.g. പെൺ (pen) endings + , u"ാളി" // common feminine suffix in Malayalam nouns + }); + return *npc(FEMININE_SUFFIXES_); +} + +static const ::std::set<::std::u16string_view>& MASCULINE_SUFFIXES() +{ + static auto MASCULINE_SUFFIXES_ = new ::std::set<::std::u16string_view>({ + u"ൻ" // e.g. 
ആൾ (person) endings + , u"ർ" // common masculine suffix in Malayalam nouns + }); + return *npc(MASCULINE_SUFFIXES_); +} + +static const ::std::set<::std::u16string_view>& NEUTER_SUFFIXES() +{ + static auto NEUTER_SUFFIXES_ = new ::std::set<::std::u16string_view>({ + u"ത്", + u"ം", + u"യം" + }); + return *npc(NEUTER_SUFFIXES_); +} + +::std::u16string MlGrammarSynthesizer_GenderLookupFunction::determine(const ::std::u16string& word) const +{ + if (word.empty()) { + return {}; + } + + auto gender = super::determine(word); + if (gender.empty()) { + ::std::unique_ptr<::inflection::tokenizer::TokenChain> tokenChain(npc(npc(tokenizer.get())->createTokenChain(word))); + + // First try dictionary lookup on noun tokens + for (auto token = tokenChain->begin(); token != tokenChain->end(); ++token) { + if (dynamic_cast(token.get()) != nullptr && + dictionary.hasAllProperties(token->getCleanValue(), nounProperty)) { + gender = super::determine(token->getValue()); + if (!gender.empty()) break; + } + } + + // If still empty, try any word token + if (gender.empty()) { + for (auto token = tokenChain->begin(); token != tokenChain->end(); ++token) { + if (dynamic_cast(token.get()) != nullptr) { + gender = super::determine(token->getValue()); + if (!gender.empty()) break; + } + } + } + + // If still empty, fallback to suffix heuristics on the second token in chain + if (gender.empty()) { + auto head = tokenChain->getHead(); + if (head != nullptr) { + auto token = npc(head)->getNext(); + if (token != nullptr) { + const auto& stringToken = npc(token)->getCleanValue(); + + for (const auto& suffix : MASCULINE_SUFFIXES()) { + if (ends_with(stringToken, suffix)) { + gender = GrammemeConstants::GENDER_MASCULINE(); + break; + } + } + + if (gender.empty()) { + for (const auto& suffix : FEMININE_SUFFIXES()) { + if (ends_with(stringToken, suffix)) { + gender = GrammemeConstants::GENDER_FEMININE(); + break; + } + } + } + + if (gender.empty()) { + for (const auto& suffix : NEUTER_SUFFIXES()) { + if 
(ends_with(stringToken, suffix)) { + gender = GrammemeConstants::GENDER_NEUTER(); + break; + } + } + } + } + } + } + } + + if (gender.empty()) { + // Default to masculine if no gender is detected + gender = GrammemeConstants::GENDER_MASCULINE(); + } + return gender; +} + +} // namespace inflection::grammar::synthesis + diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp new file mode 100644 index 00000000..6ab744f9 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Apple Inc. All rights reserved. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +class inflection::grammar::synthesis::MlGrammarSynthesizer_GenderLookupFunction + : public ::inflection::dialog::DictionaryLookupFunction +{ +public: + typedef ::inflection::dialog::DictionaryLookupFunction super; + +private: + const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer; + const ::inflection::dictionary::DictionaryMetaData& dictionary; + int64_t nounProperty { }; +public: + ::std::u16string determine(const ::std::u16string& word) const override; + + explicit MlGrammarSynthesizer_GenderLookupFunction(); + ~MlGrammarSynthesizer_GenderLookupFunction() override; + MlGrammarSynthesizer_GenderLookupFunction(const MlGrammarSynthesizer_GenderLookupFunction&) = delete; + MlGrammarSynthesizer_GenderLookupFunction& operator=(const MlGrammarSynthesizer_GenderLookupFunction&) = delete; +}; + diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp new file mode 100644 index 00000000..32b94758 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp @@ -0,0 
+1,293 @@ +/* + * Copyright 2025 Unicode Incorporated and others. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace inflection::grammar::synthesis { + +using dialog::SemanticFeature; +using dialog::SemanticFeatureModel_DisplayData; +using dialog::DisplayValue; + +static const icu4cxx::UnicodeSet& malayalamInflectableChars = + ::inflection::lang::StringFilterUtil::MALAYALAM_SCRIPT(); +static const icu4cxx::UnicodeSet nonMalayalamChars(u"[\\p{Latin}\\p{Nd}\\p{Punct}]"); + +static constexpr auto FORMALITY_FORMAL = u"formal"; +static constexpr auto FORMALITY_INFORMAL = u"informal"; +static constexpr auto CLUSIVITY_INCLUSIVE = u"inclusive"; +static constexpr auto CLUSIVITY_EXCLUSIVE = u"exclusive"; +static constexpr auto MOOD_SUBJUNCTIVE = u"subjunctive"; + +MlGrammarSynthesizer_MlDisplayFunction::MlGrammarSynthesizer_MlDisplayFunction( + const ::inflection::dialog::SemanticFeatureModel& model) + : caseFeature(*npc(model.getFeature(GrammemeConstants::CASE))), + numberFeature(*npc(model.getFeature(GrammemeConstants::NUMBER))), + genderFeature(*npc(model.getFeature(GrammemeConstants::GENDER))), + posFeature(*npc(model.getFeature(GrammemeConstants::POS))), + formalityFeature(*npc(model.getFeature(u"formality"))), + clusivityFeature(*npc(model.getFeature(u"clusivity"))), + personFeature(*npc(model.getFeature(GrammemeConstants::PERSON))), + tenseFeature(*npc(model.getFeature(u"tense"))), + moodFeature(*npc(model.getFeature(u"mood"))), + pronounTypeFeature(*npc(model.getFeature(u"pronounType"))), + determinationFeature(*npc(model.getFeature(u"determination"))), + dictionaryInflector( + util::LocaleUtils::MALAYALAM(), + { + {GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_VERB(), GrammemeConstants::POS_PRONOUN()}, + {GrammemeConstants::CASE_NOMINATIVE(), 
GrammemeConstants::CASE_ACCUSATIVE(), GrammemeConstants::CASE_DATIVE(), + GrammemeConstants::CASE_GENITIVE(), GrammemeConstants::CASE_LOCATIVE(), GrammemeConstants::CASE_INSTRUMENTAL(), + GrammemeConstants::CASE_SOCIATIVE()}, + {GrammemeConstants::NUMBER_SINGULAR(), GrammemeConstants::NUMBER_PLURAL()}, + {GrammemeConstants::GENDER_MASCULINE(), GrammemeConstants::GENDER_FEMININE(), GrammemeConstants::GENDER_NEUTER()}, + {FORMALITY_FORMAL, FORMALITY_INFORMAL}, + {CLUSIVITY_INCLUSIVE, CLUSIVITY_EXCLUSIVE}, + {GrammemeConstants::PERSON_FIRST(), GrammemeConstants::PERSON_SECOND(), GrammemeConstants::PERSON_THIRD()}, + {GrammemeConstants::TENSE_PAST(), GrammemeConstants::TENSE_PRESENT(), GrammemeConstants::TENSE_FUTURE()}, + {GrammemeConstants::MOOD_INDICATIVE(), GrammemeConstants::MOOD_IMPERATIVE(), MOOD_SUBJUNCTIVE} // local constant + }, + {}, + true) {} + +static std::vector buildConstraintVector( + const std::map& constraints, + const SemanticFeature& posFeature, + const SemanticFeature& caseFeature, + const SemanticFeature& numberFeature, + const SemanticFeature& genderFeature, + const SemanticFeature& formalityFeature, + const SemanticFeature& clusivityFeature, + const SemanticFeature& personFeature, + const SemanticFeature& determinationFeature, + const SemanticFeature& tenseFeature, + const SemanticFeature& moodFeature, + const SemanticFeature& pronounTypeFeature) { + + std::vector vals; + auto addIfNotEmpty = [&](const SemanticFeature& f) { + const auto v = GrammarSynthesizerUtil::getFeatureValue(constraints, f); + if (!v.empty()) vals.push_back(v); + }; + + addIfNotEmpty(caseFeature); + addIfNotEmpty(numberFeature); + addIfNotEmpty(genderFeature); + addIfNotEmpty(formalityFeature); + addIfNotEmpty(clusivityFeature); + addIfNotEmpty(personFeature); + addIfNotEmpty(tenseFeature); + addIfNotEmpty(moodFeature); // subjunctive now correctly included + addIfNotEmpty(determinationFeature); + addIfNotEmpty(pronounTypeFeature); + + const auto posVal = 
GrammarSynthesizerUtil::getFeatureValue(constraints, posFeature); + if (!posVal.empty()) vals.push_back(posVal); + + return vals; +} + +// Fallback noun/verb inflection unchanged; subjunctive handled via constraintValues +static std::optional guessFallbackNounInflection( + const std::u16string& phrase, + const std::vector& constraintValues, + const dialog::DictionaryLookupInflector& dictionaryInflector) +{ + std::unique_ptr tokenizer( + inflection::tokenizer::TokenizerFactory::createTokenizer(util::LocaleUtils::MALAYALAM())); + if (!tokenizer) return std::nullopt; + + std::unique_ptr tokenChain( + tokenizer->createTokenChain(phrase)); + if (!tokenChain || tokenChain->getWordCount() == 0) return std::nullopt; + + const inflection::tokenizer::Token* lastSignificantToken = nullptr; + int64_t lastTokenGrammemes = 0; + + for (auto& token : *tokenChain) { + if (!token.isSignificant()) continue; + + int64_t combinedType = 0; + dictionaryInflector.getDictionary().getCombinedBinaryType(&combinedType, token.getValue()); + + if (inflection::util::UnicodeSetUtils::containsSome(malayalamInflectableChars, token.getValue())) { + lastSignificantToken = &token; + lastTokenGrammemes = combinedType; + } + } + + if (!lastSignificantToken) return phrase; + + std::u16string result; + for (auto& token : *tokenChain) { + std::u16string tokenVal = token.getValue(); + + if (token.isSignificant() && &token == lastSignificantToken) { + auto inflected = dictionaryInflector.inflect(tokenVal, lastTokenGrammemes, constraintValues); + if (!inflected.has_value()) { + MlGrammarSynthesizer::Number num = MlGrammarSynthesizer::Number::undefined; + MlGrammarSynthesizer::Case kase = MlGrammarSynthesizer::Case::undefined; + for (const auto& val : constraintValues) { + if (num == MlGrammarSynthesizer::Number::undefined) + num = MlGrammarSynthesizer::numberFromConstraint(val); + if (kase == MlGrammarSynthesizer::Case::undefined) + kase = MlGrammarSynthesizer::caseFromConstraint(val); + } + + auto key = 
MlGrammarSynthesizer::makeLookupKey(num, kase); + if ((key & 0x20) != 0 && !tokenVal.ends_with(u"കൾ")) { + tokenVal += MlGrammarSynthesizer::getSuffix(key); + } + } else { + tokenVal = *inflected; + } + } + + if (!result.empty()) result += u" "; + result += tokenVal; + } + + return result; +} + +static std::optional guessFallbackVerbInflection( + const std::u16string& token, + const std::vector& constraintValues) +{ + auto key = MlGrammarSynthesizer::buildVerbSuffixKey(constraintValues); + return token + MlGrammarSynthesizer::getVerbSuffix(key); +} + +std::u16string MlGrammarSynthesizer_MlDisplayFunction::inflectPhrase( + const std::u16string& phrase, + const std::vector& constraintValues, + bool enableInflectionGuess) const +{ + std::unique_ptr tokenizer( + inflection::tokenizer::TokenizerFactory::createTokenizer(util::LocaleUtils::MALAYALAM())); + if (!tokenizer) return phrase; + + std::unique_ptr tokenChain( + tokenizer->createTokenChain(phrase)); + if (!tokenChain || tokenChain->getWordCount() == 0) return phrase; + + std::u16string posVal; + for (const auto& val : constraintValues) { + if (val == GrammemeConstants::POS_NOUN() || + val == GrammemeConstants::POS_PRONOUN() || + val == GrammemeConstants::POS_VERB()) + { + posVal = val; + break; + } + } + + const inflection::tokenizer::Token* lastSignificantToken = nullptr; + for (const auto& token : *tokenChain) { + if (token.isSignificant()) lastSignificantToken = &token; + } + + std::u16string result; + for (const auto& token : *tokenChain) { + if (!result.empty() && token.isSignificant()) result += u" "; + std::u16string tokenVal = token.getValue(); + + if (&token == lastSignificantToken) { + int64_t lastTokenGrammemes = 0; + dictionaryInflector.getDictionary().getCombinedBinaryType(&lastTokenGrammemes, tokenVal); + + auto inflectedOpt = dictionaryInflector.inflect(tokenVal, lastTokenGrammemes, constraintValues); + + if (!inflectedOpt.has_value() && enableInflectionGuess) { + if (posVal == 
GrammemeConstants::POS_NOUN() || posVal == GrammemeConstants::POS_PRONOUN()) { + std::u16string fullPhrase; + for (const auto& t : *tokenChain) fullPhrase += t.getValue(); + inflectedOpt = guessFallbackNounInflection(fullPhrase, constraintValues, dictionaryInflector); + } else if (posVal == GrammemeConstants::POS_VERB()) { + inflectedOpt = guessFallbackVerbInflection(tokenVal, constraintValues); + } + } + + if (inflectedOpt.has_value()) { + tokenVal = *inflectedOpt; + } else { + MlGrammarSynthesizer::Number num = MlGrammarSynthesizer::Number::undefined; + MlGrammarSynthesizer::Case kase = MlGrammarSynthesizer::Case::undefined; + for (const auto& val : constraintValues) { + if (num == MlGrammarSynthesizer::Number::undefined) + num = MlGrammarSynthesizer::numberFromConstraint(val); + if (kase == MlGrammarSynthesizer::Case::undefined) + kase = MlGrammarSynthesizer::caseFromConstraint(val); + } + + auto key = MlGrammarSynthesizer::makeLookupKey(num, kase); + if ((key & 0x20) != 0 && !tokenVal.ends_with(u"കൾ")) { + tokenVal += MlGrammarSynthesizer::getSuffix(key); + } + } + } + + result += tokenVal; + } + + return result; +} + +::inflection::dialog::DisplayValue* MlGrammarSynthesizer_MlDisplayFunction::getDisplayValue( + const ::inflection::dialog::SemanticFeatureModel_DisplayData& displayData, + const std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, + bool enableInflectionGuess) const +{ + const auto displayValue = GrammarSynthesizerUtil::getTheBestDisplayValue(displayData, constraints); + if (!displayValue || displayValue->getDisplayString().empty()) return nullptr; + + const std::u16string& firstDisplayValue = displayValue->getDisplayString(); + if (constraints.empty() || + !inflection::util::UnicodeSetUtils::containsSome(malayalamInflectableChars, firstDisplayValue) || + inflection::util::UnicodeSetUtils::containsSome(nonMalayalamChars, firstDisplayValue)) + { + return new DisplayValue(firstDisplayValue, constraints); + } + + 
std::vector constraintValues = buildConstraintVector( + constraints, + posFeature, + caseFeature, + numberFeature, + genderFeature, + formalityFeature, + clusivityFeature, + personFeature, + determinationFeature, + tenseFeature, + moodFeature, + pronounTypeFeature); + + std::u16string inflected = inflectPhrase(firstDisplayValue, constraintValues, enableInflectionGuess); + if (!inflected.empty() && inflected != firstDisplayValue) return new DisplayValue(inflected, constraints); + + return nullptr; +} + +MlGrammarSynthesizer_MlDisplayFunction::~MlGrammarSynthesizer_MlDisplayFunction() = default; + +} // namespace inflection::grammar::synthesis diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp new file mode 100644 index 00000000..b564c573 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp @@ -0,0 +1,53 @@ +/* + * Copyright 2025 Unicode Incorporated and others. All rights reserved. 
+ */ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace inflection::grammar::synthesis { + class MlGrammarSynthesizer_MlDisplayFunction : public virtual ::inflection::dialog::DefaultDisplayFunction { + public: + using super = ::inflection::dialog::DefaultDisplayFunction; + + private: + const ::inflection::dialog::SemanticFeature& caseFeature; + const ::inflection::dialog::SemanticFeature& numberFeature; + const ::inflection::dialog::SemanticFeature& genderFeature; + const ::inflection::dialog::SemanticFeature& posFeature; + const ::inflection::dialog::SemanticFeature& formalityFeature; + const ::inflection::dialog::SemanticFeature& clusivityFeature; + const ::inflection::dialog::SemanticFeature& personFeature; + const ::inflection::dialog::SemanticFeature& tenseFeature; + const ::inflection::dialog::SemanticFeature& moodFeature; + const ::inflection::dialog::SemanticFeature& pronounTypeFeature; + const ::inflection::dialog::SemanticFeature& determinationFeature; + ::inflection::dialog::DictionaryLookupInflector dictionaryInflector; + + public: + ::inflection::dialog::DisplayValue* getDisplayValue( + const ::inflection::dialog::SemanticFeatureModel_DisplayData& displayData, + const std::map<::inflection::dialog::SemanticFeature, std::u16string>& constraints, + bool enableInflectionGuess) const override; + + ::std::u16string inflectPhrase( + const ::std::u16string& phrase, + const ::std::vector<::std::u16string>& constraintValues, + bool enableInflectionGuess) const; + + public: + explicit MlGrammarSynthesizer_MlDisplayFunction(const ::inflection::dialog::SemanticFeatureModel& model); + ~MlGrammarSynthesizer_MlDisplayFunction() override; + MlGrammarSynthesizer_MlDisplayFunction(MlGrammarSynthesizer_MlDisplayFunction&) = delete; + MlGrammarSynthesizer_MlDisplayFunction& operator=(MlGrammarSynthesizer_MlDisplayFunction&) = delete; + + private: + friend class MlGrammarSynthesizer; + }; +} // namespace 
inflection::grammar::synthesis \ No newline at end of file diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.cpp new file mode 100644 index 00000000..a27fefc7 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.cpp @@ -0,0 +1,71 @@ +/* + * Copyright 2025 Unicode Incorporated and others. All rights reserved. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace inflection::grammar::synthesis { + +MlGrammarSynthesizer_NumberLookupFunction::MlGrammarSynthesizer_NumberLookupFunction() + : super(::inflection::util::LocaleUtils::MALAYALAM(), + {GrammemeConstants::NUMBER_SINGULAR(), GrammemeConstants::NUMBER_PLURAL()}, + {GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_VERB()}) + , tokenizer(::inflection::tokenizer::TokenizerFactory::createTokenizer(::inflection::util::LocaleUtils::MALAYALAM())) + , dictionary(getDictionary()) +{ + ::inflection::util::Validate::notNull(dictionary.getBinaryProperties(&nounProperty, {u"noun"})); +} + +MlGrammarSynthesizer_NumberLookupFunction::~MlGrammarSynthesizer_NumberLookupFunction() +{ +} + +::std::u16string MlGrammarSynthesizer_NumberLookupFunction::determine(const ::std::u16string& word) const +{ + if (word.empty()) { + return {}; + } + + auto out = super::determine(word); + if (!out.empty()) { + return out; + } + + std::unique_ptr<::inflection::tokenizer::TokenChain> tokenChain( + npc(npc(tokenizer.get())->createTokenChain(word))); + + for (const auto& token : *tokenChain) { + if (dynamic_cast(&token) != nullptr) { + if (dictionary.hasAllProperties(token.getCleanValue(), nounProperty)) { + out = super::determine(token.getValue()); + if (!out.empty()) { + return out; + } + } + } + } + + // plural suffix detection + static const std::vector PLURAL_SUFFIXES = { + u"കൾ", u"ങ്ങൾ", 
u"മാർ", u"വർ", u"കളുടെ", u"ങ്ങൾക്ക്" + }; + + const auto& lastToken = npc(npc(tokenChain->getEnd())->getPrevious())->getValue(); + for (const auto& suffix : PLURAL_SUFFIXES) { + if (lastToken.size() >= suffix.size() && lastToken.ends_with(suffix)) { + return GrammemeConstants::NUMBER_PLURAL(); + } + } + + return GrammemeConstants::NUMBER_SINGULAR(); +} + +} // namespace inflection::grammar::synthesis diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.hpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.hpp new file mode 100644 index 00000000..10d2bbd3 --- /dev/null +++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_NumberLookupFunction.hpp @@ -0,0 +1,29 @@ +/* +* Copyright 2025 Unicode Incorporated and others. All rights reserved. +*/ +#pragma once + +#include +#include +#include + +class inflection::grammar::synthesis::MlGrammarSynthesizer_NumberLookupFunction + : public ::inflection::dialog::DictionaryLookupFunction +{ +public: + typedef ::inflection::dialog::DictionaryLookupFunction super; + +private: + const ::std::unique_ptr<::inflection::tokenizer::Tokenizer> tokenizer; + const ::inflection::dictionary::DictionaryMetaData& dictionary; + int64_t nounProperty {}; + +public: + ::std::u16string determine(const ::std::u16string& word) const override; + + MlGrammarSynthesizer_NumberLookupFunction(); + ~MlGrammarSynthesizer_NumberLookupFunction() override; + MlGrammarSynthesizer_NumberLookupFunction(const MlGrammarSynthesizer_NumberLookupFunction&) = delete; + MlGrammarSynthesizer_NumberLookupFunction& operator=(const MlGrammarSynthesizer_NumberLookupFunction&) = delete; +}; + diff --git a/inflection/src/inflection/grammar/synthesis/fwd.hpp b/inflection/src/inflection/grammar/synthesis/fwd.hpp index 0693277e..1e02f219 100644 --- a/inflection/src/inflection/grammar/synthesis/fwd.hpp +++ b/inflection/src/inflection/grammar/synthesis/fwd.hpp @@ -69,6 +69,11 @@ 
namespace inflection class NbGrammarSynthesizer; class NbGrammarSynthesizer_ArticleLookupFunction; class NbGrammarSynthesizer_NbDisplayFunction; + class MlGrammarSynthesizer; + class MlGrammarSynthesizer_NumberLookupFunction; + class MlGrammarSynthesizer_GenderLookupFunction; + class MlGrammarSynthesizer_CaseLookupFunction; + class MlGrammarSynthesizer_MlDisplayFunction; class NlGrammarSynthesizer; class NlGrammarSynthesizer_ArticleLookupFunction; class NlGrammarSynthesizer_DefinitenessLookupFunction; diff --git a/inflection/src/inflection/lang/StringFilterUtil.cpp b/inflection/src/inflection/lang/StringFilterUtil.cpp index dd095729..f9942dff 100644 --- a/inflection/src/inflection/lang/StringFilterUtil.cpp +++ b/inflection/src/inflection/lang/StringFilterUtil.cpp @@ -41,6 +41,13 @@ const icu4cxx::UnicodeSet& StringFilterUtil::HEBREW_SCRIPT() return *npc(HEBREW_SCRIPT_); } +const icu4cxx::UnicodeSet& StringFilterUtil::MALAYALAM_SCRIPT() +{ + static auto MALAYALAM_SCRIPT_ = + ::inflection::util::UnicodeSetUtils::freeze(new ::icu4cxx::UnicodeSet(u"[:Malayalam:]")); + return *npc(MALAYALAM_SCRIPT_); +} + const icu4cxx::UnicodeSet& StringFilterUtil::HAN_SCRIPT() { static auto HAN_SCRIPT_ = ::inflection::util::UnicodeSetUtils::freeze(new ::icu4cxx::UnicodeSet(u"[:Han:]")); diff --git a/inflection/src/inflection/lang/StringFilterUtil.hpp b/inflection/src/inflection/lang/StringFilterUtil.hpp index 10a32b85..d266bbae 100644 --- a/inflection/src/inflection/lang/StringFilterUtil.hpp +++ b/inflection/src/inflection/lang/StringFilterUtil.hpp @@ -69,6 +69,10 @@ class INFLECTION_INTERNAL_API inflection::lang::StringFilterUtil final * A set of all characters in the Hebrew script. */ static const ::icu4cxx::UnicodeSet& HEBREW_SCRIPT(); + /** + * A set of all characters in the Malayalam script. + */ + static const ::icu4cxx::UnicodeSet& MALAYALAM_SCRIPT(); /** * A set of all characters in the Han script. The Han script is unified between Chinese, Japanese and Korean. 
*/ diff --git a/inflection/src/inflection/util/LocaleUtils.cpp b/inflection/src/inflection/util/LocaleUtils.cpp index 0a5cdc9f..8238bec9 100644 --- a/inflection/src/inflection/util/LocaleUtils.cpp +++ b/inflection/src/inflection/util/LocaleUtils.cpp @@ -407,6 +407,18 @@ const ULocale& LocaleUtils::MALAYSIA() return *npc(MALAYSIA_); } +const ULocale& LocaleUtils::MALAYALAM() +{ + static auto MALAYALAM_ = new ULocale("ml"); + return *npc(MALAYALAM_); +} + +const ULocale& LocaleUtils::INDIA_MALAYALAM() +{ + static auto INDIA_MALAYALAM_ = new ULocale("ml", "IN"); + return *npc(INDIA_MALAYALAM_); +} + const ULocale& LocaleUtils::NORWEGIAN() { static auto NORWEGIAN_ = new ULocale("nb"); diff --git a/inflection/src/inflection/util/LocaleUtils.hpp b/inflection/src/inflection/util/LocaleUtils.hpp index e5fa8582..ac4ec784 100644 --- a/inflection/src/inflection/util/LocaleUtils.hpp +++ b/inflection/src/inflection/util/LocaleUtils.hpp @@ -376,6 +376,14 @@ class INFLECTION_CLASS_API inflection::util::LocaleUtils final * ms_MY: Malay (Malaysia) */ static const ::inflection::util::ULocale& MALAYSIA(); + /** + * ml: Malayalam + */ + static const ::inflection::util::ULocale& MALAYALAM(); + /** + * ml_IN: Malayalam (India) + */ + static const ::inflection::util::ULocale& INDIA_MALAYALAM(); /** * nb: Norwegian Bokmål */ diff --git a/inflection/test/resources/inflection/dialog/inflection/ml.xml b/inflection/test/resources/inflection/dialog/inflection/ml.xml new file mode 100644 index 00000000..3fff089c --- /dev/null +++ b/inflection/test/resources/inflection/dialog/inflection/ml.xml @@ -0,0 +1,68 @@ + + + + + + മരംമരങ്ങൾ + കഥകഥകൾ + + + അട്ടുകഅട്ടുക + അട്ടുകഅട്ടുക + + + പക്ഷിപക്ഷി + പക്ഷിപക്ഷിയെ + പക്ഷിപക്ഷിക്കു് + പക്ഷിപക്ഷിയുടെ + പക്ഷിപക്ഷിയിൽ + പക്ഷിപക്ഷിയാൽ + + + മരം + മരങ്ങൾ + + + വളപ്പുറത്തെ ലൈറ്റ് + വളപ്പുറത്തെ ലൈറ്റുകൾ + തോട്ടത്തിലെ ലൈറ്റുകൾ + + + അട്ടുകഅട്ടുക + അട്ടുകഅട്ടുക + അട്ടുകഅട്ടുക + + + പോകുകപോകുക + പോകുകപോകുക + അട്ടുകഅട്ടുക + അട്ടുകഅട്ടുക + ആർക്കുകആർക്കുക + 
ആർക്കുകആർക്കുക + + + മീറ്റർമീറ്റർ + മീറ്റർമീറ്ററുകൾ + + + കപ്പ്കപ്പുകൾ + പൂച്ചപൂച്ചകൾ + + + ക്യാമ്പസ് ലൈറ്റ് + തോട്ടത്തിലെ ലൈറ്റുകൾ + + + അട്ടുകഅട്ടുക + + + ഇടുകഇടുക + അട്ടുകഅട്ടുക + + + അട്ടുകഅട്ടുക + പോകുകപോകുക + + diff --git a/inflection/test/resources/inflection/dialog/pronoun/ml.xml b/inflection/test/resources/inflection/dialog/pronoun/ml.xml new file mode 100644 index 00000000..ea41396f --- /dev/null +++ b/inflection/test/resources/inflection/dialog/pronoun/ml.xml @@ -0,0 +1,65 @@ + + + + + അവൻ + + + ഞാൻ + എനിക്ക് + എന്റെ + + + നാം + ഞങ്ങൾ + നമുക്ക് + ഞങ്ങൾക്ക് + നമ്മുടെ + ഞങ്ങളുടെ + + + നീ + താങ്കൾ + നിനെ + താങ്കളെ + നിന്റെ + താങ്കളുടെ + + + നിങ്ങൾ + നിങ്ങളെ + നിങ്ങൾക്ക് + നിങ്ങളുടെ + + + അവൻ + അവനെ + അവന്റെ + + + അവൾ + അവളെ + അവളുടെ + + + അത് + അതിനെ + അതിന്റെ + + + അവർ + അവരെ + അവരുടെ + + + ഞാൻഅവൻ + ഞാൻഅവൾ + ഞാൻതാങ്കൾ + + + ഞങ്ങൾ + നാം + + \ No newline at end of file diff --git a/inflection/test/resources/inflection/tokenizer/ml.xml b/inflection/test/resources/inflection/tokenizer/ml.xml new file mode 100644 index 00000000..6b9ee454 --- /dev/null +++ b/inflection/test/resources/inflection/tokenizer/ml.xml @@ -0,0 +1,39 @@ + + + + + + കേരളം + + + + കേരളസര്‍ക്കാര്‍ + + + + കേരളം|സര്‍ക്കാര്‍ + + + + പുസ്തകം|ഉണ്ട് + + + + വീട്|ക്ക് + + + + ശ്രീ|നാരായണ|ഗുരു + + + + കേരളബ്ലാസ്റ്റേഴ്സ് + + + + സംഗീതോത്സവം + + + diff --git a/inflection/test/src/inflection/util/LocaleUtilsTest.cpp b/inflection/test/src/inflection/util/LocaleUtilsTest.cpp index 884d558c..723843d3 100644 --- a/inflection/test/src/inflection/util/LocaleUtilsTest.cpp +++ b/inflection/test/src/inflection/util/LocaleUtilsTest.cpp @@ -96,6 +96,7 @@ TEST_CASE("LocaleUtilsTest#testCoverage") inflection::util::LocaleUtils::KOREAN(), inflection::util::LocaleUtils::LITHUANIAN(), inflection::util::LocaleUtils::MALAY(), + inflection::util::LocaleUtils::MALAYALAM(), inflection::util::LocaleUtils::NORWEGIAN(), inflection::util::LocaleUtils::DUTCH(), inflection::util::LocaleUtils::POLISH(), @@ -142,6 +143,7 @@ 
TEST_CASE("LocaleUtilsTest#testCoverage") inflection::util::LocaleUtils::FRANCE(), inflection::util::LocaleUtils::SWITZERLAND_FRENCH(), inflection::util::LocaleUtils::INDIA_HINDI(), + inflection::util::LocaleUtils::INDIA_MALAYALAM(), inflection::util::LocaleUtils::CROATIA(), inflection::util::LocaleUtils::ISRAEL(), inflection::util::LocaleUtils::HUNGARY(),