unicode-org · nciric · Aug 19, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.cpp b/inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.cpp
@@ -11,14 +11,18 @@
 #include <inflection/dialog/SemanticFeatureModel.hpp>
 #include <inflection/dialog/SemanticFeatureModel_DisplayData.hpp>
 #include <inflection/dialog/DisplayValue.hpp>
+#include <inflection/dictionary/PhraseProperties.hpp>
 #include <inflection/grammar/synthesis/GrammemeConstants.hpp>
 #include <inflection/grammar/synthesis/GrammarSynthesizerUtil.hpp>
 #include <inflection/lang/StringFilterUtil.hpp>
 #include <inflection/util/LocaleUtils.hpp>
+#include <inflection/util/StringViewUtils.hpp>
 #include <inflection/util/UnicodeSetUtils.hpp>
 #include <inflection/npc.hpp>
+#include <array>
 #include <iterator>
 #include <memory>
+#include <string>
 
 namespace inflection::grammar::synthesis {
 
@@ -42,7 +46,7 @@ SrGrammarSynthesizer_SrDisplayFunction::~SrGrammarSynthesizer_SrDisplayFunction(
 {
 }
 
-::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
+::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectFromDictionary(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
 {
     ::std::u16string countString(GrammarSynthesizerUtil::getFeatureValue(constraints, numberFeature));
     ::std::u16string caseString(GrammarSynthesizerUtil::getFeatureValue(constraints, caseFeature));
@@ -61,7 +65,6 @@ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::s
     if (!genderString.empty()) {
         string_constraints.emplace_back(genderString);
     }
-    // The nominative/caseless is unmarked in the patterns, so we need to do something like this
     int64_t wordGrammemes = 0;
     dictionary.getCombinedBinaryType(&wordGrammemes, lemma);
 
@@ -77,7 +80,66 @@ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::s
     return inflection;
 }
 
-::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue(const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool /* enableInflectionGuess */) const
+namespace {
+
+// Rule-based inflectors, one for each of the four declension groups.
+// Masculine or neuter nouns ending in "о" or "е", and masculine nouns ending in a consonant. NOTE(review): inflectWithRule currently sends consonant-final masculine nouns to inflectByRuleConsonant; confirm which is intended.
+::std::u16string inflectByRuleOE(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);
+// Neuter nouns ending in "е".
+::std::u16string inflectByRuleE(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);
+// Nouns of any gender ending in "а".
+::std::u16string inflectByRuleA(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase);
+// Feminine nouns ending in a consonant.
+::std::u16string inflectByRuleConsonant(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);
+
+// Number of cases in Serbian; also the length of every suffix table passed to applySuffix.
+static constexpr auto NUMBER_OF_CASES = 7UL;
+
+// Appends to the lemma the suffix that matches the number and case. Arguments, in order: lemma, singular suffix table, plural suffix table, number, case.
+::std::u16string applySuffix(const ::std::u16string&, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>&, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>&, const ::std::u16string&, const ::std::u16string&);
+
+// Treats the lemma as a proper noun when its first character is a capital letter.
+bool isProperNoun(const ::std::u16string &lemma);
+
+} // namespace
+
+// Rule-based inflection of a lemma that is assumed to be in nominative singular form.
+// Returns the lemma unchanged when number, case or gender is missing from constraints, when nominative singular is requested, or when the selected rule yields an empty string.
+::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectWithRule(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
+{
+    ::std::u16string countString(GrammarSynthesizerUtil::getFeatureValue(constraints, numberFeature));
+    ::std::u16string caseString(GrammarSynthesizerUtil::getFeatureValue(constraints, caseFeature));
+    auto genderString = GrammarSynthesizerUtil::getFeatureValue(constraints, genderFeature);
+
+    // The rules need all three of number, case and gender.
+    if (countString.empty() || caseString.empty() || genderString.empty()) {
+        return lemma;
+    }
+
+    // The lemma already is the nominative singular form.
+    if (countString == GrammemeConstants::NUMBER_SINGULAR() && caseString == GrammemeConstants::CASE_NOMINATIVE()) {
+        return lemma;
+    }
+
+    const bool isNeuter = genderString == GrammemeConstants::GENDER_NEUTER();
+    const bool endsWithE = lemma.ends_with(u'е');
+    ::std::u16string inflection;
+    // Pick the declension group. The neuter "-е" test has to precede the broader "-о"/"-е" test, which would otherwise match first and make it unreachable.
+    if (endsWithE && isNeuter) {
+        inflection = inflectByRuleE(lemma, countString, caseString, genderString);
+    } else if ((lemma.ends_with(u'о') || endsWithE) && (genderString == GrammemeConstants::GENDER_MASCULINE() || isNeuter)) {
+        inflection = inflectByRuleOE(lemma, countString, caseString, genderString);
+    } else if (lemma.ends_with(u'а')) {
+        inflection = inflectByRuleA(lemma, countString, caseString);
+    } else {
+        inflection = inflectByRuleConsonant(lemma, countString, caseString, genderString);
+    }
+
+    // An empty result means the rule could not inflect the word.
+    return inflection.empty() ? lemma : inflection;
+}
+
+::inflection::dialog::DisplayValue *SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue(const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool enableInflectionGuess) const
 {
     ::std::u16string displayString;
     if (!displayData.getValues().empty()) {
@@ -87,9 +149,154 @@ ::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::get
         return nullptr;
     }
     if (dictionary.isKnownWord(displayString)) {
-        displayString = inflectString(constraints, displayString);
+        displayString = inflectFromDictionary(constraints, displayString);
+    } else if (enableInflectionGuess) {
+        // Let's use rule based inflection for nouns. Assume lemma is singular, nominative.
+        displayString = inflectWithRule(constraints, displayString);
     }
     return new ::inflection::dialog::DisplayValue(displayString, constraints);
 }
 
+namespace {
+// True when ch is in the Cyrillic script set and is not in the default vowel set.
+static bool isConsonant(char16_t ch) {
+    return ::inflection::lang::StringFilterUtil::CYRILLIC_SCRIPT().contains(ch) && !::inflection::dictionary::PhraseProperties::DEFAULT_VOWELS_START().contains(ch);
+}
+// True when ch is in the Cyrillic script set and is also in the default vowel set.
+static bool isVowel(char16_t ch) {
+    return ::inflection::lang::StringFilterUtil::CYRILLIC_SCRIPT().contains(ch) && ::inflection::dictionary::PhraseProperties::DEFAULT_VOWELS_START().contains(ch);
+}
+
+// Syllable-count buckets used by some rules. A syllable is counted for every vowel, and for "р" when it sits between two consonants or starts the word and is followed by a consonant.
+// The rules only distinguish one syllable, two syllables, and any other count.
+enum class Syllables {
+    ONE_SYLLABLE,
+    TWO_SYLLABLES,
+    MULTI_SYLLABLES,
+};
+Syllables countSyllables(const ::std::u16string& lemma) {
+    // Every vowel is a syllable nucleus; "р" is one too when a consonant follows it and it is either word-initial or preceded by a consonant.
+    const size_t size = lemma.length();
+    uint16_t nuclei = 0;
+    for (size_t pos = 0; pos < size; ++pos) {
+        const char16_t current = lemma[pos];
+        if (isVowel(current)) {
+            ++nuclei;
+        }
+        const bool isR = current == u'р' || current == u'Р';
+        // A word-final "р" has no following letter and is never counted.
+        if (!isR || pos + 1 >= size) {
+            continue;
+        }
+        const bool consonantBefore = pos == 0 || isConsonant(lemma[pos - 1]);
+        if (consonantBefore && isConsonant(lemma[pos + 1])) {
+            ++nuclei;
+        }
+    }
+    switch (nuclei) {
+        case 1:
+            return Syllables::ONE_SYLLABLE;
+        case 2:
+            return Syllables::TWO_SYLLABLES;
+        default:
+            // Zero or more than two nuclei.
+            return Syllables::MULTI_SYLLABLES;
+    }
+}
+
+::std::u16string inflectByRuleOE(const ::std::u16string &lemma, [[maybe_unused]] const ::std::u16string &number, [[maybe_unused]] const ::std::u16string &targetCase, [[maybe_unused]] const ::std::u16string &gender)
+{
+    // TODO(nciric): implement logic. Placeholder: number, case and gender are ignored and the lemma is returned unchanged.
+    return lemma;
+}
+
+::std::u16string inflectByRuleE(const ::std::u16string &lemma, [[maybe_unused]] const ::std::u16string &number, [[maybe_unused]] const ::std::u16string &targetCase, [[maybe_unused]] const ::std::u16string &gender)
+{
+    // TODO(nciric): implement logic. Placeholder: number, case and gender are ignored and the lemma is returned unchanged.
+    return lemma;
+}
+
+// Inflects a noun ending in "а": the ending is replaced by the regular suffix for the requested number and case, then the vocative singular and genitive plural special cases are applied.
+// lemma is expected to be the nominative singular form ending in "а"; an empty lemma is returned unchanged.
+::std::u16string inflectByRuleA(const ::std::u16string &lemma, const ::std::u16string &number, const ::std::u16string &targetCase)
+{
+    // Suffixes in the case order applySuffix indexes by: nominative, genitive, dative, accusative, vocative, instrumental, locative.
+    static constexpr auto suffix_sg = ::std::to_array<::std::u16string_view>({u"а", u"е", u"и", u"у", u"а", u"ом", u"и"});
+    static constexpr auto suffix_pl = ::std::to_array<::std::u16string_view>({u"е", u"а", u"ама", u"е", u"е", u"ама", u"ама"});
+    // Genitive plural endings that get an "а" inserted between their two consonants.
+    static constexpr ::std::u16string_view mappings[][2] = {
+        {u"јка", u"јака"}, {u"мља", u"маља"}, {u"вца", u"ваца"}, {u"тка", u"така"}, {u"пка", u"пака"},
+    };
+    // Removing the last character of an empty string is undefined behavior.
+    if (lemma.empty()) {
+        return lemma;
+    }
+    // Drop the trailing "а" and append the regular suffix.
+    ::std::u16string base(lemma, 0, lemma.length() - 1);
+    base = applySuffix(base, suffix_sg, suffix_pl, number, targetCase);
+
+    if (number == GrammemeConstants::NUMBER_SINGULAR() && targetCase == GrammemeConstants::CASE_VOCATIVE()) {
+        const Syllables syllables = countSyllables(lemma);
+        if (lemma.ends_with(u"ица") && syllables == Syllables::MULTI_SYLLABLES) {
+            base.back() = u'е'; // e.g. уметница -> уметнице
+        } else if (isProperNoun(lemma) && syllables == Syllables::TWO_SYLLABLES) {
+            base.back() = u'о'; // e.g. Зора -> Зоро
+        }
+    } else if (number == GrammemeConstants::NUMBER_PLURAL() && targetCase == GrammemeConstants::CASE_GENITIVE()) {
+        if (lemma.ends_with(u"тња") || lemma.ends_with(u"дња") || lemma.ends_with(u"пта") || lemma.ends_with(u"лба") || lemma.ends_with(u"рва")) {
+            base.back() = u'и'; // e.g. лопта -> лопти
+        } else {
+            for (const auto &[suffix, replacement] : mappings) {
+                if (base.ends_with(suffix)) {
+                    // e.g. битка -> битака. The endings are mutually exclusive, so the first match is the only one.
+                    base.replace(base.length() - suffix.length(), suffix.length(), replacement);
+                    break;
+                }
+            }
+        }
+    }
+
+    return base;
+}
+
+::std::u16string inflectByRuleConsonant(const ::std::u16string &lemma, [[maybe_unused]] const ::std::u16string &number, [[maybe_unused]] const ::std::u16string &targetCase, [[maybe_unused]] const ::std::u16string & gender)
+{
+    // TODO(nciric): implement logic. Placeholder: number, case and gender are ignored and the lemma is returned unchanged.
+    return lemma;
+}
+
+// Appends to lemma the suffix for the given number and case. An unrecognized case yields an empty string, which inflectWithRule replaces with the lemma.
+::std::u16string applySuffix(const ::std::u16string &lemma, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_sg, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_pl,
+    const ::std::u16string &number, const ::std::u16string &targetCase)
+{
+    const ::std::map<::std::u16string, size_t> case_index = {
+        {GrammemeConstants::CASE_NOMINATIVE(), 0},
+        {GrammemeConstants::CASE_GENITIVE(), 1},
+        {GrammemeConstants::CASE_DATIVE(), 2},
+        {GrammemeConstants::CASE_ACCUSATIVE(), 3},
+        {GrammemeConstants::CASE_VOCATIVE(), 4},
+        {GrammemeConstants::CASE_INSTRUMENTAL(), 5},
+        {GrammemeConstants::CASE_LOCATIVE(), 6}
+    };
+    // find() rather than at(): at() throws std::out_of_range for a case that is not in the table. Any number other than singular selects the plural table.
+    const auto entry = case_index.find(targetCase);
+    if (entry == case_index.end()) {
+        return {};
+    }
+    const auto &suffixes = number == GrammemeConstants::NUMBER_SINGULAR() ? suffix_sg : suffix_pl;
+    return lemma + ::std::u16string(suffixes[entry->second]);
+}
+
+// Heuristic: the lemma is a proper noun when its first character is a capital Cyrillic letter in U+0402..U+0428 (Ђ through Ш).
+bool isProperNoun(const ::std::u16string &lemma) {
+    // front() on an empty string is undefined behavior, and an empty lemma has no capital letter.
+    if (lemma.empty()) {
+        return false;
+    }
+    const char16_t first_ch = lemma.front();
+    return 0x402 <= first_ch && first_ch <= 0x428;
+}
+
+} // namespace
+
 } // namespace inflection::grammar::synthesis
diff --git a/inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.hpp b/inflection/src/inflection/grammar/synthesis/SrGrammarSynthesizer_SrDisplayFunction.hpp
@@ -30,7 +30,8 @@ class inflection::grammar::synthesis::SrGrammarSynthesizer_SrDisplayFunction
     SrGrammarSynthesizer_SrDisplayFunction& operator=(const SrGrammarSynthesizer_SrDisplayFunction&) = delete;
 
 private:
-    ::std::u16string inflectString(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const;
+    ::std::u16string inflectFromDictionary(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const;
+    ::std::u16string inflectWithRule(const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const;
 
     const ::inflection::dictionary::DictionaryMetaData& dictionary;
     const ::inflection::dialog::SemanticFeature& caseFeature;

diff --git a/inflection/test/resources/inflection/dialog/inflection/sr.xml b/inflection/test/resources/inflection/dialog/inflection/sr.xml
@@ -18,4 +18,25 @@
     <!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">уранак</source><result>уранче</result></test -->
     <!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">игроказ</source><result>игрокаже</result></test -->
     <!-- test><source case="vocative" number="singular" gender="masculine" pos="noun">пашњак</source><result>пашњаче</result></test -->
+    <!-- Rule based inflection, group 3, all nouns ending with a -->
+    <test><source case="instrumental" number="singular" gender="feminine" pos="noun">Италија</source><result>Италијом</result></test>
+    <test><source case="instrumental" number="singular" gender="feminine" pos="noun">авенија</source><result>авенијом</result></test>
+    <test><source case="locative" number="plural" gender="feminine" pos="noun">авенија</source><result>авенијама</result></test>
+    <test><source case="vocative" number="singular" gender="masculine" pos="noun">кадија</source><result>кадија</result></test>
+    <test><source case="vocative" number="singular" gender="feminine" pos="noun">уметница</source><result>уметнице</result></test>
+    <test><source case="vocative" number="singular" gender="feminine" pos="noun">птица</source><result>птица</result></test>
+    <test><source case="vocative" number="singular" gender="feminine" pos="noun">Стана</source><result>Стано</result></test>
+    <test><source case="vocative" number="singular" gender="feminine" pos="noun">Зора</source><result>Зоро</result></test>
+    <test><source case="vocative" number="singular" gender="masculine" pos="noun">Божа</source><result>Божо</result></test>
+    <test><source case="vocative" number="singular" gender="masculine" pos="noun">Љуба</source><result>Љубо</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">пратња</source><result>пратњи</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">радња</source><result>радњи</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">лопта</source><result>лопти</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">молба</source><result>молби</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">конзерва</source><result>конзерви</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">гошћа</source><result>гошћа</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">двојка</source><result>двојака</result></test>
+    <test><source case="genitive" number="plural" gender="feminine" pos="noun">битка</source><result>битака</result></test>
+    <!-- There are some exceptions, like pripovetka, where tk -> dak because of the base word. These have to be dictionary exceptions. -->
+    <!-- <test><source case="genitive" number="plural" gender="feminine" pos="noun">приповетка</source><result>приповедака</result></test> -->
 </inflectionTest>