1111#include < inflection/dialog/SemanticFeatureModel.hpp>
1212#include < inflection/dialog/SemanticFeatureModel_DisplayData.hpp>
1313#include < inflection/dialog/DisplayValue.hpp>
14+ #include < inflection/dictionary/PhraseProperties.hpp>
1415#include < inflection/grammar/synthesis/GrammemeConstants.hpp>
1516#include < inflection/grammar/synthesis/GrammarSynthesizerUtil.hpp>
1617#include < inflection/lang/StringFilterUtil.hpp>
1718#include < inflection/util/LocaleUtils.hpp>
19+ #include < inflection/util/StringViewUtils.hpp>
1820#include < inflection/util/UnicodeSetUtils.hpp>
1921#include < inflection/npc.hpp>
22+ #include < array>
2023#include < iterator>
2124#include < memory>
25+ #include < string>
2226
2327namespace inflection ::grammar::synthesis {
2428
@@ -42,7 +46,7 @@ SrGrammarSynthesizer_SrDisplayFunction::~SrGrammarSynthesizer_SrDisplayFunction(
4246{
4347}
4448
45- ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString (const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
49+ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectFromDictionary (const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
4650{
4751 ::std::u16string countString (GrammarSynthesizerUtil::getFeatureValue (constraints, numberFeature));
4852 ::std::u16string caseString (GrammarSynthesizerUtil::getFeatureValue (constraints, caseFeature));
@@ -61,7 +65,6 @@ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::s
6165 if (!genderString.empty ()) {
6266 string_constraints.emplace_back (genderString);
6367 }
64- // The nominative/caseless is unmarked in the patterns, so we need to do something like this
6568 int64_t wordGrammemes = 0 ;
6669 dictionary.getCombinedBinaryType (&wordGrammemes, lemma);
6770
@@ -77,7 +80,66 @@ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectString(const ::s
7780 return inflection;
7881}
7982
80- ::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue (const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool /* enableInflectionGuess */ ) const
namespace {

// Number of grammatical cases in Serbian; fixes the size of every suffix table below.
static constexpr auto NUMBER_OF_CASES = 7UL;

// Rule-based inflectors, one for each of the four declension groups.

// Group: masculine or neuter lemmas ending in "о" or "е", and masculine lemmas ending with a consonant.
::std::u16string inflectByRuleOE(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);

// Group: neuter lemmas ending in "е".
::std::u16string inflectByRuleE(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);

// Group: lemmas of any gender ending in "а".
::std::u16string inflectByRuleA(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase);

// Group: feminine lemmas ending with a consonant.
::std::u16string inflectByRuleConsonant(const ::std::u16string& lemma, const ::std::u16string& number, const ::std::u16string& targetCase, const ::std::u16string& gender);

// Appends to `lemma` the suffix that matches `number` and `targetCase`, picked from the
// singular (`suffix_sg`) or plural (`suffix_pl`) suffix table.
::std::u16string applySuffix(const ::std::u16string& lemma,
                             const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_sg,
                             const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_pl,
                             const ::std::u16string& number,
                             const ::std::u16string& targetCase);

// Decides whether `lemma` is a proper noun by checking that its first character is a capital letter.
bool isProperNoun(const ::std::u16string& lemma);

} // namespace
105+
106+ ::std::u16string SrGrammarSynthesizer_SrDisplayFunction::inflectWithRule (const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints, const ::std::u16string& lemma) const
107+ {
108+ ::std::u16string countString (GrammarSynthesizerUtil::getFeatureValue (constraints, numberFeature));
109+ ::std::u16string caseString (GrammarSynthesizerUtil::getFeatureValue (constraints, caseFeature));
110+ auto genderString = GrammarSynthesizerUtil::getFeatureValue (constraints, genderFeature);
111+
112+ ::std::u16string inflection;
113+
114+ // If one of singular/plural, case and gender are not specified return lemma.
115+ if (countString.empty () || caseString.empty () || genderString.empty ()) {
116+ return lemma;
117+ }
118+
119+ // Do nothing for singular, nominative.
120+ if (countString == GrammemeConstants::NUMBER_SINGULAR () && caseString == GrammemeConstants::CASE_NOMINATIVE ()) {
121+ return lemma;
122+ }
123+
124+ // These are four declention groups in the language.
125+ if ((lemma.ends_with (u' о' ) || lemma.ends_with (u' е' )) && (genderString == GrammemeConstants::GENDER_MASCULINE () || genderString == GrammemeConstants::GENDER_NEUTER ())) {
126+ inflection = inflectByRuleOE (lemma, countString, caseString, genderString);
127+ } else if (lemma.ends_with (u' е' ) && genderString == GrammemeConstants::GENDER_NEUTER ()) {
128+ inflection = inflectByRuleE (lemma, countString, caseString, genderString);
129+ } else if (lemma.ends_with (u' а' )) {
130+ inflection = inflectByRuleA (lemma, countString, caseString);
131+ } else {
132+ inflection = inflectByRuleConsonant (lemma, countString, caseString, genderString);
133+ }
134+
135+ if (inflection.empty ()) {
136+ inflection = lemma;
137+ }
138+
139+ return inflection;
140+ }
141+
142+ ::inflection::dialog::DisplayValue *SrGrammarSynthesizer_SrDisplayFunction::getDisplayValue (const dialog::SemanticFeatureModel_DisplayData &displayData, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string> &constraints, bool enableInflectionGuess) const
81143{
82144 ::std::u16string displayString;
83145 if (!displayData.getValues ().empty ()) {
@@ -87,9 +149,154 @@ ::inflection::dialog::DisplayValue * SrGrammarSynthesizer_SrDisplayFunction::get
87149 return nullptr ;
88150 }
89151 if (dictionary.isKnownWord (displayString)) {
90- displayString = inflectString (constraints, displayString);
152+ displayString = inflectFromDictionary (constraints, displayString);
153+ } else if (enableInflectionGuess) {
154+ // Let's use rule based inflection for nouns. Assume lemma is singular, nominative.
155+ displayString = inflectWithRule (constraints, displayString);
91156 }
92157 return new ::inflection::dialog::DisplayValue (displayString, constraints);
93158}
94159
160+ namespace {
161+
162+ static bool isConsonant (char16_t ch) {
163+ return ::inflection::lang::StringFilterUtil::CYRILLIC_SCRIPT ().contains (ch) && !::inflection::dictionary::PhraseProperties::DEFAULT_VOWELS_START ().contains (ch);
164+ }
165+
166+ static bool isVowel (char16_t ch) {
167+ return ::inflection::lang::StringFilterUtil::CYRILLIC_SCRIPT ().contains (ch) && ::inflection::dictionary::PhraseProperties::DEFAULT_VOWELS_START ().contains (ch);
168+ }
169+
170+ // Some rules require number of syllables in the word. It's counted as all vowels plus r if in between consonants, or if it starts a word followed by a consonant.
171+ // We care about 1, 2 and more than 2 cases.
172+ enum class Syllables {
173+ ONE_SYLLABLE,
174+ TWO_SYLLABLES,
175+ MULTI_SYLLABLES,
176+ };
177+ Syllables countSyllables (const ::std::u16string& lemma) {
178+ uint16_t total = 0 ;
179+ size_t index = 0 ;
180+ const size_t length = lemma.length ();
181+ for (const char16_t ch: lemma) {
182+ if (isVowel (ch)) {
183+ ++total;
184+ }
185+ // Check case where R is at the begining followed by a consonant.
186+ if ((ch == u' р' || ch == u' Р' ) && (index == 0 && index + 1 < length)) {
187+ if (isConsonant (lemma[index + 1 ])) {
188+ ++total;
189+ }
190+ } else if ((ch == u' р' || ch == u' Р' ) && (index != 0 && index + 1 < length)) {
191+ if (isConsonant (lemma[index - 1 ]) && isConsonant (lemma[index + 1 ])) {
192+ ++total;
193+ }
194+ }
195+ ++index;
196+ }
197+
198+ if (total == 1 ) {
199+ return Syllables::ONE_SYLLABLE;
200+ } else if (total == 2 ) {
201+ return Syllables::TWO_SYLLABLES;
202+ } else {
203+ return Syllables::MULTI_SYLLABLES;
204+ }
205+ }
206+
// Inflector for the group of masculine or neuter lemmas ending in "о" or "е" (and masculine
// lemmas ending with a consonant). Not implemented yet: every request returns the lemma as is.
::std::u16string inflectByRuleOE(
    const ::std::u16string &lemma,
    [[maybe_unused]] const ::std::u16string &number,
    [[maybe_unused]] const ::std::u16string &targetCase,
    [[maybe_unused]] const ::std::u16string &gender)
{
    // TODO(nciric): implement logic.
    ::std::u16string unchanged(lemma);
    return unchanged;
}
212+
// Inflector for the group of neuter lemmas ending in "е".
// Not implemented yet: every request returns the lemma as is.
::std::u16string inflectByRuleE(
    const ::std::u16string &lemma,
    [[maybe_unused]] const ::std::u16string &number,
    [[maybe_unused]] const ::std::u16string &targetCase,
    [[maybe_unused]] const ::std::u16string &gender)
{
    // TODO(nciric): implement logic.
    ::std::u16string unchanged(lemma);
    return unchanged;
}
218+
219+ ::std::u16string inflectByRuleA (const ::std::u16string &lemma, const ::std::u16string &number, const ::std::u16string &targetCase)
220+ {
221+ static constexpr auto suffix_sg = ::std::to_array<::std::u16string_view>({u" а" , u" е" , u" и" , u" у" , u" а" , u" ом" , u" и" });
222+ static constexpr auto suffix_pl = ::std::to_array<::std::u16string_view>({u" е" , u" а" , u" ама" , u" е" , u" е" , u" ама" , u" ама" });
223+
224+ ::std::u16string base = lemma;
225+ // Remove trailing a and apply suffix.
226+ base.pop_back ();
227+ base = applySuffix (base, suffix_sg, suffix_pl, number, targetCase);
228+
229+ // Vocative singular and genitive plural require special processing in some cases.
230+ if (number == GrammemeConstants::NUMBER_SINGULAR () && targetCase == GrammemeConstants::CASE_VOCATIVE ()) {
231+ Syllables syllables = countSyllables (lemma);
232+ if (lemma.ends_with (u" ица" ) && syllables == Syllables::MULTI_SYLLABLES) {
233+ base.back () = u' е' ;
234+ }
235+ if (isProperNoun (lemma) && syllables == Syllables::TWO_SYLLABLES) {
236+ base.back () = u' о' ;
237+ }
238+ }
239+
240+ if (number == GrammemeConstants::NUMBER_PLURAL () && targetCase == GrammemeConstants::CASE_GENITIVE ()) {
241+ if (lemma.ends_with (u" тња" ) || lemma.ends_with (u" дња" ) || lemma.ends_with (u" пта" ) || lemma.ends_with (u" лба" ) || lemma.ends_with (u" рва" )) {
242+ base.back () = u' и' ;
243+ }
244+ static const char16_t *mappings[][2 ] = {
245+ {u" јка" , u" јака" },
246+ {u" мља" , u" маља" },
247+ {u" вца" , u" ваца" },
248+ {u" тка" , u" така" },
249+ {u" пка" , u" пака" },
250+ };
251+ for (const auto &[suffix, replacement] : mappings) {
252+ if (base.ends_with (suffix)) {
253+ auto suffix_length = std::u16string_view (suffix).length ();
254+ base.replace (base.length () - suffix_length, suffix_length, replacement);
255+ }
256+ }
257+ }
258+
259+ return base;
260+ }
261+
// Inflector for the group of feminine lemmas ending with a consonant.
// Not implemented yet: every request returns the lemma as is.
::std::u16string inflectByRuleConsonant(
    const ::std::u16string &lemma,
    [[maybe_unused]] const ::std::u16string &number,
    [[maybe_unused]] const ::std::u16string &targetCase,
    [[maybe_unused]] const ::std::u16string &gender)
{
    // TODO(nciric): implement logic.
    ::std::u16string unchanged(lemma);
    return unchanged;
}
267+
268+ ::std::u16string applySuffix (const ::std::u16string &lemma, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_sg, const ::std::array<::std::u16string_view, NUMBER_OF_CASES>& suffix_pl,
269+ const ::std::u16string &number, const ::std::u16string &targetCase)
270+ {
271+ const ::std::map<::std::u16string, size_t > case_index = {
272+ {GrammemeConstants::CASE_NOMINATIVE (), 0 },
273+ {GrammemeConstants::CASE_GENITIVE (), 1 },
274+ {GrammemeConstants::CASE_DATIVE (), 2 },
275+ {GrammemeConstants::CASE_ACCUSATIVE (), 3 },
276+ {GrammemeConstants::CASE_VOCATIVE (), 4 },
277+ {GrammemeConstants::CASE_INSTRUMENTAL (), 5 },
278+ {GrammemeConstants::CASE_LOCATIVE (), 6 }
279+ };
280+
281+ auto index = case_index.at (targetCase);
282+
283+ if (number == GrammemeConstants::NUMBER_SINGULAR ()) {
284+ return lemma + ::std::u16string (suffix_sg[index]);
285+ } else {
286+ return lemma + ::std::u16string (suffix_pl[index]);
287+ }
288+ }
289+
// Heuristic proper-noun check: true when the first character of `lemma` lies in the
// range U+0402..U+0428 of Cyrillic capital letters. An empty lemma is never a proper noun.
bool isProperNoun(const ::std::u16string &lemma) {
    // front() on an empty string is undefined behaviour, so reject it up front.
    if (lemma.empty()) {
        return false;
    }

    constexpr char16_t FIRST_CAPITAL = 0x402;
    constexpr char16_t LAST_CAPITAL = 0x428;
    const char16_t first_ch = lemma.front();
    return FIRST_CAPITAL <= first_ch && first_ch <= LAST_CAPITAL;
}
299+
300+ } // namespace
301+
95302} // namespace inflection::grammar::synthesis
0 commit comments