Skip to content

Commit b7528a8

Browse files
committed
Same file as before but with corrected indentations
1 parent f2b59c6 commit b7528a8

File tree

1 file changed

+93
-93
lines changed

1 file changed

+93
-93
lines changed

inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.cpp

Lines changed: 93 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright 2025 Unicode Incorporated and others. All rights reserved.
3-
*/
2+
* Copyright 2025 Unicode Incorporated and others. All rights reserved.
3+
*/
44
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp>
55
#include <inflection/dialog/SemanticFeature.hpp>
66
#include <inflection/dialog/SemanticFeatureModel_DisplayData.hpp>
@@ -11,6 +11,15 @@
1111
#include <inflection/grammar/synthesis/GrammemeConstants.hpp>
1212
#include <inflection/grammar/synthesis/GrammarSynthesizerUtil.hpp>
1313
#include <inflection/grammar/synthesis/MlGrammarSynthesizer.hpp>
14+
#include <inflection/tokenizer/Tokenizer.hpp>
15+
#include <inflection/tokenizer/TokenChain.hpp>
16+
#include <inflection/tokenizer/TokenizerFactory.hpp>
17+
#include <inflection/util/StringViewUtils.hpp>
18+
#include <inflection/util/Validate.hpp>
19+
#include <inflection/util/UnicodeSetUtils.hpp>
20+
#include <inflection/lang/StringFilterUtil.hpp>
21+
#include <icu4cxx/UnicodeSet.hpp>
22+
#include <unicode/uchar.h>
1423
#include <inflection/npc.hpp>
1524
#include <memory>
1625

@@ -19,103 +28,103 @@ namespace inflection::grammar::synthesis {
1928
// Helper function to get string feature by name from the feature map
2029
static std::u16string getStrFeature(
2130
const std::u16string& name,
22-
const std::map<dialog::SemanticFeature, std::u16string>& features)
31+
const std::map<dialog::SemanticFeature, std::u16string>& features)
2332
{
2433
for (const auto& [key, value] : features) {
25-
if (key.getName() == name) {
34+
if (key.getName() == name) {
2635
return value;
2736
}
2837
}
2938
return u"";
3039
}
3140

3241
// Changed parameter type from FeatureSet to map<SemanticFeature,u16string>
33-
std::u16string fallbackMalayalamPronoun(const std::map<dialog::SemanticFeature, std::u16string>& features) {
42+
std::u16string fallbackMalayalamPronoun(
43+
const std::map<dialog::SemanticFeature, std::u16string>& features)
44+
{
3445
auto person = getStrFeature(u"person", features);
3546
auto number = getStrFeature(u"number", features);
3647
auto case_ = getStrFeature(u"case", features);
3748
auto gender = getStrFeature(u"gender", features);
3849
auto clusivity = getStrFeature(u"clusivity", features);
3950
auto formality = getStrFeature(u"formality", features);
4051

41-
// STRICT MINIMUM CHECKS: No fallback unless these are defined
4252
if (person.empty() || number.empty() || case_.empty()) {
4353
return u"";
4454
}
45-
46-
// Only fallback for known safe combinations (e.g., 1st person, no gender)
47-
// You can later extend this to add valid known forms (e.g., ഞാൻ, നീ, etc.)
4855
if (person == u"first" && number == u"singular" && case_ == u"dative") {
4956
return u"എനിക്ക്";
5057
}
51-
52-
// Optional: Add more valid fallback patterns here if you have confidence
53-
// Otherwise: return empty
5458
return u"";
5559
}
5660

5761
using dialog::SemanticFeature;
5862
using dialog::SemanticFeatureModel_DisplayData;
5963
using dialog::DisplayValue;
6064

61-
// Malayalam-specific grammemes
6265
static constexpr auto CASE_NOMINATIVE = u"nominative";
6366
static constexpr auto CASE_ACCUSATIVE = u"accusative";
64-
static constexpr auto CASE_DATIVE = u"dative";
65-
static constexpr auto CASE_GENITIVE = u"genitive";
67+
static constexpr auto CASE_DATIVE = u"dative";
68+
static constexpr auto CASE_GENITIVE = u"genitive";
6669
static constexpr auto CASE_INSTRUMENTAL = u"instrumental";
67-
static constexpr auto CASE_LOCATIVE = u"locative";
68-
static constexpr auto CASE_SOCIATIVE = u"sociative";
70+
static constexpr auto CASE_LOCATIVE = u"locative";
71+
static constexpr auto CASE_SOCIATIVE = u"sociative";
72+
6973
static constexpr auto NUMBER_SINGULAR = u"singular";
70-
static constexpr auto NUMBER_PLURAL = u"plural";
74+
static constexpr auto NUMBER_PLURAL = u"plural";
75+
7176
static constexpr auto GENDER_MASCULINE = u"masculine";
72-
static constexpr auto GENDER_FEMININE = u"feminine";
73-
static constexpr auto GENDER_NEUTER = u"neuter";
74-
static constexpr auto FORMALITY_FORMAL = u"formal";
77+
static constexpr auto GENDER_FEMININE = u"feminine";
78+
static constexpr auto GENDER_NEUTER = u"neuter";
79+
80+
static constexpr auto FORMALITY_FORMAL = u"formal";
7581
static constexpr auto FORMALITY_INFORMAL = u"informal";
76-
static constexpr auto CLUSIVITY_INCLUSIVE = u"inclusive";
77-
static constexpr auto CLUSIVITY_EXCLUSIVE = u"exclusive";
82+
83+
static constexpr auto CLUSIVITY_INCLUSIVE = u"inclusive";
84+
static constexpr auto CLUSIVITY_EXCLUSIVE = u"exclusive";
85+
7886
static constexpr auto PERSON_FIRST = u"first";
7987
static constexpr auto PERSON_SECOND = u"second";
80-
static constexpr auto PERSON_THIRD = u"third";
81-
static constexpr auto TENSE_PAST = u"past";
88+
static constexpr auto PERSON_THIRD = u"third";
89+
90+
static constexpr auto TENSE_PAST = u"past";
8291
static constexpr auto TENSE_PRESENT = u"present";
83-
static constexpr auto TENSE_FUTURE = u"future";
84-
static constexpr auto MOOD_INDICATIVE = u"indicative";
85-
static constexpr auto MOOD_IMPERATIVE = u"imperative";
86-
static constexpr auto MOOD_SUBJUNCTIVE = u"subjunctive";
92+
static constexpr auto TENSE_FUTURE = u"future";
93+
94+
static constexpr auto MOOD_INDICATIVE = u"indicative";
95+
static constexpr auto MOOD_IMPERATIVE = u"imperative";
96+
static constexpr auto MOOD_SUBJUNCTIVE = u"subjunctive";
8797

8898
MlGrammarSynthesizer_MlDisplayFunction::MlGrammarSynthesizer_MlDisplayFunction(
8999
const ::inflection::dialog::SemanticFeatureModel& model)
90-
: caseFeature(*npc(model.getFeature(GrammemeConstants::CASE)))
91-
, numberFeature(*npc(model.getFeature(GrammemeConstants::NUMBER)))
92-
, genderFeature(*npc(model.getFeature(GrammemeConstants::GENDER)))
93-
, posFeature(*npc(model.getFeature(GrammemeConstants::POS)))
94-
, formalityFeature(*npc(model.getFeature(u"formality")))
95-
, clusivityFeature(*npc(model.getFeature(u"clusivity")))
96-
, personFeature(*npc(model.getFeature(GrammemeConstants::PERSON)))
97-
, tenseFeature(*npc(model.getFeature(u"tense")))
98-
, moodFeature(*npc(model.getFeature(u"mood")))
99-
, pronounTypeFeature(*npc(model.getFeature(u"pronounType")))
100-
, determinationFeature(*npc(model.getFeature(u"determination")))
101-
, dictionaryInflector(
102-
util::LocaleUtils::MALAYALAM(),
103-
{
104-
{GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_ADJECTIVE(), GrammemeConstants::POS_VERB()},
105-
{CASE_NOMINATIVE, CASE_ACCUSATIVE, CASE_DATIVE, CASE_GENITIVE, CASE_LOCATIVE, CASE_INSTRUMENTAL, CASE_SOCIATIVE},
106-
{NUMBER_SINGULAR, NUMBER_PLURAL},
107-
{GENDER_MASCULINE, GENDER_FEMININE, GENDER_NEUTER},
108-
{FORMALITY_FORMAL, FORMALITY_INFORMAL},
109-
{CLUSIVITY_INCLUSIVE, CLUSIVITY_EXCLUSIVE},
110-
{PERSON_FIRST, PERSON_SECOND, PERSON_THIRD},
111-
{TENSE_PAST, TENSE_PRESENT, TENSE_FUTURE},
112-
{MOOD_INDICATIVE, MOOD_IMPERATIVE, MOOD_SUBJUNCTIVE}
113-
},
114-
{},
115-
true)
116-
{
117-
// Constructor initializes feature references and dictionary inflector
118-
}
100+
: caseFeature(*npc(model.getFeature(GrammemeConstants::CASE))),
101+
numberFeature(*npc(model.getFeature(GrammemeConstants::NUMBER))),
102+
genderFeature(*npc(model.getFeature(GrammemeConstants::GENDER))),
103+
posFeature(*npc(model.getFeature(GrammemeConstants::POS))),
104+
formalityFeature(*npc(model.getFeature(u"formality"))),
105+
clusivityFeature(*npc(model.getFeature(u"clusivity"))),
106+
personFeature(*npc(model.getFeature(GrammemeConstants::PERSON))),
107+
tenseFeature(*npc(model.getFeature(u"tense"))),
108+
moodFeature(*npc(model.getFeature(u"mood"))),
109+
pronounTypeFeature(*npc(model.getFeature(u"pronounType"))),
110+
determinationFeature(*npc(model.getFeature(u"determination"))),
111+
dictionaryInflector(
112+
util::LocaleUtils::MALAYALAM(),
113+
{
114+
{GrammemeConstants::POS_NOUN(), GrammemeConstants::POS_VERB()},
115+
{CASE_NOMINATIVE, CASE_ACCUSATIVE, CASE_DATIVE, CASE_GENITIVE,
116+
CASE_LOCATIVE, CASE_INSTRUMENTAL, CASE_SOCIATIVE},
117+
{NUMBER_SINGULAR, NUMBER_PLURAL},
118+
{GENDER_MASCULINE, GENDER_FEMININE, GENDER_NEUTER},
119+
{FORMALITY_FORMAL, FORMALITY_INFORMAL},
120+
{CLUSIVITY_INCLUSIVE, CLUSIVITY_EXCLUSIVE},
121+
{PERSON_FIRST, PERSON_SECOND, PERSON_THIRD},
122+
{TENSE_PAST, TENSE_PRESENT, TENSE_FUTURE},
123+
{MOOD_INDICATIVE, MOOD_IMPERATIVE, MOOD_SUBJUNCTIVE}
124+
},
125+
{},
126+
true)
127+
{}
119128

120129
static std::u16string guessPluralForm(const std::u16string& token) {
121130
if (token.ends_with(u"")) {
@@ -127,10 +136,7 @@ static std::u16string guessPluralForm(const std::u16string& token) {
127136
if (token.ends_with(u"")) {
128137
return token + u"മാർ";
129138
}
130-
if (token.ends_with(u"ി")) {
131-
return token + u"കൾ";
132-
}
133-
if (token.ends_with(u"")) {
139+
if (token.ends_with(u"ി") || token.ends_with(u"")) {
134140
return token + u"കൾ";
135141
}
136142
if (!token.empty() && token.back() != u'') {
@@ -142,31 +148,20 @@ static std::u16string guessPluralForm(const std::u16string& token) {
142148
::inflection::dialog::DisplayValue* MlGrammarSynthesizer_MlDisplayFunction::getDisplayValue(
143149
const SemanticFeatureModel_DisplayData& displayData,
144150
const std::map<SemanticFeature, std::u16string>& constraints,
145-
bool enableInflectionGuess) const
151+
bool enableInflectionGuess) const
146152
{
147153
const auto displayValue = GrammarSynthesizerUtil::getTheBestDisplayValue(displayData, constraints);
148-
if (displayValue == nullptr) {
149-
if (GrammarSynthesizerUtil::getFeatureValue(constraints, posFeature) == u"pronoun") {
150-
std::u16string fallback = fallbackMalayalamPronoun(constraints);
151-
if (!fallback.empty()) {
152-
return new DisplayValue(fallback, constraints);
154+
if (displayValue == nullptr || displayValue->getDisplayString().empty()) {
155+
if (GrammarSynthesizerUtil::getFeatureValue(constraints, posFeature) == u"pronoun") {
156+
std::u16string fallback = fallbackMalayalamPronoun(constraints);
157+
if (!fallback.empty()) {
158+
return new DisplayValue(fallback, constraints);
159+
}
153160
}
161+
return nullptr;
154162
}
155-
return nullptr;
156-
}
157163

158164
const std::u16string baseForm = displayValue->getDisplayString();
159-
160-
if (baseForm.empty()) {
161-
if (GrammarSynthesizerUtil::getFeatureValue(constraints, posFeature) == u"pronoun") {
162-
std::u16string fallback = fallbackMalayalamPronoun(constraints);
163-
if (!fallback.empty()) {
164-
return new DisplayValue(fallback, constraints);
165-
}
166-
}
167-
return nullptr;
168-
}
169-
170165
const std::u16string posFeatureValue = GrammarSynthesizerUtil::getFeatureValue(constraints, posFeature);
171166
const std::u16string numberFeatureValue = GrammarSynthesizerUtil::getFeatureValue(constraints, numberFeature);
172167
const std::u16string caseValue = GrammarSynthesizerUtil::getFeatureValue(constraints, caseFeature);
@@ -220,7 +215,7 @@ if (displayValue == nullptr) {
220215
constraintValues.push_back(posFeatureValue);
221216
}
222217

223-
if (posFeatureValue == u"adjective" || posFeatureValue == GrammemeConstants::POS_PRONOUN()) {
218+
if (posFeatureValue == GrammemeConstants::POS_PRONOUN()) {
224219
addIfNotEmpty(genderFeature);
225220
}
226221

@@ -239,8 +234,8 @@ if (displayValue == nullptr) {
239234
}
240235

241236
auto inflectedOpt = dictionaryInflector.inflect(baseForm, wordGrammemes, constraintValues);
242-
if (inflectedOpt.has_value() && *inflectedOpt != baseForm) {
243237

238+
if (inflectedOpt.has_value() && *inflectedOpt != baseForm) {
244239
std::u16string result = *inflectedOpt;
245240

246241
if (std::find(constraintValues.begin(), constraintValues.end(), u"first") != constraintValues.end() &&
@@ -258,7 +253,6 @@ if (displayValue == nullptr) {
258253
(result == u"നാം" || baseForm == u"നാം")) {
259254
return new DisplayValue(u"നമ്മൾ", constraints);
260255
}
261-
262256
return new DisplayValue(result, constraints);
263257
}
264258

@@ -275,13 +269,18 @@ if (displayValue == nullptr) {
275269

276270
if (posFeatureValue == u"verb") {
277271
std::u16string stem = baseForm;
278-
if (stem.size() >= 3 && stem.compare(stem.size() - 3, 3, u"ക്കുക") == 0) {
279-
stem = stem.substr(0, stem.size() - 3);
280-
} else if (stem.size() >= 2 && stem.compare(stem.size() - 2, 2, u"കുക") == 0) {
281-
stem = stem.substr(0, stem.size() - 2);
272+
static const std::vector<std::u16string> infinitiveSuffixes = {u"ക്കുക", u"കുക", u"വിക്കുക", u"പിക്കുക"};
273+
274+
for (const auto& suffix : infinitiveSuffixes) {
275+
if (stem.size() >= suffix.size() &&
276+
stem.compare(stem.size() - suffix.size(), suffix.size(), suffix) == 0) {
277+
stem = stem.substr(0, stem.size() - suffix.size());
278+
break;
279+
}
282280
}
283281

284282
std::u16string conjugatedVerb;
283+
285284
if (moodVal == MOOD_INDICATIVE) {
286285
if (tenseVal == TENSE_PRESENT) {
287286
conjugatedVerb = stem + u"ിക്കുന്നു";
@@ -291,9 +290,9 @@ if (displayValue == nullptr) {
291290
conjugatedVerb = stem + u"ിക്കും";
292291
}
293292
} else if (moodVal == MOOD_IMPERATIVE) {
294-
conjugatedVerb = stem + u"ുക";
293+
conjugatedVerb = baseForm;
295294
} else if (moodVal == MOOD_SUBJUNCTIVE) {
296-
conjugatedVerb = stem + u"മെന്ന്";
295+
conjugatedVerb = stem + u"ക്കുമെന്ന്";
297296
}
298297

299298
if (!conjugatedVerb.empty()) {
@@ -303,6 +302,7 @@ if (displayValue == nullptr) {
303302

304303
if (!caseValue.empty()) {
305304
std::u16string result;
305+
306306
if (caseValue == CASE_ACCUSATIVE) {
307307
if (baseForm.ends_with(u"")) {
308308
result = baseForm.substr(0, baseForm.size() - 1) + u"നെ";
@@ -329,16 +329,16 @@ if (displayValue == nullptr) {
329329
}
330330
}
331331

332-
// Move this outside so it always runs last
333332
if (posFeatureValue == u"pronoun") {
334333
std::u16string fallback = fallbackMalayalamPronoun(constraints);
335334
if (!fallback.empty()) {
336335
return new DisplayValue(fallback, constraints);
337336
}
338-
}
337+
}
338+
339339
return nullptr;
340340
}
341341

342342
MlGrammarSynthesizer_MlDisplayFunction::~MlGrammarSynthesizer_MlDisplayFunction() = default;
343343

344-
} // namespace inflection::grammar::synthesis
344+
} // namespace inflection::grammar::synthesis

0 commit comments

Comments
 (0)