From 3dd415342861e07d32d143b99488837b776287e7 Mon Sep 17 00:00:00 2001
From: John Maxwell
Date: Thu, 8 May 2025 09:13:34 -0700
Subject: [PATCH 1/2] Fix LT-22121: Analysis guesser should not guess ras for rAs

GetLowercaseWordform and GetOriginalCaseWordform both lowercased the
occurrence's baseline text unconditionally, so a mixed-case form such as
rAs was matched against ras.

Move the shared logic into GetLowercaseOfTitleCase. It returns the
lowercased baseline text only when CaseFunctions.StringCase reports
StringCaseStatus.title, and returns null otherwise (null occurrence,
non-initial occurrence while onlyIndexZeroLowercaseMatching is set,
missing or invalid segment, or text that is not title case).

GetOriginalCaseWordform now takes onlyIndexZeroLowercaseMatching so that
it applies the same checks as GetLowercaseWordform. Because the helper
can return null, both callers test for null before comparing; without
that guard a wordform whose form text is null would compare equal to the
null result in GetOriginalCaseWordform.
---
Reviewer notes (text here is ignored by git am):
 - Line breaks, tab indentation, blank context lines and the
   <summary>/</summary> tags on comment lines were reconstructed from a
   flattened copy of this patch. Verify the context lines against the
   target tree before applying.
 - The post-image blob hash on the "index" line predates the null guard
   added to GetOriginalCaseWordform and is therefore stale.

 .../DomainServices/AnalysisGuessServices.cs   | 44 ++++++++++++--------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs
index b34a90cb..c96830b0 100644
--- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs
+++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs
@@ -729,18 +729,9 @@ public IAnalysis GetBestGuess(AnalysisOccurrence occurrence, bool onlyIndexZeroL
 		private IWfiWordform GetLowercaseWordform(AnalysisOccurrence occurrence, int ws,
 			bool onlyIndexZeroLowercaseMatching, IWfiWordform wordform)
 		{
-			// TODO: make it look for the first word in the sentence...may not be at Index 0!
-			if (occurrence == null)
-				return null;
-			if (onlyIndexZeroLowercaseMatching && occurrence.Index != 0)
-				return null;
-			if (occurrence.Segment == null || !occurrence.Segment.IsValidObject)
-				return null;
-			ITsString tssWfBaseline = occurrence.BaselineText;
-			var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(ws));
-			string sLower = cf.ToLower(tssWfBaseline.Text);
+			string sLower = GetLowercaseOfTitleCase(occurrence, ws, onlyIndexZeroLowercaseMatching);
 			// don't bother looking up the lowercased wordform if the instanceOf is already in lowercase form.
-			if (sLower != wordform.ShortName)
+			if (sLower != null && sLower != wordform.ShortName)
 			{
 				return GetWordformIfNeeded(sLower, ws);
 			}
@@ -752,18 +743,37 @@ private IWfiWordform GetLowercaseWordform(AnalysisOccurrence occurrence, int ws,
 		// if it lowercases to the given wordform.
 		// Otherwise, return null.
 		// </summary>
-		private IWfiWordform GetOriginalCaseWordform(AnalysisOccurrence occurrence, IWfiWordform wordform, int ws)
+		private IWfiWordform GetOriginalCaseWordform(AnalysisOccurrence occurrence, IWfiWordform wordform,
+			int ws, bool onlyIndexZeroLowercaseMatching)
 		{
-			ITsString tssWfBaseline = occurrence.BaselineText;
-			var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(ws));
-			string sLower = cf.ToLower(tssWfBaseline.Text);
-			if (sLower == wordform.GetForm(ws).Text)
+			string sLower = GetLowercaseOfTitleCase(occurrence, ws, onlyIndexZeroLowercaseMatching);
+			if (sLower != null && sLower == wordform.GetForm(ws).Text)
 			{
+				ITsString tssWfBaseline = occurrence.BaselineText;
 				return GetWordformIfNeeded(tssWfBaseline.Text, ws);
 			}
 			return null;
 		}
 
+		// <summary>
+		// Get the lowercase form of the occurrence's baseline text if it is title case; otherwise return null.
+		// </summary>
+		private string GetLowercaseOfTitleCase(AnalysisOccurrence occurrence, int ws, bool onlyIndexZeroLowercaseMatching)
+		{
+			// TODO: make it look for the first word in the sentence...may not be at Index 0!
+			if (occurrence == null)
+				return null;
+			if (onlyIndexZeroLowercaseMatching && occurrence.Index != 0)
+				return null;
+			if (occurrence.Segment == null || !occurrence.Segment.IsValidObject)
+				return null;
+			ITsString tssWfBaseline = occurrence.BaselineText;
+			var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(ws));
+			if (cf.StringCase(tssWfBaseline.Text) == StringCaseStatus.title)
+				return cf.ToLower(tssWfBaseline.Text);
+			return null;
+		}
+
 		/// <summary>
 		/// Get a wordform for word if it already exists or
 		/// if it has an entry in the lexicon.
@@ -862,7 +872,7 @@ private List GetSortedAnalysisGuesses(IWfiWordform wordform, int w
 				{
 					// Sometimes the user selects a lowercase wordform for an uppercase word.
 					// Get the original case so that we can include uppercase analyses.
-					var originalCaseWf = GetOriginalCaseWordform(occurrence, wordform, ws);
+					var originalCaseWf = GetOriginalCaseWordform(occurrence, wordform, ws, onlyIndexZeroLowercaseMatching);
 					if (originalCaseWf != null)
 						wordform = originalCaseWf;
 				}

From 6610e6c2800e903f38d485f40ec7e69f60484d2f Mon Sep 17 00:00:00 2001
From: John Maxwell
Date: Fri, 9 May 2025 08:14:10 -0700
Subject: [PATCH 2/2] Add test case for mixed case words

Append the words "rAs ras." to the test paragraph and add a test that
creates an analysis for ras and then asks for the best guess for the
mixed-case occurrence rAs. The expected result is NullWAG: the guesser
must not fall back to the lowercase wordform for a mixed-case word.

The setup code refers to the new wordforms through the rAs/ras locals
rather than through their positions in Words_para0.
---
Reviewer notes (text here is ignored by git am):
 - Line breaks, tab indentation, blank context lines and the <summary>
   tags on comment lines were reconstructed from a flattened copy of this
   patch. Verify the context lines against the target tree before
   applying.
 - The post-image blob hash on the "index" line is stale because added
   lines were edited.

 .../AnalysisGuessServicesTests.cs             | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs
index a14b308a..40261ccc 100644
--- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs
+++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs
@@ -170,6 +170,17 @@ internal void DoDataSetup()
 					" " + Words_para0[4].Form.BestVernacularAlternative.Text +
 					".", wsVern));
 				Para0.Contents = bldr4.GetString();
+				/* rAs ras */
+				IWfiWordform rAs = wfFactory.Create(TsStringUtils.MakeString("rAs", wsVern));
+				Words_para0.Add(rAs);
+				IWfiWordform ras = wfFactory.Create(TsStringUtils.MakeString("ras", wsVern));
+				Words_para0.Add(ras);
+				var bldr5 = Para0.Contents.GetIncBldr();
+				bldr5.AppendTsString(TsStringUtils.MakeString(
+					" " + rAs.Form.BestVernacularAlternative.Text +
+					" " + ras.Form.BestVernacularAlternative.Text +
+					".", wsVern));
+				Para0.Contents = bldr5.GetString();
 				using (ParagraphParser pp = new ParagraphParser(Cache))
 				{
 					foreach (IStTxtPara para in StText.ParagraphsOS)
@@ -921,6 +932,21 @@ public void ExpectedAnalysisGuess_ForSentenceInitialOnlyLowercase()
 			}
 		}
 
+		/// <summary>
+		/// If a wordform is mixed case (e.g. rAs), don't look for its lowercase form (ras).
+		/// </summary>
+		[Test]
+		public void ExpectedAnalysisGuess_ForSentenceInitialMixedCase()
+		{
+			using (var setup = new AnalysisGuessBaseSetup(Cache))
+			{
+				WordAnalysisOrGlossServices.CreateNewAnalysisWAG(setup.Words_para0[21]); // ras
+				var occurrenceMixedCase = new AnalysisOccurrence(setup.Para0.SegmentsOS[5], 0); // rAs
+				var guessActual = setup.GuessServices.GetBestGuess(occurrenceMixedCase);
+				Assert.AreEqual(new NullWAG(), guessActual);
+			}
+		}
+
 		/// <summary>
 		/// This class allows us to fake out the guesser by passing an analysis occurrence with the analyis we want,
 		/// even though it isn't the analysis recorded in the paragraph.