Skip to content

Commit 9433640

Browse files
authored
Merge duplicate analyses (#282)
* Add MergeEquivalentAnalyses
* Fix a performance bug
* Optimization should include when MergeEquivalentAnalyses is false
* Uses TryGetWord as suggested by Damien
1 parent c304a75 commit 9433640

File tree

3 files changed

+89
-9
lines changed

3 files changed

+89
-9
lines changed

src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,17 @@ public IEnumerable<Word> Apply(Word input)
6767
if (_morpher.TraceManager.IsTracing)
6868
_morpher.TraceManager.BeginUnapplyStratum(_stratum, input);
6969

70+
Word origInput = input;
7071
input = input.Clone();
7172
input.Stratum = _stratum;
7273

7374
_prulesRule.Apply(input);
7475
input.Freeze();
76+
IDictionary<Shape, Word> shapeWord = null;
77+
// Don't merge if tracing because it messes up the tracing.
78+
bool mergeEquivalentAnalyses = _morpher.MergeEquivalentAnalyses && !_morpher.TraceManager.IsTracing;
79+
if (mergeEquivalentAnalyses)
80+
shapeWord = new Dictionary<Shape, Word>(FreezableEqualityComparer<Shape>.Default);
7581

7682
// AnalysisStratumRule.Apply should cover the inverse of SynthesisStratumRule.Apply.
7783
IEnumerable<Word> mruleOutWords = ApplyTemplates(input).Concat(ApplyMorphologicalRules(input));
@@ -82,6 +88,19 @@ public IEnumerable<Word> Apply(Word input)
8288
_morpher.TraceManager.EndUnapplyStratum(_stratum, input);
8389
foreach (Word mruleOutWord in mruleOutWords)
8490
{
91+
// Skip intermediate sources from phonological rules, templates, and morphological rules.
92+
mruleOutWord.Source = origInput;
93+
if (mergeEquivalentAnalyses)
94+
{
95+
Shape shape = mruleOutWord.Shape;
96+
Word canonicalWord;
97+
if (shapeWord.TryGetValue(shape, out canonicalWord))
98+
{
99+
canonicalWord.Alternatives.Add(mruleOutWord);
100+
continue;
101+
}
102+
shapeWord[shape] = mruleOutWord;
103+
}
85104
output.Add(mruleOutWord);
86105
if (_morpher.TraceManager.IsTracing)
87106
_morpher.TraceManager.EndUnapplyStratum(_stratum, mruleOutWord);

src/SIL.Machine.Morphology.HermitCrab/Morpher.cs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public Morpher(ITraceManager traceManager, Language lang)
5656
_synthesisRule = lang.CompileSynthesisRule(this);
5757
MaxStemCount = 2;
5858
MaxUnapplications = 0;
59+
MergeEquivalentAnalyses = true;
5960
LexEntrySelector = entry => true;
6061
RuleSelector = rule => true;
6162

@@ -78,6 +79,12 @@ public ITraceManager TraceManager
7879
/// </summary>
7980
public int MaxUnapplications { get; set; }
8081

82+
/// <summary>
83+
/// Merge analyses that have equivalent shapes.
84+
/// Merged analyses are expanded back into separate analyses once lexical lookup succeeds. Merging is skipped while tracing.
85+
/// </summary>
86+
public bool MergeEquivalentAnalyses { get; set; }
87+
8188
public Func<LexEntry, bool> LexEntrySelector { get; set; }
8289
public Func<IHCRule, bool> RuleSelector { get; set; }
8390

@@ -139,10 +146,13 @@ public IEnumerable<Word> ParseWord(string word, out object trace, bool guessRoot
139146
var lexicalGuesses = LexicalGuess(analysisWord).Distinct();
140147
foreach (Word synthesisWord in lexicalGuesses)
141148
{
142-
foreach (Word validWord in _synthesisRule.Apply(synthesisWord).Where(IsWordValid))
149+
foreach (Word alternative in synthesisWord.ExpandAlternatives())
143150
{
144-
if (IsMatch(word, validWord))
145-
matches.Add(validWord);
151+
foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid))
152+
{
153+
if (IsMatch(word, validWord))
154+
matches.Add(validWord);
155+
}
146156
}
147157
}
148158
}
@@ -278,10 +288,13 @@ private IEnumerable<Word> Synthesize(string word, IEnumerable<Word> analyses)
278288
{
279289
foreach (Word synthesisWord in LexicalLookup(analysisWord))
280290
{
281-
foreach (Word validWord in _synthesisRule.Apply(synthesisWord).Where(IsWordValid))
291+
foreach (Word alternative in synthesisWord.ExpandAlternatives())
282292
{
283-
if (IsMatch(word, validWord))
284-
matches.Add(validWord);
293+
foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid))
294+
{
295+
if (IsMatch(word, validWord))
296+
matches.Add(validWord);
297+
}
285298
}
286299
}
287300
}
@@ -304,10 +317,13 @@ private IEnumerable<Word> Synthesize(string word, ConcurrentQueue<Word> analyses
304317
analyses.TryDequeue(out Word analysisWord);
305318
foreach (Word synthesisWord in LexicalLookup(analysisWord))
306319
{
307-
foreach (Word validWord in _synthesisRule.Apply(synthesisWord).Where(IsWordValid))
320+
foreach (Word alternative in synthesisWord.ExpandAlternatives())
308321
{
309-
if (IsMatch(word, validWord))
310-
matches.Add(validWord);
322+
foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid))
323+
{
324+
if (IsMatch(word, validWord))
325+
matches.Add(validWord);
326+
}
311327
}
312328
}
313329
}

src/SIL.Machine.Morphology.HermitCrab/Word.cs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public class Word : Freezable<Word>, IAnnotatedData<ShapeNode>, ICloneable<Word>
3131
private bool _isPartial;
3232
private readonly Dictionary<string, HashSet<int>> _disjunctiveAllomorphIndices;
3333
private int _mruleAppCount = 0;
34+
private readonly IList<Word> _alternatives = new List<Word>();
3435

3536
public Word(RootAllomorph rootAllomorph, FeatureStruct realizationalFS)
3637
{
@@ -72,6 +73,8 @@ protected Word(Word word)
7273
{
7374
_allomorphs = new Dictionary<string, Allomorph>(word._allomorphs);
7475
Stratum = word.Stratum;
76+
Source = word;
77+
// Don't copy Alternatives.
7578
_shape = word._shape.Clone();
7679
_rootAllomorph = word._rootAllomorph;
7780
SyntacticFeatureStruct = word.SyntacticFeatureStruct.Clone();
@@ -396,6 +399,48 @@ internal void NonHeadUnapplied(Word nonHead)
396399
_nonHeadAppIndex++;
397400
}
398401

402+
/// <summary>
/// The word this word was derived from. The copy constructor sets it to the word being copied,
/// AnalysisStratumRule.Apply re-points it at the stratum's original input, and
/// ExpandAlternatives follows it to find alternatives recorded on earlier words.
/// </summary>
internal Word Source { get; set; }
403+
404+
/// <summary>
/// The words recorded as alternatives of this word. This is the live backing list,
/// so callers add to it directly.
/// </summary>
internal IList<Word> Alternatives => _alternatives;
408+
409+
/// <summary>
/// Expands this word into the flat list of words it stands for, taking into account the
/// alternatives recorded on this word and on the words reachable through <see cref="Source"/>.
/// </summary>
/// <returns>
/// When the source chain expands to fewer than two words (or there is no source), the list
/// starts with this word itself. Otherwise it starts with one frozen clone per expanded source
/// word, each given this word's shape, the morphological-rule and non-head unapplications this
/// word holds beyond those of its source, and this word's root allomorph when it is set.
/// In both cases the expansions of this word's own alternatives are appended.
/// </returns>
internal IList<Word> ExpandAlternatives()
{
    IList<Word> alternatives = new List<Word>();
    IList<Word> originals = Source?.ExpandAlternatives();
    if (originals == null || originals.Count < 2)
    {
        // Nothing was merged upstream, so this word is its own single expansion.
        alternatives.Add(this);
    }
    else
    {
        // originals is non-null only when Source is non-null, so Source is dereferenced
        // directly below. Each upstream alternative is cloned and brought up to date with
        // the changes made to this word since it was derived from Source.
        foreach (Word original in originals)
        {
            Word alternative = original.Clone();
            alternative._shape = Shape;
            // Replay the morphological rules unapplied after Source.
            int mruleStart = Source._mruleApps.Count();
            for (int i = mruleStart; i < _mruleApps.Count(); i++)
                alternative.MorphologicalRuleUnapplied(_mruleApps[i]);
            // Replay the non-head unapplications recorded after Source.
            int nonHeadStart = Source._nonHeadApps.Count();
            for (int i = nonHeadStart; i < _nonHeadApps.Count(); i++)
                alternative.NonHeadUnapplied(_nonHeadApps[i]);
            if (RootAllomorph != null)
                alternative.RootAllomorph = RootAllomorph;
            alternative.Freeze();
            alternatives.Add(alternative);
        }
    }
    // Add local alternatives.
    foreach (Word alternative in _alternatives)
        alternatives.AddRange(alternative.ExpandAlternatives());
    return alternatives;
}
443+
399444
public Allomorph GetAllomorph(Annotation<ShapeNode> morph)
400445
{
401446
var alloID = (string)morph.FeatureStruct.GetValue(HCFeatureSystem.Allomorph);

0 commit comments

Comments
 (0)