Skip to content

Commit 659c828

Browse files
authored
De-duplicate prediction results with the history results (#3543)
1 parent 96737bf commit 659c828

File tree

2 files changed

+187
-12
lines changed

2 files changed

+187
-12
lines changed

PSReadLine/Prediction.Views.cs

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,12 +144,9 @@ protected List<SuggestionEntry> GetHistorySuggestions(string input, int count)
144144
continue;
145145
}
146146

147-
if (results == null)
148-
{
149-
results = new List<SuggestionEntry>(capacity: count);
150-
}
151-
152147
_cacheHistorySet.Add(line);
148+
results ??= new List<SuggestionEntry>(capacity: count);
149+
153150
if (matchIndex == 0)
154151
{
155152
results.Add(new SuggestionEntry(line, matchIndex));
@@ -224,8 +221,11 @@ private class PredictionListView : PredictionViewBase
224221
private bool _updatePending;
225222

226223
// Caches re-used when aggregating the suggestion results from predictors and history.
224+
// Those caches help us avoid allocation on tons of short-lived collections.
227225
private List<int> _cacheList1;
228226
private List<int> _cacheList2;
227+
private HashSet<string> _cachedHistorySet;
228+
private StringComparer _cachedComparer;
229229

230230
/// <summary>
231231
/// Gets whether the current window size meets the minimum requirement for the List view to work.
@@ -376,7 +376,7 @@ private void AggregateSuggestions()
376376

377377
// Assign the results of each plugin to the average slots.
378378
// Note that it's possible a plugin may return less results than the average slots,
379-
// and in that case, the unused slots will be come remaining slots that are to be
379+
// and in that case, the unused slots will become remaining slots which are to be
380380
// distributed again.
381381
for (int i = 0; i < pCount; i++)
382382
{
@@ -419,6 +419,18 @@ private void AggregateSuggestions()
419419
if (hCount > 0)
420420
{
421421
_listItems.RemoveRange(hCount, _listItems.Count - hCount);
422+
423+
if (_cachedComparer != _singleton._options.HistoryStringComparer)
424+
{
425+
// Create the cached history set if not yet, or re-create the set if case-sensitivity was changed by the user.
426+
_cachedComparer = _singleton._options.HistoryStringComparer;
427+
_cachedHistorySet = new HashSet<string>(_cachedComparer);
428+
}
429+
430+
foreach (SuggestionEntry entry in _listItems)
431+
{
432+
_cachedHistorySet.Add(entry.SuggestionText);
433+
}
422434
}
423435

424436
int index = -1;
@@ -435,19 +447,46 @@ private void AggregateSuggestions()
435447
break;
436448
}
437449

450+
int skipCount = 0;
438451
int num = _cacheList2[index];
439-
for (int i = 0; i < num; i++)
452+
foreach (PredictiveSuggestion suggestion in item.Suggestions)
440453
{
441-
string sugText = item.Suggestions[i].SuggestionText ?? string.Empty;
454+
string sugText = suggestion.SuggestionText ?? string.Empty;
455+
if (_cachedHistorySet?.Contains(sugText) == true)
456+
{
457+
// Skip the prediction result that is exactly the same as one of the history results.
458+
skipCount++;
459+
continue;
460+
}
461+
442462
int matchIndex = sugText.IndexOf(_inputText, comparison);
443463
_listItems.Add(new SuggestionEntry(item.Name, item.Id, item.Session, sugText, matchIndex));
464+
465+
if (--num == 0)
466+
{
467+
// Break after we've added the desired number of prediction results.
468+
break;
469+
}
444470
}
445471

446-
if (item.Session.HasValue)
472+
// Get the number of prediction results that were actually put in the list after filtering out the duplicate ones.
473+
int count = _cacheList2[index] - num;
474+
if (item.Session.HasValue && count > 0)
447475
{
448-
// Send feedback only if the mini-session id is specified.
449-
// When it's not specified, we consider the predictor doesn't accept feedback.
450-
_singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, num);
476+
// Send feedback only if the mini-session id is specified and we truely have its results in the list to be rendered.
477+
// When the mini-session id is not specified, we consider the predictor doesn't accept feedback.
478+
//
479+
// NOTE: when any duplicate results were skipped, the 'count' passed in here won't be accurate as it still includes
480+
// those skipped ones. This is due to the limitation of the 'OnSuggestionDisplayed' interface method, which didn't
481+
// assume any prediction results from a predictor could be filtered out at the initial design time. We will have to
482+
// change the predictor interface to pass in accurate information, such as:
483+
// void OnSuggestionDisplayed(Guid predictorId, uint session, int countOrIndex, int[] skippedIndices)
484+
//
485+
// However, an interface change has huge impacts. At least, a newer version of PSReadLine will stop working on the
486+
// existing PowerShell 7+ versions. For this particular issue, the chance that it could happen is low and the impact
487+
// of the inaccurate feedback is also low, so we should delay this interface change until another highly-demanded
488+
// change to the interface is required in future (e.g. changes related to supporting OpenAI models).
489+
_singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, count + skipCount);
451490
}
452491
}
453492
}
@@ -456,6 +495,7 @@ private void AggregateSuggestions()
456495
{
457496
_cacheList1.Clear();
458497
_cacheList2.Clear();
498+
_cachedHistorySet?.Clear();
459499
}
460500
}
461501

test/ListPredictionTest.cs

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ private Disposable SetPrediction(PredictionSource source, PredictionViewStyle vi
3737
new SetPSReadLineOption { PredictionSource = oldSource, PredictionViewStyle = oldView }));
3838
}
3939

40+
private Disposable SetHistorySearchCaseSensitive(bool caseSensitive)
41+
{
42+
var options = PSConsoleReadLine.GetOptions();
43+
var oldValue = options.HistorySearchCaseSensitive;
44+
45+
PSConsoleReadLine.SetOptions(new SetPSReadLineOption { HistorySearchCaseSensitive = caseSensitive });
46+
return new Disposable(() => PSConsoleReadLine.SetOptions(
47+
new SetPSReadLineOption { HistorySearchCaseSensitive = oldValue }));
48+
}
49+
4050
private void AssertDisplayedSuggestions(int count, Guid predictorId, uint session, int countOrIndex)
4151
{
4252
Assert.Equal(count, _mockedMethods.displayedSuggestions.Count);
@@ -1711,6 +1721,131 @@ public void List_HistoryAndPluginSource_Acceptance()
17111721
Assert.Equal("SOME NEW TEX SOME TEXT AFTER", _mockedMethods.commandHistory[3]);
17121722
}
17131723

1724+
[SkippableFact]
1725+
public void List_HistoryAndPluginSource_Deduplication()
1726+
{
1727+
TestSetup(KeyMode.Cmd);
1728+
int listWidth = CheckWindowSize();
1729+
var emphasisColors = Tuple.Create(PSConsoleReadLineOptions.DefaultEmphasisColor, _console.BackgroundColor);
1730+
1731+
// Using the 'HistoryAndPlugin' source will make PSReadLine get prediction from both history and plugin.
1732+
using var disp1 = SetPrediction(PredictionSource.HistoryAndPlugin, PredictionViewStyle.ListView);
1733+
_mockedMethods.ClearPredictionFields();
1734+
1735+
// The 1st result from 'predictorId_1' is the same as the 1st entry in history with case-insensitive comparison,
1736+
// which is the default comparison. So, that result will be filtered out due to the de-duplication logic.
1737+
SetHistory("some TEXT BEFORE de-dup", "de-dup -of");
1738+
Test("de-dup", Keys(
1739+
"de-dup", CheckThat(() => AssertScreenIs(6,
1740+
TokenClassification.Command, "de-dup",
1741+
NextLine,
1742+
TokenClassification.ListPrediction, '>',
1743+
TokenClassification.None, ' ',
1744+
emphasisColors, "de-dup",
1745+
TokenClassification.None, " -of",
1746+
TokenClassification.None, new string(' ', listWidth - 21), // 21 is the length of '> de-dup -of' plus '[History]'.
1747+
TokenClassification.None, '[',
1748+
TokenClassification.ListPrediction, "History",
1749+
TokenClassification.None, ']',
1750+
NextLine,
1751+
TokenClassification.ListPrediction, '>',
1752+
TokenClassification.None, " some TEXT BEFORE ",
1753+
emphasisColors, "de-dup",
1754+
TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> SOME TEXT BEFORE de-dup' plus '[History]'.
1755+
TokenClassification.None, '[',
1756+
TokenClassification.ListPrediction, "History",
1757+
TokenClassification.None, ']',
1758+
NextLine,
1759+
TokenClassification.ListPrediction, '>',
1760+
TokenClassification.None, ' ',
1761+
emphasisColors, "de-dup",
1762+
TokenClassification.None, " SOME TEXT AFTER",
1763+
TokenClassification.None, new string(' ', listWidth - 39), // 35 is the length of '> de-dup SOME TEXT AFTER' plus '[TestPredictor]'.
1764+
TokenClassification.None, '[',
1765+
TokenClassification.ListPrediction, "TestPredictor",
1766+
TokenClassification.None, ']',
1767+
NextLine,
1768+
TokenClassification.ListPrediction, '>',
1769+
TokenClassification.None, " SOME NEW TEXT",
1770+
TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
1771+
TokenClassification.None, '[',
1772+
TokenClassification.ListPrediction, "LongNamePred...",
1773+
TokenClassification.None, ']',
1774+
// List view is done, no more list item following.
1775+
NextLine,
1776+
NextLine
1777+
)),
1778+
// `OnSuggestionDisplayed` should be fired for both predictors.
1779+
// For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 1st result was filtered out due to duplication.
1780+
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
1781+
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
1782+
CheckThat(() => _mockedMethods.ClearPredictionFields()),
1783+
// Once accepted, the list should be cleared.
1784+
_.Enter, CheckThat(() => AssertScreenIs(2,
1785+
TokenClassification.Command, "de-dup",
1786+
NextLine,
1787+
NextLine))
1788+
));
1789+
1790+
// Change the setting to be case sensitive, and check the list view content.
1791+
using var disp2 = SetHistorySearchCaseSensitive(caseSensitive: true);
1792+
_mockedMethods.ClearPredictionFields();
1793+
1794+
// The 1st result from 'predictorId_1' is not the same as the 2nd entry in history with the case-sensitive comparison.
1795+
// But the 2nd result from 'predictorId_1' is the same as teh 1st entry in history with the case-sensitive comparison,
1796+
// so, that result will be filtered out due to the de-duplication logic.
1797+
SetHistory("de-dup SOME TEXT AFTER", "some TEXT BEFORE de-dup");
1798+
Test("de-dup", Keys(
1799+
"de-dup", CheckThat(() => AssertScreenIs(6,
1800+
TokenClassification.Command, "de-dup",
1801+
NextLine,
1802+
TokenClassification.ListPrediction, '>',
1803+
TokenClassification.None, ' ',
1804+
emphasisColors, "de-dup",
1805+
TokenClassification.None, " SOME TEXT AFTER",
1806+
TokenClassification.None, new string(' ', listWidth - 33), // 33 is the length of '> de-dup SOME TEXT AFTER' plus '[History]'.
1807+
TokenClassification.None, '[',
1808+
TokenClassification.ListPrediction, "History",
1809+
TokenClassification.None, ']',
1810+
NextLine,
1811+
TokenClassification.ListPrediction, '>',
1812+
TokenClassification.None, " some TEXT BEFORE ",
1813+
emphasisColors, "de-dup",
1814+
TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> some TEXT BEFORE de-dup' plus '[History]'.
1815+
TokenClassification.None, '[',
1816+
TokenClassification.ListPrediction, "History",
1817+
TokenClassification.None, ']',
1818+
NextLine,
1819+
TokenClassification.ListPrediction, '>',
1820+
TokenClassification.None, " SOME TEXT BEFORE ",
1821+
emphasisColors, "de-dup",
1822+
TokenClassification.None, new string(' ', listWidth - 40), // 40 is the length of '> SOME TEXT BEFORE de-dup' plus '[TestPredictor]'.
1823+
TokenClassification.None, '[',
1824+
TokenClassification.ListPrediction, "TestPredictor",
1825+
TokenClassification.None, ']',
1826+
NextLine,
1827+
TokenClassification.ListPrediction, '>',
1828+
TokenClassification.None, " SOME NEW TEXT",
1829+
TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
1830+
TokenClassification.None, '[',
1831+
TokenClassification.ListPrediction, "LongNamePred...",
1832+
TokenClassification.None, ']',
1833+
// List view is done, no more list item following.
1834+
NextLine,
1835+
NextLine
1836+
)),
1837+
// `OnSuggestionDisplayed` should be fired for both predictors.
1838+
// For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 2nd result was filtered out due to duplication.
1839+
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
1840+
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
1841+
// Once accepted, the list should be cleared.
1842+
_.Enter, CheckThat(() => AssertScreenIs(2,
1843+
TokenClassification.Command, "de-dup",
1844+
NextLine,
1845+
NextLine))
1846+
));
1847+
}
1848+
17141849
[SkippableFact]
17151850
public void List_NoneSource_ExecutionStatus()
17161851
{

0 commit comments

Comments
 (0)