De-duplicate prediction results with the history results (#3543)

daxian-dbw · web-flow · commit 659c8282cd9a · 2023-01-23T11:04:40.000-08:00
diff --git a/PSReadLine/Prediction.Views.cs b/PSReadLine/Prediction.Views.cs
@@ -144,12 +144,9 @@ protected List<SuggestionEntry> GetHistorySuggestions(string input, int count)
                         continue;
                     }
 
-                    if (results == null)
-                    {
-                        results = new List<SuggestionEntry>(capacity: count);
-                    }
-
                     _cacheHistorySet.Add(line);
+                    results ??= new List<SuggestionEntry>(capacity: count);
+
                     if (matchIndex == 0)
                     {
                         results.Add(new SuggestionEntry(line, matchIndex));
@@ -224,8 +221,11 @@ private class PredictionListView : PredictionViewBase
             private bool _updatePending;
 
             // Caches re-used when aggregating the suggestion results from predictors and history.
+            // Those caches help us avoid allocation on tons of short-lived collections.
             private List<int> _cacheList1;
             private List<int> _cacheList2;
+            private HashSet<string> _cachedHistorySet;
+            private StringComparer _cachedComparer;
 
             /// <summary>
             /// Gets whether the current window size meets the minimum requirement for the List view to work.
@@ -376,7 +376,7 @@ private void AggregateSuggestions()
 
                         // Assign the results of each plugin to the average slots.
                         // Note that it's possible a plugin may return less results than the average slots,
-                        // and in that case, the unused slots will be come remaining slots that are to be
+                        // and in that case, the unused slots will become remaining slots which are to be
                         // distributed again.
                         for (int i = 0; i < pCount; i++)
                         {
@@ -419,6 +419,18 @@ private void AggregateSuggestions()
                         if (hCount > 0)
                         {
                             _listItems.RemoveRange(hCount, _listItems.Count - hCount);
+
+                            if (_cachedComparer != _singleton._options.HistoryStringComparer)
+                            {
+                                // Create the cached history set if not yet, or re-create the set if case-sensitivity was changed by the user.
+                                _cachedComparer = _singleton._options.HistoryStringComparer;
+                                _cachedHistorySet = new HashSet<string>(_cachedComparer);
+                            }
+
+                            foreach (SuggestionEntry entry in _listItems)
+                            {
+                                _cachedHistorySet.Add(entry.SuggestionText);
+                            }
                         }
 
                         int index = -1;
@@ -435,19 +447,46 @@ private void AggregateSuggestions()
                                     break;
                                 }
 
+                                int skipCount = 0;
                                 int num = _cacheList2[index];
-                                for (int i = 0; i < num; i++)
+                                foreach (PredictiveSuggestion suggestion in item.Suggestions)
                                 {
-                                    string sugText = item.Suggestions[i].SuggestionText ?? string.Empty;
+                                    string sugText = suggestion.SuggestionText ?? string.Empty;
+                                    if (_cachedHistorySet?.Contains(sugText) == true)
+                                    {
+                                        // Skip the prediction result that is exactly the same as one of the history results.
+                                        skipCount++;
+                                        continue;
+                                    }
+
                                     int matchIndex = sugText.IndexOf(_inputText, comparison);
                                     _listItems.Add(new SuggestionEntry(item.Name, item.Id, item.Session, sugText, matchIndex));
+
+                                    if (--num == 0)
+                                    {
+                                        // Break after we've added the desired number of prediction results.
+                                        break;
+                                    }
                                 }
 
-                                if (item.Session.HasValue)
+                                // Get the number of prediction results that were actually put in the list after filtering out the duplicate ones.
+                                int count = _cacheList2[index] - num;
+                                if (item.Session.HasValue && count > 0)
                                 {
-                                    // Send feedback only if the mini-session id is specified.
-                                    // When it's not specified, we consider the predictor doesn't accept feedback.
-                                    _singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, num);
+                                    // Send feedback only if the mini-session id is specified and we truely have its results in the list to be rendered.
+                                    // When the mini-session id is not specified, we consider the predictor doesn't accept feedback.
+                                    //
+                                    // NOTE: when any duplicate results were skipped, the 'count' passed in here won't be accurate as it still includes
+                                    // those skipped ones. This is due to the limitation of the 'OnSuggestionDisplayed' interface method, which didn't
+                                    // assume any prediction results from a predictor could be filtered out at the initial design time. We will have to
+                                    // change the predictor interface to pass in accurate information, such as:
+                                    //   void OnSuggestionDisplayed(Guid predictorId, uint session, int countOrIndex, int[] skippedIndices)
+                                    //
+                                    // However, an interface change has huge impacts. At least, a newer version of PSReadLine will stop working on the
+                                    // existing PowerShell 7+ versions. For this particular issue, the chance that it could happen is low and the impact
+                                    // of the inaccurate feedback is also low, so we should delay this interface change until another highly-demanded
+                                    // change to the interface is required in future (e.g. changes related to supporting OpenAI models).
+                                    _singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, count + skipCount);
                                 }
                             }
                         }
@@ -456,6 +495,7 @@ private void AggregateSuggestions()
                     {
                         _cacheList1.Clear();
                         _cacheList2.Clear();
+                        _cachedHistorySet?.Clear();
                     }
                 }
 
diff --git a/test/ListPredictionTest.cs b/test/ListPredictionTest.cs
@@ -37,6 +37,16 @@ private Disposable SetPrediction(PredictionSource source, PredictionViewStyle vi
                 new SetPSReadLineOption { PredictionSource = oldSource, PredictionViewStyle = oldView }));
         }
 
+        private Disposable SetHistorySearchCaseSensitive(bool caseSensitive)
+        {
+            var options = PSConsoleReadLine.GetOptions();
+            var oldValue = options.HistorySearchCaseSensitive;
+
+            PSConsoleReadLine.SetOptions(new SetPSReadLineOption { HistorySearchCaseSensitive = caseSensitive });
+            return new Disposable(() => PSConsoleReadLine.SetOptions(
+                new SetPSReadLineOption { HistorySearchCaseSensitive = oldValue }));
+        }
+
         private void AssertDisplayedSuggestions(int count, Guid predictorId, uint session, int countOrIndex)
         {
             Assert.Equal(count, _mockedMethods.displayedSuggestions.Count);
@@ -1711,6 +1721,131 @@ public void List_HistoryAndPluginSource_Acceptance()
             Assert.Equal("SOME NEW TEX SOME TEXT AFTER", _mockedMethods.commandHistory[3]);
         }
 
+        [SkippableFact]
+        public void List_HistoryAndPluginSource_Deduplication()
+        {
+            TestSetup(KeyMode.Cmd);
+            int listWidth = CheckWindowSize();
+            var emphasisColors = Tuple.Create(PSConsoleReadLineOptions.DefaultEmphasisColor, _console.BackgroundColor);
+
+            // Using the 'HistoryAndPlugin' source will make PSReadLine get prediction from both history and plugin.
+            using var disp1 = SetPrediction(PredictionSource.HistoryAndPlugin, PredictionViewStyle.ListView);
+            _mockedMethods.ClearPredictionFields();
+
+            // The 1st result from 'predictorId_1' is the same as the 1st entry in history with case-insensitive comparison,
+            // which is the default comparison. So, that result will be filtered out due to the de-duplication logic.
+            SetHistory("some TEXT BEFORE de-dup", "de-dup -of");
+            Test("de-dup", Keys(
+                "de-dup", CheckThat(() => AssertScreenIs(6,
+                        TokenClassification.Command, "de-dup",
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, ' ',
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, " -of",
+                        TokenClassification.None, new string(' ', listWidth - 21), // 21 is the length of '> de-dup -of' plus '[History]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "History",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, " some TEXT BEFORE ",
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> SOME TEXT BEFORE de-dup' plus '[History]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "History",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, ' ',
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, " SOME TEXT AFTER",
+                        TokenClassification.None, new string(' ', listWidth - 39), // 35 is the length of '> de-dup SOME TEXT AFTER' plus '[TestPredictor]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "TestPredictor",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, " SOME NEW TEXT",
+                        TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "LongNamePred...",
+                        TokenClassification.None, ']',
+                        // List view is done, no more list item following.
+                        NextLine,
+                        NextLine
+                     )),
+                // `OnSuggestionDisplayed` should be fired for both predictors.
+                // For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 1st result was filtered out due to duplication.
+                CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
+                CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
+                CheckThat(() => _mockedMethods.ClearPredictionFields()),
+                // Once accepted, the list should be cleared.
+                _.Enter, CheckThat(() => AssertScreenIs(2,
+                        TokenClassification.Command, "de-dup",
+                        NextLine,
+                        NextLine))
+            ));
+
+            // Change the setting to be case sensitive, and check the list view content.
+            using var disp2 = SetHistorySearchCaseSensitive(caseSensitive: true);
+            _mockedMethods.ClearPredictionFields();
+
+            // The 1st result from 'predictorId_1' is not the same as the 2nd entry in history with the case-sensitive comparison.
+            // But the 2nd result from 'predictorId_1' is the same as teh 1st entry in history with the case-sensitive comparison,
+            // so, that result will be filtered out due to the de-duplication logic.
+            SetHistory("de-dup SOME TEXT AFTER", "some TEXT BEFORE de-dup");
+            Test("de-dup", Keys(
+                "de-dup", CheckThat(() => AssertScreenIs(6,
+                        TokenClassification.Command, "de-dup",
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, ' ',
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, " SOME TEXT AFTER",
+                        TokenClassification.None, new string(' ', listWidth - 33), // 33 is the length of '> de-dup SOME TEXT AFTER' plus '[History]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "History",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, " some TEXT BEFORE ",
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> some TEXT BEFORE de-dup' plus '[History]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "History",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, " SOME TEXT BEFORE ",
+                        emphasisColors, "de-dup",
+                        TokenClassification.None, new string(' ', listWidth - 40), // 40 is the length of '> SOME TEXT BEFORE de-dup' plus '[TestPredictor]'.
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "TestPredictor",
+                        TokenClassification.None, ']',
+                        NextLine,
+                        TokenClassification.ListPrediction, '>',
+                        TokenClassification.None, " SOME NEW TEXT",
+                        TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
+                        TokenClassification.None, '[',
+                        TokenClassification.ListPrediction, "LongNamePred...",
+                        TokenClassification.None, ']',
+                        // List view is done, no more list item following.
+                        NextLine,
+                        NextLine
+                     )),
+                // `OnSuggestionDisplayed` should be fired for both predictors.
+                // For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 2nd result was filtered out due to duplication.
+                CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
+                CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
+                // Once accepted, the list should be cleared.
+                _.Enter, CheckThat(() => AssertScreenIs(2,
+                        TokenClassification.Command, "de-dup",
+                        NextLine,
+                        NextLine))
+            ));
+        }
+
         [SkippableFact]
         public void List_NoneSource_ExecutionStatus()
         {