SSR: Refactor to not rely on recursive search for nesting of matches

davidlattimore · davidlattimore · commit 02fc3d50ee4d · 2020-07-24T21:34:00.000+10:00
Previously, submatches were handled simply by searching in placeholders
for more matches. That only works if we search all nodes in the tree
recursively. In a subsequent commit, I intend to make search not always
be recursive. This commit prepares for that by finding all
matches, even if they overlap, then nesting them and removing
overlapping matches.
diff --git a/crates/ra_ssr/src/lib.rs b/crates/ra_ssr/src/lib.rs
@@ -4,6 +4,7 @@
 //! based on a template.
 
 mod matching;
+mod nester;
 mod parsing;
 mod replacing;
 mod search;
@@ -90,8 +91,10 @@ impl<'db> MatchFinder<'db> {
     /// Returns matches for all added rules.
     pub fn matches(&self) -> SsrMatches {
         let mut matches = Vec::new();
-        self.find_all_matches(&mut matches);
-        SsrMatches { matches }
+        for rule in &self.rules {
+            self.find_matches_for_rule(rule, &mut matches);
+        }
+        nester::nest_and_remove_collisions(matches, &self.sema)
     }
 
     /// Finds all nodes in `file_id` whose text is exactly equal to `snippet` and attempts to match
diff --git a/crates/ra_ssr/src/matching.rs b/crates/ra_ssr/src/matching.rs
@@ -49,6 +49,8 @@ pub struct Match {
     pub(crate) placeholder_values: FxHashMap<Var, PlaceholderMatch>,
     pub(crate) ignored_comments: Vec<ast::Comment>,
     pub(crate) rule_index: usize,
+    /// The depth of matched_node.
+    pub(crate) depth: usize,
 }
 
 /// Represents a `$var` in an SSR query.
@@ -130,10 +132,12 @@ impl<'db, 'sema> Matcher<'db, 'sema> {
             placeholder_values: FxHashMap::default(),
             ignored_comments: Vec::new(),
             rule_index: rule.index,
+            depth: 0,
         };
         // Second matching pass, where we record placeholder matches, ignored comments and maybe do
         // any other more expensive checks that we didn't want to do on the first pass.
         match_state.attempt_match_node(&mut Phase::Second(&mut the_match), &rule.pattern, code)?;
+        the_match.depth = sema.ancestors_with_macros(the_match.matched_node.clone()).count();
         Ok(the_match)
     }
 
diff --git a/crates/ra_ssr/src/nester.rs b/crates/ra_ssr/src/nester.rs
@@ -0,0 +1,98 @@
+//! Converts a flat collection of matches into a nested form suitable for replacement. When there
+//! are multiple matches for a node, or that overlap, priority is given to the earlier rule. Nested
+//! matches are only permitted if the inner match is contained entirely within a placeholder of an
+//! outer match.
+//!
+//! For example, if our search pattern is `foo(foo($a))` and the code had `foo(foo(foo(foo(42))))`,
+//! then we'll get 3 matches, however only the outermost and innermost matches can be accepted. The
+//! middle match would take the second `foo` from the outer match.
+
+use crate::{Match, SsrMatches};
+use ra_syntax::SyntaxNode;
+use rustc_hash::FxHashMap;
+
+pub(crate) fn nest_and_remove_collisions(
+    mut matches: Vec<Match>,
+    sema: &hir::Semantics<ra_ide_db::RootDatabase>,
+) -> SsrMatches {
+    // We sort the matches by depth then by rule index. Sorting by depth means that by the time we
+    // see a match, any parent matches or conflicting matches will have already been seen. Sorting
+    // by rule_index means that if there are two matches for the same node, the rule added first
+    // will take precedence.
+    matches.sort_by(|a, b| a.depth.cmp(&b.depth).then_with(|| a.rule_index.cmp(&b.rule_index)));
+    let mut collector = MatchCollector::default();
+    for m in matches {
+        collector.add_match(m, sema);
+    }
+    collector.into()
+}
+
+#[derive(Default)]
+struct MatchCollector {
+    matches_by_node: FxHashMap<SyntaxNode, Match>,
+}
+
+impl MatchCollector {
+    /// Attempts to add `m` to matches. If it conflicts with an existing match, it is discarded. If
+    /// it is entirely within a placeholder of an existing match, then it is added as a child
+    /// match of the existing match.
+    fn add_match(&mut self, m: Match, sema: &hir::Semantics<ra_ide_db::RootDatabase>) {
+        let matched_node = m.matched_node.clone();
+        if let Some(existing) = self.matches_by_node.get_mut(&matched_node) {
+            try_add_sub_match(m, existing, sema);
+            return;
+        }
+        for ancestor in sema.ancestors_with_macros(m.matched_node.clone()) {
+            if let Some(existing) = self.matches_by_node.get_mut(&ancestor) {
+                try_add_sub_match(m, existing, sema);
+                return;
+            }
+        }
+        self.matches_by_node.insert(matched_node, m);
+    }
+}
+
+/// Attempts to add `m` as a sub-match of `existing`.
+fn try_add_sub_match(
+    m: Match,
+    existing: &mut Match,
+    sema: &hir::Semantics<ra_ide_db::RootDatabase>,
+) {
+    for p in existing.placeholder_values.values_mut() {
+        // Note, no need to check if p.range.file is equal to m.range.file, since we
+        // already know we're within `existing`.
+        if p.range.range.contains_range(m.range.range) {
+            // Convert the inner matches in `p` into a temporary MatchCollector. When
+            // we're done, we then convert it back into an SsrMatches. If we expected
+            // lots of inner matches, it might be worthwhile keeping a MatchCollector
+            // around for each placeholder match. However we expect most placeholders
+            // will have 0 and a few will have 1. More than that should hopefully be
+            // exceptional.
+            let mut collector = MatchCollector::default();
+            for m in std::mem::replace(&mut p.inner_matches.matches, Vec::new()) {
+                collector.matches_by_node.insert(m.matched_node.clone(), m);
+            }
+            collector.add_match(m, sema);
+            p.inner_matches = collector.into();
+            break;
+        }
+    }
+}
+
+impl From<MatchCollector> for SsrMatches {
+    fn from(mut match_collector: MatchCollector) -> Self {
+        let mut matches = SsrMatches::default();
+        for (_, m) in match_collector.matches_by_node.drain() {
+            matches.matches.push(m);
+        }
+        matches.matches.sort_by(|a, b| {
+            // Order matches by file_id then by start range. This should be sufficient since ranges
+            // shouldn't be overlapping.
+            a.range
+                .file_id
+                .cmp(&b.range.file_id)
+                .then_with(|| a.range.range.start().cmp(&b.range.range.start()))
+        });
+        matches
+    }
+}
diff --git a/crates/ra_ssr/src/search.rs b/crates/ra_ssr/src/search.rs
@@ -1,54 +1,41 @@
 //! Searching for matches.
 
-use crate::{matching, Match, MatchFinder};
+use crate::{matching, parsing::ParsedRule, Match, MatchFinder};
 use ra_db::FileRange;
 use ra_syntax::{ast, AstNode, SyntaxNode};
 
 impl<'db> MatchFinder<'db> {
-    pub(crate) fn find_all_matches(&self, matches_out: &mut Vec<Match>) {
+    /// Adds all matches for `rule` to `matches_out`. Matches may overlap in ways that make
+    /// replacement impossible, so further processing is required in order to properly nest matches
+    /// and remove overlapping matches. This is done in the `nester` module.
+    pub(crate) fn find_matches_for_rule(&self, rule: &ParsedRule, matches_out: &mut Vec<Match>) {
         // FIXME: Use resolved paths in the pattern to find places to search instead of always
         // scanning every node.
-        self.slow_scan(matches_out);
+        self.slow_scan(rule, matches_out);
     }
 
-    fn slow_scan(&self, matches_out: &mut Vec<Match>) {
+    fn slow_scan(&self, rule: &ParsedRule, matches_out: &mut Vec<Match>) {
         use ra_db::SourceDatabaseExt;
         use ra_ide_db::symbol_index::SymbolsDatabase;
         for &root in self.sema.db.local_roots().iter() {
             let sr = self.sema.db.source_root(root);
             for file_id in sr.iter() {
                 let file = self.sema.parse(file_id);
                 let code = file.syntax();
-                self.slow_scan_node(code, &None, matches_out);
+                self.slow_scan_node(code, rule, &None, matches_out);
             }
         }
     }
 
     fn slow_scan_node(
         &self,
         code: &SyntaxNode,
+        rule: &ParsedRule,
         restrict_range: &Option<FileRange>,
         matches_out: &mut Vec<Match>,
     ) {
-        for rule in &self.rules {
-            if let Ok(mut m) = matching::get_match(false, rule, &code, restrict_range, &self.sema) {
-                // Continue searching in each of our placeholders.
-                for placeholder_value in m.placeholder_values.values_mut() {
-                    if let Some(placeholder_node) = &placeholder_value.node {
-                        // Don't search our placeholder if it's the entire matched node, otherwise we'd
-                        // find the same match over and over until we got a stack overflow.
-                        if placeholder_node != code {
-                            self.slow_scan_node(
-                                placeholder_node,
-                                restrict_range,
-                                &mut placeholder_value.inner_matches.matches,
-                            );
-                        }
-                    }
-                }
-                matches_out.push(m);
-                return;
-            }
+        if let Ok(m) = matching::get_match(false, rule, &code, restrict_range, &self.sema) {
+            matches_out.push(m);
         }
         // If we've got a macro call, we already tried matching it pre-expansion, which is the only
         // way to match the whole macro, now try expanding it and matching the expansion.
@@ -60,14 +47,15 @@ impl<'db> MatchFinder<'db> {
                     // i.e. we don't want to match something that came from the macro itself.
                     self.slow_scan_node(
                         &expanded,
+                        rule,
                         &Some(self.sema.original_range(tt.syntax())),
                         matches_out,
                     );
                 }
             }
         }
         for child in code.children() {
-            self.slow_scan_node(&child, restrict_range, matches_out);
+            self.slow_scan_node(&child, rule, restrict_range, matches_out);
         }
     }
 }