|
1 | 1 | use super::super::build_types::*; |
2 | 2 | use crate::helpers; |
3 | 3 | use ahash::AHashSet; |
| 4 | +use std::collections::{HashMap, HashSet, VecDeque}; |
4 | 5 |
|
5 | 6 | pub fn find(modules: &Vec<(&String, &Module)>) -> Vec<String> { |
6 | | - let mut visited: AHashSet<String> = AHashSet::new(); |
7 | | - let mut stack: Vec<String> = vec![]; |
| 7 | + find_shortest_cycle(modules) |
| 8 | +} |
8 | 9 |
|
9 | | - // we want to sort the module names so that we always return the same |
10 | | - // dependency cycle (there can be more than one) |
11 | | - let mut module_names = modules |
12 | | - .iter() |
13 | | - .map(|(name, _)| name.to_string()) |
14 | | - .collect::<Vec<String>>(); |
| 10 | +fn find_shortest_cycle(modules: &Vec<(&String, &Module)>) -> Vec<String> { |
| 11 | + let mut shortest_cycle: Vec<String> = Vec::new(); |
| 12 | + |
| 13 | + // Build a graph representation for easier traversal |
15 | 14 |
|
16 | | - module_names.sort(); |
17 | | - for module_name in module_names { |
18 | | - if find_dependency_cycle_helper(&module_name, modules, &mut visited, &mut stack) { |
19 | | - return stack; |
| 15 | + let mut graph: HashMap<&String, &AHashSet<String>> = HashMap::new(); |
| 16 | + let mut in_degrees: HashMap<&String, usize> = HashMap::new(); |
| 17 | + |
| 18 | + let empty = AHashSet::new(); |
| 19 | + // First pass: collect all nodes and initialize in-degrees |
| 20 | + for (name, _) in modules { |
| 21 | + graph.insert(name, &empty); |
| 22 | + in_degrees.insert(name, 0); |
| 23 | + } |
| 24 | + |
| 25 | + // Second pass: build the graph and count in-degrees |
| 26 | + for (name, module) in modules { |
| 27 | + // Update in-degrees |
| 28 | + for dep in module.deps.iter() { |
| 29 | + if let Some(count) = in_degrees.get_mut(dep) { |
| 30 | + *count += 1; |
| 31 | + } |
20 | 32 | } |
21 | | - visited.clear(); |
22 | | - stack.clear(); |
| 33 | + |
| 34 | + // Update the graph |
| 35 | + *graph.get_mut(*name).unwrap() = &module.deps; |
23 | 36 | } |
24 | | - stack |
25 | | -} |
| 37 | + // Remove all nodes in the graph that have no incoming edges |
| 38 | + graph.retain(|_, deps| !deps.is_empty()); |
26 | 39 |
|
27 | | -fn find_dependency_cycle_helper( |
28 | | - module_name: &String, |
29 | | - modules: &Vec<(&String, &Module)>, |
30 | | - visited: &mut AHashSet<String>, |
31 | | - stack: &mut Vec<String>, |
32 | | -) -> bool { |
33 | | - if let Some(module) = modules |
34 | | - .iter() |
35 | | - .find(|(name, _)| *name == module_name) |
36 | | - .map(|(_, module)| module) |
37 | | - { |
38 | | - visited.insert(module_name.to_string()); |
39 | | - // if the module is a mlmap (namespace), we don't want to show this in the path |
40 | | - // because the namespace is not a module the user created, so only add source files |
41 | | - // to the stack |
42 | | - if let SourceType::SourceFile(_) = module.source_type { |
43 | | - stack.push(module_name.to_string()) |
| 40 | + // OPTIMIZATION 1: Start with nodes that are more likely to be in cycles |
| 41 | + // Sort nodes by their connectivity (in-degree + out-degree) |
| 42 | + let mut start_nodes: Vec<&String> = graph.keys().cloned().collect(); |
| 43 | + start_nodes.sort_by(|a, b| { |
| 44 | + let a_connectivity = in_degrees.get(a).unwrap_or(&0) + graph.get(a).map_or(0, |v| v.len()); |
| 45 | + let b_connectivity = in_degrees.get(b).unwrap_or(&0) + graph.get(b).map_or(0, |v| v.len()); |
| 46 | + b_connectivity.cmp(&a_connectivity) // Sort in descending order |
| 47 | + }); |
| 48 | + |
| 49 | + // OPTIMIZATION 2: Keep track of the current shortest cycle length for early termination |
| 50 | + let mut current_shortest_length = usize::MAX; |
| 51 | + |
| 52 | + // OPTIMIZATION 3: Cache nodes that have been checked and don't have cycles |
| 53 | + let mut no_cycle_cache: HashSet<String> = HashSet::new(); |
| 54 | + |
| 55 | + // Try BFS from each node to find the shortest cycle |
| 56 | + for start_node in start_nodes { |
| 57 | + // Skip nodes that we know don't have cycles |
| 58 | + if no_cycle_cache.contains(start_node) { |
| 59 | + continue; |
44 | 60 | } |
45 | | - for dep in &module.deps { |
46 | | - if !visited.contains(dep) { |
47 | | - if find_dependency_cycle_helper(dep, modules, visited, stack) { |
48 | | - return true; |
| 61 | + |
| 62 | + // Skip nodes with no incoming edges |
| 63 | + if in_degrees.get(&start_node).map_or(true, |&d| d == 0) { |
| 64 | + no_cycle_cache.insert(start_node.clone()); |
| 65 | + continue; |
| 66 | + } |
| 67 | + |
| 68 | + if let Some(cycle) = find_cycle_bfs(&start_node, &graph, current_shortest_length) { |
| 69 | + if shortest_cycle.is_empty() || cycle.len() < shortest_cycle.len() { |
| 70 | + shortest_cycle = cycle.clone(); |
| 71 | + current_shortest_length = cycle.len(); |
| 72 | + |
| 73 | + // OPTIMIZATION 4: If we find a very short cycle (length <= 3), we can stop early |
| 74 | + // as it's unlikely to find a shorter one |
| 75 | + if cycle.len() <= 3 { |
| 76 | + break; |
49 | 77 | } |
50 | | - } else if stack.contains(dep) { |
51 | | - stack.push(dep.to_string()); |
52 | | - return true; |
53 | 78 | } |
| 79 | + } else { |
| 80 | + // Cache this node as not having a cycle |
| 81 | + no_cycle_cache.insert(start_node.to_string()); |
54 | 82 | } |
55 | | - // because we only pushed source files to the stack, we also only need to |
56 | | - // pop these from the stack if we don't find a dependency cycle |
57 | | - if let SourceType::SourceFile(_) = module.source_type { |
58 | | - let _ = stack.pop(); |
| 83 | + } |
| 84 | + |
| 85 | + shortest_cycle |
| 86 | +} |
| 87 | + |
| 88 | +fn find_cycle_bfs( |
| 89 | + start: &String, |
| 90 | + graph: &HashMap<&String, &AHashSet<String>>, |
| 91 | + max_length: usize, |
| 92 | +) -> Option<Vec<String>> { |
| 93 | + // Use a BFS to find the shortest cycle |
| 94 | + let mut queue = VecDeque::new(); |
| 95 | + // Store node -> (distance, parent) |
| 96 | + let mut visited: HashMap<String, (usize, Option<String>)> = HashMap::new(); |
| 97 | + |
| 98 | + // Initialize with start node |
| 99 | + visited.insert(start.clone(), (0, None)); |
| 100 | + queue.push_back(start.clone()); |
| 101 | + |
| 102 | + while let Some(current) = queue.pop_front() { |
| 103 | + let (dist, _) = *visited.get(¤t).unwrap(); |
| 104 | + |
| 105 | + // OPTIMIZATION: Early termination if we've gone too far |
| 106 | + // If we're already at max_length, we won't find a shorter cycle from here |
| 107 | + if dist >= max_length - 1 { |
| 108 | + continue; |
| 109 | + } |
| 110 | + |
| 111 | + // Check all neighbors |
| 112 | + if let Some(neighbors) = graph.get(¤t) { |
| 113 | + for neighbor in neighbors.iter() { |
| 114 | + // If we found the start node again, we have a cycle |
| 115 | + if neighbor == start { |
| 116 | + // Reconstruct the cycle |
| 117 | + let mut path = Vec::new(); |
| 118 | + path.push(start.clone()); |
| 119 | + |
| 120 | + // Backtrack from current to start using parent pointers |
| 121 | + let mut curr = current.clone(); |
| 122 | + while curr != *start { |
| 123 | + path.push(curr.clone()); |
| 124 | + curr = visited.get(&curr).unwrap().1.clone().unwrap(); |
| 125 | + } |
| 126 | + |
| 127 | + return Some(path); |
| 128 | + } |
| 129 | + |
| 130 | + // If not visited, add to queue |
| 131 | + if !visited.contains_key(neighbor) { |
| 132 | + visited.insert(neighbor.clone(), (dist + 1, Some(current.clone()))); |
| 133 | + queue.push_back(neighbor.clone()); |
| 134 | + } |
| 135 | + } |
59 | 136 | } |
60 | | - return false; |
61 | 137 | } |
62 | | - false |
| 138 | + |
| 139 | + None |
63 | 140 | } |
64 | 141 |
|
65 | 142 | pub fn format(cycle: &[String]) -> String { |
| 143 | + let mut cycle = cycle.to_vec(); |
| 144 | + cycle.reverse(); |
| 145 | + // add the first module to the end of the cycle |
| 146 | + cycle.push(cycle[0].clone()); |
| 147 | + |
66 | 148 | cycle |
67 | 149 | .iter() |
68 | 150 | .map(|s| helpers::format_namespaced_module_name(s)) |
69 | 151 | .collect::<Vec<String>>() |
70 | | - .join(" -> ") |
| 152 | + .join("\n → ") |
71 | 153 | } |
0 commit comments