Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.

Commit 36688fc

Browse files
Merge pull request #205 from github/new-similar-path-detection
2 parents 7e0a48f + 154505b commit 36688fc

File tree

6 files changed

+257
-120
lines changed

6 files changed

+257
-120
lines changed

stack-graphs/include/stack-graphs.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,13 @@ struct sg_forward_path_stitcher *sg_forward_path_stitcher_new(const struct sg_st
10241024
size_t count,
10251025
const sg_node_handle *starting_nodes);
10261026

1027+
// Sets whether similar path detection should be enabled during path stitching. Paths are similar
1028+
// if start and end node, and pre- and postconditions are the same. The presence of similar paths
1029+
// can lead to exponential blow up during path stitching. Similar path detection is disabled by
1030+
// default because of the associated performance cost.
1031+
void sg_forward_path_stitcher_set_similar_path_detection(struct sg_forward_path_stitcher *stitcher,
1032+
bool detect_similar_paths);
1033+
10271034
// Sets the maximum amount of work that can be performed during each phase of the algorithm. By
10281035
// bounding our work this way, you can ensure that it's not possible for our CPU-bound algorithm
10291036
// to starve any worker threads or processes that you might be using. If you don't call this
@@ -1078,6 +1085,13 @@ struct sg_forward_partial_path_stitcher *sg_forward_partial_path_stitcher_from_p
10781085
size_t count,
10791086
const struct sg_partial_path *initial_partial_paths);
10801087

1088+
// Sets whether similar path detection should be enabled during path stitching. Paths are similar
1089+
// if start and end node, and pre- and postconditions are the same. The presence of similar paths
1090+
// can lead to exponential blow up during path stitching. Similar path detection is disabled by
1091+
// default because of the associated performance cost.
1092+
void sg_forward_partial_path_stitcher_set_similar_path_detection(struct sg_forward_partial_path_stitcher *stitcher,
1093+
bool detect_similar_paths);
1094+
10811095
// Sets the maximum amount of work that can be performed during each phase of the algorithm. By
10821096
// bounding our work this way, you can ensure that it's not possible for our CPU-bound algorithm
10831097
// to starve any worker threads or processes that you might be using. If you don't call this

stack-graphs/src/c.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1825,6 +1825,21 @@ pub extern "C" fn sg_forward_path_stitcher_new(
18251825
Box::into_raw(Box::new(ForwardPathStitcher::new(stitcher, paths))) as *mut _
18261826
}
18271827

1828+
/// Sets whether similar path detection should be enabled during path stitching. Paths are similar
1829+
/// if start and end node, and pre- and postconditions are the same. The presence of similar paths
1830+
/// can lead to exponential blow up during path stitching. Similar path detection is disabled by
1831+
/// default because of the associated performance cost.
1832+
#[no_mangle]
1833+
pub extern "C" fn sg_forward_path_stitcher_set_similar_path_detection(
1834+
stitcher: *mut sg_forward_path_stitcher,
1835+
detect_similar_paths: bool,
1836+
) {
1837+
let stitcher = unsafe { &mut *(stitcher as *mut ForwardPathStitcher) };
1838+
stitcher
1839+
.stitcher
1840+
.set_similar_path_detection(detect_similar_paths);
1841+
}
1842+
18281843
/// Sets the maximum amount of work that can be performed during each phase of the algorithm. By
18291844
/// bounding our work this way, you can ensure that it's not possible for our CPU-bound algorithm
18301845
/// to starve any worker threads or processes that you might be using. If you don't call this
@@ -2011,6 +2026,21 @@ pub extern "C" fn sg_forward_partial_path_stitcher_from_partial_paths(
20112026
))) as *mut _
20122027
}
20132028

2029+
/// Sets whether similar path detection should be enabled during path stitching. Paths are similar
2030+
/// if start and end node, and pre- and postconditions are the same. The presence of similar paths
2031+
/// can lead to exponential blow up during path stitching. Similar path detection is disabled by
2032+
/// default because of the associated performance cost.
2033+
#[no_mangle]
2034+
pub extern "C" fn sg_forward_partial_path_stitcher_set_similar_path_detection(
2035+
stitcher: *mut sg_forward_partial_path_stitcher,
2036+
detect_similar_paths: bool,
2037+
) {
2038+
let stitcher = unsafe { &mut *(stitcher as *mut InternalForwardPartialPathStitcher) };
2039+
stitcher
2040+
.stitcher
2041+
.set_similar_path_detection(detect_similar_paths);
2042+
}
2043+
20142044
/// Sets the maximum amount of work that can be performed during each phase of the algorithm. By
20152045
/// bounding our work this way, you can ensure that it's not possible for our CPU-bound algorithm
20162046
/// to starve any worker threads or processes that you might be using. If you don't call this

stack-graphs/src/cycles.rs

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,9 @@
2929
//! always use this particular heuristic, however! We reserve the right to change the heuristic at
3030
//! any time.
3131
32-
use std::collections::HashMap;
33-
3432
use enumset::EnumSet;
3533
use smallvec::SmallVec;
34+
use std::collections::HashMap;
3635

3736
use crate::arena::Handle;
3837
use crate::arena::List;
@@ -45,6 +44,7 @@ use crate::partial::PartialPath;
4544
use crate::partial::PartialPaths;
4645
use crate::paths::Path;
4746
use crate::paths::PathResolutionError;
47+
use crate::paths::Paths;
4848
use crate::stitching::Database;
4949
use crate::stitching::OwnedOrDatabasePath;
5050

@@ -58,44 +58,48 @@ pub struct SimilarPathDetector<P> {
5858
pub struct PathKey {
5959
start_node: Handle<Node>,
6060
end_node: Handle<Node>,
61+
symbol_stack_precondition_len: usize,
62+
scope_stack_precondition_len: usize,
63+
symbol_stack_postcondition_len: usize,
64+
scope_stack_postcondition_len: usize,
6165
}
6266

6367
#[doc(hidden)]
6468
pub trait HasPathKey: Clone {
69+
type Arena;
6570
fn key(&self) -> PathKey;
66-
fn is_shorter_than(&self, other: &Self) -> bool;
6771
}
6872

6973
impl HasPathKey for Path {
74+
type Arena = Paths;
75+
7076
fn key(&self) -> PathKey {
7177
PathKey {
7278
start_node: self.start_node,
7379
end_node: self.end_node,
80+
symbol_stack_precondition_len: 0,
81+
scope_stack_precondition_len: 0,
82+
symbol_stack_postcondition_len: self.symbol_stack.len(),
83+
scope_stack_postcondition_len: self.scope_stack.len(),
7484
}
7585
}
76-
77-
fn is_shorter_than(&self, other: &Self) -> bool {
78-
self.edges.len() < other.edges.len() && self.symbol_stack.len() <= other.symbol_stack.len()
79-
}
8086
}
8187

8288
impl HasPathKey for PartialPath {
89+
type Arena = PartialPaths;
90+
8391
fn key(&self) -> PathKey {
8492
PathKey {
8593
start_node: self.start_node,
8694
end_node: self.end_node,
95+
symbol_stack_precondition_len: self.symbol_stack_precondition.len(),
96+
scope_stack_precondition_len: self.scope_stack_precondition.len(),
97+
symbol_stack_postcondition_len: self.symbol_stack_postcondition.len(),
98+
scope_stack_postcondition_len: self.scope_stack_postcondition.len(),
8799
}
88100
}
89-
90-
fn is_shorter_than(&self, other: &Self) -> bool {
91-
self.edges.len() < other.edges.len()
92-
&& (self.symbol_stack_precondition.len() + self.symbol_stack_postcondition.len())
93-
<= (other.symbol_stack_precondition.len() + other.symbol_stack_postcondition.len())
94-
}
95101
}
96102

97-
const MAX_SIMILAR_PATH_COUNT: usize = 7;
98-
99103
impl<P> SimilarPathDetector<P>
100104
where
101105
P: HasPathKey,
@@ -107,34 +111,35 @@ where
107111
}
108112
}
109113

110-
/// Determines whether we should process this path during the path-finding algorithm. If our
111-
/// heuristics decide that this path is a duplicate, or is "non-productive", then we return
112-
/// `false`, and the path-finding algorithm will skip this path.
113-
pub fn should_process_path<F>(&mut self, path: &P, cmp: F) -> bool
114+
/// Determines whether the given path is similar to a path we have already seen. If we have seen
115+
/// a path with the same start and end node, and the same pre- and postcondition, then we return true.
116+
/// Otherwise, we record this path and return false.
117+
pub fn has_similar_path<Eq>(
118+
&mut self,
119+
_graph: &StackGraph,
120+
arena: &mut P::Arena,
121+
path: &P,
122+
eq: Eq,
123+
) -> bool
114124
where
115-
F: FnMut(&P) -> std::cmp::Ordering,
125+
Eq: Fn(&mut P::Arena, &P, &P) -> bool,
116126
{
117127
let key = path.key();
118-
let paths_with_same_nodes = self.paths.entry(key).or_default();
119-
let index = match paths_with_same_nodes.binary_search_by(cmp) {
120-
// We've already seen this exact path before; no need to process it again.
121-
Ok(_) => return false,
122-
// Otherwise add it to the list.
123-
Err(index) => index,
124-
};
125128

126-
// Count how many paths we've already processed that have the same endpoints and are
127-
// "shorter".
128-
let similar_path_count = paths_with_same_nodes
129-
.iter()
130-
.filter(|similar_path| similar_path.is_shorter_than(path))
131-
.count();
132-
if similar_path_count > MAX_SIMILAR_PATH_COUNT {
133-
return false;
129+
let possibly_similar_paths = self.paths.entry(key).or_default();
130+
for other_path in possibly_similar_paths.iter() {
131+
if eq(arena, path, other_path) {
132+
return true;
133+
}
134134
}
135135

136-
paths_with_same_nodes.insert(index, path.clone());
137-
true
136+
possibly_similar_paths.push(path.clone());
137+
false
138+
}
139+
140+
#[cfg(feature = "copious-debugging")]
141+
pub fn max_bucket_size(&self) -> usize {
142+
self.paths.iter().map(|b| b.1.len()).max().unwrap_or(0)
138143
}
139144
}
140145

0 commit comments

Comments
 (0)