Skip to content

Commit 4cc6a98

Browse files
Merge pull request #38 from pangenome/tweaks
Merge overlapping/contiguous ranges to visit in `query_transitive` to strongly improve performance
2 parents: d1d150d + b14d1e2 — commit 4cc6a98

File tree

2 files changed

+27
-13
lines changed

2 files changed

+27
-13
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/impg.rs

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -140,22 +140,22 @@ impl SortedRanges {
140140
} else {
141141
(new_range.1, new_range.0)
142142
};
143-
143+
144144
// Return regions that don't overlap with existing ranges
145145
let mut non_overlapping = Vec::new();
146146
let mut current = start;
147-
147+
148148
// Find the first range that could overlap
149149
let mut i = match self.ranges.binary_search_by_key(&start, |&(s, _)| s) {
150150
Ok(pos) => pos,
151151
Err(pos) => pos,
152152
};
153-
153+
154154
// Check previous range for overlap
155155
if i > 0 && self.ranges[i - 1].1 > start {
156156
i -= 1;
157157
}
158-
158+
159159
// Process all potentially overlapping ranges
160160
while i < self.ranges.len() && current < end {
161161
let (range_start, range_end) = self.ranges[i];
@@ -168,11 +168,11 @@ impl SortedRanges {
168168
current = max(current, range_end);
169169
i += 1;
170170
}
171-
171+
172172
if current < end {
173173
non_overlapping.push((current, end));
174174
}
175-
175+
176176
// Now insert the range while maintaining sorted order and merging overlaps
177177
match self.ranges.binary_search_by_key(&start, |&(s, _)| s) {
178178
Ok(pos) | Err(pos) => {
@@ -184,12 +184,12 @@ impl SortedRanges {
184184
self.ranges[pos].0 = min(start, self.ranges[pos].0);
185185
self.ranges[pos].1 = max(end, self.ranges[pos].1);
186186
self.merge_forward_from(pos);
187-
} else {
187+
} else {
188188
self.ranges.insert(pos, (start, end));
189189
}
190190
}
191191
}
192-
192+
193193
non_overlapping
194194
}
195195

@@ -384,14 +384,14 @@ impl Impg {
384384
.map(|(&k, v)| (k, (*v).clone()))
385385
.collect()
386386
} else {
387-
FxHashMap::default()
387+
FxHashMap::with_capacity_and_hasher(self.seq_index.len(), Default::default())
388388
};
389389
// Initialize first visited range for target_id if not already present
390390
visited_ranges.entry(target_id)
391391
.or_default()
392392
.insert((range_start, range_end));
393393

394-
while let Some((current_target_id, current_target_start, current_target_end)) = stack.pop() {
394+
while let Some((current_target_id, current_target_start, current_target_end)) = stack.pop() {
395395
if let Some(tree) = self.trees.get(&current_target_id) {
396396
tree.query(current_target_start, current_target_end, |interval| {
397397
let metadata = &interval.metadata;
@@ -430,7 +430,21 @@ impl Impg {
430430
}
431431
}
432432
});
433-
}
433+
434+
// Merge contiguous/overlapping ranges with same sequence_id
435+
stack.sort_by_key(|(id, start, _)| (*id, *start));
436+
let mut write = 0;
437+
for read in 1..stack.len() {
438+
if stack[write].0 == stack[read].0 && // Same sequence_id
439+
stack[write].2 >= stack[read].1 { // Overlapping or contiguous
440+
stack[write].2 = stack[write].2.max(stack[read].2);
441+
} else {
442+
write += 1;
443+
stack.swap(write, read);
444+
}
445+
}
446+
stack.truncate(write + 1);
447+
}
434448
}
435449

436450
results

0 commit comments

Comments
 (0)