Skip to content

Commit 21d70bd

Browse files
Merge pull request #65 from pangenome/merge_bad_with_stradness
`impg query`: Consider orientation while BED merging
2 parents beea057 + f47531b commit 21d70bd

File tree

1 file changed

+37
-10
lines changed

1 file changed

+37
-10
lines changed

src/main.rs

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -635,11 +635,15 @@ fn output_results_bed(impg: &Impg, results: &mut Vec<AdjustedInterval>, merge_di
635635
// New helper function to merge BED intervals
636636
fn merge_bed_intervals(results: &mut Vec<AdjustedInterval>, merge_distance: i32) {
637637
if results.len() > 1 && merge_distance >= 0 {
638-
// Sort by sequence ID and start position
638+
// Sort by sequence ID, strand orientation, and start position
639639
results.par_sort_by_key(|(query_interval, _, _)| {
640+
let is_forward = query_interval.first <= query_interval.last;
641+
let start = if is_forward { query_interval.first } else { query_interval.last };
642+
640643
(
641-
query_interval.metadata,
642-
std::cmp::min(query_interval.first, query_interval.last),
644+
query_interval.metadata, // First sort by sequence ID
645+
is_forward, // Then by strand orientation
646+
start // Finally by actual start position
643647
)
644648
});
645649

@@ -648,19 +652,42 @@ fn merge_bed_intervals(results: &mut Vec<AdjustedInterval>, merge_distance: i32)
648652
let (curr_interval, _, _) = &results[write_idx];
649653
let (next_interval, _, _) = &results[read_idx];
650654

651-
let curr_min = std::cmp::min(curr_interval.first, curr_interval.last);
652-
let curr_max = std::cmp::max(curr_interval.first, curr_interval.last);
653-
let next_min = std::cmp::min(next_interval.first, next_interval.last);
654-
let next_max = std::cmp::max(next_interval.first, next_interval.last);
655+
// Check if both intervals are on the same sequence and have same orientation
656+
let curr_is_forward = curr_interval.first <= curr_interval.last;
657+
let next_is_forward = next_interval.first <= next_interval.last;
658+
659+
// Extract actual start/end positions based on orientation
660+
let (curr_start, curr_end) = if curr_is_forward {
661+
(curr_interval.first, curr_interval.last)
662+
} else {
663+
(curr_interval.last, curr_interval.first)
664+
};
665+
666+
let (next_start, next_end) = if next_is_forward {
667+
(next_interval.first, next_interval.last)
668+
} else {
669+
(next_interval.last, next_interval.first)
670+
};
655671

656-
if curr_interval.metadata != next_interval.metadata || next_min > curr_max + merge_distance {
672+
// Only merge if same sequence, same orientation, and within merge distance
673+
if curr_interval.metadata != next_interval.metadata ||
674+
curr_is_forward != next_is_forward ||
675+
next_start > curr_end + merge_distance {
657676
write_idx += 1;
658677
if write_idx != read_idx {
659678
results.swap(write_idx, read_idx);
660679
}
661680
} else {
662-
results[write_idx].0.first = curr_min.min(next_min);
663-
results[write_idx].0.last = curr_max.max(next_max);
681+
// Merge while preserving orientation
682+
if curr_is_forward {
683+
// Forward orientation
684+
results[write_idx].0.first = curr_start.min(next_start);
685+
results[write_idx].0.last = curr_end.max(next_end);
686+
} else {
687+
// Reverse orientation
688+
results[write_idx].0.first = curr_end.max(next_end);
689+
results[write_idx].0.last = curr_start.min(next_start);
690+
}
664691
}
665692
}
666693
results.truncate(write_idx + 1);

0 commit comments

Comments
 (0)