@@ -116,7 +116,15 @@ enum Args {
116116 /// Output format: 'auto' (BED for -r, BEDPE for -b), 'bed', 'bedpe', or 'paf'
117117 #[ clap( short = 'o' , long, value_parser, default_value = "auto" ) ]
118118 output_format : String ,
119-
119+
120+ /// Maximum distance between regions to merge for BED output
121+ #[ clap( short = 'd' , long, value_parser, conflicts_with = "no_merge_bed" , default_value_t = 0 ) ]
122+ merge_distance : i32 ,
123+
124+ /// Disable merging for BED output
125+ #[ clap( long, action, conflicts_with = "merge_distance" ) ]
126+ no_merge_bed : bool ,
127+
120128 /// Enable transitive queries
121129 #[ clap( short = 'x' , long, action) ]
122130 transitive : bool ,
@@ -182,6 +190,8 @@ fn main() -> io::Result<()> {
182190 target_range,
183191 target_bed,
184192 output_format,
193+ merge_distance,
194+ no_merge_bed,
185195 transitive,
186196 transitive_bfs,
187197 max_depth,
@@ -194,7 +204,7 @@ fn main() -> io::Result<()> {
194204
195205 if let Some ( target_range) = target_range {
196206 let ( target_name, target_range) = parse_target_range ( & target_range) ?;
197- let results = perform_query (
207+ let mut results = perform_query (
198208 & impg,
199209 & target_name,
200210 target_range,
@@ -216,15 +226,17 @@ fn main() -> io::Result<()> {
216226 output_results_paf ( & impg, results. into_iter ( ) . skip ( 1 ) , None ) ;
217227 } ,
218228 _ => { // 'auto' or 'bed'
229+ let merge_distance = if no_merge_bed { -1 } else { merge_distance } ;
230+
219231 // BED format - include the first element
220- output_results_bed ( & impg, results) ;
232+ output_results_bed ( & impg, & mut results, merge_distance ) ;
221233 }
222234 }
223235 } else if let Some ( target_bed) = target_bed {
224236 let targets = parse_bed_file ( & target_bed) ?;
225237 info ! ( "Parsed {} target ranges from BED file" , targets. len( ) ) ;
226238 for ( target_name, target_range, name) in targets {
227- let results = perform_query (
239+ let mut results = perform_query (
228240 & impg,
229241 & target_name,
230242 target_range,
@@ -238,8 +250,10 @@ fn main() -> io::Result<()> {
238250 // Output results based on the format
239251 match output_format. as_str ( ) {
240252 "bed" => {
253+ let merge_distance = if no_merge_bed { -1 } else { merge_distance } ;
254+
241255 // BED format - include the first element
242- output_results_bed ( & impg, results) ;
256+ output_results_bed ( & impg, & mut results, merge_distance ) ;
243257 } ,
244258 "paf" => {
245259 // Skip the first element (the input range) for PAF output
@@ -604,7 +618,9 @@ fn perform_query(
604618 }
605619}
606620
607- fn output_results_bed ( impg : & Impg , results : Vec < AdjustedInterval > ) {
621+ fn output_results_bed ( impg : & Impg , results : & mut Vec < AdjustedInterval > , merge_distance : i32 ) {
622+ merge_bed_intervals ( results, merge_distance) ;
623+
608624 for ( overlap, _, _) in results {
609625 let overlap_name = impg. seq_index . get_name ( overlap. metadata ) . unwrap ( ) ;
610626 let ( first, last, strand) = if overlap. first <= overlap. last {
@@ -616,6 +632,41 @@ fn output_results_bed(impg: &Impg, results: Vec<AdjustedInterval>) {
616632 }
617633}
618634
635+ // New helper function to merge BED intervals
636+ fn merge_bed_intervals ( results : & mut Vec < AdjustedInterval > , merge_distance : i32 ) {
637+ if results. len ( ) > 1 && merge_distance >= 0 {
638+ // Sort by sequence ID and start position
639+ results. par_sort_by_key ( |( query_interval, _, _) | {
640+ (
641+ query_interval. metadata ,
642+ std:: cmp:: min ( query_interval. first , query_interval. last ) ,
643+ )
644+ } ) ;
645+
646+ let mut write_idx = 0 ;
647+ for read_idx in 1 ..results. len ( ) {
648+ let ( curr_interval, _, _) = & results[ write_idx] ;
649+ let ( next_interval, _, _) = & results[ read_idx] ;
650+
651+ let curr_min = std:: cmp:: min ( curr_interval. first , curr_interval. last ) ;
652+ let curr_max = std:: cmp:: max ( curr_interval. first , curr_interval. last ) ;
653+ let next_min = std:: cmp:: min ( next_interval. first , next_interval. last ) ;
654+ let next_max = std:: cmp:: max ( next_interval. first , next_interval. last ) ;
655+
656+ if curr_interval. metadata != next_interval. metadata || next_min > curr_max + merge_distance {
657+ write_idx += 1 ;
658+ if write_idx != read_idx {
659+ results. swap ( write_idx, read_idx) ;
660+ }
661+ } else {
662+ results[ write_idx] . 0 . first = curr_min. min ( next_min) ;
663+ results[ write_idx] . 0 . last = curr_max. max ( next_max) ;
664+ }
665+ }
666+ results. truncate ( write_idx + 1 ) ;
667+ }
668+ }
669+
619670fn output_results_bedpe < I > ( impg : & Impg , results : I , name : Option < String > )
620671where
621672 I : Iterator < Item = AdjustedInterval > ,
0 commit comments