Skip to content

Commit 43d53e7

Browse files
committed
Implement fasta+paf output
1 parent e00209f commit 43d53e7

File tree

1 file changed

+66
-22
lines changed

1 file changed

+66
-22
lines changed

src/main.rs

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -523,11 +523,15 @@ enum Args {
523523
#[clap(flatten)]
524524
query: QueryOpts,
525525

526-
/// Output format: 'auto' ('bed' for -r, 'bedpe' for -b), 'bed', 'bedpe', 'paf', 'gfa' (v1.0), 'maf', or 'fasta' ('gfa', 'maf', and 'fasta' require --sequence-files or --sequence-list)
526+
/// Output format: 'auto' ('bed' for -r, 'bedpe' for -b), 'bed', 'bedpe', 'paf', 'gfa' (v1.0), 'maf', 'fasta', or 'fasta+paf' ('gfa', 'maf', 'fasta', and 'fasta+paf' require --sequence-files or --sequence-list)
527527
#[arg(help_heading = "Output options")]
528528
#[clap(short = 'o', long, value_parser, default_value = "auto")]
529529
output_format: String,
530530

531+
/// Destination file basename, or nothing for standard output
532+
#[clap(short = 'O', long, value_parser, default_value = None)]
533+
output_basename: Option<String>,
534+
531535
#[clap(flatten)]
532536
gfa_maf_fasta: GfaMafFastaOpts,
533537

@@ -801,13 +805,14 @@ fn main() -> io::Result<()> {
801805
paf,
802806
query,
803807
output_format,
808+
output_basename,
804809
gfa_maf_fasta,
805810
} => {
806811
initialize_threads_and_log(&common);
807812

808813
validate_output_format(
809814
&output_format,
810-
&["auto", "bed", "bedpe", "paf", "gfa", "maf", "fasta"],
815+
&["auto", "bed", "bedpe", "paf", "gfa", "maf", "fasta", "fasta+paf"],
811816
)?;
812817

813818
let impg = initialize_impg(&common, &paf)?;
@@ -898,15 +903,18 @@ fn main() -> io::Result<()> {
898903
subset_filter.as_ref(),
899904
)?;
900905

906+
// TODO: Why is name an Option for all the output functions?
907+
let name_opt = Some(name);
908+
901909
// Output results based on the resolved format
902910
match resolved_output_format {
903911
"bed" => {
904912
// BED format - include the first element
905913
output_results_bed(
906914
&impg,
907915
&mut results,
908-
&mut io::stdout(),
909-
Some(name),
916+
&mut find_output_stream(&output_basename, "bed")?,
917+
&name_opt,
910918
query.effective_merge_distance(),
911919
query.original_sequence_coordinates,
912920
)?;
@@ -917,8 +925,8 @@ fn main() -> io::Result<()> {
917925
output_results_bedpe(
918926
&impg,
919927
&mut results,
920-
&mut io::stdout(),
921-
Some(name),
928+
&mut find_output_stream(&output_basename, "bed")?,
929+
&name_opt,
922930
query.effective_merge_distance(),
923931
query.original_sequence_coordinates,
924932
)?;
@@ -929,8 +937,8 @@ fn main() -> io::Result<()> {
929937
output_results_paf(
930938
&impg,
931939
&mut results,
932-
&mut io::stdout(),
933-
Some(name),
940+
&mut find_output_stream(&output_basename, "paf")?,
941+
&name_opt,
934942
query.effective_merge_distance(),
935943
query.original_sequence_coordinates,
936944
sequence_index.as_ref(),
@@ -940,9 +948,9 @@ fn main() -> io::Result<()> {
940948
output_results_gfa(
941949
&impg,
942950
&mut results,
943-
&mut io::stdout(),
951+
&mut find_output_stream(&output_basename, "gfa")?,
944952
sequence_index.as_ref().unwrap(),
945-
Some(name),
953+
&name_opt,
946954
query.effective_merge_distance(),
947955
scoring_params.unwrap(),
948956
)?;
@@ -951,9 +959,9 @@ fn main() -> io::Result<()> {
951959
output_results_maf(
952960
&impg,
953961
&mut results,
954-
&mut io::stdout(),
962+
&mut find_output_stream(&output_basename, "maf")?,
955963
sequence_index.as_ref().unwrap(),
956-
Some(name),
964+
&name_opt,
957965
query.effective_merge_distance(),
958966
scoring_params.unwrap(),
959967
)?;
@@ -962,13 +970,35 @@ fn main() -> io::Result<()> {
962970
output_results_fasta(
963971
&impg,
964972
&mut results,
965-
&mut io::stdout(),
973+
&mut find_output_stream(&output_basename, "fa")?,
966974
sequence_index.as_ref().unwrap(),
967-
Some(name),
975+
&name_opt,
968976
query.effective_merge_distance(),
969977
reverse_complement,
970978
)?;
971-
}
979+
},
980+
"fasta+paf" => {
981+
output_results_fasta(
982+
&impg,
983+
&mut results,
984+
&mut find_output_stream(&output_basename, "fa")?,
985+
sequence_index.as_ref().unwrap(),
986+
&name_opt,
987+
query.effective_merge_distance(),
988+
reverse_complement,
989+
)?;
990+
// Skip the first element (the input range) for PAF output
991+
results.remove(0);
992+
output_results_paf(
993+
&impg,
994+
&mut results,
995+
&mut find_output_stream(&output_basename, "paf")?,
996+
&name_opt,
997+
query.effective_merge_distance(),
998+
query.original_sequence_coordinates,
999+
sequence_index.as_ref(),
1000+
)?;
1001+
},
9721002
_ => {
9731003
return Err(io::Error::new(
9741004
io::ErrorKind::InvalidInput,
@@ -1164,7 +1194,7 @@ fn main() -> io::Result<()> {
11641194
fn validate_selection_mode(mode: &str) -> io::Result<()> {
11651195
match mode {
11661196
"longest" | "total" => Ok(()),
1167-
mode if mode == "sample" || mode == "haplotype"
1197+
mode if mode == "sample" || mode == "haplotype"
11681198
|| mode.starts_with("sample,") || mode.starts_with("haplotype,") => Ok(()),
11691199
_ => Err(io::Error::new(
11701200
io::ErrorKind::InvalidInput,
@@ -1389,6 +1419,20 @@ fn get_auto_reader(path: &str) -> io::Result<Box<dyn BufRead>> {
13891419
Ok(Box::new(BufReader::new(reader)))
13901420
}
13911421

1422+
/// Opens the destination for one output format.
///
/// When `basename` is `Some(name)`, creates (or truncates) the file
/// `"{name}.{extension}"` and returns it as the writer. When `basename` is
/// `None`, returns standard output instead and `extension` is ignored.
///
/// # Errors
///
/// Returns an `io::Error` if the file cannot be created. The error keeps the
/// original `ErrorKind`, and its message names the path that failed, because
/// the path is derived from the basename and is not something the user typed
/// verbatim.
fn find_output_stream(basename: &Option<String>, extension: &str) -> io::Result<Box<dyn Write>> {
    // Anthropic's Claude came up with this.
    match basename {
        Some(name) => {
            let filename = format!("{}.{}", name, extension);
            // Attach the derived path to the error; a bare OS error such as
            // "No such file or directory" does not say which file was meant.
            let file = File::create(&filename).map_err(|e| {
                io::Error::new(
                    e.kind(),
                    format!("failed to create output file '{}': {}", filename, e),
                )
            })?;
            Ok(Box::new(file))
        }
        None => Ok(Box::new(io::stdout())),
    }
}
1435+
13921436
/// Load/generate index based on common and PAF options
13931437
fn initialize_impg(common: &CommonOpts, paf: &PafOpts) -> io::Result<Impg> {
13941438
// Resolve the list of PAF files
@@ -1773,7 +1817,7 @@ fn output_results_bed(
17731817
impg: &Impg,
17741818
results: &mut Vec<AdjustedInterval>,
17751819
out: &mut dyn Write,
1776-
name: Option<String>,
1820+
name: &Option<String>,
17771821
merge_distance: i32,
17781822
original_coordinates: bool,
17791823
) -> io::Result<()> {
@@ -1814,7 +1858,7 @@ fn output_results_bedpe(
18141858
impg: &Impg,
18151859
results: &mut Vec<AdjustedInterval>,
18161860
out: &mut dyn Write,
1817-
name: Option<String>,
1861+
name: &Option<String>,
18181862
merge_distance: i32,
18191863
original_coordinates: bool,
18201864
) -> io::Result<()> {
@@ -1900,7 +1944,7 @@ fn output_results_paf(
19001944
impg: &Impg,
19011945
results: &mut Vec<AdjustedInterval>,
19021946
out: &mut dyn Write,
1903-
name: Option<String>,
1947+
name: &Option<String>,
19041948
merge_distance: i32,
19051949
original_coordinates: bool,
19061950
sequence_index: Option<&UnifiedSequenceIndex>,
@@ -2007,7 +2051,7 @@ fn output_results_gfa(
20072051
results: &mut Vec<AdjustedInterval>,
20082052
out: &mut dyn Write,
20092053
sequence_index: &UnifiedSequenceIndex,
2010-
_name: Option<String>,
2054+
_name: &Option<String>,
20112055
merge_distance: i32,
20122056
scoring_params: (u8, u8, u8, u8, u8, u8),
20132057
) -> io::Result<()> {
@@ -2035,7 +2079,7 @@ fn output_results_fasta(
20352079
results: &mut Vec<AdjustedInterval>,
20362080
out: &mut dyn Write,
20372081
sequence_index: &UnifiedSequenceIndex,
2038-
_name: Option<String>,
2082+
_name: &Option<String>,
20392083
merge_distance: i32,
20402084
reverse_complement: bool,
20412085
) -> io::Result<()> {
@@ -2100,7 +2144,7 @@ fn output_results_maf(
21002144
results: &mut Vec<AdjustedInterval>,
21012145
out: &mut dyn Write,
21022146
sequence_index: &UnifiedSequenceIndex,
2103-
_name: Option<String>,
2147+
_name: &Option<String>,
21042148
merge_distance: i32,
21052149
scoring_params: (u8, u8, u8, u8, u8, u8),
21062150
) -> io::Result<()> {

0 commit comments

Comments
 (0)