@@ -2,7 +2,9 @@ use std::io::Write;
22
33use crate :: analyse:: GenomeOutput ;
44use crate :: barrnap:: BarrnapAnalyser ;
5+ use crate :: checkm2:: CheckM2Analyser ;
56use crate :: trnascan:: TrnascanAnalyser ;
7+ use crate :: QualityFinder ;
68use crate :: RrnaFinder ;
79use crate :: TrnaFinder ;
810use bird_tool_utils:: clap_utils:: * ;
@@ -11,6 +13,33 @@ use bird_tool_utils_man::prelude::{Author, Flag, Manual, Opt, Section};
1113use clap:: * ;
1214use std:: collections:: HashMap ;
1315
16+ pub enum QualityAnalyser {
17+ CheckM2 ( crate :: checkm2:: CheckM2Analyser ) ,
18+ }
19+
20+ impl QualityFinder for QualityAnalyser {
21+ fn prepare_comp_cont (
22+ & mut self ,
23+ genome_paths : & [ String ] ,
24+ threads : usize ,
25+ tmp_path : & std:: path:: Path ,
26+ ) {
27+ match self {
28+ QualityAnalyser :: CheckM2 ( a) => a. prepare_comp_cont ( genome_paths, threads, tmp_path) ,
29+ }
30+ }
31+ fn find_comp_cont ( & self , genome_path : & str ) -> ( f64 , f64 ) {
32+ match self {
33+ QualityAnalyser :: CheckM2 ( a) => a. find_comp_cont ( genome_path) ,
34+ }
35+ }
36+ fn method_name ( & self ) -> & str {
37+ match self {
38+ QualityAnalyser :: CheckM2 ( a) => a. method_name ( ) ,
39+ }
40+ }
41+ }
42+
1443pub enum RrnaAnalyser {
1544 Barrnap ( BarrnapAnalyser ) ,
1645}
@@ -48,32 +77,40 @@ impl TrnaFinder for TrnaAnalyser {
4877
4978pub struct GalahAnalyser < ' a > {
5079 pub genome_fasta_files : & ' a [ std:: string:: String ] ,
80+ pub threads : usize ,
81+ pub quality_analyser : QualityAnalyser ,
5182 pub rrna_analyser : RrnaAnalyser ,
5283 pub trna_analyser : TrnaAnalyser ,
5384}
5485
5586impl GalahAnalyser < ' _ > {
56- pub fn analyse ( & self ) -> std:: collections:: HashMap < String , GenomeOutput > {
87+ pub fn analyse ( & mut self ) -> std:: collections:: HashMap < String , GenomeOutput > {
5788 crate :: analyse:: analyse (
5889 self . genome_fasta_files ,
90+ self . threads ,
91+ & mut self . quality_analyser ,
5992 & self . rrna_analyser ,
6093 & self . trna_analyser ,
6194 )
6295 }
6396}
6497
/// Names of the command-line arguments used to configure a [`GalahAnalyser`].
///
/// Each field holds the identifier under which the corresponding argument is
/// registered with clap and later looked up in the parsed matches.
pub struct GalahAnalyserCommandDefinition {
    /// Argument selecting the quality-analysis method.
    pub quality_method_argument: String,
    /// Argument selecting the rRNA-analysis method.
    pub rrna_method_argument: String,
    /// Argument selecting the tRNA-analysis method.
    pub trna_method_argument: String,
    /// Argument naming the MIMAG summary output.
    pub output_mimag_summary_argument: String,
    /// Argument giving the CheckM2 database path.
    pub checkm2_db_path_argument: String,
}
70105
71106lazy_static ! {
72107 static ref ANALYSE_COMMAND_DEFINITION : GalahAnalyserCommandDefinition = {
73108 GalahAnalyserCommandDefinition {
109+ quality_method_argument: "quality-method" . to_string( ) ,
74110 rrna_method_argument: "rrna-method" . to_string( ) ,
75111 trna_method_argument: "trna-method" . to_string( ) ,
76112 output_mimag_summary_argument: "output-mimag-summary" . to_string( ) ,
113+ checkm2_db_path_argument: "checkm2-db-path" . to_string( ) ,
77114 }
78115 } ;
79116}
@@ -96,9 +133,11 @@ lazy_static! {
96133
97134 {} analyse --genome-fasta-directory input_genomes/
98135 --output-mimag-summary mimag_summary.tsv
136+ --checkm2-db-path /path/to/checkm2_db
99137
100138{}
101139
140+ CHECKM2DB=/path/to/checkm2_db
102141 {} analyse --genome-fasta-list genomes.txt
103142 --output-mimag-summary mimag_summary.tsv
104143
@@ -115,7 +154,8 @@ See {} analyse --full-help for further options and further detail.
115154 ansi_term:: Colour :: Green . paint( "Analyse (determine MIMAG status of) genomes" ) ,
116155 ansi_term:: Colour :: Purple . paint(
117156 "Example: Analyse the rRNA/tRNA content of a directory of .fna\n \
118- FASTA files and output a summary of gene counts and MIMAG status\n \
157+ FASTA files, using CheckM2 database specified by argument,\n \
158+ and output a summary of gene counts and MIMAG status\n \
119159 to mimag_summary.tsv:"
120160 . to_string( ) ,
121161 ) ,
@@ -125,7 +165,8 @@ See {} analyse --full-help for further options and further detail.
125165 . and_then( |s| s. into_string( ) . ok( ) )
126166 . expect( "Failed to find running program basename" ) ,
127167 ansi_term:: Colour :: Purple . paint(
128- "Example: Analyse a set of genomes with paths specified in genomes.txt, and\n \
168+ "Example: Analyse a set of genomes with paths specified in genomes.txt,\n \
169+ using CheckM2 database specified by environment variable, and\n \
129170 output the MIMAG summary to mimag_summary.tsv:"
130171 ) ,
131172 std:: env:: current_exe( )
@@ -248,6 +289,20 @@ pub fn add_analyse_subcommand(app: clap::Command) -> clap::Command {
248289 . value_parser ( crate :: TRNA_METHODS )
249290 . default_value ( crate :: DEFAULT_TRNA_METHOD )
250291 . help ( "Method for tRNA analysis" ) ,
292+ )
293+ . arg (
294+ Arg :: new ( & * ANALYSE_COMMAND_DEFINITION . quality_method_argument )
295+ . long ( "quality-method" )
296+ . value_parser ( crate :: QUALITY_METHODS )
297+ . default_value ( crate :: DEFAULT_QUALITY_METHOD )
298+ . help ( "Method for quality analysis" ) ,
299+ )
300+ . arg (
301+ Arg :: new ( & * ANALYSE_COMMAND_DEFINITION . checkm2_db_path_argument )
302+ . long ( "checkm2-db-path" )
303+ . value_name ( "CHECKM2DB" )
304+ . help ( "Path to CheckM2 database (required for checkm2 quality method) [default: from CHECKM2DB environment variable]" )
305+ . required ( false ) ,
251306 ) ;
252307
253308 analyse_subcommand =
@@ -329,7 +384,7 @@ pub fn run_analyse_subcommand(
329384
330385 let genome_fasta_files: Vec < String > = parse_list_of_genome_fasta_files ( m, true ) . unwrap ( ) ;
331386
332- let galah = generate_galah_analyser ( & genome_fasta_files, m, & ANALYSE_COMMAND_DEFINITION )
387+ let mut galah = generate_galah_analyser ( & genome_fasta_files, m, & ANALYSE_COMMAND_DEFINITION )
333388 . expect ( "Failed to parse galah analyse arguments correctly" ) ;
334389
335390 // Open file handles here so errors are caught before CPU-heavy commands
@@ -347,6 +402,21 @@ fn generate_galah_analyser<'a>(
347402 m : & ArgMatches ,
348403 command_definition : & GalahAnalyserCommandDefinition ,
349404) -> Result < GalahAnalyser < ' a > , String > {
405+ let threads = * m. get_one :: < u16 > ( "threads" ) . unwrap ( ) as usize ;
406+ let checkm2_db_path = m
407+ . get_one :: < String > ( & command_definition. checkm2_db_path_argument )
408+ . map ( |s| s. to_string ( ) )
409+ . or_else ( || std:: env:: var ( "CHECKM2DB" ) . ok ( ) )
410+ . unwrap_or_default ( ) ;
411+
412+ let quality_analyser = match m
413+ . get_one :: < String > ( & command_definition. quality_method_argument )
414+ . map ( |s| s. as_str ( ) )
415+ {
416+ Some ( "checkm2" ) => QualityAnalyser :: CheckM2 ( CheckM2Analyser :: new ( checkm2_db_path) ) ,
417+ _ => return Err ( "Invalid quality method specified" . to_string ( ) ) ,
418+ } ;
419+
350420 let rrna_analyser = match m
351421 . get_one :: < String > ( & command_definition. rrna_method_argument )
352422 . map ( |s| s. as_str ( ) )
@@ -365,6 +435,8 @@ fn generate_galah_analyser<'a>(
365435
366436 Ok ( GalahAnalyser {
367437 genome_fasta_files,
438+ threads,
439+ quality_analyser,
368440 rrna_analyser,
369441 trna_analyser,
370442 } )
@@ -378,15 +450,17 @@ fn write_analyse_outputs(
378450 if let Some ( mut f) = output_definitions. output_mimag_summary {
379451 writeln ! (
380452 f,
381- "genome\t rRNA_5S\t rRNA_16S\t rRNA_23S\t tRNAs\t MIMAG_quality" ,
453+ "genome\t completeness \t contamination \ t rRNA_5S\t rRNA_16S\t rRNA_23S\t tRNAs\t MIMAG_quality" ,
382454 )
383455 . unwrap ( ) ;
384456 for genome in genome_fasta_files {
385457 if let Some ( output_data) = analysis. get ( & * genome) {
386458 writeln ! (
387459 f,
388- "{genome}\t {r5s}\t {r16s}\t {r23s}\t {trnas}\t {mimag_quality}" ,
460+ "{genome}\t {completeness} \t {contamination} \t { r5s}\t {r16s}\t {r23s}\t {trnas}\t {mimag_quality}" ,
389461 genome = genome,
462+ completeness = output_data. completeness,
463+ contamination = output_data. contamination,
390464 r5s = output_data. r5s,
391465 r16s = output_data. r16s,
392466 r23s = output_data. r23s,
0 commit comments