# ANSI escape sequences used to colorize terminal messages. They are stored
# as literal backslash sequences, so they are only turned into real escapes
# when they appear inside a printf format string.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # "no color": switches the terminal back to its default

# Version string of this program.
VERSION="v1.3.1"
88
99
1010if [ " $1 " == " --version" ] || [ " $1 " == " -v" ]; then
@@ -69,7 +69,7 @@ if [ "$#" == 0 ] || [ $1 == "-h" ] || [ $1 == "help" ]; then
6969 printf " - [-T <str>] default: FastTree\n"
7070 printf " Which program to use for tree generation. Currently supported are\n"
7171 printf " \" FastTree\" and \" IQ-TREE\" . As of now, these run with default settings\n"
72- printf " only (and QT -TREE includes \" -mset WAG,LG\" . To run either with more\n"
72+ printf " only (and IQ -TREE includes \" -mset WAG,LG\" . To run either with more\n"
7373 printf " specific options (and there is a lot of room for variation here), you\n"
7474 printf " can use the output alignment file from GToTree as input.\n\n"
7575
@@ -142,6 +142,19 @@ if ! command -v FastTree > /dev/null; then
142142 exit
143143fi
144144
#############################################################################
########## SETTING VARIABLES TO REPORT WHAT SHOULD BE CITED AT END ##########
#############################################################################
# One flag per program or resource. Every flag starts out as "false" and is
# switched to "true" further down once the corresponding program or resource
# has actually been used in the run.
parallel_used="false"
prodigal_used="false"
hmmer_used="false"
muscle_used="false"
trimal_used="false"
taxonkit_used="false"
fasttree_used="false"
iqtree_used="false"
universal_SCGs_used="false"
157+
145158
146159# ############################################################################
147160# ########################### PARSING ARGUMENTS ############################
@@ -190,6 +203,26 @@ if [ $taxonkit_id_swap != "false" ]; then
190203 fi
191204fi
192205
# checking iqtree is available if it was specified, and checking tree program properly specified

#######################################
# Validates the requested tree-building program.
# Globals:   RED, NC (read; expanded inside the printf format strings)
# Arguments: $1 - name of the tree program ("FastTree" or "IQ-TREE")
# Outputs:   an explanatory message on stdout when the check fails
# Returns:   0 when the program is acceptable; otherwise EXITS the shell
#            with status 1 (call it in a subshell to capture the failure)
#######################################
gtt_check_tree_program() {
  local program=$1

  case "$program" in
    FastTree)
      # Nothing further to verify for the default program here.
      return 0
      ;;
    IQ-TREE)
      if ! command -v iqtree > /dev/null; then
        printf "\n ${RED}You specified to use IQ-TREE, but 'iqtree' not found in your PATH :(${NC}\n"
        printf "\nExiting for now.\n\n"
        exit 1
      fi
      ;;
    *)
      # The user-supplied name is passed as an argument, not spliced into the
      # format string, so a '%' or backslash in it is printed literally.
      printf "\n ${RED}You specified to use %s, but that is not one of the options :(${NC}\n" "$program"
      printf "\n Currently available options are 'FastTree' (the default), or 'IQ-TREE'.\n\n"

      printf " You can also run GToTree in alignment-only mode by adding the \"-N\" flag,\n"
      printf " and then take your concatenated alignment to another tree program :)\n"
      printf "\nExiting for now.\n\n"
      exit 1
      ;;
  esac
}

# An unset or empty value means the default program, FastTree.
gtt_check_tree_program "${tree_program:-FastTree}"
224+
225+
193226# checking no duplicates in NCBI accession file
194227if [ -f " $NCBI_acc_file " ]; then
195228 num_dupes=$( uniq -d " $NCBI_acc_file " | wc -l | sed " s/^ *//" | cut -d " " -f 1)
263296 printf " \nExiting for now.\n\n"
264297 exit
265298 fi
299+
266300fi
267301
268302
@@ -468,6 +502,11 @@ printf "\n ${GREEN}Total input genomes: $total_input
468502# ### checking and reporting specified hmm source ####
469503printf " \n HMM source to be used:\n" | tee -a $gtotree_log
470504
# "Universal" is accepted as a shorthand: expand it to the full SCG-set name
# and record that this set was used. The value is quoted (and defaulted to
# empty) so an empty or blank-containing value cannot break the test.
if [[ "${hmm_file:-}" == "Universal" ]]; then
  hmm_file="Universal_Hug_et_al"
  universal_SCGs_used="true"
fi
509+
471510if [ -f " $hmm_file " ]; then
472511 grep " ^NAME" $hmm_file | tr -s " " | cut -f2 -d " " > uniq_hmm_names.tmp
473512 hmm_target_genes_total=$( wc -l uniq_hmm_names.tmp | sed " s/^ *//" | cut -d " " -f 1)
489528fi
490529
491530
492-
493531# ############################################################################
494532# ############# EXPLICITLY STATING IF DEFAULT BEHAVIOR CHANGED #############
495533# ############################################################################
@@ -515,6 +553,7 @@ if [ $output_dir != "GToTree_output" ] || [ "$file_to_genome_id_map" != "" ] ||
515553
516554 if [ $taxonkit_id_swap != " false" ]; then
517555 printf " - Taxonkit will be used to add lineage info to labels.\n" | tee -a $gtotree_log
556+ taxonkit_used=" true"
518557 fi
519558
520559 if [ $lineage_spec != " Domain,Phylum,Class,Species,Strain" ]; then
@@ -548,6 +587,7 @@ if [ $output_dir != "GToTree_output" ] || [ "$file_to_genome_id_map" != "" ] ||
548587
549588 if [ $num_jobs != " 1" ]; then
550589 printf " - Number of jobs to run during parallelizable steps has been set to $num_jobs .\n" | tee -a $gtotree_log
590+ parallel_used=" true"
551591 fi
552592
553593 if [ $best_hit_mode == " true" ]; then
@@ -581,14 +621,17 @@ if [ $total_input_genomes -le 20 ]; then
581621
582622 printf " ${RED} ****************************************************************************${NC} \n\n" | tee -a $gtotree_log
583623
584- # # if i want this as manual input can use the following...
585- # # for now just warning and moving on so this stays fully automated once started
624+ # ## if i want this as manual input can use the following... ###
625+
586626# read -n 1 -s -p " Press any key to continue with \"-c\" set to $len_cutoff for this run, or
587627# press \"CTRL+C\" to cancel and exit.
588628# "
589629
590630 # printf "\n\n\t Moving forward with \"-c\" set to $len_cutoff this run.\n\n"
591631
632+
633+ # ## for now just warning and moving on so this stays fully automated once started ###
634+
592635 sleep 3
593636fi
594637
@@ -788,6 +831,11 @@ if [ -n "$NCBI_acc_file" ]; then
788831 cat ${tmp_dir} /ncbi_accessions_info.tmp | parallel -j $num_jobs gtt-ncbi-parallel.sh {} $tmp_dir $hmm_file $num_cpus $hmm_target_genes_total $output_dir $best_hit_mode
789832 fi
790833
834+ # # checking if prodigal was used to add to citations list being reported at end
835+ if [ -s ${tmp_dir} /prodigal_used ]; then
836+ prodigal_used=" true"
837+ fi
838+
791839 printf " ________________________________________________________________________________\n\n" | tee -a $gtotree_log
792840
793841
@@ -953,6 +1001,11 @@ if [ -n "$genbank_list_file" ]; then
9531001 # adding retained genomes to genomes from all sources file
9541002 cat ${tmp_dir} /final_included_genbank_genomes.tmp >> ${tmp_dir} /genomes_from_all_sources.tmp
9551003
1004+ # # checking if prodigal was used to add to citations list being reported at end
1005+ if [ -s ${tmp_dir} /prodigal_used ]; then
1006+ prodigal_used=" true"
1007+ fi
1008+
9561009 printf " ________________________________________________________________________________\n\n" | tee -a $gtotree_log
9571010
9581011fi
@@ -1061,12 +1114,13 @@ if [ -n "$fasta_files" ]; then
10611114
10621115 fi
10631116
1064-
1065-
1066-
10671117 # adding retained genomes to genomes from all sources file
10681118 cat ${tmp_dir} /fasta_genomes_list.tmp >> ${tmp_dir} /genomes_from_all_sources.tmp
10691119
1120+ # # prodigal must have been used with input fastas, setting to add to citations list being reported at end
1121+ prodigal_used=" true"
1122+
1123+
10701124 printf " _______________________________________________________________________________\n\n" | tee -a $gtotree_log
10711125
10721126fi
@@ -2008,6 +2062,8 @@ if [ $align_only == 'false' ]; then
20082062 FastTree ${output_dir} /Aligned_SCGs.faa > ${output_dir} /Aligned_SCGs.tre | tee -a $gtotree_log
20092063 fi
20102064
2065+ fasttree_used=" true" # setting to report citations at end
2066+
20112067 else
20122068
20132069 printf " \n ############################################################################## \n" | tee -a $gtotree_log
@@ -2031,6 +2087,8 @@ if [ $align_only == 'false' ]; then
20312087 mv iqtree_out* ${output_dir} /iqtree_out/
20322088 cp ${output_dir} /iqtree_out/iqtree_out.treefile ${output_dir} /Aligned_SCGs_mod_names.tre
20332089
2090+ iqtree_used=" true" # setting to report citations at end
2091+
20342092 fi
20352093
20362094fi
@@ -2142,6 +2200,39 @@ printf "________________________________________________________________________
21422200printf " Log file written to:\n" | tee -a $gtotree_log
21432201printf " ${GREEN}${output_dir} /gtotree-runlog.txt${NC} \n\n" | tee -a $gtotree_log
21442202
### checking programs used and reporting a citations file ###

# Succeeds when the given flag value is "true". Blanks are stripped first,
# which keeps the leniency of the unquoted `[ $flag == "true" ]` tests that
# this helper replaces.
_gtt_flag_is_true() {
  local value=${1//[[:space:]]/}
  [[ "$value" == "true" ]]
}

#######################################
# Writes the citations for the programs used in this run.
# Globals:   prodigal_used, taxonkit_used, fasttree_used, iqtree_used,
#            parallel_used, universal_SCGs_used (read)
# Arguments: $1 - path of the citations file
# Outputs:   (re)creates the file at $1; an existing file is replaced, so
#            writing twice never duplicates entries
# Returns:   non-zero if the file cannot be written
#######################################
gtt_write_citations() {
  local citations_file=$1

  {
    # These four are listed unconditionally.
    printf '%s\n%s\n\n' "GToTree" \
      "Lee MD. GToTree: a user-friendly workflow for phylogenomics. Bioinformatics. 2019; (March):1-3. doi:10.1093/bioinformatics/btz188"
    printf '%s\n%s\n\n' "HMMER3" \
      "Eddy SR. Accelerated profile HMM searches. PLoS Comput. Biol. 2011; 7(10). doi:10.1371/journal.pcbi.1002195"
    printf '%s\n%s\n\n' "Muscle" \
      "Edgar RC. MUSCLE: a multiple sequence alignment method with reduced time and space complexity. BMC Bioinformatics. 2004; 5, 113. doi:10.1186/1471-2105-5-113"
    printf '%s\n%s\n\n' "TrimAl" \
      "Capella-Gutierrez S. et al. trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses. Bioinformatics. 2009; 25, 1972–1973. doi:10.1093/bioinformatics/btp348"

    # The rest are listed only when their flag was switched on during the run.
    if _gtt_flag_is_true "${prodigal_used:-}"; then
      printf '%s\n%s\n\n' "Prodigal" \
        "Hyatt D. et al. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010; 11, 119. doi:10.1186/1471-2105-11-119"
    fi

    if _gtt_flag_is_true "${taxonkit_used:-}"; then
      printf '%s\n%s\n\n' "TaxonKit" \
        "Shen W. and Xiong J. TaxonKit: a cross-platform and efficient NCBI taxonomy toolkit. bioRxiv. 2019. doi:10.1101/513523"
    fi

    if _gtt_flag_is_true "${fasttree_used:-}"; then
      printf '%s\n%s\n\n' "FastTree 2" \
        "Price MN. et al. FastTree 2 - approximately maximum-likelihood trees for large alignments. PLoS One. 2010; 5. doi:10.1371/journal.pone.0009490"
    fi

    if _gtt_flag_is_true "${iqtree_used:-}"; then
      printf '%s\n%s\n\n' "IQ-TREE" \
        "Nguyen L.-T. et al. IQ-TREE: a fast and effective stochastic algorithm for estimating maximum likelihood phylogenies. Mol. Biol. Evol. 2015; 32, 268–274. doi:10.1093/molbev/msu300"
    fi

    if _gtt_flag_is_true "${parallel_used:-}"; then
      printf '%s\n%s\n\n' "GNU Parallel" \
        "Tange O. GNU Parallel 2018. doi:10.5281/zenodo.1146014"
    fi

    if _gtt_flag_is_true "${universal_SCGs_used:-}"; then
      printf '%s\n%s\n\n' "Universal SCG-set" \
        "Hug LA. et al. A new view of the tree of life. Nat. Microbiol. 2016; 1, 1–6. doi:10.1038/NMICROBIOL.2016.48"
    fi
  } > "$citations_file"
}

# Guard: with an empty output directory the target would be "/citations.txt",
# so nothing is announced or written in that case.
if [[ -n "${output_dir:-}" ]]; then
  printf " Programs used and their citations have been written to:\n" | tee -a "${gtotree_log}"
  printf " ${GREEN}%s/citations.txt${NC}\n\n" "${output_dir}" | tee -a "${gtotree_log}"

  gtt_write_citations "${output_dir}/citations.txt"
fi
2235+
21452236duration=$SECONDS
21462237
21472238printf " Total process runtime: $(( $duration / 60 / 60 )) hours and $(( ($duration / 60 ) % 60 )) minutes.\n" | tee -a $gtotree_log
0 commit comments