|
| 1 | +<tool id="metawrapmg_bin_refinement" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> |
| 2 | + <description>metagenome bin refinement</description> |
| 3 | + <macros> |
| 4 | + <import>macros.xml</import> |
| 5 | + </macros> |
| 6 | + <expand macro="xrefs"/> |
| 7 | + <requirements> |
| 8 | + <requirement type="package" version="@TOOL_VERSION@">metawrap-refinement</requirement> |
| 9 | + </requirements> |
| 10 | + <command detect_errors="exit_code"><![CDATA[ |
| 11 | + ## When Galaxy exports GALAXY_MEMORY_MB, derive GALAXY_MEMORY_GB from it (shell integer division by 1024); the metawrap -m option reads it and falls back to 16 when unset |
| 12 | + if [ -n "\${GALAXY_MEMORY_MB}" ] ; then |
| 13 | + export GALAXY_MEMORY_GB="\$((GALAXY_MEMORY_MB / 1024))" ; |
| 14 | + fi ; |
| 15 | + ## Stage bin set A: write each input to INITIAL_BINNING/A/bin.N.fa (N = position in the input list), gunzipping inputs whose datatype extension ends in ".gz" and copying the rest
|
| 16 | + mkdir -p INITIAL_BINNING/A && |
| 17 | + #for i, a in enumerate($A) |
| 18 | + #if $a.ext.endswith(".gz") |
| 19 | + gunzip -c '$a' > INITIAL_BINNING/A/bin.${i}.fa && |
| 20 | + #else |
| 21 | + cp '$a' INITIAL_BINNING/A/bin.${i}.fa && |
| 22 | + #end if |
| 23 | + #end for |
| 24 | + ## Optional bin set B: staged into INITIAL_BINNING/B the same way as set A, and only when datasets were supplied for B
|
| 25 | + #if $B |
| 26 | + mkdir -p INITIAL_BINNING/B && |
| 27 | + #for i, b in enumerate($B) |
| 28 | + #if $b.ext.endswith(".gz") |
| 29 | + gunzip -c '$b' > INITIAL_BINNING/B/bin.${i}.fa && |
| 30 | + #else |
| 31 | + cp '$b' INITIAL_BINNING/B/bin.${i}.fa && |
| 32 | + #end if |
| 33 | + #end for |
| 34 | + #end if |
| 35 | + ## Optional bin set C: staged into INITIAL_BINNING/C the same way as set A, and only when datasets were supplied for C |
| 36 | + #if $C |
| 37 | + mkdir -p INITIAL_BINNING/C && |
| 38 | + #for i, c in enumerate($C) |
| 39 | + #if $c.ext.endswith(".gz") |
| 40 | + gunzip -c '$c' > INITIAL_BINNING/C/bin.${i}.fa && |
| 41 | + #else |
| 42 | + cp '$c' INITIAL_BINNING/C/bin.${i}.fa && |
| 43 | + #end if |
| 44 | + #end for |
| 45 | + #end if |
| 46 | +
|
| 47 | + #################### |
| 48 | + ## BIN REFINEMENT ## |
| 49 | + #################### |
| 50 | +
|
| 51 | + ## The checkm database is in the conda package, see |
| 52 | + ## https://github.com/bioconda/bioconda-recipes/pull/38299. |
| 53 | +
|
| 54 | + metawrap bin_refinement |
| 55 | + -t "\${GALAXY_SLOTS:-4}" |
| 56 | + -m "\${GALAXY_MEMORY_GB:-16}" |
| 57 | + '$hidden_quick' |
| 58 | + -c '${binning.c}' |
| 59 | + -x '${binning.x}' |
| 60 | + -o BIN_REFINEMENT |
| 61 | + ## Bin set A is always passed; B and C are passed only when those optional inputs were supplied (and therefore staged above) |
| 62 | + -A INITIAL_BINNING/A/ |
| 63 | + #if $B and len($B) |
| 64 | + -B INITIAL_BINNING/B/ |
| 65 | + #end if |
| 66 | + #if $C and len($C) |
| 67 | + -C INITIAL_BINNING/C/ |
| 68 | + #end if |
| 69 | + ]]></command> |
| 70 | + <inputs> |
| 71 | + <param argument="-A" type="data" multiple="true" format="fasta,fasta.gz" label="Metagenomic bins"/> |
| 72 | + <param argument="-B" type="data" multiple="true" optional="true" format="fasta,fasta.gz" label="Another set of metagenomic bins"/> |
| 73 | + <param argument="-C" type="data" multiple="true" optional="true" format="fasta,fasta.gz" label="Another set of metagenomic bins"/> |
| 74 | + <section name="binning" title="Binning parameters" expanded="false"> |
| 75 | + <param argument="-c" type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins"/> |
| 76 | + <param argument="-x" type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/> |
| 77 | + </section> |
| 78 | + <!-- the pplacer component requires 40 GB per thread. Skip pplacer for |
| 79 | + testing by setting this to the full quick option exactly as the test case passes it (with its leading dashes); the value is inserted verbatim into the command line and is empty by default --> |
| 80 | + <param name="hidden_quick" type="hidden" value=""/> |
| 81 | + </inputs> |
| 82 | + <outputs> |
| 83 | + <!-- contigs binned into fasta files: .fa files found under a metawrap_N_N_bins directory inside BIN_REFINEMENT (e.g. metawrap_60_15_bins in the test) --> |
| 84 | + <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins"> |
| 85 | + <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P<designation>.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true"/> |
| 86 | + </collection> |
| 87 | + <!-- summary figures: files in BIN_REFINEMENT/figures, element name and datatype taken from the file name and extension --> |
| 88 | + <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures"> |
| 89 | + <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures"/> |
| 90 | + </collection> |
| 91 | + <!-- statistics on binning: .stats files in BIN_REFINEMENT, element name is the file name without the extension --> |
| 92 | + <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files"> |
| 93 | + <discover_datasets pattern="(?P<designation>.+)\.stats" format="tabular" directory="BIN_REFINEMENT"/> |
| 94 | + </collection> |
| 95 | + <!-- which contig went into which bin: .contigs files in BIN_REFINEMENT, element name is the file name without the extension --> |
| 96 | + <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments"> |
| 97 | + <discover_datasets pattern="(?P<designation>.+)\.contigs" format="tabular" directory="BIN_REFINEMENT"/> |
| 98 | + </collection> |
| 99 | + </outputs> |
| 100 | + <tests> |
| 101 | + <!-- 01: basic function: two bin sets (A and B, gzipped FASTA), non-default completion/contamination thresholds (60/15), quick option set through hidden_quick --> |
| 102 | + <test> |
| 103 | + <param name="A" ftype="fasta.gz" value="concoct_bins/bin.0.fa.gz,concoct_bins/bin.1.fa.gz,concoct_bins/bin.2.fa.gz,concoct_bins/bin.3.fa.gz,concoct_bins/bin.4.fa.gz,concoct_bins/bin.5.fa.gz,concoct_bins/bin.6.fa.gz,concoct_bins/bin.7.fa.gz,concoct_bins/bin.8.fa.gz,concoct_bins/bin.9.fa.gz,concoct_bins/bin.10.fa.gz,concoct_bins/bin.11.fa.gz,concoct_bins/bin.12.fa.gz,concoct_bins/bin.13.fa.gz,concoct_bins/bin.14.fa.gz,concoct_bins/bin.15.fa.gz,concoct_bins/bin.16.fa.gz,concoct_bins/bin.17.fa.gz,concoct_bins/bin.18.fa.gz,concoct_bins/bin.19.fa.gz,concoct_bins/bin.20.fa.gz,concoct_bins/bin.21.fa.gz,concoct_bins/bin.22.fa.gz,concoct_bins/bin.23.fa.gz,concoct_bins/bin.24.fa.gz,concoct_bins/bin.25.fa.gz,concoct_bins/bin.26.fa.gz"/> |
| 104 | + <param name="B" ftype="fasta.gz" value="maxbin2_bins/bin.0.fa.gz,maxbin2_bins/bin.1.fa.gz"/> |
| 105 | + <section name="binning"> |
| 106 | + <param name="c" value="60"/> |
| 107 | + <param name="x" value="15"/> |
| 108 | + </section> |
| 109 | + <param name="hidden_quick" value="--quick"/> |
| 110 | + <output_collection name="metawrap_bins" type="list"> |
| 111 | + <element name="bin.1" ftype="fasta"> |
| 112 | + <assert_contents> |
| 113 | + <has_text text="NODE_2_length_"/> |
| 114 | + </assert_contents> |
| 115 | + </element> |
| 116 | + </output_collection> |
| 117 | + <output_collection name="metawrap_stats" type="list"> |
| 118 | + <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/> |
| 119 | + </output_collection> |
| 120 | + <output_collection name="metawrap_contigs" type="list"> |
| 121 | + <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/> |
| 122 | + </output_collection> |
| 123 | + </test> |
| 124 | + </tests> |
| 125 | + <help><![CDATA[ |
| 126 | +MetaWRAP |
| 127 | +-------- |
| 128 | +
|
| 129 | +MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that |
| 130 | +accomplishes the core tasks of metagenomic analysis. Additionally, |
| 131 | +metaWRAP takes bin extraction and analysis to the next level. metaWRAP |
| 132 | +is meant to be a fast and simple approach before you delve deeper into |
| 133 | +parameterization of your analysis. MetaWRAP can be applied to a variety |
| 134 | +of environments, including gut, water, and soil microbiomes (see |
| 135 | +metaWRAP paper for benchmarks). |
| 136 | +
|
| 137 | +MetaWRAP bin refinement |
| 138 | +~~~~~~~~~~~~~~~~~~~~~~~ |
| 139 | +
|
| 140 | +The metaWRAP::Bin_refinement module utilizes a hybrid approach to take |
| 141 | +in two or three bin sets that were obtained with different software and |
| 142 | +produces a consolidated, improved bin set. First, binning_refiner is |
| 143 | +used to create hybridized bins from every possible combination of sets. |
| 144 | +If there were three bin sets: A, B, and C, then the following hybrid |
| 145 | +sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM |
| 146 | +is then run to evaluate the completion and contamination of the bins in |
| 147 | +each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are |
| 148 | +then iteratively compared to each other, and each pair is consolidated |
| 149 | +into an improved bin set. To do this, the same bin is identified within |
| 150 | +the two bin sets based on a minimum of 80% overlap in genome length, and |
| 151 | +the better bin is determined based on which bin has the higher score. |
| 152 | +The scoring function is S=Completion-5*Contamination. After all bin sets |
| 153 | +are incorporated into the consolidated bin collection, a de-replication |
| 154 | +function removes any duplicate contigs. If a contig is present in more |
| 155 | +than one bin, it is removed from all but the best bin (based on scoring |
| 156 | +function). CheckM is then run on the final bin set and a final report |
| 157 | +file is generated showing the completion, contamination, and other |
| 158 | +statistics generated by CheckM for each bin. Completion and |
| 159 | +contamination rank plots are also generated to evaluate the success of |
| 160 | +the Bin_refinement module, and compare its output to the quality of the |
| 161 | +original bins. |
| 162 | +
|
| 163 | +-------------- |
| 164 | +
|
| 165 | +MetaWRAP's home page is |
| 166 | +`bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__. |
| 167 | +
|
| 168 | +This tool was wrapped by the Galaxy Australia team. |
| 169 | + ]]></help> |
| 170 | + <expand macro="citations"/> |
| 171 | +</tool> |
0 commit comments