Skip to content

Commit 450d1ac

Browse files
Update Medaka to version 2 (#7400)
* use latest medaka versions * update models available in this version * reset version suffix * medaka snp command has been removed * add scipy requirement * update commands, rework optional log output * rename tool to match rename * docs are no longer there * remove broken docs link * Update tools/medaka/consensus.xml Co-authored-by: Saim Momin <[email protected]> --------- Co-authored-by: Saim Momin <[email protected]>
1 parent 722ac22 commit 450d1ac

File tree

5 files changed

+82
-236
lines changed

5 files changed

+82
-236
lines changed

tools/medaka/consensus.xml

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
<tool id="medaka_consensus" name="medaka consensus tool" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
2-
<description>Assembly polishing via neural networks</description>
1+
<tool id="medaka_consensus" name="medaka inference tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2+
<description> inference from a trained model and alignments.</description>
33
<macros>
44
<import>macros.xml</import>
55
</macros>
@@ -13,7 +13,7 @@ ln -s '${bam.metadata.bam_index}' alignment.bam.bai &&
1313
## Possibly new options bam_chunk and bam_workers. Should we be setting these?
1414
1515
## run
16-
medaka consensus
16+
medaka inference
1717
## optional
1818
--debug ## increase log level
1919
--threads \${GALAXY_SLOTS:-4}
@@ -43,9 +43,7 @@ $save_features
4343
$tag_keep_missing
4444
## required
4545
alignment.bam ## bam
46-
'$out_result' ## output
47-
48-
2>&1 | tee '$out_log'
46+
'$out_result' 2>&1 | tee log.txt
4947
]]></command>
5048
<inputs>
5149
<param argument="bam" type="data" format="bam" label="Select input alignment"/>
@@ -84,7 +82,7 @@ alignment.bam ## bam
8482
<filter>'result' in out</filter>
8583
</data>
8684
<!-- optional -->
87-
<data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
85+
<data name="out_log" format="txt" from_work_dir="log.txt" label="${tool.name} on ${on_string}: Log">
8886
<filter>'log' in out</filter>
8987
</data>
9088
</outputs>
@@ -94,14 +92,14 @@ alignment.bam ## bam
9492
<param name="bam" value="alignment.bam"/>
9593
<output name="out_result">
9694
<assert_contents>
97-
<has_size value="30960"/>
95+
<has_size value="26144"/>
9896
</assert_contents>
9997
</output>
10098
</test>
10199
<!-- #2 -->
102100
<test expect_num_outputs="2">
103101
<param name="bam" value="alignment.bam"/>
104-
<param name="model" value="r941_min_fast_g303"/>
102+
<param name="model" value="r941_min_fast_g507"/>
105103
<param name="batch_size" value="99"/>
106104
<param name="chunk_len" value="9999"/>
107105
<param name="chunk_ovlp" value="999"/>
@@ -111,12 +109,12 @@ alignment.bam ## bam
111109
<param name="out" value="result,log"/>
112110
<output name="out_result">
113111
<assert_contents>
114-
<has_size value="37088"/>
112+
<has_size value="27104"/>
115113
</assert_contents>
116114
</output>
117115
<output name="out_log">
118116
<assert_contents>
119-
<has_text_matching expression=".+Validating and finalising output data."/>
117+
<has_text_matching expression=".+Running prediction at full precision"/>
120118
</assert_contents>
121119
</output>
122120
</test>
@@ -150,4 +148,4 @@ Medaka requires a BAM file as input, and generates a Hierarchical Data Format (H
150148
@REFERENCES@
151149
]]></help>
152150
<expand macro="citations"/>
153-
</tool>
151+
</tool>

tools/medaka/macros.xml

Lines changed: 58 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<macros>
2-
<token name="@TOOL_VERSION@">1.7.2</token>
3-
<token name="@VERSION_SUFFIX@">1</token>
2+
<token name="@TOOL_VERSION@">2.1.1</token>
3+
<token name="@VERSION_SUFFIX@">0</token>
44
<token name="@PROFILE@">21.01</token>
55
<xml name="bio_tools">
66
<xrefs>
@@ -10,6 +10,7 @@
1010
<xml name="requirements">
1111
<requirements>
1212
<requirement type="package" version="@TOOL_VERSION@">medaka</requirement>
13+
<requirement type="package" version="1.16.2">scipy</requirement>
1314
<yield />
1415
</requirements>
1516
</xml>
@@ -52,80 +53,101 @@
5253
<param argument="@ARGUMENT@" type="integer" value="100" min="1" label="Set inference batch size"/>
5354
</xml>
5455
<xml name="model" token_argument="-m" token_label="Select model">
55-
<param argument="@ARGUMENT@" type="select" label="@LABEL@" help="For best results it is important to specify the correct model,
56-
according to the basecaller used. Medaka models are named to indicate i) the pore type, ii) the sequencing device (MinION
56+
<param argument="@ARGUMENT@" type="select" label="@LABEL@" help="For best results it is important to specify the correct model,
57+
according to the basecaller used. Medaka models are named to indicate i) the pore type, ii) the sequencing device (MinION
5758
or PromethION), iii) the basecaller variant, and iv) the basecaller version">
5859
<option value="r103_fast_g507">r103_fast_g507</option>
5960
<option value="r103_fast_snp_g507">r103_fast_snp_g507</option>
6061
<option value="r103_fast_variant_g507">r103_fast_variant_g507</option>
6162
<option value="r103_hac_g507">r103_hac_g507</option>
6263
<option value="r103_hac_snp_g507">r103_hac_snp_g507</option>
6364
<option value="r103_hac_variant_g507">r103_hac_variant_g507</option>
64-
<option value="r103_min_high_g345">r103_min_high_g345</option>
65-
<option value="r103_min_high_g360">r103_min_high_g360</option>
66-
<option value="r103_prom_high_g360">r103_prom_high_g360</option>
67-
<option value="r103_prom_snp_g3210">r103_prom_snp_g3210</option>
68-
<option value="r103_prom_variant_g3210">r103_prom_variant_g3210</option>
6965
<option value="r103_sup_g507">r103_sup_g507</option>
7066
<option value="r103_sup_snp_g507">r103_sup_snp_g507</option>
7167
<option value="r103_sup_variant_g507">r103_sup_variant_g507</option>
68+
<option value="r1041_e82_260bps_fast_g632">r1041_e82_260bps_fast_g632</option>
69+
<option value="r1041_e82_260bps_fast_variant_g632">r1041_e82_260bps_fast_variant_g632</option>
70+
<option value="r1041_e82_260bps_hac_g632">r1041_e82_260bps_hac_g632</option>
71+
<option value="r1041_e82_260bps_hac_v4.0.0">r1041_e82_260bps_hac_v4.0.0</option>
72+
<option value="r1041_e82_260bps_hac_v4.1.0">r1041_e82_260bps_hac_v4.1.0</option>
73+
<option value="r1041_e82_260bps_hac_variant_g632">r1041_e82_260bps_hac_variant_g632</option>
74+
<option value="r1041_e82_260bps_hac_variant_v4.1.0">r1041_e82_260bps_hac_variant_v4.1.0</option>
75+
<option value="r1041_e82_260bps_joint_apk_ulk_v5.0.0">r1041_e82_260bps_joint_apk_ulk_v5.0.0</option>
76+
<option value="r1041_e82_260bps_sup_g632">r1041_e82_260bps_sup_g632</option>
77+
<option value="r1041_e82_260bps_sup_v4.0.0">r1041_e82_260bps_sup_v4.0.0</option>
78+
<option value="r1041_e82_260bps_sup_v4.1.0">r1041_e82_260bps_sup_v4.1.0</option>
79+
<option value="r1041_e82_260bps_sup_variant_g632">r1041_e82_260bps_sup_variant_g632</option>
80+
<option value="r1041_e82_260bps_sup_variant_v4.1.0">r1041_e82_260bps_sup_variant_v4.1.0</option>
81+
<option value="r1041_e82_400bps_bacterial_methylation">r1041_e82_400bps_bacterial_methylation</option>
7282
<option value="r1041_e82_400bps_fast_g615">r1041_e82_400bps_fast_g615</option>
83+
<option value="r1041_e82_400bps_fast_g632">r1041_e82_400bps_fast_g632</option>
7384
<option value="r1041_e82_400bps_fast_variant_g615">r1041_e82_400bps_fast_variant_g615</option>
85+
<option value="r1041_e82_400bps_fast_variant_g632">r1041_e82_400bps_fast_variant_g632</option>
7486
<option value="r1041_e82_400bps_hac_g615">r1041_e82_400bps_hac_g615</option>
87+
<option value="r1041_e82_400bps_hac_g632">r1041_e82_400bps_hac_g632</option>
88+
<option value="r1041_e82_400bps_hac_v4.0.0">r1041_e82_400bps_hac_v4.0.0</option>
89+
<option value="r1041_e82_400bps_hac_v4.1.0">r1041_e82_400bps_hac_v4.1.0</option>
90+
<option value="r1041_e82_400bps_hac_v4.2.0">r1041_e82_400bps_hac_v4.2.0</option>
91+
<option value="r1041_e82_400bps_hac_v4.3.0">r1041_e82_400bps_hac_v4.3.0</option>
92+
<option value="r1041_e82_400bps_hac_v5.0.0">r1041_e82_400bps_hac_v5.0.0</option>
93+
<option value="r1041_e82_400bps_hac_v5.0.0_rl_lstm384_dwells">r1041_e82_400bps_hac_v5.0.0_rl_lstm384_dwells</option>
94+
<option value="r1041_e82_400bps_hac_v5.0.0_rl_lstm384_no_dwells">r1041_e82_400bps_hac_v5.0.0_rl_lstm384_no_dwells</option>
95+
<option value="r1041_e82_400bps_hac_v5.2.0">r1041_e82_400bps_hac_v5.2.0</option>
96+
<option value="r1041_e82_400bps_hac_v5.2.0_rl_lstm384_dwells">r1041_e82_400bps_hac_v5.2.0_rl_lstm384_dwells</option>
97+
<option value="r1041_e82_400bps_hac_v5.2.0_rl_lstm384_no_dwells">r1041_e82_400bps_hac_v5.2.0_rl_lstm384_no_dwells</option>
7598
<option value="r1041_e82_400bps_hac_variant_g615">r1041_e82_400bps_hac_variant_g615</option>
99+
<option value="r1041_e82_400bps_hac_variant_g632">r1041_e82_400bps_hac_variant_g632</option>
100+
<option value="r1041_e82_400bps_hac_variant_v4.1.0">r1041_e82_400bps_hac_variant_v4.1.0</option>
101+
<option value="r1041_e82_400bps_hac_variant_v4.2.0">r1041_e82_400bps_hac_variant_v4.2.0</option>
102+
<option value="r1041_e82_400bps_hac_variant_v4.3.0">r1041_e82_400bps_hac_variant_v4.3.0</option>
103+
<option value="r1041_e82_400bps_hac_variant_v5.0.0">r1041_e82_400bps_hac_variant_v5.0.0</option>
76104
<option value="r1041_e82_400bps_sup_g615">r1041_e82_400bps_sup_g615</option>
105+
<option value="r1041_e82_400bps_sup_v4.0.0">r1041_e82_400bps_sup_v4.0.0</option>
106+
<option value="r1041_e82_400bps_sup_v4.1.0">r1041_e82_400bps_sup_v4.1.0</option>
107+
<option value="r1041_e82_400bps_sup_v4.2.0">r1041_e82_400bps_sup_v4.2.0</option>
108+
<option value="r1041_e82_400bps_sup_v4.3.0">r1041_e82_400bps_sup_v4.3.0</option>
109+
<option value="r1041_e82_400bps_sup_v5.0.0">r1041_e82_400bps_sup_v5.0.0</option>
110+
<option value="r1041_e82_400bps_sup_v5.0.0_rl_lstm384_dwells">r1041_e82_400bps_sup_v5.0.0_rl_lstm384_dwells</option>
111+
<option value="r1041_e82_400bps_sup_v5.0.0_rl_lstm384_no_dwells">r1041_e82_400bps_sup_v5.0.0_rl_lstm384_no_dwells</option>
112+
<option value="r1041_e82_400bps_sup_v5.2.0">r1041_e82_400bps_sup_v5.2.0</option>
113+
<option value="r1041_e82_400bps_sup_v5.2.0_rl_lstm384_dwells">r1041_e82_400bps_sup_v5.2.0_rl_lstm384_dwells</option>
114+
<option value="r1041_e82_400bps_sup_v5.2.0_rl_lstm384_no_dwells">r1041_e82_400bps_sup_v5.2.0_rl_lstm384_no_dwells</option>
77115
<option value="r1041_e82_400bps_sup_variant_g615">r1041_e82_400bps_sup_variant_g615</option>
116+
<option value="r1041_e82_400bps_sup_variant_v4.1.0">r1041_e82_400bps_sup_variant_v4.1.0</option>
117+
<option value="r1041_e82_400bps_sup_variant_v4.2.0">r1041_e82_400bps_sup_variant_v4.2.0</option>
118+
<option value="r1041_e82_400bps_sup_variant_v4.3.0">r1041_e82_400bps_sup_variant_v4.3.0</option>
119+
<option value="r1041_e82_400bps_sup_variant_v5.0.0">r1041_e82_400bps_sup_variant_v5.0.0</option>
78120
<option value="r104_e81_fast_g5015">r104_e81_fast_g5015</option>
79-
<option value="r104_e81_hac_g5015">r104_e81_hac_g5015</option>
80-
<option value="r104_e81_sup_g5015">r104_e81_sup_g5015</option>
81121
<option value="r104_e81_fast_variant_g5015">r104_e81_fast_variant_g5015</option>
122+
<option value="r104_e81_hac_g5015">r104_e81_hac_g5015</option>
82123
<option value="r104_e81_hac_variant_g5015">r104_e81_hac_variant_g5015</option>
124+
<option value="r104_e81_sup_g5015">r104_e81_sup_g5015</option>
83125
<option value="r104_e81_sup_g610">r104_e81_sup_g610</option>
84-
<option value="r104_e81_sup_variant_g610">r104_e81_sup_variant_g610</option>
85-
<option value="r10_min_high_g303">r10_min_high_g303</option>
86-
<option value="r10_min_high_g340">r10_min_high_g340</option>
126+
<option value="r104_e81_sup_variant_g610">r104_e81_sup_variant_g610</option>
127+
<option value="r941_e81_fast_g514">r941_e81_fast_g514</option>
87128
<option value="r941_e81_fast_variant_g514">r941_e81_fast_variant_g514</option>
88129
<option value="r941_e81_hac_g514">r941_e81_hac_g514</option>
89130
<option value="r941_e81_hac_variant_g514">r941_e81_hac_variant_g514</option>
90131
<option value="r941_e81_sup_g514">r941_e81_sup_g514</option>
91132
<option value="r941_e81_sup_variant_g514">r941_e81_sup_variant_g514</option>
92-
<option value="r941_min_fast_g303">r941_min_fast_g303</option>
93133
<option value="r941_min_fast_g507">r941_min_fast_g507</option>
94134
<option value="r941_min_fast_snp_g507">r941_min_fast_snp_g507</option>
95135
<option value="r941_min_fast_variant_g507">r941_min_fast_variant_g507</option>
96136
<option value="r941_min_hac_g507">r941_min_hac_g507</option>
97137
<option value="r941_min_hac_snp_g507">r941_min_hac_snp_g507</option>
98138
<option value="r941_min_hac_variant_g507">r941_min_hac_variant_g507</option>
99-
<option value="r941_min_high_g303">r941_min_high_g303</option>
100-
<option value="r941_min_high_g330">r941_min_high_g330</option>
101-
<option value="r941_min_high_g340_rle">r941_min_high_g340_rle</option>
102-
<option value="r941_min_high_g344">r941_min_high_g344</option>
103-
<option value="r941_min_high_g351">r941_min_high_g351</option>
104-
<option value="r941_min_high_g360" selected="true">r941_min_high_g360</option>
105139
<option value="r941_min_sup_g507">r941_min_sup_g507</option>
106140
<option value="r941_min_sup_snp_g507">r941_min_sup_snp_g507</option>
107141
<option value="r941_min_sup_variant_g507">r941_min_sup_variant_g507</option>
108-
<option value="r941_prom_fast_g303">r941_prom_fast_g303</option>
109142
<option value="r941_prom_fast_g507">r941_prom_fast_g507</option>
110143
<option value="r941_prom_fast_snp_g507">r941_prom_fast_snp_g507</option>
111144
<option value="r941_prom_fast_variant_g507">r941_prom_fast_variant_g507</option>
112145
<option value="r941_prom_hac_g507">r941_prom_hac_g507</option>
113146
<option value="r941_prom_hac_snp_g507">r941_prom_hac_snp_g507</option>
114147
<option value="r941_prom_hac_variant_g507">r941_prom_hac_variant_g507</option>
115-
<option value="r941_prom_high_g303">r941_prom_high_g303</option>
116-
<option value="r941_prom_high_g330">r941_prom_high_g330</option>
117-
<option value="r941_prom_high_g344">r941_prom_high_g344</option>
118-
<option value="r941_prom_high_g360">r941_prom_high_g360</option>
119-
<option value="r941_prom_high_g4011">r941_prom_high_g4011</option>
120-
<option value="r941_prom_snp_g303">r941_prom_snp_g303</option>
121-
<option value="r941_prom_snp_g322">r941_prom_snp_g322</option>
122-
<option value="r941_prom_snp_g360">r941_prom_snp_g360</option>
123148
<option value="r941_prom_sup_g507">r941_prom_sup_g507</option>
124149
<option value="r941_prom_sup_snp_g507">r941_prom_sup_snp_g507</option>
125150
<option value="r941_prom_sup_variant_g507">r941_prom_sup_variant_g507</option>
126-
<option value="r941_prom_variant_g303">r941_prom_variant_g303</option>
127-
<option value="r941_prom_variant_g322">r941_prom_variant_g322</option>
128-
<option value="r941_prom_variant_g360">r941_prom_variant_g360</option>
129151
<option value="r941_sup_plant_g610">r941_sup_plant_g610</option>
130152
<option value="r941_sup_plant_variant_g610">r941_sup_plant_variant_g610</option>
131153
</param>
@@ -177,12 +199,12 @@ Medaka models are named to indicate i) the pore type, ii) the sequencing device
177199
178200
{pore}_{device}_{caller variant}_{caller version}
179201
180-
For example the model named r941_min_fast_g303 should be used with data from MinION (or GridION) R9.4.1 flowcells using the fast Guppy basecaller version 3.0.3. By contrast the model
202+
For example, the model named r941_min_fast_g507 should be used with data from MinION (or GridION) R9.4.1 flowcells using the fast Guppy basecaller version 5.0.7. By contrast, the model
181203
r941_prom_hac_g507 should be used with PromethION data and the high accuracy basecaller (termed "hac" in Guppy configuration files). Where a version of Guppy has been used without an exactly corresponding medaka model, the medaka model with the highest version equal to or less than the Guppy version should be selected.
182-
204+
183205
]]></token>
184206

185207
<token name="@REFERENCES@"><![CDATA[
186-
More information are available in the `manual <https://github.com/nanoporetech/medaka/tree/master/docs>`_ and `github <https://github.com/nanoporetech/medaka>`_.
208+
More information is available on `GitHub <https://github.com/nanoporetech/medaka>`_.
187209
]]></token>
188210
</macros>

tools/medaka/medaka_consensus.xml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@ medaka_consensus
1616
-t \${GALAXY_SLOTS:-4}
1717
## required
1818
-i '$i'
19-
-d 'input_assembly.fa'
20-
21-
2>&1 | tee '$out_log'
19+
-d 'input_assembly.fa' 2>&1 | tee log.txt
2220
]]></command>
2321
<inputs>
2422
<param argument="-i" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select basecalls"/>
@@ -49,7 +47,7 @@ medaka_consensus
4947
<filter>'gaps' in out</filter>
5048
</data>
5149
<!-- optional -->
52-
<data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
50+
<data name="out_log" format="txt" from_work_dir="log.txt" label="${tool.name} on ${on_string}: Log">
5351
<filter>'log' in out</filter>
5452
</data>
5553
</outputs>
@@ -67,7 +65,7 @@ medaka_consensus
6765
</output>
6866
<output name="out_probs">
6967
<assert_contents>
70-
<has_size value="169146" delta="100"/>
68+
<has_size value="187317" delta="100"/>
7169
</assert_contents>
7270
</output>
7371
<output name="out_calls">
@@ -80,7 +78,7 @@ medaka_consensus
8078
<test expect_num_outputs="5">
8179
<param name="i" value="basecalls.fastq.gz"/>
8280
<param name="d" value="assembly.fasta"/>
83-
<param name="m" value="r941_min_fast_g303"/>
81+
<param name="m" value="r941_min_fast_g507"/>
8482
<param name="b" value="99"/>
8583
<param name="out" value="consensus,probs,calls,log,gaps"/>
8684
<output name="out_consensus">
@@ -91,7 +89,7 @@ medaka_consensus
9189
</output>
9290
<output name="out_probs">
9391
<assert_contents>
94-
<has_size value="169863" delta="100"/>
92+
<has_size value="187317" delta="100"/>
9593
</assert_contents>
9694
</output>
9795
<output name="out_calls">
@@ -126,7 +124,7 @@ The *medaka_consensus* pipeline performs assembly polishing via neural networks.
126124
127125
**Input**
128126
129-
An *assembly* in .fasta format and *basecalls* in .fasta or .fastq format are required. See `Creating a Draft Assembly <https://github.com/nanoporetech/medaka/blob/master/docs/walkthrough.rst>`_ for a detailed example of one method of obtaining these.
127+
An *assembly* in .fasta format and *basecalls* in .fasta or .fastq format are required.
130128
131129
----
132130

0 commit comments

Comments
 (0)