Skip to content

Commit 749acab

Browse files
Add kMetaShot and DM (#1734)
* Add kMetaShot and DM * add error test case * Apply suggestions from code review --------- Co-authored-by: Björn Grüning <bjoern@gruenings.eu>
1 parent 528a2a0 commit 749acab

File tree

13 files changed

+279
-0
lines changed

13 files changed

+279
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
categories:
2+
- Data Managers
3+
- Metagenomics
4+
homepage_url: https://github.com/gdefazio/kMetaShot
5+
description: Data manager for kMetaShot reference data
6+
long_description: Data manager for kMetaShot reference data
7+
name: kmetashot_build_database
8+
owner: bgruening
9+
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kmetashot
10+
type: unrestricted
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
<tool id="kmetashot_build_database" name="kMetaShot" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2+
<description>database builder</description>
3+
<macros>
4+
<token name="@TOOL_VERSION@">2.0</token>
5+
<token name="@VERSION_SUFFIX@">0</token>
6+
<token name="@PROFILE@">24.1</token>
7+
</macros>
8+
<requirements>
9+
<requirement type="package" version="@TOOL_VERSION@">kmetashot</requirement>
10+
</requirements>
11+
<!-- Creates the output's extra_files_path directory, then either downloads the selected
     reference release from Zenodo with wget and moves it there (release "1" fetches
     kMetaShot_reference.h5, any other value fetches the 2025-05-22 file), or, when the
     hidden "test" parameter equals "true", only touches an empty placeholder file.
     Finally the rendered "dmjson" configfile is copied to the output dataset. -->
<command><![CDATA[
12+
mkdir -p "$out_file.extra_files_path" &&
13+
#if $test != "true":
14+
#if $release == "1":
15+
wget "https://zenodo.org/records/17591095/files/kMetaShot_reference.h5" &&
16+
mv "kMetaShot_reference.h5" "$out_file.extra_files_path" &&
17+
#else:
18+
wget "https://zenodo.org/records/17375120/files/kMetaShot_bacteria_archaea_2025-05-22.h5" &&
19+
mv "kMetaShot_bacteria_archaea_2025-05-22.h5" "$out_file.extra_files_path" &&
20+
#end if
21+
#else:
22+
touch '$out_file.extra_files_path'/kMetaShot_bacteria_archaea_2025-05-22.h5 &&
23+
#end if
24+
cp "$dmjson" "$out_file"
25+
]]></command>
26+
<configfiles>
27+
<configfile name="dmjson"><![CDATA[
28+
{
29+
"data_tables":{
30+
"kmetashot":[
31+
{
32+
"dbkey":"kmetashot",
33+
"version":"${release}",
34+
#if $test == "true":
35+
"path":"${out_file.extra_files_path}/kMetaShot_bacteria_archaea_2025-05-22.h5",
36+
"name":"kMetaShot reference data 2025-05-22 - TEST",
37+
"value":"2025-05-22"
38+
#else:
39+
#if $release == "1":
40+
"path":"${out_file.extra_files_path}/kMetaShot_reference.h5",
41+
"name":"kMetaShot reference data 2022-07-31",
42+
"value":"2022-07-31"
43+
#else:
44+
"path":"${out_file.extra_files_path}/kMetaShot_bacteria_archaea_2025-05-22.h5",
45+
"name":"kMetaShot reference data 2025-05-22",
46+
"value":"2025-05-22"
47+
#end if
48+
#end if
49+
}
50+
]
51+
}
52+
}]]>
53+
</configfile>
54+
</configfiles>
55+
<inputs>
56+
<param name="release" type="select" multiple="false" label="kMetaShot reference data release">
57+
<option value="1">First release</option>
58+
<option value="2">Second release</option>
59+
</param>
60+
<!-- Hidden switch set to "true" only by the tool test, so the test run skips the download. -->
<param name="test" type="hidden" value="" label="Run test"/>
61+
</inputs>
62+
<outputs>
63+
<data name="out_file" format="data_manager_json" />
64+
</outputs>
65+
<tests>
66+
<test expect_num_outputs="1">
67+
<param name="release" value="2"/>
68+
<param name="test" value="true"/>
69+
<output name="out_file">
70+
<assert_contents>
71+
<has_text text="25-05-22"/>
72+
<has_text text="kMetaShot reference data 2025-05-22 - TEST"/>
73+
</assert_contents>
74+
</output>
75+
</test>
76+
</tests>
77+
<help><![CDATA[
78+
Download kMetaShot reference data (a .h5 file) from Zenodo and register it in the kmetashot data table.
79+
]]></help>
80+
<citations>
81+
<citation type="doi">10.1038/s41592-023-01940-w</citation>
82+
</citations>
83+
</tool>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<data_managers>
2+
<data_manager tool_file="data_manager/kmetashot_datamanager.xml" id="kmetashot_build_database">
3+
<data_table name="kmetashot">
4+
<output>
5+
<column name="value"/>
6+
<column name="dbkey"/>
7+
<column name="name"/>
8+
<column name="version"/>
9+
<!-- The "path" column is read from the out_file JSON; the referenced file is moved below
     GALAXY_DATA_MANAGER_DATA_PATH and the stored value is rewritten to that location and
     made absolute.
     NOTE(review): the data manager JSON emits "path" as a full path under the output's
     extra_files_path, not a bare file name, yet ${path} is appended after
     kmetashot/${value}/ in both the target and the value translation. Confirm that the
     resulting location is the intended one. -->
<column name="path" output_ref="out_file">
10+
<move type="file">
11+
<source>${path}</source>
12+
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kmetashot/${value}/${path}</target>
13+
</move>
14+
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kmetashot/${value}/${path}</value_translation>
15+
<value_translation type="function">abspath</value_translation>
16+
</column>
17+
</output>
18+
</data_table>
19+
</data_manager>
20+
</data_managers>
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
25-05-22 kMetaShot-25-05-22 kMetaShot reference data 2025-05-22 2 /tmp/tmpf_hplx2a/galaxy-dev/tool-data/kmetashot/2/kMetaShot_bacteria_archaea_2025-05-22.h5
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#This is a sample file distributed with Galaxy that enables tools
2+
#to use the kMetaShot database.
3+
#You will need to create these data files using the following command
4+
5+
#wget [selected version] [url_from_download]
6+
7+
#The <version> column indicates which release of the kMetaShot reference data was downloaded
8+
9+
#25-05-22 kMetaShot-25-05-22 kMetaShot reference data 2025-05-22 2 /mnt/galaxyIndices/kMetaShot_database/kMetaShot_bacteria_archaea_2025-05-22.h5
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<tables>
2+
<table name="kmetashot" comment_char="#" allow_duplicate_entries="False">
3+
<columns>value, dbkey, name, version, path</columns>
4+
<file path="tool-data/kmetashot.loc" />
5+
</table>
6+
</tables>
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<tables>
2+
<!-- Location of kmetashot indexes for testing -->
3+
<table name="kmetashot" comment_char="#" allow_duplicate_entries="False">
4+
<columns>value, dbkey, name, version, path</columns>
5+
<file path="${__HERE__}/test-data/kmetashot.loc" />
6+
</table>
7+
</tables>

tools/kmetashot/.shed.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
name: kmetashot
2+
owner: bgruening
3+
description: an alignment-free taxonomic classifier based on k-mer/minimizer counting
4+
long_description: |
5+
kMetaShot, a bioinformatic approach relying on k-mer/minimizer profiling
6+
from the reference prokaryotic genomes, in order to build a concise
7+
representation of genomic diversity and perform MAG taxonomic
8+
classification up to the strain level
9+
homepage_url: https://github.com/gdefazio/kMetaShot
10+
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/kmetashot
11+
categories:
12+
- Metagenomics
13+
type: unrestricted

tools/kmetashot/kmetashot.xml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
<tool id="kmetashot" name="kMetaShot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2+
<description>an alignment-free taxonomic classifier based on k-mer/minimizer counting</description>
3+
<macros>
4+
<token name="@TOOL_VERSION@">2.0</token>
5+
<token name="@VERSION_SUFFIX@">0</token>
6+
<token name="@PROFILE@">25.0</token>
7+
</macros>
8+
<requirements>
9+
<requirement type="package" version="@TOOL_VERSION@">kmetashot</requirement>
10+
</requirements>
11+
<!-- Links every input bin/MAG into ./bins under a sanitized name, then runs the classifier
     on that directory. Results are written to ./output, where the output collection
     discovers the CSV files. -->
<command detect_errors="exit_code">
<![CDATA[
#import re

mkdir -p output bins &&

#for $file in $bins_dir:
## Element identifiers are user-controlled: replace everything except word characters,
## '-' and '.' so quotes, spaces or path separators cannot break the single-quoted
## shell argument below.
#set $safe_name = re.sub('[^\w\-.]', '_', str($file.element_identifier))
ln -s '$file' 'bins/$safe_name' &&
#end for

kMetaShot_classifier_NV.py
-b bins
-o output
-r '$reference.fields.path'
-p "\${GALAXY_SLOTS:-10}"
-a ${ass2ref}

]]>
</command>
30+
<inputs>
31+
<param argument="--bins_dir" type="data" multiple="true" format="fasta,fasta.gz" label="Bin(s)/MAG(s) fasta file"/>
32+
<param argument="--reference" type="select" label="Select reference">
33+
<options from_data_table="kmetashot">
34+
<filter type="sort_by" column="2"/>
35+
</options>
36+
<!-- Fails validation with an explanatory message when the kmetashot data table is empty. -->
<validator type="no_options" message="No reference data for kMetaShot is installed. Please contact the Galaxy administrators to request one be installed."/>
37+
</param>
38+
<param argument="--ass2ref" type="float" min="0.0" value="0.0" max="1.0" label="Set ass2ref parameter" help="Ass2ref is a ratio between the number of MAG minimizers and the reference minimizers related to the assigned strain"/>
39+
</inputs>
40+
<outputs>
41+
<collection name="result" type="list" label="${tool.name} on ${on_string}: RESULTS">
42+
<discover_datasets pattern="(?P&lt;designation&gt;.*)\.csv" format="tabular" directory="output"/>
43+
</collection>
44+
</outputs>
45+
<tests>
46+
<test expect_exit_code="1" expect_failure="true">
47+
<param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
48+
<param name="ass2ref" value="0.2"/>
49+
<assert_command>
50+
<has_text text="kMetaShot_classifier_NV.py -b bins"/>
51+
<has_text text="-o output"/>
52+
<has_text text="-a 0.2"/>
53+
</assert_command>
54+
</test>
55+
<!-- Since this tool needs its reference data to work, there is no way to really test it; because of this, there is only the test above, which checks whether the tool starts or not
56+
<test expect_num_outputs="1">
57+
<param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
58+
<param name="ass2ref" value="0.2"/>
59+
<param name="reference" value="25-05-22"/>
60+
<output_collection name="result" type="list" count="2"/>
61+
<assert_command>
62+
<has_text text="kMetaShot_classifier_NV.py -b bins"/>
63+
<has_text text="-o output"/>
64+
<has_text text="-a 0.2"/>
65+
</assert_command>
66+
</test>
67+
<test expect_num_outputs="1">
68+
<param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
69+
<param name="ass2ref" value="0.3"/>
70+
<param name="reference" value="25-05-22"/>
71+
<output_collection name="result" type="list" count="2"/>
72+
<assert_command>
73+
<has_text text="kMetaShot_classifier_NV.py -b bins"/>
74+
<has_text text="-o output"/>
75+
<has_text text="-a 0.3"/>
76+
</assert_command>
77+
</test>
78+
<test expect_num_outputs="1">
79+
<param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
80+
<param name="ass2ref" value="0.0"/>
81+
<param name="reference" value="25-05-22"/>
82+
<output_collection name="result" type="list" count="1"/>
83+
<assert_command>
84+
<has_text text="kMetaShot_classifier_NV.py -b bins"/>
85+
<has_text text="-o output"/>
86+
<has_text text="-a 0.0"/>
87+
</assert_command>
88+
</test>
89+
-->
90+
</tests>
91+
<help>
92+
<![CDATA[
93+
94+
This tool is a taxonomic classifier that uses a new, alignment-free algorithm.
95+
The data from the input files is transformed into bits, with each base represented by 2 bits.
96+
97+
A = 00
98+
C = 01
99+
G = 10
100+
T = 11
101+
102+
The reference that was built for this purpose can therefore use the transformed data and match it to create a classification for the bin(s)/MAG(s).
103+
104+
**Input**
105+
106+
FASTA file(s) in fasta and/or fasta.gz format (.fa, .fasta, .fna, .fa.gz, .fasta.gz, .fna.gz are allowed extensions)
107+
108+
**Reference**
109+
110+
The reference data needed for this tool is provided by a data manager, which installs the selected release of the data
111+
112+
**Output**
113+
114+
The output is a collection of CSV file(s) containing the classification of the input bin(s)/MAG(s)
115+
116+
]]>
117+
</help>
118+
<citations>
119+
<citation type="doi">10.1093/bib/bbae680</citation>
120+
</citations>
121+
</tool>
837 KB
Binary file not shown.

0 commit comments

Comments
 (0)