Skip to content

Commit 64b61ff

Browse files
authored
Merge pull request #629 from acquayefrank/ipa
ipaPy2 PR
2 parents 0765a69 + 8174abc commit 64b61ff

36 files changed

+3136
-0
lines changed

tools/ipapy2/.shed.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: ipaPy2
2+
owner: recetox
3+
remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2"
4+
homepage_url: "https://github.com/francescodc87/ipaPy2"
5+
categories:
6+
- Metabolomics
7+
description: "Mass spectrometry data annotation tool."
8+
long_description: "New Python implementation of the Integrated Probabilistic Annotation (IPA) - A Bayesian annotation method for LC/MS data integrating biochemical relations, isotope patterns and adduct formation."
9+
auto_tool_repositories:
10+
name_template: "{{ tool_id }}"
11+
description_template: "{{ tool_name }} tool from the ipaPy2 package"
12+
suite:
13+
name: suite_ipapy2
14+
description: tools from the ipaPy2 suite are used for annotation of mass spectrometry data
15+
type: repository_suite_definition
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from ipaPy2 import ipa
2+
from utils import flattern_annotations, MSArgumentParser
3+
4+
5+
def main(
    input_dataset_database,
    input_dataset_adduct,
    ppm,
    ratiosd,
    ppmunk,
    ratiounk,
    ppmthr,
    pRTNone,
    pRTout,
    output_dataset,
    ncores,
):
    """Run the ipaPy2 MS1 annotation and write the flattened result.

    The two input datasets are handed positionally to
    ``ipa.MS1annotation``; every tuning value (``ppm``, ``ratiosd``,
    ``ppmunk``, ``ratiounk``, ``ppmthr``, ``pRTNone``, ``pRTout``,
    ``ncores``) is forwarded as a keyword argument under its own name.

    ``output_dataset`` must unpack into exactly two items,
    ``(write_func, file_path)``. The flattened annotations are saved by
    calling ``write_func(flattened, file_path)``.
    """
    # Unpack up front so a malformed output spec fails before the
    # annotation call is made.
    writer, destination = output_dataset

    settings = {
        "ppm": ppm,
        "ratiosd": ratiosd,
        "ppmunk": ppmunk,
        "ratiounk": ratiounk,
        "ppmthr": ppmthr,
        "pRTNone": pRTNone,
        "pRTout": pRTout,
        "ncores": ncores,
    }
    result = ipa.MS1annotation(
        input_dataset_database,
        input_dataset_adduct,
        **settings,
    )

    writer(flattern_annotations(result), destination)
34+
35+
36+
if __name__ == "__main__":
    # Script entry point: declare the two dataset options specific to the MS1
    # annotation, parse the command line and delegate to main().
    description = """
        Annotation of the dataset based on the MS1 information. Prior probabilities
        are based on mass only, while post probabilities are based on mass, RT,
        previous knowledge and isotope patterns.
        """
    parser = MSArgumentParser(description)

    # Each dataset option takes two tokens and is handled by the custom
    # "load_data" action.
    parser.add_argument(
        "--input_dataset_database",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the MS1 data. Ideally obtained from"
            " map_isotope_patterns"
        ),
    )
    parser.add_argument(
        "--input_dataset_adducts",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing information on all possible adducts.",
    )

    # ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout, output_dataset
    # and ncores are not declared in this script; presumably MSArgumentParser
    # registers them -- verify against utils.
    cli = parser.parse_args()
    main(
        cli.input_dataset_database,
        cli.input_dataset_adducts,
        cli.ppm,
        cli.ratiosd,
        cli.ppmunk,
        cli.ratiounk,
        cli.ppmthr,
        cli.pRTNone,
        cli.pRTout,
        cli.output_dataset,
        cli.ncores,
    )
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<tool id="ipapy2_MS1_annotation" name="ipaPy2 MS1 annotation" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
2+
<description>annotate clustered features using a supplied MS1 database</description>
3+
<macros>
4+
<import>macros.xml</import>
5+
</macros>
6+
7+
<expand macro="requirements"/>
8+
9+
<command detect_errors="exit_code"><![CDATA[
10+
#set $computed_ppmthr = float($ppm) * 2
11+
python3 '${__tool_directory__}/ipapy2_MS1_annotation.py'
12+
--input_dataset_database '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
13+
--input_dataset_adducts '${all_adducts}' '${all_adducts.ext}'
14+
--ppm ${ppm}
15+
--ratiosd ${ratiosd}
16+
#if $ppmunk
17+
--ppmunk ${ppmunk}
18+
#else
19+
--ppmunk ${ppm}
20+
#end if
21+
--ratiounk ${ratiounk}
22+
#if $ppmthr
23+
--ppmthr ${ppmthr}
24+
#else
25+
--ppmthr ${computed_ppmthr}
26+
#end if
27+
--pRTNone ${pRTNone}
28+
--pRTout ${pRTout}
29+
--output_dataset '${MS1_annotations}' '${MS1_annotations.ext}'
30+
--ncores \${GALAXY_SLOTS:-1}
31+
]]></command>
32+
33+
<inputs>
34+
<param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns" />
35+
<param label="all possible adducts" name="all_adducts" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
36+
<expand macro="ppm"/>
37+
<section name="unknown" title="settings for the identification of unknowns">
38+
<expand macro="ms_unknown"/>
39+
</section>
40+
<section name="optional_settings" title="optional settings">
41+
<expand macro="ms_options"/>
42+
</section>
43+
</inputs>
44+
45+
<outputs>
46+
<data label="${tool.name} on ${on_string}" name="MS1_annotations" format_source="mapped_isotope_patterns"/>
47+
</outputs>
48+
49+
<tests>
50+
<test>
51+
<param name="mapped_isotope_patterns" value="mapped_isotope_patterns.csv"/>
52+
<param name="all_adducts" value="all_adducts.csv"/>
53+
<param name="ppm" value="3"/>
54+
<output name="MS1_annotations" file="MS1_annotations.csv" lines_diff="20"/>
55+
</test>
56+
</tests>
57+
58+
<help><![CDATA[
59+
60+
.. _ipapy2_ms1_annotation:
61+
62+
==========================
63+
ipaPy2 MS1 Annotation Tool
64+
==========================
65+
66+
**Tool Description**
67+
68+
This tool annotates clustered features in your dataset using MS1 information and a supplied adducts table. It calculates prior and posterior probabilities for each feature, leveraging mass, retention time (RT), chemical knowledge, and isotope patterns to provide high-confidence annotations.
69+
70+
How it works
71+
------------
72+
73+
- **Prior probabilities** are calculated using only the mass information.
74+
- **Posterior probabilities** incorporate mass, RT, prior knowledge, and isotope patterns for more accurate annotation.
75+
- The tool matches features in your data to possible adducts and database entries, considering user-defined tolerances for mass (ppm) and other optional parameters.
76+
77+
Inputs
78+
------
79+
80+
1. **Mapped isotope patterns**
81+
Dataset containing MS1 data, ideally obtained from the ``map_isotope_patterns`` tool.
82+
83+
2. **All possible adducts**
84+
Table listing all possible adducts for the database, ideally obtained from the ``compute_all_adducts`` tool.
85+
86+
3. **Parameters**
87+
- **ppm**: Mass tolerance in parts per million for matching.
88+
- **Unknown settings**: Parameters for handling unknowns, such as ppm and ratio thresholds.
89+
- **Optional settings**: Additional parameters for advanced annotation control.
90+
91+
Outputs
92+
-------
93+
94+
- **MS1_annotations**
95+
Annotated dataset with additional columns describing the best-matching database entries, probabilities, and isotope pattern scores.
96+
97+
Example
98+
-------
99+
100+
Suppose you have mapped isotope patterns and a list of all adducts. You can use this tool to annotate your features as follows:
101+
102+
.. code-block::
103+
104+
mapped_isotope_patterns.csv
105+
all_adducts.csv
106+
107+
Set the desired tolerances (e.g., ``ppm = 3``) and run the tool. The output will be a table with annotations for each feature.
108+
109+
Notes
110+
-----
111+
112+
- For best results, ensure your input files are correctly formatted and contain the required columns.
113+
- The tool is designed to be flexible and can handle various input formats (CSV, TSV, Parquet, Tabular).
114+
115+
References
116+
----------
117+
118+
- For more details on the annotation algorithm and scoring, refer to the ipaPy2 documentation or associated publications.
119+
120+
]]></help>
121+
122+
<expand macro="citations"/>
123+
</tool>
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
from ipaPy2 import ipa
2+
from utils import flattern_annotations, MSArgumentParser
3+
4+
5+
def main(
    input_dataset_mapped_isotope_patterns,
    input_dataset_MS2,
    input_dataset_adducts,
    input_dataset_MS2_DB,
    ppm,
    ratiosd,
    ppmunk,
    ratiounk,
    ppmthr,
    pRTNone,
    pRTout,
    mzdCS,
    ppmCS,
    CSunk,
    evfilt,
    output_dataset,
    ncores,
):
    """Run the ipaPy2 MS1+MS2 annotation and write the flattened result.

    The four input datasets are handed positionally to
    ``ipa.MSMSannotation`` in the order mapped isotope patterns, MS2 data,
    adducts, MS2 database. All remaining tuning values are forwarded as
    keyword arguments under their own names.

    ``output_dataset`` must unpack into exactly two items,
    ``(write_func, file_path)``. The flattened annotations are saved by
    calling ``write_func(flattened, file_path)``.
    """
    datasets = (
        input_dataset_mapped_isotope_patterns,
        input_dataset_MS2,
        input_dataset_adducts,
        input_dataset_MS2_DB,
    )
    settings = {
        "ppm": ppm,
        "ratiosd": ratiosd,
        "ppmunk": ppmunk,
        "ratiounk": ratiounk,
        "ppmthr": ppmthr,
        "pRTNone": pRTNone,
        "pRTout": pRTout,
        "mzdCS": mzdCS,
        "ppmCS": ppmCS,
        "CSunk": CSunk,
        "evfilt": evfilt,
        "ncores": ncores,
    }

    result = ipa.MSMSannotation(*datasets, **settings)
    flattened = flattern_annotations(result)

    # The output spec is only unpacked once the annotation has finished.
    writer, destination = output_dataset
    writer(flattened, destination)
45+
46+
47+
def str_to_bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` applies ``type`` to the raw string, and ``bool("False")`` is
    ``True`` because any non-empty string is truthy. This converter maps the
    usual spellings explicitly instead.

    Accepted (case-insensitive, surrounding whitespace ignored):
    true values  -- "true", "t", "yes", "y", "1"
    false values -- "false", "f", "no", "n", "0" and the empty string
    (the empty string stays false, matching what ``bool("")`` returned).

    Raises:
        argparse.ArgumentTypeError: for any other token, so argparse reports
        a normal usage error instead of silently treating it as true.
    """
    # Local import: argparse is not among this script's module-level imports.
    import argparse

    if isinstance(value, bool):
        return value

    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got %r" % (value,)
    )


if __name__ == "__main__":
    # Script entry point: declare the options specific to the MS1+MS2
    # annotation, parse the command line and delegate to main().
    parser = MSArgumentParser(
        """Annotation of the dataset base on the MS1 and MS2 information. Prior
        probabilities are based on mass only, while post probabilities are based
        on mass, RT, previous knowledge and isotope patterns."""
    )

    # Each dataset option takes two tokens and is handled by the custom
    # "load_data" action.
    parser.add_argument(
        "--input_dataset_mapped_isotope_patterns",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the MS1 data. Ideally obtained from"
            " map_isotope_patterns"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 fragmentation data",
    )
    parser.add_argument(
        "--input_dataset_adducts",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the information on all the possible adducts given the"
            " database. Ideally obtained from compute_all_adducts"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2_DB",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 database",
    )

    # NOTE(review): type=int rejects fractional m/z tolerances such as 0.01 --
    # confirm whether ipaPy2 expects a float for mzdCS.
    parser.add_argument(
        "--mzdCS",
        type=int,
        default=0,
        help="""maximum mz difference allowed when computing cosine similarity
        scores. If one wants to use this parameter instead of ppmCS, this
        must be set to 0. Default 0.""",
    )
    parser.add_argument(
        "--ppmCS",
        type=int,
        default=10,
        help="""maximum ppm allowed when computing cosine similarity scores.
        If one wants to use this parameter instead of mzdCS, this must be
        set to 0. Default 10.""",
    )
    parser.add_argument(
        "--CSunk",
        type=float,
        default=0.7,
        help="""cosine similarity score associated with the 'unknown' annotation.
        Default 0.7""",
    )
    # type=bool would turn "False"/"false" into True (non-empty string), so an
    # explicit converter is used instead.
    parser.add_argument(
        "--evfilt",
        type=str_to_bool,
        default=False,
        help="""Default value False. If true, only spectrum acquired with the same
        collision energy are considered.""",
    )

    # ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout, output_dataset
    # and ncores are not declared in this script; presumably MSArgumentParser
    # registers them -- verify against utils.
    args = parser.parse_args()
    main(
        args.input_dataset_mapped_isotope_patterns,
        args.input_dataset_MS2,
        args.input_dataset_adducts,
        args.input_dataset_MS2_DB,
        args.ppm,
        args.ratiosd,
        args.ppmunk,
        args.ratiounk,
        args.ppmthr,
        args.pRTNone,
        args.pRTout,
        args.mzdCS,
        args.ppmCS,
        args.CSunk,
        args.evfilt,
        args.output_dataset,
        args.ncores,
    )

0 commit comments

Comments
 (0)