Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3fabee0
fixed 3 tests
acquayefrank Jan 23, 2025
cf93eb6
fixed a few more tests
acquayefrank Jan 23, 2025
822ee55
cleaner test data
acquayefrank Jan 24, 2025
67f7cfe
cleaner test data
acquayefrank Jan 24, 2025
2c265d3
Merge branch 'RECETOX:master' into ipa
acquayefrank Jan 27, 2025
c8bb50e
working state
acquayefrank Jan 27, 2025
6e41fbf
Merge branch 'ipa' of github.com:acquayefrank/galaxytools into ipa
acquayefrank Jan 27, 2025
5be1b0d
Update tools/ipapy2/.shed.yml
acquayefrank Jan 28, 2025
77a0c8b
made changes based on code review
acquayefrank Jan 29, 2025
4cfdafd
completed ms annotation code cleanup
acquayefrank Jan 30, 2025
b3b9e04
finished refactoring
acquayefrank Jan 31, 2025
fe475ce
lint
hechth Feb 3, 2025
40f4207
lint and fixed tests
hechth Feb 3, 2025
5411edf
removed not needed code
hechth Feb 3, 2025
0694a63
added min and max values
hechth Feb 3, 2025
d806e37
lint
hechth Feb 3, 2025
96b7fb8
Merge branch 'RECETOX:master' into ipa
acquayefrank Feb 4, 2025
c978d8b
Update tools/ipapy2/ipapy2_MS1_annotation.xml
acquayefrank Feb 4, 2025
fc5625d
added some extra references in the README
acquayefrank Feb 4, 2025
a8c80e2
added imporvements from code review
acquayefrank Feb 4, 2025
17d9f6d
added descriptions
hechth Feb 4, 2025
4bd53bc
small updates
hechth Feb 4, 2025
bf6e48e
Delete tools/ipapy2/__pycache__ directory
acquayefrank Feb 25, 2025
146effa
Merge branch 'master' into ipa
hechth May 15, 2025
5465e87
formatting with black
hechth May 15, 2025
fc306f0
updated help texts and linting
hechth May 16, 2025
2d6ea01
fixed linting
hechth May 16, 2025
9c34bdc
fixed connection handling
hechth May 16, 2025
251dc7e
cleaned macros file
hechth May 16, 2025
8174abc
updated galaxy profile
hechth May 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions tools/ipapy2/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Repository metadata for the ipaPy2 Galaxy tool wrappers.
name: ipaPy2
owner: recetox
# remote_repository_url points at the wrapper sources; homepage_url at the upstream ipaPy2 project.
remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2"
homepage_url: "https://github.com/francescodc87/ipaPy2"
categories:
- Metabolomics
description: "Mass spectrometry data annotation tool."
long_description: "New Python implementation of the Integrated Probabilistic Annotation (IPA) - A Bayesian annotation method for LC/MS data integrating biochemical relations, isotope patterns and adduct formation."
# Naming templates for per-tool repositories; the {{ tool_id }} / {{ tool_name }} placeholders
# are presumably substituted per tool by the publishing tooling (confirm against its docs).
auto_tool_repositories:
name_template: "{{ tool_id }}"
description_template: "{{ tool_name }} tool from the ipaPy2 package"
# Suite repository that groups the individual tool repositories.
suite:
name: suite_ipapy2
description: tools from the ipaPy2 suite are used for annotation of mass spectrometry data
type: repository_suite_definition
73 changes: 73 additions & 0 deletions tools/ipapy2/ipapy2_MS1_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from ipaPy2 import ipa
from utils import flattern_annotations, MSArgumentParser


def main(
    input_dataset_database,
    input_dataset_adduct,
    ppm,
    ratiosd,
    ppmunk,
    ratiounk,
    ppmthr,
    pRTNone,
    pRTout,
    output_dataset,
    ncores,
):
    """Run the ipaPy2 MS1 annotation and write the flattened result.

    The two datasets are passed positionally to ``ipa.MS1annotation``; every
    other setting is forwarded as a keyword argument of the same name.

    Args:
        input_dataset_database: first positional input of ``ipa.MS1annotation``.
        input_dataset_adduct: second positional input of ``ipa.MS1annotation``.
        ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout, ncores:
            forwarded unchanged to ``ipa.MS1annotation``.
        output_dataset: two-item sequence unpacked as
            ``(write function, file path)``.
    """
    # Unpack first so a malformed output spec fails before the annotation runs.
    writer, destination = output_dataset

    settings = {
        "ppm": ppm,
        "ratiosd": ratiosd,
        "ppmunk": ppmunk,
        "ratiounk": ratiounk,
        "ppmthr": ppmthr,
        "pRTNone": pRTNone,
        "pRTout": pRTout,
        "ncores": ncores,
    }
    raw_annotations = ipa.MS1annotation(
        input_dataset_database, input_dataset_adduct, **settings
    )

    writer(flattern_annotations(raw_annotations), destination)


if __name__ == "__main__":
    # Description text handed to the parser (same runtime value as before).
    description = (
        "\n"
        "Annotation of the dataset based on the MS1 information. Prior probabilities\n"
        "are based on mass only, while post probabilities are based on mass, RT,\n"
        "previous knowledge and isotope patterns.\n"
    )
    parser = MSArgumentParser(description)

    # Both datasets take two values and use the parser's "load_data" action,
    # which is provided outside this block.
    dataset_flags = (
        (
            "--input_dataset_database",
            "A dataset containing the MS1 data. Ideally obtained from"
            " map_isotope_patterns",
        ),
        (
            "--input_dataset_adducts",
            "A dataset containing information on all possible adducts.",
        ),
    )
    for flag, help_text in dataset_flags:
        parser.add_argument(
            flag,
            nargs=2,
            action="load_data",
            required=True,
            help=help_text,
        )

    # The remaining options (ppm, ratiosd, ..., output_dataset, ncores) are not
    # declared here; presumably MSArgumentParser defines them -- confirm in utils.
    cli = parser.parse_args()
    main(
        cli.input_dataset_database,
        cli.input_dataset_adducts,
        cli.ppm,
        cli.ratiosd,
        cli.ppmunk,
        cli.ratiounk,
        cli.ppmthr,
        cli.pRTNone,
        cli.pRTout,
        cli.output_dataset,
        cli.ncores,
    )
123 changes: 123 additions & 0 deletions tools/ipapy2/ipapy2_MS1_annotation.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<tool id="ipapy2_MS1_annotation" name="ipaPy2 MS1 annotation" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<!-- Galaxy wrapper that runs ipapy2_MS1_annotation.py. The @TOOL_VERSION@ and @PROFILE@ tokens and all expanded macros are presumably defined in macros.xml (not visible here). -->
<description>annotate clustered features using a supplied MS1 database</description>
<macros>
<import>macros.xml</import>
</macros>

<expand macro="requirements"/>

<!-- Command notes: when $ppmunk evaluates as false the ppm value is passed as ppmunk; when $ppmthr evaluates as false the fallback is 2 * ppm (computed_ppmthr). The core count is taken from GALAXY_SLOTS and defaults to 1. -->
<command detect_errors="exit_code"><![CDATA[
#set $computed_ppmthr = float($ppm) * 2
python3 '${__tool_directory__}/ipapy2_MS1_annotation.py'
--input_dataset_database '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
--input_dataset_adducts '${all_adducts}' '${all_adducts.ext}'
--ppm ${ppm}
--ratiosd ${ratiosd}
#if $ppmunk
--ppmunk ${ppmunk}
#else
--ppmunk ${ppm}
#end if
--ratiounk ${ratiounk}
#if $ppmthr
--ppmthr ${ppmthr}
#else
--ppmthr ${computed_ppmthr}
#end if
--pRTNone ${pRTNone}
--pRTout ${pRTout}
--output_dataset '${MS1_annotations}' '${MS1_annotations.ext}'
--ncores \${GALAXY_SLOTS:-1}
]]></command>

<!-- Two data inputs are declared directly; the numeric settings come from the ppm, ms_unknown and ms_options macros. NOTE(review): the command references those settings without a section prefix; confirm this matches how the macros name them. -->
<inputs>
<param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns" />
<param label="all possible adducts" name="all_adducts" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
<expand macro="ppm"/>
<section name="unknown" title="settings for the identification of unknowns">
<expand macro="ms_unknown"/>
</section>
<section name="optional_settings" title="optional settings">
<expand macro="ms_options"/>
</section>
</inputs>

<!-- The output takes its datatype from the mapped_isotope_patterns input (format_source). -->
<outputs>
<data label="${tool.name} on ${on_string}" name="MS1_annotations" format_source="mapped_isotope_patterns"/>
</outputs>

<tests>
<!-- Single test with ppm = 3; the result is compared with MS1_annotations.csv, tolerating up to 20 differing lines. -->
<test>
<param name="mapped_isotope_patterns" value="mapped_isotope_patterns.csv"/>
<param name="all_adducts" value="all_adducts.csv"/>
<param name="ppm" value="3"/>
<output name="MS1_annotations" file="MS1_annotations.csv" lines_diff="20"/>
</test>
</tests>

<help><![CDATA[

.. _ipapy2_ms1_annotation:

==========================
ipaPy2 MS1 Annotation Tool
==========================

**Tool Description**

This tool annotates clustered features in your dataset using MS1 information and a supplied adducts table. It calculates prior and posterior probabilities for each feature, leveraging mass, retention time (RT), chemical knowledge, and isotope patterns to provide high-confidence annotations.

How it works
------------

- **Prior probabilities** are calculated using only the mass information.
- **Posterior probabilities** incorporate mass, RT, prior knowledge, and isotope patterns for more accurate annotation.
- The tool matches features in your data to possible adducts and database entries, considering user-defined tolerances for mass (ppm) and other optional parameters.

Inputs
------

1. **Mapped isotope patterns**
Dataset containing MS1 data, ideally obtained from the ``map_isotope_patterns`` tool.

2. **All possible adducts**
Table listing all possible adducts for the database, ideally obtained from the ``compute_all_adducts`` tool.

3. **Parameters**
- **ppm**: Mass tolerance in parts per million for matching.
- **Unknown settings**: Parameters for handling unknowns, such as ppm and ratio thresholds.
- **Optional settings**: Additional parameters for advanced annotation control.

Outputs
-------

- **MS1_annotations**
Annotated dataset with additional columns describing the best-matching database entries, probabilities, and isotope pattern scores.

Example
-------

Suppose you have mapped isotope patterns and a list of all adducts. You can use this tool to annotate your features as follows:

.. code-block::

mapped_isotope_patterns.csv
all_adducts.csv

Set the desired tolerances (e.g., ``ppm = 3``) and run the tool. The output will be a table with annotations for each feature.

Notes
-----

- For best results, ensure your input files are correctly formatted and contain the required columns.
- The tool is designed to be flexible and can handle various input formats (CSV, TSV, Parquet, Tabular).

References
----------

- For more details on the annotation algorithm and scoring, refer to the ipaPy2 documentation or associated publications.

]]></help>

<expand macro="citations"/>
</tool>
136 changes: 136 additions & 0 deletions tools/ipapy2/ipapy2_MS2_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from ipaPy2 import ipa
from utils import flattern_annotations, MSArgumentParser


def main(
    input_dataset_mapped_isotope_patterns,
    input_dataset_MS2,
    input_dataset_adducts,
    input_dataset_MS2_DB,
    ppm,
    ratiosd,
    ppmunk,
    ratiounk,
    ppmthr,
    pRTNone,
    pRTout,
    mzdCS,
    ppmCS,
    CSunk,
    evfilt,
    output_dataset,
    ncores,
):
    """Run the ipaPy2 MS1+MS2 annotation and write the flattened result.

    The four datasets are passed positionally to ``ipa.MSMSannotation`` in the
    order they appear in the signature; every other setting is forwarded as a
    keyword argument of the same name.

    Args:
        input_dataset_mapped_isotope_patterns, input_dataset_MS2,
        input_dataset_adducts, input_dataset_MS2_DB: positional inputs of
            ``ipa.MSMSannotation``.
        ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout, mzdCS, ppmCS,
        CSunk, evfilt, ncores: forwarded unchanged to ``ipa.MSMSannotation``.
        output_dataset: two-item sequence unpacked as
            ``(write function, file path)``.
    """
    datasets = (
        input_dataset_mapped_isotope_patterns,
        input_dataset_MS2,
        input_dataset_adducts,
        input_dataset_MS2_DB,
    )
    options = {
        "ppm": ppm,
        "ratiosd": ratiosd,
        "ppmunk": ppmunk,
        "ratiounk": ratiounk,
        "ppmthr": ppmthr,
        "pRTNone": pRTNone,
        "pRTout": pRTout,
        "mzdCS": mzdCS,
        "ppmCS": ppmCS,
        "CSunk": CSunk,
        "evfilt": evfilt,
        "ncores": ncores,
    }
    flat = flattern_annotations(ipa.MSMSannotation(*datasets, **options))

    # The output spec is only unpacked once the annotation has finished.
    writer, destination = output_dataset
    writer(flat, destination)


def str_to_bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` makes argparse call ``bool()`` on the raw text, so every
    non-empty value -- including ``"False"`` and ``"0"`` -- becomes ``True``.
    This converter interprets the usual spellings explicitly instead.

    Args:
        value: the raw argument text (an actual ``bool`` is returned as is).

    Returns:
        ``True`` for true/t/yes/y/1, ``False`` for false/f/no/n/0 or an empty
        string (case-insensitive, surrounding whitespace ignored).

    Raises:
        ValueError: for any other text; argparse reports a ``ValueError`` from
            a ``type`` callable as an invalid argument value.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, matching what bool("") returned before.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise ValueError(f"not a boolean value: {value!r}")


if __name__ == "__main__":
    parser = MSArgumentParser(
        "Annotation of the dataset base on the MS1 and MS2 information. Prior\n"
        "probabilities are based on mass only, while post probabilities are based\n"
        "on mass, RT, previous knowledge and isotope patterns."
    )

    # The four datasets each take two values and use the parser's "load_data"
    # action, which is provided outside this block.
    parser.add_argument(
        "--input_dataset_mapped_isotope_patterns",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the MS1 data. Ideally obtained from"
            " map_isotope_patterns"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 fragmentation data",
    )
    parser.add_argument(
        "--input_dataset_adducts",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the information on all the possible adducts given the"
            " database. Ideally obtained from compute_all_adducts"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2_DB",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 database",
    )

    # NOTE(review): mzdCS is described as an m/z difference but parsed as int,
    # so fractional tolerances are rejected -- confirm whether that is intended.
    parser.add_argument(
        "--mzdCS",
        type=int,
        default=0,
        help=(
            "maximum mz difference allowed when computing cosine similarity\n"
            "scores. If one wants to use this parameter instead of ppmCS, this\n"
            "must be set to 0. Default 0."
        ),
    )
    parser.add_argument(
        "--ppmCS",
        type=int,
        default=10,
        help=(
            "maximum ppm allowed when computing cosine similarity scores.\n"
            "If one wants to use this parameter instead of mzdCS, this must be\n"
            "set to 0. Default 10."
        ),
    )
    parser.add_argument(
        "--CSunk",
        type=float,
        default=0.7,
        help=(
            "cosine similarity score associated with the 'unknown' annotation.\n"
            "Default 0.7"
        ),
    )
    # str_to_bool instead of bool: bool("False") is True, which silently
    # enabled the filter whenever any value was supplied.
    parser.add_argument(
        "--evfilt",
        type=str_to_bool,
        default=False,
        help=(
            "Default value False. If true, only spectrum acquired with the same\n"
            "collision energy are considered."
        ),
    )

    # The remaining options (ppm, ratiosd, ..., output_dataset, ncores) are not
    # declared here; presumably MSArgumentParser defines them -- confirm in utils.
    args = parser.parse_args()
    main(
        args.input_dataset_mapped_isotope_patterns,
        args.input_dataset_MS2,
        args.input_dataset_adducts,
        args.input_dataset_MS2_DB,
        args.ppm,
        args.ratiosd,
        args.ppmunk,
        args.ratiounk,
        args.ppmthr,
        args.pRTNone,
        args.pRTout,
        args.mzdCS,
        args.ppmCS,
        args.CSunk,
        args.evfilt,
        args.output_dataset,
        args.ncores,
    )
Loading