RECETOX · hechth · May 16, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 24, 2025
diff --git a/tools/ipapy2/.shed.yml b/tools/ipapy2/.shed.yml
@@ -0,0 +1,15 @@
+# Tool Shed publishing metadata for the ipaPy2 Galaxy tool wrappers.
+name: ipaPy2
+owner: recetox
+# Where the wrappers live (remote_repository_url) vs. the wrapped upstream
+# project (homepage_url).
+remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2"
+homepage_url: "https://github.com/francescodc87/ipaPy2"
+categories:
+  - Metabolomics
+description: "Mass spectrometry data annotation tool."
+long_description: "New Python implementation of the Integrated Probabilistic Annotation (IPA) - A Bayesian annotation method for LC/MS data integrating biochemical relations, isotope patterns and adduct formation."
+# Templates used to name and describe the per-tool repositories; the
+# {{ tool_id }} / {{ tool_name }} placeholders are filled in per tool
+# (see the Planemo .shed.yml documentation for the exact expansion rules).
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "{{ tool_name }} tool from the ipaPy2 package"
+# Suite repository definition grouping the ipaPy2 tools.
+suite:
+  name: suite_ipapy2
+  description: tools from the ipaPy2 suite are used for annotation of mass spectrometry data
+  type: repository_suite_definition
diff --git a/tools/ipapy2/ipapy2_MS1_annotation.py b/tools/ipapy2/ipapy2_MS1_annotation.py
@@ -0,0 +1,73 @@
+from ipaPy2 import ipa
+from utils import flattern_annotations, MSArgumentParser
+
+
+def main(
+    input_dataset_database,
+    input_dataset_adduct,
+    ppm,
+    ratiosd,
+    ppmunk,
+    ratiounk,
+    ppmthr,
+    pRTNone,
+    pRTout,
+    output_dataset,
+    ncores,
+):
+    write_func, file_path = output_dataset
+
+    annotations = ipa.MS1annotation(
+        input_dataset_database,
+        input_dataset_adduct,
+        ppm=ppm,
+        ratiosd=ratiosd,
+        ppmunk=ppmunk,
+        ratiounk=ratiounk,
+        ppmthr=ppmthr,
+        pRTNone=pRTNone,
+        pRTout=pRTout,
+        ncores=ncores,
+    )
+    annotations_flat = flattern_annotations(annotations)
+    write_func(annotations_flat, file_path)
+
+
+if __name__ == "__main__":
+    parser = MSArgumentParser("""
+    Annotation of the dataset based on the MS1 information. Prior probabilities
+        are based on mass only, while post probabilities are based on mass, RT,
+        previous knowledge and isotope patterns.
+    """)
+    parser.add_argument(
+        "--input_dataset_database",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help=(
+            "A dataset containing the MS1 data. Ideally obtained from"
+            " map_isotope_patterns"
+        ),
+    )
+    parser.add_argument(
+        "--input_dataset_adducts",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help="A dataset containing information on all possible adducts.",
+    )
+
+    args = parser.parse_args()
+    main(
+        args.input_dataset_database,
+        args.input_dataset_adducts,
+        args.ppm,
+        args.ratiosd,
+        args.ppmunk,
+        args.ratiounk,
+        args.ppmthr,
+        args.pRTNone,
+        args.pRTout,
+        args.output_dataset,
+        args.ncores,
+    )
diff --git a/tools/ipapy2/ipapy2_MS1_annotation.xml b/tools/ipapy2/ipapy2_MS1_annotation.xml
@@ -0,0 +1,123 @@
+<tool id="ipapy2_MS1_annotation" name="ipaPy2 MS1 annotation" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>annotate clustered features using a supplied MS1 database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+
+    <!--
+        Command line for ipapy2_MS1_annotation.py.
+        * Each dataset is passed as its path followed by its Galaxy extension.
+        * The ppmunk option falls back to ppm when $ppmunk evaluates false
+          (e.g. the field was left empty).
+        * The ppmthr option falls back to computed_ppmthr (2 * ppm) when
+          $ppmthr evaluates false.
+        * ncores is taken from GALAXY_SLOTS, defaulting to 1 when it is unset.
+        NOTE(review): ratiosd, ppmunk, ratiounk, ppmthr, pRTNone and pRTout are
+        referenced without a section prefix; they presumably come from the
+        macros expanded inside the "unknown" and "optional_settings" sections.
+        Confirm that unqualified access resolves for this tool profile.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #set $computed_ppmthr = float($ppm) * 2
+        python3  '${__tool_directory__}/ipapy2_MS1_annotation.py'
+        --input_dataset_database '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
+        --input_dataset_adducts '${all_adducts}' '${all_adducts.ext}'
+        --ppm ${ppm}
+        --ratiosd ${ratiosd}
+        #if $ppmunk
+            --ppmunk ${ppmunk}
+        #else
+            --ppmunk ${ppm}
+        #end if
+        --ratiounk ${ratiounk}
+        #if $ppmthr
+            --ppmthr ${ppmthr}
+        #else
+            --ppmthr ${computed_ppmthr}
+        #end if
+        --pRTNone ${pRTNone}
+        --pRTout ${pRTout}
+        --output_dataset '${MS1_annotations}' '${MS1_annotations.ext}'
+        --ncores \${GALAXY_SLOTS:-1}
+    ]]></command>
+
+    <inputs>
+        <param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns" />
+        <param label="all possible adducts" name="all_adducts" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
+        <expand macro="ppm"/>
+        <section name="unknown" title="settings for the identification of unknowns">
+            <expand macro="ms_unknown"/>
+        </section>
+        <section name="optional_settings" title="optional settings">
+            <expand macro="ms_options"/>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="MS1_annotations" format_source="mapped_isotope_patterns"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="mapped_isotope_patterns" value="mapped_isotope_patterns.csv"/>
+            <param name="all_adducts" value="all_adducts.csv"/>
+            <param name="ppm" value="3"/>
+            <output name="MS1_annotations" file="MS1_annotations.csv" lines_diff="20"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+.. _ipapy2_ms1_annotation:
+
+==========================
+ipaPy2 MS1 Annotation Tool
+==========================
+
+**Tool Description**
+
+This tool annotates clustered features in your dataset using MS1 information and a supplied adducts table. It calculates prior and posterior probabilities for each feature, leveraging mass, retention time (RT), chemical knowledge, and isotope patterns to provide high-confidence annotations.
+
+How it works
+------------
+
+- **Prior probabilities** are calculated using only the mass information.
+- **Posterior probabilities** incorporate mass, RT, prior knowledge, and isotope patterns for more accurate annotation.
+- The tool matches features in your data to possible adducts and database entries, considering user-defined tolerances for mass (ppm) and other optional parameters.
+
+Inputs
+------
+
+1. **Mapped isotope patterns**  
+   Dataset containing MS1 data, ideally obtained from the ``map_isotope_patterns`` tool.
+
+2. **All possible adducts**  
+   Table listing all possible adducts for the database, ideally obtained from the ``compute_all_adducts`` tool.
+
+3. **Parameters**  
+   - **ppm**: Mass tolerance in parts per million for matching.
+   - **Unknown settings**: Parameters for handling unknowns, such as ppm and ratio thresholds.
+   - **Optional settings**: Additional parameters for advanced annotation control.
+
+Outputs
+-------
+
+- **MS1_annotations**  
+  Annotated dataset with additional columns describing the best-matching database entries, probabilities, and isotope pattern scores.
+
+Example
+-------
+
+Suppose you have mapped isotope patterns and a list of all adducts. You can use this tool to annotate your features as follows:
+
+.. code-block::
+
+    mapped_isotope_patterns.csv
+    all_adducts.csv
+
+Set the desired tolerances (e.g., ``ppm = 3``) and run the tool. The output will be a table with annotations for each feature.
+
+Notes
+-----
+
+- For best results, ensure your input files are correctly formatted and contain the required columns.
+- The tool is designed to be flexible and can handle various input formats (CSV, TSV, Parquet, Tabular).
+
+References
+----------
+
+- For more details on the annotation algorithm and scoring, refer to the ipaPy2 documentation or associated publications.
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/ipapy2/ipapy2_MS2_annotation.py b/tools/ipapy2/ipapy2_MS2_annotation.py
@@ -0,0 +1,136 @@
+from ipaPy2 import ipa
+from utils import flattern_annotations, MSArgumentParser
+
+
+def main(
+    input_dataset_mapped_isotope_patterns,
+    input_dataset_MS2,
+    input_dataset_adducts,
+    input_dataset_MS2_DB,
+    ppm,
+    ratiosd,
+    ppmunk,
+    ratiounk,
+    ppmthr,
+    pRTNone,
+    pRTout,
+    mzdCS,
+    ppmCS,
+    CSunk,
+    evfilt,
+    output_dataset,
+    ncores,
+):
+    annotations = ipa.MSMSannotation(
+        input_dataset_mapped_isotope_patterns,
+        input_dataset_MS2,
+        input_dataset_adducts,
+        input_dataset_MS2_DB,
+        ppm=ppm,
+        ratiosd=ratiosd,
+        ppmunk=ppmunk,
+        ratiounk=ratiounk,
+        ppmthr=ppmthr,
+        pRTNone=pRTNone,
+        pRTout=pRTout,
+        mzdCS=mzdCS,
+        ppmCS=ppmCS,
+        CSunk=CSunk,
+        evfilt=evfilt,
+        ncores=ncores,
+    )
+    annotations_flat = flattern_annotations(annotations)
+    write_func, file_path = output_dataset
+    write_func(annotations_flat, file_path)
+
+
+if __name__ == "__main__":
+    parser = MSArgumentParser(
+        """Annotation of the dataset base on the MS1 and MS2 information. Prior
+    probabilities are based on mass only, while post probabilities are based
+    on mass, RT, previous knowledge and isotope patterns."""
+    )
+    parser.add_argument(
+        "--input_dataset_mapped_isotope_patterns",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help=(
+            "A dataset containing the MS1 data. Ideally obtained from"
+            " map_isotope_patterns"
+        ),
+    )
+    parser.add_argument(
+        "--input_dataset_MS2",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help="A dataset containing the MS2 fragmentation data",
+    )
+    parser.add_argument(
+        "--input_dataset_adducts",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help=(
+            "A dataset containing the information on all the possible adducts given the"
+            " database. Ideally obtained from compute_all_adducts"
+        ),
+    )
+    parser.add_argument(
+        "--input_dataset_MS2_DB",
+        nargs=2,
+        action="load_data",
+        required=True,
+        help="A dataset containing the MS2 database",
+    )
+    parser.add_argument(
+        "--mzdCS",
+        type=int,
+        default=0,
+        help="""maximum mz difference allowed when computing cosine similarity
+           scores. If one wants to use this parameter instead of ppmCS, this
+           must be set to 0. Default 0.""",
+    )
+    parser.add_argument(
+        "--ppmCS",
+        type=int,
+        default=10,
+        help="""maximum ppm allowed when computing cosine similarity scores.
+           If one wants to use this parameter instead of mzdCS, this must be
+           set to 0. Default 10.""",
+    )
+    parser.add_argument(
+        "--CSunk",
+        type=float,
+        default=0.7,
+        help="""cosine similarity score associated with the 'unknown' annotation.
+            Default 0.7""",
+    )
+    parser.add_argument(
+        "--evfilt",
+        type=bool,
+        default=False,
+        help="""Default value False. If true, only spectrum acquired with the same
+            collision energy are considered.""",
+    )
+    args = parser.parse_args()
+    main(
+        args.input_dataset_mapped_isotope_patterns,
+        args.input_dataset_MS2,
+        args.input_dataset_adducts,
+        args.input_dataset_MS2_DB,
+        args.ppm,
+        args.ratiosd,
+        args.ppmunk,
+        args.ratiounk,
+        args.ppmthr,
+        args.pRTNone,
+        args.pRTout,
+        args.mzdCS,
+        args.ppmCS,
+        args.CSunk,
+        args.evfilt,
+        args.output_dataset,
+        args.ncores,
+    )