Skip to content

Commit a0f3d7f

Browse files
committed
Add ion mobility prediction support through IM2Deep
1 parent 801b6fb commit a0f3d7f

File tree

4 files changed

+89
-4
lines changed

4 files changed

+89
-4
lines changed

ms2pip/__main__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ def predict_single(*args, **kwargs):
8686

8787
@cli.command(help=ms2pip.core.predict_batch.__doc__)
8888
@click.argument("psms", required=True)
89+
@click.option("--psm-filetype", "-t", type=click.Choice(PSM_FILETYPES), default=None)
8990
@click.option("--output-name", "-o", type=str)
9091
@click.option("--output-format", "-f", type=click.Choice(SUPPORTED_FORMATS), default="tsv")
9192
@click.option("--add-retention-time", "-r", is_flag=True)
93+
@click.option("--add-ion-mobility", "-i", is_flag=True)
9294
@click.option("--model", type=click.Choice(MODELS), default="HCD")
9395
@click.option("--model-dir")
9496
@click.option("--processes", "-n", type=int)
@@ -110,6 +112,7 @@ def predict_batch(*args, **kwargs):
110112
@click.option("--output-name", "-o", type=str)
111113
@click.option("--output-format", "-f", type=click.Choice(SUPPORTED_FORMATS), default="msp")
112114
@click.option("--add-retention-time", "-r", is_flag=True)
115+
@click.option("--add-ion-mobility", "-i", is_flag=True)
113116
@click.option("--model", type=click.Choice(MODELS), default="HCD")
114117
@click.option("--model-dir")
115118
@click.option("--batch-size", type=int, default=100000)
@@ -136,6 +139,7 @@ def predict_library(*args, **kwargs):
136139
@click.option("--spectrum-id-pattern", "-p")
137140
@click.option("--compute-correlations", "-x", is_flag=True)
138141
@click.option("--add-retention-time", "-r", is_flag=True)
142+
@click.option("--add-ion-mobility", "-i", is_flag=True)
139143
@click.option("--model", type=click.Choice(MODELS), default="HCD")
140144
@click.option("--model-dir")
141145
@click.option("--ms2-tolerance", type=float, default=0.02)

ms2pip/_utils/ion_mobility.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Module for ion mobility prediction with IM²Deep."""
2+
3+
import logging
4+
5+
import pandas as pd
6+
from psm_utils import PSMList
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
class IonMobility:
    """
    Predict ion mobility using IM²Deep.

    Parameters
    ----------
    processes
        Number of parallel jobs; forwarded to the IM²Deep predictor as ``n_jobs``.

    Raises
    ------
    ImportError
        If the optional ``im2deep`` package cannot be imported.

    """

    def __init__(self, processes=1) -> None:
        # Lazy import to avoid loading heavy dependencies when not needed.
        # Only the import itself is guarded, so the ImportError below can only
        # originate from the missing optional dependency.
        try:
            from im2deep.im2deep import predict_ccs
        except ImportError as e:
            raise ImportError(
                "The 'im2deep' package is required for ion mobility prediction."
            ) from e

        self.predict_fn = predict_ccs
        self.processes = processes

    def add_im_predictions(self, psm_list: PSMList) -> None:
        """
        Add ion mobility predictions to the PSMList.

        The predictor is called once for the full list, without writing output
        to disk, and the resulting values are assigned in place to the
        ``ion_mobility`` field of ``psm_list``.

        Parameters
        ----------
        psm_list
            PSMs to predict ion mobility for; modified in place.

        """
        logger.info("Predicting ion mobility...")
        predictions: pd.Series = self.predict_fn(
            psm_list, write_output=False, n_jobs=self.processes
        )
        # NOTE(review): the predictor is named `predict_ccs`; confirm that its
        # output is in the unit expected for the `ion_mobility` field.
        psm_list["ion_mobility"] = predictions.values

ms2pip/core.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from ms2pip._utils.feature_names import get_feature_names
2424
from ms2pip._utils.psm_input import read_psms
2525
from ms2pip._utils.retention_time import RetentionTime
26+
from ms2pip._utils.ion_mobility import IonMobility
2627
from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
2728
from ms2pip.constants import MODELS
2829
from ms2pip.result import ProcessingResult, calculate_correlations
@@ -74,6 +75,7 @@ def predict_single(
7475
def predict_batch(
7576
psms: Union[PSMList, str, Path],
7677
add_retention_time: bool = False,
78+
add_ion_mobility: bool = False,
7779
psm_filetype: Optional[str] = None,
7880
model: Optional[str] = "HCD",
7981
model_dir: Optional[Union[str, Path]] = None,
@@ -91,6 +93,8 @@ def predict_batch(
9193
filetypes. See https://psm-utils.readthedocs.io/en/stable/#supported-file-formats.
9294
add_retention_time
9395
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
96+
add_ion_mobility
97+
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
9498
model
9599
Model to use for prediction. Default: "HCD".
96100
model_dir
@@ -113,6 +117,11 @@ def predict_batch(
113117
rt_predictor = RetentionTime(processes=processes)
114118
rt_predictor.add_rt_predictions(psm_list)
115119

120+
if add_ion_mobility:
121+
logger.info("Adding ion mobility predictions")
122+
im_predictor = IonMobility(processes=processes)
123+
im_predictor.add_im_predictions(psm_list)
124+
116125
with Encoder.from_psm_list(psm_list) as encoder:
117126
ms2pip_parallelized = _Parallelized(
118127
encoder=encoder,
@@ -130,6 +139,7 @@ def predict_library(
130139
fasta_file: Optional[Union[str, Path]] = None,
131140
config: Optional[Union[ProteomeSearchSpace, dict, str, Path]] = None,
132141
add_retention_time: bool = False,
142+
add_ion_mobility: bool = False,
133143
model: Optional[str] = "HCD",
134144
model_dir: Optional[Union[str, Path]] = None,
135145
batch_size: int = 100000,
@@ -148,6 +158,8 @@ def predict_library(
148158
parameters. Required if `fasta_file` is not provided.
149159
add_retention_time
150160
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
161+
add_ion_mobility
162+
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
151163
model
152164
Model to use for prediction. Default: "HCD".
153165
model_dir
@@ -157,6 +169,11 @@ def predict_library(
157169
processes
158170
Number of parallel processes for multiprocessing steps. By default, all available.
159171
172+
Yields
173+
------
174+
predictions: List[ProcessingResult]
175+
Predicted spectra with theoretical m/z and predicted intensity values.
176+
160177
"""
161178
if fasta_file and config:
162179
# Use provided proteome, but overwrite fasta_file
@@ -183,6 +200,7 @@ def predict_library(
183200
yield predict_batch(
184201
search_space.filter_psms_by_mz(PSMList(psm_list=list(batch))),
185202
add_retention_time=add_retention_time,
203+
add_ion_mobility=add_ion_mobility,
186204
model=model,
187205
model_dir=model_dir,
188206
processes=processes,
@@ -197,6 +215,7 @@ def correlate(
197215
spectrum_id_pattern: Optional[str] = None,
198216
compute_correlations: bool = False,
199217
add_retention_time: bool = False,
218+
add_ion_mobility: bool = False,
200219
model: Optional[str] = "HCD",
201220
model_dir: Optional[Union[str, Path]] = None,
202221
ms2_tolerance: float = 0.02,
@@ -221,6 +240,8 @@ def correlate(
221240
Compute correlations between predictions and targets.
222241
add_retention_time
223242
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
243+
add_ion_mobility
244+
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
224245
model
225246
Model to use for prediction. Default: "HCD".
226247
model_dir
@@ -245,6 +266,11 @@ def correlate(
245266
rt_predictor = RetentionTime(processes=processes)
246267
rt_predictor.add_rt_predictions(psm_list)
247268

269+
if add_ion_mobility:
270+
logger.info("Adding ion mobility predictions")
271+
im_predictor = IonMobility(processes=processes)
272+
im_predictor.add_im_predictions(psm_list)
273+
248274
with Encoder.from_psm_list(psm_list) as encoder:
249275
ms2pip_parallelized = _Parallelized(
250276
encoder=encoder,

ms2pip/spectrum_output.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ class TSV(_Writer):
157157
"predicted",
158158
"observed",
159159
"rt",
160+
"im",
160161
]
161162

162163
def write(self, processing_results: List[ProcessingResult]):
@@ -194,6 +195,7 @@ def _write_row(result: ProcessingResult, ion_type: str, ion_index: int):
194195
if result.observed_intensity
195196
else None,
196197
"rt": result.psm.retention_time if result.psm.retention_time else None,
198+
"im": result.psm.ion_mobility if result.psm.ion_mobility else None,
197199
}
198200

199201

@@ -243,9 +245,12 @@ def _format_single_modification(
243245
if not modifications:
244246
return None
245247
if len(modifications) > 1:
246-
raise ValueError("Multiple modifications per amino acid not supported.")
248+
raise ValueError("Multiple modifications per amino acid not supported in MSP.")
247249
modification = modifications[0]
248-
return f"{position},{amino_acid},{modification.name}"
250+
try:
251+
return f"{position},{amino_acid},{modification.name}"
252+
except AttributeError: # MassModification has no attribute `name`
253+
return f"{position},{amino_acid},{modification.value}"
249254

250255
sequence_mods = [
251256
_format_single_modification(aa, pos + 1, mods)
@@ -286,6 +291,14 @@ def _format_retention_time(psm: PSM) -> Union[str, None]:
286291
else:
287292
return None
288293

294+
@staticmethod
def _format_ion_mobility(psm: PSM) -> Union[str, None]:
    """
    Format ion mobility as an ``IonMobility=<value>`` string.

    Returns None when the PSM's ``ion_mobility`` is falsy (e.g. ``None`` or 0).
    """
    ion_mobility = psm.ion_mobility
    return f"IonMobility={ion_mobility}" if ion_mobility else None
301+
289302
@staticmethod
290303
def _format_identifier(psm: PSM) -> str:
291304
"""Format MS2PIP ID as string."""
@@ -302,6 +315,7 @@ def _format_comment_line(psm: PSM) -> str:
302315
MSP._format_parent_mass(psm.peptidoform),
303316
MSP._format_protein_string(psm),
304317
MSP._format_retention_time(psm),
318+
MSP._format_ion_mobility(psm),
305319
MSP._format_identifier(psm),
306320
],
307321
)
@@ -310,7 +324,11 @@ def _format_comment_line(psm: PSM) -> str:
310324

311325

312326
class MGF(_Writer):
313-
"""Write MGF files from MS2PIP processing results."""
327+
"""
328+
Write MGF files from MS2PIP processing results.
329+
330+
See http://www.matrixscience.com/help/data_file_help.html for documentation on the MGF format.
331+
"""
314332

315333
suffix = ".mgf"
316334

@@ -333,6 +351,7 @@ def _write_result(self, result: ProcessingResult):
333351
f"CHARGE={result.psm.get_precursor_charge()}+",
334352
f"SCANS={result.psm.spectrum_id}",
335353
f"RTINSECONDS={result.psm.retention_time}" if result.psm.retention_time else None,
354+
f"ION_MOBILITY={result.psm.ion_mobility}" if result.psm.ion_mobility else None,
336355
]
337356

338357
# Peaks
@@ -428,7 +447,9 @@ class Bibliospec(_Writer):
428447
"""
429448
Write Bibliospec SSL and MS2 files from MS2PIP processing results.
430449
431-
Bibliospec SSL and MS2 files are also compatible with Skyline.
450+
Bibliospec SSL and MS2 files are also compatible with Skyline. See
451+
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats
452+
for documentation on the Bibliospec file formats.
432453
433454
"""
434455

@@ -442,6 +463,7 @@ class Bibliospec(_Writer):
442463
"score-type",
443464
"score",
444465
"retention-time",
466+
"ion-mobility",
445467
]
446468

447469
def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
@@ -551,6 +573,7 @@ def _write_result_to_ssl(
551573
"score-type": None,
552574
"score": None,
553575
"retention-time": result.psm.retention_time if result.psm.retention_time else None,
576+
"ion-mobility": result.psm.ion_mobility if result.psm.ion_mobility else None,
554577
}
555578
)
556579

0 commit comments

Comments
 (0)