Skip to content

Commit 0e3fe1f

Browse files
authored
Merge pull request #189 from compomics/spectrum-reader-changes
Spectrum reader changes
2 parents e79cc41 + ec3397f commit 0e3fe1f

File tree

2 files changed

+34
-18
lines changed

2 files changed

+34
-18
lines changed

ms2pip/ms2pipC.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,11 @@ def process_spectra(
324324
ces = specdict["ce"]
325325
else:
326326
specdict = (
327-
data[["spec_id", "peptide", "modifications"]].set_index("spec_id").to_dict()
327+
data[["spec_id", "peptide", "modifications", "charge"]].set_index("spec_id").to_dict()
328328
)
329329
peptides = specdict["peptide"]
330330
modifications = specdict["modifications"]
331+
charges = specdict["charge"]
331332

332333
# cols contains the names of the computed features
333334
cols_n = get_feature_names_new()
@@ -377,6 +378,11 @@ def process_spectra(
377378
peptide = peptide.replace("L", "I")
378379
mods = modifications[title]
379380

381+
if spectrum.precursor_charge:
382+
charge = spectrum.precursor_charge
383+
else:
384+
charge = charges[title] # If charge cannot be parsed from MGF
385+
380386
if "mut" in mods:
381387
continue
382388

@@ -428,15 +434,15 @@ def process_spectra(
428434
dvectors.append(
429435
np.array(
430436
ms2pip_pyx.get_vector_ce(
431-
peptide, modpeptide, spectrum.charge, colen
437+
peptide, modpeptide, charge, colen
432438
),
433439
dtype=np.uint16,
434440
)
435441
) # SD: added collision energy
436442
else:
437443
dvectors.append(
438444
np.array(
439-
ms2pip_pyx.get_vector(peptide, modpeptide, spectrum.charge),
445+
ms2pip_pyx.get_vector(peptide, modpeptide, charge),
440446
dtype=np.uint16,
441447
)
442448
)
@@ -460,7 +466,7 @@ def process_spectra(
460466
# Predict the b- and y-ion intensities from the peptide
461467
pepid_buf.append(title)
462468
peplen_buf.append(len(peptide) - 2)
463-
charge_buf.append(spectrum.charge)
469+
charge_buf.append(charge)
464470

465471
# get/append ion mzs, targets and predictions
466472
targets = ms2pip_pyx.get_targets(
@@ -479,13 +485,13 @@ def process_spectra(
479485
if "xgboost_model_files" in MODELS[model].keys():
480486
vector_buf.append(
481487
np.array(
482-
ms2pip_pyx.get_vector(peptide, modpeptide, spectrum.charge),
488+
ms2pip_pyx.get_vector(peptide, modpeptide, charge),
483489
dtype=np.uint16,
484490
)
485491
)
486492
else:
487493
predictions = ms2pip_pyx.get_predictions(
488-
peptide, modpeptide, spectrum.charge, model_id, peaks_version, colen
494+
peptide, modpeptide, charge, model_id, peaks_version, colen
489495
)
490496
prediction_buf.append(
491497
[np.array(p, dtype=np.float32) for p in predictions]

ms2pip/spectrum.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
11
"""Read MS2 spectra."""
22

33
from pathlib import Path
4-
from typing import Generator, List
4+
from typing import Generator
55

66
import numpy as np
7-
from pyteomics import mzml, mgf
7+
from pyteomics import mgf, mzml
88

99
from ms2pip.exceptions import (
10-
UnsupportedSpectrumFiletypeError,
11-
InvalidSpectrumError,
1210
EmptySpectrumError,
11+
InvalidSpectrumError,
12+
UnsupportedSpectrumFiletypeError,
1313
)
1414

1515

1616
class Spectrum:
17-
def __init__(self, title, charge, pepmass, msms, peaks) -> None:
17+
def __init__(
18+
self, title, msms, peaks, precursor_charge=None, precursor_mz=None
19+
) -> None:
1820
"""Minimal information on observed MS2 spectrum."""
1921
self.title = str(title)
20-
self.charge = int(charge)
21-
self.pepmass = float(pepmass)
2222
self.msms = np.array(msms, dtype=np.float32)
2323
self.peaks = np.array(peaks, dtype=np.float32)
24+
self.precursor_charge = int(precursor_charge) if precursor_charge else None
25+
self.precursor_mz = float(precursor_mz) if precursor_mz else None
2426

2527
self.tic = np.sum(self.peaks)
2628

@@ -57,7 +59,9 @@ def remove_reporter_ions(self, label_type=None) -> None:
5759
def remove_precursor(self, tolerance=0.02) -> None:
5860
"""Remove precursor peak."""
5961
for mi, mp in enumerate(self.msms):
60-
if (mp >= self.pepmass - tolerance) & (mp <= self.pepmass + tolerance):
62+
if (mp >= self.precursor_mz - tolerance) & (
63+
mp <= self.precursor_mz + tolerance
64+
):
6165
self.peaks[mi] = 0
6266

6367
def tic_norm(self) -> None:
@@ -91,10 +95,16 @@ def read_mgf(spec_file) -> Generator[Spectrum, None, None]:
9195
spec_id = spectrum["params"]["title"]
9296
peaks = spectrum["intensity array"]
9397
msms = spectrum["m/z array"]
94-
precursor_mz = spectrum["params"]["pepmass"][0]
95-
precursor_charge = spectrum["params"]["charge"][0]
98+
try:
99+
precursor_charge = spectrum["params"]["charge"][0]
100+
except KeyError:
101+
precursor_charge = None
102+
try:
103+
precursor_mz = spectrum["params"]["pepmass"][0]
104+
except KeyError:
105+
precursor_mz = None
96106
parsed_spectrum = Spectrum(
97-
spec_id, precursor_charge, precursor_mz, msms, peaks
107+
spec_id, msms, peaks, precursor_charge, precursor_mz
98108
)
99109
yield parsed_spectrum
100110

@@ -128,7 +138,7 @@ def read_mzml(spec_file) -> Generator[Spectrum, None, None]:
128138
precursor_mz = precursor["selected ion m/z"]
129139
precursor_charge = precursor["charge state"]
130140
parsed_spectrum = Spectrum(
131-
spec_id, precursor_charge, precursor_mz, msms, peaks
141+
spec_id, msms, peaks, precursor_charge, precursor_mz
132142
)
133143
yield parsed_spectrum
134144

0 commit comments

Comments
 (0)