Skip to content

Commit dd3490b

Browse files
committed
Add type hints; add more error handling
1 parent 59d2904 commit dd3490b

File tree

1 file changed

+39
-15
lines changed

1 file changed

+39
-15
lines changed

psm_utils/io/fragpipe.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
from typing import Any, cast
2020

2121
import pandas as pd
22-
from pyteomics.proforma import MassModification, to_proforma
22+
from pyteomics.proforma import MassModification, to_proforma # type: ignore[import]
2323

2424
from psm_utils.io._base_classes import ReaderBase
2525
from psm_utils.io._utils import set_csv_field_size_limit
26+
from psm_utils.io.exceptions import InvalidModificationError
2627
from psm_utils.psm import PSM
2728
from psm_utils.psm_list import PSMList
2829

@@ -111,21 +112,44 @@ def _get_peptide_spectrum_match(self, psm_dict: dict[str, Any]) -> PSM:
111112
@staticmethod
def _parse_peptidoform(peptide: str, modifications: str, charge: str | None) -> str:
    """
    Build a ProForma string from the peptide, modifications, and charge columns.

    Parameters
    ----------
    peptide
        Unmodified peptide sequence; every character is treated as one residue.
    modifications
        Modification entries separated by ``", "``. Each entry has the form
        ``<site>(<mass>)``, where ``<site>`` is ``N-term``, ``C-term``, or a 1-based
        position immediately followed by the residue letter (e.g. ``15M(15.9949)``).
        An empty value means the peptide carries no modifications.
    charge
        Charge state, passed through unchanged to ``to_proforma``.

    Returns
    -------
    str
        The value returned by ``to_proforma`` for the parsed sequence, terminal
        modifications, and charge.

    Raises
    ------
    InvalidModificationError
        If an entry is not of the form ``<site>(<mass>)``, if its mass or position is
        not numeric, if the position lies outside the peptide, or if the residue letter
        does not match the peptide residue at that position.

    """
    sequence: list[tuple[str, list[MassModification]]] = [(aa, []) for aa in peptide]
    n_term: list[MassModification] = []
    c_term: list[MassModification] = []

    # An empty modifications value yields no entries, so the bare peptide is returned.
    entries: list[str] = modifications.split(", ") if modifications else []

    for mod_entry in entries:
        if not mod_entry:
            continue

        # Require the closing parenthesis explicitly: blindly dropping the last character
        # would silently truncate the mass of a malformed entry such as "15M(15.9949".
        parsed_mod_entry: list[str] = (
            mod_entry[:-1].split("(") if mod_entry.endswith(")") else []
        )
        if len(parsed_mod_entry) != 2:
            raise InvalidModificationError(
                f"Could not parse modification entry '{mod_entry}'."
            )
        site, mass_str = parsed_mod_entry

        try:
            mass: float = float(mass_str)
        except ValueError as err:
            # Report malformed masses through the reader's own exception type.
            raise InvalidModificationError(
                f"Could not parse modification mass '{mass_str}' in entry '{mod_entry}'."
            ) from err

        if site == "N-term":
            n_term.append(MassModification(mass))
        elif site == "C-term":
            c_term.append(MassModification(mass))
        else:
            # Site is "<1-based position><residue>"; slicing (not indexing) keeps an
            # empty site from raising IndexError before it can be reported below.
            residue: str = site[-1:]
            try:
                idx: int = int(site[:-1]) - 1
            except ValueError as err:
                raise InvalidModificationError(
                    f"Could not parse modification site '{site}' in entry '{mod_entry}'."
                ) from err

            if idx < 0 or idx >= len(sequence):
                raise InvalidModificationError(
                    f"Modification position {idx + 1} is out of bounds for peptide of "
                    f"length {len(sequence)}."
                )
            if sequence[idx][0] != residue:
                raise InvalidModificationError(
                    f"Modification site residue '{residue}' does not match "
                    f"peptide sequence residue '{sequence[idx][0]}' at position {idx + 1}."
                )
            sequence[idx][1].append(MassModification(mass))

    return to_proforma(sequence, n_term=n_term, c_term=c_term, charge_state=charge)
130154

131155
@staticmethod

0 commit comments

Comments
 (0)