|
19 | 19 | from typing import Any, cast |
20 | 20 |
|
21 | 21 | import pandas as pd |
22 | | -from pyteomics.proforma import MassModification, to_proforma |
| 22 | +from pyteomics.proforma import MassModification, to_proforma # type: ignore[import] |
23 | 23 |
|
24 | 24 | from psm_utils.io._base_classes import ReaderBase |
25 | 25 | from psm_utils.io._utils import set_csv_field_size_limit |
| 26 | +from psm_utils.io.exceptions import InvalidModificationError |
26 | 27 | from psm_utils.psm import PSM |
27 | 28 | from psm_utils.psm_list import PSMList |
28 | 29 |
|
@@ -111,21 +112,44 @@ def _get_peptide_spectrum_match(self, psm_dict: dict[str, Any]) -> PSM: |
@staticmethod
def _parse_peptidoform(peptide: str, modifications: str, charge: str | None) -> str:
    """
    Build a ProForma string from a peptide, its modification list, and its charge.

    Parameters
    ----------
    peptide
        Unmodified amino acid sequence; each character is treated as one residue.
    modifications
        Modification entries separated by ``", "``. Each entry has the form
        ``<site>(<mass>)``, where ``<site>`` is ``N-term``, ``C-term``, or a 1-based
        position immediately followed by the residue letter (e.g. ``5M(15.9949)``).
        An empty value means the peptide is unmodified.
    charge
        Charge state, passed unchanged to ``to_proforma`` as ``charge_state``.

    Returns
    -------
    str
        The string produced by ``to_proforma`` for the parsed sequence and terminal
        modifications.

    Raises
    ------
    InvalidModificationError
        If an entry is not of the form ``<site>(<mass>)``, if its mass or position is
        not numeric, if the position falls outside the peptide, or if the residue
        letter does not match the peptide at that position.

    """
    sequence: list[tuple[str, list[MassModification]]] = [(aa, []) for aa in peptide]
    n_term: list[MassModification] = []
    c_term: list[MassModification] = []

    # An empty modifications value yields no entries, so the peptide is emitted unmodified.
    mod_entries = modifications.split(", ") if modifications else []

    for mod_entry in mod_entries:
        if not mod_entry:
            continue

        # The closing parenthesis is stripped by position, so verify it is really there;
        # otherwise the last digit of the mass would be silently dropped.
        parts = mod_entry[:-1].split("(")
        if not mod_entry.endswith(")") or len(parts) != 2:
            raise InvalidModificationError(f"Could not parse modification entry '{mod_entry}'.")
        site, mass_text = parts

        try:
            mass = float(mass_text)
        except ValueError as err:
            raise InvalidModificationError(
                f"Could not parse modification entry '{mod_entry}'."
            ) from err

        if site == "N-term":
            n_term.append(MassModification(mass))
            continue
        if site == "C-term":
            c_term.append(MassModification(mass))
            continue

        # Residue-level site: 1-based position followed by the residue letter.
        # Slicing (rather than indexing) keeps an empty site from raising IndexError;
        # the empty position then fails the integer conversion below.
        residue = site[-1:]
        try:
            idx = int(site[:-1]) - 1
        except ValueError as err:
            raise InvalidModificationError(
                f"Could not parse modification entry '{mod_entry}'."
            ) from err

        # Reject negative indices explicitly so they cannot wrap around to the end
        # of the sequence.
        if idx < 0 or idx >= len(sequence):
            raise InvalidModificationError(
                f"Modification position {idx + 1} is out of bounds for peptide of "
                f"length {len(sequence)}."
            )
        if sequence[idx][0] != residue:
            raise InvalidModificationError(
                f"Modification site residue '{residue}' does not match "
                f"peptide sequence residue '{sequence[idx][0]}' at position {idx + 1}."
            )
        sequence[idx][1].append(MassModification(mass))

    return to_proforma(sequence, n_term=n_term, c_term=c_term, charge_state=charge)
130 | 154 |
|
131 | 155 | @staticmethod |
|
0 commit comments