77from typing import Optional , Set , Tuple
88
99import numpy as np
10- from ms2rescore_rs import get_precursor_info
10+ from ms2rescore_rs import Precursor , get_precursor_info
1111from psm_utils import PSMList
1212
13- from ms2rescore .exceptions import MS2RescoreError
13+ from ms2rescore .exceptions import MS2RescoreConfigurationError , MS2RescoreError
1414from ms2rescore .utils import infer_spectrum_path
1515
1616LOGGER = logging .getLogger (__name__ )
@@ -149,6 +149,43 @@ def add_precursor_values(
149149 return available_data_types
150150
151151
152+ def _apply_spectrum_id_pattern (
153+ precursors : dict [str , Precursor ], pattern : str
154+ ) -> dict [str , Precursor ]:
155+ """Apply spectrum ID pattern to precursor IDs."""
156+ # Map precursor IDs using regex pattern
157+ compiled_pattern = re .compile (pattern )
158+ id_mapping = {
159+ match .group (1 ): spectrum_id
160+ for spectrum_id in precursors .keys ()
161+ if (match := compiled_pattern .search (spectrum_id )) is not None
162+ }
163+
164+ # Validate that any IDs were matched
165+ if not id_mapping :
166+ raise MS2RescoreConfigurationError (
167+ "'spectrum_id_pattern' did not match any spectrum-file IDs. Please check and try "
168+ "again. See "
169+ "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra "
170+ "for more information."
171+ )
172+
173+ # Validate that the same number of unique IDs were matched
174+ elif len (id_mapping ) != len (precursors ):
175+ new_id , old_id = next (iter (id_mapping .items ()))
176+ raise MS2RescoreConfigurationError (
177+ "'spectrum_id_pattern' resulted in a different number of unique spectrum IDs. This "
178+ "indicates issues with the regex pattern. Please check and try again. "
179+ f"Example old ID: '{ old_id } ' -> new ID: '{ new_id } '. "
180+ "See https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra "
181+ "for more information."
182+ )
183+
184+ precursors = {new_id : precursors [orig_id ] for new_id , orig_id in id_mapping .items ()}
185+
186+ return precursors
187+
188+
152189def _get_precursor_values (
153190 psm_list : PSMList , spectrum_path : str , spectrum_id_pattern : Optional [str ] = None
154191) -> Tuple [np .ndarray , np .ndarray , np .ndarray ]:
@@ -162,23 +199,18 @@ def _get_precursor_values(
162199 spectrum_file = infer_spectrum_path (spectrum_path , run_name )
163200
164201 LOGGER .debug ("Reading spectrum file: '%s'" , spectrum_file )
165- precursors = get_precursor_info (str (spectrum_file ))
202+ precursors : dict [ str , Precursor ] = get_precursor_info (str (spectrum_file ))
166203
167204 # Parse spectrum IDs with regex pattern if provided
168205 if spectrum_id_pattern :
169- compiled_pattern = re .compile (spectrum_id_pattern )
170- precursors = {
171- match .group (1 ): precursor
172- for spectrum_id , precursor in precursors .items ()
173- if (match := compiled_pattern .search (spectrum_id )) is not None
174- }
175-
176- # Ensure all PSMs have a precursor values
206+ precursors = _apply_spectrum_id_pattern (precursors , spectrum_id_pattern )
207+
208+ # Ensure all PSMs have precursor values
177209 for psm in psm_list_run :
178210 if psm .spectrum_id not in precursors :
179- raise SpectrumParsingError (
180- "Mismatch between PSM and spectrum file IDs. Could find precursor values "
181- f"for PSM with ID { psm .spectrum_id } in run { run_name } .\n "
211+ raise MS2RescoreConfigurationError (
212+ "Mismatch between PSM and spectrum file IDs. Could not find precursor "
213+ f"values for PSM with ID { psm .spectrum_id } in run { run_name } .\n "
182214 "Please check that the `spectrum_id_pattern` and `psm_id_pattern` options "
183215 "are configured correctly. See "
184216 "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra"
@@ -199,6 +231,6 @@ def _get_precursor_values(
199231
200232
class SpectrumParsingError(MS2RescoreError):
    """Raised when a spectrum file could not be parsed."""
0 commit comments