33from multiprocessing import cpu_count
44from typing import Dict , Optional
55
6- import numpy as np
76import psm_utils .io
87from mokapot .dataset import LinearPsmDataset
98from psm_utils import PSMList
109
1110from ms2rescore import exceptions
1211from ms2rescore .feature_generators import FEATURE_GENERATORS
1312from ms2rescore .parse_psms import parse_psms
14- from ms2rescore .parse_spectra import get_missing_values
13+ from ms2rescore .parse_spectra import add_precursor_values
1514from ms2rescore .report import generate
1615from ms2rescore .rescoring_engines import mokapot , percolator
1716from ms2rescore .rescoring_engines .mokapot import add_peptide_confidence , add_psm_confidence
@@ -62,20 +61,28 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
6261 )
6362
6463 # Add missing precursor info from spectrum file if needed
65- psm_list = _fill_missing_precursor_info (psm_list , config )
64+ available_ms_data = add_precursor_values (
65+ psm_list , config ["spectrum_path" ], config ["spectrum_id_pattern" ]
66+ )
6667
6768 # Add rescoring features
6869 for fgen_name , fgen_config in config ["feature_generators" ].items ():
69- # TODO: Handle this somewhere else, more generally?
70- if fgen_name == "maxquant" and not (psm_list ["source" ] == "msms" ).all ():
71- logger .warning (
72- "MaxQuant feature generator requires PSMs from a MaxQuant msms.txt file. Skipping "
73- "this feature generator."
74- )
75- continue
70+ # Compile configuration
7671 conf = config .copy ()
7772 conf .update (fgen_config )
7873 fgen = FEATURE_GENERATORS [fgen_name ](** conf )
74+
75+ # Check if required MS data is available
76+ missing_ms_data = fgen .required_ms_data - available_ms_data
77+ if missing_ms_data :
78+ logger .warning (
79+ f"Skipping feature generator { fgen_name } because required MS data is missing: "
80+ f"{ missing_ms_data } . Ensure that the required MS data is present in the input "
81+ "files or disable the feature generator."
82+ )
83+ continue
84+
85+ # Add features
7986 fgen .add_features (psm_list )
8087 logger .debug (f"Adding features from { fgen_name } : { set (fgen .feature_names )} " )
8188 feature_names [fgen_name ] = set (fgen .feature_names )
@@ -102,6 +109,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
102109 # Write feature names to file
103110 _write_feature_names (feature_names , output_file_root )
104111
112+ # Rename PSMs to USIs if requested
105113 if config ["rename_to_usi" ]:
106114 logging .debug (f"Creating USIs for { len (psm_list )} PSMs" )
107115 psm_list ["spectrum_id" ] = [psm .get_usi (as_url = False ) for psm in psm_list ]
@@ -183,51 +191,6 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
183191 logger .exception (e )
184192
185193
186- def _fill_missing_precursor_info (psm_list : PSMList , config : Dict ) -> PSMList :
187- """Fill missing precursor info from spectrum file if needed."""
188- # Check if required
189- # TODO: avoid hard coding feature generators in some way
190- rt_required = ("deeplc" in config ["feature_generators" ]) and any (
191- v is None or v == 0 or np .isnan (v ) for v in psm_list ["retention_time" ]
192- )
193- im_required = (
194- "ionmob" in config ["feature_generators" ] or "im2deep" in config ["feature_generators" ]
195- ) and any (v is None or v == 0 or np .isnan (v ) for v in psm_list ["ion_mobility" ])
196- logger .debug (f"RT required: { rt_required } , IM required: { im_required } " )
197-
198- # Add missing values
199- if rt_required or im_required :
200- logger .info ("Parsing missing retention time and/or ion mobility values from spectra..." )
201- get_missing_values (psm_list , config , rt_required = rt_required , im_required = im_required )
202-
203- # Check if values are now present
204- for value_name , required in [("retention_time" , rt_required ), ("ion_mobility" , im_required )]:
205- if required and (
206- 0.0 in psm_list [value_name ]
207- or None in psm_list [value_name ]
208- or np .isnan (psm_list [value_name ]).any ()
209- ):
210- if all (v is None or v == 0.0 or np .isnan (v ) for v in psm_list [value_name ]):
211- raise exceptions .MissingValuesError (
212- f"Could not find any '{ value_name } ' values in PSM or spectrum files. Disable "
213- f"feature generators that require '{ value_name } ' or ensure that the values are "
214- "present in the input files."
215- )
216- else :
217- missing_value_psms = psm_list [
218- [v is None or np .isnan (v ) for v in psm_list [value_name ]]
219- ]
220- logger .warning (
221- f"Found { len (missing_value_psms )} PSMs with missing '{ value_name } ' values. "
222- "These PSMs will be removed."
223- )
224- psm_list = psm_list [
225- [v is not None and not np .isnan (v ) for v in psm_list [value_name ]]
226- ]
227-
228- return psm_list
229-
230-
231194def _filter_by_rank (psm_list : PSMList , max_rank : int , lower_score_better : bool ) -> PSMList :
232195 """Filter PSMs by rank."""
233196 psm_list .set_ranks (lower_score_better = lower_score_better )
0 commit comments