Skip to content

Commit 902ac1e

Browse files
committed
move PepSpecMatch_AlphaTims to alphatims
1 parent 29cbf9f commit 902ac1e

File tree

3 files changed

+10
-2130
lines changed

3 files changed

+10
-2130
lines changed
Lines changed: 10 additions & 321 deletions
Original file line numberDiff line numberDiff line change
@@ -1,327 +1,16 @@
1-
# TODO: to be removed, as this is already implemented in alphaDIA.
2-
from typing import Tuple, Union
1+
# TODO to be removed
32

4-
import numpy as np
5-
import pandas as pd
6-
import tqdm
7-
from alphatims.bruker import TimsTOF
3+
# Shared message suffix; each stub in this module prefixes it with its own
# name when raising.
_DEPRECATION_MSG = "has been moved to alphaviz and will be removed from alpharaw in a future version."
84

9-
from alpharaw.ms_data_base import MSData_Base, ms_reader_provider
10-
from alpharaw.wrappers.alphatims_wrapper import AlphaTimsWrapper
115

12-
from .psm_match import PepSpecMatch
6+
def load_ms_data_tims(*args, **kwargs):
    """Placeholder that accepts any arguments and always raises.

    Raises
    ------
    DeprecationWarning
        Always. The message is ``load_ms_data_tims`` followed by the
        module-level ``_DEPRECATION_MSG`` text.
    """
    message = f"load_ms_data_tims {_DEPRECATION_MSG}"
    raise DeprecationWarning(message)
1310

14-
# File-type names that `load_ms_data_tims` opens directly with `TimsTOF`
# (the requested type is lower-cased before it is looked up here).
alphatims_hdf_types = ["alphatims", "alphatims_hdf", "tims.hdf"]
1911

20-
21-
def load_ms_data_tims(
    ms_file: Union[str, MSData_Base, TimsTOF],
    ms_file_type: str = "alpharaw_hdf",
    dda: bool = False,
    spectra_sorted_by_rt: bool = True,
) -> Tuple[Union[MSData_Base, None], TimsTOF]:
    """Load MS data and return it together with an AlphaTims object.

    Parameters
    ----------
    ms_file : str | MSData_Base | TimsTOF
        ms2 file path, an alpharaw reader object, or a TimsTOF object.
        A TimsTOF object is returned as is.

    ms_file_type : str, optional
        ms2 file type. If its lower-cased value is one of
        `alphatims_hdf_types` (["alphatims","alphatims_hdf","tims.hdf"]),
        `ms_file` is opened directly with `TimsTOF`. Otherwise, when
        `ms_file` is not already a `MSData_Base` object, the value is
        passed to `ms_reader_provider.get_reader()`, e.g.
        ["alpharaw_hdf","raw.hdf","thermo","sciex","alphapept_hdf","mgf"].
        Defaults to 'alpharaw_hdf'.

    dda : bool, optional
        if it is DDA data, by default False.
        Only forwarded to `AlphaTimsWrapper`.

    spectra_sorted_by_rt : bool, optional
        If spectra are already sorted by RT. When False,
        `raw_data._sort_rt()` is called before wrapping.
        Defaults to True

    Returns
    -------
    tuple
        MSData_Base or None: alpharaw MS Data (Reader) object.
        None when `ms_file` is a TimsTOF object or `ms_file_type`
        is one of `alphatims_hdf_types`.

        TimsTOF: AlphaTims object. When the data comes from an alpharaw
        reader this is the `AlphaTimsWrapper` built around that reader.
    """
    if isinstance(ms_file, TimsTOF):
        # Already an AlphaTims object: nothing to load.
        return None, ms_file
    if ms_file_type.lower() in alphatims_hdf_types:
        return None, TimsTOF(ms_file)

    if isinstance(ms_file, MSData_Base):
        raw_data = ms_file
    else:
        raw_data = ms_reader_provider.get_reader(ms_file_type)
        raw_data.import_raw(ms_file)

    if not spectra_sorted_by_rt:
        # RT may not be sorted in AP HDF for timsTOF after preprocessing
        raw_data._sort_rt()

    tims_data = AlphaTimsWrapper(raw_data, dda=dda)
    return raw_data, tims_data
69-
70-
71-
class PepSpecMatch_AlphaTims(PepSpecMatch):
    """
    Inherited from :class:`alpharaw.match.psm_match.PepSpecMatch`, but
    this can be used for DIA PSM matching by selecting
    MS2 spectra with RT (and IM) values.
    """

    #: RT win to get a MS2 spectrum by slicing
    rt_sec_tol_to_slice_ms2 = 3.0

    #: IM win to get a MS2 spectrum by slicing
    im_tol_to_slice_ms2 = 0.03

    #: find closest MS2 for the given RT when slicing
    find_k_nearest_ms2_by_rt = True
    k_rt_nearest = 1

    #: find closest MS2 for the given IM when slicing
    find_k_nearest_ms2_by_im = False
    k_im_nearest = 11

    def get_peak_df(
        self,
        precursor_mz: float,
        rt_sec: float,
        im: float = 0.0,
    ) -> pd.DataFrame:
        """
        Slice `self.tims_data` around the given RT (and IM) for the
        given precursor m/z, optionally keeping only the nearest
        scans / RT values.

        Parameters
        ----------
        precursor_mz : float
            Precursor m/z value
        rt_sec : float
            RT value in seconds
        im : float, optional
            Ion mobility, by default 0.0.
            0 means that all mobility values are selected.

        Returns
        -------
        pd.DataFrame
            peak_df in alphatims DF format
        """
        rt_slice = slice(
            rt_sec - self.rt_sec_tol_to_slice_ms2,
            rt_sec + self.rt_sec_tol_to_slice_ms2,
        )

        if im == 0 or self.tims_data.scan_max_index == 1:
            # No IM given, or only a single scan per frame: take everything.
            im_slice = slice(None)
        elif self.find_k_nearest_ms2_by_im and self.tims_data.scan_max_index > 1:
            # AlphaTims without AlphaRaw for .d files
            # `mobility_values` is searched in reversed order, so the
            # position is mirrored back with `scan_max_index`.
            im_slice = self.tims_data.scan_max_index - np.searchsorted(
                self.tims_data.mobility_values[::-1], im
            )
        else:
            im_slice = slice(
                im - self.im_tol_to_slice_ms2, im + self.im_tol_to_slice_ms2
            )

        spec_df = self.tims_data[rt_slice, im_slice, precursor_mz:precursor_mz]

        def find_k_nearest(array, val, k=3):
            # Slice of `k` consecutive positions around the item of
            # `array` that is closest to `val`.
            nearest = np.argmin(np.abs(array - val))
            if nearest <= k // 2:
                return slice(k)
            elif nearest >= len(array) - k // 2 - 1:
                return slice(-k, None)
            else:
                # Exactly `k` items: `nearest + k // 2 + 1` as the stop
                # would select `k + 1` items when `k` is even.
                start = nearest - k // 2
                return slice(start, start + k)

        if (
            self.find_k_nearest_ms2_by_im
            and im > 0
            and self.tims_data.scan_max_index > 1
        ):
            # RAW from AlphaRaw, mobility===0 in AlphaTims wrapper obj
            scan_idxes = np.sort(spec_df.scan_indices.unique())
            if len(scan_idxes) > 1:  # im from psm
                scan_idxes = scan_idxes[
                    find_k_nearest(
                        self.raw_data.spectrum_df.mobility.values[scan_idxes],
                        im,
                        self.k_im_nearest,
                    )
                ]
                spec_df = spec_df[spec_df.scan_indices.isin(scan_idxes)]

        if self.find_k_nearest_ms2_by_rt:
            rt_values = np.sort(spec_df.rt_values.unique())
            if len(rt_values) > 1:
                closest_rts = rt_values[
                    find_k_nearest(rt_values, rt_sec, self.k_rt_nearest)
                ]
                spec_df = spec_df[spec_df.rt_values.isin(closest_rts)]

        return spec_df

    def get_peaks(
        self,
        precursor_mz: float,
        rt_sec: float,
        im: float = 0.0,
    ) -> tuple:
        """
        Get the peaks selected by `get_peak_df()`, sorted by m/z.

        Parameters
        ----------
        precursor_mz : float
            Precursor m/z value
        rt_sec : float
            RT value in seconds
        im : float, optional
            Ion mobility, by default 0.0

        Returns
        -------
        tuple
            np.ndarray: peak m/z values
            np.ndarray: peak intensity values
        """
        spec_df = self.get_peak_df(precursor_mz, rt_sec, im)
        spec_df = spec_df.sort_values("mz_values").reset_index(drop=True)
        return (spec_df.mz_values.values, spec_df.intensity_values.values)

    def load_ms_data(
        self,
        ms_file,
        ms_file_type,
        dda=False,
        spectra_sorted_by_rt=True,
    ):
        """
        Load `ms_file` with `load_ms_data_tims()` and store the result
        in `self.raw_data` and `self.tims_data`.

        All arguments are forwarded unchanged to `load_ms_data_tims()`.
        """
        self.raw_data, self.tims_data = load_ms_data_tims(
            ms_file, ms_file_type, dda, spectra_sorted_by_rt
        )

    def match_ms2_one_raw(
        self, psm_df_one_raw: pd.DataFrame, verbose: bool = False
    ) -> tuple:
        """
        Matching psm_df_one_raw against
        self.tims_data and self.raw_data
        after `self.load_ms_data()`

        Parameters
        ----------
        psm_df_one_raw : pd.DataFrame
            psm dataframe
            that contains only one raw file

        verbose : bool, optional
            Show a tqdm progress bar over the PSMs, by default False

        Returns
        -------
        tuple:
            pd.DataFrame: psm dataframe with fragment index information.

            pd.DataFrame: fragment mz dataframe.

            pd.DataFrame: matched intensity dataframe.

            pd.DataFrame: matched mass error dataframe.
            np.inf if a fragment is not matched.

        """
        self.psm_df = psm_df_one_raw

        psm_df_one_raw = self._add_missing_columns_to_psm_df(psm_df_one_raw)

        (
            fragment_mz_df,
            matched_intensity_df,
            matched_mz_err_df,
        ) = self._prepare_matching_dfs()

        # NOTE(review): "rt" is forwarded to `get_peaks()` as `rt_sec`;
        # confirm that the PSM column is in seconds at this point.
        query_columns = [
            "frag_start_idx",
            "frag_stop_idx",
            "precursor_mz",
            "rt",
        ]
        # `self.raw_data` is None when the data was loaded directly as a
        # TimsTOF object; dereferencing it unconditionally raised
        # AttributeError for PSM tables with a "mobility" column.
        # NOTE(review): in that case mobility is forwarded and handled by
        # `get_peak_df()` on `self.tims_data` alone -- confirm this is the
        # intended behavior.
        use_mobility = "mobility" in psm_df_one_raw.columns and (
            self.raw_data is None
            or "mobility" in self.raw_data.spectrum_df.columns
        )
        if use_mobility:
            query_columns.append("mobility")

        psm_iters = psm_df_one_raw[query_columns].values
        if verbose:
            psm_iters = tqdm.tqdm(psm_iters)

        for items in psm_iters:
            frag_start_idx = int(items[0])
            frag_stop_idx = int(items[1])

            # Remaining items: precursor_mz, rt and (optionally) mobility.
            spec_mzs, spec_intens = self.get_peaks(
                *items[2:],
            )
            self._match_one_psm(
                spec_mzs,
                spec_intens,
                fragment_mz_df,
                matched_intensity_df,
                matched_mz_err_df,
                frag_start_idx,
                frag_stop_idx,
            )
        return (psm_df_one_raw, fragment_mz_df, matched_intensity_df, matched_mz_err_df)

    def match_ms2_multi_raw(
        self,
        psm_df: pd.DataFrame,
        ms_files: Union[dict, list],
        ms_file_type: str = "alphatims",
        dda: bool = False,
    ):
        """
        Not implemented for AlphaTims: this method always raises.
        Loop through `match_ms2_one_raw()` for each raw file instead.

        Parameters
        ----------
        psm_df : pd.DataFrame
            PSM dataframe

        ms_files : dict | list
            if dict: {raw_name: ms2 path}
            if list: [ms2 path1, ms2 path2]

        ms_file_type : str, optional
            One of ["alphatims_hdf","alpharaw_hdf","thermo","sciex","alphapept_hdf","mgf"]
            Defaults to 'alphatims'.

        dda : bool, optional
            if it is DDA data, by default False

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError(
            "Not necessary for matching multiple raw files using AlphaTims, "
            "loop through `match_ms2_one_raw()`"
        )
12+
class PepSpecMatch_AlphaTims:
    """Placeholder kept under the old class name; it cannot be instantiated."""

    def __init__(self, *args, **kwargs):
        """Accept any arguments and always raise `DeprecationWarning`."""
        message = f"PepSpecMatch_AlphaTims {_DEPRECATION_MSG}"
        raise DeprecationWarning(message)

0 commit comments

Comments
 (0)