Skip to content

Commit 24f2a00

Browse files
authored
Merge pull request #110 from compomics/fix/psm_list_dtypes
Fix dtypes in numpy arrays from PSMList accession
2 parents 0ba376d + 3961624 commit 24f2a00

File tree

3 files changed

+35 -3 lines changed

psm_utils/psm.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
 
 from typing import Any, Dict, List, Optional, Union
 
-from pydantic import ConfigDict, BaseModel
+from pydantic import BaseModel, ConfigDict
 
 from psm_utils.peptidoform import Peptidoform
 
@@ -135,3 +135,25 @@ def get_usi(self, as_url=False) -> str:
         if as_url:
             usi = "http://proteomecentral.proteomexchange.org/usi/?usi=" + usi
         return usi
+
+
+NUMPY_DTYPES = {
+    "peptidoform": Peptidoform,
+    "spectrum_id": object,
+    "run": object,
+    "collection": object,
+    "spectrum": object,
+    "is_decoy": bool,
+    "score": float,
+    "qvalue": float,
+    "pep": float,
+    "precursor_mz": float,
+    "retention_time": float,
+    "ion_mobility": float,
+    "protein_list": object,
+    "rank": int,
+    "source": object,
+    "provenance_data": object,
+    "metadata": object,
+    "rescoring_features": object,
+}

psm_utils/psm_list.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
 from pyteomics import auxiliary, proforma
 from rich.pretty import pretty_repr
 
-from psm_utils.psm import PSM
+from psm_utils.psm import NUMPY_DTYPES, PSM
 
 
 class PSMList(BaseModel):
@@ -98,7 +98,14 @@ def __getitem__(self, item) -> PSM | list[PSM]:
             return PSMList(psm_list=self.psm_list[item])
         elif isinstance(item, str):
             # Return PSM property as array across full PSMList
-            return np.fromiter([psm[item] for psm in self.psm_list], dtype=object, count=len(self))
+            try:
+                return np.fromiter(
+                    (psm[item] for psm in self.psm_list), dtype=NUMPY_DTYPES[item], count=len(self)
+                )
+            except TypeError:
+                return np.fromiter(
+                    (psm[item] for psm in self.psm_list), dtype=object, count=len(self)
+                )
         elif _is_iterable_of_bools(item):
             # Return new PSMList with items that were True
             return PSMList(psm_list=[self.psm_list[i] for i in np.flatnonzero(item)])

tests/test_psm_list.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,9 @@ def test___get_item__(self):
 
         # PSM property as array
         np.testing.assert_equal(psm_list["spectrum_id"], np.array(["1", "2", "3"]))
+        np.testing.assert_equal(psm_list["score"], np.array([140.2, 132.9, 55.7]))
+        np.testing.assert_equal(psm_list["rank"], np.array([None, None, None]))
+        np.testing.assert_equal(psm_list["qvalue"], np.array([np.nan, np.nan, np.nan]))
 
         # Multiple PSM properties as 2D array
         np.testing.assert_equal(

0 commit comments

Comments
 (0)