Skip to content

Commit 16873bf

Browse files
authored
Merge pull request #113 from singjc/main
Add: Ion Mobility data to Sage Reader
2 parents 224f401 + 487bcf9 commit 16873bf

File tree

5 files changed

+62
-1
lines changed

5 files changed

+62
-1
lines changed

psm_utils/io/sage.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:
5757
rescoring_features[ft] = psm_dict[ft]
5858
except KeyError:
5959
continue
60+
61+
# Sage reports ion mobility as 0.0 when it is not present; only add the mobility features when the value is non-zero
62+
if float(psm_dict['ion_mobility']):
63+
rescoring_features.update({
64+
'ion_mobility': float(psm_dict['ion_mobility']),
65+
'predicted_mobility': float(psm_dict['predicted_mobility']),
66+
'delta_mobility': float(psm_dict['delta_mobility'])
67+
})
6068

6169
return PSM(
6270
peptidoform=self._parse_peptidoform(
@@ -70,6 +78,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:
7078
score=float(psm_dict[self.score_column]),
7179
precursor_mz=self._parse_precursor_mz(psm_dict["expmass"], psm_dict["charge"]),
7280
retention_time=float(psm_dict["rt"]),
81+
ion_mobility=float(psm_dict["ion_mobility"]) if float(psm_dict["ion_mobility"]) else None,
7382
protein_list=psm_dict["proteins"].split(";"),
7483
source="sage",
7584
rank=int(float(psm_dict["rank"])),

tests/test_data/resultsIM.sage.tsv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
psm_id peptide proteins num_proteins filename scannr rank label expmass calcmass charge peptide_len missed_cleavages semi_enzymatic isotope_error precursor_ppm fragment_ppm hyperscore delta_next delta_best rt aligned_rt predicted_rt delta_rt_model ion_mobility predicted_mobility delta_mobility matched_peaks longest_b longest_y longest_y_pct matched_intensity_pct scored_candidates poisson sage_discriminant_score posterior_error spectrum_q peptide_q protein_q ms2_intensity
2+
529791 YVDDTQFVRFDSDAASPR sp|P01889|HLAB_HUMAN;sp|P10321|HLAC_HUMAN 2 G220824_028_Slot2-34_1_6753.mzml index=45761 1 1 2086.9507 2087.9548 3 18 0 0 -1.00335 0.38332784 1.5193833 44.947527657385436 24.626872218188044 0.0 23.004148 0.5305706 0.5301316 0.0004390478 0.96470714 0.9236175 0.041089654 17 2 6 0.33333334 46.215378 33149 -14.997740845471464 0.47067946 -96.47973 0.0000107030855 0.00009881538 0.0006954081 26230.0

tests/test_data/test_out_sage.idXML

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
<UserParam type="float" name="delta_rt_model" value="0.993444"/>
2828
<UserParam type="float" name="aligned_rt" value="0.993444"/>
2929
<UserParam type="float" name="predicted_rt" value="0.0"/>
30+
<UserParam type="float" name="predicted_mobility" value="0.0"/>
3031
<UserParam type="float" name="matched_peaks" value="22.0"/>
3132
<UserParam type="float" name="longest_b" value="9.0"/>
3233
<UserParam type="float" name="longest_y" value="12.0"/>

tests/test_io/test_idxml.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def test_write_file_with_pyopenms_objects(self):
107107
assert sha == expected_sha
108108

109109
def test_write_file_without_pyopenms_objects(self):
110-
expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd"
110+
expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd" # TODO: this expected hash may not match locally, depending on installed dependency versions
111111
reader = SageTSVReader("./tests/test_data/results.sage.tsv")
112112
psm_list = reader.read_file()
113113
writer = IdXMLWriter("./tests/test_data/test_out_sage.idXML")

tests/test_io/test_sage.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,62 @@
4747
},
4848
)
4949

50+
test_psm_im = PSM(
51+
peptidoform="YVDDTQFVRFDSDAASPR/3",
52+
spectrum_id="index=45761",
53+
run="G220824_028_Slot2-34_1_6753",
54+
collection=None,
55+
spectrum=None,
56+
is_decoy=False,
57+
score=0.47067946,
58+
qvalue=0.0000107030855,
59+
pep=None,
60+
precursor_mz=696.6580583654032,
61+
retention_time=23.004148,
62+
ion_mobility=0.96470714,
63+
protein_list=['sp|P01889|HLAB_HUMAN','sp|P10321|HLAC_HUMAN'],
64+
rank=1,
65+
source="sage",
66+
metadata={},
67+
rescoring_features={
68+
"expmass": 2086.9507,
69+
"calcmass": 2087.9548,
70+
"peptide_len": 18.0,
71+
"missed_cleavages": 0.0,
72+
"isotope_error": -1.00335,
73+
"precursor_ppm": 0.38332784,
74+
"fragment_ppm": 1.5193833,
75+
"hyperscore": 44.947527657385436,
76+
"delta_next": 24.626872218188044,
77+
"delta_best": 0.0,
78+
"delta_rt_model": 0.0004390478,
79+
"aligned_rt": 0.5305706,
80+
"predicted_rt": 0.5301316,
81+
"matched_peaks": 17.0,
82+
"longest_b": 2.0,
83+
"longest_y": 6.0,
84+
"longest_y_pct": 0.33333334,
85+
"matched_intensity_pct": 46.215378,
86+
"scored_candidates": 33149.0,
87+
"poisson": -14.997740845471464,
88+
"ms2_intensity": 26230.0,
89+
"ion_mobility": 0.96470714,
90+
"predicted_mobility": 0.9236175,
91+
"delta_mobility": 0.041089654,
92+
}
93+
)
94+
5095

5196
class TestSageTSVReader:
5297
def test_iter(self):
5398
with SageTSVReader("./tests/test_data/results.sage.tsv") as reader:
5499
for psm in reader:
55100
psm.provenance_data = {}
56101
assert psm == test_psm
102+
with SageTSVReader("./tests/test_data/resultsIM.sage.tsv") as reader:
103+
for psm in reader:
104+
psm.provenance_data = {}
105+
assert psm == test_psm_im
57106

58107

59108
class TestSageParquetReader:

0 commit comments

Comments
 (0)