Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions psm_utils/io/sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:
rescoring_features[ft] = psm_dict[ft]
except KeyError:
continue

# An ion mobility of 0.0 means it is not present; only add the mobility values to the rescoring features when it is non-zero
if float(psm_dict['ion_mobility']):
rescoring_features.update({
'ion_mobility': float(psm_dict['ion_mobility']),
'predicted_mobility': float(psm_dict['predicted_mobility']),
'delta_mobility': float(psm_dict['delta_mobility'])
})

return PSM(
peptidoform=self._parse_peptidoform(
Expand All @@ -70,6 +78,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:
score=float(psm_dict[self.score_column]),
precursor_mz=self._parse_precursor_mz(psm_dict["expmass"], psm_dict["charge"]),
retention_time=float(psm_dict["rt"]),
ion_mobility=float(psm_dict["ion_mobility"]) if float(psm_dict["ion_mobility"]) else None,
protein_list=psm_dict["proteins"].split(";"),
source="sage",
rank=int(float(psm_dict["rank"])),
Expand Down
2 changes: 2 additions & 0 deletions tests/test_data/resultsIM.sage.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
psm_id peptide proteins num_proteins filename scannr rank label expmass calcmass charge peptide_len missed_cleavages semi_enzymatic isotope_error precursor_ppm fragment_ppm hyperscore delta_next delta_best rt aligned_rt predicted_rt delta_rt_model ion_mobility predicted_mobility delta_mobility matched_peaks longest_b longest_y longest_y_pct matched_intensity_pct scored_candidates poisson sage_discriminant_score posterior_error spectrum_q peptide_q protein_q ms2_intensity
529791 YVDDTQFVRFDSDAASPR sp|P01889|HLAB_HUMAN;sp|P10321|HLAC_HUMAN 2 G220824_028_Slot2-34_1_6753.mzml index=45761 1 1 2086.9507 2087.9548 3 18 0 0 -1.00335 0.38332784 1.5193833 44.947527657385436 24.626872218188044 0.0 23.004148 0.5305706 0.5301316 0.0004390478 0.96470714 0.9236175 0.041089654 17 2 6 0.33333334 46.215378 33149 -14.997740845471464 0.47067946 -96.47973 0.0000107030855 0.00009881538 0.0006954081 26230.0
1 change: 1 addition & 0 deletions tests/test_data/test_out_sage.idXML
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
<UserParam type="float" name="delta_rt_model" value="0.993444"/>
<UserParam type="float" name="aligned_rt" value="0.993444"/>
<UserParam type="float" name="predicted_rt" value="0.0"/>
<UserParam type="float" name="predicted_mobility" value="0.0"/>
<UserParam type="float" name="matched_peaks" value="22.0"/>
<UserParam type="float" name="longest_b" value="9.0"/>
<UserParam type="float" name="longest_y" value="12.0"/>
Expand Down
2 changes: 1 addition & 1 deletion tests/test_io/test_idxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_write_file_with_pyopenms_objects(self):
assert sha == expected_sha

def test_write_file_without_pyopenms_objects(self):
expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd"
expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd" #TODO: can cause problems locally depending on dependency versions
reader = SageTSVReader("./tests/test_data/results.sage.tsv")
psm_list = reader.read_file()
writer = IdXMLWriter("./tests/test_data/test_out_sage.idXML")
Expand Down
49 changes: 49 additions & 0 deletions tests/test_io/test_sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,62 @@
},
)

# Reference PSM for the single row of tests/test_data/resultsIM.sage.tsv.
# The ion mobility is expected both as the PSM attribute and, together with
# the predicted and delta mobility, among the rescoring features.
test_psm_im = PSM(
    peptidoform="YVDDTQFVRFDSDAASPR/3",
    spectrum_id="index=45761",
    run="G220824_028_Slot2-34_1_6753",
    collection=None,
    spectrum=None,
    is_decoy=False,
    score=0.47067946,
    qvalue=0.0000107030855,
    pep=None,
    precursor_mz=696.6580583654032,
    retention_time=23.004148,
    ion_mobility=0.96470714,
    protein_list=["sp|P01889|HLAB_HUMAN", "sp|P10321|HLAC_HUMAN"],
    rank=1,
    source="sage",
    metadata={},
    rescoring_features={
        "expmass": 2086.9507,
        "calcmass": 2087.9548,
        "peptide_len": 18.0,
        "missed_cleavages": 0.0,
        "isotope_error": -1.00335,
        "precursor_ppm": 0.38332784,
        "fragment_ppm": 1.5193833,
        "hyperscore": 44.947527657385436,
        "delta_next": 24.626872218188044,
        "delta_best": 0.0,
        "delta_rt_model": 0.0004390478,
        "aligned_rt": 0.5305706,
        "predicted_rt": 0.5301316,
        "matched_peaks": 17.0,
        "longest_b": 2.0,
        "longest_y": 6.0,
        "longest_y_pct": 0.33333334,
        "matched_intensity_pct": 46.215378,
        "scored_candidates": 33149.0,
        "poisson": -14.997740845471464,
        "ms2_intensity": 26230.0,
        "ion_mobility": 0.96470714,
        "predicted_mobility": 0.9236175,
        "delta_mobility": 0.041089654,
    },
)


class TestSageTSVReader:
    """Tests for reading Sage TSV result files with ``SageTSVReader``."""

    def test_iter(self):
        """Iterate over each test file and compare every PSM to its reference.

        Two files are read: ``results.sage.tsv`` (compared against
        ``test_psm``) and ``resultsIM.sage.tsv``, which carries ion mobility
        values (compared against ``test_psm_im``).
        """
        cases = (
            ("./tests/test_data/results.sage.tsv", test_psm),
            ("./tests/test_data/resultsIM.sage.tsv", test_psm_im),
        )
        for path, expected in cases:
            with SageTSVReader(path) as reader:
                psms = list(reader)

            # Without this guard a reader that yields nothing would skip every
            # comparison below and the test would pass vacuously.
            assert psms, f"no PSMs were read from {path}"

            for psm in psms:
                # Provenance data is reset before comparing; presumably it is
                # specific to the file that was read -- TODO confirm.
                psm.provenance_data = {}
                assert psm == expected


class TestSageParquetReader:
Expand Down
Loading