Skip to content

Commit 8e4a9d4

Browse files
authored
Merge pull request #140 from CompOmics/fix/various-io-fixes
Various fixes in io modules
2 parents (19a657d + 07e6544); commit 8e4a9d4

File tree

6 files changed: 75 additions and 30 deletions.

psm_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Common utilities for parsing and handling PSMs, and search engine results."""
22

3-
__version__ = "1.5.0.post1"
3+
__version__ = "1.5.1"
44
__all__ = ["Peptidoform", "PSM", "PSMList"]
55

66
from warnings import filterwarnings

psm_utils/io/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,16 @@ def _supports_write_psm(writer: type[WriterBase]) -> bool:
241241
temp_file.close()
242242
Path(temp_file.name).unlink()
243243
example_psm = PSM(peptidoform="ACDE", spectrum_id="0")
244+
245+
# Prepare writer-specific kwargs for writers that need them
246+
writer_kwargs = {}
247+
if writer == percolator.PercolatorTabWriter:
248+
writer_kwargs["style"] = "pin"
249+
244250
try:
245-
with writer(temp_file.name, example_psm=example_psm) as writer_instance:
251+
with writer(
252+
temp_file.name, example_psm=example_psm, **writer_kwargs
253+
) as writer_instance:
246254
writer_instance.write_psm(example_psm)
247255
except NotImplementedError:
248256
supports_write_psm = False

psm_utils/io/idxml.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@
3434
import pyopenms as oms # type: ignore[import]
3535

3636
_has_openms = True
37+
# Check if we have pyOpenMS 3.5+ with PeptideIdentificationList
38+
_has_peptide_id_list = hasattr(oms, "PeptideIdentificationList")
3739
except ImportError:
3840
_has_openms = False
41+
_has_peptide_id_list = False
3942
oms = None # type: ignore[assignment]
4043

4144
logger = logging.getLogger(__name__)
@@ -157,8 +160,17 @@ def _parse_idxml(self) -> tuple[Any, Any]:
157160
158161
"""
159162
protein_ids: Any = [] # list[oms.ProteinIdentification]
160-
peptide_ids: Any = [] # list[oms.PeptideIdentification]
161-
oms.IdXMLFile().load(str(self.filename), protein_ids, peptide_ids) # type: ignore
163+
# In pyOpenMS 3.5+, peptide_ids must be a PeptideIdentificationList
164+
if _has_peptide_id_list:
165+
peptide_ids: Any = oms.PeptideIdentificationList() # type: ignore
166+
else:
167+
peptide_ids = [] # list[oms.PeptideIdentification] for pyOpenMS <3.5
168+
169+
# Load the idXML file - the lists will be populated by pyOpenMS
170+
idxml_file = oms.IdXMLFile() # type: ignore
171+
# Ensure filename is a string, not a Path object
172+
filename_str: str = str(self.filename)
173+
idxml_file.load(filename_str, protein_ids, peptide_ids)
162174

163175
if len(protein_ids) == 0:
164176
raise IdXMLReaderEmptyListException(
@@ -564,7 +576,10 @@ def _update_existing_ids(
564576

565577
peptide_id.setHits(updated_peptide_hits)
566578

567-
oms.IdXMLFile().store(str(self.filename), self.protein_ids, self.peptide_ids) # type: ignore
579+
# Store the idXML file
580+
idxml_file = oms.IdXMLFile() # type: ignore
581+
filename_str: str = str(self.filename)
582+
idxml_file.store(filename_str, self.protein_ids, self.peptide_ids)
568583

569584
def _update_peptide_hit(self, peptide_hit: Any, psm: PSM) -> None:
570585
"""Inplace update of PeptideHit with novel predicted features information from PSM."""
@@ -594,7 +609,11 @@ def _create_ids_for_collection(
594609
) -> None:
595610
"""Create ProteinIdentification and PeptideIdentification objects for a single collection."""
596611
self.protein_ids = [oms.ProteinIdentification()] # type: ignore
597-
self.peptide_ids = []
612+
# In pyOpenMS 3.5+, peptide_ids must be a PeptideIdentificationList
613+
if _has_peptide_id_list:
614+
self.peptide_ids = oms.PeptideIdentificationList() # type: ignore
615+
else:
616+
self.peptide_ids = [] # list[oms.PeptideIdentification] for pyOpenMS <3.5
598617

599618
# Set msrun filename with spectra_data meta value
600619
msrun_reference = [str(run).encode() for run in runs.keys()]
@@ -617,14 +636,19 @@ def _create_ids_for_collection(
617636
# Create PeptideHits
618637
peptide_hits = [self._create_peptide_hit(psm) for psm in psms]
619638
peptide_id.setHits(peptide_hits)
620-
self.peptide_ids.append(peptide_id)
639+
# Use push_back for pyOpenMS 3.5+, append for older versions
640+
if _has_peptide_id_list:
641+
self.peptide_ids.push_back(peptide_id) # type: ignore
642+
else:
643+
self.peptide_ids.append(peptide_id) # type: ignore[union-attr]
621644

622645
# Create protein hits
623646
self._create_protein_hits(protein_list)
624647

625648
# Write idXML file
626-
filename = "/".join(filter(None, [collection, str(self.filename)]))
627-
oms.IdXMLFile().store(filename, self.protein_ids, self.peptide_ids) # type: ignore
649+
filename: str = "/".join(filter(None, [collection, str(self.filename)]))
650+
idxml_file = oms.IdXMLFile() # type: ignore
651+
idxml_file.store(filename, self.protein_ids, self.peptide_ids) # type: ignore
628652

629653
def _create_peptide_identification(
630654
self,

psm_utils/io/peptide_record.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,8 @@ def from_dataframe(peprec_df: pd.DataFrame) -> PSMList:
448448
"""
449449
psm_list = []
450450
for _, row in peprec_df.iterrows():
451-
entry = _PeprecEntry(**row.to_dict())
451+
row_dict = {str(k): v for k, v in row.to_dict().items()}
452+
entry = _PeprecEntry(**row_dict)
452453
psm_list.append(PeptideRecordReader._entry_to_psm(entry, filename=""))
453454
return PSMList(psm_list=psm_list)
454455

psm_utils/io/pepxml.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
"mzFidelity",
2828
]
2929

30+
KNOWN_METADATA_KEYS = [
31+
"num_matched_ions",
32+
"tot_num_ions",
33+
"num_missed_cleavages",
34+
]
35+
3036

3137
class PepXMLReader(ReaderBase):
3238
"""Reader for pepXML PSM files."""
@@ -127,47 +133,51 @@ def _parse_peptidoform(
127133

128134
def _parse_psm(self, spectrum_query: dict[str, Any], search_hit: dict[str, Any]) -> PSM:
129135
"""Parse pepXML PSM to PSM."""
130-
metadata = {
131-
"num_matched_ions": str(search_hit["num_matched_ions"]),
132-
"tot_num_ions": str(search_hit["tot_num_ions"]),
133-
"num_missed_cleavages": str(search_hit["num_missed_cleavages"]),
134-
}
136+
# Build metadata from optional search hit fields
137+
metadata = {key: str(search_hit[key]) for key in KNOWN_METADATA_KEYS if key in search_hit}
138+
139+
# Add all search scores to metadata
135140
metadata.update(
136141
{
137-
f"search_score_{key.lower()}": str(search_hit["search_score"][key])
138-
for key in search_hit["search_score"]
142+
f"search_score_{key.lower()}": str(value)
143+
for key, value in search_hit["search_score"].items()
139144
}
140145
)
141146

147+
# Build provenance data from optional spectrum query fields
148+
provenance_data = {
149+
k: str(v)
150+
for k, v in {
151+
"pepxml_index": spectrum_query.get("index"),
152+
"start_scan": spectrum_query.get("start_scan"),
153+
"end_scan": spectrum_query.get("end_scan"),
154+
}.items()
155+
if v is not None
156+
}
157+
142158
return PSM(
143159
peptidoform=self._parse_peptidoform(
144160
search_hit["peptide"],
145161
search_hit["modifications"],
146162
spectrum_query["assumed_charge"],
147163
),
148-
spectrum_id=spectrum_query["spectrumNativeID"]
149-
if "spectrumNativeID" in spectrum_query
150-
else spectrum_query["spectrum"],
164+
spectrum_id=spectrum_query.get("spectrumNativeID", spectrum_query.get("spectrum")),
151165
run=None,
152166
collection=None,
153167
spectrum=None,
154168
is_decoy=None,
155-
score=search_hit["search_score"][self.score_key],
169+
score=search_hit["search_score"].get(self.score_key, None),
156170
qvalue=None,
157171
pep=None,
158172
precursor_mz=mass_to_mz(
159173
spectrum_query["precursor_neutral_mass"], spectrum_query["assumed_charge"]
160174
),
161175
retention_time=spectrum_query.get("retention_time_sec"),
162176
ion_mobility=spectrum_query.get("ion_mobility"),
163-
protein_list=[p["protein"] for p in search_hit["proteins"]],
164-
rank=search_hit["hit_rank"],
177+
protein_list=[p["protein"] for p in search_hit.get("proteins", [])],
178+
rank=search_hit.get("hit_rank", None),
165179
source=None,
166-
provenance_data={
167-
"pepxml_index": str(spectrum_query["index"]),
168-
"start_scan": str(spectrum_query["start_scan"]),
169-
"end_scan": str(spectrum_query["end_scan"]),
170-
},
180+
provenance_data=provenance_data,
171181
metadata=metadata,
172182
rescoring_features={},
173183
)

psm_utils/io/percolator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,10 @@ def write_file(self, psm_list: PSMList) -> None:
342342
f, fieldnames=self._columns, delimiter="\t", extrasaction="ignore"
343343
)
344344
writer.writeheader()
345-
for psm in psm_list:
346-
writer.writerow(self._psm_to_entry(psm))
345+
for i, psm in enumerate(psm_list):
346+
entry = self._psm_to_entry(psm)
347+
entry["ScanNr"] = i
348+
writer.writerow(entry)
347349

348350
def _psm_to_entry(self, psm: PSM) -> dict[str, Any]:
349351
"""Parse PSM to Percolator Tab entry."""

0 commit comments

Comments
 (0)