Skip to content

Commit 802f926

Browse files
Wrap consensus-map protein inference (#19)
* Wrap consensus-map protein inference * Add protein inference examples to README
1 parent a45efab commit 802f926

File tree

7 files changed

+343
-3
lines changed

7 files changed

+343
-3
lines changed

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,38 @@ print(f"Consensus contains {len(consensus)} features")
145145
The helper returns a fresh `Py_ConsensusMap` instance that can be exported,
146146
converted to a pandas DataFrame, or iterated for downstream analysis.
147147

148+
## Protein inference and rollups
149+
150+
Recent wrappers expose multiple entry points for inferring proteins directly
151+
from the Python API, starting from identification files, feature maps,
152+
or full consensus maps.
153+
154+
```python
155+
from openms_python import Identifications, Py_FeatureMap, Py_ConsensusMap
156+
157+
# 1) Run inference straight from an idXML file
158+
ids = Identifications.from_idxml("search_results.idXML")
159+
protein_summary = ids.infer_proteins(algorithm="bayesian")
160+
print(protein_summary.summary())
161+
162+
# 2) Trigger inference on a feature map (assigned + unassigned peptides)
163+
fmap = Py_FeatureMap().load("sample.featureXML")
164+
proteins = fmap.infer_proteins(include_unassigned=True)
165+
proteins.to_idxml("sample_proteins.idXML")
166+
167+
# 3) Operate directly on a consensus map
168+
consensus = Py_ConsensusMap().load("merged.consensusXML")
169+
consensus.infer_proteins(algorithm="basic")
170+
171+
# Optionally compute quantitative protein ratios in place
172+
consensus.infer_protein_quantities(reference_map=1)
173+
consensus.store("merged_with_proteins.consensusXML")
174+
```
175+
176+
All helpers share the same ergonomic parameter handling, accept native
177+
`pyopenms` parameters (`oms.Param`) or plain dictionaries, and return
178+
`Identifications` or the map instance itself for easy method chaining.
179+
148180
## Identification performance showcase
149181

150182
Looking for a larger end-to-end example? `tests/test_idperformance.py` ships with

openms_python/py_consensusmap.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from ._io_utils import ensure_allowed_suffix, CONSENSUS_MAP_EXTENSIONS
1111
from .py_featuremap import Py_FeatureMap
12+
from .py_identifications import Identifications
1213

1314

1415
class Py_ConsensusMap:
@@ -112,6 +113,48 @@ def store(self, filepath: Union[str, Path]) -> 'Py_ConsensusMap':
112113
oms.ConsensusXMLFile().store(str(filepath), self._consensus_map)
113114
return self
114115

116+
# ==================== Protein inference ====================
117+
118+
def infer_proteins(
    self,
    *,
    algorithm: str = "basic",
    params: Optional[Union[oms.Param, Dict[str, Union[int, float, str]]]] = None,
    include_unassigned: bool = False,
    greedy_group_resolution: bool = True,
    experimental_design: Optional[oms.ExperimentalDesign] = None,
) -> Identifications:
    """Infer proteins from the identifications carried by this consensus map.

    The map's identifications are gathered with ``_collect_identifications``
    and the call is delegated to ``Identifications.infer_proteins``, with the
    wrapped native map supplied as its ``consensus_map`` argument. All keyword
    arguments are forwarded unchanged.

    Parameters
    ----------
    algorithm:
        Name of the inference algorithm to run.
    params:
        Optional :class:`pyopenms.Param` or plain dictionary of algorithm
        parameters.
    include_unassigned:
        Forwarded as-is to the underlying inference call.
    greedy_group_resolution:
        Forwarded as-is to the underlying inference call.
    experimental_design:
        Optional :class:`pyopenms.ExperimentalDesign`, forwarded as-is.

    Returns
    -------
    Identifications
        The container produced by ``Identifications.infer_proteins``.
    """

    forwarded = dict(
        algorithm=algorithm,
        params=params,
        consensus_map=self._consensus_map,
        include_unassigned=include_unassigned,
        greedy_group_resolution=greedy_group_resolution,
        experimental_design=experimental_design,
    )
    return self._collect_identifications().infer_proteins(**forwarded)
138+
139+
def infer_protein_quantities(self, reference_map: int = 0) -> "Py_ConsensusMap":
    """Compute protein-level quantities in place on the wrapped consensus map.

    This is a thin wrapper around :class:`pyopenms.ProteinInference`, which
    attaches quantitative results to the map's stored
    :class:`pyopenms.ProteinIdentification` entries based on their associated
    peptide identifications.

    Parameters
    ----------
    reference_map:
        Index of the reference (e.g. iTRAQ) channel used as denominator when
        calculating protein ratios. The value is coerced with ``int()``
        before it is handed to pyopenms.

    Returns
    -------
    Py_ConsensusMap
        This instance, so calls can be chained.
    """

    inference = oms.ProteinInference()
    native_map = self._consensus_map
    reference_index = int(reference_map)
    inference.infer(native_map, reference_index)
    return self
157+
115158
# ==================== Alignment helpers ====================
116159

117160
@classmethod
@@ -362,3 +405,10 @@ def _link_feature_maps(
362405
consensus_map.setColumnHeaders(headers)
363406

364407
return consensus_map
408+
409+
def _collect_identifications(self) -> Identifications:
    """Bundle this map's protein and peptide identifications into one container.

    The peptide list starts with the map-level unassigned peptide
    identifications, followed by the identifications attached to each
    consensus feature in iteration order.
    """
    native_map = self._consensus_map
    protein_ids = native_map.getProteinIdentifications()
    peptide_ids = list(native_map.getUnassignedPeptideIdentifications())
    peptide_ids += [
        peptide_id
        for consensus_feature in native_map
        for peptide_id in consensus_feature.getPeptideIdentifications()
    ]
    return Identifications(protein_ids, peptide_ids)

openms_python/py_featuremap.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
from __future__ import annotations
33

44
from pathlib import Path
5-
from typing import Iterable, Iterator, Optional, Union
5+
from typing import Dict, Iterable, Iterator, Optional, Union
66

77
import pyopenms as oms
88
import pandas as pd
99
from ._io_utils import ensure_allowed_suffix, FEATURE_MAP_EXTENSIONS
1010
from .py_feature import Py_Feature
11+
from .py_identifications import Identifications
1112

1213

1314
class Py_FeatureMap:
@@ -111,6 +112,28 @@ def store(self, filepath: Union[str, Path]) -> 'Py_FeatureMap':
111112
oms.FeatureXMLFile().store(str(filepath), self._feature_map)
112113
return self
113114

115+
# ==================== Protein inference ====================
116+
117+
def infer_proteins(
    self,
    *,
    algorithm: str = "basic",
    params: Optional[Union[oms.Param, Dict[str, Union[int, float, str]]]] = None,
    include_unassigned: bool = False,
    greedy_group_resolution: bool = True,
    experimental_design: Optional[oms.ExperimentalDesign] = None,
) -> Identifications:
    """Run protein inference on the identifications stored in this map.

    The peptide identifications assigned to features are always used. The
    map-level unassigned peptide identifications are added only when
    ``include_unassigned`` is true; without this selection the flag would
    have no effect, because the peptide-list inference path does not read it.

    Parameters
    ----------
    algorithm:
        Name of the inference algorithm to run.
    params:
        Optional :class:`pyopenms.Param` or plain dictionary of algorithm
        parameters.
    include_unassigned:
        Whether the map's unassigned peptide identifications take part in
        the inference. When included they precede the assigned ones.
    greedy_group_resolution:
        Forwarded as-is to ``Identifications.infer_proteins``.
    experimental_design:
        Optional :class:`pyopenms.ExperimentalDesign`, forwarded as-is.

    Returns
    -------
    Identifications
        The container produced by ``Identifications.infer_proteins``.
    """

    native_map = self._feature_map
    proteins = native_map.getProteinIdentifications()

    peptides = []
    if include_unassigned:
        peptides.extend(native_map.getUnassignedPeptideIdentifications())
    peptides.extend(native_map.get_assigned_peptide_identifications())

    return Identifications(proteins, peptides).infer_proteins(
        algorithm=algorithm,
        params=params,
        include_unassigned=include_unassigned,
        greedy_group_resolution=greedy_group_resolution,
        experimental_design=experimental_design,
    )
136+
114137
# ==================== pandas integration ====================
115138

116139
def to_dataframe(self) -> pd.DataFrame:
@@ -251,7 +274,7 @@ def _normalize_meta_value(value: object):
251274
except Exception:
252275
return value
253276
return value
254-
277+
255278
def _as_native_feature(self, feature: Union[oms.Feature, Py_Feature]) -> oms.Feature:
256279
if isinstance(feature, Py_Feature):
257280
return feature.native
@@ -261,3 +284,9 @@ def _as_native_feature(self, feature: Union[oms.Feature, Py_Feature]) -> oms.Fea
261284
"Features must be pyopenms.Feature or Py_Feature instances, "
262285
f"got {type(feature).__name__}"
263286
)
287+
288+
def _collect_identifications(self) -> Identifications:
    """Bundle this map's protein and peptide identifications into one container.

    The peptide list holds the map-level unassigned peptide identifications
    first, followed by the ones returned by
    ``get_assigned_peptide_identifications()``.
    """
    native_map = self._feature_map
    protein_ids = native_map.getProteinIdentifications()
    peptide_ids = [
        *native_map.getUnassignedPeptideIdentifications(),
        *native_map.get_assigned_peptide_identifications(),
    ]
    return Identifications(protein_ids, peptide_ids)

openms_python/py_identifications.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
from __future__ import annotations
33

44
from pathlib import Path
5-
from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union
5+
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, Union, TYPE_CHECKING
66

77
import pyopenms as oms
88

99
from ._io_utils import ensure_allowed_suffix, IDENTIFICATION_EXTENSIONS
1010

11+
if TYPE_CHECKING: # pragma: no cover - imported for type checking only
12+
from .py_consensusmap import Py_ConsensusMap
13+
1114

1215
class ProteinIdentifications:
1316
"""Sequence-like container for :class:`pyopenms.ProteinIdentification`."""
@@ -246,6 +249,82 @@ def filter_by_fdr(
246249
kept_proteins = [entry for entry in proteins if _extract_q_value(entry, qvalue_keys, False) <= threshold]
247250
return Identifications(kept_proteins, self.peptides.copy())
248251

252+
def infer_proteins(
    self,
    *,
    algorithm: str = "basic",
    params: Optional[Union[oms.Param, Dict[str, Union[int, float, str]]]] = None,
    consensus_map: Optional[Union["Py_ConsensusMap", oms.ConsensusMap]] = None,
    include_unassigned: bool = False,
    greedy_group_resolution: bool = True,
    experimental_design: Optional[oms.ExperimentalDesign] = None,
) -> "Identifications":
    """Run a protein inference algorithm and return the updated identifications.

    Inference works on copies of the stored entries, so the identifications
    held by this container are not modified.

    Parameters
    ----------
    algorithm:
        Name of the inference algorithm to run (case-insensitive). Supported
        values are ``"basic"`` and ``"bayesian"``.
    params:
        Optional parameter dictionary or :class:`pyopenms.Param` applied to
        the underlying OpenMS algorithm.
    consensus_map:
        When provided, inference is performed on identifications attached to
        this :class:`Py_ConsensusMap` / :class:`pyopenms.ConsensusMap`
        instead of the peptide list.
    include_unassigned:
        Only used by the basic algorithm when ``consensus_map`` is given; it
        is passed through to ``BasicProteinInferenceAlgorithm.run``.
        NOTE(review): per the OpenMS API this selects whether the map's
        unassigned peptide identifications are considered; confirm against
        the installed pyopenms version.
    greedy_group_resolution:
        Passed to Epifany (the Bayesian implementation) to control how
        indistinguishable protein groups are resolved.
    experimental_design:
        Optional :class:`pyopenms.ExperimentalDesign` forwarded to the
        Bayesian algorithm for replicate-aware inference.

    Returns
    -------
    Identifications
        A new container with the inferred proteins and the peptide copies.
        For the Bayesian algorithm on a consensus map, the proteins are read
        back from the map after inference; the peptides are the copies taken
        from this container.

    Raises
    ------
    ValueError
        If ``algorithm`` is not supported, or if a protein identification is
        required (basic on a consensus map, Bayesian on the peptide list) but
        this container holds none.
    TypeError
        If ``consensus_map`` is neither a :class:`Py_ConsensusMap` nor a
        :class:`pyopenms.ConsensusMap`.
    """

    algorithm = algorithm.lower()
    # Reject unknown names before paying for the deep copies below.
    if algorithm not in ("basic", "bayesian"):
        raise ValueError(
            f"Unknown protein inference algorithm '{algorithm}'. "
            "Supported values are 'basic' and 'bayesian'."
        )

    peptides = [oms.PeptideIdentification(entry) for entry in self.peptides.native]
    proteins = [oms.ProteinIdentification(entry) for entry in self.proteins.native]

    if algorithm == "basic":
        runner = oms.BasicProteinInferenceAlgorithm()
        _apply_algorithm_params(runner, params)
        if consensus_map is None:
            runner.run(peptides, proteins)
        else:
            if not proteins:
                raise ValueError("Protein inference requires at least one protein identification")
            native_map = _coerce_consensus_map(consensus_map)
            # The consensus-map overload fills a single protein run.
            runner.run(native_map, proteins[0], bool(include_unassigned))
        return Identifications(proteins, peptides)

    runner = oms.BayesianProteinInferenceAlgorithm()
    _apply_algorithm_params(runner, params)
    # The design is an optional trailing argument; only pass it when supplied.
    design_args = () if experimental_design is None else (experimental_design,)

    if consensus_map is not None:
        native_map = _coerce_consensus_map(consensus_map)
        runner.inferPosteriorProbabilities(native_map, bool(greedy_group_resolution), *design_args)
        # The runner only ever sees the map, so the local protein copies hold
        # no results. Read the inferred proteins back from the map instead.
        inferred = list(native_map.getProteinIdentifications())
        return Identifications(inferred, peptides)

    if not proteins:
        raise ValueError("Protein inference requires at least one protein identification")
    runner.inferPosteriorProbabilities(proteins, peptides, bool(greedy_group_resolution), *design_args)
    return Identifications(proteins, peptides)
327+
249328
def summary(self) -> dict:
250329
"""Return basic counts about the contained identifications."""
251330

@@ -345,3 +424,29 @@ def _target_decoy_label(entry, hit, is_peptide: bool) -> Optional[str]:
345424
if entry.metaValueExists("target_decoy"):
346425
return str(entry.getMetaValue("target_decoy")).lower()
347426
return None
427+
428+
429+
def _apply_algorithm_params(algorithm, params: Optional[Union[oms.Param, Dict[str, Union[int, float, str]]]]) -> None:
430+
if params is None:
431+
return
432+
if isinstance(params, oms.Param):
433+
algorithm.setParameters(oms.Param(params))
434+
return
435+
param_obj = algorithm.getParameters()
436+
for key, value in params.items():
437+
param_obj.setValue(key, value)
438+
algorithm.setParameters(param_obj)
439+
440+
441+
def _coerce_consensus_map(
    consensus_map: Union["Py_ConsensusMap", oms.ConsensusMap]
) -> oms.ConsensusMap:
    """Return the native :class:`pyopenms.ConsensusMap` behind *consensus_map*.

    A native map is returned unchanged; a ``Py_ConsensusMap`` wrapper yields
    its ``native`` attribute.

    Raises
    ------
    TypeError
        If the argument is neither of the two supported types.
    """
    if not isinstance(consensus_map, oms.ConsensusMap):
        # Imported lazily to avoid a circular dependency with py_consensusmap.
        from .py_consensusmap import Py_ConsensusMap

        if not isinstance(consensus_map, Py_ConsensusMap):
            raise TypeError(
                "consensus_map must be a Py_ConsensusMap or pyopenms.ConsensusMap instance"
            )
        return consensus_map.native
    return consensus_map

tests/test_py_consensusmap.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
oms = pytest.importorskip("pyopenms")
55

66
from openms_python.py_consensusmap import Py_ConsensusMap
7+
from openms_python.py_identifications import Identifications
78

89

910
def build_consensus_map(count: int = 3) -> Py_ConsensusMap:
@@ -17,6 +18,28 @@ def build_consensus_map(count: int = 3) -> Py_ConsensusMap:
1718
return Py_ConsensusMap(cmap)
1819

1920

21+
def _protein(identifier: str, accession: str) -> oms.ProteinIdentification:
    """Build a protein identification run holding a single hit for *accession*."""
    protein_hit = oms.ProteinHit()
    protein_hit.setAccession(accession)

    run = oms.ProteinIdentification()
    run.setIdentifier(identifier)
    run.setHits([protein_hit])
    return run
28+
29+
30+
def _peptide(identifier: str, sequence: str, score: float, accession: str) -> oms.PeptideIdentification:
    """Build a peptide identification with one scored hit linked to *accession*."""
    protein_link = oms.PeptideEvidence()
    protein_link.setProteinAccession(accession)

    peptide_hit = oms.PeptideHit()
    peptide_hit.setSequence(oms.AASequence.fromString(sequence))
    peptide_hit.setScore(score)
    peptide_hit.setPeptideEvidences([protein_link])

    peptide_id = oms.PeptideIdentification()
    peptide_id.setIdentifier(identifier)
    peptide_id.setHits([peptide_hit])
    return peptide_id
41+
42+
2043
def test_py_consensusmap_len_and_indexing():
2144
cmap = build_consensus_map()
2245

@@ -149,3 +172,44 @@ def test_py_consensusmap_from_dataframe_requires_columns():
149172

150173
with pytest.raises(ValueError):
151174
Py_ConsensusMap.from_dataframe(df)
175+
176+
177+
def test_py_consensusmap_infer_proteins_uses_consensus_map():
    """Basic inference on a wrapped map scores the protein from its peptides.

    One consensus feature carries two peptide identifications (scores 5.0 and
    30.0) for the same accession; the single protein hit is expected to end
    up with a score of 30.0.
    """
    run_id, accession = "run", "P10"

    consensus_feature = oms.ConsensusFeature()
    consensus_feature.setPeptideIdentifications(
        [
            _peptide(run_id, "PEPA", 5.0, accession),
            _peptide(run_id, "PEPB", 30.0, accession),
        ]
    )

    native_map = oms.ConsensusMap()
    native_map.setProteinIdentifications([_protein(run_id, accession)])
    native_map.push_back(consensus_feature)

    result = Py_ConsensusMap(native_map).infer_proteins(algorithm="basic", include_unassigned=True)

    assert isinstance(result, Identifications)
    top_hit = result.protein_identifications[0].getHits()[0]
    assert top_hit.getScore() == pytest.approx(30.0)
194+
195+
196+
def test_py_consensusmap_infer_protein_quantities(monkeypatch):
    """``infer_protein_quantities`` delegates to ``oms.ProteinInference``.

    ``pyopenms.ProteinInference`` is swapped for a recording stand-in, so the
    test checks only what the wrapper passes on and that it returns itself.
    """
    recorded = {}

    class DummyInference:
        def __init__(self):
            recorded["instantiated"] = True

        def infer(self, consensus_map, reference_map):
            recorded.update(consensus_map=consensus_map, reference_map=reference_map)

    # The class itself is the zero-argument factory the wrapper calls.
    monkeypatch.setattr(oms, "ProteinInference", DummyInference)

    wrapper = Py_ConsensusMap()
    returned = wrapper.infer_protein_quantities(reference_map=3)

    assert returned is wrapper
    assert recorded["instantiated"] is True
    assert recorded["consensus_map"] is wrapper.native
    assert recorded["reference_map"] == 3

0 commit comments

Comments
 (0)