Skip to content

Commit 247471b

Browse files
authored
Merge pull request #130 from CompOmics/fix/allow-mass-gap
Allow residue X with MassModification to indicate gap of known mass
2 parents 4742ee0 + 4b49b00 commit 247471b

File tree

2 files changed

+151
-4
lines changed

2 files changed

+151
-4
lines changed

psm_utils/peptidoform.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,19 @@ def sequential_composition(self) -> list[mass.Composition]:
237237
try:
238238
position_comp = mass.std_aa_comp[aa].copy()
239239
except (AttributeError, KeyError) as e:
240-
raise AmbiguousResidueException(
241-
f"Cannot resolve composition for amino acid {aa}."
242-
) from e
240+
# Allow X with modifications to specify gap with known composition
241+
if aa == "X":
242+
if tags and all(hasattr(tag, "composition") for tag in tags):
243+
position_comp = mass.Composition()
244+
else:
245+
raise AmbiguousResidueException(
246+
"Cannot resolve composition for `X` without associated formula "
247+
"modification."
248+
) from e
249+
else:
250+
raise AmbiguousResidueException(
251+
f"Cannot resolve composition for amino acid {aa}."
252+
) from e
243253
# Fixed modifications
244254
if aa in fixed_rules:
245255
position_comp += fixed_rules[aa]
@@ -348,7 +358,18 @@ def sequential_theoretical_mass(self) -> list[float]:
348358
try:
349359
position_mass = mass.std_aa_mass[aa]
350360
except (AttributeError, KeyError) as e:
351-
raise AmbiguousResidueException(f"Cannot resolve mass for amino acid {aa}.") from e
361+
# Allow X with modifications to specify gap of known mass
362+
if aa == "X":
363+
if tags and all(hasattr(tag, "mass") for tag in tags):
364+
position_mass = 0.0
365+
else:
366+
raise AmbiguousResidueException(
367+
"Cannot resolve mass for `X` without associated modification."
368+
) from e
369+
else:
370+
raise AmbiguousResidueException(
371+
f"Cannot resolve mass for amino acid {aa}."
372+
) from e
352373
# Fixed modifications
353374
if aa in fixed_rules:
354375
position_mass += fixed_rules[aa]

tests/test_peptidoform.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,134 @@ def test_add_apply_fixed_modifications(self):
136136
peptidoform.apply_fixed_modifications()
137137
assert peptidoform.proforma == expected_out
138138

139+
def test_sequential_theoretical_mass(self):
140+
"""Test sequential theoretical mass calculation."""
141+
test_cases = [
142+
# Simple peptide: (proforma_str, number_of_residues)
143+
("ACDEK", 5), # N-term, A, C, D, E, K, C-term = 7 total
144+
# Peptide with modifications
145+
("[Acetyl]-ACDEK", 5),
146+
("AC[Carbamidomethyl]DEK", 5),
147+
# Peptide with X and mass modification (gap of known mass)
148+
("ACX[+100.5]DEK", 6), # A, C, X, D, E, K
149+
("X[+50.0]ACDE", 5), # X, A, C, D, E
150+
# Multiple X residues with mass modifications
151+
("X[+100.0]ACX[+200.0]DE", 6), # X, A, C, X, D, E
152+
]
153+
154+
for proforma_str, num_residues in test_cases:
155+
peptidoform = Peptidoform(proforma_str)
156+
seq_mass = peptidoform.sequential_theoretical_mass
157+
158+
# Check that we get the right number of elements (N-term + residues + C-term)
159+
expected_length = num_residues + 2 # +2 for N-term and C-term
160+
assert len(seq_mass) == expected_length, (
161+
f"Failed for {proforma_str}: expected {expected_length}, got {len(seq_mass)}"
162+
)
163+
164+
# Check that all values are floats
165+
assert all(isinstance(m, float) for m in seq_mass), f"Failed for {proforma_str}"
166+
167+
# Check that sum matches theoretical mass (excluding charge)
168+
total_mass = sum(seq_mass)
169+
expected_total = peptidoform.theoretical_mass
170+
assert abs(total_mass - expected_total) < 1e-6, (
171+
f"Failed for {proforma_str}: {total_mass} != {expected_total}"
172+
)
173+
174+
def test_sequential_theoretical_mass_with_x_gap(self):
175+
"""Test sequential theoretical mass with X representing a gap of known mass."""
176+
# X[+100.5] should contribute 100.5 to the mass
177+
peptidoform = Peptidoform("ACX[+100.5]DE")
178+
seq_mass = peptidoform.sequential_theoretical_mass
179+
180+
# seq_mass should be: [N-term, A, C, X+100.5, D, E, C-term]
181+
assert len(seq_mass) == 7
182+
183+
# The X residue (index 3) should have mass 0.0 + 100.5 = 100.5
184+
x_mass = seq_mass[3]
185+
assert abs(x_mass - 100.5) < 1e-6, f"Expected 100.5, got {x_mass}"
186+
187+
def test_sequential_theoretical_mass_with_x_no_modification_fails(self):
188+
"""Test that X without modification fails for sequential_theoretical_mass."""
189+
from psm_utils.peptidoform import AmbiguousResidueException
190+
191+
# X without any modification should fail for mass calculation
192+
peptidoform = Peptidoform("ACXDE")
193+
194+
with pytest.raises(
195+
AmbiguousResidueException,
196+
match="Cannot resolve mass for `X` without associated modification",
197+
):
198+
_ = peptidoform.sequential_theoretical_mass
199+
200+
def test_sequential_composition(self):
201+
"""Test sequential composition calculation."""
202+
from pyteomics import mass
203+
204+
test_cases = [
205+
# Simple peptide: (proforma_str, number_of_residues)
206+
("ACDEK", 5), # N-term, A, C, D, E, K, C-term = 7 total
207+
# Peptide with modifications
208+
("[Acetyl]-ACDEK", 5),
209+
("AC[Carbamidomethyl]DEK", 5),
210+
# Peptide with terminal modifications
211+
("[Acetyl]-ACDEK-[Amidated]", 5),
212+
]
213+
214+
for proforma_str, num_residues in test_cases:
215+
peptidoform = Peptidoform(proforma_str)
216+
seq_comp = peptidoform.sequential_composition
217+
218+
# Check that we get the right number of elements (N-term + residues + C-term)
219+
expected_length = num_residues + 2 # +2 for N-term and C-term
220+
assert len(seq_comp) == expected_length, (
221+
f"Failed for {proforma_str}: expected {expected_length}, got {len(seq_comp)}"
222+
)
223+
224+
# Check that all values are Composition objects
225+
assert all(isinstance(c, mass.Composition) for c in seq_comp), (
226+
f"Failed for {proforma_str}"
227+
)
228+
229+
# Check that sum matches full composition
230+
total_comp = mass.Composition()
231+
for comp in seq_comp:
232+
total_comp += comp
233+
assert total_comp == peptidoform.composition, f"Failed for {proforma_str}"
234+
235+
def test_sequential_composition_with_x_gap(self):
236+
"""Test sequential composition with X representing a gap of known composition."""
237+
from pyteomics import mass
238+
239+
# X with formula modification should allow empty base composition
240+
peptidoform = Peptidoform("ACX[Formula:C6H12O6]DE")
241+
seq_comp = peptidoform.sequential_composition
242+
243+
# seq_comp should be: [N-term, A, C, X+composition, D, E, C-term]
244+
assert len(seq_comp) == 7
245+
246+
# The X residue (index 3) should have composition C6H12O6
247+
x_comp = seq_comp[3]
248+
expected_comp = mass.Composition({"C": 6, "H": 12, "O": 6})
249+
assert x_comp == expected_comp, f"Expected {expected_comp}, got {x_comp}"
250+
251+
def test_sequential_composition_with_x_mass_only_fails(self):
252+
"""Test that X with only mass modification fails for sequential_composition."""
253+
from psm_utils.peptidoform import AmbiguousResidueException
254+
255+
# X with only mass modification should fail for composition calculation
256+
peptidoform = Peptidoform("ACX[+100.5]DE")
257+
258+
with pytest.raises(
259+
AmbiguousResidueException,
260+
match="Cannot resolve composition for `X` without associated formula modification",
261+
):
262+
_ = peptidoform.sequential_composition
263+
139264

140265
def test_format_number_as_string():
266+
"""Test format_number_as_string function."""
141267
test_cases = [
142268
(1212.12, "+1212.12"),
143269
(-1212.12, "-1212.12"),

0 commit comments

Comments
 (0)