22
33from __future__ import annotations
44
5- from typing import Optional
5+ from typing import Optional , Literal
66import pyopenms as oms
7+ import warnings
78
89
910class Py_AASequence :
1011 """
11- A Pythonic wrapper around pyOpenMS AASequence.
12+ A Pythonic, immutable wrapper around pyOpenMS AASequence.
1213
1314 This class provides intuitive properties and methods for working with
1415 amino acid sequences, including common operations like reversing and
@@ -40,7 +41,7 @@ def __init__(self, native_sequence: Optional[oms.AASequence] = None):
4041 @classmethod
4142 def from_string (cls , sequence_str : str ) -> Py_AASequence :
4243 """
43- Create AASequence from string representation.
44+ Create Py_AASequence from string representation.
4445
4546 Args:
4647 sequence_str: String representation of the amino acid sequence.
@@ -57,6 +58,20 @@ def from_string(cls, sequence_str: str) -> Py_AASequence:
5758
5859 # ==================== Pythonic Properties ====================
5960
@classmethod
def from_native(cls, native_sequence: oms.AASequence) -> Py_AASequence:
    """
    Wrap an existing pyOpenMS AASequence.

    Args:
        native_sequence: The pyOpenMS AASequence to wrap. It is handed to
            the class constructor as-is.

    Returns:
        Py_AASequence: New wrapper object built via ``cls(native_sequence)``.
    """
    wrapped = cls(native_sequence)
    return wrapped
74+
6075 @property
6176 def native (self ) -> oms .AASequence :
6277 """Return the underlying pyOpenMS AASequence."""
@@ -204,26 +219,126 @@ def __eq__(self, other: object) -> bool:
204219 return False
205220 return self .sequence == other .sequence
206221
def __getitem__(self, index) -> Py_AASequence:
    """
    Get residue(s) at position(s).

    Supports single indexing (including negative indices) and contiguous
    slicing; both return Py_AASequence objects.

    Args:
        index: Integer for a single residue, or slice object for a subsequence.

    Returns:
        Py_AASequence: Wrapped residue or subsequence. A slice that selects
        nothing (e.g. ``seq[4:1]``) yields an empty sequence.

    Raises:
        ValueError: If a slice with a step other than 1 is given.
        IndexError: If an integer index lies outside
            ``-len(self) <= index < len(self)``.

    Example:
        >>> seq = Py_AASequence.from_string("PEPTIDE")
        >>> seq[1]    # Returns Py_AASequence("E")
        >>> seq[1:4]  # Returns Py_AASequence("EPT")
        >>> seq[-1]   # Returns Py_AASequence("E")
    """
    length = len(self)

    if isinstance(index, slice):
        start, stop, step = index.indices(length)
        if step != 1:
            raise ValueError("Step slicing is not supported for amino acid sequences")
        if stop <= start:
            # slice.indices() may report stop < start (e.g. seq[4:1]); a
            # negative count is never a valid subsequence length, so answer
            # with an empty sequence instead of calling getSubsequence.
            return Py_AASequence.from_native(oms.AASequence())
        return Py_AASequence.from_native(self._sequence.getSubsequence(start, stop - start))

    # Normalise a negative index, then validate BOTH bounds: an index below
    # -length is still negative after normalisation and must be rejected.
    position = index + length if index < 0 else index
    if position < 0 or position >= length:
        # Report the index the caller actually passed, not the rewritten one.
        raise IndexError(f"Index {index} out of range for sequence of length {length}")
    return Py_AASequence.from_native(self._sequence.getSubsequence(position, 1))
221253
def __iter__(self):
    """Yield ``self[i]`` for every position ``i`` from 0 up to ``len(self) - 1``."""
    total = len(self)
    position = 0
    while position < total:
        yield self[position]
        position += 1
def __add__(self, other: Py_AASequence | str) -> Py_AASequence:
    """
    Concatenate this sequence with another sequence or a string.

    The result is built by joining the two sequence strings and parsing
    the joined string with ``Py_AASequence.from_string``.

    Args:
        other: Py_AASequence or string to append.

    Returns:
        Py_AASequence: New concatenated sequence, or ``NotImplemented``
        when ``other`` is neither a Py_AASequence nor a string.

    Example:
        >>> seq1 = Py_AASequence.from_string("PEP")
        >>> seq2 = Py_AASequence.from_string("TIDE")
        >>> combined = seq1 + seq2
        >>> print(combined.sequence)
        PEPTIDE
        >>> combined2 = seq1 + "TIDE"
        >>> print(combined2.sequence)
        PEPTIDE
    """
    if isinstance(other, Py_AASequence):
        tail = other.sequence
    elif isinstance(other, str):
        tail = other
    else:
        return NotImplemented
    return Py_AASequence.from_string(self.sequence + tail)
285+
def __radd__(self, other: str) -> Py_AASequence:
    """
    Support ``str + Py_AASequence``.

    Args:
        other: String to place in front of this sequence.

    Returns:
        Py_AASequence: New sequence parsed from ``other + self.sequence``,
        or ``NotImplemented`` when ``other`` is not a string.

    Example:
        >>> seq = Py_AASequence.from_string("TIDE")
        >>> combined = "PEP" + seq
        >>> print(combined.sequence)
        PEPTIDE
    """
    if not isinstance(other, str):
        return NotImplemented
    return Py_AASequence.from_string(other + self.sequence)
300+
def __mul__(self, times: int) -> Py_AASequence:
    """
    Repeat the sequence.

    The sequence string is repeated ``times`` times and the result is
    parsed with ``Py_AASequence.from_string``.

    Args:
        times: Number of times to repeat (must be >= 0).

    Returns:
        Py_AASequence: New repeated sequence, or ``NotImplemented`` when
        ``times`` is not an int or is negative.

    Example:
        >>> seq = Py_AASequence.from_string("PEP")
        >>> repeated = seq * 3
        >>> print(repeated.sequence)
        PEPPEPPEP
    """
    if isinstance(times, int) and times >= 0:
        return Py_AASequence.from_string(self.sequence * times)
    return NotImplemented
320+
def __rmul__(self, times: int) -> Py_AASequence:
    """Support ``int * Py_AASequence`` by delegating to ``__mul__``."""
    repeated = self.__mul__(times)
    return repeated
def __contains__(self, substring: str) -> bool:
    """Support ``substring in seq`` by delegating to ``has_substring``."""
    found = self.has_substring(substring)
    return found
327+
def __hash__(self) -> int:
    """Hash by the sequence string so instances can be used in sets/dicts."""
    key = self.sequence
    return hash(key)
331+
def __lt__(self, other: Py_AASequence) -> bool:
    """
    Order two sequences by comparing their sequence strings.

    Returns ``NotImplemented`` when ``other`` is not a Py_AASequence.
    """
    if isinstance(other, Py_AASequence):
        return self.sequence < other.sequence
    return NotImplemented
def count(self, residue: str) -> int:
    """
    Count occurrences of a residue, consistent with ``str.count()``.

    Counting is done on the unmodified sequence string, so modifications
    are not taken into account. A warning saying so is emitted on every call.

    Args:
        residue: Substring to count in the unmodified sequence.

    Returns:
        int: Number of non-overlapping occurrences, as given by ``str.count``.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this library line, so users can see which call triggered it.
    warnings.warn("count method does not account for modifications", stacklevel=2)
    return self.unmodified_sequence.count(residue)
341+
227342 # ==================== Additional Utilities ====================
228343
229344 def get_mz (self , charge : int ) -> float :
@@ -277,4 +392,37 @@ def has_suffix(self, suffix: str) -> bool:
277392 bool: True if sequence ends with suffix.
278393 """
279394 return self ._sequence .hasSuffix (oms .AASequence .fromString (suffix ))
395+
396+
397+ # ===================== Exporting =======================
def to_string(self, modified=True, mod_format: Literal['default', 'unimod', 'bracket'] = 'default') -> str:
    """
    Export the sequence as a string.

    Args:
        modified (bool): When False, the unmodified sequence is returned and
            ``mod_format`` is not consulted. When True, modifications are
            included using the notation selected by ``mod_format``.
        mod_format (Literal['default', 'unimod', 'bracket']): Notation for
            modifications:
            'default' uses the native ``toString()``,
            'unimod' uses the native ``toUniModString()``,
            'bracket' uses the native ``toBracketString()``.
            Default is 'default'.

    Returns:
        str: Amino acid sequence as string.

    Raises:
        ValueError: If ``modified`` is true and ``mod_format`` is not one of
            the supported values.

    Example:
        >>> seq = Py_AASequence.from_string("PEPTIDE")
        >>> seq_str = seq.to_string()
    """
    if not modified:
        return self.unmodified_sequence

    native = self._sequence
    if mod_format == 'default':
        return native.toString()
    if mod_format == 'unimod':
        return native.toUniModString()
    if mod_format == 'bracket':
        return native.toBracketString()
    raise ValueError(f"Unsupported mod_format: { mod_format } , supported are 'default', 'unimod' and 'bracket'")
0 commit comments