@@ -310,7 +310,7 @@ def encode_atoms(
310310 dict_index = {"C" : 0 , "H" : 1 , "N" : 2 , "O" : 3 , "S" : 4 , "P" : 5 },
311311 ):
312312 """
313- Extract all features we can extract... Probably the function your want to call by default
313+ Extract all features we can extract... Probably the function you want to call by default
314314
315315 Parameters
316316 ----------
@@ -324,7 +324,7 @@ def encode_atoms(
324324 indicates padding length with 'X'-characters. Shorter sequences are padded. Longer sequences
325325 are sliced shorter (C-terminal > than padding length will be missing)
326326 positions : list
327- list of positions to include seperately , for the C-terminus
327+ list of positions to include separately , for the C-terminus
328328 provide negative indices
329329 sum_mods : int
330330 value that is used to feed the second head of cerberus with summed information, for example,
@@ -336,20 +336,28 @@ def encode_atoms(
336336 dict_index : dict
337337 index position of atom for compositional features for the whole peptide (each position)
338338 charges : list
339- optional list with charges, keep emtpy if these will not effect the predicted value
339+ optional list with charges, keep empty if these will not affect the predicted value
340340
341341 Returns
342342 -------
343343 object :: pd.DataFrame
344344 feature matrix (np.matrix) of all positions (up till padding length)
345345 object :: pd.DataFrame
346- feature matrix (np.matrix) of summed positions (up till paddint length / sum_mods)
346+ feature matrix (np.matrix) of summed positions (up till padding length / sum_mods)
347347 object :: pd.DataFrame
348348 feature matrix (np.matrix) of specific positions (from positions argument)
349349 object :: pd.DataFrame
350350 feature matrix (np.matrix) of summed composition
351351 """
352352
353+ # Local helper to ensure each unique warning is logged only once.
354+ logged_warnings = set ()
355+
356+ def warn_once (message ):
357+ if message not in logged_warnings :
358+ logged_warnings .add (message )
359+ logger .warning (message )
360+
353361 # TODO param flag for CCS prediction
354362 def rolling_sum (a , n = 2 ):
355363 ret = np .cumsum (a , axis = 1 , dtype = np .float32 )
@@ -375,11 +383,11 @@ def rolling_sum(a, n=2):
375383 if seq_len > padding_length :
376384 seq = seq [0 :padding_length ]
377385 seq_len = len (seq )
378- logger . warning ("Truncating peptide (too long): %s" % (seq ))
386+ warn_once ("Truncating peptide (too long): %s" % (seq ))
379387
380388 peptide_composition = [mass .std_aa_comp [aa ] for aa in seq ]
381389
382- # Initialize all feature matrixes
390+ # Initialize all feature matrices
383391 matrix = np .zeros (
384392 (padding_length , len (dict_index .keys ())), dtype = np .float16
385393 )
@@ -395,11 +403,11 @@ def rolling_sum(a, n=2):
395403 try :
396404 matrix [i , dict_index [k ]] = v
397405 except IndexError :
398- logger . warning (
406+ warn_once (
399407 f"Could not add the following value: pos { i } for atom { k } with value { v } "
400408 )
401409 except KeyError :
402- logger . warning (
410+ warn_once (
403411 f"Could not add the following value: pos { i } for atom { k } with value { v } "
404412 )
405413
@@ -409,47 +417,44 @@ def rolling_sum(a, n=2):
409417 try :
410418 matrix_pos [p , dict_index_pos [atom ]] = val
411419 except KeyError :
412- logger . warning (f"Could not add the following atom: { atom } " )
420+ warn_once (f"Could not add the following atom: { atom } " )
413421 except IndexError :
414- logger .warning (
415- f"Could not add the following atom: { p } { atom } { val } "
416- )
422+ warn_once (f"Could not add the following atom: { p } { atom } { val } " )
417423
418424 for pn in positions_neg :
419425 aa = seq [seq_len + pn ]
420426 for atom , val in mass .std_aa_comp [aa ].items ():
421427 try :
422428 matrix_pos [pn , dict_index_pos [atom ]] = val
423429 except KeyError :
424- logger . warning (f"Could not add the following atom: { atom } " )
430+ warn_once (f"Could not add the following atom: { atom } " )
425431 except IndexError :
426- logger . warning (
432+ warn_once (
427433 f"Could not add the following atom: { pn } { atom } { val } "
428434 )
429435
430436 for i , peptide_position in enumerate (peptidoform .parsed_sequence ):
431437 try :
432438 matrix_hc [i , dict_aa [peptide_position [0 ]]] = 1.0
433439 except KeyError :
434- logger . warning (
440+ warn_once (
435441 f"Skipping the following (not in library): { i } { peptide_position } "
436442 )
437443 except IndexError :
438- # Likely to be a sequence > 60 AA
439- logger .warning (
444+ warn_once (
440445 f"Could not add the following atom: { i } { peptide_position } "
441446 )
442447
443448 if peptide_position [1 ] is not None :
444449 try :
445450 modification_composition = peptide_position [1 ][0 ].composition
446451 except KeyError :
447- logger . warning (
452+ warn_once (
448453 f"Skipping the following (not in library): { peptide_position [1 ]} "
449454 )
450455 continue
451456 except :
452- logger . warning (
457+ warn_once (
453458 f"Skipping the following (not in library): { peptide_position [1 ]} "
454459 )
455460 continue
@@ -473,7 +478,7 @@ def rolling_sum(a, n=2):
473478 ] += atom_change
474479 except KeyError :
475480 try :
476- logger . warning (
481+ warn_once (
477482 f"Could not add the following atom: { atom_position_composition } , attempting to replace the [] part"
478483 )
479484 atom_position_composition = sub (
@@ -492,21 +497,21 @@ def rolling_sum(a, n=2):
492497 dict_index_pos [atom_position_composition ],
493498 ] += atom_change
494499 except KeyError :
495- logger . warning (
500+ warn_once (
496501 f"Could not add the following atom: { atom_position_composition } , second attempt, now ignored"
497502 )
498503 continue
499504 except :
500- logger . warning (
505+ warn_once (
501506 f"Could not add the following atom: { atom_position_composition } , second attempt, now ignored"
502507 )
503508 continue
504509 except IndexError :
505- logger . warning (
510+ warn_once (
506511 f"Could not add the following atom: { i } { atom_position_composition } { atom_change } "
507512 )
508513 except :
509- logger . warning (
514+ warn_once (
510515 f"Could not add the following atom: { atom_position_composition } , second attempt, now ignored"
511516 )
512517 continue
0 commit comments