Skip to content

Commit 382bfd0

Browse files
Update feat_extractor.py
1 parent 818a3d8 commit 382bfd0

File tree

1 file changed

+29
-24
lines changed

1 file changed

+29
-24
lines changed

deeplc/feat_extractor.py

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def encode_atoms(
310310
dict_index={"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5},
311311
):
312312
"""
313-
Extract all features we can extract... Probably the function your want to call by default
313+
Extract all features we can extract... Probably the function you want to call by default
314314
315315
Parameters
316316
----------
@@ -324,7 +324,7 @@ def encode_atoms(
324324
indicates padding length with 'X'-characters. Shorter sequences are padded. Longer sequences
325325
are sliced shorter (the C-terminal part beyond the padding length will be missing)
326326
positions : list
327-
list of positions to include seperately, for the C-terminus
327+
list of positions to include separately, for the C-terminus
328328
provide negative indices
329329
sum_mods : int
330330
value that is used to feed the second head of cerberus with summed information, for example,
@@ -336,20 +336,28 @@ def encode_atoms(
336336
dict_index : dict
337337
index position of atom for compositional features for the whole peptide (each position)
338338
charges : list
339-
optional list with charges, keep emtpy if these will not effect the predicted value
339+
optional list with charges, keep empty if these will not affect the predicted value
340340
341341
Returns
342342
-------
343343
object :: pd.DataFrame
344344
feature matrix (np.matrix) of all positions (up till padding length)
345345
object :: pd.DataFrame
346-
feature matrix (np.matrix) of summed positions (up till paddint length / sum_mods)
346+
feature matrix (np.matrix) of summed positions (up till padding length / sum_mods)
347347
object :: pd.DataFrame
348348
feature matrix (np.matrix) of specific positions (from positions argument)
349349
object :: pd.DataFrame
350350
feature matrix (np.matrix) of summed composition
351351
"""
352352

353+
# Local helper to ensure each unique warning is logged only once.
354+
logged_warnings = set()
355+
356+
def warn_once(message):
357+
if message not in logged_warnings:
358+
logged_warnings.add(message)
359+
logger.warning(message)
360+
353361
# TODO param flag for CCS prediction
354362
def rolling_sum(a, n=2):
355363
ret = np.cumsum(a, axis=1, dtype=np.float32)
@@ -375,11 +383,11 @@ def rolling_sum(a, n=2):
375383
if seq_len > padding_length:
376384
seq = seq[0:padding_length]
377385
seq_len = len(seq)
378-
logger.warning("Truncating peptide (too long): %s" % (seq))
386+
warn_once("Truncating peptide (too long): %s" % (seq))
379387

380388
peptide_composition = [mass.std_aa_comp[aa] for aa in seq]
381389

382-
# Initialize all feature matrixes
390+
# Initialize all feature matrices
383391
matrix = np.zeros(
384392
(padding_length, len(dict_index.keys())), dtype=np.float16
385393
)
@@ -395,11 +403,11 @@ def rolling_sum(a, n=2):
395403
try:
396404
matrix[i, dict_index[k]] = v
397405
except IndexError:
398-
logger.warning(
406+
warn_once(
399407
f"Could not add the following value: pos {i} for atom {k} with value {v}"
400408
)
401409
except KeyError:
402-
logger.warning(
410+
warn_once(
403411
f"Could not add the following value: pos {i} for atom {k} with value {v}"
404412
)
405413

@@ -409,47 +417,44 @@ def rolling_sum(a, n=2):
409417
try:
410418
matrix_pos[p, dict_index_pos[atom]] = val
411419
except KeyError:
412-
logger.warning(f"Could not add the following atom: {atom}")
420+
warn_once(f"Could not add the following atom: {atom}")
413421
except IndexError:
414-
logger.warning(
415-
f"Could not add the following atom: {p} {atom} {val}"
416-
)
422+
warn_once(f"Could not add the following atom: {p} {atom} {val}")
417423

418424
for pn in positions_neg:
419425
aa = seq[seq_len + pn]
420426
for atom, val in mass.std_aa_comp[aa].items():
421427
try:
422428
matrix_pos[pn, dict_index_pos[atom]] = val
423429
except KeyError:
424-
logger.warning(f"Could not add the following atom: {atom}")
430+
warn_once(f"Could not add the following atom: {atom}")
425431
except IndexError:
426-
logger.warning(
432+
warn_once(
427433
f"Could not add the following atom: {pn} {atom} {val}"
428434
)
429435

430436
for i, peptide_position in enumerate(peptidoform.parsed_sequence):
431437
try:
432438
matrix_hc[i, dict_aa[peptide_position[0]]] = 1.0
433439
except KeyError:
434-
logger.warning(
440+
warn_once(
435441
f"Skipping the following (not in library): {i} {peptide_position}"
436442
)
437443
except IndexError:
438-
# Likely to be a sequence > 60 AA
439-
logger.warning(
444+
warn_once(
440445
f"Could not add the following atom: {i} {peptide_position}"
441446
)
442447

443448
if peptide_position[1] is not None:
444449
try:
445450
modification_composition = peptide_position[1][0].composition
446451
except KeyError:
447-
logger.warning(
452+
warn_once(
448453
f"Skipping the following (not in library): {peptide_position[1]}"
449454
)
450455
continue
451456
except:
452-
logger.warning(
457+
warn_once(
453458
f"Skipping the following (not in library): {peptide_position[1]}"
454459
)
455460
continue
@@ -473,7 +478,7 @@ def rolling_sum(a, n=2):
473478
] += atom_change
474479
except KeyError:
475480
try:
476-
logger.warning(
481+
warn_once(
477482
f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part"
478483
)
479484
atom_position_composition = sub(
@@ -492,21 +497,21 @@ def rolling_sum(a, n=2):
492497
dict_index_pos[atom_position_composition],
493498
] += atom_change
494499
except KeyError:
495-
logger.warning(
500+
warn_once(
496501
f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored"
497502
)
498503
continue
499504
except:
500-
logger.warning(
505+
warn_once(
501506
f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored"
502507
)
503508
continue
504509
except IndexError:
505-
logger.warning(
510+
warn_once(
506511
f"Could not add the following atom: {i} {atom_position_composition} {atom_change}"
507512
)
508513
except:
509-
logger.warning(
514+
warn_once(
510515
f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored"
511516
)
512517
continue

0 commit comments

Comments
 (0)