@@ -329,7 +329,7 @@ def pept_diff(p1, p2):
329329 return sum ([p1 [i ] != p2 [i ] for i in range (len (p1 ))])
330330
331331
332- def _get_normal_peptides (mhc_df , iars , peplen ):
332+ def _get_normal_peptides (job , mhc_df , iars , peplen ):
333333 """
334334 Get the corresponding normal peptides for the tumor peptides that have already been subjected to
335335 mhc:peptide binding prediction.
@@ -345,38 +345,42 @@ def _get_normal_peptides(mhc_df, iars, peplen):
345345 for pred in mhc_df .itertuples ():
346346 containing_iars = [i for i , sl in iars .items () if pred .pept in sl [0 ]]
347347 assert len (containing_iars ) != 0 , "No IARS contained the peptide"
348- if len (containing_iars ) > 1 :
349- # If there are multiple IARs, they all or none of them have to have a corresponding
350- # normal.
351- assert len (set ([len (y ) for x , y in iars .items () if x in containing_iars ])) == 1
352348 if len (iars [containing_iars [0 ]]) == 1 :
353349 # This is a fusion and has no corresponding normal
354350 normal_peptides .append ('N' * peplen )
355351 else :
356- tum , norm = iars [ containing_iars [ 0 ]]
357- pos = tum . find ( pred . pept )
358- temp_normal_pept = norm [ pos : pos + peplen ]
359- ndiff = pept_diff ( pred . pept , temp_normal_pept )
360- assert ndiff != 0
361- if ndiff == 1 :
362- normal_peptides .append (norm [ pos : pos + peplen ] )
352+ # If there are multiple IARs, they all or none of them have to have a corresponding
353+ # normal.
354+ if len ( set ([ len ( y ) for x , y in iars . items () if x in containing_iars ])) != 1 :
355+ job . fileStore . logToMaster ( 'Some IARS were found to contain the substring but were'
356+ 'inconsistent with the presence of a corresponding '
357+ 'normal.' )
358+ normal_peptides .append ('N' * peplen )
363359 else :
364- if len (tum ) == len (norm ):
365- # Too (2+) many single nucleotide changes to warrant having a normal counterpart
366- # This might be an artifact
367- normal_peptides .append ('N' * peplen )
360+ tum , norm = iars [containing_iars [0 ]]
361+ pos = tum .find (pred .pept )
362+ temp_normal_pept = norm [pos :pos + peplen ]
363+ ndiff = pept_diff (pred .pept , temp_normal_pept )
364+ assert ndiff != 0
365+ if ndiff == 1 :
366+ normal_peptides .append (norm [pos :pos + peplen ])
368367 else :
369- # There is an indel in play. The difference cannot be in the last AA as that
370- # would have come out properly in the first case. There is a possibility that
371- # the indel was in the first AA causing a shift. We can handle that by looking
372- # at the suffix.
373- pos = norm .find (pred .pept [1 :])
374- if pos != - 1 :
375- # The suffix was found,
376- normal_peptides .append (norm [pos - 1 :pos + peplen ])
377- else :
378- # The indel was too large to warrant having a normal counterpart
368+ if len (tum ) == len (norm ):
369+ # Too (2+) many single nucleotide changes to warrant having a normal
370+ # counterpart. This might be an artifact
379371 normal_peptides .append ('N' * peplen )
372+ else :
373+ # There is an indel in play. The difference cannot be in the last AA as that
374+ # would have come out properly in the first case. There is a possibility
375+ # that the indel was in the first AA causing a shift. We can handle that by
376+ # looking at the suffix.
377+ pos = norm .find (pred .pept [1 :])
378+ if pos != - 1 :
379+ # The suffix was found,
380+ normal_peptides .append (norm [pos - 1 :pos + peplen ])
381+ else :
382+ # The indel was too large to warrant having a normal counterpart
383+ normal_peptides .append ('N' * peplen )
380384
381385 mhc_df ['normal_pept' ] = normal_peptides
382386 return mhc_df , normal_peptides
@@ -417,7 +421,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
417421 'predictor' : None }
418422 elif predictor == 'Consensus' :
419423 results = _process_consensus_mhcii (mhc_file )
420- results , peptides = _get_normal_peptides (results , iars , peplen )
424+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
421425 with open ('peptides.faa' , 'w' ) as pfile :
422426 for pept in peptides :
423427 print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -431,7 +435,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
431435 'predictor' : 'Consensus' }
432436 elif predictor == 'Sturniolo' :
433437 results = _process_sturniolo_mhcii (mhc_file )
434- results , peptides = _get_normal_peptides (results , iars , peplen )
438+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
435439 with open ('peptides.faa' , 'w' ) as pfile :
436440 for pept in peptides :
437441 print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -445,7 +449,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
445449 'predictor' : 'Sturniolo' }
446450 elif predictor == 'netMHCIIpan' :
447451 results = _process_net_mhcii (mhc_file )
448- results , peptides = _get_normal_peptides (results , iars , peplen )
452+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
449453 with open ('peptides.faa' , 'w' ) as pfile :
450454 for pept in peptides :
451455 print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -465,7 +469,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
465469 mhc_file = job .fileStore .readGlobalFile (binding_result ,
466470 os .path .join (work_dir , 'mhci_results' ))
467471 results = _process_mhci (mhc_file )
468- results , peptides = _get_normal_peptides (results , iars , peplen )
472+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
469473 with open ('peptides.faa' , 'w' ) as pfile :
470474 for pept in peptides :
471475 print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
0 commit comments