@@ -205,31 +205,17 @@ def create_stylized_sequence(peptide_sequence):
205
205
206
206
if nucleotide .open_tag :
207
207
208
- if nucleotide .large : # we are assuming that a cystine is never in the classI and classIi
209
- new_string += '<span style="font-size:105%">'
210
- new_string += nucleotide .nucleotide
211
-
212
- if nucleotide .bold and nucleotide .color and nucleotide .underline :
213
- new_string += '<span style="font-weight:bold;color:#ff0000;text-decoration:underline;">'
214
- new_string += nucleotide .nucleotide
215
- elif nucleotide .bold and not nucleotide .color and not nucleotide .underline :
216
- new_string += '<span style="font-weight:bold;">'
217
- new_string += nucleotide .nucleotide
218
- elif not nucleotide .bold and nucleotide .color and not nucleotide .underline :
219
- new_string += '<span style="color:#ff0000;">'
220
- new_string += nucleotide .nucleotide
221
- elif not nucleotide .bold and not nucleotide .color and nucleotide .underline :
222
- new_string += '<span style="text-decoration:underline;">'
223
- new_string += nucleotide .nucleotide
224
- elif nucleotide .bold and nucleotide .color and not nucleotide .underline :
225
- new_string += '<span style="font-weight:bold;color:#ff0000;">'
226
- new_string += nucleotide .nucleotide
227
- elif not nucleotide .bold and nucleotide .color and nucleotide .underline :
228
- new_string += '<span style="color:#ff0000;text-decoration:underline;">'
229
- new_string += nucleotide .nucleotide
230
- elif nucleotide .bold and not nucleotide .color and nucleotide .underline :
231
- new_string += '<span style="font-weight:bold;text-decoration:underline;">'
232
- new_string += nucleotide .nucleotide
208
+ new_string += '<span style="'
209
+ if nucleotide .bold :
210
+ new_string += 'font-weight:bold;'
211
+ if nucleotide .color :
212
+ new_string += 'color:#ff0000;'
213
+ if nucleotide .underline :
214
+ new_string += 'text-decoration:underline;'
215
+ if nucleotide .large :
216
+ new_string += 'font-size:105%;'
217
+ new_string += '">'
218
+ new_string += nucleotide .nucleotide
233
219
234
220
if not nucleotide .large and not nucleotide .bold and not nucleotide .color and not nucleotide .underline :
235
221
new_string += nucleotide .nucleotide
@@ -241,56 +227,41 @@ def main():
241
227
args = parse_arguments ()
242
228
243
229
# read in classI and class II
244
- #peptides_51mer = pd.read_excel("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/10146-0021_Peptides_51-mer.xlsx")
245
- #classI = pd.read_csv("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/classI.TWJF-10146-0021-Tumor_Lysate.all_epitopes.aggregated.tsv", sep="\t")
246
- #classII = pd.read_csv("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/classII.TWJF-10146-0021-Tumor_Lysate.all_epitopes.aggregated.tsv", sep="\t")
247
-
248
230
peptides_51mer = pd .read_excel (args .p )
249
231
classI = pd .read_csv (args .classI , sep = "\t " )
250
232
classII = pd .read_csv (args .classII , sep = "\t " )
251
233
252
234
# Create a universal ID by editing the peptide 51mer ID
253
235
peptides_51mer .rename (columns = {'ID' : 'full ID' }, inplace = True )
254
- peptides_51mer ['ID' ] = peptides_51mer ['full ID' ]
236
+ peptides_51mer ['51mer ID' ] = peptides_51mer ['full ID' ]
255
237
256
- peptides_51mer ['ID' ] = peptides_51mer ['ID' ].apply (lambda x : '.' .join (x .split ('.' )[1 :])) # Removing before first period, periods will be removed
238
+ peptides_51mer ['51mer ID' ] = peptides_51mer ['51mer ID' ].apply (lambda x : '.' .join (x .split ('.' )[1 :])) # Removing before first period, periods will be removed
257
239
258
- peptides_51mer ['ID' ] = peptides_51mer ['ID' ].apply (lambda x : '.' .join (x .split ('.' )[1 :])) # Removing before second period
259
- peptides_51mer ['ID' ] = peptides_51mer ['ID' ].apply (lambda x : '.' .join (x .split ('.' )[:3 ]) + '.' + '.' .join (x .split ('.' )[4 :]))
240
+ peptides_51mer ['51mer ID' ] = peptides_51mer ['51mer ID' ].apply (lambda x : '.' .join (x .split ('.' )[1 :])) # Removing before second period
241
+ peptides_51mer ['51mer ID' ] = peptides_51mer ['51mer ID' ].apply (lambda x : '.' .join (x .split ('.' )[:3 ]) + '.' + '.' .join (x .split ('.' )[4 :]))
260
242
261
243
262
244
for index , row in peptides_51mer .iterrows ():
263
- for i , char in enumerate (row ['ID' ][::- 1 ]):
245
+ for i , char in enumerate (row ['51mer ID' ][::- 1 ]):
264
246
if char .isdigit ():
265
- peptides_51mer .at [index , 'ID' ] = row ['ID' ][:- i ]
247
+ peptides_51mer .at [index , '51mer ID' ] = row ['51mer ID' ][:- i ]
266
248
break
267
249
else :
268
- result = row ['ID' ]
269
-
270
- # create a key that is gene, transcript, AA change for CLASSI
271
- classII ['modified AA Change' ] = classII ['AA Change' ]
272
-
273
- # Apply the function to the 'Value' column
274
- classII ['modified AA Change' ] = classII ['modified AA Change' ].apply (rearrange_string )
250
+ result = row ['51mer ID' ]
275
251
276
- classII ['ID' ] = classII ['Gene' ] + '.' + classII ['Best Transcript' ] + '.' + classII ['modified AA Change' ]
252
+ # create a dataframe that contains the classI and classII peptide sequence
253
+ classI .rename (columns = {"Best Peptide" :"Best Peptide Class I" }, inplace = True )
254
+ classII .rename (columns = {"Best Peptide" :"Best Peptide Class II" }, inplace = True )
277
255
278
- # create a key that is gene, transcript, AA change for CLASSI
256
+ # create a key that is gene, transcript, AA change for ClassI to join to the peptides order form
279
257
classI ['modified AA Change' ] = classI ['AA Change' ]
280
-
281
- # Apply the function to the 'Value' column
282
258
classI ['modified AA Change' ] = classI ['modified AA Change' ].apply (rearrange_string )
259
+ classI ['51mer ID' ] = classI ['Gene' ] + '.' + classI ['Best Transcript' ] + '.' + classI ['modified AA Change' ]
283
260
284
- classI ['ID' ] = classI ['Gene' ] + '.' + classI ['Best Transcript' ] + '.' + classI ['modified AA Change' ]
285
-
286
- # Merge the sequences from classI and classII with peptide 51mer
287
- merged_peptide_51mer = pd .merge (peptides_51mer , classII [['ID' , 'Best Peptide' ]], on = 'ID' , how = 'left' )
288
-
289
- merged_peptide_51mer .rename (columns = {"Best Peptide" :"Best Peptide Class II" }, inplace = True )
290
-
291
- merged_peptide_51mer = pd .merge (merged_peptide_51mer , classI [['ID' , 'Best Peptide' , 'Pos' ]], on = 'ID' , how = 'left' )
261
+ class_sequences = pd .merge (classI [['ID' , 'Best Peptide Class I' , '51mer ID' , 'Pos' ]], classII [['ID' , 'Best Peptide Class II' ]], on = 'ID' , how = 'left' )
292
262
293
- merged_peptide_51mer .rename (columns = {"Best Peptide" :"Best Peptide Class I" }, inplace = True )
263
+ # Create a dataframe that has the classI and classII sequence
264
+ merged_peptide_51mer = pd .merge (peptides_51mer , class_sequences , on = '51mer ID' , how = 'left' )
294
265
295
266
# convert peptide 51mer to HTML
296
267
peptides_51mer_html = peptides_51mer .to_html (index = False ) # convert to html
@@ -301,16 +272,17 @@ def main():
301
272
302
273
for index , row in peptides_51mer .iterrows ():
303
274
304
- search_string = row ['full ID' ]
275
+ search_string = row ['51mer ID' ]
276
+ print (search_string )
305
277
306
278
#classII_sequence
307
- classII_peptide = merged_peptide_51mer .loc [merged_peptide_51mer ['full ID' ] == search_string , 'Best Peptide Class II' ].values [0 ]
279
+ classII_peptide = merged_peptide_51mer .loc [merged_peptide_51mer ['51mer ID' ] == search_string , 'Best Peptide Class II' ].values [0 ]
308
280
#classI_sequence
309
- classI_peptide = merged_peptide_51mer .loc [merged_peptide_51mer ['full ID' ] == search_string , 'Best Peptide Class I' ].values [0 ]
281
+ classI_peptide = merged_peptide_51mer .loc [merged_peptide_51mer ['51mer ID' ] == search_string , 'Best Peptide Class I' ].values [0 ]
310
282
311
283
312
284
# mutant peptide position ---
313
- mutant_peptide_pos = str (merged_peptide_51mer .loc [merged_peptide_51mer ['full ID' ] == search_string , 'Pos' ].values [0 ])
285
+ mutant_peptide_pos = str (merged_peptide_51mer .loc [merged_peptide_51mer ['51mer ID' ] == search_string , 'Pos' ].values [0 ])
314
286
315
287
# Find the tag containing the search string
316
288
tag_with_search_string = peptides_51mer_soup .find ('td' , string = search_string )
@@ -330,7 +302,7 @@ def main():
330
302
# actually let's break class I and class II into two steps and handle the mutated nucleotide in the class I function
331
303
# it should be basically like at that position in the class I set
332
304
333
- set_underline (peptide_sequence , mutant_peptide_pos , row ['full ID' ])
305
+ set_underline (peptide_sequence , mutant_peptide_pos , row ['51mer ID' ])
334
306
335
307
set_span_tags (peptide_sequence ) # pass by reference
336
308
0 commit comments