Skip to content

Commit ef92cce

Browse files
Fix an extra C being added when there are cysteines in classI or classII; also change how sheets are joined to account for different classI and classII transcripts
1 parent 7de6ef1 commit ef92cce

File tree

2 files changed

+34
-62
lines changed

2 files changed

+34
-62
lines changed

scripts/color_peptides51mer.py

Lines changed: 32 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -205,31 +205,17 @@ def create_stylized_sequence(peptide_sequence):
205205

206206
if nucleotide.open_tag:
207207

208-
if nucleotide.large: # we are assuming that a cystine is never in the classI and classIi
209-
new_string += '<span style="font-size:105%">'
210-
new_string += nucleotide.nucleotide
211-
212-
if nucleotide.bold and nucleotide.color and nucleotide.underline:
213-
new_string += '<span style="font-weight:bold;color:#ff0000;text-decoration:underline;">'
214-
new_string += nucleotide.nucleotide
215-
elif nucleotide.bold and not nucleotide.color and not nucleotide.underline:
216-
new_string += '<span style="font-weight:bold;">'
217-
new_string += nucleotide.nucleotide
218-
elif not nucleotide.bold and nucleotide.color and not nucleotide.underline:
219-
new_string += '<span style="color:#ff0000;">'
220-
new_string += nucleotide.nucleotide
221-
elif not nucleotide.bold and not nucleotide.color and nucleotide.underline:
222-
new_string += '<span style="text-decoration:underline;">'
223-
new_string += nucleotide.nucleotide
224-
elif nucleotide.bold and nucleotide.color and not nucleotide.underline:
225-
new_string += '<span style="font-weight:bold;color:#ff0000;">'
226-
new_string += nucleotide.nucleotide
227-
elif not nucleotide.bold and nucleotide.color and nucleotide.underline:
228-
new_string += '<span style="color:#ff0000;text-decoration:underline;">'
229-
new_string += nucleotide.nucleotide
230-
elif nucleotide.bold and not nucleotide.color and nucleotide.underline:
231-
new_string += '<span style="font-weight:bold;text-decoration:underline;">'
232-
new_string += nucleotide.nucleotide
208+
new_string += '<span style="'
209+
if nucleotide.bold:
210+
new_string += 'font-weight:bold;'
211+
if nucleotide.color:
212+
new_string += 'color:#ff0000;'
213+
if nucleotide.underline:
214+
new_string += 'text-decoration:underline;'
215+
if nucleotide.large:
216+
new_string += 'font-size:105%;'
217+
new_string += '">'
218+
new_string += nucleotide.nucleotide
233219

234220
if not nucleotide.large and not nucleotide.bold and not nucleotide.color and not nucleotide.underline:
235221
new_string += nucleotide.nucleotide
@@ -241,56 +227,41 @@ def main():
241227
args = parse_arguments()
242228

243229
# read in classI and class II
244-
#peptides_51mer = pd.read_excel("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/10146-0021_Peptides_51-mer.xlsx")
245-
#classI = pd.read_csv("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/classI.TWJF-10146-0021-Tumor_Lysate.all_epitopes.aggregated.tsv", sep="\t")
246-
#classII = pd.read_csv("/Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/data_files/classII.TWJF-10146-0021-Tumor_Lysate.all_epitopes.aggregated.tsv", sep="\t")
247-
248230
peptides_51mer = pd.read_excel(args.p)
249231
classI = pd.read_csv(args.classI, sep="\t")
250232
classII = pd.read_csv(args.classII, sep="\t")
251233

252234
# Create a universal ID by editing the peptide 51mer ID
253235
peptides_51mer.rename(columns={'ID': 'full ID'}, inplace=True)
254-
peptides_51mer['ID'] = peptides_51mer['full ID']
236+
peptides_51mer['51mer ID'] = peptides_51mer['full ID']
255237

256-
peptides_51mer['ID'] = peptides_51mer['ID'].apply(lambda x: '.'.join(x.split('.')[1:])) # Removing before first period, periods will be removed
238+
peptides_51mer['51mer ID'] = peptides_51mer['51mer ID'].apply(lambda x: '.'.join(x.split('.')[1:])) # Removing before first period, periods will be removed
257239

258-
peptides_51mer['ID'] = peptides_51mer['ID'].apply(lambda x: '.'.join(x.split('.')[1:])) # Removing before second period
259-
peptides_51mer['ID'] = peptides_51mer['ID'].apply(lambda x: '.'.join(x.split('.')[:3]) + '.' + '.'.join(x.split('.')[4:]))
240+
peptides_51mer['51mer ID'] = peptides_51mer['51mer ID'].apply(lambda x: '.'.join(x.split('.')[1:])) # Removing before second period
241+
peptides_51mer['51mer ID'] = peptides_51mer['51mer ID'].apply(lambda x: '.'.join(x.split('.')[:3]) + '.' + '.'.join(x.split('.')[4:]))
260242

261243

262244
for index, row in peptides_51mer.iterrows():
263-
for i, char in enumerate(row['ID'][::-1]):
245+
for i, char in enumerate(row['51mer ID'][::-1]):
264246
if char.isdigit():
265-
peptides_51mer.at[index, 'ID'] = row['ID'][:-i]
247+
peptides_51mer.at[index, '51mer ID'] = row['51mer ID'][:-i]
266248
break
267249
else:
268-
result = row['ID']
269-
270-
# create a key that is gene, transcript, AA change for CLASSI
271-
classII['modified AA Change'] = classII['AA Change']
272-
273-
# Apply the function to the 'Value' column
274-
classII['modified AA Change'] = classII['modified AA Change'].apply(rearrange_string)
250+
result = row['51mer ID']
275251

276-
classII['ID'] = classII['Gene'] + '.' + classII['Best Transcript'] + '.' + classII['modified AA Change']
252+
# create a dataframe that contains the classI and classII peptide sequences
253+
classI.rename(columns = {"Best Peptide":"Best Peptide Class I"}, inplace=True)
254+
classII.rename(columns = {"Best Peptide":"Best Peptide Class II"}, inplace=True)
277255

278-
# create a key that is gene, transcript, AA change for CLASSI
256+
# create a key that is gene, transcript, AA change for ClassI to join to the peptides order form
279257
classI['modified AA Change'] = classI['AA Change']
280-
281-
# Apply the function to the 'Value' column
282258
classI['modified AA Change'] = classI['modified AA Change'].apply(rearrange_string)
259+
classI['51mer ID'] = classI['Gene'] + '.' + classI['Best Transcript'] + '.' + classI['modified AA Change']
283260

284-
classI['ID'] = classI['Gene'] + '.' + classI['Best Transcript'] + '.' + classI['modified AA Change']
285-
286-
# Merge the sequences from classI and classII with peptide 51mer
287-
merged_peptide_51mer = pd.merge(peptides_51mer, classII[['ID', 'Best Peptide']], on='ID', how='left')
288-
289-
merged_peptide_51mer.rename(columns = {"Best Peptide":"Best Peptide Class II"}, inplace=True)
290-
291-
merged_peptide_51mer = pd.merge(merged_peptide_51mer, classI[['ID', 'Best Peptide', 'Pos']], on='ID', how='left')
261+
class_sequences = pd.merge(classI[['ID', 'Best Peptide Class I', '51mer ID', 'Pos']], classII[['ID', 'Best Peptide Class II']], on='ID', how='left')
292262

293-
merged_peptide_51mer.rename(columns = {"Best Peptide":"Best Peptide Class I"}, inplace=True)
263+
# Create a dataframe that has the classI and classII sequence
264+
merged_peptide_51mer = pd.merge(peptides_51mer, class_sequences, on='51mer ID', how='left')
294265

295266
# convert peptide 51mer to HTML
296267
peptides_51mer_html = peptides_51mer.to_html(index=False) # convert to html
@@ -301,16 +272,17 @@ def main():
301272

302273
for index, row in peptides_51mer.iterrows():
303274

304-
search_string = row['full ID']
275+
search_string = row['51mer ID']
276+
print(search_string)
305277

306278
#classII_sequence
307-
classII_peptide = merged_peptide_51mer.loc[merged_peptide_51mer['full ID'] == search_string, 'Best Peptide Class II'].values[0]
279+
classII_peptide = merged_peptide_51mer.loc[merged_peptide_51mer['51mer ID'] == search_string, 'Best Peptide Class II'].values[0]
308280
#classI_sequence
309-
classI_peptide = merged_peptide_51mer.loc[merged_peptide_51mer['full ID'] == search_string, 'Best Peptide Class I'].values[0]
281+
classI_peptide = merged_peptide_51mer.loc[merged_peptide_51mer['51mer ID'] == search_string, 'Best Peptide Class I'].values[0]
310282

311283

312284
# mutant peptide position ---
313-
mutant_peptide_pos = str(merged_peptide_51mer.loc[merged_peptide_51mer['full ID'] == search_string, 'Pos'].values[0])
285+
mutant_peptide_pos = str(merged_peptide_51mer.loc[merged_peptide_51mer['51mer ID'] == search_string, 'Pos'].values[0])
314286

315287
# Find the tag containing the search string
316288
tag_with_search_string = peptides_51mer_soup.find('td', string=search_string)
@@ -330,7 +302,7 @@ def main():
330302
# actually, let's break class I and class II into two steps and handle the mutated nucleotide in the class I function
331303
# it should be basically like at that position in the class I set
332304

333-
set_underline(peptide_sequence, mutant_peptide_pos, row['full ID'])
305+
set_underline(peptide_sequence, mutant_peptide_pos, row['51mer ID'])
334306

335307
set_span_tags(peptide_sequence) # pass by reference
336308

scripts/setup_review.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525

2626

2727

28-
command1 = f"python /opt/scripts/generate_reviews_files.py -WB {args.WB} -a {args.a} -c {args.c} -samp {args.samp}"
29-
command2 = f"python /opt/scripts/color_peptides51mer.py -WB {args.WB} -p {args.WB}/../manual_review/{args.samp}_Peptides_51-mer.xlsx -classI {args.classI} -classII {args.classII} -samp {args.samp}"
28+
command1 = f"python /Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/generate_reviews_files.py -WB {args.WB} -a {args.a} -c {args.c} -samp {args.samp}"
29+
command2 = f"python /Volumes/mgriffit/Active/griffithlab/gc2596/e.schmidt/neoag_vaccine_scripts/scripts/color_peptides51mer.py -WB {args.WB} -p {args.WB}/../manual_review/{args.samp}_Peptides_51-mer.xlsx -classI {args.classI} -classII {args.classII} -samp {args.samp}"
3030

3131

3232
# Execute the first script

0 commit comments

Comments
 (0)