Skip to content

Commit 0ddf476

Browse files
Merge pull request #6 from griffithlab/molecular_weight
using ProteinAnalysis from biopython to calculate MW
2 parents 66fc980 + 1514326 commit 0ddf476

File tree

5 files changed

+89
-18
lines changed

5 files changed

+89
-18
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
FROM python:3.8-slim-buster
44

5+
RUN ["apt-get", "update"]
6+
RUN ["apt-get", "install", "-y", "vim"]
7+
58
ADD scripts/get_FDA_thresholds.py /opt/scripts/get_FDA_thresholds.py
69
ADD scripts/get_neoantigen_qc.py /opt/scripts/get_neoantigen_qc.py
710
ADD scripts/requirements.txt /opt/scripts/requirements.txt

scripts/color_peptides51mer.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ def __init__(self, nucleotide, bold, color, underline, large, position, open_tag
1717
self.open_tag = open_tag
1818
self.close_tag = close_tag
1919

20+
def view(self):
    """Print this object's nucleotide and styling attributes, one per line.

    Debugging aid: each line is a label followed by the attribute's current
    value, in the order nucleotide, open tag, close tag, bold, color,
    underline, large. Nothing is returned.
    """
    # (label, attribute name) pairs in output order; attributes are looked up
    # lazily so lines are emitted one at a time, as with individual prints.
    labelled_attributes = (
        ("Nucleotide: ", "nucleotide"),
        ("Open Tag: ", "open_tag"),
        ("Close Tag: ", "close_tag"),
        ("Bold: ", "bold"),
        ("Color: ", "color"),
        ("Underline: ", "underline"),
        ("Large: ", "large"),
    )
    for label, attribute in labelled_attributes:
        print(label, getattr(self, attribute))
28+
2029
# ---- PARSE ARGUMENTS -------------------------------------------------------
2130
# Parses command line arguments
2231
# Enables user help
@@ -178,7 +187,7 @@ def set_span_tags(peptide_sequence):
178187
nucleotide.open_tag = True
179188

180189
if inside_span:
181-
nucleotide.close_tag = True # only if its isnide a span tag
190+
nucleotide.close_tag = True # only if its inside a span tag
182191
else:
183192
nucleotide.close_tag = False
184193

@@ -202,9 +211,7 @@ def create_stylized_sequence(peptide_sequence):
202211
if nucleotide.close_tag:
203212
new_string += '</span>'
204213

205-
206214
if nucleotide.open_tag:
207-
208215
new_string += '<span style="'
209216
if nucleotide.bold:
210217
new_string += 'font-weight:bold;'
@@ -216,9 +223,6 @@ def create_stylized_sequence(peptide_sequence):
216223
new_string += 'font-size:105%;'
217224
new_string += '">'
218225
new_string += nucleotide.nucleotide
219-
220-
if not nucleotide.large and not nucleotide.bold and not nucleotide.color and not nucleotide.underline:
221-
new_string += nucleotide.nucleotide
222226
else:
223227
new_string += nucleotide.nucleotide
224228
return(new_string)
@@ -273,7 +277,6 @@ def main():
273277
for index, row in peptides_51mer.iterrows():
274278

275279
search_string = row['51mer ID']
276-
print(search_string)
277280

278281
#classII_sequence
279282
classII_peptide = merged_peptide_51mer.loc[merged_peptide_51mer['51mer ID'] == search_string, 'Best Peptide Class II'].values[0]
@@ -306,10 +309,14 @@ def main():
306309

307310
set_span_tags(peptide_sequence) # pass by reference
308311

312+
print(row['51mer ID'])
309313
new_string = create_stylized_sequence(peptide_sequence)
310314

311315
next_td_tags[2].string = new_string
312316

317+
# Remove the tag_with_search_string from the BeautifulSoup tree
318+
tag_with_search_string.decompose()
319+
313320
modified_html = peptides_51mer_soup.prettify(formatter=None)
314321

315322
else:
@@ -318,6 +325,13 @@ def main():
318325
print("ClassI: ", classI_peptide)
319326
print("ClassII: ", classII_peptide, "\n")
320327

328+
soup = BeautifulSoup(modified_html, 'html.parser')
329+
tag_with_search_string = soup.select_one('th:-soup-contains("51mer ID")')
330+
if tag_with_search_string:
331+
tag_with_search_string.decompose()
332+
# Now 'soup' contains the modified HTML with the tag removed
333+
modified_html = soup.prettify(formatter=None)
334+
321335
if args.WB:
322336
html_file_name = args.WB + '/../manual_review/' + args.samp + ".Colored_Peptides.html"
323337
else:

scripts/generate_reviews_files.py

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import csv
33
import pandas as pd
44
import sys
5+
from Bio.SeqUtils.ProtParam import ProteinAnalysis
6+
import re
57

68
'''
79
Write a script to create the files for the Case Final Reports
@@ -36,6 +38,29 @@ def parse_arguments():
3638

3739
return(parser.parse_args())
3840

41+
# Function to break the peptide ID on the '.' to extract gene and AA information
42+
def extract_info(value):
    """Return the third, fourth and fifth dot-separated fields of an ID.

    The ID is split on '.', and the fields at positions 2, 3 and 4
    (zero-based) are re-joined with '.'.
    NOTE(review): presumably these fields are the gene name followed by a
    versioned transcript ID, matching the 'sorting id' built from
    Gene + '.' + Best Transcript -- confirm against the peptide ID format.

    Args:
        value: Dot-delimited ID string with at least five fields.

    Returns:
        The three selected fields joined by '.'.

    Raises:
        IndexError: If the ID has fewer than five dot-separated fields.
    """
    fields = value.split('.')
    # Index each position explicitly (rather than slicing) so a short ID
    # raises instead of silently yielding a truncated key.
    return '.'.join(fields[position] for position in (2, 3, 4))
46+
47+
# Function to rearrange string so that G518D looks like 518G/D
48+
def rearrange_string(s):
    """Rewrite a label like ``G518D`` as ``518G/D``.

    The string must start with one or more letters followed by a run of
    digits and/or hyphens; any letters directly after that run become the
    part after the slash. Text beyond the matched portion is dropped, and a
    label with no trailing letters still gets the slash (``G518`` ->
    ``518G/``).

    Args:
        s: Label to rearrange.

    Returns:
        The rearranged label, or ``s`` unchanged when it does not start with
        letters followed by digits/hyphens.
    """
    parsed = re.match(r'([A-Za-z]+)([\d-]+)([A-Za-z]*)', s)
    if parsed is None:
        # Not in the expected letters-then-position form; leave untouched.
        return s

    leading_letters, position, trailing_letters = parsed.groups()
    return position + leading_letters + "/" + trailing_letters
58+
59+
# Function to calculate molecular weight
60+
def calculate_molecular_weight(peptide):
    """Return the molecular weight of a peptide sequence.

    The sequence is wrapped in Biopython's ``ProteinAnalysis`` and the value
    of its ``molecular_weight()`` method is returned as-is.

    Args:
        peptide: Amino-acid sequence to analyse.
            NOTE(review): presumably a plain one-letter-code string; how
            missing or non-standard residues behave depends on Biopython --
            confirm before feeding unvalidated data.

    Returns:
        Whatever ``ProteinAnalysis.molecular_weight()`` reports for the
        sequence.
    """
    return ProteinAnalysis(peptide).molecular_weight()
63+
3964
def main():
4065

4166
# 1. ITB review
@@ -54,20 +79,41 @@ def main():
5479
reviewed_canidates = reviewed_canidates[reviewed_canidates.Evaluation != "Pending"]
5580
reviewed_canidates = reviewed_canidates[reviewed_canidates.Evaluation != "Reject"]
5681

82+
reviewed_canidates = reviewed_canidates.rename(columns={'Comments':'pVAC Review Comments'})
83+
reviewed_canidates["Variant Called by CLE Pipeline"] = " "
84+
reviewed_canidates["IGV Review Comments"] = " "
85+
86+
87+
# create sorting ID that is gene and transcript to sort in the same order as peptide
88+
reviewed_canidates['sorting id'] = reviewed_canidates['Gene'] + '.' + reviewed_canidates['Best Transcript']
89+
90+
5791
peptides = pd.read_csv(args.c, sep="\t")
5892
peptides = peptides.drop(['cterm_7mer_gravy_score', 'cysteine_count', 'n_terminal_asparagine', 'asparagine_proline_bond_count',
5993
'difficult_n_terminal_residue', 'c_terminal_cysteine', 'c_terminal_proline', 'max_7mer_gravy_score'], axis=1)
60-
peptides = peptides.rename(columns={"id":"ID", "peptide_sequence":"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES"})
6194
peptides["RESTRICTING HLA ALLELE"] = " "
62-
peptides["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)"] = " "
95+
96+
peptides["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)"] = peptides["peptide_sequence"].apply(calculate_molecular_weight)
97+
98+
peptides = peptides.rename(columns={"id":"ID", "peptide_sequence":"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES"})
6399
peptides["Comments"] = " "
64100
peptides["CANDIDATE NEOANTIGEN"] = peptides["ID"].apply(lambda x: '.'.join(x.split('.')[:3]))
65101
peptides["CANDIDATE NEOANTIGEN"] = args.samp + "." + peptides["CANDIDATE NEOANTIGEN"]
66102

67-
68-
69103
peptides = peptides[["ID", "CANDIDATE NEOANTIGEN", "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES",
70104
"RESTRICTING HLA ALLELE", "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)", "Comments"]]
105+
106+
107+
# creating an ID to sort reviewed candidates by the order of the 51mer
108+
peptides['sorting id'] = peptides['ID'].apply(extract_info)
109+
110+
reviewed_canidates = reviewed_canidates.set_index('sorting id')
111+
reviewed_canidates = reviewed_canidates.reindex(index=peptides['sorting id'])
112+
reviewed_canidates = reviewed_canidates.reset_index()
113+
114+
reviewed_canidates = reviewed_canidates.drop(columns=['sorting id'])
115+
peptides = peptides.drop(columns=['sorting id'])
116+
71117

72118
if args.WB:
73119
Peptide_file_name = args.WB + '/../manual_review/' + args.samp + "_Peptides_51-mer.xlsx"

scripts/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ argparse
22
pandas
33
et-xmlfile == 1.1.0
44
openpyxl == 3.1.2
5-
bs4 == 0.0.1
5+
bs4 == 0.0.1
6+
biopython

scripts/setup_review.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
import argparse
22
import subprocess
33

4+
def execute_script(script_path):
    """Run a command line through the shell and report whether it succeeded.

    Progress is printed to stdout. A non-zero exit status is treated as a
    warning rather than a fatal error, so the caller can carry on with later
    steps; the return value lets the caller find out that a step failed.

    Args:
        script_path: Full command line (program plus arguments) as a single
            string, interpreted by the shell.

    Returns:
        True if the command exited with status 0, False if it exited with a
        non-zero status.
    """
    print("Executing...", script_path)
    try:
        # NOTE(review): shell=True hands the string to the shell verbatim, so
        # any user-supplied value embedded in it must be trusted or quoted
        # (e.g. with shlex.quote) by whoever builds the command.
        subprocess.run(script_path, shell=True, check=True)
    except subprocess.CalledProcessError as e:
        # Best effort by design: warn and let the caller continue.
        print(f"Warning: Script {script_path} did not execute correctly. Error: {e}")
        print()
        return False

    print("Successful.")
    print()
    return True
13+
14+
415
# Define the command-line arguments
516
parser = argparse.ArgumentParser(description='Sets up manuel review files')
617

@@ -30,15 +41,11 @@
3041

3142

3243
# Execute the first script
33-
print("Generating Review Files...")
34-
35-
subprocess.run(command1, shell=True)
44+
execute_script(command1)
3645

3746
# Execute the second script
38-
print("Coloring Peptide Sequeces...")
47+
execute_script(command2)
3948

40-
subprocess.run(command2, shell=True)
4149

4250

43-
print("Scripts have been executed successfully.")
4451

0 commit comments

Comments
 (0)