Skip to content

Commit 5faf97a

Browse files
Order by peptide sheet, MW, add extra columns to review file, fix some formatting
1 parent 6d705f0 commit 5faf97a

File tree

4 files changed

+66
-22
lines changed

4 files changed

+66
-22
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
FROM python:3.8-slim-buster
44

5+
RUN ["apt-get", "update"]
6+
RUN ["apt-get", "install", "-y", "vim"]
7+
58
ADD scripts/get_FDA_thresholds.py /opt/scripts/get_FDA_thresholds.py
69
ADD scripts/get_neoantigen_qc.py /opt/scripts/get_neoantigen_qc.py
710
ADD scripts/requirements.txt /opt/scripts/requirements.txt

scripts/color_peptides51mer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,9 +320,12 @@ def main():
320320
print("ClassI: ", classI_peptide)
321321
print("ClassII: ", classII_peptide, "\n")
322322

323-
tag_with_search_string = modified_html.find('th', string="51mer ID")
324-
tag_with_search_string.decompose()
325-
323+
soup = BeautifulSoup(modified_html, 'html.parser')
324+
tag_with_search_string = soup.select_one('th:-soup-contains("51mer ID")')
325+
if tag_with_search_string:
326+
tag_with_search_string.decompose()
327+
# Now 'soup' contains the modified HTML with the tag removed
328+
modified_html = soup.prettify(formatter=None)
326329

327330
if args.WB:
328331
html_file_name = args.WB + '/../manual_review/' + args.samp + ".Colored_Peptides.html"

scripts/generate_reviews_files.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pandas as pd
44
import sys
55
from Bio.SeqUtils.ProtParam import ProteinAnalysis
6+
import re
67

78
'''
89
Write a script to create the files for the Case Final Reports
@@ -37,6 +38,29 @@ def parse_arguments():
3738

3839
return(parser.parse_args())
3940

41+
# Function to split the peptide ID on '.' to extract gene and AA information
42+
def extract_info(value):
    """Return fields 2-4 of a dot-separated peptide ID, re-joined with '.'.

    The ID is split on '.', and the fields at zero-based indices 2, 3 and 4
    are joined back together with '.' to form a sorting key.

    Args:
        value: Dot-separated ID string containing at least five fields.

    Returns:
        The string "<field2>.<field3>.<field4>".

    Raises:
        IndexError: If the ID has fewer than five dot-separated fields. The
            message names the offending ID so a bad row can be located.
    """
    parts = value.split('.')
    if len(parts) < 5:
        # Same exception type a bare index lookup would raise, but the
        # message identifies which ID was malformed.
        raise IndexError(
            f"Peptide ID {value!r} has {len(parts)} dot-separated field(s); "
            "at least 5 are required"
        )
    return '.'.join(parts[2:5])
46+
47+
# Function to rearrange string so that G518D looks like 518G/D
48+
def rearrange_string(s):
    """Move the leading letters of a string behind its number part.

    The string must start with one or more letters, followed by a run of
    digits and/or '-' characters, optionally followed by more letters.
    When it does, the result is "<digits><leading letters>/<trailing letters>",
    e.g. "G518D" becomes "518G/D". Anything after the trailing letters is
    not included in the result. A string that does not start with that
    pattern is returned unchanged.

    Args:
        s: The string to rearrange.

    Returns:
        The rearranged string, or ``s`` itself when the pattern does not match.
    """
    found = re.match(r'([A-Za-z]+)([\d-]+)([A-Za-z]*)', s)
    if found is None:
        # Not in the expected letters-then-digits form: pass it through.
        return s

    leading, digits, trailing = found.groups()
    return digits + leading + "/" + trailing
58+
59+
# Function to calculate molecular weight
60+
def calculate_molecular_weight(peptide):
    """Return the molecular weight of a peptide sequence.

    Args:
        peptide: Sequence passed directly to ``ProteinAnalysis``.

    Returns:
        The value of ``ProteinAnalysis(peptide).molecular_weight()``.
    """
    return ProteinAnalysis(peptide).molecular_weight()
63+
4064
def main():
4165

4266
# 1. ITB review
@@ -54,35 +78,42 @@ def main():
5478

5579
reviewed_canidates = reviewed_canidates[reviewed_canidates.Evaluation != "Pending"]
5680
reviewed_canidates = reviewed_canidates[reviewed_canidates.Evaluation != "Reject"]
57-
# key for sorted the tables
58-
reviewed_canidates["CANDIDATE NEOANTIGEN"] = args.samp + "." + "MT." + reviewed_canidates["Pos"] + "." + reviewed_canidates["Gene"]
81+
82+
reviewed_canidates = reviewed_canidates.rename(columns={'Comments':'pVAC Review Comments'})
83+
reviewed_canidates["Variant Called by CLE Pipeline"] = " "
84+
reviewed_canidates["IGV Review Comments"] = " "
85+
86+
87+
# Create a sorting ID from gene and transcript so rows can be sorted in the same order as the peptides
88+
reviewed_canidates['sorting id'] = reviewed_canidates['Gene'] + '.' + reviewed_canidates['Best Transcript']
89+
5990

6091
peptides = pd.read_csv(args.c, sep="\t")
6192
peptides = peptides.drop(['cterm_7mer_gravy_score', 'cysteine_count', 'n_terminal_asparagine', 'asparagine_proline_bond_count',
6293
'difficult_n_terminal_residue', 'c_terminal_cysteine', 'c_terminal_proline', 'max_7mer_gravy_score'], axis=1)
6394
peptides["RESTRICTING HLA ALLELE"] = " "
6495

65-
# Define a function to calculate molecular weight
66-
def calculate_molecular_weight(peptide):
67-
analyzed_seq = ProteinAnalysis(peptide)
68-
return analyzed_seq.molecular_weight()
69-
7096
peptides["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)"] = peptides["peptide_sequence"].apply(calculate_molecular_weight)
7197

7298
peptides = peptides.rename(columns={"id":"ID", "peptide_sequence":"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES"})
7399
peptides["Comments"] = " "
74100
peptides["CANDIDATE NEOANTIGEN"] = peptides["ID"].apply(lambda x: '.'.join(x.split('.')[:3]))
75101
peptides["CANDIDATE NEOANTIGEN"] = args.samp + "." + peptides["CANDIDATE NEOANTIGEN"]
76102

77-
78-
79103
peptides = peptides[["ID", "CANDIDATE NEOANTIGEN", "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES",
80104
"RESTRICTING HLA ALLELE", "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)", "Comments"]]
81105

82-
# Sort the reviewed canidates according to peptide
83-
reviewed_canidates = reviewed_canidates.set_index('Col2')
84-
reviewed_canidates = reviewed_canidates.reindex(index=peptides['CANDIDATE NEOANTIGEN'])
85-
reviewed_canidates = reviewed_canidates.reset_index()
106+
107+
# Create an ID to sort the reviewed candidates in the order of the 51-mers
108+
peptides['sorting id'] = peptides['ID'].apply(extract_info)
109+
110+
reviewed_canidates = reviewed_canidates.set_index('sorting id')
111+
reviewed_canidates = reviewed_canidates.reindex(index=peptides['sorting id'])
112+
reviewed_canidates = reviewed_canidates.reset_index()
113+
114+
reviewed_canidates = reviewed_canidates.drop(columns=['sorting id'])
115+
peptides = peptides.drop(columns=['sorting id'])
116+
86117

87118
if args.WB:
88119
Peptide_file_name = args.WB + '/../manual_review/' + args.samp + "_Peptides_51-mer.xlsx"

scripts/setup_review.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
import argparse
22
import subprocess
33

4+
def execute_script(script_path):
    """Run a command through the shell and report the outcome on stdout.

    The command is announced, then run with ``shell=True`` and
    ``check=True``. A non-zero exit status is reported as a warning and is
    not re-raised, so the caller carries on. A blank line is printed after
    either outcome.

    Args:
        script_path: Command string handed to the shell.
    """
    try:
        print("Executing...", script_path)
        subprocess.run(script_path, shell=True, check=True)
    except subprocess.CalledProcessError as err:
        # Best effort: report the failure but let the caller continue.
        print(f"Warning: Script {script_path} did not execute correctly. Error: {err}")
    else:
        print("Successful.")
    print()
13+
14+
415
# Define the command-line arguments
516
parser = argparse.ArgumentParser(description='Sets up manuel review files')
617

@@ -30,15 +41,11 @@
3041

3142

3243
# Execute the first script
33-
print("Generating Review Files...")
34-
35-
subprocess.run(command1, shell=True)
44+
execute_script(command1)
3645

3746
# Execute the second script
38-
print("Coloring Peptide Sequeces...")
47+
execute_script(command2)
3948

40-
subprocess.run(command2, shell=True)
4149

4250

43-
print("Scripts have been executed successfully.")
4451

0 commit comments

Comments
 (0)