2
2
import csv
3
3
import pandas as pd
4
4
import sys
5
+ from Bio .SeqUtils .ProtParam import ProteinAnalysis
6
+ import re
5
7
6
8
'''
7
9
Write a script to create the files for the Case Final Reports
@@ -36,6 +38,29 @@ def parse_arguments():
36
38
37
39
return (parser .parse_args ())
38
40
41
+ # Function to break the peptide ID on the '.' to extract gene and AA information
42
def extract_info(value):
    """Build a sorting key from a dot-separated peptide ID.

    The ID is split on ``.`` and the fields at zero-based positions 2, 3
    and 4 are re-joined with ``.``.

    Args:
        value: Dot-separated peptide ID string with at least five fields.

    Returns:
        The string ``"<field 2>.<field 3>.<field 4>"``.

    Raises:
        IndexError: If ``value`` has fewer than five dot-separated fields.
    """
    parts = value.split('.')
    if len(parts) < 5:
        # Same exception type plain indexing would raise, but the message
        # names the offending ID so a bad row is easy to locate.
        raise IndexError(
            f"peptide ID {value!r} has {len(parts)} '.'-separated field(s); "
            "at least 5 are required"
        )
    return '.'.join(parts[2:5])
46
+
47
+ # Function to rearrange string so that G518D looks like 518G/D
48
def rearrange_string(s):
    """Rewrite an amino-acid change such as ``G518D`` as ``518G/D``.

    The input is matched from its start as: one or more ASCII letters, then
    a run of digits and/or hyphens, then zero or more ASCII letters. Anything
    after that matched prefix is ignored.

    Args:
        s: String to rearrange, e.g. ``"G518D"``.

    Returns:
        ``"<digits><leading letters>/<trailing letters>"`` when ``s`` matches
        the pattern; otherwise ``s`` unchanged.
    """
    match = re.match(r'([A-Za-z]+)([\d-]+)([A-Za-z]*)', s)
    if match is None:
        return s

    letters_before, numbers, letters_after = match.groups()
    # No padding between the pieces: the target format is "518G/D".
    return f"{numbers}{letters_before}/{letters_after}"
58
+
59
+ # Function to calculate molecular weight
60
def calculate_molecular_weight(peptide):
    """Return the molecular weight of a peptide sequence.

    Args:
        peptide: Sequence handed directly to ``ProteinAnalysis``.

    Returns:
        Whatever ``ProteinAnalysis(peptide).molecular_weight()`` reports.
    """
    return ProteinAnalysis(peptide).molecular_weight()
63
+
39
64
def main ():
40
65
41
66
# 1. ITB review
@@ -54,20 +79,41 @@ def main():
54
79
reviewed_canidates = reviewed_canidates [reviewed_canidates .Evaluation != "Pending" ]
55
80
reviewed_canidates = reviewed_canidates [reviewed_canidates .Evaluation != "Reject" ]
56
81
82
+ reviewed_canidates = reviewed_canidates .rename (columns = {'Comments' :'pVAC Review Comments' })
83
+ reviewed_canidates ["Variant Called by CLE Pipeline" ] = " "
84
+ reviewed_canidates ["IGV Review Comments" ] = " "
85
+
86
+
87
+ # create sorting ID that is gene and transcript to sort in the same order as peptide
88
+ reviewed_canidates ['sorting id' ] = reviewed_canidates ['Gene' ] + '.' + reviewed_canidates ['Best Transcript' ]
89
+
90
+
57
91
peptides = pd .read_csv (args .c , sep = "\t " )
58
92
peptides = peptides .drop (['cterm_7mer_gravy_score' , 'cysteine_count' , 'n_terminal_asparagine' , 'asparagine_proline_bond_count' ,
59
93
'difficult_n_terminal_residue' , 'c_terminal_cysteine' , 'c_terminal_proline' , 'max_7mer_gravy_score' ], axis = 1 )
60
- peptides = peptides .rename (columns = {"id" :"ID" , "peptide_sequence" :"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES" })
61
94
peptides ["RESTRICTING HLA ALLELE" ] = " "
62
- peptides ["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)" ] = " "
95
+
96
+ peptides ["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)" ] = peptides ["peptide_sequence" ].apply (calculate_molecular_weight )
97
+
98
+ peptides = peptides .rename (columns = {"id" :"ID" , "peptide_sequence" :"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES" })
63
99
peptides ["Comments" ] = " "
64
100
peptides ["CANDIDATE NEOANTIGEN" ] = peptides ["ID" ].apply (lambda x : '.' .join (x .split ('.' )[:3 ]))
65
101
peptides ["CANDIDATE NEOANTIGEN" ] = args .samp + "." + peptides ["CANDIDATE NEOANTIGEN" ]
66
102
67
-
68
-
69
103
peptides = peptides [["ID" , "CANDIDATE NEOANTIGEN" , "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES" ,
70
104
"RESTRICTING HLA ALLELE" , "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)" , "Comments" ]]
105
+
106
+
107
+ # creating an ID to sort reviewed candidates by the order of the 51-mer
108
+ peptides ['sorting id' ] = peptides ['ID' ].apply (extract_info )
109
+
110
+ reviewed_canidates = reviewed_canidates .set_index ('sorting id' )
111
+ reviewed_canidates = reviewed_canidates .reindex (index = peptides ['sorting id' ])
112
+ reviewed_canidates = reviewed_canidates .reset_index ()
113
+
114
+ reviewed_canidates = reviewed_canidates .drop (columns = ['sorting id' ])
115
+ peptides = peptides .drop (columns = ['sorting id' ])
116
+
71
117
72
118
if args .WB :
73
119
Peptide_file_name = args .WB + '/../manual_review/' + args .samp + "_Peptides_51-mer.xlsx"
0 commit comments