3
3
import pandas as pd
4
4
import sys
5
5
from Bio .SeqUtils .ProtParam import ProteinAnalysis
6
+ import re
6
7
7
8
'''
8
9
Write a script to create the files for the Case Final Reports
@@ -37,6 +38,29 @@ def parse_arguments():
37
38
38
39
return (parser .parse_args ())
39
40
41
# Function to split the peptide ID on '.' and keep the fields used for sorting
def extract_info(value):
    """Return the third, fourth and fifth dot-separated fields of a peptide ID.

    The ID is split on ``'.'`` and the fields at zero-based positions 2, 3
    and 4 are re-joined with ``'.'``. ``main`` uses the result as the
    ``'sorting id'`` key, which it matches against
    ``Gene + '.' + Best Transcript`` from the reviewed candidates table.
    NOTE(review): this presumably means the three fields are gene,
    transcript and transcript version -- confirm against real peptide IDs.

    Args:
        value: Dot-separated peptide ID string.

    Returns:
        The three selected fields joined by ``'.'``.

    Raises:
        IndexError: If ``value`` has fewer than five dot-separated fields.
    """
    parts = value.split('.')
    # Index explicitly (rather than slicing) so a malformed ID fails loudly
    # instead of silently yielding a shorter, non-matching key.
    return '.'.join((parts[2], parts[3], parts[4]))
46
+
47
# Function to rearrange string so that G518D looks like 518G/D
def rearrange_string(s):
    """Rewrite a string such as ``G518D`` as ``518G/D``.

    The string is matched from its start against three consecutive groups:
    one or more ASCII letters, one or more digits and/or hyphens, then zero
    or more ASCII letters. Only the start is anchored, so any text after the
    third group is not part of the result.

    Args:
        s: The string to rearrange (e.g. ``"G518D"``).

    Returns:
        ``"<numbers><letters_before>/<letters_after>"`` when the pattern
        matches at the start of ``s``; otherwise ``s`` unchanged.
    """
    match = re.match(r'([A-Za-z]+)([\d-]+)([A-Za-z]*)', s)
    if match is None:
        return s

    letters_before, numbers, letters_after = match.groups()
    # No padding around the fields: the documented output is "518G/D",
    # not "518 G /D ".
    return f"{numbers}{letters_before}/{letters_after}"
58
+
59
# Function to calculate molecular weight
def calculate_molecular_weight(peptide):
    """Return the molecular weight of a peptide sequence.

    The sequence is wrapped in Biopython's ``ProteinAnalysis`` and the value
    of its ``molecular_weight()`` method is returned unchanged.

    Args:
        peptide: Amino-acid sequence passed straight to ``ProteinAnalysis``.

    Returns:
        Whatever ``ProteinAnalysis(peptide).molecular_weight()`` returns.
    """
    return ProteinAnalysis(peptide).molecular_weight()
63
+
40
64
def main ():
41
65
42
66
# 1. ITB review
@@ -54,35 +78,42 @@ def main():
54
78
55
79
reviewed_canidates = reviewed_canidates [reviewed_canidates .Evaluation != "Pending" ]
56
80
reviewed_canidates = reviewed_canidates [reviewed_canidates .Evaluation != "Reject" ]
57
- # key for sorted the tables
58
- reviewed_canidates ["CANDIDATE NEOANTIGEN" ] = args .samp + "." + "MT." + reviewed_canidates ["Pos" ] + "." + reviewed_canidates ["Gene" ]
81
+
82
+ reviewed_canidates = reviewed_canidates .rename (columns = {'Comments' :'pVAC Review Comments' })
83
+ reviewed_canidates ["Variant Called by CLE Pipeline" ] = " "
84
+ reviewed_canidates ["IGV Review Comments" ] = " "
85
+
86
+
87
+ # create sorting ID that is gene and transcript to sort in the same order as peptide
88
+ reviewed_canidates ['sorting id' ] = reviewed_canidates ['Gene' ] + '.' + reviewed_canidates ['Best Transcript' ]
89
+
59
90
60
91
peptides = pd .read_csv (args .c , sep = "\t " )
61
92
peptides = peptides .drop (['cterm_7mer_gravy_score' , 'cysteine_count' , 'n_terminal_asparagine' , 'asparagine_proline_bond_count' ,
62
93
'difficult_n_terminal_residue' , 'c_terminal_cysteine' , 'c_terminal_proline' , 'max_7mer_gravy_score' ], axis = 1 )
63
94
peptides ["RESTRICTING HLA ALLELE" ] = " "
64
95
65
- # Define a function to calculate molecular weight
66
- def calculate_molecular_weight (peptide ):
67
- analyzed_seq = ProteinAnalysis (peptide )
68
- return analyzed_seq .molecular_weight ()
69
-
70
96
peptides ["CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)" ] = peptides ["peptide_sequence" ].apply (calculate_molecular_weight )
71
97
72
98
peptides = peptides .rename (columns = {"id" :"ID" , "peptide_sequence" :"CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES" })
73
99
peptides ["Comments" ] = " "
74
100
peptides ["CANDIDATE NEOANTIGEN" ] = peptides ["ID" ].apply (lambda x : '.' .join (x .split ('.' )[:3 ]))
75
101
peptides ["CANDIDATE NEOANTIGEN" ] = args .samp + "." + peptides ["CANDIDATE NEOANTIGEN" ]
76
102
77
-
78
-
79
103
peptides = peptides [["ID" , "CANDIDATE NEOANTIGEN" , "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE WITH FLANKING RESIDUES" ,
80
104
"RESTRICTING HLA ALLELE" , "CANDIDATE NEOANTIGEN AMINO ACID SEQUENCE MW (CLIENT)" , "Comments" ]]
81
105
82
- # Sort the reviewed canidates according to peptide
83
- reviewed_canidates = reviewed_canidates .set_index ('Col2' )
84
- reviewed_canidates = reviewed_canidates .reindex (index = peptides ['CANDIDATE NEOANTIGEN' ])
85
- reviewed_canidates = reviewed_canidates .reset_index ()
106
+
107
+ # Create an ID to sort the reviewed candidates in the order of the 51-mer peptides
108
+ peptides ['sorting id' ] = peptides ['ID' ].apply (extract_info )
109
+
110
+ reviewed_canidates = reviewed_canidates .set_index ('sorting id' )
111
+ reviewed_canidates = reviewed_canidates .reindex (index = peptides ['sorting id' ])
112
+ reviewed_canidates = reviewed_canidates .reset_index ()
113
+
114
+ reviewed_canidates = reviewed_canidates .drop (columns = ['sorting id' ])
115
+ peptides = peptides .drop (columns = ['sorting id' ])
116
+
86
117
87
118
if args .WB :
88
119
Peptide_file_name = args .WB + '/../manual_review/' + args .samp + "_Peptides_51-mer.xlsx"
0 commit comments