9
9
Write a script to create the files for the Case Final Reports
10
10
- Sample Peptides 51-mer
11
11
- SAMPLE.Annotated.Neoantigen_Candidates.xlsx
12
-
13
- Maybe the Sample Genomics Review Report with everything highlighted in yellow
14
-
15
-
16
- Use:
17
- python3 generate_reviews_files.py -a /Volumes/gillandersw/Active/Project_0001_Clinical_Trials/CTEP/analysis/TWJF-10146-MO011-0021/itb-review-files/10146-0021.Annotated.Neoantigen_Candidates.xlsx -c /Volumes/gillandersw/Active/Project_0001_Clinical_Trials/CTEP/analysis/TWJF-10146-MO011-0021/generate_protein_fasta/candidates/annotated_filtered.vcf-pass-51mer.fa.manufacturability.tsv -samp 10146-0021
18
12
'''
19
13
20
14
# ---- PARSE ARGUMENTS -------------------------------------------------------
@@ -56,37 +50,55 @@ def rearrange_string(s):
56
50
else :
57
51
return s
58
52
59
- # Function to calculate molecular weight
53
+ # Function to calculate molecular weight---------------------------------------
60
54
def calculate_molecular_weight(peptide):
    """Return the molecular weight reported by ``ProteinAnalysis`` for ``peptide``.

    NOTE(review): ``ProteinAnalysis`` is imported outside this view --
    presumably Biopython's ``Bio.SeqUtils.ProtParam.ProteinAnalysis``, which
    takes an amino-acid sequence string; confirm against the file's imports.
    """
    return ProteinAnalysis(peptide).molecular_weight()
63
57
64
- def main ():
58
+ # Function to make id column unique -------------------------------------------
59
def make_column_unique(df: "pd.DataFrame", column_name: str) -> "pd.DataFrame":
    """Make every value in ``df[column_name]`` unique by suffixing repeats.

    The first occurrence of a value is kept unchanged. Each later occurrence
    is rewritten as ``"<value>.<n>"``, where ``n`` is the smallest positive
    integer whose result has not already appeared (or been generated) earlier
    in the column.

    Args:
        df: Table holding the column; it is modified in place.
        column_name: Name of the column to de-duplicate.

    Returns:
        The same ``df`` object, with ``column_name`` replaced by the
        de-duplicated values in their original row order.
    """
    seen_values = set()
    # Next suffix to try for each repeated value, so a value that repeats many
    # times does not rescan ".1", ".2", ... from the start on every repeat.
    next_suffix = {}
    new_values = []

    for value in df[column_name]:
        unique_value = value
        if value in seen_values:
            suffix = next_suffix.get(value, 1)
            unique_value = f"{value}.{suffix}"
            # Still test the candidate: the column may already contain a
            # literal "<value>.<n>" entry that would collide with it.
            while unique_value in seen_values:
                suffix += 1
                unique_value = f"{value}.{suffix}"
            next_suffix[value] = suffix + 1

        seen_values.add(unique_value)
        new_values.append(unique_value)

    # The column is assigned on the passed-in object (not a copy), so callers
    # that ignore the return value still see the de-duplicated column.
    df[column_name] = new_values
    return df
77
+
78
+
79
+ def main ():
69
80
70
81
args = parse_arguments ()
71
82
72
- reviewed_canidates = pd .read_excel (args .a )
83
+ reviewed_candidates = pd .read_excel (args .a )
73
84
74
85
75
- reviewed_canidates .columns = reviewed_canidates .iloc [0 ]
76
- reviewed_canidates = reviewed_canidates [1 :] # there is a extra row before the col name row
77
- reviewed_canidates = reviewed_canidates .reset_index (drop = True ) # Reset the index of the dataframe
86
+ reviewed_candidates .columns = reviewed_candidates .iloc [0 ]
87
+ reviewed_candidates = reviewed_candidates [1 :] # there is a extra row before the col name row
88
+ reviewed_candidates = reviewed_candidates .reset_index (drop = True ) # Reset the index of the dataframe
78
89
79
- reviewed_canidates = reviewed_canidates [ reviewed_canidates .Evaluation != "Pending" ]
80
- reviewed_canidates = reviewed_canidates [ reviewed_canidates .Evaluation != "Reject" ]
90
+ reviewed_candidates = reviewed_candidates [ reviewed_candidates .Evaluation != "Pending" ]
91
+ reviewed_candidates = reviewed_candidates [ reviewed_candidates .Evaluation != "Reject" ]
81
92
82
- reviewed_canidates = reviewed_canidates .rename (columns = {'Comments' :'pVAC Review Comments' })
83
- reviewed_canidates ["Variant Called by CLE Pipeline" ] = " "
84
- reviewed_canidates ["IGV Review Comments" ] = " "
93
+ reviewed_candidates = reviewed_candidates .rename (columns = {'Comments' :'pVAC Review Comments' })
94
+ reviewed_candidates ["Variant Called by CLE Pipeline" ] = " "
95
+ reviewed_candidates ["IGV Review Comments" ] = " "
85
96
86
97
87
98
# create a sorting ID (gene.transcript) so the candidates can be ordered the same way as the peptides
88
- reviewed_canidates ['sorting id' ] = reviewed_canidates ['Gene' ] + '.' + reviewed_canidates ['Best Transcript' ]
89
-
99
+ reviewed_candidates ['sorting id' ] = reviewed_candidates ['Gene' ] + '.' + reviewed_candidates ['Best Transcript' ]
100
+ # make sure the sorting id column is unique
101
+ reviewed_canidates = make_column_unique (reviewed_candidates , 'sorting id' )
90
102
91
103
peptides = pd .read_csv (args .c , sep = "\t " )
92
104
peptides = peptides .drop (['cterm_7mer_gravy_score' , 'cysteine_count' , 'n_terminal_asparagine' , 'asparagine_proline_bond_count' ,
@@ -106,12 +118,14 @@ def main():
106
118
107
119
# create an ID to sort the reviewed candidates by the order of the 51-mer peptides
108
120
peptides ['sorting id' ] = peptides ['ID' ].apply (extract_info )
121
+ # make sure every sorting id is unique
122
+ peptides = make_column_unique (peptides , 'sorting id' )
109
123
110
- reviewed_canidates = reviewed_canidates .set_index ('sorting id' )
111
- reviewed_canidates = reviewed_canidates .reindex (index = peptides ['sorting id' ])
112
- reviewed_canidates = reviewed_canidates .reset_index ()
124
+ reviewed_candidates = reviewed_candidates .set_index ('sorting id' )
125
+ reviewed_candidates = reviewed_candidates .reindex (index = peptides ['sorting id' ])
126
+ reviewed_candidates = reviewed_candidates .reset_index ()
113
127
114
- reviewed_canidates = reviewed_canidates .drop (columns = ['sorting id' ])
128
+ reviewed_candidates = reviewed_candidates .drop (columns = ['sorting id' ])
115
129
peptides = peptides .drop (columns = ['sorting id' ])
116
130
117
131
0 commit comments