@@ -105,7 +105,7 @@ def generate_cgi_cna_file_from_list(genelist):
105105 file2 .write (row )
106106 file2 .close ()
107107
108- def launch_cgi_job_with_mulitple_variant_types (mutations_file , cnas_file , transloc_file , cancer_type , reference ):
108+ def launch_cgi_job_with_mulitple_variant_types (mutations_file = None , cnas_file = None , transloc_file = None , cancer_type = "HGSOC" , reference = "GRCh38" ):
109109 """
110110 This function launches a CGI (Cancer Genome Interpreter) job with multiple variant types,
111111 using the CGI API. It takes in mutation, cnas, and translocation files, cancer type, and
@@ -174,7 +174,7 @@ def launch_cgi_job_with_mulitple_variant_types(mutations_file, cnas_file, transl
174174 return 0
175175
176176
177- def query_cgi_job (jobid , snv_annotations : pd .DataFrame = None , cna_annotations : pd .DataFrame = None ):
177+ def query_cgi_job (jobid , output , snv_annotations : pd .DataFrame = None , cna_annotations : pd .DataFrame = None ):
178178 """
179179 Query the CGI API with a job ID and save the results to the database.
180180
@@ -206,10 +206,8 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
206206 cgi_snvdf = None
207207 cgi_cnadf = None
208208 treatments = []
209+
209210 for fn in fnames :
210- # reader = z.open(f)
211- # for row in reader.readlines():
212- # print(row)
213211 z .extract (fn )
214212 df = pd .read_csv (fn , sep = "\t " )
215213 print (fn )
@@ -277,14 +275,14 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
277275 snv_annotations .at [indxs , 'tumorTypeSummary' ] = handle_string_field (cgi_snv ["driver_statement" ])
278276
279277 if isinstance (snv_annotations , pd .DataFrame ):
280- snv_annotations .to_csv ("snv_annotated_cgi.csv" , index = False , sep = "\t " , columns = ['patient_id' , 'sample_id' , 'alteration' , 'hugoSymbol' , 'tumorType' , 'consequence' , 'oncogenic' , 'mutationEffectDescription' , 'gene_role' , 'citationPMids' , 'level_of_evidence' , 'geneSummary' , 'variantSummary' , 'tumorTypeSummary' ])
278+ snv_annotations .to_csv (output , index = False , sep = "\t " , columns = ['patient_id' , 'sample_id' , 'alteration' , 'hugoSymbol' , 'tumorType' , 'consequence' , 'oncogenic' , 'mutationEffectDescription' , 'gene_role' , 'citationPMids' , 'level_of_evidence' , 'geneSummary' , 'variantSummary' , 'tumorTypeSummary' ])
281279 trdf = pd .DataFrame (treatments )
282- trdf .to_csv ("treatments_cgi_snv .csv" , index = False , sep = "\t " )
280+ trdf .to_csv ("treatments .csv" , mode = "a " , index = False , sep = "\t " )
283281
284282 if isinstance (cna_annotations , pd .DataFrame ):
285- cna_annotations .to_csv ("cna_annotated_cgi.csv" , index = False , sep = "\t " , columns = ['patient_id' , 'sample_id' , 'alteration' , 'hugoSymbol' , 'tumorType' , 'oncogenic' , 'mutationEffectDescription' , 'gene_role' , 'citationPMids' , 'level_of_evidence' , 'geneSummary' , 'variantSummary' , 'tumorTypeSummary' ])
283+ cna_annotations .to_csv (output , index = False , sep = "\t " , columns = ['patient_id' , 'sample_id' , 'alteration' , 'hugoSymbol' , 'tumorType' , 'oncogenic' , 'mutationEffectDescription' , 'gene_role' , 'citationPMids' , 'level_of_evidence' , 'geneSummary' , 'variantSummary' , 'tumorTypeSummary' ])
286284 trdf = pd .DataFrame (treatments )
287- trdf .to_csv ("treatments_cgi_cna .csv" , index = False , sep = "\t " )
285+ trdf .to_csv ("treatments .csv" , mode = "a " , index = False , sep = "\t " )
288286
289287 return 1
290288 else :
@@ -303,7 +301,7 @@ def generate_cgi_cna_file_from_list(genelist):
303301 file2 .write (row )
304302 file2 .close ()
305303
306- def generate_temp_cgi_query_files (snv_annotations : pd .DataFrame = None , cna_annotations : pd .DataFrame = None , translocs : pd .DataFrame = None ):
304+ def generate_temp_cgi_query_files (snv_annotations : pd .DataFrame = None , cna_annotations : pd .DataFrame = None , translocs : pd .DataFrame = None , append_to_annotations : bool = True ):
307305 """
308306 Generate temporary CGI query files from annotations.
309307
@@ -315,16 +313,27 @@ def generate_temp_cgi_query_files(snv_annotations: pd.DataFrame = None, cna_anno
315313 header = "chr\t pos\t ref\t alt\t sample\n "
316314 try :
317315 if isinstance (snv_annotations , pd .DataFrame ):
318- with open ("./tmp/snvs.ext" , "w" ) as file1 :
319- file1 .write (header )
320-
321- uniques = snv_annotations [['alteration' ]].drop_duplicates ()
322- for indx , snv in uniques .iterrows ():
323- id = "SNV:" + snv ['alteration' ]
324- alt_split = snv ['alteration' ].split (':' )
325- row = alt_split [1 ]+ '\t ' + alt_split [2 ]+ '\t ' + alt_split [3 ]+ '\t ' + alt_split [4 ]+ '\t ' + id + '\n '
326- file1 .write (row )
327- file1 .close ()
316+ if append_to_annotations :
317+ with open ("./tmp/snvs.ext" , "w" ) as file1 :
318+ file1 .write (header )
319+
320+ uniques = snv_annotations [['alteration' ]].drop_duplicates ()
321+ for indx , snv in uniques .iterrows ():
322+ id = "SNV:" + snv ['alteration' ]
323+ alt_split = snv ['alteration' ].split (':' )
324+ row = alt_split [1 ]+ '\t ' + alt_split [2 ]+ '\t ' + alt_split [3 ]+ '\t ' + alt_split [4 ]+ '\t ' + id + '\n '
325+ file1 .write (row )
326+ file1 .close ()
327+ else :
328+ with open ("./tmp/snvs.ext" , "w" ) as file1 :
329+ file1 .write (header )
330+
331+ uniques = snv_annotations [['hugoSymbol' , 'chromosome' , 'position' , 'reference_allele' , 'sample_allele' , 'tumorType' , 'referenceGenome' ]].drop_duplicates ()
332+ for indx , snv in uniques .iterrows ():
333+ id = "SNV:" + snv ['hugoSymbol' ]+ ':' + snv ['chromosome' ]+ ':' + str (snv ['position' ])+ ':' + snv ['reference_allele' ]+ ':' + snv ['sample_allele' ]
334+ row = snv ['chromosome' ]+ '\t ' + str (snv ['position' ])+ '\t ' + snv ['reference_allele' ]+ '\t ' + snv ['sample_allele' ]+ '\t ' + id + '\n ' #+'\t'+cryptocode.encrypt(snv.samples, settings.CRYPTOCODE)+'\n'
335+ file1 .write (row )
336+ file1 .close ()
328337
329338 if isinstance (cna_annotations , pd .DataFrame ):
330339 header = "gene\t cna\t sample\n "
0 commit comments