Code cleanup: remove commented-out code and replace the hard-coded OncoKB bearer token with ONCOKB_TOKEN

aim11 · aim11 · commit fbeea3a47cd2 · 2025-01-29T15:01:23.000+02:00
diff --git a/cgi_annotator.py b/cgi_annotator.py
@@ -163,11 +163,6 @@ def launch_cgi_job_with_mulitple_variant_types(mutations_file, cnas_file, transl
         preload_content=False  # Set preload_content to False to allow streaming the files
     )
 
-    # Attach the files using the files parameter
-
-
-    # Send the request
-    #response = http.urlopen(response)
     if (response.status == 200):
 
         jobid = response.data.decode("utf-8")
@@ -253,20 +248,8 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
                     i += 1
                     cgi_cna = cgi_cnadf.loc[cgi_cnadf['sample'] == id].iloc[0]
                     cna_annotations.at[indxs, 'oncogenic'] = handle_string_field(cgi_cna["driver"])
-                    # snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
                     cna_annotations.at[indxs, 'gene_role'] = handle_string_field(cgi_cna["gene_role"]),
-                    # snv_annotations.at[indxs,'citationPMids'] = handle_string_field(",".join(rjson["mutationEffect"]["citations"]["pmids"]))
-                    # TODO: Evidence level is related to drug not alteration, show highest in level_of_evidence, treatments table include all levels
-                    # level = map_cgi_evidence(biom)
-                    # if level < cna_annotations.at[indxs, 'level_of_evidence']:
-                    #    cna_annotations.at[indxs, 'level_of_evidence'] = "CGI:"+map_cgi_evidence(biom)
-                    #evid = handle_string_field(biom['Evidence']) + "(" + handle_string_field(biom['Response']) + ")"
-                    #cna_annotations.at[indxs, 'cgi_level'] = evid
-                    # snv_annotations.at[indxs, 'geneSummary'] = handle_string_field(rjson["geneSummary"])
-                    # snv_annotations.at[indxs, 'variantSummary'] = handle_string_field(row["CGI-External oncogenic annotation"])
                     cna_annotations.at[indxs, 'tumorTypeSummary'] =  handle_string_field(cgi_cna["driver_statement"])
-                    # snv_annotations.at[indxs, 'treatments'] = handle_drugs_field(rjson["treatments"])
-                    # alteration = snv_annotations.at[indxs, 'alteration'].value
 
             if idsplit[0] == "SNV":
                 hugoSymbol = idsplit[1]
@@ -290,28 +273,15 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
                     snv_annotations.at[indxs, 'consequence'] = handle_string_field(row["CGI-Consequence"]),
                     cgi_snv = cgi_snvdf.loc[cgi_snvdf['CGI-Sample ID'] == id].iloc[0]
                     snv_annotations.at[indxs, 'oncogenic'] = handle_string_field(cgi_snv["CGI-Oncogenic Summary"])
-                    # snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
                     snv_annotations.at[indxs, 'gene_role'] = handle_string_field(cgi_snv["CGI-Oncogenic Prediction"]),
-                    # snv_annotations.at[indxs,'citationPMids'] = handle_string_field(",".join(rjson["mutationEffect"]["citations"]["pmids"]))
-                    # TODO: Evidence level is related to drug not alteration, show highest in level_of_evidence, treatments table include all levels
-                    # level = map_cgi_evidence(biom)
-                    # if level < snv_annotations.at[indxs, 'level_of_evidence']:
-                    # snv_annotations.at[indxs, 'level_of_evidence'] = map_cgi_evidence(biom)
-                    #snv_annotations.at[indxs, 'cgi_level'] = handle_string_field(biom['Evidence']) + "(" + handle_string_field(biom['Response']) + ")"
-                    # snv_annotations.at[indxs, 'geneSummary'] = handle_string_field(rjson["geneSummary"])
-                    # snv_annotations.at[indxs, 'variantSummary'] = handle_string_field(row["CGI-External oncogenic annotation"])
                     snv_annotations.at[indxs, 'tumorTypeSummary'] = handle_string_field(cgi_snv["driver_statement"])
-                    #snv_annotations.at[indxs, 'treatments'] = handle_drugs_field(rjson["treatments"])
-                    # alteration = snv_annotations.at[indxs, 'alteration'].value
 
         if isinstance(snv_annotations, pd.DataFrame):
-            #snv_annotations.drop(columns=snv_annotations.columns[0], axis=1, inplace=True)
             snv_annotations.to_csv("snv_annotated_cgi.csv", index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'consequence', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
             trdf = pd.DataFrame(treatments)
             trdf.to_csv("treatments_cgi_snv.csv", index=False, sep="\t")
 
         if isinstance(cna_annotations, pd.DataFrame):
-        # cna_annotations.drop(columns=cna_annotations.columns[0], axis=1, inplace=True)
             cna_annotations.to_csv("cna_annotated_cgi.csv", index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
             trdf = pd.DataFrame(treatments)
             trdf.to_csv("treatments_cgi_cna.csv", index=False, sep="\t")
@@ -352,7 +322,7 @@ def generate_temp_cgi_query_files(snv_annotations: pd.DataFrame = None, cna_anno
                 for indx, snv in uniques.iterrows():
                     id = "SNV:"+snv['alteration']
                     alt_split = snv['alteration'].split(':')
-                    row = alt_split[1]+'\t'+alt_split[2]+'\t'+alt_split[3]+'\t'+alt_split[4]+'\t'+id+'\n' #+'\t'+cryptocode.encrypt(snv.samples, settings.CRYPTOCODE)+'\n'
+                    row = alt_split[1]+'\t'+alt_split[2]+'\t'+alt_split[3]+'\t'+alt_split[4]+'\t'+id+'\n'
                     file1.write(row)
                 file1.close()
 
@@ -366,17 +336,10 @@ def generate_temp_cgi_query_files(snv_annotations: pd.DataFrame = None, cna_anno
                 for indx, cna in uniques.iterrows():
                     print(cna)
                     id = "CNA:"+str(cna['hugoSymbol']) + ':' + str(cna['alteration'])
-                    row = cna['hugoSymbol']+'\t'+cna_alt_to_cgi[cna['alteration']].value+'\t'+id+'\n'#+'\t'+cryptocode.encrypt(cna.sample_id, settings.CRYPTOCODE)+'\n'
+                    row = cna['hugoSymbol']+'\t'+cna_alt_to_cgi[cna['alteration']].value+'\t'+id+'\n'
                     file2.write(row)
                 file2.close()
 
-        # header = "fus\tsample\n"
-        # with open("./tmp/fus.ext", "w") as file3:
-        #     file3.write(header)
-        #     for transloc in translocs:
-        #         row = transloc+'\t'+cryptocode.encrypt(transloc.sample, settings.CRYPTOCODE)+'\n'
-        #         file3.write(row)
-        #     file3.close()
     except Exception as e:
         print(f"Unexpected {e=}, {type(e)=}")
         raise
diff --git a/oncokb_annotator.py b/oncokb_annotator.py
@@ -77,11 +77,9 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
     Response: The HTTP response from the OncoKB API.
     """
 
-    token = ONCOKB_TOKEN
-
     api_url = "https://www.oncokb.org/api/v1/annotate/copyNumberAlterations"
     #request_url = api_url + 'copyNameAlterationType='+AlterationType[cna.CNstatus].value+'&hugoSymbol='+hugosymbol+'&tumorType='+tumorType
-    header = {'accept':'application/json', 'Content-Type': 'application/json', 'Authorization':'Bearer '+token}
+    header = {'accept':'application/json', 'Content-Type': 'application/json', 'Authorization':'Bearer '+ONCOKB_TOKEN}
 
     print("Request OncoKB API "+api_url)
 
@@ -111,21 +109,16 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
     print("Querying " +str(len(uniques))+ " CNAs....")
 
     # Sending a POST request and getting back response as HTTPResponse object.
-    #response = urllib3.PoolManager().request("POST", api_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'})
-    response = httpx.post(api_url, json=data, headers={'Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'}, timeout=None)
-    #response = http.request("GET",request_url, headers=header)
-    #print(response.data)
-    #print(response.data.decode('utf-8'))
+    #response = urllib3.PoolManager().request("POST", api_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer '})
+    response = httpx.post(api_url, json=data, headers={'Authorization':'Bearer '+ONCOKB_TOKEN}, timeout=None)  # concatenate the token: a plain '{...}' string literal is sent verbatim, not interpolated
+
 
-    #treatmentsdf = pd.DataFrame.from_dict({'alteration_type':[],'alteration':[],'approvedIndications':[],'description':[],'treatment':[],'level_of_evidence':[],'citations':[],'tumorType':[]})
     if (response.status_code == 200):
         treatments = []
         respjson = json.loads(response.text)
 
         for rjson in respjson:
             hugosymbol = handle_string_field(rjson["query"]["hugoSymbol"])
-            #idsplit = str(cryptocode.decrypt(rjson["query"]["id"], settings.CRYPTOCODE)).split(":")
-            #cna_id = idsplit[2]
             alteration = str.upper(handle_string_field(rjson["query"]["alteration"]))
 
             updatedf = cna_annotations.loc[(cna_annotations['hugoSymbol']==hugosymbol) & (cna_annotations['alteration']==alteration)]
@@ -135,8 +128,6 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
                 cna_annotations.at[indxs,'referenceGenome'] = handle_string_field(rjson["query"]["referenceGenome"])
                 cna_annotations.at[indxs,'tumorType'] = handle_string_field(rjson["query"]["tumorType"])
                 cna_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
-                #updatedf['proteinStart'] = handle_int_field(rjson["query"]["proteinStart"])
-                #updatedf['proteinEnd'] = handle_int_field(rjson["query"]["proteinEnd"])
                 cna_annotations.at[indxs,'oncogenic'] = handle_string_field(rjson["oncogenic"])
                 cna_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
                 cna_annotations.at[indxs,'gene_role'] = handle_string_field(rjson["mutationEffect"]["knownEffect"])
@@ -150,8 +141,6 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
                 cna_annotations.at[indxs,'geneSummary'] = handle_string_field(rjson["geneSummary"])
                 cna_annotations.at[indxs,'variantSummary'] = handle_string_field(rjson["variantSummary"])
                 cna_annotations.at[indxs,'tumorTypeSummary'] = handle_string_field(rjson["tumorTypeSummary"])
-                #cna_annotations.at[indxs,'treatments'] = handle_drugs_field(rjson["treatments"])
-
                 treatments.extend(handle_treatments_oncokb(rjson["treatments"], 'CNA', hugosymbol + ':' + alteration))
 
             #print("Updated "+str(updatedf.count())+" CNAs")
@@ -177,8 +166,7 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
     None
     """
 
-    token = ONCOKB_TOKEN
-    header = {"accept":"application/json", 'Content-Type': 'application/json', "Authorization":'Bearer '+token}
+    header = {"accept":"application/json", 'Content-Type': 'application/json', "Authorization":'Bearer '+ONCOKB_TOKEN}
     request_url = "https://www.oncokb.org/api/v1/annotate/mutations/byGenomicChange"
     #request_url = "https://www.oncokb.org/api/v1/annotate/mutations/byHGVSg"
 
@@ -197,14 +185,11 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
         for row in uniques
     ]
 
-    #print(data)
-    # Sending a GET request and getting back response as HTTPResponse object.
     print("Request OncoKB API "+request_url)
     print("Querying " + str(len(uniques)) + " CNAs....")
 
-    #response = urllib3.PoolManager().request("POST", request_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'})
-    response = httpx.post(request_url, json=data, headers={'Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'}, timeout=None)
-    # response = http.request("GET",request_url, headers=header)
+    #response = urllib3.PoolManager().request("POST", request_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer'})
+    response = httpx.post(request_url, json=data, headers={'Authorization':'Bearer '+ONCOKB_TOKEN}, timeout=None)  # concatenate the token: a plain '{...}' string literal is sent verbatim, not interpolated
     print(response.status_code)
 
     #TODO: check why EGFR chr7,55181426,55181427,A,C  is not found but is found from web api (and also from CGI)
@@ -213,8 +198,7 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
 
         respjson = json.loads(response.text)
         for rjson in respjson:
-            #print(rjson)
-            #print("OBJ", rjson)
+
             id = str(rjson["query"]["id"])
             idsplit = id.split(":")
             chromosome = str(idsplit[0])
@@ -228,7 +212,6 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
                 snv_annotations.at[indxs, 'alteration'] = alteration
                 snv_annotations.at[indxs, 'referenceGenome'] = handle_string_field(rjson["query"]["referenceGenome"])
                 snv_annotations.at[indxs,'tumorType'] = handle_string_field(rjson["query"]["tumorType"])
-                #snv_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
                 snv_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
                 snv_annotations.at[indxs,'oncogenic'] = handle_string_field(rjson["oncogenic"])
                 snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
@@ -238,11 +221,9 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
                 snv_annotations.at[indxs,'geneSummary'] = handle_string_field(rjson["geneSummary"])
                 snv_annotations.at[indxs,'variantSummary'] = handle_string_field(rjson["variantSummary"])
                 snv_annotations.at[indxs,'tumorTypeSummary'] = handle_string_field(rjson["tumorTypeSummary"])
-                #snv_annotations.at[indxs,'treatments'] = handle_drugs_field(rjson["treatments"])
                 treatments.extend(handle_treatments_oncokb(rjson["treatments"], 'SNV', alteration))
 
         print(snv_annotations)
-        #snv_annotations.drop(columns=snv_annotations.columns[0], axis=1, inplace=True)
         header = False if i > 1 else True
         snv_annotations.to_csv("snv_annotated_oncokb.csv", mode="a", header=header, index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'consequence', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
         trdf = pd.DataFrame(treatments)
diff --git a/utils.py b/utils.py
@@ -111,7 +111,6 @@ def gene_id_convert(geneids, target):
     request_url = "https://biit.cs.ut.ee/gprofiler/api/convert/convert/"
     print("Request gProfiler API "+request_url)
     data = '{"organism":"hsapiens", "target":"'+target+'", "query":"'+geneids+'"}'
-    #{"organism":"hsapiens", "target":target, "query":geneids}
     headers = {"Content-Type": "application/json"}
     body = json.dumps(data).encode('utf-8')
     response = httpx.post(request_url, json=body, headers=headers, timeout=None)