Skip to content

Commit fbeea3a

Browse files
committed
Code cleanup: remove commented-out code and hard-coded OncoKB bearer tokens
1 parent 33c32c6 commit fbeea3a

File tree

3 files changed

+10
-67
lines changed

3 files changed

+10
-67
lines changed

cgi_annotator.py

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,6 @@ def launch_cgi_job_with_mulitple_variant_types(mutations_file, cnas_file, transl
163163
preload_content=False # Set preload_content to False to allow streaming the files
164164
)
165165

166-
# Attach the files using the files parameter
167-
168-
169-
# Send the request
170-
#response = http.urlopen(response)
171166
if (response.status == 200):
172167

173168
jobid = response.data.decode("utf-8")
@@ -253,20 +248,8 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
253248
i += 1
254249
cgi_cna = cgi_cnadf.loc[cgi_cnadf['sample'] == id].iloc[0]
255250
cna_annotations.at[indxs, 'oncogenic'] = handle_string_field(cgi_cna["driver"])
256-
# snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
257251
cna_annotations.at[indxs, 'gene_role'] = handle_string_field(cgi_cna["gene_role"]),
258-
# snv_annotations.at[indxs,'citationPMids'] = handle_string_field(",".join(rjson["mutationEffect"]["citations"]["pmids"]))
259-
# TODO: Evidence level is related to drug not alteration, show highest in level_of_evidence, treatments table include all levels
260-
# level = map_cgi_evidence(biom)
261-
# if level < cna_annotations.at[indxs, 'level_of_evidence']:
262-
# cna_annotations.at[indxs, 'level_of_evidence'] = "CGI:"+map_cgi_evidence(biom)
263-
#evid = handle_string_field(biom['Evidence']) + "(" + handle_string_field(biom['Response']) + ")"
264-
#cna_annotations.at[indxs, 'cgi_level'] = evid
265-
# snv_annotations.at[indxs, 'geneSummary'] = handle_string_field(rjson["geneSummary"])
266-
# snv_annotations.at[indxs, 'variantSummary'] = handle_string_field(row["CGI-External oncogenic annotation"])
267252
cna_annotations.at[indxs, 'tumorTypeSummary'] = handle_string_field(cgi_cna["driver_statement"])
268-
# snv_annotations.at[indxs, 'treatments'] = handle_drugs_field(rjson["treatments"])
269-
# alteration = snv_annotations.at[indxs, 'alteration'].value
270253

271254
if idsplit[0] == "SNV":
272255
hugoSymbol = idsplit[1]
@@ -290,28 +273,15 @@ def query_cgi_job(jobid, snv_annotations: pd.DataFrame = None, cna_annotations:
290273
snv_annotations.at[indxs, 'consequence'] = handle_string_field(row["CGI-Consequence"]),
291274
cgi_snv = cgi_snvdf.loc[cgi_snvdf['CGI-Sample ID'] == id].iloc[0]
292275
snv_annotations.at[indxs, 'oncogenic'] = handle_string_field(cgi_snv["CGI-Oncogenic Summary"])
293-
# snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
294276
snv_annotations.at[indxs, 'gene_role'] = handle_string_field(cgi_snv["CGI-Oncogenic Prediction"]),
295-
# snv_annotations.at[indxs,'citationPMids'] = handle_string_field(",".join(rjson["mutationEffect"]["citations"]["pmids"]))
296-
# TODO: Evidence level is related to drug not alteration, show highest in level_of_evidence, treatments table include all levels
297-
# level = map_cgi_evidence(biom)
298-
# if level < snv_annotations.at[indxs, 'level_of_evidence']:
299-
# snv_annotations.at[indxs, 'level_of_evidence'] = map_cgi_evidence(biom)
300-
#snv_annotations.at[indxs, 'cgi_level'] = handle_string_field(biom['Evidence']) + "(" + handle_string_field(biom['Response']) + ")"
301-
# snv_annotations.at[indxs, 'geneSummary'] = handle_string_field(rjson["geneSummary"])
302-
# snv_annotations.at[indxs, 'variantSummary'] = handle_string_field(row["CGI-External oncogenic annotation"])
303277
snv_annotations.at[indxs, 'tumorTypeSummary'] = handle_string_field(cgi_snv["driver_statement"])
304-
#snv_annotations.at[indxs, 'treatments'] = handle_drugs_field(rjson["treatments"])
305-
# alteration = snv_annotations.at[indxs, 'alteration'].value
306278

307279
if isinstance(snv_annotations, pd.DataFrame):
308-
#snv_annotations.drop(columns=snv_annotations.columns[0], axis=1, inplace=True)
309280
snv_annotations.to_csv("snv_annotated_cgi.csv", index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'consequence', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
310281
trdf = pd.DataFrame(treatments)
311282
trdf.to_csv("treatments_cgi_snv.csv", index=False, sep="\t")
312283

313284
if isinstance(cna_annotations, pd.DataFrame):
314-
# cna_annotations.drop(columns=cna_annotations.columns[0], axis=1, inplace=True)
315285
cna_annotations.to_csv("cna_annotated_cgi.csv", index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
316286
trdf = pd.DataFrame(treatments)
317287
trdf.to_csv("treatments_cgi_cna.csv", index=False, sep="\t")
@@ -352,7 +322,7 @@ def generate_temp_cgi_query_files(snv_annotations: pd.DataFrame = None, cna_anno
352322
for indx, snv in uniques.iterrows():
353323
id = "SNV:"+snv['alteration']
354324
alt_split = snv['alteration'].split(':')
355-
row = alt_split[1]+'\t'+alt_split[2]+'\t'+alt_split[3]+'\t'+alt_split[4]+'\t'+id+'\n' #+'\t'+cryptocode.encrypt(snv.samples, settings.CRYPTOCODE)+'\n'
325+
row = alt_split[1]+'\t'+alt_split[2]+'\t'+alt_split[3]+'\t'+alt_split[4]+'\t'+id+'\n'
356326
file1.write(row)
357327
file1.close()
358328

@@ -366,17 +336,10 @@ def generate_temp_cgi_query_files(snv_annotations: pd.DataFrame = None, cna_anno
366336
for indx, cna in uniques.iterrows():
367337
print(cna)
368338
id = "CNA:"+str(cna['hugoSymbol']) + ':' + str(cna['alteration'])
369-
row = cna['hugoSymbol']+'\t'+cna_alt_to_cgi[cna['alteration']].value+'\t'+id+'\n'#+'\t'+cryptocode.encrypt(cna.sample_id, settings.CRYPTOCODE)+'\n'
339+
row = cna['hugoSymbol']+'\t'+cna_alt_to_cgi[cna['alteration']].value+'\t'+id+'\n'
370340
file2.write(row)
371341
file2.close()
372342

373-
# header = "fus\tsample\n"
374-
# with open("./tmp/fus.ext", "w") as file3:
375-
# file3.write(header)
376-
# for transloc in translocs:
377-
# row = transloc+'\t'+cryptocode.encrypt(transloc.sample, settings.CRYPTOCODE)+'\n'
378-
# file3.write(row)
379-
# file3.close()
380343
except Exception as e:
381344
print(f"Unexpected {e=}, {type(e)=}")
382345
raise

oncokb_annotator.py

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,9 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
7777
Response: The HTTP response from the OncoKB API.
7878
"""
7979

80-
token = ONCOKB_TOKEN
81-
8280
api_url = "https://www.oncokb.org/api/v1/annotate/copyNumberAlterations"
8381
#request_url = api_url + 'copyNameAlterationType='+AlterationType[cna.CNstatus].value+'&hugoSymbol='+hugosymbol+'&tumorType='+tumorType
84-
header = {'accept':'application/json', 'Content-Type': 'application/json', 'Authorization':'Bearer '+token}
82+
header = {'accept':'application/json', 'Content-Type': 'application/json', 'Authorization':'Bearer '+ONCOKB_TOKEN}
8583

8684
print("Request OncoKB API "+api_url)
8785

@@ -111,21 +109,16 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
111109
print("Querying " +str(len(uniques))+ " CNAs....")
112110

113111
# Sending a POST request and getting back response as HTTPResponse object.
114-
#response = urllib3.PoolManager().request("POST", api_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'})
115-
response = httpx.post(api_url, json=data, headers={'Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'}, timeout=None)
116-
#response = http.request("GET",request_url, headers=header)
117-
#print(response.data)
118-
#print(response.data.decode('utf-8'))
112+
#response = urllib3.PoolManager().request("POST", api_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer '})
113+
response = httpx.post(api_url, json=data, headers={'Authorization':'Bearer {ONCOKB_TOKEN}'}, timeout=None)
114+
119115

120-
#treatmentsdf = pd.DataFrame.from_dict({'alteration_type':[],'alteration':[],'approvedIndications':[],'description':[],'treatment':[],'level_of_evidence':[],'citations':[],'tumorType':[]})
121116
if (response.status_code == 200):
122117
treatments = []
123118
respjson = json.loads(response.text)
124119

125120
for rjson in respjson:
126121
hugosymbol = handle_string_field(rjson["query"]["hugoSymbol"])
127-
#idsplit = str(cryptocode.decrypt(rjson["query"]["id"], settings.CRYPTOCODE)).split(":")
128-
#cna_id = idsplit[2]
129122
alteration = str.upper(handle_string_field(rjson["query"]["alteration"]))
130123

131124
updatedf = cna_annotations.loc[(cna_annotations['hugoSymbol']==hugosymbol) & (cna_annotations['alteration']==alteration)]
@@ -135,8 +128,6 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
135128
cna_annotations.at[indxs,'referenceGenome'] = handle_string_field(rjson["query"]["referenceGenome"])
136129
cna_annotations.at[indxs,'tumorType'] = handle_string_field(rjson["query"]["tumorType"])
137130
cna_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
138-
#updatedf['proteinStart'] = handle_int_field(rjson["query"]["proteinStart"])
139-
#updatedf['proteinEnd'] = handle_int_field(rjson["query"]["proteinEnd"])
140131
cna_annotations.at[indxs,'oncogenic'] = handle_string_field(rjson["oncogenic"])
141132
cna_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
142133
cna_annotations.at[indxs,'gene_role'] = handle_string_field(rjson["mutationEffect"]["knownEffect"])
@@ -150,8 +141,6 @@ def query_oncokb_cnas_to_csv(cna_annotations: pd.DataFrame, i):
150141
cna_annotations.at[indxs,'geneSummary'] = handle_string_field(rjson["geneSummary"])
151142
cna_annotations.at[indxs,'variantSummary'] = handle_string_field(rjson["variantSummary"])
152143
cna_annotations.at[indxs,'tumorTypeSummary'] = handle_string_field(rjson["tumorTypeSummary"])
153-
#cna_annotations.at[indxs,'treatments'] = handle_drugs_field(rjson["treatments"])
154-
155144
treatments.extend(handle_treatments_oncokb(rjson["treatments"], 'CNA', hugosymbol + ':' + alteration))
156145

157146
#print("Updated "+str(updatedf.count())+" CNAs")
@@ -177,8 +166,7 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
177166
None
178167
"""
179168

180-
token = ONCOKB_TOKEN
181-
header = {"accept":"application/json", 'Content-Type': 'application/json', "Authorization":'Bearer '+token}
169+
header = {"accept":"application/json", 'Content-Type': 'application/json', "Authorization":'Bearer '+ONCOKB_TOKEN}
182170
request_url = "https://www.oncokb.org/api/v1/annotate/mutations/byGenomicChange"
183171
#request_url = "https://www.oncokb.org/api/v1/annotate/mutations/byHGVSg"
184172

@@ -197,14 +185,11 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
197185
for row in uniques
198186
]
199187

200-
#print(data)
201-
# Sending a GET request and getting back response as HTTPResponse object.
202188
print("Request OncoKB API "+request_url)
203189
print("Querying " + str(len(uniques)) + " CNAs....")
204190

205-
#response = urllib3.PoolManager().request("POST", request_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'})
206-
response = httpx.post(request_url, json=data, headers={'Authorization':'Bearer 16d3a20d-c93c-4b2d-84ad-b3657a367fdb'}, timeout=None)
207-
# response = http.request("GET",request_url, headers=header)
191+
#response = urllib3.PoolManager().request("POST", request_url, body=data, headers={'accept':'application/json','Content-Type':'application/json','Authorization':'Bearer'})
192+
response = httpx.post(request_url, json=data, headers={'Authorization':'Bearer {ONCOKB_TOKEN}'}, timeout=None)
208193
print(response.status_code)
209194

210195
#TODO: check why EGFR chr7,55181426,55181427,A,C is not found but is found from web api (and also from CGI)
@@ -213,8 +198,7 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
213198

214199
respjson = json.loads(response.text)
215200
for rjson in respjson:
216-
#print(rjson)
217-
#print("OBJ", rjson)
201+
218202
id = str(rjson["query"]["id"])
219203
idsplit = id.split(":")
220204
chromosome = str(idsplit[0])
@@ -228,7 +212,6 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
228212
snv_annotations.at[indxs, 'alteration'] = alteration
229213
snv_annotations.at[indxs, 'referenceGenome'] = handle_string_field(rjson["query"]["referenceGenome"])
230214
snv_annotations.at[indxs,'tumorType'] = handle_string_field(rjson["query"]["tumorType"])
231-
#snv_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
232215
snv_annotations.at[indxs,'consequence'] = handle_string_field(rjson["query"]["consequence"])
233216
snv_annotations.at[indxs,'oncogenic'] = handle_string_field(rjson["oncogenic"])
234217
snv_annotations.at[indxs,'mutationEffectDescription'] = handle_string_field(rjson["mutationEffect"]["description"])
@@ -238,11 +221,9 @@ def query_oncokb_somatic_mutations(snv_annotations: pd.DataFrame, i):
238221
snv_annotations.at[indxs,'geneSummary'] = handle_string_field(rjson["geneSummary"])
239222
snv_annotations.at[indxs,'variantSummary'] = handle_string_field(rjson["variantSummary"])
240223
snv_annotations.at[indxs,'tumorTypeSummary'] = handle_string_field(rjson["tumorTypeSummary"])
241-
#snv_annotations.at[indxs,'treatments'] = handle_drugs_field(rjson["treatments"])
242224
treatments.extend(handle_treatments_oncokb(rjson["treatments"], 'SNV', alteration))
243225

244226
print(snv_annotations)
245-
#snv_annotations.drop(columns=snv_annotations.columns[0], axis=1, inplace=True)
246227
header = False if i > 1 else True
247228
snv_annotations.to_csv("snv_annotated_oncokb.csv", mode="a", header=header, index=False, sep="\t", columns=['patient_id', 'sample_id', 'alteration', 'hugoSymbol', 'tumorType', 'consequence', 'oncogenic', 'mutationEffectDescription', 'gene_role', 'citationPMids', 'level_of_evidence', 'geneSummary', 'variantSummary', 'tumorTypeSummary'])
248229
trdf = pd.DataFrame(treatments)

utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ def gene_id_convert(geneids, target):
111111
request_url = "https://biit.cs.ut.ee/gprofiler/api/convert/convert/"
112112
print("Request gProfiler API "+request_url)
113113
data = '{"organism":"hsapiens", "target":"'+target+'", "query":"'+geneids+'"}'
114-
#{"organism":"hsapiens", "target":target, "query":geneids}
115114
headers = {"Content-Type": "application/json"}
116115
body = json.dumps(data).encode('utf-8')
117116
response = httpx.post(request_url, json=body, headers=headers, timeout=None)

0 commit comments

Comments
 (0)