Skip to content

Commit 0b0bd48

Browse files
committed
use requests for drs objects
1 parent 306488e commit 0b0bd48

File tree

4 files changed

+217
-175
lines changed

4 files changed

+217
-175
lines changed

htsget_server/beacon_operations.py

Lines changed: 134 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
from flask import Flask
22
import variants
3-
import drs_operations
43
import htsget_operations
54
import database
6-
import drs_database
75
import json
86
import re
97
import connexion
@@ -14,6 +12,8 @@
1412
import os
1513
from config import AGGREGATE_COUNT_THRESHOLD, SEARCH_PATH, HTSGET_URL, BUCKET_SIZE
1614
from candigv2_logging.logging import CanDIGLogger
15+
import requests
16+
from authx.auth import create_service_token
1717

1818

1919
logger = CanDIGLogger(__file__)
@@ -254,15 +254,20 @@ def search(raw_req):
254254
for i in range(len(potential_hits)):
255255
drs_obj_id = potential_hits[i]['drs_object_id']
256256
# look for experiments and programs for all drs objects, even if user is not authorized
257-
drs_obj = drs_database.get_drs_object(drs_obj_id)
258-
if "program" in drs_obj:
259-
if drs_obj["program"] not in results:
260-
results[drs_obj["program"]] = []
261-
for c in drs_obj["contents"]:
262-
if c["id"] not in ["analysis", "index"]:
263-
# this is a ExperimentContentObject
264-
res = {"submitter_sample_id": c["name"], "variant_count": potential_hits[i]["variantcount"]}
265-
results[drs_obj["program"]].append(res)
257+
headers = {
258+
"X-Service-Token": create_service_token()
259+
}
260+
resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj_id}", headers=headers)
261+
if resp.status_code == 200:
262+
drs_obj = resp.json()
263+
if "program" in drs_obj:
264+
if drs_obj["program"] not in results:
265+
results[drs_obj["program"]] = []
266+
for c in drs_obj["contents"]:
267+
if c["id"] not in ["analysis", "index"]:
268+
# this is an ExperimentContentObject
269+
res = {"submitter_sample_id": c["name"], "variant_count": potential_hits[i]["variantcount"]}
270+
results[drs_obj["program"]].append(res)
266271

267272
search_json = {
268273
"potential_hits": potential_hits,
@@ -399,31 +404,41 @@ def full_beacon_search(search_json):
399404
response['beaconHandovers'] = []
400405
for drs_obj_id in variants_by_file.keys():
401406
# look for experiments and programs for all drs objects, even if user is not authorized
402-
drs_obj = drs_database.get_drs_object(drs_obj_id)
403-
if "program" in drs_obj:
404-
download_handovers = []
405-
for c in drs_obj["contents"]:
406-
if c["id"] in ["analysis", "index"]:
407-
# this is a file that we should create a download url for
408-
file_drs_obj = drs_database.get_drs_object(c["name"])
409-
download_handover = {
410-
'handoverType': {'id': 'CUSTOM', 'label': 'DOWNLOAD'},
411-
'url': drs_operations._get_download_url(file_drs_obj['id'])
412-
}
413-
if 'size' in file_drs_obj:
414-
download_handover['size'] = file_drs_obj['size']
415-
download_handovers.append(download_handover)
416-
if drs_obj["program"] in authed_programs:
417-
# fill in htsget handover data
418-
try:
419-
htsget_handover, status_code = htsget_operations._get_urls("variant", drs_obj_id, reference_name=actual_params['reference_name'], start=actual_params['start'], end=actual_params['end'])
420-
except Exception as e:
421-
raise Exception(f"exception in get_variants for {drs_obj_id}: {type(e)} {str(e)}")
422-
if htsget_handover is not None:
423-
htsget_handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'}
424-
response['beaconHandovers'].append(htsget_handover)
425-
if len(download_handovers) > 0:
426-
response['beaconHandovers'].extend(download_handovers)
407+
headers = {
408+
"X-Service-Token": create_service_token()
409+
}
410+
resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj_id}", headers=headers)
411+
if resp.status_code == 200:
412+
drs_obj = resp.json()
413+
if "program" in drs_obj:
414+
download_handovers = []
415+
for c in drs_obj["contents"]:
416+
if c["id"] in ["analysis", "index"]:
417+
# this is a file that we should create a download url for
418+
headers = {
419+
"X-Service-Token": create_service_token()
420+
}
421+
resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{c["name"]}", headers=headers)
422+
if resp.status_code == 200:
423+
file_drs_obj = resp.json()
424+
download_handover = {
425+
'handoverType': {'id': 'CUSTOM', 'label': 'DOWNLOAD'},
426+
'url': f"{HTSGET_URL}/ga4gh/drs/v1/objects/{file_drs_obj['id']}/download"
427+
}
428+
if 'size' in file_drs_obj:
429+
download_handover['size'] = file_drs_obj['size']
430+
download_handovers.append(download_handover)
431+
if drs_obj["program"] in authed_programs:
432+
# fill in htsget handover data
433+
try:
434+
htsget_handover, status_code = htsget_operations._get_urls("variant", drs_obj_id, reference_name=actual_params['reference_name'], start=actual_params['start'], end=actual_params['end'])
435+
except Exception as e:
436+
raise Exception(f"exception in get_variants for {drs_obj_id}: {type(e)} {str(e)}")
437+
if htsget_handover is not None:
438+
htsget_handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'}
439+
response['beaconHandovers'].append(htsget_handover)
440+
if len(download_handovers) > 0:
441+
response['beaconHandovers'].extend(download_handovers)
427442
if len(response['beaconHandovers']) > 0 and meta['returnedGranularity'] == 'record':
428443
response['response'] = resultset
429444
if len(resultset) > 0: # use true number if we're authorized, even if below AGGREGATE_COUNT_THRESHOLD
@@ -474,90 +489,95 @@ def compile_beacon_resultset(variants_by_obj, reference_genome="hg38", authed_pr
474489
resultset = {}
475490
for drs_obj in variants_by_obj.keys():
476491
# check to see if this drs_object is authorized:
477-
x = drs_database.get_drs_object(drs_obj)
478-
is_authed = False
479-
if x["program"] in authed_programs:
480-
is_authed = True
481-
if database.get_variantfile(drs_obj)['reference_genome'] != reference_genome:
482-
continue
483-
for variant in variants_by_obj[drs_obj]['variants']:
484-
# parse the variants beacon-style
485-
variant['variations'] = compile_variations_from_record(ref=variant.pop('ref'), alt=variant.pop('alt'), chrom=variant.pop('chrom'), pos=variant.pop('pos'), reference_genome=reference_genome)
486-
assign_info_to_variations(variant)
487-
488-
# the variations in each variant need to be copied out first:
489-
resultset[drs_obj] = []
490-
for var in variant['variations']:
491-
resultset[drs_obj].append(var['hgvsid'])
492-
if var['hgvsid'] not in resultset:
493-
resultset[var['hgvsid']] = {
494-
'variation': {
495-
"location": var.pop('location'),
496-
"state": var.pop('state'),
497-
"type": var.pop('type')
498-
},
499-
"identifiers": {
500-
"genomicHGVSId": var['hgvsid']
501-
}
502-
}
503-
# move allele-specific info to the variant, like CSQ annotations
504-
if 'info' in var:
505-
if 'CSQ' in var['info']:
506-
if 'molecularAttributes' not in resultset[var['hgvsid']]:
507-
compile_molecular_attributes_from_csq(resultset[var['hgvsid']], var['info'].pop('CSQ'))
508-
509-
# now process the samples into the variations:
510-
if 'samples' in variant and len(variant['samples']) > 0:
511-
for k in variant['samples'].keys():
512-
sample = variant['samples'][k]
513-
# Begin creating a Case Level Data object
514-
cld = {
515-
'genotype': {
516-
'value': sample['GT']
517-
}
518-
}
519-
# check to see that we should be processing the actual sample data:
520-
if is_authed:
521-
cld['analysisId'] = drs_obj
522-
cld['biosampleId'] = f"{x['program']}~{k}"
523-
alleles = sample['GT'].split('/')
524-
if len(alleles) < 2:
525-
alleles = sample['GT'].split('|')
526-
# put a copy of this cld in each variation:
527-
cld['genotype']['secondaryAlleleIds'] = [resultset[drs_obj][int(alleles[0])], resultset[drs_obj][int(alleles[1])]]
528-
if alleles[0] == alleles[1]:
529-
cld['genotype']['zygosity'] = {
530-
'id': 'GENO:0000136',
531-
'label': 'homozygous'
492+
headers = {
493+
"X-Service-Token": create_service_token()
494+
}
495+
response = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj}", headers=headers)
496+
if response.status_code == 200:
497+
x = response.json()
498+
is_authed = False
499+
if x["program"] in authed_programs:
500+
is_authed = True
501+
if database.get_variantfile(drs_obj)['reference_genome'] != reference_genome:
502+
continue
503+
for variant in variants_by_obj[drs_obj]['variants']:
504+
# parse the variants beacon-style
505+
variant['variations'] = compile_variations_from_record(ref=variant.pop('ref'), alt=variant.pop('alt'), chrom=variant.pop('chrom'), pos=variant.pop('pos'), reference_genome=reference_genome)
506+
assign_info_to_variations(variant)
507+
508+
# the variations in each variant need to be copied out first:
509+
resultset[drs_obj] = []
510+
for var in variant['variations']:
511+
resultset[drs_obj].append(var['hgvsid'])
512+
if var['hgvsid'] not in resultset:
513+
resultset[var['hgvsid']] = {
514+
'variation': {
515+
"location": var.pop('location'),
516+
"state": var.pop('state'),
517+
"type": var.pop('type')
518+
},
519+
"identifiers": {
520+
"genomicHGVSId": var['hgvsid']
521+
}
532522
}
533-
cld['genotype'].pop('secondaryAlleleIds')
534-
if alleles[0].isdigit():
535-
var = resultset[drs_obj][int(alleles[0])]
536-
if 'caseLevelData' not in resultset[var]:
537-
resultset[var]['caseLevelData'] = []
538-
resultset[var]['caseLevelData'].append(json.loads(json.dumps(cld)))
539-
else:
540-
if alleles[0] == '0' or alleles[1] == '0':
541-
cld['genotype']['zygosity'] = {
542-
'id': 'GENO:0000458',
543-
'label': 'simple heterozygous'
523+
# move allele-specific info to the variant, like CSQ annotations
524+
if 'info' in var:
525+
if 'CSQ' in var['info']:
526+
if 'molecularAttributes' not in resultset[var['hgvsid']]:
527+
compile_molecular_attributes_from_csq(resultset[var['hgvsid']], var['info'].pop('CSQ'))
528+
529+
# now process the samples into the variations:
530+
if 'samples' in variant and len(variant['samples']) > 0:
531+
for k in variant['samples'].keys():
532+
sample = variant['samples'][k]
533+
# Begin creating a Case Level Data object
534+
cld = {
535+
'genotype': {
536+
'value': sample['GT']
544537
}
545-
else:
538+
}
539+
# check to see that we should be processing the actual sample data:
540+
if is_authed:
541+
cld['analysisId'] = drs_obj
542+
cld['biosampleId'] = f"{x['program']}~{k}"
543+
alleles = sample['GT'].split('/')
544+
if len(alleles) < 2:
545+
alleles = sample['GT'].split('|')
546+
# put a copy of this cld in each variation:
547+
cld['genotype']['secondaryAlleleIds'] = [resultset[drs_obj][int(alleles[0])], resultset[drs_obj][int(alleles[1])]]
548+
if alleles[0] == alleles[1]:
546549
cld['genotype']['zygosity'] = {
547-
'id': 'GENO:0000402',
548-
'label': 'compound heterozygous'
550+
'id': 'GENO:0000136',
551+
'label': 'homozygous'
549552
}
550-
for a in alleles:
551-
if a.isdigit():
552-
var = resultset[drs_obj][int(a)]
553-
# make a copy cld for the other allele's variant
554-
second_cld = json.loads(json.dumps(cld))
555-
# this allele should not be in cld's secondaryAlleleIds,
556-
# and the second allele should not be in second_cld's secondaryAlleleIds
557-
second_cld['genotype']['secondaryAlleleIds'].remove(resultset[drs_obj][int(a)])
553+
cld['genotype'].pop('secondaryAlleleIds')
554+
if alleles[0].isdigit():
555+
var = resultset[drs_obj][int(alleles[0])]
558556
if 'caseLevelData' not in resultset[var]:
559557
resultset[var]['caseLevelData'] = []
560-
resultset[var]['caseLevelData'].append(second_cld)
558+
resultset[var]['caseLevelData'].append(json.loads(json.dumps(cld)))
559+
else:
560+
if alleles[0] == '0' or alleles[1] == '0':
561+
cld['genotype']['zygosity'] = {
562+
'id': 'GENO:0000458',
563+
'label': 'simple heterozygous'
564+
}
565+
else:
566+
cld['genotype']['zygosity'] = {
567+
'id': 'GENO:0000402',
568+
'label': 'compound heterozygous'
569+
}
570+
for a in alleles:
571+
if a.isdigit():
572+
var = resultset[drs_obj][int(a)]
573+
# make a copy of cld for the other allele's variant
574+
second_cld = json.loads(json.dumps(cld))
575+
# this allele should not be in cld's secondaryAlleleIds,
576+
# and the second allele should not be in second_cld's secondaryAlleleIds
577+
second_cld['genotype']['secondaryAlleleIds'].remove(resultset[drs_obj][int(a)])
578+
if 'caseLevelData' not in resultset[var]:
579+
resultset[var]['caseLevelData'] = []
580+
resultset[var]['caseLevelData'].append(second_cld)
561581
resultset.pop(drs_obj)
562582
final_resultset = []
563583
# only include variants that are actually seen in the data (not things like ref alleles that are not in any samples)

htsget_server/drs_operations.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,3 @@ def _get_access_url(access_id):
309309
return url, 500
310310
else:
311311
return {"message": f"Malformed access_id {access_id}: should be in the form endpoint/bucket/item", "method": "_get_access_url"}, 400
312-
313-
314-
# convenience method for other methods to easily get the download url
315-
def _get_download_url(drs_file_obj_id):
316-
return f"{HTSGET_URL}/ga4gh/drs/v1/objects/{drs_file_obj_id}/download"

0 commit comments

Comments
 (0)