|
1 | 1 | from flask import Flask |
2 | 2 | import variants |
3 | | -import drs_operations |
4 | 3 | import htsget_operations |
5 | 4 | import database |
6 | | -import drs_database |
7 | 5 | import json |
8 | 6 | import re |
9 | 7 | import connexion |
|
14 | 12 | import os |
15 | 13 | from config import AGGREGATE_COUNT_THRESHOLD, SEARCH_PATH, HTSGET_URL, BUCKET_SIZE |
16 | 14 | from candigv2_logging.logging import CanDIGLogger |
| 15 | +import requests |
| 16 | +from authx.auth import create_service_token |
17 | 17 |
|
18 | 18 |
|
19 | 19 | logger = CanDIGLogger(__file__) |
@@ -254,15 +254,20 @@ def search(raw_req): |
254 | 254 | for i in range(len(potential_hits)): |
255 | 255 | drs_obj_id = potential_hits[i]['drs_object_id'] |
256 | 256 | # look for experiments and programs for all drs objects, even if user is not authorized |
257 | | - drs_obj = drs_database.get_drs_object(drs_obj_id) |
258 | | - if "program" in drs_obj: |
259 | | - if drs_obj["program"] not in results: |
260 | | - results[drs_obj["program"]] = [] |
261 | | - for c in drs_obj["contents"]: |
262 | | - if c["id"] not in ["analysis", "index"]: |
263 | | - # this is an ExperimentContentObject |
264 | | - res = {"submitter_sample_id": c["name"], "variant_count": potential_hits[i]["variantcount"]} |
265 | | - results[drs_obj["program"]].append(res) |
| 257 | + headers = { |
| 258 | + "X-Service-Token": create_service_token() |
| 259 | + } |
| 260 | + resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj_id}", headers=headers) |
| 261 | + if resp.status_code == 200: |
| 262 | + drs_obj = resp.json() |
| 263 | + if "program" in drs_obj: |
| 264 | + if drs_obj["program"] not in results: |
| 265 | + results[drs_obj["program"]] = [] |
| 266 | + for c in drs_obj["contents"]: |
| 267 | + if c["id"] not in ["analysis", "index"]: |
| 268 | + # this is an ExperimentContentObject |
| 269 | + res = {"submitter_sample_id": c["name"], "variant_count": potential_hits[i]["variantcount"]} |
| 270 | + results[drs_obj["program"]].append(res) |
266 | 271 |
|
267 | 272 | search_json = { |
268 | 273 | "potential_hits": potential_hits, |
@@ -399,31 +404,41 @@ def full_beacon_search(search_json): |
399 | 404 | response['beaconHandovers'] = [] |
400 | 405 | for drs_obj_id in variants_by_file.keys(): |
401 | 406 | # look for experiments and programs for all drs objects, even if user is not authorized |
402 | | - drs_obj = drs_database.get_drs_object(drs_obj_id) |
403 | | - if "program" in drs_obj: |
404 | | - download_handovers = [] |
405 | | - for c in drs_obj["contents"]: |
406 | | - if c["id"] in ["analysis", "index"]: |
407 | | - # this is a file that we should create a download url for |
408 | | - file_drs_obj = drs_database.get_drs_object(c["name"]) |
409 | | - download_handover = { |
410 | | - 'handoverType': {'id': 'CUSTOM', 'label': 'DOWNLOAD'}, |
411 | | - 'url': drs_operations._get_download_url(file_drs_obj['id']) |
412 | | - } |
413 | | - if 'size' in file_drs_obj: |
414 | | - download_handover['size'] = file_drs_obj['size'] |
415 | | - download_handovers.append(download_handover) |
416 | | - if drs_obj["program"] in authed_programs: |
417 | | - # fill in htsget handover data |
418 | | - try: |
419 | | - htsget_handover, status_code = htsget_operations._get_urls("variant", drs_obj_id, reference_name=actual_params['reference_name'], start=actual_params['start'], end=actual_params['end']) |
420 | | - except Exception as e: |
421 | | - raise Exception(f"exception in get_variants for {drs_obj_id}: {type(e)} {str(e)}") |
422 | | - if htsget_handover is not None: |
423 | | - htsget_handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'} |
424 | | - response['beaconHandovers'].append(htsget_handover) |
425 | | - if len(download_handovers) > 0: |
426 | | - response['beaconHandovers'].extend(download_handovers) |
| 407 | + headers = { |
| 408 | + "X-Service-Token": create_service_token() |
| 409 | + } |
| 410 | + resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj_id}", headers=headers) |
| 411 | + if resp.status_code == 200: |
| 412 | + drs_obj = resp.json() |
| 413 | + if "program" in drs_obj: |
| 414 | + download_handovers = [] |
| 415 | + for c in drs_obj["contents"]: |
| 416 | + if c["id"] in ["analysis", "index"]: |
| 417 | + # this is a file that we should create a download url for |
| 418 | + headers = { |
| 419 | + "X-Service-Token": create_service_token() |
| 420 | + } |
| 421 | + resp = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{c["name"]}", headers=headers) |
| 422 | + if resp.status_code == 200: |
| 423 | + file_drs_obj = resp.json() |
| 424 | + download_handover = { |
| 425 | + 'handoverType': {'id': 'CUSTOM', 'label': 'DOWNLOAD'}, |
| 426 | + 'url': f"{HTSGET_URL}/ga4gh/drs/v1/objects/{file_drs_obj['id']}/download" |
| 427 | + } |
| 428 | + if 'size' in file_drs_obj: |
| 429 | + download_handover['size'] = file_drs_obj['size'] |
| 430 | + download_handovers.append(download_handover) |
| 431 | + if drs_obj["program"] in authed_programs: |
| 432 | + # fill in htsget handover data |
| 433 | + try: |
| 434 | + htsget_handover, status_code = htsget_operations._get_urls("variant", drs_obj_id, reference_name=actual_params['reference_name'], start=actual_params['start'], end=actual_params['end']) |
| 435 | + except Exception as e: |
| 436 | + raise Exception(f"exception in get_variants for {drs_obj_id}: {type(e)} {str(e)}") |
| 437 | + if htsget_handover is not None: |
| 438 | + htsget_handover['handoverType'] = {'id': 'CUSTOM', 'label': 'HTSGET'} |
| 439 | + response['beaconHandovers'].append(htsget_handover) |
| 440 | + if len(download_handovers) > 0: |
| 441 | + response['beaconHandovers'].extend(download_handovers) |
427 | 442 | if len(response['beaconHandovers']) > 0 and meta['returnedGranularity'] == 'record': |
428 | 443 | response['response'] = resultset |
429 | 444 | if len(resultset) > 0: # use true number if we're authorized, even if below AGGREGATE_COUNT_THRESHOLD |
@@ -474,90 +489,95 @@ def compile_beacon_resultset(variants_by_obj, reference_genome="hg38", authed_pr |
474 | 489 | resultset = {} |
475 | 490 | for drs_obj in variants_by_obj.keys(): |
476 | 491 | # check to see if this drs_object is authorized: |
477 | | - x = drs_database.get_drs_object(drs_obj) |
478 | | - is_authed = False |
479 | | - if x["program"] in authed_programs: |
480 | | - is_authed = True |
481 | | - if database.get_variantfile(drs_obj)['reference_genome'] != reference_genome: |
482 | | - continue |
483 | | - for variant in variants_by_obj[drs_obj]['variants']: |
484 | | - # parse the variants beacon-style |
485 | | - variant['variations'] = compile_variations_from_record(ref=variant.pop('ref'), alt=variant.pop('alt'), chrom=variant.pop('chrom'), pos=variant.pop('pos'), reference_genome=reference_genome) |
486 | | - assign_info_to_variations(variant) |
487 | | - |
488 | | - # the variations in each variant need to be copied out first: |
489 | | - resultset[drs_obj] = [] |
490 | | - for var in variant['variations']: |
491 | | - resultset[drs_obj].append(var['hgvsid']) |
492 | | - if var['hgvsid'] not in resultset: |
493 | | - resultset[var['hgvsid']] = { |
494 | | - 'variation': { |
495 | | - "location": var.pop('location'), |
496 | | - "state": var.pop('state'), |
497 | | - "type": var.pop('type') |
498 | | - }, |
499 | | - "identifiers": { |
500 | | - "genomicHGVSId": var['hgvsid'] |
501 | | - } |
502 | | - } |
503 | | - # move allele-specific info to the variant, like CSQ annotations |
504 | | - if 'info' in var: |
505 | | - if 'CSQ' in var['info']: |
506 | | - if 'molecularAttributes' not in resultset[var['hgvsid']]: |
507 | | - compile_molecular_attributes_from_csq(resultset[var['hgvsid']], var['info'].pop('CSQ')) |
508 | | - |
509 | | - # now process the samples into the variations: |
510 | | - if 'samples' in variant and len(variant['samples']) > 0: |
511 | | - for k in variant['samples'].keys(): |
512 | | - sample = variant['samples'][k] |
513 | | - # Begin creating a Case Level Data object |
514 | | - cld = { |
515 | | - 'genotype': { |
516 | | - 'value': sample['GT'] |
517 | | - } |
518 | | - } |
519 | | - # check to see that we should be processing the actual sample data: |
520 | | - if is_authed: |
521 | | - cld['analysisId'] = drs_obj |
522 | | - cld['biosampleId'] = f"{x['program']}~{k}" |
523 | | - alleles = sample['GT'].split('/') |
524 | | - if len(alleles) < 2: |
525 | | - alleles = sample['GT'].split('|') |
526 | | - # put a copy of this cld in each variation: |
527 | | - cld['genotype']['secondaryAlleleIds'] = [resultset[drs_obj][int(alleles[0])], resultset[drs_obj][int(alleles[1])]] |
528 | | - if alleles[0] == alleles[1]: |
529 | | - cld['genotype']['zygosity'] = { |
530 | | - 'id': 'GENO:0000136', |
531 | | - 'label': 'homozygous' |
| 492 | + headers = { |
| 493 | + "X-Service-Token": create_service_token() |
| 494 | + } |
| 495 | + response = requests.get(url=f"{os.getenv("DRS_URL")}/ga4gh/drs/v1/objects/{drs_obj}", headers=headers) |
| 496 | + if response.status_code == 200: |
| 497 | + x = response.json() |
| 498 | + is_authed = False |
| 499 | + if x["program"] in authed_programs: |
| 500 | + is_authed = True |
| 501 | + if database.get_variantfile(drs_obj)['reference_genome'] != reference_genome: |
| 502 | + continue |
| 503 | + for variant in variants_by_obj[drs_obj]['variants']: |
| 504 | + # parse the variants beacon-style |
| 505 | + variant['variations'] = compile_variations_from_record(ref=variant.pop('ref'), alt=variant.pop('alt'), chrom=variant.pop('chrom'), pos=variant.pop('pos'), reference_genome=reference_genome) |
| 506 | + assign_info_to_variations(variant) |
| 507 | + |
| 508 | + # the variations in each variant need to be copied out first: |
| 509 | + resultset[drs_obj] = [] |
| 510 | + for var in variant['variations']: |
| 511 | + resultset[drs_obj].append(var['hgvsid']) |
| 512 | + if var['hgvsid'] not in resultset: |
| 513 | + resultset[var['hgvsid']] = { |
| 514 | + 'variation': { |
| 515 | + "location": var.pop('location'), |
| 516 | + "state": var.pop('state'), |
| 517 | + "type": var.pop('type') |
| 518 | + }, |
| 519 | + "identifiers": { |
| 520 | + "genomicHGVSId": var['hgvsid'] |
| 521 | + } |
532 | 522 | } |
533 | | - cld['genotype'].pop('secondaryAlleleIds') |
534 | | - if alleles[0].isdigit(): |
535 | | - var = resultset[drs_obj][int(alleles[0])] |
536 | | - if 'caseLevelData' not in resultset[var]: |
537 | | - resultset[var]['caseLevelData'] = [] |
538 | | - resultset[var]['caseLevelData'].append(json.loads(json.dumps(cld))) |
539 | | - else: |
540 | | - if alleles[0] == '0' or alleles[1] == '0': |
541 | | - cld['genotype']['zygosity'] = { |
542 | | - 'id': 'GENO:0000458', |
543 | | - 'label': 'simple heterozygous' |
| 523 | + # move allele-specific info to the variant, like CSQ annotations |
| 524 | + if 'info' in var: |
| 525 | + if 'CSQ' in var['info']: |
| 526 | + if 'molecularAttributes' not in resultset[var['hgvsid']]: |
| 527 | + compile_molecular_attributes_from_csq(resultset[var['hgvsid']], var['info'].pop('CSQ')) |
| 528 | + |
| 529 | + # now process the samples into the variations: |
| 530 | + if 'samples' in variant and len(variant['samples']) > 0: |
| 531 | + for k in variant['samples'].keys(): |
| 532 | + sample = variant['samples'][k] |
| 533 | + # Begin creating a Case Level Data object |
| 534 | + cld = { |
| 535 | + 'genotype': { |
| 536 | + 'value': sample['GT'] |
544 | 537 | } |
545 | | - else: |
| 538 | + } |
| 539 | + # check to see that we should be processing the actual sample data: |
| 540 | + if is_authed: |
| 541 | + cld['analysisId'] = drs_obj |
| 542 | + cld['biosampleId'] = f"{x['program']}~{k}" |
| 543 | + alleles = sample['GT'].split('/') |
| 544 | + if len(alleles) < 2: |
| 545 | + alleles = sample['GT'].split('|') |
| 546 | + # put a copy of this cld in each variation: |
| 547 | + cld['genotype']['secondaryAlleleIds'] = [resultset[drs_obj][int(alleles[0])], resultset[drs_obj][int(alleles[1])]] |
| 548 | + if alleles[0] == alleles[1]: |
546 | 549 | cld['genotype']['zygosity'] = { |
547 | | - 'id': 'GENO:0000402', |
548 | | - 'label': 'compound heterozygous' |
| 550 | + 'id': 'GENO:0000136', |
| 551 | + 'label': 'homozygous' |
549 | 552 | } |
550 | | - for a in alleles: |
551 | | - if a.isdigit(): |
552 | | - var = resultset[drs_obj][int(a)] |
553 | | - # make a copy cld for the other allele's variant |
554 | | - second_cld = json.loads(json.dumps(cld)) |
555 | | - # this allele should not be in cld's secondaryAlleleIds, |
556 | | - # and the second allele should not be in second_cld's secondaryAlleleIds |
557 | | - second_cld['genotype']['secondaryAlleleIds'].remove(resultset[drs_obj][int(a)]) |
| 553 | + cld['genotype'].pop('secondaryAlleleIds') |
| 554 | + if alleles[0].isdigit(): |
| 555 | + var = resultset[drs_obj][int(alleles[0])] |
558 | 556 | if 'caseLevelData' not in resultset[var]: |
559 | 557 | resultset[var]['caseLevelData'] = [] |
560 | | - resultset[var]['caseLevelData'].append(second_cld) |
| 558 | + resultset[var]['caseLevelData'].append(json.loads(json.dumps(cld))) |
| 559 | + else: |
| 560 | + if alleles[0] == '0' or alleles[1] == '0': |
| 561 | + cld['genotype']['zygosity'] = { |
| 562 | + 'id': 'GENO:0000458', |
| 563 | + 'label': 'simple heterozygous' |
| 564 | + } |
| 565 | + else: |
| 566 | + cld['genotype']['zygosity'] = { |
| 567 | + 'id': 'GENO:0000402', |
| 568 | + 'label': 'compound heterozygous' |
| 569 | + } |
| 570 | + for a in alleles: |
| 571 | + if a.isdigit(): |
| 572 | + var = resultset[drs_obj][int(a)] |
| 573 | + # make a copy cld for the other allele's variant |
| 574 | + second_cld = json.loads(json.dumps(cld)) |
| 575 | + # this allele should not be in cld's secondaryAlleleIds, |
| 576 | + # and the second allele should not be in second_cld's secondaryAlleleIds |
| 577 | + second_cld['genotype']['secondaryAlleleIds'].remove(resultset[drs_obj][int(a)]) |
| 578 | + if 'caseLevelData' not in resultset[var]: |
| 579 | + resultset[var]['caseLevelData'] = [] |
| 580 | + resultset[var]['caseLevelData'].append(second_cld) |
561 | 581 | resultset.pop(drs_obj) |
562 | 582 | final_resultset = [] |
563 | 583 | # only include variants that are actually seen in the data (not things like ref alleles that are not in any samples) |
|
0 commit comments