Skip to content

Commit 4812071

Browse files
authored
Initial commit
1 parent c303ff7 commit 4812071

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

gnomad_python_api.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# gnomAD Python API by @furkanmtorun
2+
3+
# | GitHub: [@furkanmtorun](https://github.com/furkanmtorun)
4+
# | [Google Scholar](https://scholar.google.com/citations?user=d5ZyOZ4AAAAJ)
5+
# | [Personal Website](https://furkanmtorun.github.io/)
6+
7+
# Import required libraries and packages
8+
from pandas.io.json import json_normalize as json_normalize
9+
from tqdm import tqdm
10+
import pandas as pd
11+
import requests
12+
import argparse
13+
import json
14+
import os
15+
16+
# Create a folder for outputs in the current directory
17+
# Make sure the folder that receives the generated .tsv files exists.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test followed by os.mkdir().
os.makedirs('outputs/', exist_ok=True)
19+
20+
# Argument parsing
21+
def arg_parser():
    """Parse the command-line options and publish them as module globals.

    Reads ``-filter_by``, ``-search_by`` and ``-dataset`` from the command
    line and stores their values in the module-level names ``filter_by``,
    ``search_by`` and ``dataset``.

    Raises:
        SystemExit: when argparse rejects the command line (e.g. a required
            option is missing), or when ``-dataset`` / ``-filter_by`` is not
            one of the supported values. Every invalid option is reported
            before exiting, so nothing is queried with a bad value.
    """
    global filter_by
    global search_by
    global dataset

    valid_datasets = [
        "exac",
        "gnomad_r2_1",
        "gnomad_r3",
        "gnomad_r2_1_controls",
        "gnomad_r2_1_non_neuro",
        "gnomad_r2_1_non_cancer",
        "gnomad_r2_1_non_topmed",
    ]
    valid_filters = ["gene_name", "gene_id", "transcript_id"]

    parser = argparse.ArgumentParser()
    # NOTE: all three options are required, so argparse never falls back to
    # the declared defaults; they are kept only to leave the CLI untouched.
    parser.add_argument("-filter_by", type=str, required=True, default="gene_name", help="Get your variants according to: gene_name, gene_id or transcript_id ")
    parser.add_argument("-search_by", type=str, required=True, default="TP53", help="Type the Ensembl Gene ID or Gene Name or the file name (e.g: myGenes.txt) containing genes")
    parser.add_argument("-dataset", type=str, required=True, default="gnomad_r2_1", help="Select your dataset: exac, gnomad_r2_1, gnomad_r3, gnomad_r2_1_controls, gnomad_r2_1_non_neuro, gnomad_r2_1_non_cancer, gnomad_r2_1_non_topmed")
    args = parser.parse_args()

    # Report every invalid option first, then stop, instead of carrying on
    # with a value the API cannot handle.
    invalid = False
    if args.dataset not in valid_datasets:
        print("! Select a proper gnomAD data set:\n\texac, gnomad_r2_1, gnomad_r3, gnomad_r2_1_controls, gnomad_r2_1_non_neuro, gnomad_r2_1_non_cancer, gnomad_r2_1_non_topmed")
        invalid = True
    if args.filter_by not in valid_filters:
        print("! Select a proper filter type :\n\tgene_name, gene_id or transcript_id")
        invalid = True
    if invalid:
        raise SystemExit(1)

    filter_by = args.filter_by
    search_by = args.search_by
    dataset = args.dataset
37+
38+
# gnomAD Parameters and API Function
39+
# URL that get_variants_by() sends its POST request to.
end_point = "https://gnomad.broadinstitute.org/api/"
40+
41+
def get_variants_by(filter_by, search_term, dataset, timeout=None):
    """Query gnomAD for the variants of one gene/transcript and save them as TSV.

    The query is POSTed to ``end_point``. On success the returned variants
    are flattened into a table and written to ``outputs/<search_term>.tsv``.
    Every failure is reported with ``print`` and the function returns without
    writing a file, so a batch run can continue with the next search term.

    Args:
        filter_by: name of the argument used to look the record up; the value
            ``"transcript_id"`` selects the ``transcript`` root field, any
            other value selects the ``gene`` root field.
        search_term: value passed for ``filter_by``; also used as the output
            file name.
        dataset: dataset identifier inserted into the query.
        timeout: forwarded to ``requests.post``; ``None`` means no timeout.

    Returns:
        None.
    """
    root_field = "transcript" if filter_by == "transcript_id" else "gene"
    query = """
    {
        %s(%s: "%s") {
            variants(dataset: %s) {
                gene_id
                gene_symbol
                chrom
                pos
                rsid
                ref
                alt
                consequence
                genome {
                    genome_af:af
                    genome_ac:ac
                    genome_an:an
                    genome_ac_hemi:ac_hemi
                    genome_ac_hom:ac_hom
                }
                exome {
                    exome_af:af
                    exome_ac:ac
                    exome_an:an
                    exome_ac_hemi:ac_hemi
                    exome_ac_hom:ac_hom
                }
                flags
                lof
                consequence_in_canonical_transcript
                gene_symbol
                hgvsc
                lof_filter
                lof_flags
                hgvsc
                hgvsp
                reference_genome
                variant_id: variantId
            }
        }
    }
    """ % (root_field, filter_by, search_term, dataset)

    # The network call itself is what can fail on connection problems, so it
    # must be inside the try; requests raises its own exception hierarchy,
    # not the built-in ConnectionError family.
    try:
        response = requests.post(end_point, data={'query': query}, timeout=timeout)
    except requests.exceptions.RequestException:
        print("An unknown error occured regarding the internet connection!")
        return

    if response.status_code == 404:
        print('API is not accessible right now. Check the end point out!')
        return
    if response.status_code != 200:
        # Previously any status other than 200/404 was silently ignored.
        print("! The API returned HTTP status {} for {}".format(response.status_code, search_term))
        return

    try:
        payload = response.json()
    except ValueError:
        # requests' JSON decode errors are ValueError subclasses.
        print("! The API response for {} is not valid JSON".format(search_term))
        return

    try:
        variants = payload["data"][root_field]["variants"]
    except (KeyError, TypeError):
        # The record was not found or the query was rejected: show what the
        # server reported (GraphQL responses carry failures under "errors").
        errors = payload.get("errors") if isinstance(payload, dict) else None
        print(str(errors if errors is not None else payload))
        return

    data = json_normalize(variants)
    # Nested fields come back as "genome.genome_af" etc.; keep the last part.
    data.columns = data.columns.map(lambda x: x.split(".")[-1])
    data.to_csv("outputs/" + search_term + ".tsv", sep="\t", index=False)
103+
104+
# Action
105+
if __name__ == "__main__":
    # Script entry point: parse the options, then fetch variants either for a
    # single search term or for every term listed in a file.
    arg_parser()

    # A "." in the search term is taken to mean "this is a file name".
    if "." in search_by:
        problem = "A problem occured while reading the file namely {} or the type {} you selected is wrong!"\
            .format(search_by, filter_by)
        try:
            # The with-statement closes the file; no explicit close() needed
            # (the old finally: f.close() raised NameError when open() failed).
            with open(search_by, "r") as f:
                # Skip blank lines so they do not become empty API queries.
                gene_list = [line.strip() for line in f if line.strip()]
        except OSError:
            print(problem)
        else:
            try:
                for tmp_gene in tqdm(gene_list):
                    get_variants_by(filter_by, tmp_gene.upper(), dataset)
            except Exception:
                # Keep the original best-effort reporting, but let
                # KeyboardInterrupt / SystemExit through.
                print(problem)
    else:
        get_variants_by(filter_by, search_by.upper(), dataset)

requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pandas
2+
requests
3+
tqdm
4+
argparse

0 commit comments

Comments
 (0)