Skip to content

Commit 14193de

Browse files
committed
Merge remote-tracking branch 'origin/dgb2'
2 parents 52a59ea + e34cf01 commit 14193de

File tree

11 files changed

+1151
-401
lines changed

11 files changed

+1151
-401
lines changed
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# Dr. Gene Budger (DGB) 2
1+
# DrugRanger
22

3-
The Dr. Gene Budger 2 (DGB2) Appyter takes a single human gene as input, and returns ranked up- and down-regulating drugs from three Connectivity Mapping resources that were shown to maximally increase or decrease the mRNA expression of the gene in human cell lines. The three Connectivity Mapping resources are:
3+
The DrugRanger Appyter takes a single human gene or gene set as input, and returns ranked up- and down-regulating drugs from three Connectivity Mapping resources that were shown to maximally increase or decrease the mRNA expression of the gene(s) in human cell lines. The three Connectivity Mapping resources are:
44

55
- [Ginkgo GDPx1 and GDPx2 datasets](https://huggingface.co/ginkgo-datapoints)
66

77
- [Novartis DRUG-seq U2OS MoABox dataset](https://zenodo.org/records/14291446)
88

9-
- [LINCS L1000 Chemical Perturbation dataset](https://maayanlab.cloud/sigcom-lincs/#/Download)
9+
- [Tahoe-100M](https://huggingface.co/datasets/tahoebio/Tahoe-100M)
1010

11-
In addition to producing tables of ranked up- and down-regulating drugs of the input gene, the notebook creates volcano plot visualizations and UpSet plots that identify overlap in regulators across datasets.
11+
In addition to producing tables of ranked up- and down-regulating drugs for the input gene(s), the notebook creates visualizations for both the single-gene and multi-gene analyses to help users determine the most effective regulators of their input gene(s).

appyters/Drug_Gene_Budger2/appyter.json

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"$schema": "https://raw.githubusercontent.com/MaayanLab/appyter-catalog/main/schema/appyter-validator.json",
33
"name": "Drug_Gene_Budger2",
4-
"title": "Dr. Gene Budger (DGB) 2",
5-
"version": "0.0.8",
6-
"description": "An appyter that retrieves drugs that up-regulate and down-regulate a single input gene across Connectivity Mapping datasets",
7-
"image": "dgb_logo.png",
4+
"title": "DrugRanger",
5+
"version": "0.1.0",
6+
"description": "An appyter that retrieves drugs that up-regulate and down-regulate a single input gene or gene set across Connectivity Mapping datasets",
7+
"image": "DR_logo.png",
88
"authors": [
99
{
1010
"name": "Lily Taub",
@@ -13,7 +13,6 @@
1313
],
1414
"url": "https://github.com/MaayanLab/appyter-catalog",
1515
"tags": [
16-
"L1000",
1716
"DRUG-seq",
1817
"RNA-seq"
1918
],
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import pandas as pd
2+
import numpy as np
3+
import hashlib
4+
import polars as pl
5+
6+
def prepare_novartis_data(gene, URL = 'https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/novartis_de'):
    '''
    Load the Novartis differential-expression table for a single gene.

    gene: gene symbol to retrieve; the table is read from '{URL}/{gene}.f'
    URL: Novartis data storage location

    output: results dataframe from Novartis data, indexed by its 'index'
            column, with 'LogFC'/'P.Adj' renamed to 'logFC'/'adj.P.Val' and
            an added 'log10adj.P.Val' column; None if the table could not
            be loaded
    '''
    try:
        novartis_de = pd.read_feather(f'{URL}/{gene}.f').set_index('index')
    except Exception:
        # Best-effort lookup: any failure to load the gene's table is reported
        # as None. Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    # format p-values: exact zeros are replaced with 1e-323 so that the
    # -log10 transform stays finite
    novartis_de['log10adj.P.Val'] = -np.log10(novartis_de['P.Adj'].replace(0, 1e-323))
    # rename logFC column for concordance with Ginkgo columns
    novartis_de = novartis_de.rename(columns={'LogFC': 'logFC', 'P.Adj': 'adj.P.Val'})
    return novartis_de
23+
24+
def prepare_lincs_data(gene, URL='https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/lincs_de'):
    '''
    Load the LINCS differential-expression table for a single gene.

    gene: gene symbol to retrieve; the table is read from '{URL}/{gene}.f'
    URL: LINCS data storage location

    output: results dataframe from LINCS data with an added 'log10adj.P.Val'
            column and with rows whose 'Drug' is listed as a CRISPR KO
            perturbation removed; None if the table could not be loaded
    '''
    try:
        lincs_de = pd.read_feather(f'{URL}/{gene}.f')
    except Exception:
        # Best-effort lookup: any failure to load the gene's table is reported
        # as None. Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    # format p-values: exact zeros are replaced with 1e-323 so that the
    # -log10 transform stays finite
    lincs_de['log10adj.P.Val'] = -np.log10(lincs_de['adj.P.Val'].replace(0, 1e-323))
    # remove CRISPR KO perturbations (the list is fetched on every call;
    # a failure here is not swallowed and propagates to the caller)
    lincs_ko_perturbs = pd.read_csv('https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/lincs_ko_perturbs.txt', sep='\t')
    is_ko = lincs_de['Drug'].isin(lincs_ko_perturbs['cmap_name'].to_list())
    return lincs_de[~is_ko]
42+
43+
def hash_bucket(gene, num_buckets=512):
    '''
    Map a gene symbol to a stable bucket index.

    gene: Gene symbol (string)
    num_buckets: number of hash buckets to create

    output: the MD5 digest of the encoded gene symbol, read as a big-endian
            integer, modulo num_buckets (0 to num_buckets - 1 for a positive
            num_buckets)
    '''
    digest = hashlib.md5(gene.encode()).digest()
    # big-endian interpretation of the raw digest equals int(hexdigest, 16)
    digest_as_int = int.from_bytes(digest, 'big')
    return digest_as_int % num_buckets
51+
52+
def prepare_tahoe_data(df, gene):
    '''
    Extract and format the Tahoe results for one gene from a bucket table.

    df: DataFrame retrieved from Tahoe gene bucket file; the columns read here
        are 'gene_name', 'padj', 'log2FoldChange', 'drug', 'group' and 'UpReg'
    gene: gene to filter dataframe

    output: results dataframe from Tahoe data filtered to gene, with columns
            renamed to 'Gene', 'adj.P.Val', 'logFC', 'Drug', 'Perturbation'
            and with added 'log10adj.P.Val' and 'GeneDir' ('Up'/'Dn')
            columns; None if the gene has no rows in df. The input df is not
            modified.
    '''
    # Work on an explicit copy: adding columns to a boolean-mask slice of the
    # caller's frame is chained assignment (SettingWithCopyWarning) and must
    # never leak back into the shared bucket table.
    tahoe_de = df[df['gene_name'] == gene].copy()
    if tahoe_de.empty:
        return None
    # exact zeros are replaced with 1e-323 so that -log10 stays finite
    tahoe_de['log10adj.P.Val'] = -np.log10(tahoe_de['padj'].replace(0, 1e-323))
    tahoe_de = tahoe_de.rename(columns={
        'log2FoldChange': 'logFC',
        'drug': 'Drug',
        'padj': 'adj.P.Val',
        'group': 'Perturbation',
        'gene_name': 'Gene',
    })
    # UpReg == 1 marks up-regulation; every other value is labelled 'Dn'
    tahoe_de['GeneDir'] = np.where(tahoe_de['UpReg'] == 1, 'Up', 'Dn')
    return tahoe_de
67+
68+
def retrieve_tahoe_data(gene_set, URL='https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/tahoe_de'):
    '''
    Fetch Tahoe results for every gene in a gene set.

    gene_set: list of gene symbols
    URL: Tahoe data storage location; bucket tables are read from
         '{URL}/gene_bucket_{bucket}.parquet'

    output: dictionary of tahoe data for each gene in gene_set (the value is
            whatever prepare_tahoe_data returns for that gene)
    '''
    # Resolve the bucket of every distinct gene first (first-seen order).
    bucket_of = {symbol: str(hash_bucket(symbol)) for symbol in dict.fromkeys(gene_set)}

    # Invert the mapping so that each bucket file is read only once.
    genes_by_bucket = {}
    for symbol, bucket_id in bucket_of.items():
        genes_by_bucket.setdefault(bucket_id, []).append(symbol)

    tahoe_results = {}
    for bucket_id, bucket_genes in genes_by_bucket.items():
        bucket_df = pd.read_parquet(f'{URL}/gene_bucket_{bucket_id}.parquet', use_pandas_metadata=False)
        for symbol in bucket_genes:
            tahoe_results[symbol] = prepare_tahoe_data(bucket_df, symbol)
    return tahoe_results
90+
91+
def prepare_ginkgo_data_dict(gene, cell_types, URL='https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/ginkgo_de'):
    '''
    Load the Ginkgo results for a gene, split by cell type.

    gene: gene symbol to retrieve; the table is read from '{URL}/{gene}.f'
    cell_types: Ginkgo cell types in GDPx1 and GDPx2; each one is matched
                against the 'Perturbation' column with str.contains, so it is
                interpreted as a regular-expression pattern
    URL: Ginkgo data storage location

    output: dictionary with dataframes for all Ginkgo cell types (keyed by
            cell type, each with an added 'log10adj.P.Val' column and the
            'index' column dropped); None if the table could not be loaded
    '''
    try:
        df = pd.read_feather(f'{URL}/{gene}.f')
    except Exception:
        # Best-effort lookup: any failure to load the gene's table is reported
        # as None. Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    cell_type_results = {}
    for k in cell_types:
        # explicit copy so that adding a column never writes into a slice of df
        subset = df[df['Perturbation'].str.contains(k)].copy()
        # exact zeros are replaced with 1e-323 so that -log10 stays finite
        subset['log10adj.P.Val'] = -np.log10(subset['adj.P.Val'].replace(0, 1e-323))
        cell_type_results[k] = subset.drop('index', axis=1)

    return cell_type_results
112+
113+
def prepare_ginkgo_data_df(gene, cell_types, URL='https://appyters.maayanlab.cloud/storage/DrugRegulators_Appyter/ginkgo_de'):
    '''
    Load the Ginkgo results for a gene as one table covering all cell types.

    gene: gene symbol to retrieve; the table is read from '{URL}/{gene}.f'
    cell_types: Ginkgo cell types in GDPx1 and GDPx2; each one is matched
                against the 'Perturbation' column with str.contains, so it is
                interpreted as a regular-expression pattern
    URL: Ginkgo data storage location

    output: results dataframe for all Ginkgo cell types (per-cell-type subsets
            concatenated in the order of cell_types with a fresh integer
            index, an added 'log10adj.P.Val' column and the 'index' column
            dropped); None if the table could not be loaded

    Note: pd.concat raises ValueError when cell_types is empty.
    '''
    try:
        df = pd.read_feather(f'{URL}/{gene}.f')
    except Exception:
        # Best-effort lookup: any failure to load the gene's table is reported
        # as None. Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    cell_type_results = {}
    for k in cell_types:
        # explicit copy so that adding a column never writes into a slice of df
        subset = df[df['Perturbation'].str.contains(k)].copy()
        # exact zeros are replaced with 1e-323 so that -log10 stays finite
        subset['log10adj.P.Val'] = -np.log10(subset['adj.P.Val'].replace(0, 1e-323))
        cell_type_results[k] = subset.drop('index', axis=1)

    all_df = pd.concat(cell_type_results.values(), ignore_index=True)

    return all_df

0 commit comments

Comments
 (0)