Skip to content

Commit ca68a59

Browse files
committed
update
1 parent 736bde2 commit ca68a59

File tree

55 files changed

+5770
-243
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+5770
-243
lines changed

DNASE_PE

Lines changed: 1546 additions & 0 deletions
Large diffs are not rendered by default.

ccres_cell_type_specfic.csv

Lines changed: 1680 additions & 0 deletions
Large diffs are not rendered by default.

dnase_run_chrwide_make_bigwigs.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
"""Launch bigwig generation for the K562 DNASE uncorrected models.

Reads a two-column table (fold, model path) and, for every row whose model
file exists and whose expected bigwig is not on disk yet, runs
``chr1wide_make_bigwigs.sh`` for that fold.
"""
import os
import subprocess

import pandas as pd

# Model tables used by earlier runs of this script (they carry extra columns):
#   logs/checkpoint/JAN_02_2023/model_dir_subsample_atac.csv  names=["fold", "cell", "cell1", "model"]
#   logs/checkpoint/JAN_02_2023/model_dir_dnase.csv           names=["fold", "cell", "model"]
#   logs/checkpoint/JAN_02_2023/model_dir_atac.csv            names=["fold", "cell", "model"]
#   k562_atac_uncorrected.csv                                 names=["fold", "model"]
MODEL_TABLE = "k562_dnase_uncorrected.csv"
CELL_LINE = "K562"
ASSAY = "DNASE"
BIGWIG_SCRIPT = "chr1wide_make_bigwigs.sh"
# Other device id used previously: MIG-f80e9374-504a-571b-bac0-6fb00750db4c
GPU = "MIG-166d7783-762d-5f61-b31c-549eb4e0fba0"


def output_prefix(cellline, fold):
    """Return the path prefix under which the bigwig for *fold* is looked up.

    The doubled slash before ``DNASE_uncorrected`` is kept exactly as the
    script has always produced it so printed paths do not change.
    """
    # NOTE(review): the fold name keeps its underscore here, while the shell
    # script receives it without one -- confirm the script writes to this path.
    return ("results/chrombpnet/auprc_curves/" + cellline + "/"
            + "/DNASE_uncorrected/" + cellline + "_" + fold)


def build_command(chrombpnet_nb, chrombpnet, cellline, fold, assay, gpu):
    """Return the argument list used to invoke the bigwig shell script.

    The fold name is passed without underscores ("fold_0" -> "fold0").
    Returning a list rather than one string keeps every path a single
    argument even if it contains spaces or shell metacharacters.
    """
    return ["bash", BIGWIG_SCRIPT, chrombpnet_nb, chrombpnet, cellline,
            fold.replace("_", ""), assay, gpu]


def main():
    """Walk the model table and launch the missing bigwig jobs one by one."""
    data = pd.read_csv(MODEL_TABLE, sep=",", names=["fold", "model"])
    print(data)

    for _, row in data.iterrows():
        # The uncorrected model file is passed for both model arguments of
        # the shell script.
        model = row["model"]
        if not os.path.isfile(model):
            continue

        ofile = output_prefix(CELL_LINE, row["fold"]) + "_wo_bias.bw"
        print(ofile)
        if os.path.isfile(ofile):
            print("Done !!!!!!!!! " + ofile)
            continue

        argv = build_command(model, model, CELL_LINE, row["fold"], ASSAY, GPU)
        print(" ".join(argv))
        # check=False: one failing fold must not stop the remaining folds,
        # but the failure is reported instead of being silently dropped.
        result = subprocess.run(argv, check=False)
        if result.returncode != 0:
            print("FAILED (exit code " + str(result.returncode) + "): " + " ".join(argv))


if __name__ == "__main__":
    main()

k562_atac_uncorrected.csv

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fold_0,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/uncorrected_model_09.23.2024_filters_512_dil_8_fold_fold_0/uncorrected_model/hint_atac.h5
2+
fold_1,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/uncorrected_model_09.23.2024_filters_512_dil_8_fold_fold_1/uncorrected_model/hint_atac.h5
3+
fold_2,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/uncorrected_model_09.23.2024_filters_512_dil_8_fold_fold_2/uncorrected_model/hint_atac.h5
4+
fold_3,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/uncorrected_model_09.23.2024_filters_512_dil_8_fold_fold_3/uncorrected_model/hint_atac.h5
5+
fold_4,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/uncorrected_model_09.23.2024_filters_512_dil_8_fold_fold_4/uncorrected_model/hint_atac.h5
6+
7+

k562_dnase_uncorrected.csv

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fold_0,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/K562/uncorrected_model_09.24.2024_filters_512_dil_8_fold_0/uncorrected_model/hint_atac.h5
2+
fold_1,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/K562/uncorrected_model_09.26.2024_filters_512_dil_8_fold_1/uncorrected_model/hint_atac.h5
3+
fold_2,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/K562/uncorrected_model_09.26.2024_filters_512_dil_8_fold_2/uncorrected_model/hint_atac.h5
4+
fold_3,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/K562/uncorrected_model_09.26.2024_filters_512_dil_8_fold_3/uncorrected_model/hint_atac.h5
5+
fold_4,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/K562/uncorrected_model_09.26.2024_filters_512_dil_8_fold_4/uncorrected_model/hint_atac.h5
6+
7+

k562_run_chrwide_make_bigwigs.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
"""Launch bigwig generation for the K562 ATAC chrombpnet models.

Reads a three-column table (fold, cell, model directory), keeps the K562
rows and, for every row whose bias-free model file exists and whose expected
bigwig is not on disk yet, runs ``chr1wide_make_bigwigs.sh`` for that fold.
"""
import os
import subprocess

import pandas as pd

# Model tables used by other runs of this script:
#   logs/checkpoint/JAN_02_2023/model_dir_subsample_atac.csv   names=["fold", "cell", "cell1", "model"]
#   logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2.csv      names=["fold", "cell", "model"]
MODEL_TABLE = "logs/checkpoint/JAN_02_2023/model_dir_atac.csv"
TARGET_CELL = "K562"
ASSAY = "ATAC"
BIGWIG_SCRIPT = "chr1wide_make_bigwigs.sh"
# Plain device index "1" was used as an alternative on other machines.
GPU = "MIG-166d7783-762d-5f61-b31c-549eb4e0fba0"


def model_paths(model_dir):
    """Return ``(bias_free_model, full_model)`` file paths inside *model_dir*."""
    return (model_dir + "/chrombpnet_model/chrombpnet_wo_bias.h5",
            model_dir + "/chrombpnet_model/chrombpnet.h5")


def output_prefix(cellline, fold):
    """Return the path prefix under which the bigwig for *fold* is looked up.

    The fold name is used without underscores ("fold_0" -> "fold0").
    """
    return ("results/chrombpnet/auprc_curves/" + cellline + "/ATAC/"
            + cellline + "_" + fold.replace("_", ""))


def build_command(chrombpnet_nb, chrombpnet, cellline, fold, assay, gpu):
    """Return the argument list used to invoke the bigwig shell script.

    Returning a list rather than one string keeps every path a single
    argument even if it contains spaces or shell metacharacters.
    """
    return ["bash", BIGWIG_SCRIPT, chrombpnet_nb, chrombpnet, cellline,
            fold.replace("_", ""), assay, gpu]


def main():
    """Walk the model table and launch the missing K562 bigwig jobs."""
    data = pd.read_csv(MODEL_TABLE, sep=",", names=["fold", "cell", "model"])

    for _, row in data.iterrows():
        if row["cell"] != TARGET_CELL:
            continue

        chrombpnet_nb, chrombpnet = model_paths(row["model"])
        cellline = row["cell"]
        if not os.path.isfile(chrombpnet_nb):
            continue

        ofile = output_prefix(cellline, row["fold"]) + "_wo_bias.bw"
        print(ofile)
        if os.path.isfile(ofile):
            print("Done !!!!!!!!! " + ofile)
            continue

        argv = build_command(chrombpnet_nb, chrombpnet, cellline, row["fold"], ASSAY, GPU)
        print(" ".join(argv))
        # check=False: one failing fold must not stop the remaining folds,
        # but the failure is reported instead of being silently dropped.
        result = subprocess.run(argv, check=False)
        if result.returncode != 0:
            print("FAILED (exit code " + str(result.returncode) + "): " + " ".join(argv))


if __name__ == "__main__":
    main()
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
"""Cross-check the K562 DNASE region sets used for predictions and interpretation.

The "selected" regions are compared, in both directions, against
(1) the merged prediction regions and (2) the merged 1000 bp windows built
around the summits of the interpretation inputs. A final coverage pass
counts how many selected regions are fully / partially covered by (2).
"""
import pandas as pd
import pybedtools

ENCID = "K562"
DNASE_ROOT = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"

# NOTE(review): this path spells out "K562" instead of using ENCID like the
# other two -- keep them in sync if ENCID is ever changed.
SEL_BED = DNASE_ROOT + "K562/interpret_upload/average_preds/selected.regions.valid.merged.bed.gz"
PRED_BED = DNASE_ROOT + ENCID + "/preds_upload/average_preds_with_ccre_vf/filtered.regions.bed.gz"
INTERPRET_BED = DNASE_ROOT + ENCID + "/interpret_upload/average_preds/mean_folds.inputs.bed.gz"


def load_bed(path):
    """Read a headerless, tab-separated BED-like file into a DataFrame."""
    return pd.read_csv(path, sep='\t', header=None)


def merged_intervals(frame):
    """Return the sorted, merged intervals from the first three columns of *frame*."""
    return pybedtools.BedTool.from_dataframe(frame[[0, 1, 2]]).sort().merge()


def report_unmatched(label, source, reference):
    """Print the intervals of *source* with no full-length overlap in *reference*.

    Prints *label*, then the shape, head and chromosome set of the result, or
    ``none**************`` when the result cannot be tabulated (presumably
    because the intersection is empty).
    """
    print(label)
    leftover = source.intersect(reference, v=True, f=1.0)
    try:
        output_bed = leftover.to_dataframe()
        print(output_bed.shape)
        print(output_bed.head())
        print(set(output_bed["chrom"]))
    except Exception:
        # Deliberately broad best-effort, but no longer a bare ``except`` so
        # Ctrl-C and SystemExit still stop the script.
        print("none**************")


def main():
    """Run all comparisons and print their summaries."""
    seldata = load_bed(SEL_BED)
    print(seldata.shape)
    selected = pybedtools.BedTool.from_dataframe(seldata)

    # --- selected regions vs. prediction regions -------------------------
    data = load_bed(PRED_BED)
    print(data.head())
    merged = merged_intervals(data)
    report_unmatched("regions in pred not in sel", merged, selected)
    report_unmatched("regions in sel not in pred", selected, merged)

    # --- selected regions vs. interpretation windows ---------------------
    data = load_bed(INTERPRET_BED)
    print(data.head())
    # Re-centre each row on start + column 9 and widen to a fixed 1000 bp
    # window (500 bp on either side).
    data[1] = data[1] + data[9] - 500
    data[2] = data[1] + 1000
    print(data.head())
    merged = merged_intervals(data)
    report_unmatched("regions in interpret not in sel", merged, selected)
    report_unmatched("regions in sel not in interpret", selected, merged)

    # --- coverage of selected regions by interpretation windows ----------
    output_bed = selected.coverage(merged).to_dataframe()
    print(output_bed.head())

    # "thickStart" is simply the default name of the 7th column; it is
    # presumably the covered fraction reported by bedtools coverage -- TODO confirm.
    covered = output_bed["thickStart"]
    print(sum(covered == 1.0))
    print(sum(covered < 1.0))
    print(seldata.shape)
    print(output_bed.shape)


if __name__ == "__main__":
    main()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
"""Print sanity-check summaries for the K562 DNASE interpretation uploads.

For each HDF5 file the dataset shapes (and, for the averaged attribution
files, the first and last coordinates) are printed; for each BED file the
shape and the first/last two rows are printed.
"""
import h5py
import pandas as pd

K562_ROOT = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/K562/"
AVERAGE_DIR = K562_ROOT + "interpret_upload/average_preds/"
AVERAGE_ATTRIBS = AVERAGE_DIR + "K562_{}_attribs_reformatted.h5"
FOLD_ATTRIBS = K562_ROOT + "interpret_upload/fold_{}/K562_{}_attribs_reformatted.h5"
MERGED_COUNT_SCORES = K562_ROOT + "merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.h5"

SCORE_TYPES = ("counts", "profile")
N_FOLDS = 5


def print_h5_shapes(path, datasets, show_coords=False):
    """Print *path*, the shape of each ``(group, name)`` dataset, and optionally
    the first and last entries of the ``coords`` group.

    The file is opened read-only and closed again before returning.
    """
    print(path)
    with h5py.File(path, "r") as handle:
        for group, name in datasets:
            print(handle[group][name].shape)
        if show_coords:
            coords = handle["coords"]
            for idx in (0, -1):
                print(coords["coords_chrom"][idx],
                      coords["coords_start_dset"][idx],
                      coords["coords_end_dset"][idx])


def print_bed_summary(path):
    """Load a headerless tab-separated file, print shape/head/tail, return it."""
    frame = pd.read_csv(path, sep="\t", header=None)
    print(frame.shape)
    print(frame.head(2))
    print(frame.tail(2))
    return frame


def print_window(frame, row, flank, width):
    """Print chrom and the ``width``-wide window starting ``flank`` before
    ``start + column 9`` for the given row position."""
    begin = frame.iloc[row, 1] + frame.iloc[row, 9] - flank
    print(frame.iloc[row, 0], begin, begin + width)


def main():
    """Run every check in the original order."""
    # Averaged attributions, with first/last coordinates.
    for score_type in SCORE_TYPES:
        print_h5_shapes(AVERAGE_ATTRIBS.format(score_type),
                        [("attributions", "shap")], show_coords=True)

    # Regions behind the averaged attributions; the printed 2114 bp windows
    # can be compared by eye with the coordinates printed above.
    data = print_bed_summary(AVERAGE_DIR + "mean_folds.inputs.bed.gz")
    print_window(data, 0, 1057, 2114)
    print_window(data, -1, 1057, 2114)

    # Merged-fold count scores and the regions handed to modisco.
    print_h5_shapes(MERGED_COUNT_SCORES, [("shap", "seq")])
    print_bed_summary(AVERAGE_DIR + "modisco.inputs.bed.gz")

    # Per-fold attributions: all counts files first, then all profile files.
    for score_type in SCORE_TYPES:
        for fold in range(N_FOLDS):
            print_h5_shapes(FOLD_ATTRIBS.format(str(fold), score_type),
                            [("attributions", "shap")])

    print_bed_summary(AVERAGE_DIR + "per_folds.inputs.bed.gz")


if __name__ == "__main__":
    main()
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
"""Print sanity-check summaries for the HEPG2 DNASE prediction uploads.

For each HDF5 file the shapes of the ``logits`` and ``logcounts`` prediction
datasets are printed (plus the first and last coordinates for the averaged
files); for each BED file the shape and the first/last two rows are printed.
"""
import h5py
import pandas as pd

HEPG2_ROOT = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/HEPG2/"
AVERAGE_DIR = HEPG2_ROOT + "preds_upload/average_preds_with_ccre_vf/"
AVERAGE_PREDS = AVERAGE_DIR + "HEPG2.mean_preds_{}_predictions.h5"
FOLD_PREDS = HEPG2_ROOT + "preds_upload/fold_{}/HEPG2_{}_all_with_ccre_predictions.h5"
# Per-fold region file inspected on earlier runs instead of filtered.regions.bed.gz:
#   HEPG2_ROOT + "preds_upload/fold_0/HEPG2_w_bias_all_regions.bed"

BIAS_MODES = ("w_bias", "wo_bias")
PREDICTION_DATASETS = [("predictions", "logits"), ("predictions", "logcounts")]
N_FOLDS = 5


def print_h5_shapes(path, datasets, show_coords=False):
    """Print *path*, the shape of each ``(group, name)`` dataset, and optionally
    the first and last entries of the ``coords`` group.

    The file is opened read-only and closed again before returning.
    """
    print(path)
    with h5py.File(path, "r") as handle:
        for group, name in datasets:
            print(handle[group][name].shape)
        if show_coords:
            coords = handle["coords"]
            for idx in (0, -1):
                print(coords["coords_chrom"][idx],
                      coords["coords_start_dset"][idx],
                      coords["coords_end_dset"][idx])


def print_bed_summary(path):
    """Load a headerless tab-separated file, print shape/head/tail, return it."""
    frame = pd.read_csv(path, sep="\t", header=None)
    print(frame.shape)
    print(frame.head(2))
    print(frame.tail(2))
    return frame


def print_window(frame, row, flank, width):
    """Print chrom and the ``width``-wide window starting ``flank`` before
    ``start + column 9`` for the given row position."""
    begin = frame.iloc[row, 1] + frame.iloc[row, 9] - flank
    print(frame.iloc[row, 0], begin, begin + width)


def main():
    """Run every check in the original order."""
    # Averaged predictions (with bias, then without), with first/last coords.
    for mode in BIAS_MODES:
        print_h5_shapes(AVERAGE_PREDS.format(mode), PREDICTION_DATASETS,
                        show_coords=True)

    print_bed_summary(AVERAGE_DIR + "filtered.regions.bed.gz")

    # Input regions; the printed 1000 bp windows can be compared by eye with
    # the coordinates printed above. (A chrM filter was tried here and
    # left disabled.)
    data = print_bed_summary(AVERAGE_DIR + "input.regions.bed.gz")
    print_window(data, 0, 500, 1000)
    print_window(data, -1, 500, 1000)

    print_bed_summary(HEPG2_ROOT + "interpret_upload/average_preds/mean_folds.inputs.bed.gz")

    # Per-fold predictions: all with-bias files first, then all without-bias.
    for mode in BIAS_MODES:
        for fold in range(N_FOLDS):
            print_h5_shapes(FOLD_PREDS.format(str(fold), mode), PREDICTION_DATASETS)


if __name__ == "__main__":
    main()
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
# Convert profile importance scores (HDF5) of GM12878 DNASE_SE runs to bigwig.
#
# Each run is identified by a path prefix; the converter reads
# <prefix>.profile_scores.h5 plus a regions BED file and writes
# <prefix>.profile.bw and <prefix>.profile.stat.

chrom_sizes=/mnt/lab_data2/anusri/chrombpnet/reference/chrom.sizes
converter=/mnt/lab_data2/anusri/chrombpnet/src/evaluation/make_bigwigs/importance_hdf5_to_bigwig.py

# make_profile_bigwig PREFIX [REGIONS_BED]
#   PREFIX       path prefix of the run's input and output files
#   REGIONS_BED  regions file; defaults to PREFIX.interpreted_regions.bed
# All expansions are quoted so a path containing spaces stays one argument.
make_profile_bigwig() {
    local prefix=$1
    local regions=${2:-$prefix.interpreted_regions.bed}

    python "$converter" \
        -h5 "$prefix.profile_scores.h5" \
        -r "$regions" \
        -c "$chrom_sizes" \
        -o "$prefix.profile.bw" \
        -s "$prefix.profile.stat" \
        -t 1
}

make_profile_bigwig /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/GM12878/interpret_uncorrected_model_05.10.2022/GM12878

make_profile_bigwig /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/GM12878/interpret_DNASE_SE_03.06.2022_simplebias/GM12878

# Disabled runs, kept for reference.
#
# hint_atac model (uses the *_v2 regions file):
#make_profile_bigwig /mnt/lab_data2/anusri/chrombpnet/results/hint_atac/DNASE_SE/GM12878/DNASE_SE_11.28.2022_hint_atac/hint_atac_model/interpret/GM12878 /mnt/lab_data2/anusri/chrombpnet/results/hint_atac/DNASE_SE/GM12878/DNASE_SE_11.28.2022_hint_atac/hint_atac_model/interpret/GM12878.interpreted_regions_v2.bed
#
# nautilus chrombpnet model:
#make_profile_bigwig /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/GM12878/nautilus_runs/GM12878_03.06.2022_bias_128_4_1234_0.8_fold_0/chrombpnet_model/interpret/GM12878
#
# bias model scores; this run took its regions file from whichever prefix was
# processed just before it (originally "$file.interpreted_regions.bed"), so
# pass that regions file explicitly as the second argument:
#make_profile_bigwig /oak/stanford/groups/akundaje/projects/chrombpnet_paper_new/DNASE_SE/GM12878/GM12878_03.06.2022_bias_128_4_1234_0.8_fold_0/BIAS/GM12878 <previous-prefix>.interpreted_regions.bed

0 commit comments

Comments
 (0)