Skip to content

Commit b33e85d

Browse files
committed
updates
1 parent a5b6295 commit b33e85d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+17471
-759
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
*.fai
2020
*.json
2121
*.tsv
22+
*.h5
2223
# Directories #
2324
# logs
2425
results
@@ -30,3 +31,5 @@ reference
3031
melanie_models/
3132
melanie_bias_model.sh
3233
jan_5_2024/
34+
filtered_variants/
35+
full_variants/

all_afr_caqtls_enformer_gm_1.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Score the `splitaa` chunk of the all_afr_caqtls_window variant table with
# snp_scoring_enformer_new_center.py on a single GPU device.
#
# Run from the repository root: the scoring script path below is relative.

# Presumably run once to build the per-chunk inputs (drop the first line of
# afgr_all.tsv, then split the rest into 54846-line files named split??).
# Kept for reference.
#tail -n +2 /mnt/lab_data2/anusri/enformer/eu_caqtls/afgr_all.tsv > /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv
#split -l 54846 /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/split

dsqtl=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/splitaa
genome=/mnt/lab_data2/anusri/chrombpnet/reference/hg38.genome.fa
#chrom_sizes=/mnt/data/annotations/by_release/hg19/hg19.chrom.sizes
output_dirn=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splitaa/

# -p: succeed when the directory already exists (re-runs) and create any
# missing parent directories.
mkdir -p "$output_dirn"

# Value handed to CUDA_VISIBLE_DEVICES (a MIG-prefixed device identifier).
gpu=MIG-40f43250-998e-586a-ac37-d6520e92590f

CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction/snp_scoring_enformer_new_center.py \
  -i "$dsqtl" -g "$genome" -o "$output_dirn" -bs 1 --debug_mode_on 0

all_afr_caqtls_enformer_gm_2.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Score the `splitab` chunk of the all_afr_caqtls_window variant table with
# snp_scoring_enformer_new_center.py on a single GPU device.
#
# Run from the repository root: the scoring script path below is relative.

# Presumably run once to build the per-chunk inputs (drop the first line of
# afgr_all.tsv, then split the rest into 54846-line files named split??).
# Kept for reference.
#tail -n +2 /mnt/lab_data2/anusri/enformer/eu_caqtls/afgr_all.tsv > /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv
#split -l 54846 /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/split

dsqtl=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/splitab
genome=/mnt/lab_data2/anusri/chrombpnet/reference/hg38.genome.fa
#chrom_sizes=/mnt/data/annotations/by_release/hg19/hg19.chrom.sizes
output_dirn=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splitab/

# -p: succeed when the directory already exists (re-runs) and create any
# missing parent directories.
mkdir -p "$output_dirn"

# Value handed to CUDA_VISIBLE_DEVICES (a MIG-prefixed device identifier).
gpu=MIG-f80e9374-504a-571b-bac0-6fb00750db4c

CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction/snp_scoring_enformer_new_center.py \
  -i "$dsqtl" -g "$genome" -o "$output_dirn" -bs 1 --debug_mode_on 0

all_afr_caqtls_enformer_gm_3.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Score the `splitac` chunk of the all_afr_caqtls_window variant table with
# snp_scoring_enformer_new_center.py on a single GPU device.
#
# Run from the repository root: the scoring script path below is relative.

# Presumably run once to build the per-chunk inputs (drop the first line of
# afgr_all.tsv, then split the rest into 54846-line files named split??).
# Kept for reference.
#tail -n +2 /mnt/lab_data2/anusri/enformer/eu_caqtls/afgr_all.tsv > /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv
#split -l 54846 /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/split

dsqtl=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/splitac
genome=/mnt/lab_data2/anusri/chrombpnet/reference/hg38.genome.fa
#chrom_sizes=/mnt/data/annotations/by_release/hg19/hg19.chrom.sizes
output_dirn=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splitac/

# -p: succeed when the directory already exists (re-runs) and create any
# missing parent directories.
mkdir -p "$output_dirn"

# Device index handed to CUDA_VISIBLE_DEVICES.
gpu=1

CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction/snp_scoring_enformer_new_center.py \
  -i "$dsqtl" -g "$genome" -o "$output_dirn" -bs 1 --debug_mode_on 0

all_afr_caqtls_enformer_gm_4.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Score the `splitad` chunk of the all_afr_caqtls_window variant table with
# snp_scoring_enformer_new_center.py on a single GPU device.
#
# Run from the repository root: the scoring script path below is relative.

# Presumably run once to build the per-chunk inputs (drop the first line of
# afgr_all.tsv, then split the rest into 54846-line files named split??).
# Kept for reference.
#tail -n +2 /mnt/lab_data2/anusri/enformer/eu_caqtls/afgr_all.tsv > /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv
#split -l 54846 /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/meta_data.tsv /mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/split

dsqtl=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splits/splitad
genome=/mnt/lab_data2/anusri/chrombpnet/reference/hg38.genome.fa
#chrom_sizes=/mnt/data/annotations/by_release/hg19/hg19.chrom.sizes
output_dirn=/mnt/lab_data2/anusri/variant-scorer/src/output/all_afr_caqtls_window/splitad/

# -p: succeed when the directory already exists (re-runs) and create any
# missing parent directories.
mkdir -p "$output_dirn"

# Device index handed to CUDA_VISIBLE_DEVICES.
gpu=2

CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction/snp_scoring_enformer_new_center.py \
  -i "$dsqtl" -g "$genome" -o "$output_dirn" -bs 1 --debug_mode_on 0

chr1wide_make_bigwigs.sh

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,36 @@
#!/usr/bin/env bash
# Run predict_to_bigwig_no_bias.py for one model over the regions BED that
# belongs to the given fold, writing under
# results/chrombpnet/auprc_curves/<cellline>/<dtype>_uncorrected/.
#
# Usage:
#   chr1wide_make_bigwigs.sh <chrombpnet_nb> <chrombpnet> <cellline> <fold> <dtype> <gpu>
#
#   chrombpnet_nb  only referenced by the commented-out commands below
#   chrombpnet     model path passed as -cm
#   cellline       names the output sub-directory and the output prefix
#   fold           selects the regions BED and suffixes the output prefix
#   dtype          output sub-directory is "<dtype>_uncorrected"
#   gpu            value for CUDA_VISIBLE_DEVICES
#
# Run from the repository root: results/, src/ and reference/ are resolved
# relative to the current directory.

chrombpnet_nb=$1
chrombpnet=$2
cellline=$3
fold=$4
dtype=$5
gpu=$6

#regions=results/chrombpnet/auprc_curves/narrowpeak_genomewide_chr1.bed
regions="results/chrombpnet/auprc_curves/downloads/${fold}_w_1000_s_250_narrowpeak.bed"
output_dir="results/chrombpnet/auprc_curves/${cellline}/${dtype}_uncorrected"

# -p: the path is two levels deep below auprc_curves/, so plain mkdir fails
# when the <cellline> directory does not exist yet; it also keeps re-runs quiet.
mkdir -p "$output_dir"

chrom_sizes="$PWD/reference/chrom.sizes"
ref_fasta="$PWD/reference/hg38.genome.fa"
# NOTE(review): `file` is not referenced anywhere below.
file="$output_dir/$cellline"

# Output prefix shared by the log line and the actual command.
out_prefix="$output_dir/${cellline}_${fold}"

# Log exactly what is executed (including -b 32).
echo "CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/predict_to_bigwig_no_bias.py -cm $chrombpnet --regions $regions -g $ref_fasta -b 32 -c $chrom_sizes -o $out_prefix -t 1"
CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/make_bigwigs/predict_to_bigwig_no_bias.py -cm "$chrombpnet" --regions "$regions" \
  -g "$ref_fasta" -b 32 -c "$chrom_sizes" -o "$out_prefix" -t 1

# Earlier variants of the pipeline, disabled in this revision.
#echo "CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/predict_to_bigwig_new.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions -g $ref_fasta -c $chrom_sizes -o $output_dir/$cellline"_"$fold -t 1"
#CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/predict_to_bigwig_new.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions \
#    -g $ref_fasta -c $chrom_sizes -o $output_dir/$cellline"_"$fold -t 1


#chrombpnet=results/chrombpnet/auprc_curves/$cellline/$dtype/$cellline"_"$fold"_w_bias_predictions.h5"
#chrombpnet_nb=results/chrombpnet/auprc_curves/$cellline/$dtype/$cellline"_"$fold"_wo_bias_predictions.h5"

#echo "CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/make_only_bigwigs.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions -g $ref_fasta -c $chrom_sizes -o $output_dir/$cellline"_"$fold -t 1"
#CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/make_only_bigwigs.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions \
#    -g $ref_fasta -c $chrom_sizes -o $output_dir/$cellline"_"$fold -t 1

dnase_chr1wide_make_bigwigs.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Two-step run for one cell line and fold, writing under
# results/chrombpnet/auprc_curves/<cellline>/DNASE/:
#   1. predict_to_bigwig_new.py with the two model files given on the command line
#   2. make_only_bigwigs.py with two *_predictions.h5 files
#
# Usage:
#   dnase_chr1wide_make_bigwigs.sh <chrombpnet_nb> <chrombpnet> <cellline> <fold> <gpu>
#
#   chrombpnet_nb  passed as -cmb in step 1
#   chrombpnet     passed as -cm in step 1
#   cellline       names the output sub-directory and the output prefix
#   fold           selects the regions BED and suffixes the output prefix
#   gpu            value for CUDA_VISIBLE_DEVICES
#
# Run from the repository root: results/, src/ and reference/ are resolved
# relative to the current directory.

chrombpnet_nb=$1
chrombpnet=$2
cellline=$3
fold=$4
gpu=$5

#regions=results/chrombpnet/auprc_curves/narrowpeak_genomewide_chr1.bed
regions="results/chrombpnet/auprc_curves/downloads/${fold}_w_1000_s_250_narrowpeak.bed"
output_dir="results/chrombpnet/auprc_curves/${cellline}/DNASE/"

# -p: the path is nested below auprc_curves/, so plain mkdir fails when the
# <cellline> directory does not exist yet; it also keeps re-runs quiet.
mkdir -p "$output_dir"

chrom_sizes="$PWD/reference/chrom.sizes"
ref_fasta="$PWD/reference/hg38.genome.fa"
# NOTE(review): `file` is not referenced anywhere below.
file="$output_dir/$cellline"

# Output prefix shared by the log lines and the actual commands
# (output_dir already ends in "/", hence the doubled slash, as before).
out_prefix="$output_dir/${cellline}_${fold}"

# Step 1. The log line now shows the same -o prefix that is actually used.
echo "CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/predict_to_bigwig_new.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions -g $ref_fasta -c $chrom_sizes -o $out_prefix -t 1"
CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/make_bigwigs/predict_to_bigwig_new.py -cm "$chrombpnet" -cmb "$chrombpnet_nb" --regions "$regions" \
  -g "$ref_fasta" -c "$chrom_sizes" -o "$out_prefix" -t 1

# Step 2 re-points both variables at prediction files.
# NOTE(review): these paths sit directly under <cellline>/ with no DNASE/
# directory and no fold suffix, whereas step 1 was given the prefix
# "$out_prefix". If step 1 writes its *_predictions.h5 next to that prefix,
# these paths are stale -- confirm against predict_to_bigwig_new.py.
chrombpnet="results/chrombpnet/auprc_curves/${cellline}/${cellline}_w_bias_predictions.h5"
chrombpnet_nb="results/chrombpnet/auprc_curves/${cellline}/${cellline}_wo_bias_predictions.h5"

echo "CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/make_bigwigs/make_only_bigwigs.py -cm $chrombpnet -cmb $chrombpnet_nb --regions $regions -g $ref_fasta -c $chrom_sizes -o $out_prefix -t 1"
CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/make_bigwigs/make_only_bigwigs.py -cm "$chrombpnet" -cmb "$chrombpnet_nb" --regions "$regions" \
  -g "$ref_fasta" -c "$chrom_sizes" -o "$out_prefix" -t 1

dsqtl_meta_data_small.tsv

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
chr10 104429746 C G chr10_104429746_C_G
22
chr10 105803156 T G chr10_105803156_T_G
33
chr2 133340261 A G chr2_133340261_A_G
4+
chr19 14788486 A G chr19_14788486_A_G
5+
chr3 119999621 T G blah1
6+
chr2 17342057 T C blah2
7+
chr2 20391281 C G blah3
8+
chr10 87984457 T C rs10887562
9+
chr16 84541173 C T rs76547445
10+
chr13 42086794 T C rs9532865
11+
chr20 48585958 T C rs73131258

dsqtls_new_dnas_1.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Prepare output directories for two ChromBPNet model directories and run
# snp_scoring_enformer.py on the variants in dsqtl_meta_data_small_new.tsv.
# The ChromBPNet scoring / bigwig-conversion commands are currently disabled;
# only their output directories are created.
#
# Run from the repository root: the input table, reference/ and src/ paths
# are relative.

#dsqtl=dsqtl_meta_data_small.tsv
dsqtl=dsqtl_meta_data_small_new.tsv
genome=reference/male.hg19.fa

# --- first model directory ---------------------------------------------------
model_dir=/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/ENCSR000EMT/chrombpnet_model_feb15_fold_0/
model=$model_dir/chrombpnet_wo_bias.h5
output_dirn=$model_dir/dsqtls_interpret_small_1/
# -p throughout: succeed on re-runs and create missing parents.
mkdir -p "$output_dirn"
gpu=3
chrom=reference/chrom.sizes
#CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/variant_effect_prediction_interpret/snp_scoring.py -i $dsqtl -g $genome -m $model -o $output_dirn -bs 64 --debug_mode_on 0
mkdir -p "$output_dirn/bigwigs/"
#python src/evaluation/variant_effect_prediction_interpret/convert_shap_to_bigiwg.py -p $output_dirn -o $output_dirn/bigwigs/ --chromsizes $chrom

# --- second model directory --------------------------------------------------
model_dir=/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/GM12878/nautilus_runs/GM12878_03.01.2022_bias_128_4_1234_0.4_fold_0/chrombpnet_model/
model=$model_dir/chrombpnet_wo_bias.h5
output_dirn=$model_dir/dsqtls_interpret_small_new_1/
mkdir -p "$output_dirn"
gpu=3
chrom=reference/chrom.sizes
#CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/variant_effect_prediction_interpret/snp_scoring.py -i $dsqtl -g $genome -m $model -o $output_dirn -bs 64 --debug_mode_on 0
mkdir -p "$output_dirn/bigwigs/"
#python src/evaluation/variant_effect_prediction_interpret/convert_shap_to_bigiwg.py -p $output_dirn -o $output_dirn/bigwigs/ --chromsizes $chrom

# --- Enformer scoring (the only command that actually runs a model) ----------
output_dirn=results/chrombpnet/DNASE_SE/GM12878/nautilus_runs/over_fitting_test/enformer_dsqtls_interpret_small_new_1/
mkdir -p "$output_dirn"
CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction/snp_scoring_enformer.py \
  -i "$dsqtl" -g "$genome" -o "$output_dirn" -bs 1 --debug_mode_on 0

dsqtls_new_dnas_ld.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# For each of two ChromBPNet model directories: run snp_scoring_new.py on the
# variants in dsqtl_meta_data_small_ld.tsv, then run convert_shap_to_bigiwg.py
# on that output directory, writing into its bigwigs/ sub-directory.
#
# Run from the repository root: the input table, reference/ and src/ paths
# are relative.

dsqtl=dsqtl_meta_data_small_ld.tsv
genome=reference/male.hg19.fa
gpu=3
chrom=reference/chrom.sizes

# The two stanzas of the original script were identical apart from model_dir,
# so they are expressed as one loop; per-model statement order is unchanged.
for model_dir in \
  /oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/ENCSR000EMT/chrombpnet_model_feb15_fold_0/ \
  /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/GM12878/nautilus_runs/GM12878_03.01.2022_bias_128_4_1234_0.4_fold_0/chrombpnet_model/
do
  model=$model_dir/chrombpnet_wo_bias.h5
  output_dirn=$model_dir/dsqtls_interpret_small_ld/

  # -p: succeed on re-runs and create missing parents.
  mkdir -p "$output_dirn"
  CUDA_VISIBLE_DEVICES="$gpu" python src/evaluation/variant_effect_prediction_interpret/snp_scoring_new.py \
    -i "$dsqtl" -g "$genome" -m "$model" -o "$output_dirn" -bs 64 --debug_mode_on 0

  mkdir -p "$output_dirn/bigwigs/"
  python src/evaluation/variant_effect_prediction_interpret/convert_shap_to_bigiwg.py \
    -p "$output_dirn" -o "$output_dirn/bigwigs/" --chromsizes "$chrom"
done

# Enformer scoring of the same variants, disabled in this revision.
#output_dirn=results/chrombpnet/DNASE_SE/GM12878/nautilus_runs/over_fitting_test/enformer_dsqtls_interpret_small_ld/
#mkdir $output_dirn
#CUDA_VISIBLE_DEVICES=$gpu python src/evaluation/variant_effect_prediction/snp_scoring_enformer.py -i $dsqtl -g $genome -o $output_dirn -bs 1 --debug_mode_on 0

0 commit comments

Comments
 (0)