forked from dna-seq/dna-seq
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdvc.lock
More file actions
149 lines (149 loc) · 6.69 KB
/
dvc.lock
File metadata and controls
149 lines (149 loc) · 6.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
schema: '2.0'
stages:
prepare:
cmd: gunzip -k
data/gwas/homo_sapiens_ensembl_107/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
&& unzip -o -d data/ensembl/107 data/ensembl/107/107.zip && mv data/ensembl/107/VEP_plugins-release-107
data/ensembl/107/plugins
deps:
- path: data/ensembl/107/107.zip
- path:
data/gwas/homo_sapiens_ensembl_107/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
outs:
- path: data/ensembl/107/plugins
- path: data/gwas/homo_sapiens_ensembl_107/Homo_sapiens.GRCh38.dna.primary_assembly.fa
start:
cmd: docker-compose up
stop:
cmd: docker-compose down
cache:
cmd: tar -C data/ensembl/107/cache/ -zxvf data/ensembl/107/cache/homo_sapiens_vep_107_GRCh38.tar.gz
deps:
- path: data/ensembl/107/cache/homo_sapiens_vep_107_GRCh38.tar.gz
outs:
- path: data/ensembl/107/cache/homo_sapiens
prepare_genome:
cmd: gunzip -k
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
&& samtools faidx
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa
&& samtools dict
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa
-o
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.dict
&& md5sum
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.*
>
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.md5
deps:
- path:
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
hash: md5
md5: 1c8c1d3ebfa7e615cd9ac2cf00853423
size: 881964081
outs:
- path:
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa
hash: md5
md5: a65212262de00761b43869d5c08c8e4d
size: 3151425851
- path:
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.dict
hash: md5
md5: 7298443f9f172d2e564c6bf1b123345b
size: 34186
- path:
data/ensembl/113/species/Homo_sapiens/Homo_sapiens.GRCh38.dna.primary_assembly.fa.fai
hash: md5
md5: 06640e0e262c0a1494700e32f48685e1
size: 6406
prepare_vep:
cmd: unzip -u data/ensembl/107/107.zip -d data/ensembl/107/ && mv data/ensembl/107/VEP_plugins-release-107
data/ensembl/107/plugins
deps:
- path: data/ensembl/107/107.zip
- path: data/ensembl/107/cache/homo_sapiens_vep_107_GRCh38.tar.gz
outs:
- path: data/ensembl/107/plugins/
prepare_annotations:
cmd: "gunzip -c data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz
|\n awk '($1 ~ /^snpId/ || $2 ~ /NA/) {next} {print $0}' |\n sort -t $'\\\
t' -k2,2 -k3,3n |\n awk '{ gsub (/\\t +/, \"\\t\", $0); print}' |\n bgzip
-c > data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz
&&\nrm -f data/gwas/annotations/gene2phenotype/AllG2P.csv && awk '(NR == 1)
|| (FNR > 1)' data/gwas/annotations/gene2phenotype/*.csv > data/gwas/annotations/gene2phenotype/AllG2P.csv"
deps:
- path: data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz
- path: data/gwas/annotations/clinvar.vcf.gz
- path: data/gwas/annotations/clinvar.vcf.gz.tbi
- path: data/gwas/annotations/gene2phenotype/CancerG2P.csv
- path: data/gwas/annotations/gene2phenotype/DDG2P.csv
- path: data/gwas/annotations/gene2phenotype/EyeG2P.csv
- path: data/gwas/annotations/gene2phenotype/SkinG2P.csv
outs:
- path: data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz
- path: data/gwas/annotations/gene2phenotype/AllG2P.csv
prepare_vep_plugins:
cmd: unzip -u data/ensembl/109/109.zip -d data/ensembl/109/ && mv data/ensembl/109/VEP_plugins-release-109
data/ensembl/109/plugins
deps:
- path: data/ensembl/109/109.zip
md5: 71e85d54f56b05706a277040d1c4528f
size: 546595
outs:
- path: data/ensembl/109/plugins/
md5: 5adcc05fd98e76419fe6b60e76ae8e96.dir
size: 1568486
nfiles: 84
prepare_vep_cache:
cmd: tar -zxf data/ensembl/109/homo_sapiens_vep_109_GRCh38.tar.gz -C data/ensembl/109/cache/
&& md5sum data/ensembl/109/homo_sapiens_vep_109_GRCh38.tar.gz > data/ensembl/109/homo_sapiens_vep_109_GRCh38.tar.gz.md5
deps:
- path: data/ensembl/109/homo_sapiens_vep_109_GRCh38.tar.gz
md5: 8c3c2f1008c1d0fe32f6349b17056b7b
size: 26814637616
outs:
- path: data/ensembl/109/cache/homo_sapiens
md5: dae205e594fda66d2807accf96c3a16e.dir
size: 26911521503
nfiles: 13892
prepare_annotations_clinvar:
cmd: echo "Clinvars annotations prepared"
deps:
- path: data/gwas/annotations/clinvar.vcf.gz
- path: data/gwas/annotations/clinvar.vcf.gz.tbi
prepare_annotations_gene2phenotype:
cmd: rm -f data/gwas/annotations/gene2phenotype/AllG2P.csv && awk '(NR == 1) ||
(FNR > 1)' data/gwas/annotations/gene2phenotype/*.csv > data/gwas/annotations/gene2phenotype/AllG2P.csv
deps:
- path: data/gwas/annotations/gene2phenotype/CancerG2P.csv
- path: data/gwas/annotations/gene2phenotype/DDG2P.csv
- path: data/gwas/annotations/gene2phenotype/EyeG2P.csv
- path: data/gwas/annotations/gene2phenotype/SkinG2P.csv
outs:
- path: data/gwas/annotations/gene2phenotype/AllG2P.csv
prepare_annotations_digenet:
cmd: "gunzip -c data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz
|\n awk '($1 ~ /^snpId/ || $2 ~ /NA/) {next} {print $0}' |\n sort -t $'\\\
t' -k2,2 -k3,3n |\n awk '{ gsub (/\\t +/, \"\\t\", $0); print}' |\n bgzip
-c > data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz"
deps:
- path: data/gwas/annotations/all_variant_disease_pmid_associations.tsv.gz
outs:
- path: data/gwas/annotations/all_variant_disease_pmid_associations_final.tsv.gz
prepare_opencravat:
cmd: ov module install-base
test_opencravat:
cmd: ov new example-input . && ov run ./example_input -l hg38 && ov gui example_input/example_input.sqlite
install_opencravat:
cmd: ov module install-base && ov module install -y clinvar clinvar_acmg biogrid
uk10k_cohort uniprot pubmed provean polyphen2 cardioboost civic civic_gene cosmic
cosmic_gene go sift ensembl_regulatory_build dgi cvdkp
install_cancer:
cmd: ov module install chasmplus cgc cgl cancer_genome_interpreter cancer_hotspots
civic
prepare_annotations_longevity:
cmd: wget -qO- --timestamping https://genomics.senescence.info/longevity/longevity_genes.zip
| bsdtar -xvf- -C data/gwas/annotations/ longevity.csv
outs:
- path: data/gwas/annotations/longevity.csv