Skip to content

Commit 0aea521

Browse files
committed
added scripts
1 parent 6888eb4 commit 0aea521

File tree

6 files changed

+828
-1
lines changed

6 files changed

+828
-1
lines changed

wdl/Himito_build.wdl

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
version 1.0
2+
3+
# Builds a Himito graph (GFA) from a single sequence file by delegating to
# the Build task and exposing that task's graph as the workflow output.
workflow Himito_build {
    input {
        # Sequence file handed to Build as its `bam` input.
        File fasta
        # Reference FASTA handed to Build as `reference`.
        File reference_fa
        # Forms part of the output file name (see Build).
        String prefix
        # NOTE(review): declared but not referenced anywhere in this workflow.
        String reference_header
        # Forms part of the output file name (see Build).
        String sampleid
        # NOTE(review): declared but not referenced anywhere in this workflow.
        String data_type
        # k-mer size forwarded to `Himito build -k`.
        Int kmer_size
    }

    # Single step: construct the graph.
    call Build {
        input:
            bam       = fasta,
            reference = reference_fa,
            kmer_size = kmer_size,
            sampleid  = sampleid,
            prefix    = prefix
    }

    output {
        # GFA produced by the Build task.
        File graph = Build.graph
    }
}
31+
32+
33+
# Runs `Himito build` on one input sequence file and emits the resulting GFA.
#
# Inputs:
#   bam        Input sequence file. If its path ends in `.gz` it is first
#              decompressed to `<prefix>.fasta` in the working directory.
#   reference  Reference FASTA passed to `-r`.
#   prefix     Used in the decompressed file name and the output name.
#   sampleid   Used in the output name.
#   kmer_size  Value passed to `-k`.
#
# Output:
#   graph      `<sampleid>.<prefix>.gfa`
task Build {
    input {
        File bam
        File reference
        String prefix
        String sampleid
        Int kmer_size
    }

    command <<<
        set -euxo pipefail

        # Resolve the file Himito should read. Gzipped inputs are expanded
        # once up front so the build command below is written only once.
        input_path="~{bam}"
        if [[ "${input_path}" == *.gz ]]; then
            gunzip -c "${input_path}" > "~{prefix}.fasta"
            input_path="~{prefix}.fasta"
        fi

        # All path/string arguments are quoted so that values containing
        # spaces or glob characters are passed through as single arguments.
        /Himito/target/release/Himito build \
            -k ~{kmer_size} \
            -r "~{reference}" \
            -i "${input_path}" \
            -o "~{sampleid}.~{prefix}.gfa"
    >>>

    output {
        File graph = "~{sampleid}.~{prefix}.gfa"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/hangsuunc/himito:v1"
        memory: "2 GB"
        cpu: 1
        disks: "local-disk 10 SSD"
    }
}

wdl/Himito_filter_methylation.wdl

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
version 1.0
2+
3+
# For every value in `max_methylation_threshold`, runs the chain
# Filter -> Build -> Call -> VCFEval and gathers the per-threshold VCFs and
# vcfeval summary files.
workflow Himito_call {
    input {
        # BAM and its index, given to Filter.
        File whole_genome_bam
        File whole_genome_bai
        # Reference FASTA and its index, given to Build, Call and VCFEval.
        File reference_fa
        File reference_fai
        # Baseline VCF and its tabix index used by VCFEval.
        File truth_vcf
        File truth_tbi
        # Base name for intermediate and output files.
        String prefix
        String sampleid
        # Forwarded to `Himito call -d`.
        String data_type
        # One full Filter/Build/Call/VCFEval run is made per threshold.
        Array[Float] max_methylation_threshold
        # Forwarded to `Himito build -k` and `Himito call -k`.
        Int kmer_size
    }

    scatter (t in max_methylation_threshold) {
        # File-name prefix shared by all Himito steps of this threshold.
        String threshold_prefix = prefix + "_" + t

        call Filter {
            input:
                bam = whole_genome_bam,
                bai = whole_genome_bai,
                prefix = threshold_prefix,
                threshold = t
        }

        call Build {
            input:
                bam = Filter.mt_bam,
                reference = reference_fa,
                prefix = threshold_prefix,
                kmer_size = kmer_size,
                sampleid = sampleid
        }

        call Call {
            input:
                graph_gfa = Build.graph,
                reference = reference_fa,
                prefix = threshold_prefix,
                kmer_size = kmer_size,
                sampleid = sampleid,
                data_type = data_type
        }

        call VCFEval as Himito_Eval {
            input:
                query_vcf = Call.vcf,
                reference_fa = reference_fa,
                reference_fai = reference_fai,
                base_vcf = truth_vcf,
                base_vcf_index = truth_tbi,
                query_output_sample_name = sampleid + "_" + t + "_Himito"
        }
    }

    output {
        # One entry per threshold, in the order of `max_methylation_threshold`.
        Array[File] vcf_file = Call.vcf
        Array[File] Himito_summary_file = Himito_Eval.summary_statistics
    }
}
67+
68+
69+
# Bundle of optional runtime overrides; every member may be left unset.
# NOTE(review): no task in the visible source references this struct.
struct RuntimeAttr {
    Float?  mem_gb
    Int?    cpu_cores
    Int?    disk_gb
    Int?    boot_disk_gb
    Int?    preemptible_tries
    Int?    max_retries
    String? docker
}
78+
79+
# Required resource settings consumed by the VCFEval task's runtime section.
struct RuntimeAttributes {
    Int disk_size   # interpolated into "local-disk <disk_size> HDD"
    Int cpu         # used as the `cpu` runtime value
    Int memory      # interpolated into "<memory> GB"
}
84+
85+
# Runs `Himito filter` on a BAM and emits the two BAMs it writes.
#
# Inputs:
#   bam        Input BAM passed to `-i`.
#   bai        Index of `bam`. Not referenced by the command itself; declared
#              so the engine localizes it (presumably next to the BAM - confirm).
#   prefix     Base name of both output BAMs.
#   threshold  Value passed to `-f`.
#   contig     Value passed to `-c`. Defaults to "chrM", the value that was
#              previously hard-coded, so existing callers are unaffected.
#
# Outputs:
#   mt_bam     `<prefix>_mt.bam`    (written via `-m`)
#   numts_bam  `<prefix>_numts.bam` (written via `-n`)
task Filter {
    input {
        File bam
        File bai
        String prefix
        Float threshold
        String contig = "chrM"
    }

    command <<<
        set -euxo pipefail

        # Path/string arguments are quoted so values containing spaces or
        # glob characters reach Himito as single arguments.
        /Himito/target/release/Himito filter \
            -i "~{bam}" \
            -c "~{contig}" \
            -m "~{prefix}_mt.bam" \
            -n "~{prefix}_numts.bam" \
            -f ~{threshold}
    >>>

    output {
        File mt_bam = "~{prefix}_mt.bam"
        File numts_bam = "~{prefix}_numts.bam"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/hangsuunc/himito:v1"
        memory: "1 GB"
        cpu: 1
        disks: "local-disk 300 SSD"
    }
}
110+
111+
# Runs `Himito build` on one input file and emits the resulting GFA.
#
# Inputs:
#   bam        Input file passed to `-i`.
#   reference  Reference FASTA passed to `-r`.
#   prefix     Used in the output name.
#   sampleid   Used in the output name.
#   kmer_size  Value passed to `-k`.
#
# Output:
#   graph      `<sampleid>.<prefix>.gfa`
task Build {
    input {
        File bam
        File reference
        String prefix
        String sampleid
        Int kmer_size
    }

    command <<<
        set -euxo pipefail

        # Path/string arguments are quoted so values containing spaces or
        # glob characters reach Himito as single arguments.
        /Himito/target/release/Himito build \
            -k ~{kmer_size} \
            -r "~{reference}" \
            -i "~{bam}" \
            -o "~{sampleid}.~{prefix}.gfa"
    >>>

    output {
        File graph = "~{sampleid}.~{prefix}.gfa"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/hangsuunc/himito:v1"
        memory: "2 GB"
        cpu: 1
        disks: "local-disk 10 SSD"
    }
}
138+
139+
# Runs `Himito call` on a graph and emits the VCF it writes.
#
# Inputs:
#   graph_gfa  GFA passed to `-g`.
#   reference  Reference FASTA passed to `-r`.
#   prefix     Used in the output name.
#   sampleid   Passed to `-s` and used in the output name.
#   data_type  Value passed to `-d`.
#   kmer_size  Value passed to `-k`.
#
# Output:
#   vcf        `<sampleid>.<prefix>.vcf`
task Call {

    input {
        File graph_gfa
        File reference
        String prefix
        String sampleid
        String data_type
        Int kmer_size
    }

    command <<<
        set -euxo pipefail

        # Path/string arguments are quoted so values containing spaces or
        # glob characters reach Himito as single arguments.
        /Himito/target/release/Himito call \
            -g "~{graph_gfa}" \
            -r "~{reference}" \
            -k ~{kmer_size} \
            -d "~{data_type}" \
            -s "~{sampleid}" \
            -o "~{sampleid}.~{prefix}.vcf"
    >>>

    output {
        # Disabled output, kept from the original for reference:
        # File graph = "~{prefix}.annotated.gfa"
        File vcf = "~{sampleid}.~{prefix}.vcf"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-lrma/hangsuunc/himito:v1"
        memory: "2 GB"
        cpu: 1
        disks: "local-disk 10 SSD"
    }
}
168+
169+
# Compares a query VCF against a baseline VCF with `rtg vcfeval` and returns
# the resulting summary file.
#
# Steps performed by the command:
#   1. bgzip-compress and tabix-index the query VCF.
#   2. Split multiallelic records of the baseline VCF (`bcftools norm -m -both`)
#      and index the result.
#   3. Build an RTG reference (`rtg format`) and run `rtg vcfeval`.
#   4. Copy `reg/summary.txt` to `output_dir/<sample>_summary.txt`.
#
# Inputs:
#   query_vcf                 VCF under evaluation.
#   reference_fa              Reference FASTA.
#   reference_fai             Not referenced by the command; declared so the
#                             engine localizes it with the FASTA.
#   base_vcf                  Baseline VCF.
#   base_vcf_index            Not referenced by the command; declared so the
#                             engine localizes it with the baseline VCF.
#   query_output_sample_name  Base name for intermediate and output files.
#   preemptible               Optional preemptible attempts (0 when unset).
#   runtimeAttributes         Disk/CPU/memory settings; the default disk size
#                             is derived from the input file sizes plus 50 GB.
#
# Output:
#   summary_statistics        `output_dir/<sample>_summary.txt`
task VCFEval {
    input {
        # Input VCF Files
        File query_vcf
        File reference_fa
        File reference_fai
        File base_vcf
        File base_vcf_index
        String query_output_sample_name

        # Runtime params
        Int? preemptible
        RuntimeAttributes runtimeAttributes = {"disk_size": ceil(2 * size(query_vcf, "GB") + 2 * size(base_vcf, "GB") + size(reference_fa, "GB")) + 50,
                                               "cpu": 8, "memory": 16}
    }

    command <<<
        set -xeuo pipefail

        # Path/string arguments below are quoted so values containing spaces
        # or glob characters are passed through as single arguments.

        # Compress and index the query VCF
        bcftools view "~{query_vcf}" -O z -o "~{query_output_sample_name}.vcf.gz"
        bcftools index -t "~{query_output_sample_name}.vcf.gz"

        # Split multiallelic sites in the base_vcf
        bcftools norm \
            -f "~{reference_fa}" \
            -m -both "~{base_vcf}" \
            -O z \
            -o "~{query_output_sample_name}.base.normed.vcf.gz"
        bcftools index -t "~{query_output_sample_name}.base.normed.vcf.gz"

        # rtg vcfeval
        rtg format -o rtg_ref "~{reference_fa}"
        rtg vcfeval \
            -b "~{query_output_sample_name}.base.normed.vcf.gz" \
            -c "~{query_output_sample_name}.vcf.gz" \
            -o reg \
            -t rtg_ref \
            --squash-ploidy \
            --sample ALT,ALT

        # -p keeps the step from failing if the directory already exists.
        mkdir -p output_dir
        cp reg/summary.txt "output_dir/~{query_output_sample_name}_summary.txt"
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/vcfeval_docker:v1.1-tmp"
        preemptible: select_first([preemptible, 0])
        disks: "local-disk " + runtimeAttributes.disk_size + " HDD"
        cpu: runtimeAttributes.cpu
        memory: runtimeAttributes.memory + " GB"
    }

    output {
        File summary_statistics = "output_dir/~{query_output_sample_name}_summary.txt"
    }
}

0 commit comments

Comments
 (0)