forked from PacificBiosciences/HiFi-human-WGS-WDL
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.wdl
More file actions
153 lines (135 loc) · 6.6 KB
/
main.wdl
File metadata and controls
153 lines (135 loc) · 6.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
version 1.0
import "humanwgs_structs.wdl"
import "wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl" as BackendConfiguration
import "sample_analysis/sample_analysis.wdl" as SampleAnalysis
import "cohort_analysis/cohort_analysis.wdl" as CohortAnalysis
import "tertiary_analysis/tertiary_analysis.wdl" as TertiaryAnalysis
# Top-level workflow. It resolves backend runtime attributes, runs
# sample_analysis once per sample in the cohort, runs cohort_analysis only when
# the cohort holds more than one sample, and runs tertiary_analysis only when
# run_tertiary_analysis is true.
workflow humanwgs {
    input {
        Cohort cohort

        ReferenceData reference
        SlivarData slivar_data

        String deepvariant_version = "1.5.0"
        DeepVariantModel? deepvariant_model

        Int? pbsv_call_mem_gb
        Int? glnexus_mem_gb

        Boolean run_tertiary_analysis = false

        # Backend configuration
        String backend
        String? zones
        String? aws_spot_queue_arn
        String? aws_on_demand_queue_arn
        String? container_registry

        Boolean preemptible
    }

    # Number of samples in the cohort; used both to gate cohort_analysis and
    # as its sample_count input.
    Int cohort_size = length(cohort.samples)

    call BackendConfiguration.backend_configuration {
        input:
            backend = backend,
            zones = zones,
            aws_spot_queue_arn = aws_spot_queue_arn,
            aws_on_demand_queue_arn = aws_on_demand_queue_arn,
            container_registry = container_registry
    }

    # Choose between the spot and on-demand attribute sets returned by
    # backend_configuration, based on the preemptible flag.
    RuntimeAttributes default_runtime_attributes = if preemptible then backend_configuration.spot_runtime_attributes else backend_configuration.on_demand_runtime_attributes

    # Per-sample processing: one sample_analysis call for each cohort member.
    scatter (sample in cohort.samples) {
        call SampleAnalysis.sample_analysis {
            input:
                sample = sample,
                reference = reference,
                deepvariant_version = deepvariant_version,
                deepvariant_model = deepvariant_model,
                default_runtime_attributes = default_runtime_attributes
        }
    }

    # Cohort-level processing is skipped for single-sample cohorts; its outputs
    # are therefore optional everywhere outside this block.
    if (cohort_size > 1) {
        call CohortAnalysis.cohort_analysis {
            input:
                cohort_id = cohort.cohort_id,
                sample_count = cohort_size,
                # Per-sample arrays of arrays are flattened into single lists.
                aligned_bams = flatten(sample_analysis.aligned_bams),
                svsigs = flatten(sample_analysis.svsigs),
                gvcfs = sample_analysis.small_variant_gvcf,
                reference = reference,
                pbsv_call_mem_gb = pbsv_call_mem_gb,
                glnexus_mem_gb = glnexus_mem_gb,
                default_runtime_attributes = default_runtime_attributes
        }
    }

    if (run_tertiary_analysis) {
        # select_first takes the cohort-level VCF when cohort_analysis produced
        # one; otherwise it falls back to the first sample's VCF.
        IndexData slivar_small_variant_input_vcf = select_first([cohort_analysis.phased_joint_called_vcf, sample_analysis.phased_small_variant_vcf[0]])
        IndexData slivar_sv_input_vcf = select_first([cohort_analysis.sv_vcf, sample_analysis.sv_vcf[0]])

        call TertiaryAnalysis.tertiary_analysis {
            input:
                cohort = cohort,
                small_variant_vcf = slivar_small_variant_input_vcf,
                sv_vcf = slivar_sv_input_vcf,
                reference = reference,
                slivar_data = slivar_data,
                default_runtime_attributes = default_runtime_attributes
        }
    }

    output {
        # sample_analysis output (one entry per sample, gathered from the scatter)
        Array[Array[File]] bam_stats = sample_analysis.bam_stats
        Array[Array[File]] read_length_summary = sample_analysis.read_length_summary
        Array[Array[File]] read_quality_summary = sample_analysis.read_quality_summary
        Array[IndexData] small_variant_gvcfs = sample_analysis.small_variant_gvcf
        Array[File] small_variant_vcf_stats = sample_analysis.small_variant_vcf_stats
        Array[File] small_variant_roh_bed = sample_analysis.small_variant_roh_bed
        Array[IndexData] sample_sv_vcfs = sample_analysis.sv_vcf
        Array[IndexData] sample_phased_small_variant_vcfs = sample_analysis.phased_small_variant_vcf
        Array[File] sample_whatshap_stats_gtfs = sample_analysis.whatshap_stats_gtf
        Array[File] sample_whatshap_stats_tsvs = sample_analysis.whatshap_stats_tsv
        Array[File] sample_whatshap_stats_blocklists = sample_analysis.whatshap_stats_blocklist
        Array[IndexData] merged_haplotagged_bam = sample_analysis.merged_haplotagged_bam
        Array[File] haplotagged_bam_mosdepth_summary = sample_analysis.haplotagged_bam_mosdepth_summary
        Array[File] haplotagged_bam_mosdepth_region_bed = sample_analysis.haplotagged_bam_mosdepth_region_bed
        Array[IndexData] trgt_spanning_reads = sample_analysis.trgt_spanning_reads
        Array[IndexData] trgt_repeat_vcf = sample_analysis.trgt_repeat_vcf
        Array[File] trgt_dropouts = sample_analysis.trgt_dropouts
        Array[Array[File]] cpg_pileup_beds = sample_analysis.cpg_pileup_beds
        Array[Array[File]] cpg_pileup_bigwigs = sample_analysis.cpg_pileup_bigwigs
        Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
        Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
        Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs
        Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
        Array[File] hificnv_copynum_bedgraphs = sample_analysis.hificnv_copynum_bedgraph
        Array[File] hificnv_depth_bws = sample_analysis.hificnv_depth_bw
        Array[File] hificnv_maf_bws = sample_analysis.hificnv_maf_bw

        # cohort_analysis output (optional: the call sits inside a conditional)
        IndexData? cohort_sv_vcf = cohort_analysis.sv_vcf
        IndexData? cohort_phased_joint_called_vcf = cohort_analysis.phased_joint_called_vcf
        File? cohort_whatshap_stats_gtfs = cohort_analysis.whatshap_stats_gtf
        File? cohort_whatshap_stats_tsvs = cohort_analysis.whatshap_stats_tsv
        File? cohort_whatshap_stats_blocklists = cohort_analysis.whatshap_stats_blocklist

        # tertiary_analysis output (optional: the call sits inside a conditional)
        IndexData? filtered_small_variant_vcf = tertiary_analysis.filtered_small_variant_vcf
        IndexData? compound_het_small_variant_vcf = tertiary_analysis.compound_het_small_variant_vcf
        File? filtered_small_variant_tsv = tertiary_analysis.filtered_small_variant_tsv
        File? compound_het_small_variant_tsv = tertiary_analysis.compound_het_small_variant_tsv
        IndexData? filtered_svpack_vcf = tertiary_analysis.filtered_svpack_vcf
        File? filtered_svpack_tsv = tertiary_analysis.filtered_svpack_tsv
    }

    parameter_meta {
        cohort: {help: "Sample information for the cohort"}
        reference: {help: "Reference genome data"}
        slivar_data: {help: "Data files used for annotation with slivar"}
        deepvariant_version: {help: "Version of deepvariant to use"}
        deepvariant_model: {help: "Optional deepvariant model file to use"}
        pbsv_call_mem_gb: {help: "Optional amount of RAM in GB for pbsv_call; default 64 for cohorts N<=3, 96 for cohorts N>3"}
        glnexus_mem_gb: {help: "Optional amount of RAM in GB for glnexus; default 30"}
        run_tertiary_analysis: {help: "Run the optional tertiary analysis steps"}
        backend: {help: "Backend where the workflow will be executed ['GCP', 'Azure', 'AWS', 'HPC']"}
        zones: {help: "Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'"}
        aws_spot_queue_arn: {help: "Queue ARN for the spot batch queue; required if backend is set to 'AWS'"}
        aws_on_demand_queue_arn: {help: "Queue ARN for the on demand batch queue; required if backend is set to 'AWS'"}
        container_registry: {help: "Container registry where workflow images are hosted. If left blank, PacBio's public Quay.io registry will be used."}
        preemptible: {help: "Where possible, run tasks preemptibly"}
    }
}