wdl-humanwgs/workflows/main.wdl at main · dockstore-testing/wdl-humanwgs · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
version 1.0

import "humanwgs_structs.wdl"
import "wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl" as BackendConfiguration
import "sample_analysis/sample_analysis.wdl" as SampleAnalysis
import "cohort_analysis/cohort_analysis.wdl" as CohortAnalysis
import "tertiary_analysis/tertiary_analysis.wdl" as TertiaryAnalysis

# Top-level humanwgs workflow. It runs three stages in order:
#   1. sample_analysis   - once per entry in cohort.samples (scattered)
#   2. cohort_analysis   - only when the cohort has more than one sample
#   3. tertiary_analysis - only when run_tertiary_analysis is true
# Every sub-workflow call receives the same default_runtime_attributes, which are
# chosen from the backend_configuration outputs according to `preemptible`.
workflow humanwgs {
	input {
		Cohort cohort

		ReferenceData reference
		SlivarData slivar_data

		String deepvariant_version = "1.5.0"
		DeepVariantModel? deepvariant_model

		# Optional memory overrides; passed through unchanged to cohort_analysis
		Int? pbsv_call_mem_gb
		Int? glnexus_mem_gb

		Boolean run_tertiary_analysis = false

		# Backend configuration
		String backend
		String? zones
		String? aws_spot_queue_arn
		String? aws_on_demand_queue_arn
		String? container_registry

		# Selects spot (true) or on-demand (false) runtime attributes below
		Boolean preemptible
	}

	# Resolve backend-specific runtime settings; this call exposes both
	# spot_runtime_attributes and on_demand_runtime_attributes
	call BackendConfiguration.backend_configuration {
		input:
			backend = backend,
			zones = zones,
			aws_spot_queue_arn = aws_spot_queue_arn,
			aws_on_demand_queue_arn = aws_on_demand_queue_arn,
			container_registry = container_registry
	}

	# Single set of runtime attributes shared by all downstream sub-workflows
	RuntimeAttributes default_runtime_attributes = if preemptible then backend_configuration.spot_runtime_attributes else backend_configuration.on_demand_runtime_attributes

	# Per-sample analysis. Outside this scatter, each sample_analysis output is
	# gathered into an array with one element per sample.
	scatter (sample in cohort.samples) {
		call SampleAnalysis.sample_analysis {
			input:
				sample = sample,
				reference = reference,
				deepvariant_version = deepvariant_version,
				deepvariant_model = deepvariant_model,
				default_runtime_attributes = default_runtime_attributes
		}
	}

	# Cohort-level analysis is skipped for single-sample cohorts, so every
	# cohort_analysis output is optional outside this block.
	# aligned_bams and svsigs are arrays per sample; flatten() merges the
	# per-sample arrays into one array covering the whole cohort.
	if (length(cohort.samples) > 1) {
		call CohortAnalysis.cohort_analysis {
			input:
				cohort_id = cohort.cohort_id,
				sample_count = length(cohort.samples),
				aligned_bams = flatten(sample_analysis.aligned_bams),
				svsigs = flatten(sample_analysis.svsigs),
				gvcfs = sample_analysis.small_variant_gvcf,
				reference = reference,
				pbsv_call_mem_gb = pbsv_call_mem_gb,
				glnexus_mem_gb = glnexus_mem_gb,
				default_runtime_attributes = default_runtime_attributes
		}
	}

	# Optional tertiary analysis. select_first() takes the cohort-level VCF when
	# cohort_analysis ran and otherwise falls back to the first sample's VCF.
	# NOTE(review): the [0] index presumes cohort.samples is non-empty - confirm
	# that an empty cohort is rejected upstream.
	if (run_tertiary_analysis) {
		IndexData slivar_small_variant_input_vcf = select_first([
			cohort_analysis.phased_joint_called_vcf,
			sample_analysis.phased_small_variant_vcf[0]
		])
		IndexData slivar_sv_input_vcf = select_first([
			cohort_analysis.sv_vcf,
			sample_analysis.sv_vcf[0]
		])

		call TertiaryAnalysis.tertiary_analysis {
			input:
				cohort = cohort,
				small_variant_vcf = slivar_small_variant_input_vcf,
				sv_vcf = slivar_sv_input_vcf,
				reference = reference,
				slivar_data = slivar_data,
				default_runtime_attributes = default_runtime_attributes
		}
	}

	output {
		# sample_analysis output
		# (gathered from the scatter: the outer array has one element per sample)
		Array[Array[File]] bam_stats = sample_analysis.bam_stats
		Array[Array[File]] read_length_summary = sample_analysis.read_length_summary
		Array[Array[File]] read_quality_summary = sample_analysis.read_quality_summary
		Array[IndexData] small_variant_gvcfs = sample_analysis.small_variant_gvcf
		Array[File] small_variant_vcf_stats = sample_analysis.small_variant_vcf_stats
		Array[File] small_variant_roh_bed = sample_analysis.small_variant_roh_bed
		Array[IndexData] sample_sv_vcfs = sample_analysis.sv_vcf
		Array[IndexData] sample_phased_small_variant_vcfs = sample_analysis.phased_small_variant_vcf
		Array[File] sample_whatshap_stats_gtfs = sample_analysis.whatshap_stats_gtf
		Array[File] sample_whatshap_stats_tsvs = sample_analysis.whatshap_stats_tsv
		Array[File] sample_whatshap_stats_blocklists = sample_analysis.whatshap_stats_blocklist
		Array[IndexData] merged_haplotagged_bam = sample_analysis.merged_haplotagged_bam
		Array[File] haplotagged_bam_mosdepth_summary = sample_analysis.haplotagged_bam_mosdepth_summary
		Array[File] haplotagged_bam_mosdepth_region_bed = sample_analysis.haplotagged_bam_mosdepth_region_bed
		Array[IndexData] trgt_spanning_reads = sample_analysis.trgt_spanning_reads
		Array[IndexData] trgt_repeat_vcf = sample_analysis.trgt_repeat_vcf
		Array[File] trgt_dropouts = sample_analysis.trgt_dropouts
		Array[Array[File]] cpg_pileup_beds = sample_analysis.cpg_pileup_beds
		Array[Array[File]] cpg_pileup_bigwigs = sample_analysis.cpg_pileup_bigwigs
		Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
		Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
		Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs
		Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
		Array[File] hificnv_copynum_bedgraphs = sample_analysis.hificnv_copynum_bedgraph
		Array[File] hificnv_depth_bws = sample_analysis.hificnv_depth_bw
		Array[File] hificnv_maf_bws = sample_analysis.hificnv_maf_bw

		# cohort_analysis output
		# (optional: undefined when the cohort has a single sample; the whatshap
		# output names are plural but each holds a single optional File)
		IndexData? cohort_sv_vcf = cohort_analysis.sv_vcf
		IndexData? cohort_phased_joint_called_vcf = cohort_analysis.phased_joint_called_vcf
		File? cohort_whatshap_stats_gtfs = cohort_analysis.whatshap_stats_gtf
		File? cohort_whatshap_stats_tsvs = cohort_analysis.whatshap_stats_tsv
		File? cohort_whatshap_stats_blocklists = cohort_analysis.whatshap_stats_blocklist

		# tertiary_analysis output
		# (optional: undefined when run_tertiary_analysis is false)
		IndexData? filtered_small_variant_vcf = tertiary_analysis.filtered_small_variant_vcf
		IndexData? compound_het_small_variant_vcf = tertiary_analysis.compound_het_small_variant_vcf
		File? filtered_small_variant_tsv = tertiary_analysis.filtered_small_variant_tsv
		File? compound_het_small_variant_tsv = tertiary_analysis.compound_het_small_variant_tsv
		IndexData? filtered_svpack_vcf = tertiary_analysis.filtered_svpack_vcf
		File? filtered_svpack_tsv = tertiary_analysis.filtered_svpack_tsv
	}

	# User-facing help text for each workflow input
	parameter_meta {
		cohort: {help: "Sample information for the cohort"}
		reference: {help: "Reference genome data"}
		slivar_data: {help: "Data files used for annotation with slivar"}
		deepvariant_version: {help: "Version of deepvariant to use"}
		deepvariant_model: {help: "Optional deepvariant model file to use"}
		pbsv_call_mem_gb: {help: "Optional amount of RAM in GB for pbsv_call; default 64 for cohorts N<=3, 96 for cohorts N>3"}
		glnexus_mem_gb: {help: "Optional amount of RAM in GB for glnexus; default 30"}
		run_tertiary_analysis: {help: "Run the optional tertiary analysis steps"}
		backend: {help: "Backend where the workflow will be executed ['GCP', 'Azure', 'AWS', 'HPC']"}
		zones: {help: "Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'"}
		aws_spot_queue_arn: {help: "Queue ARN for the spot batch queue; required if backend is set to 'AWS'"}
		aws_on_demand_queue_arn: {help: "Queue ARN for the on demand batch queue; required if backend is set to 'AWS'"}
		container_registry: {help: "Container registry where workflow images are hosted. If left blank, PacBio's public Quay.io registry will be used."}
		preemptible: {help: "Where possible, run tasks preemptibly"}
	}
}