Skip to content

Commit 6b234e4

Browse files
committed
tumor-only calling edits
1 parent e3fc7a8 commit 6b234e4

File tree

5 files changed

+165
-2
lines changed

5 files changed

+165
-2
lines changed

tools/gatk/calculate_contamination.nf

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,26 @@ process CALCULATECONTAMINATION {
2323
-O ${tumor_pileups_table.simpleName}_contamination_table \\
2424
-tumor-segmentation ${tumor_pileups_table.simpleName}_segmentation_table
2525
"""
26+
}
27+
28+
// Tumor-only contamination estimate: runs `gatk CalculateContamination` on a
// single pileup-summaries table (no matched-normal table is supplied).
process TUMORONLYCALCULATECONTAMINATION {

    container "${params.container_gatk}"

    // Both output tables are copied into <outdir>/vcfs.
    publishDir "${params.outdir}/vcfs", mode: 'copy'

    input:
    path tumor_pileups_table    // table passed to CalculateContamination via -I

    output:
    path ("${tumor_pileups_table.simpleName}_contamination_table"), emit: contamination
    path ("${tumor_pileups_table.simpleName}_segmentation_table"), emit: segment

    script:
    // Output names are derived from the input table's simpleName so they
    // line up with the patterns declared in the output block above.
    def table_prefix = tumor_pileups_table.simpleName
    """
    gatk CalculateContamination \\
        -I ${tumor_pileups_table} \\
        -O ${table_prefix}_contamination_table \\
        -tumor-segmentation ${table_prefix}_segmentation_table
    """
}

tools/gatk/get_pileup_summaries.nf

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,27 @@ process GETPILEUPSUMMARIES {
2525
-O ${bam_file.baseName}.getpileupsummaries.table
2626
"""
2727
}
28+
29+
// Tumor-only pileup summaries: runs `gatk GetPileupSummaries` on one BAM,
// using the ExAC resource both as the variant set (-V) and as the
// interval list (-L).
process TUMORONLYGETPILEUPSUMMARIES {

    maxForks 2
    container "${params.container_gatk}"

    // The resulting table is copied into <outdir>/intermediates.
    publishDir "${params.outdir}/intermediates", mode: 'copy'

    input:
    path bam_file
    path bai_file   // staged alongside the BAM; not referenced in the command itself
    path exac       // staged, but the command below reads params.exac directly

    output:
    path ("*.getpileupsummaries.table")

    script:
    def summary_table = "${bam_file.baseName}.getpileupsummaries.table"
    // NOTE(review): -V/-L use params.exac rather than the staged `exac` input.
    // Presumably this lets GATK find the resource's index next to the original
    // file, since no index is staged here -- confirm before switching to ${exac}.
    """
    gatk GetPileupSummaries \\
        -I ${bam_file} \\
        -V ${params.exac} \\
        -L ${params.exac} \\
        -O ${summary_table}
    """
}

tools/gatk/mutect2.nf

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#!/usr/bin/env nextflow
22

3-
// Define the process for running MuTect2
43
process MUTECT2 {
54

65
maxForks 12 // set this when running on local scratch to parallelize; up to the max number of cpus available minus 1
@@ -46,3 +45,36 @@ process MUTECT2 {
4645
--f1r2-tar-gz ${sample_id}_${chrom}_f1r2.tar.gz
4746
"""
4847
}
48+
49+
// Tumor-only Mutect2: calls variants on a single tumor BAM (no matched normal),
// one task per chromosome, using a panel of normals and a germline resource.
process TUMORONLYMUTECT2 {

    maxForks 12 // set this when running on local scratch to parallelize; up to the max number of cpus available minus 1
    cpus 1 // set cpu to 1: gatk discourages multithreading
    container "${params.container_gatk}"

    input:
    path tumor_bam
    path tumor_bam_sorted_bai // only necessary when nextflow can't resolve path from symlink
    each chrom // repeat this process for each item in the chrom channel
    val sample_id
    path mutect_idx // nextflow can't resolve the rest of these files from symlink to mutect_idx, input paths to each
    path mutect_idx_fai
    path mutect_idx_dict
    path pon_vcf // nextflow can't resolve the rest of these files from symlink to pon_vcf, input paths to each
    path pon_tbi
    path pon_idx
    path pon_tar

    output:
    path "${sample_id}_${chrom}_unfiltered.vcf", emit: vcf
    path "${sample_id}_${chrom}_f1r2.tar.gz", emit: f1r2
    path "${sample_id}_${chrom}_unfiltered.vcf.stats", emit: stats
    path "${sample_id}_${chrom}_unfiltered.vcf.idx", emit: index

    script:
    // The panel of normals is read from the staged `pon_vcf` input (its index
    // files are staged next to it via pon_tbi / pon_idx), matching how the
    // reference is read from the staged `mutect_idx`.
    // NOTE(review): params.gnomad is not declared as an input, so it is not
    // staged into the task directory; confirm the path (and its index) is
    // reachable from inside the container.
    """
    gatk Mutect2 \\
        -R ${mutect_idx} \\
        -I ${tumor_bam} \\
        --panel-of-normals ${pon_vcf} \\
        -L ${chrom} \\
        --germline-resource ${params.gnomad} \\
        -O ${sample_id}_${chrom}_unfiltered.vcf \\
        --f1r2-tar-gz ${sample_id}_${chrom}_f1r2.tar.gz
    """
}

workflows/calling/svc.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,4 @@ workflow {
102102

103103
// Annotate with snpEff
104104
SNPEFF(vcf, params.normalsample_id)
105-
}
105+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env nextflow
2+
3+
// Nextflow Pipeline Version
4+
params.release = "v0.2.4"
5+
params.releasedate = "11-25-2024"
6+
params.githublink = "https://github.com/ohsu-cedar-comp-hub/WGS-nextflow-workflow/releases/tag/v0.2.4"
7+
8+
// Import tools
9+
include { TUMORONLYGETPILEUPSUMMARIES } from '../../tools/gatk/get_pileup_summaries.nf'
10+
include { TUMORONLYCALCULATECONTAMINATION } from '../../tools/gatk/calculate_contamination.nf'
11+
// include { GETINTERVALS } from '../../tools/samtools/get_intervals.nf'
12+
include { TUMORONLYMUTECT2 } from '../../tools/gatk/mutect2.nf'
13+
include { BGZIP; PREPAREVCF } from '../../tools/bcftools/prepareVCFs.nf'
14+
include { MERGESTATS } from '../../tools/bcftools/combineMutectStats.nf'
15+
include { LEARNORIENTATION } from '../../tools/bcftools/combineF1R2files.nf'
16+
include { FILTERMUTECT } from '../../tools/gatk/filter_mutect.nf'
17+
include { REHEADER } from '../../tools/bcftools/reheader.nf'
18+
include { FUNCOTATOR } from '../../tools/gatk/funcotator.nf'
19+
include { SNPEFF } from '../../tools/snpeff/annotate_variants.nf'
20+
include { PASS } from '../../tools/snpeff/sift_variants.nf'
21+
include { ADDFILTER } from '../../tools/bcftools/filterVCF.nf'
22+
23+
// Tumor BAM(s) and BAM index file(s) found directly under params.bam_files.
// NOTE(review): the two channels are globbed independently, so BAM/BAI pairing
// relies on emission order; this presumably assumes one tumor sample per run
// (a single params.sample_id is used downstream) -- confirm.
tumor_ch = Channel.fromPath("${params.bam_files}/*.bam")
tumor_ch_bai = Channel.fromPath("${params.bam_files}/*.bai")

// Contig names chr1..chr22 and chrX, emitted one per item so Mutect2 can be
// scattered per chromosome.
chromosomes = (1..22).collect { it.toString() } + ['X']
// Channel.fromList emits each list element as its own item (the deprecated
// Channel.from did the same when given a list).
chrom_strings = Channel.fromList(chromosomes)
chrom_ch = chrom_strings.map { it -> "chr" + it }
29+
30+
// Main tumor-only somatic variant calling workflow:
// Mutect2 (per chromosome) -> pileup summaries -> contamination ->
// merge VCF/stats/f1r2 -> FilterMutectCalls -> reheader -> PASS filter ->
// bcftools filter -> Funcotator and snpEff annotation.
//
// Only the TUMORONLY* variants of the GATK processes are included in this
// script, so every `.out` reference must use those names.
workflow {
    // Scatter Mutect2 over chrom_ch for the tumor BAM
    TUMORONLYMUTECT2(
        tumor_ch, tumor_ch_bai, chrom_ch, params.sample_id,
        params.mutect_idx, params.mutect_idx_fai, params.mutect_idx_dict,
        params.pon_vcf, params.pon_tbi, params.pon_idx, params.pon_tar)

    // gatk getpileupsummaries
    TUMORONLYGETPILEUPSUMMARIES(tumor_ch, tumor_ch_bai, params.exac)
    tumor_table = TUMORONLYGETPILEUPSUMMARIES.out

    // gatk calculate contamination from pileup summaries
    TUMORONLYCALCULATECONTAMINATION(tumor_table)
    contam_table = TUMORONLYCALCULATECONTAMINATION.out.contamination.collect()
    segment_table = TUMORONLYCALCULATECONTAMINATION.out.segment.collect()

    // Merge and prepare VCF
    BGZIP(TUMORONLYMUTECT2.out.vcf) // concatenation requires bgzip'd files
    vcfs_ch = BGZIP.out.vcf.collect() // collect all bgzip vcf outputs into a channel
    split_vcf_index = BGZIP.out.index.collect() // collect all bgzip index outputs into a channel
    // concatenate, normalize, and sort the VCF
    PREPAREVCF(vcfs_ch, split_vcf_index, params.sample_id,
        params.mutect_idx, params.mutect_idx_fai, params.mutect_idx_dict)
    unfiltered_vcf = PREPAREVCF.out.vcf
    unfiltered_vcf_index = PREPAREVCF.out.index

    // Merge the per-chromosome Mutect2 stats files
    stats_ch = TUMORONLYMUTECT2.out.stats.collect()
    MERGESTATS(stats_ch, params.sample_id)
    filter_stats = MERGESTATS.out

    // Merge f1r2 read orientation files
    f1r2_ch = TUMORONLYMUTECT2.out.f1r2.collect()
    LEARNORIENTATION(f1r2_ch, params.sample_id)
    orientationmodel = LEARNORIENTATION.out

    // Filter mutect2 calls
    FILTERMUTECT(unfiltered_vcf, unfiltered_vcf_index,
        params.mutect_idx, params.mutect_idx_fai, params.mutect_idx_dict,
        filter_stats, orientationmodel, segment_table, contam_table,
        params.sample_id)

    // Add nextflow workflow versioning to VCF header
    REHEADER(FILTERMUTECT.out)

    // filter for passing variants
    PASS(REHEADER.out, params.sample_id)

    // filter for variants above certain allelic depth, VAF, etc using bcftools
    ADDFILTER(PASS.out, params.sample_id)
    vcf = ADDFILTER.out

    // Annotate with funcotator
    FUNCOTATOR(vcf,
        params.mutect_idx, params.mutect_idx_fai, params.mutect_idx_dict,
        params.funcotator_data,
        params.sample_id)

    // Annotate with snpEff
    SNPEFF(vcf, params.sample_id)
}

0 commit comments

Comments
 (0)