Skip to content

Commit b4ffdfb

Browse files
authored
Merge pull request #220 from ENCODE-DCC/dev
v1.8.0
2 parents 2d51d34 + 6808760 commit b4ffdfb

File tree

10 files changed

+176
-35
lines changed

10 files changed

+176
-35
lines changed

chip.wdl

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
version 1.0
22

33
workflow chip {
4-
String pipeline_ver = 'v1.7.1'
4+
String pipeline_ver = 'v1.8.0'
55

66
meta {
7-
version: 'v1.7.1'
7+
version: 'v1.8.0'
88
author: 'Jin wook Lee (leepc12@gmail.com) at ENCODE-DCC'
99
description: 'ENCODE TF/Histone ChIP-Seq pipeline'
1010
specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing'
1111

12-
caper_docker: 'encodedcc/chip-seq-pipeline:v1.7.1'
13-
caper_singularity: 'docker://encodedcc/chip-seq-pipeline:v1.7.1'
12+
caper_docker: 'encodedcc/chip-seq-pipeline:v1.8.0'
13+
caper_singularity: 'docker://encodedcc/chip-seq-pipeline:v1.8.0'
1414
croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json'
1515

1616
parameter_group: {
@@ -146,6 +146,8 @@ workflow chip {
146146
String aligner = 'bowtie2'
147147
File? custom_align_py
148148
Boolean use_bwa_mem_for_pe = false
149+
Int bwa_mem_read_len_limit = 70
150+
Boolean use_bowtie2_local_mode = false
149151
Int crop_length = 0
150152
Int crop_length_tol = 2
151153
String trimmomatic_phred_score_format = 'auto'
@@ -176,7 +178,7 @@ workflow chip {
176178
# group: resource_parameter
177179
Int align_cpu = 6
178180
Float align_bowtie2_mem_factor = 0.15
179-
Float align_bwa_mem_factor = 0.3
181+
Float align_bwa_mem_factor = 1.0
180182
Int align_time_hr = 48
181183
Float align_bowtie2_disk_factor = 8.0
182184
Float align_bwa_disk_factor = 8.0
@@ -639,9 +641,19 @@ workflow chip {
639641
help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
640642
}
641643
use_bwa_mem_for_pe: {
642-
description: 'For paired end dataset with read length >= 70bp, use bwa mem instead of bwa aln.',
644+
description: 'For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
643645
group: 'alignment',
644-
help: 'Use it only for paired end reads >= 70bp.'
646+
help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
647+
}
648+
bwa_mem_read_len_limit: {
649+
description: 'Read length limit for bwa mem (for PE FASTQs only).',
650+
group: 'alignment',
651+
help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
652+
}
653+
use_bowtie2_local_mode: {
654+
description: 'Use bowtie2\'s local mode (soft-clipping).',
655+
group: 'alignment',
656+
help: 'This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.'
645657
}
646658
crop_length: {
647659
description: 'Crop FASTQs\' reads longer than this length.',
@@ -1133,6 +1145,11 @@ workflow chip {
11331145
msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.'
11341146
}
11351147
}
1148+
if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) {
1149+
call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
1150+
msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.'
1151+
}
1152+
}
11361153
if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) {
11371154
call raise_exception as error_custom_aligner { input:
11381155
msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.'
@@ -1185,6 +1202,8 @@ workflow chip {
11851202
else custom_aligner_idx_tar,
11861203
paired_end = paired_end_,
11871204
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
1205+
bwa_mem_read_len_limit = bwa_mem_read_len_limit,
1206+
use_bowtie2_local_mode = use_bowtie2_local_mode,
11881207
ref_fa = ref_fa_,
11891208
11901209
trimmomatic_java_heap = align_trimmomatic_java_heap,
@@ -1282,7 +1301,9 @@ workflow chip {
12821301
else if aligner=='bowtie2' then bowtie2_idx_tar_
12831302
else custom_aligner_idx_tar,
12841303
paired_end = false,
1285-
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
1304+
use_bwa_mem_for_pe = false,
1305+
bwa_mem_read_len_limit = 0,
1306+
use_bowtie2_local_mode = use_bowtie2_local_mode,
12861307
ref_fa = ref_fa_,
12871308
12881309
cpu = align_cpu,
@@ -1413,6 +1434,8 @@ workflow chip {
14131434
else custom_aligner_idx_tar,
14141435
paired_end = ctl_paired_end_,
14151436
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
1437+
bwa_mem_read_len_limit = bwa_mem_read_len_limit,
1438+
use_bowtie2_local_mode = use_bowtie2_local_mode,
14161439
ref_fa = ref_fa_,
14171440
14181441
trimmomatic_java_heap = align_trimmomatic_java_heap,
@@ -2028,6 +2051,8 @@ task align {
20282051
File? idx_tar # reference index tar
20292052
Boolean paired_end
20302053
Boolean use_bwa_mem_for_pe
2054+
Int bwa_mem_read_len_limit
2055+
Boolean use_bowtie2_local_mode
20312056

20322057
String? trimmomatic_java_heap
20332058
Int cpu
@@ -2036,7 +2061,7 @@ task align {
20362061
Float disk_factor
20372062
}
20382063
Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G")
2039-
Float mem_gb = 5.0 + mem_factor * input_file_size_gb
2064+
Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb
20402065
Float samtools_mem_gb = 0.8 * mem_gb
20412066
Int disk_gb = round(40.0 + disk_factor * input_file_size_gb)
20422067

@@ -2102,6 +2127,7 @@ task align {
21022127
${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \
21032128
${if paired_end then '--paired-end' else ''} \
21042129
${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \
2130+
${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \
21052131
${'--mem-gb ' + samtools_mem_gb} \
21062132
${'--nth ' + cpu}
21072133
@@ -2112,6 +2138,7 @@ task align {
21122138
${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \
21132139
${'--multimapping ' + multimapping} \
21142140
${if paired_end then '--paired-end' else ''} \
2141+
${if use_bowtie2_local_mode then '--local' else ''} \
21152142
${'--mem-gb ' + samtools_mem_gb} \
21162143
${'--nth ' + cpu}
21172144
else

dev/test/test_task/test_bowtie2.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,9 @@
1212
"chip-seq-pipeline-test-data/input/se/fastqs/rep1/rep1.subsampled.25.fastq.gz"
1313
],
1414
"test_bowtie2.ref_pe_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/rep1-R1.subsampled.67.samstats.qc",
15-
"test_bowtie2.ref_se_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.samstats.qc"
15+
"test_bowtie2.ref_se_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.samstats.qc",
16+
17+
"test_bowtie2.ref_pe_local_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/local/rep1-R1.subsampled.67.srt.samstats.qc",
18+
"test_bowtie2.ref_se_local_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/local/rep1.subsampled.25.srt.samstats.qc"
19+
1620
}

dev/test/test_task/test_bowtie2.wdl

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ workflow test_bowtie2 {
1111
# we don't compare BAM because BAM's header includes date
1212
# hence md5sums don't match all the time
1313
String ref_pe_flagstat
14-
String ref_se_flagstat
14+
String ref_se_flagstat
15+
String ref_pe_local_flagstat
16+
String ref_se_local_flagstat
1517

1618
String pe_bowtie2_idx_tar
1719
String se_bowtie2_idx_tar
@@ -30,6 +32,8 @@ workflow test_bowtie2 {
3032
fastqs_R2 = pe_fastqs_R2,
3133
paired_end = true,
3234
use_bwa_mem_for_pe = false,
35+
bwa_mem_read_len_limit = 70,
36+
use_bowtie2_local_mode = false,
3337
crop_length = 0,
3438
crop_length_tol = 0,
3539
@@ -46,6 +50,45 @@ workflow test_bowtie2 {
4650
fastqs_R2 = [],
4751
paired_end = false,
4852
use_bwa_mem_for_pe = false,
53+
bwa_mem_read_len_limit = 70,
54+
use_bowtie2_local_mode = false,
55+
crop_length = 0,
56+
crop_length_tol = 0,
57+
58+
cpu = bowtie2_cpu,
59+
mem_factor = bowtie2_mem_factor,
60+
time_hr = bowtie2_time_hr,
61+
disk_factor = bowtie2_disk_factor,
62+
}
63+
64+
call chip.align as pe_bowtie2_local { input :
65+
aligner = 'bowtie2',
66+
idx_tar = pe_bowtie2_idx_tar,
67+
mito_chr_name = 'chrM',
68+
fastqs_R1 = pe_fastqs_R1,
69+
fastqs_R2 = pe_fastqs_R2,
70+
paired_end = true,
71+
use_bwa_mem_for_pe = false,
72+
bwa_mem_read_len_limit = 70,
73+
use_bowtie2_local_mode = true,
74+
crop_length = 0,
75+
crop_length_tol = 0,
76+
77+
cpu = bowtie2_cpu,
78+
mem_factor = bowtie2_mem_factor,
79+
time_hr = bowtie2_time_hr,
80+
disk_factor = bowtie2_disk_factor,
81+
}
82+
call chip.align as se_bowtie2_local { input :
83+
aligner = 'bowtie2',
84+
idx_tar = se_bowtie2_idx_tar,
85+
mito_chr_name = 'chrM',
86+
fastqs_R1 = se_fastqs,
87+
fastqs_R2 = [],
88+
paired_end = false,
89+
use_bwa_mem_for_pe = false,
90+
bwa_mem_read_len_limit = 70,
91+
use_bowtie2_local_mode = true,
4992
crop_length = 0,
5093
crop_length_tol = 0,
5194
@@ -59,14 +102,20 @@ workflow test_bowtie2 {
59102
labels = [
60103
'pe_bowtie2',
61104
'se_bowtie2',
105+
'pe_local_bowtie2',
106+
'se_local_bowtie2',
62107
],
63108
files = [
64109
pe_bowtie2.samstat_qc,
65110
se_bowtie2.samstat_qc,
111+
pe_bowtie2_local.samstat_qc,
112+
se_bowtie2_local.samstat_qc,
66113
],
67114
ref_files = [
68115
ref_pe_flagstat,
69116
ref_se_flagstat,
117+
ref_pe_local_flagstat,
118+
ref_se_local_flagstat,
70119
],
71120
}
72121
}

dev/test/test_task/test_bwa.wdl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ workflow test_bwa {
3030
fastqs_R2 = pe_fastqs_R2,
3131
paired_end = true,
3232
use_bwa_mem_for_pe = false,
33+
bwa_mem_read_len_limit = 70,
34+
use_bowtie2_local_mode = false,
3335
crop_length = 0,
3436
crop_length_tol = 0,
3537
@@ -46,6 +48,8 @@ workflow test_bwa {
4648
fastqs_R2 = [],
4749
paired_end = false,
4850
use_bwa_mem_for_pe = false,
51+
bwa_mem_read_len_limit = 70,
52+
use_bowtie2_local_mode = false,
4953
crop_length = 0,
5054
crop_length_tol = 0,
5155

dev/test/test_task/test_trimmomatic.wdl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ workflow test_trimmomatic {
3636
fastqs_R2 = pe_fastqs_R2,
3737
paired_end = true,
3838
use_bwa_mem_for_pe = false,
39+
bwa_mem_read_len_limit = 70,
40+
use_bowtie2_local_mode = false,
3941
crop_length = pe_crop_length,
4042
crop_length_tol = pe_crop_length_tol,
4143
@@ -52,6 +54,8 @@ workflow test_trimmomatic {
5254
fastqs_R2 = [],
5355
paired_end = false,
5456
use_bwa_mem_for_pe = false,
57+
bwa_mem_read_len_limit = 70,
58+
use_bowtie2_local_mode = false,
5559
crop_length = se_crop_length,
5660
crop_length_tol = se_crop_length_tol,
5761
@@ -69,6 +73,8 @@ workflow test_trimmomatic {
6973
fastqs_R2 = pe_fastqs_R2,
7074
paired_end = true,
7175
use_bwa_mem_for_pe = false,
76+
bwa_mem_read_len_limit = 70,
77+
use_bowtie2_local_mode = false,
7278
crop_length = pe_crop_length,
7379
crop_length_tol = pe_crop_length_tol,
7480
trimmomatic_phred_score_format = 'phred33',
@@ -86,6 +92,8 @@ workflow test_trimmomatic {
8692
fastqs_R2 = [],
8793
paired_end = false,
8894
use_bwa_mem_for_pe = false,
95+
bwa_mem_read_len_limit = 70,
96+
use_bowtie2_local_mode = false,
8997
crop_length = se_crop_length,
9098
crop_length_tol = se_crop_length_tol,
9199
trimmomatic_phred_score_format = 'phred64',

docs/input.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ Parameter|Type|Default|Description
153153
`chip.crop_length` | Int | 0 | Crop FASTQs with Trimmomatic (using parameters `CROP`). 0: cropping disabled.
154154
`chip.crop_length_tol` | Int | 2 | Trimmomatic's `MINLEN` will be set as `chip.crop_length` - `abs(chip.crop_length_tol)` where reads shorter than `MINLEN` will be removed, hence not included in output BAM files and all downstream analyses.
155155
`chip.trimmomatic_phred_score_format` | String | auto | Base encoding (format) for phred score in FASTQs. Choices: `auto`, `phred33` or `phred64` (without hyphen). This is used for Trimmomatic only. It is `auto` by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise `-phred33` or `-phred64` will be passed to the Trimmomatic command line. Use this parameter if you get an error `Error: Unable to detect quality encoding` in Trimmomatic.
156-
`chip.use_bwa_mem_for_pe` | Boolean | false | For PE dataset, uise bwa mem instead of bwa aln.
156+
`chip.use_bwa_mem_for_pe` | Boolean | false | For `chip.aligner` as `bwa` and PE datasets only, use `bwa mem` instead of `bwa aln`. If read length of R1 FASTQ is shorter than `chip.bwa_mem_read_len_limit` (70 by default) then `bwa aln` will be used instead.
157+
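`chip.bwa_mem_read_len_limit` | Int | 70 | Minimum read length of R1 FASTQ required to use `bwa mem`. Only applies when `chip.aligner` is `bwa`, the dataset is PE and `chip.use_bwa_mem_for_pe` is `true`; reads shorter than this limit are aligned with `bwa aln` instead.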
158+
`chip.use_bowtie2_local_mode` | Boolean | false | Use bowtie2's local mode (adding `--local` to bowtie2 command line). If `false` (default), bowtie2's default end-to-end mode will be used. Requires `chip.aligner` to be `bowtie2`; the pipeline raises an error otherwise.
157159
`chip.custom_align_py` | File | | Python script for your custom aligner. See details about [how to use a custom aligner](#how-to-use-a-custom-aligner)
158160
159161
@@ -245,8 +247,8 @@ Base memory/disk is 4GB/20GB for most tasks.
245247
Parameter|Default|Description
246248
---------|-------|-----------
247249
`chip.align_cpu` | 6 |
248-
`chip.align_bowtie2_mem_factor` | 0.15 | Multiplied to size of FASTQs to determine required memory
249-
`chip.align_bwa_mem_factor` | 0.30 | Multiplied to size of FASTQs to determine required memory
250+
`chip.align_bowtie2_mem_factor` | 0.15 | Multiplied by the total size of FASTQs to determine required memory: 5.0 + bowtie2_index_file_size + `chip.align_bowtie2_mem_factor` * sum(all_fastqs) GB.
251+
`chip.align_bwa_mem_factor` | 1.0 | Multiplied by the total size of FASTQs to determine required memory: 5.0 + bwa_index_file_size + `chip.align_bwa_mem_factor` * sum(all_fastqs) GB.
250252
`chip.align_time_hr` | 48 | Walltime (HPCs only)
251253
`chip.align_bowtie2_disk_factor` | 8.0 | Multiplied to size of FASTQs to determine required disk
252254
`chip.align_bwa_disk_factor` | 8.0 | Multiplied to size of FASTQs to determine required disk

example_input_json/template.full.json

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
"chip.ctl_fastqs_rep2_R1" : [ "ctl2_R1.fastq.gz" ],
2323
"chip.ctl_fastqs_rep2_R2" : [ "ctl2_R2.fastq.gz" ],
2424

25+
"chip.use_bwa_mem_for_pe" : false,
26+
"chip.bwa_mem_read_len_limit" : 70,
27+
"chip.use_bowtie2_local_mode" : false,
28+
2529
"chip.crop_length" : 0,
2630

2731
"chip.mapq_thresh" : 30,
@@ -53,23 +57,23 @@
5357

5458
"chip.align_cpu" : 6,
5559
"chip.align_bowtie2_mem_factor" : 0.15,
56-
"chip.align_bwa_mem_factor" : 0.15,
60+
"chip.align_bwa_mem_factor" : 1.0,
5761
"chip.align_time_hr" : 48,
5862
"chip.align_bowtie2_disk_factor" : 8.0,
5963
"chip.align_bwa_disk_factor" : 8.0,
6064

6165
"chip.filter_cpu" : 4,
6266
"chip.filter_mem_factor" : 0.4,
6367
"chip.filter_time_hr" : 24,
64-
"chip.filter_disk_factor" : 6.0,
68+
"chip.filter_disk_factor" : 8.0,
6569

6670
"chip.bam2ta_cpu" : 2,
6771
"chip.bam2ta_mem_factor" : 0.35,
6872
"chip.bam2ta_time_hr" : 6,
6973
"chip.bam2ta_disk_factor" : 4.0,
7074

71-
"chip.spr_mem_factor" : 4.5,
72-
"chip.spr_disk_factor" : 6.0,
75+
"chip.spr_mem_factor" : 13.5,
76+
"chip.spr_disk_factor" : 18.0,
7377

7478
"chip.jsd_cpu" : 4,
7579
"chip.jsd_mem_factor" : 0.1,
@@ -81,17 +85,17 @@
8185
"chip.xcor_time_hr" : 24,
8286
"chip.xcor_disk_factor" : 4.5,
8387

84-
"chip.subsample_ctl_mem_factor" : 7.0,
85-
"chip.subsample_ctl_disk_factor" : 7.5,
88+
"chip.subsample_ctl_mem_factor" : 14.0,
89+
"chip.subsample_ctl_disk_factor" : 15.0,
8690

8791
"chip.call_peak_cpu" : 6,
8892
"chip.call_peak_spp_mem_factor" : 5.0,
89-
"chip.call_peak_macs2_mem_factor" : 2.5,
93+
"chip.call_peak_macs2_mem_factor" : 5.0,
9094
"chip.call_peak_time_hr" : 72,
9195
"chip.call_peak_spp_disk_factor" : 5.0,
92-
"chip.call_peak_macs2_disk_factor" : 15.0,
96+
"chip.call_peak_macs2_disk_factor" : 30.0,
9397

94-
"chip.macs2_signal_track_mem_factor" : 6.0,
98+
"chip.macs2_signal_track_mem_factor" : 12.0,
9599
"chip.macs2_signal_track_time_hr" : 24,
96-
"chip.macs2_signal_track_disk_factor" : 40.0
100+
"chip.macs2_signal_track_disk_factor" : 80.0
97101
}

scripts/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ ptools_bin
4848

4949
jsondiff ==1.1.1
5050
libgcc
51+
tbb ==2021.1.1
5152
requests
5253
ncurses
5354
gnuplot

0 commit comments

Comments
 (0)