Skip to content

Commit fdc4b60

Browse files
author
Suvaline Vana
committed
2 parents 99b71f4 + 38fa0d7 commit fdc4b60

9 files changed

+180
-91
lines changed

src/pipecraft-core/service_scripts/quality_filtering_paired_end_dada2.sh

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -140,13 +140,13 @@ end=$(date +%s)
140140
runtime=$((end-start))
141141

142142
#Make README.txt file
143-
printf "# Quality filtering of PAIRED-END sequencing data with dada2.
143+
printf "# Quality filtering with dada2.
144144
145145
Files in 'qualFiltered_out':
146-
# *_filt.fastq = quality filtered sequences per sample in FASTQ format
147-
# seq_count_summary.txt = summary of sequence counts per sample
148-
# FASTA/*_filt.fasta = quality filtered sequences per sample in FASTA format
149-
# (*.rds = R objects for dada2, you may delete these files if present)
146+
# *_filt.fastq = quality filtered sequences per sample in FASTQ format.
147+
# seq_count_summary.txt = summary of sequence counts per sample.
148+
# FASTA/*_filt.fasta = quality filtered sequences per sample in FASTA format.
149+
# (*.rds = R objects for dada2, you may delete these files if present).
150150
151151
Core command ->
152152
filterAndTrim(inputR1, outputR1, inputR2, outputR2, maxN = $maxN, maxEE = c($maxEE, $maxEE), truncQ = $truncQ, truncLen= c($truncLen_R1, $truncLen_R2), maxLen = $maxLen, minLen = $minLen, minQ=$minQ, rm.phix = TRUE, compress = FALSE, multithread = TRUE)

src/pipecraft-core/service_scripts/quality_filtering_paired_end_fastp.sh

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,27 @@ source /scripts/submodules/framework.functions.sh
3434
output_dir=$"/input/qualFiltered_out"
3535

3636
#additional options, if selection != undefined
37-
low_complex_filt=$low_complexity_filter
38-
if [[ $low_complex_filt == null ]]; then
37+
low_complex_filt=${low_complexity_filter}
38+
if [[ $low_complex_filt == null ]] || [[ -z $low_complex_filt ]]; then
3939
low_complexity_filter=$""
4040
else
4141
low_complexity_filter=$"--low_complexity_filter --complexity_threshold $low_complex_filt"
4242
fi
4343

44+
trim_polyG=${trim_polyG}
45+
if [[ $trim_polyG == null ]] || [[ -z $trim_polyG ]]; then
46+
trim_polyG=$"--disable_trim_poly_g "
47+
else
48+
trim_polyG=$"--trim_poly_g --poly_g_min_len $trim_polyG"
49+
fi
50+
51+
trim_polyX=${trim_polyX}
52+
if [[ $trim_polyX == null ]] || [[ -z $trim_polyX ]]; then
53+
trim_polyX=$""
54+
else
55+
trim_polyX=$"--trim_poly_x --poly_x_min_len $trim_polyX"
56+
fi
57+
4458
#############################
4559
### Start of the workflow ###
4660
#############################
@@ -83,6 +97,8 @@ while read LINE; do
8397
$trunc_length_R1 \
8498
$trunc_length_R2 \
8599
$aver_qual \
100+
$trim_polyG \
101+
$trim_polyX \
86102
$cores \
87103
--html $output_dir/fastp_report/$sample_name.html \
88104
--disable_adapter_trimming \
@@ -104,15 +120,16 @@ end=$(date +%s)
104120
runtime=$((end-start))
105121

106122
#Make README.txt file
107-
printf "# Quality filtering of PAIRED-END sequencing data with fastp.
108-
Files in 'qualFiltered_out' directory represent quality filtered sequences in FASTQ format according to the selected options.\n
123+
printf "# Quality filtering with fastp.
109124
110-
Core command ->
111-
fastp --in1 inputR1 --in2 inputR2 --out1 outputR1 --out2 outputR2 $window_size $required_qual $min_qual $min_qual_thresh $maxNs $min_length $max_length $trunc_length_R1 $trunc_length_R2 $aver_qual $cores --html fastp_report/sample_name.html --disable_adapter_trimming $low_complexity_filter
125+
Files in 'qualFiltered_out':
126+
# *.fastq = quality filtered sequences per sample.
127+
# seq_count_summary.txt = summary of sequence counts per sample.
112128
113-
\nSummary of sequence counts in 'seq_count_summary.txt'\n
129+
Core command ->
130+
fastp --in1 inputR1 --in2 inputR2 --out1 outputR1 --out2 outputR2 $window_size $required_qual $min_qual $min_qual_thresh $trim_polyG $trim_polyX $maxNs $min_length $max_length $trunc_length_R1 $trunc_length_R2 $aver_qual $cores --html fastp_report/sample_name.html --disable_adapter_trimming $low_complexity_filter
114131
115-
\nTotal run time was $runtime sec.\n
132+
Total run time was $runtime sec.
116133
##################################################################
117134
###Third-party applications for this process [PLEASE CITE]:
118135
#fastp v0.23.2

src/pipecraft-core/service_scripts/quality_filtering_paired_end_trimmomatic.sh

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -19,28 +19,28 @@
1919
##########################################################
2020

2121
#load variables
22-
extension=$fileFormat
23-
window_size=$window_size
24-
required_qual=$required_quality
25-
min_length=$min_length
26-
threads=$cores
27-
phred=$phred
28-
leading_qual_threshold=$leading_qual_threshold
29-
trailing_qual_threshold=$trailing_qual_threshold
22+
extension=${fileFormat}
23+
window_size=${window_size}
24+
required_qual=${required_quality}
25+
min_length=${min_length}
26+
threads=${cores}
27+
phred=${phred}
28+
leading_qual_threshold=${leading_qual_threshold}
29+
trailing_qual_threshold=${trailing_qual_threshold}
3030

3131
#Source for functions
3232
source /scripts/submodules/framework.functions.sh
3333
#output dir
3434
output_dir=$"/input/qualFiltered_out"
3535

3636
#additional options, if selection != undefined
37-
if [[ $leading_qual_threshold == null ]]; then
38-
:
37+
if [[ $leading_qual_threshold == null ]] || [[ -z $leading_qual_threshold ]]; then
38+
LEADING=$""
3939
else
4040
LEADING=$"LEADING:$leading_qual_threshold"
4141
fi
42-
if [[ $trailing_qual_threshold == null ]]; then
43-
:
42+
if [[ $trailing_qual_threshold == null ]] || [[ -z $trailing_qual_threshold ]]; then
43+
TRAILING=$""
4444
else
4545
TRAILING=$"TRAILING:$trailing_qual_threshold"
4646
fi
@@ -106,17 +106,22 @@ printf "Files in /discarded folder represent sequences that did not pass quality
106106
If no files in this folder, then all sequences were passed to files in $output_dir directory" > $output_dir/untrimmed/README.txt
107107

108108
#Make README.txt file
109-
printf "Files in 'qualFiltered_out' directory represent quality filtered sequences in FASTQ format according to the selected options.
110-
Files in 'qualFiltered_out/FASTA' directory represent quality filtered sequences in FASTA format.
111-
If the quality of the data is sufficent after this step (check with QualityCheck module), then
112-
you may proceed with FASTA files only (however, note that FASTQ files are needed to assemble paired-end data).\n
109+
printf "# Quality filtering with trimmomatic.
110+
111+
Files in 'qualFiltered_out':
112+
# *.$newextension = quality filtered sequences in FASTQ format.
113+
# seq_count_summary.txt = summary of sequence counts per sample.
114+
Files in 'qualFiltered_out/FASTA':
115+
# *.fasta = quality filtered sequences in FASTA format.
116+
Files in 'qualFiltered_out/discarded':
117+
# *.discarded.$newextension = discarded sequences.
113118
114119
Core commands ->
115120
quality filtering: trimmomatic-0.39.jar PE inputR1 inputR2 outputR1 discarded/outputR1.discarded outputR2 discarded/outputR2.discarded $LEADING $TRAILING -phred$phred SLIDINGWINDOW:$window_size:$required_qual MINLEN:$min_length -threads $threads
116121
convert output fastq files to FASTA: seqkit fq2fa -t dna --line-width 0 input_file -o FASTA/output_file.fasta
117122
118-
\nSummary of sequence counts in 'seq_count_summary.txt'\n
119-
\nTotal run time was $runtime sec.\n\n
123+
Total run time was $runtime sec.
124+
120125
##################################################################
121126
###Third-party applications for this process [PLEASE CITE]:
122127
#trimmomatic v0.39 for quality filtering

src/pipecraft-core/service_scripts/quality_filtering_paired_end_vsearch.sh

Lines changed: 30 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,27 +26,27 @@ minlen=$"--fastq_minlen ${min_length}"
2626
cores=$"--threads ${cores}"
2727
qmax=$"--fastq_qmax ${qmax}"
2828
qmin=$"--fastq_qmin ${qmin}"
29-
trunclen=$trunc_length
30-
maxlen=$max_length
31-
maxeerate=$maxee_rate
29+
trunc_length=${trunc_length}
30+
max_length=${max_length}
31+
maxee_rate=${maxee_rate}
3232

3333
#Source for functions
3434
source /scripts/submodules/framework.functions.sh
3535
#output dir
3636
output_dir=$"/input/qualFiltered_out"
3737

3838
#additional options, if selection != undefined
39-
if [[ $maxlen == null ]]; then
39+
if [[ $max_length == null ]] || [[ -z $max_length ]]; then
4040
max_length=$""
4141
else
42-
max_length=$"--fastq_maxlen $maxlen"
42+
max_length=$"--fastq_maxlen $max_length"
4343
fi
44-
if [[ $maxeerate == null ]]; then
44+
if [[ $maxee_rate == null ]] || [[ -z $maxee_rate ]]; then
4545
maxee_rate=$""
4646
else
47-
maxee_rate=$"--fastq_maxee_rate $maxeerate"
47+
maxee_rate=$"--fastq_maxee_rate $maxee_rate"
4848
fi
49-
if [[ $trunclen == null ]]; then
49+
if [[ $trunc_length == null ]] || [[ -z $trunc_length ]]; then
5050
trunc_length=$""
5151
else
5252
trunc_length=$"--fastq_trunclen $trunc_length"
@@ -79,6 +79,19 @@ while read LINE; do
7979
###############################
8080
mkdir -p tempdir
8181

82+
printf "vsearch --fastq_filter \
83+
$inputR1.$newextension \
84+
$maxee \
85+
$maxns \
86+
$trunc_length \
87+
$minlen \
88+
$cores \
89+
$qmax \
90+
$qmin \
91+
$max_length \
92+
$maxee_rate \
93+
--fastqout tempdir/$inputR1.$newextension"
94+
8295
#R1
8396
checkerror=$(vsearch --fastq_filter \
8497
$inputR1.$newextension \
@@ -154,17 +167,20 @@ end=$(date +%s)
154167
runtime=$((end-start))
155168

156169
#Make README.txt file
157-
printf "Files in 'qualFiltered_out' directory represent quality filtered sequences in FASTQ format according to the selected options.
158-
Files in 'qualFiltered_out/FASTA' directory represent quality filtered sequences in FASTA format.
159-
If the quality of the data is sufficent after this step (check with QualityCheck module), then
160-
you may proceed with FASTA files only (however, note that FASTQ files are needed to assemble paired-end data).\n
170+
printf "# Quality filtering with vsearch.
171+
172+
Files in 'qualFiltered_out':
173+
# *.$newextension = quality filtered sequences in FASTQ format.
174+
# seq_count_summary.txt = summary of sequence counts per sample.
175+
Files in 'qualFiltered_out/FASTA':
176+
# *.fasta = quality filtered sequences in FASTA format.
161177
162178
Core commands ->
163179
quality filtering: vsearch --fastq_filter input_file $maxee $maxns $trunc_length $minlen $cores $qmax $qmin $max_length $maxee_rate --fastqout output_file
164180
Synchronizing R1 and R2 reads: seqkit pair -1 inputR1 -2 inputR2
165181
166-
\nSummary of sequence counts in 'seq_count_summary.txt'\n
167-
\n\nTotal run time was $runtime sec.\n\n\n
182+
Total run time was $runtime sec.
183+
168184
##################################################################
169185
###Third-party applications for this process [PLEASE CITE]:
170186
#vsearch v2.18.0 for quality filtering

src/pipecraft-core/service_scripts/quality_filtering_single_end_dada2.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,12 +65,12 @@ end=$(date +%s)
6565
runtime=$((end-start))
6666

6767
#Make README.txt file
68-
printf "# Quality filtering of PAIRED-END sequencing data with dada2.
68+
printf "# Quality filtering with dada2.
6969
7070
Files in 'qualFiltered_out':
71-
# *_filt.fastq = quality filtered sequences per sample
72-
# seq_count_summary.txt = summary of sequence counts per sample
73-
# (*.rds = R objects for dada2, you may delete these files if present)
71+
# *_filt.fastq = quality filtered sequences per sample.
72+
# seq_count_summary.txt = summary of sequence counts per sample.
73+
# (*.rds = R objects for dada2, you may delete these files if present).
7474
7575
Core command ->
7676
filterAndTrim(inputR1, outputR1, maxN = $maxN, maxEE = $maxEE, truncQ = $truncQ, truncLen = $truncLen_R1, maxLen = $maxLen, minLen = $minLen, minQ=$minQ, rm.phix = TRUE, compress = FALSE, multithread = TRUE)

src/pipecraft-core/service_scripts/quality_filtering_single_end_fastp.sh

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,26 @@ output_dir=$"/input/qualFiltered_out"
3333

3434
#additional options, if selection != undefined
3535
low_complex_filt=$low_complexity_filter
36-
if [[ $low_complex_filt == null ]]; then
36+
if [[ $low_complex_filt == null ]] || [[ -z $low_complex_filt ]]; then
3737
low_complexity_filter=$""
3838
else
3939
low_complexity_filter=$"--low_complexity_filter --complexity_threshold $low_complex_filt"
4040
fi
4141

42+
trim_polyG=${trim_polyG}
43+
if [[ $trim_polyG == null ]] || [[ -z $trim_polyG ]]; then
44+
trim_polyG=$"--disable_trim_poly_g "
45+
else
46+
trim_polyG=$"--trim_poly_g --poly_g_min_len $trim_polyG"
47+
fi
48+
49+
trim_polyX=${trim_polyX}
50+
if [[ $trim_polyX == null ]] || [[ -z $trim_polyX ]]; then
51+
trim_polyX=$""
52+
else
53+
trim_polyX=$"--trim_poly_x --poly_x_min_len $trim_polyX"
54+
fi
55+
4256
#############################
4357
### Start of the workflow ###
4458
#############################
@@ -69,6 +83,8 @@ for file in *.$extension; do
6983
$required_qual \
7084
$min_qual \
7185
$min_qual_thresh \
86+
$trim_polyG \
87+
$trim_polyX \
7288
$maxNs \
7389
$min_length \
7490
$max_length \
@@ -90,14 +106,17 @@ end=$(date +%s)
90106
runtime=$((end-start))
91107

92108
#Make README.txt file
93-
printf "Files in 'qualFiltered_out' directory represent quality filtered sequences in FASTQ format according to the selected options.\n
109+
printf "# Quality filtering with fastp.
110+
111+
Files in 'qualFiltered_out':
112+
# *.fastq = quality filtered sequences per sample.
113+
# seq_count_summary.txt = summary of sequence counts per sample.
94114
95115
Core command ->
96-
fastp --in1 input --out1 output $window_size $required_qual $min_qual $min_qual_thresh $maxNs $min_length $max_length $trunc_length $aver_qual $cores --html fastp_report/sample_name.html --disable_adapter_trimming $low_complexity_filter
116+
fastp --in1 input --out1 output $window_size $required_qual $min_qual $min_qual_thresh $trim_polyG $trim_polyX $maxNs $min_length $max_length $trunc_length $aver_qual $cores --html fastp_report/sample_name.html --disable_adapter_trimming $low_complexity_filter
97117
98-
\nSummary of sequence counts in 'seq_count_summary.txt'\n
118+
Total run time was $runtime sec.
99119
100-
\nTotal run time was $runtime sec.\n
101120
##################################################################
102121
###Third-party applications for this process [PLEASE CITE]:
103122
#fastp v0.23.2

src/pipecraft-core/service_scripts/quality_filtering_single_end_trimmomatic.sh

Lines changed: 21 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,28 +18,28 @@
1818
##########################################################
1919

2020
#load variables
21-
extension=$fileFormat
22-
window_size=$window_size
23-
required_qual=$required_quality
24-
min_length=$min_length
25-
threads=$cores
26-
phred=$phred
27-
leading_qual_threshold=$leading_qual_threshold
28-
trailing_qual_threshold=$trailing_qual_threshold
21+
extension=${fileFormat}
22+
window_size=${window_size}
23+
required_qual=${required_quality}
24+
min_length=${min_length}
25+
threads=${cores}
26+
phred=${phred}
27+
leading_qual_threshold=${leading_qual_threshold}
28+
trailing_qual_threshold=${trailing_qual_threshold}
2929

3030
#Source for functions
3131
source /scripts/submodules/framework.functions.sh
3232
#output dir
3333
output_dir=$"/input/qualFiltered_out"
3434

3535
#additional options, if selection != undefined
36-
if [[ $leading_qual_threshold == null ]]; then
37-
:
36+
if [[ $leading_qual_threshold == null ]] || [[ -z $leading_qual_threshold ]]; then
37+
LEADING=$""
3838
else
3939
LEADING=$"LEADING:$leading_qual_threshold"
4040
fi
41-
if [[ $trailing_qual_threshold == null ]]; then
42-
:
41+
if [[ $trailing_qual_threshold == null ]] || [[ -z $trailing_qual_threshold ]]; then
42+
TRAILING=$""
4343
else
4444
TRAILING=$"TRAILING:$trailing_qual_threshold"
4545
fi
@@ -94,17 +94,20 @@ end=$(date +%s)
9494
runtime=$((end-start))
9595

9696
#Make README.txt file
97-
printf "Files in 'qualFiltered_out' directory represent quality filtered sequences in FASTQ format according to the selected options.
98-
Files in $output_dir/FASTA directory represent quality filtered sequences in FASTA format.
99-
If the quality of the data is sufficent after this step (check with QualityCheck module), then
100-
you may proceed with FASTA files only.\n
97+
printf "# Quality filtering with trimmomatic.
98+
99+
Files in 'qualFiltered_out':
100+
# *.$newextension = quality filtered sequences in FASTQ format.
101+
# seq_count_summary.txt = summary of sequence counts per sample.
102+
Files in 'qualFiltered_out/FASTA':
103+
# *.fasta = quality filtered sequences in FASTA format.
101104
102105
Core commands ->
103106
quality filtering: trimmomatic-0.39.jar SE input_file output_file -phred$phred $LEADING $TRAILING SLIDINGWINDOW:$window_size:$required_qual MINLEN:$min_length -threads $threads
104107
convert output fastq files to FASTA: seqkit fq2fa -t dna --line-width 0 input_file -o FASTA/output_file.fasta
105108
106-
\nSummary of sequence counts in 'seq_count_summary.txt'\n
107-
\nTotal run time was $runtime sec.\n\n
109+
Total run time was $runtime sec.
110+
108111
##################################################################
109112
###Third-party applications for this process [PLEASE CITE]:
110113
#trimmomatic v0.39 for quality filtering

0 commit comments

Comments
 (0)