Skip to content

Commit 539982b

Browse files
committed
Squash merge dev into main
1 parent 3ce7f10 commit 539982b

File tree

16 files changed

+249
-124
lines changed

16 files changed

+249
-124
lines changed

lib/Utils.groovy

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,18 @@ class Utils {
146146
.stripIndent()
147147
}
148148

149+
/**
 * Returns the reverse complement of a nucleotide sequence.
 *
 * The input is walked from its last character to its first, and each
 * character is swapped for its partner: A<->T and C<->G (case preserved),
 * N/n map to themselves, and U/u map to A/a. Any character that has no
 * entry in the table is emitted as an uppercase 'N'.
 *
 * @param seq the sequence to reverse-complement; must not be null
 * @return the complemented characters of {@code seq} in reverse order
 */
public static String reverseComplement(String seq) {
    def pairing = [
        'A': 'T', 'T': 'A',
        'C': 'G', 'G': 'C',
        'a': 't', 't': 'a',
        'c': 'g', 'g': 'c',
        'N': 'N', 'n': 'n',
        'U': 'A', 'u': 'a',
    ]

    def flipped = new StringBuilder()
    for (int idx = seq.length() - 1; idx >= 0; idx--) {
        // Indexing a String yields a one-character String, matching the map keys.
        String base = seq[idx]
        flipped.append(pairing.containsKey(base) ? pairing[base] : 'N')
    }
    return flipped.toString()
}
162+
149163
}

main.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ workflow {
8686
ch_ref_gbk,
8787
ch_contam_fasta,
8888
ch_snpeff_config,
89-
ch_metagenomics_ref,
89+
ch_metagenomics_ref
9090
)
9191

9292
} else if ( params.platform == "illumina" ) {
@@ -95,7 +95,9 @@ workflow {
9595
ch_primer_bed,
9696
ch_refseq,
9797
ch_ref_gbk,
98+
ch_contam_fasta,
9899
ch_snpeff_config,
100+
ch_metagenomics_ref
99101
)
100102

101103
} else {

modules/bedtools.nf

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ process GET_PRIMER_SEQS {
22

33
/* */
44

5-
array 1000
6-
75
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
86
maxRetries 2
97

modules/chopper.nf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ process FILTER_WITH_CHOPPER {
66
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
77
maxRetries 2
88

9-
array 1000
109
cpus 4
1110

1211
input:

modules/fastp.nf

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
process CORRECT_WITH_FASTP {

    /*
     * Runs fastp on one read file per sample and emits
     * `<sample_id>.corrected.fastq.gz`.
     *
     * Flags passed to fastp:
     *   --qualified_quality_phred 20    quality value 20 as the "qualified" threshold
     *   --unqualified_percent_limit 30  limit of 30 percent unqualified bases
     *   --length_required 50            minimum read length of 50
     *   --trim_poly_g / --trim_poly_x   poly-G and poly-X trimming
     */

    tag "${sample_id}"

    input:
    tuple val(sample_id), path(reads)

    output:
    tuple val(sample_id), path("${sample_id}.corrected.fastq.gz")

    script:
    // The input path is resolved with `realpath` before being handed to fastp.
    """
    fastp -i `realpath ${reads}` -o "${sample_id}.corrected.fastq.gz" \
    --qualified_quality_phred 20 \
    --unqualified_percent_limit 30 \
    --length_required 50 \
    --trim_poly_g \
    --trim_poly_x
    """

}

modules/ivar.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
process CALL_VARIANTS {
22

33
tag "${barcode}"
4+
label "big_mem"
45
publishDir params.ivar, mode: 'copy', overwrite: true
56

67
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
@@ -28,6 +29,7 @@ process CALL_VARIANTS {
2829
process CALL_CONSENSUS {
2930

3031
tag "${barcode}"
32+
label "big_mem"
3133
publishDir params.consensus, mode: 'copy', overwrite: true
3234

3335
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
@@ -69,4 +71,4 @@ process CONVERT_TO_VCF {
6971
ivar_variants_to_vcf.py ${ivar_table} ${barcode}.vcf
7072
"""
7173

72-
}
74+
}

modules/primer_patterns.nf

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ process GET_PRIMER_PATTERNS {
22

33
/* */
44

5-
array 1000
6-
75
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
86
maxRetries 2
97

modules/seqkit.nf

Lines changed: 91 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,38 @@
1+
process COMPRESS_TO_SORTED_FASTA {

    /*
     * Converts a FASTQ file into a sorted, gzipped FASTA named
     * `<barcode>.fasta.gz` using a three-stage seqkit pipeline:
     *   1. `seqkit fq2fa`          FASTQ -> FASTA
     *   2. `seqkit seq --only-id`  keep only the ID part of each header
     *   3. `seqkit sort --two-pass` sort records and write the output file
     *
     * A failed task is retried up to two times and then ignored.
     */

    tag "${barcode}"

    maxRetries 2
    errorStrategy { task.attempt >= 3 ? 'ignore' : 'retry' }

    input:
    tuple val(barcode), path(fastq_reads)

    output:
    tuple val(barcode), path("${barcode}.fasta.gz")

    script:
    """
    seqkit fq2fa ${fastq_reads} \
    | seqkit seq --only-id \
    | seqkit sort --two-pass -o "${barcode}.fasta.gz"
    """
}
21+
122
process FIND_COMPLETE_AMPLICONS {
223

324
/* */
425

526
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
627
maxRetries 2
728

8-
array 1000
929
cpus 3
1030

1131
input:
1232
tuple path(reads), path(patterns)
1333

1434
output:
15-
tuple val(barcode), path(patterns), path("${barcode}_amplicons.fastq.gz")
35+
tuple val(barcode), path(patterns), path("${barcode}_amplicons.fasta.gz")
1636

1737
script:
1838
barcode = file(reads).getSimpleName()
@@ -23,11 +43,75 @@ process FIND_COMPLETE_AMPLICONS {
2343
--max-mismatch ${params.max_mismatch} \
2444
--by-seq \
2545
--pattern-file ${patterns} \
26-
-o ${barcode}_amplicons.fastq.gz
46+
-o ${barcode}_amplicons.fasta.gz
2747
"""
2848

2949
}
3050

51+
process TRIM_ENDS_TO_PRIMERS {

    /*
     * Runs `seqkit amplicon` on a sample's reads for one primer pair and
     * writes `<barcode>.<amplicon>.trimmed.fasta.gz`.
     *
     * The first line of `patterns_file` is used as the forward primer and the
     * last line as the reverse primer. The character length of each pattern
     * is used to build the `--region` argument.
     *
     * A failed task is retried up to two times and then ignored.
     */

    cpus 3

    maxRetries 2
    errorStrategy { task.attempt >= 3 ? 'ignore' : 'retry' }

    input:
    tuple val(barcode), path(patterns_file), path(untrimmed)

    output:
    tuple val(barcode), path("${barcode}*.trimmed.fasta.gz")

    script:
    // The simple name of the patterns file labels the amplicon in the output name.
    amplicon = file(patterns_file).simpleName
    // NOTE(review): seqkit's inner-region coordinates count from the first base of
    // the forward primer, so LENGTH:-LENGTH may keep one base of each primer.
    // Confirm whether (LENGTH + 1) was intended.
    """
    FORWARD_PATTERN=\$(head -n 1 ${patterns_file})
    REVERSE_PATTERN=\$(tail -n 1 ${patterns_file})
    FORWARD_LENGTH=\${#FORWARD_PATTERN}
    REVERSE_LENGTH=\${#REVERSE_PATTERN}

    seqkit amplicon \
    --region \${FORWARD_LENGTH}:-\${REVERSE_LENGTH} \
    --forward \$FORWARD_PATTERN \
    --reverse \$REVERSE_PATTERN \
    --max-mismatch ${params.max_mismatch} \
    --strict-mode \
    --threads ${task.cpus} \
    --out-file ${barcode}.${amplicon}.trimmed.fasta.gz \
    ${untrimmed}
    """

}
86+
87+
process PER_AMPLICON_FILTERS {

    /*
     * Applies length and quality filters to one amplicon FASTA with
     * `seqkit seq` and writes `<new_id>.filtered.fasta.gz`, where `new_id` is
     * the input file name with its `.fasta.gz` suffix removed.
     *
     * Filter thresholds come from `params.max_len`, `params.min_len` and
     * `params.min_qual`. A failed task is retried up to two times and then
     * ignored.
     */

    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
    maxRetries 2

    cpus 4

    input:
    tuple val(label), path(fasta)

    output:
    tuple val(label), path("${new_id}.filtered.fasta.gz")

    script:
    // Left without `def` on purpose: the output declaration above reads `new_id`.
    new_id = file(fasta).getName().replace(".fasta.gz", "")
    // NOTE(review): the input is FASTA, which carries no quality scores; confirm
    // that --min-qual behaves as intended on such records.
    // Every line of the command except the last must end in a backslash so the
    // input file is passed to seqkit instead of being run as its own command.
    """
    seqkit seq \
    --max-len ${params.max_len} \
    --min-len ${params.min_len} \
    --min-qual ${params.min_qual} \
    --threads ${task.cpus} \
    -o ${new_id}.filtered.fasta.gz \
    ${fasta}
    """
}
114+
31115
process AMPLICON_STATS {
32116

33117
/* */
@@ -69,51 +153,17 @@ process MERGE_BY_SAMPLE {
69153
cpus 3
70154

71155
input:
72-
tuple val(barcode), path("fastqs/*")
156+
tuple val(barcode), path("fastas/*")
73157

74158
output:
75-
tuple val(barcode), path("${barcode}.amplicons.fastq.gz")
159+
tuple val(barcode), path("${barcode}.amplicons.fasta.gz")
76160

77161
script:
78162
"""
79163
seqkit scat \
80164
--find-only \
81165
--threads ${task.cpus} \
82-
fastqs/ \
83-
| bgzip -o ${barcode}.amplicons.fastq.gz
166+
fastas/ \
167+
| bgzip -o ${barcode}.amplicons.fasta.gz
84168
"""
85169
}
86-
87-
process TRIM_ENDS_TO_PRIMERS {
88-
89-
/* */
90-
91-
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
92-
maxRetries 2
93-
94-
cpus 3
95-
96-
input:
97-
tuple val(barcode), path(patterns_file), path(untrimmed)
98-
99-
output:
100-
tuple val(barcode), path("${barcode}*.trimmed.fastq.gz")
101-
102-
script:
103-
amplicon = file(patterns_file).getSimpleName()
104-
"""
105-
FORWARD_PATTERN=\$(head -n 1 ${patterns_file})
106-
REVERSE_PATTERN=\$(tail -n 1 ${patterns_file})
107-
108-
seqkit amplicon \
109-
-f -r 1:-1 \
110-
--forward \$FORWARD_PATTERN \
111-
--reverse \$REVERSE_PATTERN \
112-
--max-mismatch ${params.max_mismatch} \
113-
--strict-mode \
114-
--threads ${task.cpus} \
115-
--out-file ${barcode}.${amplicon}.trimmed.fastq.gz \
116-
${untrimmed}
117-
"""
118-
119-
}

modules/vsearch.nf

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,37 @@
1+
process MERGE_READ_PAIRS {

    /*
     * Merges paired reads with `vsearch --fastq_mergepairs` and emits the
     * merged reads as `<sample_id>.merged.preclump.fastq.gz`.
     *
     * vsearch writes merged reads to stdout (`--fastqout -`), which is piped
     * through gzip. The command also writes side files into the task
     * directory that are not declared as outputs: unmerged forward and
     * reverse reads, a merge statistics table (`--eetabbedout`) and a log.
     *
     * A failed task is retried up to two times and then ignored.
     */

    tag "${sample_id}"

    cpus 4

    maxRetries 2
    errorStrategy { task.attempt >= 3 ? 'ignore' : 'retry' }

    input:
    tuple val(sample_id), path(reads1), path(reads2)

    output:
    tuple val(sample_id), path("${sample_id}.merged.preclump.fastq.gz")

    script:
    """
    vsearch \
    --fastq_mergepairs ${reads1} \
    --reverse ${reads2} \
    --fastqout_notmerged_fwd ${sample_id}.unmerged_fwd.fastq.gz \
    --fastqout_notmerged_rev ${sample_id}.unmerged_rev.fastq.gz \
    --eetabbedout ${sample_id}.merge_stats.tsv \
    --log ${sample_id}.merging.log \
    --threads ${task.cpus} \
    --eeout \
    --fastqout - \
    | gzip -c - > ${sample_id}.merged.preclump.fastq.gz
    """

}
34+
135
process ORIENT_READS {
236

337
tag "${barcode}"
@@ -10,14 +44,15 @@ process ORIENT_READS {
1044
each path(refseq)
1145

1246
output:
13-
tuple val(barcode), path("${barcode}.oriented.fastq")
47+
tuple val(barcode), path("${barcode}.oriented.fastq.gz")
1448

1549
script:
1650
"""
1751
vsearch \
1852
--orient ${reads} \
1953
--db ${refseq} \
20-
--fastqout ${barcode}.oriented.fastq
54+
--fastqout - \
55+
| gzip -c > ${barcode}.oriented.fastq.gz
2156
"""
2257

2358
}

nextflow.config

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -139,18 +139,18 @@ params {
139139
// nextclade dataset
140140
nextclade_dataset = null
141141

142-
// Dataset, either a local FASTA file or a pre-built dataset built by Sylph, to use
143-
// for metagenomic profiling
144-
meta_ref = null
142+
// Dataset, either a local FASTA file or a pre-built dataset built by Sylph, to use
143+
// for metagenomic profiling
144+
meta_ref = null
145145

146-
// Contamination FASTA dataset to scrub from reads
147-
contam_fasta = null
146+
// Contamination FASTA dataset to scrub from reads
147+
contam_fasta = null
148148

149-
// K-mer size to use for metagenomic sketching
150-
k = 31
149+
// K-mer size to use for metagenomic sketching
150+
k = 31
151151

152-
// devider haplotype phasing preset
153-
devider_preset = "nanopore-r10" // old-long-reads, nanopore-r9, nanopore-r10, hi-fi
152+
// devider haplotype phasing preset
153+
devider_preset = "nanopore-r10" // old-long-reads, nanopore-r9, nanopore-r10, hi-fi
154154

155155
// nextclade dataset cache
156156
nextclade_cache = "$launchDir/work/nextclade_datasets"
@@ -161,6 +161,10 @@ params {
161161
// Where to place results
162162
results = "$launchDir/results"
163163

164+
// Whether to run in low-memory mode, which limits the number of parallel instances
165+
// of high-memory processes
166+
low_memory = null
167+
164168
// whether to cleanup work directory after a successful run (defaults to false)
165169
cleanup = null
166170

@@ -183,6 +187,17 @@ if ( params.remote_pod5_location != "" || params.pod5_dir != "" || params.precal
183187
includeConfig "conf/illumina.config"
184188
}
185189

190+
process {
    // Cap how many tasks labeled `big_mem` may run at the same time:
    // 2 when params.low_memory is set, otherwise one fewer than the number of
    // available processors, and never fewer than 1.
    //
    // The limit is computed once, when the configuration is parsed, and
    // assigned as a plain integer. maxForks is one of the directives that
    // cannot take a dynamic (closure) value.
    withLabel:big_mem {
        maxForks = params.low_memory ? 2 : Math.max(Runtime.runtime.availableProcessors() - 1, 1)
    }
}
200+
186201

187202
// WHETHER TO GENERATE A REPORT OF RUN STATISTICS
188203
report {

0 commit comments

Comments
 (0)