process COMPRESS_TO_SORTED_FASTA {

    /*
    Convert one barcode's FASTQ reads to a sorted FASTA file.

    The script pipes three seqkit commands: `fq2fa` on the input reads,
    `seq --only-id` on the resulting records, and `sort --two-pass`, which
    writes the final file named after the barcode.

    Input:  tuple of (barcode, fastq_reads)
    Output: tuple of (barcode, "<barcode>.fasta.gz")
    */

    tag "${barcode}"

    // Attempts 1 and 2 are retried; a failure on a later attempt is ignored.
    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
    maxRetries 2

    input:
    tuple val(barcode), path(fastq_reads)

    output:
    // Must match the file name passed to `-o` in the script below exactly.
    tuple val(barcode), path("${barcode}.fasta.gz")

    script:
    """
    seqkit fq2fa ${fastq_reads} \
    | seqkit seq --only-id \
    | seqkit sort --two-pass -o "${barcode}.fasta.gz"
    """
}
21+
122process FIND_COMPLETE_AMPLICONS {
223
324 /* */
425
526 errorStrategy { task. attempt < 3 ? ' retry' : ' ignore' }
627 maxRetries 2
728
8- array 1000
929 cpus 3
1030
1131 input:
1232 tuple path(reads), path(patterns)
1333
1434 output:
15- tuple val(barcode), path(patterns), path(" ${ barcode} _amplicons.fastq .gz" )
35+ tuple val(barcode), path(patterns), path(" ${ barcode} _amplicons.fasta .gz" )
1636
1737 script:
1838 barcode = file(reads). getSimpleName()
@@ -23,11 +43,75 @@ process FIND_COMPLETE_AMPLICONS {
2343 --max-mismatch ${ params.max_mismatch} \
2444 --by-seq \
2545 --pattern-file ${ patterns} \
26- -o ${ barcode} _amplicons.fastq .gz
46+ -o ${ barcode} _amplicons.fasta .gz
2747 """
2848
2949}
3050
process TRIM_ENDS_TO_PRIMERS {

    /*
    Run `seqkit amplicon` on the untrimmed reads for one barcode and one
    patterns file.

    The forward primer is taken from the first line of the patterns file and
    the reverse primer from its last line. Their character lengths are used
    to build the `--region` argument as <forward length>:-<reverse length>.

    NOTE(review): if seqkit's inner-region coordinates are 1-based and
    inclusive of the primers (position 1 = first forward-primer base,
    -1 = last reverse-primer base), this region still keeps one base of each
    primer. Confirm against the seqkit amplicon documentation whether
    length + 1 was intended.

    Input:  tuple of (barcode, patterns_file, untrimmed)
    Output: tuple of (barcode, files matching "<barcode>*.trimmed.fasta.gz")
    */

    // Attempts 1 and 2 are retried; a failure on a later attempt is ignored.
    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
    maxRetries 2

    cpus 3

    input:
    tuple val(barcode), path(patterns_file), path(untrimmed)

    output:
    tuple val(barcode), path("${barcode}*.trimmed.fasta.gz")

    script:
    // The patterns file's simple name labels the output file.
    amplicon = file(patterns_file).getSimpleName()
    """
    FORWARD_PATTERN=\$(head -n 1 ${patterns_file})
    REVERSE_PATTERN=\$(tail -n 1 ${patterns_file})
    FORWARD_LENGTH=\${#FORWARD_PATTERN}
    REVERSE_LENGTH=\${#REVERSE_PATTERN}

    seqkit amplicon \
    --region \${FORWARD_LENGTH}:-\${REVERSE_LENGTH} \
    --forward "\$FORWARD_PATTERN" \
    --reverse "\$REVERSE_PATTERN" \
    --max-mismatch ${params.max_mismatch} \
    --strict-mode \
    --threads ${task.cpus} \
    --out-file ${barcode}.${amplicon}.trimmed.fasta.gz \
    ${untrimmed}
    """

}
86+
process PER_AMPLICON_FILTERS {

    /*
    Filter one FASTA file with `seqkit seq` using the length and quality
    limits from params.max_len, params.min_len and params.min_qual.

    The output keeps the input file's name with the ".fasta.gz" suffix
    replaced by ".filtered.fasta.gz".

    NOTE(review): --min-qual is applied to FASTA input, which carries no
    per-base qualities; confirm how seqkit treats such records when
    params.min_qual is set above zero.

    Input:  tuple of (label, fasta)
    Output: tuple of (label, "<new_id>.filtered.fasta.gz")
    */

    // Attempts 1 and 2 are retried; a failure on a later attempt is ignored.
    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
    maxRetries 2

    cpus 4

    input:
    tuple val(label), path(fasta)

    output:
    tuple val(label), path("${new_id}.filtered.fasta.gz")

    script:
    // Strip the ".fasta.gz" suffix so the new suffix can be appended.
    new_id = file(fasta).getName().replace(".fasta.gz", "")
    """
    seqkit seq \
    --max-len ${params.max_len} \
    --min-len ${params.min_len} \
    --min-qual ${params.min_qual} \
    --threads ${task.cpus} \
    -o ${new_id}.filtered.fasta.gz \
    ${fasta}
    """
}
114+
31115process AMPLICON_STATS {
32116
33117 /* */
@@ -69,51 +153,17 @@ process MERGE_BY_SAMPLE {
69153 cpus 3
70154
71155 input:
72- tuple val(barcode), path(" fastqs /*" )
156+ tuple val(barcode), path(" fastas /*" )
73157
74158 output:
75- tuple val(barcode), path(" ${ barcode} .amplicons.fastq .gz" )
159+ tuple val(barcode), path(" ${ barcode} .amplicons.fasta .gz" )
76160
77161 script:
78162 """
79163 seqkit scat \
80164 --find-only \
81165 --threads ${ task.cpus} \
82- fastqs / \
83- | bgzip -o ${ barcode} .amplicons.fastq .gz
166+ fastas / \
167+ | bgzip -o ${ barcode} .amplicons.fasta .gz
84168 """
85169}
86-
87- process TRIM_ENDS_TO_PRIMERS {
88-
89- /* */
90-
91- errorStrategy { task. attempt < 3 ? ' retry' : ' ignore' }
92- maxRetries 2
93-
94- cpus 3
95-
96- input:
97- tuple val(barcode), path(patterns_file), path(untrimmed)
98-
99- output:
100- tuple val(barcode), path(" ${ barcode} *.trimmed.fastq.gz" )
101-
102- script:
103- amplicon = file(patterns_file). getSimpleName()
104- """
105- FORWARD_PATTERN=\$ (head -n 1 ${ patterns_file} )
106- REVERSE_PATTERN=\$ (tail -n 1 ${ patterns_file} )
107-
108- seqkit amplicon \
109- -f -r 1:-1 \
110- --forward \$ FORWARD_PATTERN \
111- --reverse \$ REVERSE_PATTERN \
112- --max-mismatch ${ params.max_mismatch} \
113- --strict-mode \
114- --threads ${ task.cpus} \
115- --out-file ${ barcode} .${ amplicon} .trimmed.fastq.gz \
116- ${ untrimmed}
117- """
118-
119- }
0 commit comments