nrminor
diff --git a/‎lib/Utils.groovy‎
Lines changed: 14 additions & 0 deletions b/‎lib/Utils.groovy‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎main.nf‎
Lines changed: 3 additions & 1 deletion b/‎main.nf‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎modules/bedtools.nf‎
Lines changed: 0 additions & 2 deletions b/‎modules/bedtools.nf‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎modules/chopper.nf‎
Lines changed: 0 additions & 1 deletion b/‎modules/chopper.nf‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎modules/fastp.nf‎
Lines changed: 21 additions & 0 deletions b/‎modules/fastp.nf‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎modules/ivar.nf‎
Lines changed: 3 additions & 1 deletion b/‎modules/ivar.nf‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎modules/primer_patterns.nf‎
Lines changed: 0 additions & 2 deletions b/‎modules/primer_patterns.nf‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎modules/seqkit.nf‎
Lines changed: 91 additions & 41 deletions b/‎modules/seqkit.nf‎
Lines changed: 91 additions & 41 deletions
diff --git a/‎modules/vsearch.nf‎
Lines changed: 37 additions & 2 deletions b/‎modules/vsearch.nf‎
Lines changed: 37 additions & 2 deletions
diff --git a/‎nextflow.config‎
Lines changed: 24 additions & 9 deletions b/‎nextflow.config‎
Lines changed: 24 additions & 9 deletions
@@ -146,4 +146,18 @@ class Utils {
                 .stripIndent()
     }
 
+    /**
+     * Returns the reverse complement of a nucleotide sequence.
+     *
+     * Letter case is preserved. DNA bases, RNA uracil (complemented to A/a) and the
+     * IUPAC ambiguity codes are complemented; any other character becomes 'N'.
+     *
+     * @param seq the sequence to reverse-complement; must not be null
+     * @return the reverse-complemented sequence, the same length as seq
+     */
+    public static String reverseComplement(String seq) {
+        // Uppercase complements. The IUPAC ambiguity codes are included so that
+        // degenerate bases keep their meaning instead of collapsing to 'N'.
+        def upperComplements = [
+            'A': 'T', 'T': 'A',
+            'C': 'G', 'G': 'C',
+            'U': 'A',
+            'R': 'Y', 'Y': 'R',
+            'K': 'M', 'M': 'K',
+            'S': 'S', 'W': 'W',
+            'B': 'V', 'V': 'B',
+            'D': 'H', 'H': 'D',
+            'N': 'N',
+        ]
+        // Derive the lowercase entries rather than spelling the table out twice.
+        def complementMap = upperComplements + upperComplements.collectEntries { k, v ->
+            [(k.toLowerCase()): v.toLowerCase()]
+        }
+        return seq.reverse().collect { base ->
+            complementMap.getOrDefault(base, 'N')  // default to 'N' if unknown base
+        }.join()
+    }
+
+
 }
@@ -86,7 +86,7 @@ workflow {
             ch_ref_gbk,
             ch_contam_fasta,
             ch_snpeff_config,
-            ch_metagenomics_ref,
+            ch_metagenomics_ref
         )
 
     }  else if ( params.platform == "illumina" ) {
@@ -95,7 +95,9 @@ workflow {
             ch_primer_bed,
             ch_refseq,
             ch_ref_gbk,
+            ch_contam_fasta,
             ch_snpeff_config,
+            ch_metagenomics_ref
         )
 
     } else {
 
@@ -2,8 +2,6 @@ process GET_PRIMER_SEQS {
 
     /* */
 
-    array 1000
-
 	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
 	maxRetries 2
 
 
@@ -6,7 +6,6 @@ process FILTER_WITH_CHOPPER {
     errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
     maxRetries 2
 
-    array 1000
     cpus 4
 
     input:
 
@@ -0,0 +1,21 @@
+process CORRECT_WITH_FASTP {
+
+    /*
+    Run fastp on one sample's reads with the quality, length and poly-G/poly-X
+    trimming options listed in the script, emitting
+    `<sample_id>.corrected.fastq.gz`.
+    */
+
+    tag "${sample_id}"
+
+    input:
+    tuple val(sample_id), path(reads)
+
+    output:
+    tuple val(sample_id), path("${sample_id}.corrected.fastq.gz")
+
+    script:
+    // The staged input is resolved with realpath before being handed to fastp.
+    """
+    fastp \
+    --in1 \$(realpath ${reads}) \
+    --out1 "${sample_id}.corrected.fastq.gz" \
+    --trim_poly_g \
+    --trim_poly_x \
+    --length_required 50 \
+    --qualified_quality_phred 20 \
+    --unqualified_percent_limit 30
+    """
+
+}
@@ -1,6 +1,7 @@
 process CALL_VARIANTS {
 
     tag "${barcode}"
+    label "big_mem"
     publishDir params.ivar, mode: 'copy', overwrite: true
 
 	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
@@ -28,6 +29,7 @@ process CALL_VARIANTS {
 process CALL_CONSENSUS {
 
     tag "${barcode}"
+    label "big_mem"
     publishDir params.consensus, mode: 'copy', overwrite: true
 
 	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
@@ -69,4 +71,4 @@ process CONVERT_TO_VCF {
     ivar_variants_to_vcf.py ${ivar_table} ${barcode}.vcf
     """
 
-}
+}
@@ -2,8 +2,6 @@ process GET_PRIMER_PATTERNS {
 
     /* */
 
-    array 1000
-
 	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
 	maxRetries 2
 
 
@@ -1,18 +1,38 @@
+process COMPRESS_TO_SORTED_FASTA {
+
+    /*
+    Convert a barcode's FASTQ reads to FASTA, reduce each header to its ID, and
+    write the records through `seqkit sort` into `<barcode>.fasta.gz`.
+    */
+
+    tag "${barcode}"
+
+    maxRetries 2
+    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
+
+    input:
+    tuple val(barcode), path(fastq_reads)
+
+    output:
+    tuple val(barcode), path("${barcode}.fasta.gz")
+
+    script:
+    """
+    seqkit fq2fa ${fastq_reads} | \
+    seqkit seq --only-id | \
+    seqkit sort --two-pass --out-file "${barcode}.fasta.gz"
+    """
+}
+
 process FIND_COMPLETE_AMPLICONS {
 
     /* */
 
 	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
 	maxRetries 2
 
-    array 1000
 	cpus 3
 
     input:
 	tuple path(reads), path(patterns)
 
     output:
-    tuple val(barcode), path(patterns), path("${barcode}_amplicons.fastq.gz")
+    tuple val(barcode), path(patterns), path("${barcode}_amplicons.fasta.gz")
 
     script:
 	barcode = file(reads).getSimpleName()
@@ -23,11 +43,75 @@ process FIND_COMPLETE_AMPLICONS {
 	--max-mismatch ${params.max_mismatch} \
 	--by-seq \
 	--pattern-file ${patterns} \
-	-o ${barcode}_amplicons.fastq.gz
+	-o ${barcode}_amplicons.fasta.gz
     """
 
 }
 
+process TRIM_ENDS_TO_PRIMERS {
+
+    /*
+    Extract the amplicon bounded by the forward and reverse primer patterns with
+    `seqkit amplicon`, keeping only the sequence between the two primers.
+
+    The patterns file is read as: first line = forward pattern, last line =
+    reverse pattern. The output is `<barcode>.<amplicon>.trimmed.fasta.gz`, where
+    <amplicon> is the simple name of the patterns file.
+    */
+
+    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
+    maxRetries 2
+
+    cpus 3
+
+    input:
+    tuple val(barcode), path(patterns_file), path(untrimmed)
+
+    output:
+    tuple val(barcode), path("${barcode}*.trimmed.fasta.gz")
+
+    script:
+    amplicon = file(patterns_file).getSimpleName()
+    """
+    FORWARD_PATTERN=\$(head -n 1 ${patterns_file})
+    REVERSE_PATTERN=\$(tail -n 1 ${patterns_file})
+    FORWARD_LENGTH=\${#FORWARD_PATTERN}
+    REVERSE_LENGTH=\${#REVERSE_PATTERN}
+
+    # seqkit's inner-region coordinates are 1-based and inclusive, counted from the
+    # first base of the forward primer (and, when negative, from the last base of
+    # the reverse primer). Excluding a primer of length L therefore needs L+1;
+    # using L would keep one base of each primer.
+    REGION_START=\$(( FORWARD_LENGTH + 1 ))
+    REGION_END=\$(( REVERSE_LENGTH + 1 ))
+
+    seqkit amplicon \
+    --region \${REGION_START}:-\${REGION_END} \
+    --forward \$FORWARD_PATTERN \
+    --reverse \$REVERSE_PATTERN \
+    --max-mismatch ${params.max_mismatch} \
+    --strict-mode \
+    --threads ${task.cpus} \
+    --out-file ${barcode}.${amplicon}.trimmed.fasta.gz \
+    ${untrimmed}
+    """
+
+}
+
+process PER_AMPLICON_FILTERS {
+
+    /*
+    Filter one amplicon's reads with `seqkit seq` using the length and quality
+    thresholds from params, writing `<input name minus .fasta.gz>.filtered.fasta.gz`.
+    */
+
+    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
+    maxRetries 2
+
+    cpus 4
+
+    input:
+    tuple val(label), path(fasta)
+
+    output:
+    tuple val(label), path("${new_id}.filtered.fasta.gz")
+
+    script:
+    // Output prefix: the input file name with its ".fasta.gz" suffix removed.
+    new_id = file(fasta).getName().replace(".fasta.gz", "")
+    // NOTE(review): the input is FASTA, which carries no quality scores; confirm
+    // how --min-qual behaves here (it may be a no-op or may drop every record).
+    // The continuation after the -o line is required so that the input file is
+    // passed to seqkit rather than being run as a separate command.
+    """
+    seqkit seq \
+    --max-len ${params.max_len} \
+    --min-len ${params.min_len} \
+    --min-qual ${params.min_qual} \
+    --threads ${task.cpus} \
+    -o ${new_id}.filtered.fasta.gz \
+    ${fasta}
+    """
+}
+
 process AMPLICON_STATS {
 
     /* */
@@ -69,51 +153,17 @@ process MERGE_BY_SAMPLE {
 	cpus 3
 
 	input:
-	tuple val(barcode), path("fastqs/*")
+	tuple val(barcode), path("fastas/*")
 
 	output:
-	tuple val(barcode), path("${barcode}.amplicons.fastq.gz")
+	tuple val(barcode), path("${barcode}.amplicons.fasta.gz")
 
 	script:
 	"""
 	seqkit scat \
 	--find-only \
 	--threads ${task.cpus} \
-	fastqs/ \
-	| bgzip -o ${barcode}.amplicons.fastq.gz
+	fastas/ \
+	| bgzip -o ${barcode}.amplicons.fasta.gz
 	"""
 }
-
-process TRIM_ENDS_TO_PRIMERS {
-
-    /* */
-
-    errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
-    maxRetries 2
-
-    cpus 3
-
-    input:
-    tuple val(barcode), path(patterns_file), path(untrimmed)
-
-    output:
-    tuple val(barcode), path("${barcode}*.trimmed.fastq.gz")
-
-    script:
-    amplicon = file(patterns_file).getSimpleName()
-    """
-    FORWARD_PATTERN=\$(head -n 1 ${patterns_file})
-    REVERSE_PATTERN=\$(tail -n 1 ${patterns_file})
-
-    seqkit amplicon \
-    -f -r 1:-1 \
-    --forward \$FORWARD_PATTERN \
-    --reverse \$REVERSE_PATTERN \
-    --max-mismatch ${params.max_mismatch} \
-    --strict-mode \
-    --threads ${task.cpus} \
-    --out-file ${barcode}.${amplicon}.trimmed.fastq.gz \
-    ${untrimmed}
-    """
-
-}
 
@@ -1,3 +1,37 @@
+process MERGE_READ_PAIRS {
+
+    /*
+    Merge a sample's forward and reverse reads with `vsearch --fastq_mergepairs`.
+    Merged reads are written to stdout and gzip-compressed into
+    `<sample_id>.merged.preclump.fastq.gz`, the only declared output.
+    */
+
+	tag "${sample_id}"
+
+	errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
+	maxRetries 2
+
+	cpus 4
+
+	input:
+	tuple val(sample_id), path(reads1), path(reads2)
+
+	output:
+	tuple val(sample_id), path("${sample_id}.merged.preclump.fastq.gz")
+
+	script:
+	// The unmerged-read files, the merge-stats table and the log are written into
+	// the task directory but are not declared as outputs, so they are not passed
+	// downstream.
+	// NOTE(review): the unmerged files are named *.fastq.gz, but vsearch presumably
+	// writes them uncompressed (hence the explicit gzip on the merged stream) — confirm.
+	"""
+	vsearch \
+	--fastq_mergepairs ${reads1} \
+	--reverse ${reads2} \
+	--fastqout_notmerged_fwd ${sample_id}.unmerged_fwd.fastq.gz \
+	--fastqout_notmerged_rev ${sample_id}.unmerged_rev.fastq.gz \
+	--eetabbedout ${sample_id}.merge_stats.tsv \
+	--log ${sample_id}.merging.log \
+	--threads ${task.cpus} \
+	--eeout \
+	--fastqout - \
+	| gzip -c - > ${sample_id}.merged.preclump.fastq.gz
+	"""
+
+}
+
 process ORIENT_READS {
 
     tag "${barcode}"
@@ -10,14 +44,15 @@ process ORIENT_READS {
     each path(refseq)
 
     output:
-    tuple val(barcode), path("${barcode}.oriented.fastq")
+    tuple val(barcode), path("${barcode}.oriented.fastq.gz")
 
     script:
+    // vsearch writes the oriented FASTQ to stdout ("-"); the pipe into gzip is
+    // what produces the declared .fastq.gz output.
     """
     vsearch \
 	--orient ${reads} \
     --db ${refseq} \
-    --fastqout ${barcode}.oriented.fastq
+    --fastqout - \
+    | gzip -c > ${barcode}.oriented.fastq.gz
     """
 
 }
 
@@ -139,18 +139,18 @@ params {
     // nextclade dataset
     nextclade_dataset = null
 
-	// Dataset, either a local FASTA file or a pre-built dataset built by Sylph, to use
-	// for metagenomic profiling
-	meta_ref = null
+		// Dataset, either a local FASTA file or a pre-built dataset built by Sylph, to use
+		// for metagenomic profiling
+		meta_ref = null
 
-	// Contamination FASTA dataset to scrub from reads
-	contam_fasta = null
+		// Contamination FASTA dataset to scrub from reads
+		contam_fasta = null
 
-	// K-mer size to use for metagenomic sketching
-	k = 31
+		// K-mer size to use for metagenomic sketching
+		k = 31
 
-	// devider haplotype phasing preset
-	devider_preset = "nanopore-r10" // old-long-reads, nanopore-r9, nanopore-r10, hi-fi
+		// devider haplotype phasing preset
+		devider_preset = "nanopore-r10" // old-long-reads, nanopore-r9, nanopore-r10, hi-fi
 
     // nextclade dataset cache
     nextclade_cache = "$launchDir/work/nextclade_datasets"
@@ -161,6 +161,10 @@ params {
     // Where to place results
     results = "$launchDir/results"
 
+		// Whether to run in low-memory mode, which limits the number of parallel instances
+		// of high-memory processes (those labelled "big_mem")
+		low_memory = null
+
     // whether to cleanup work directory after a successful run (defaults to false)
     cleanup = null
 
@@ -183,6 +187,17 @@ if ( params.remote_pod5_location != "" || params.pod5_dir != "" || params.precal
     includeConfig "conf/illumina.config"
 }
 
+process {
+    // Cap how many tasks of processes labelled "big_mem" may run at once:
+    // 2 in low-memory mode, otherwise one fewer than the available processors
+    // (never less than 1). The value is computed eagerly because maxForks does
+    // not accept a dynamic (closure) value, and a nested closure in a ternary
+    // would be returned uncalled rather than evaluated.
+    withLabel:big_mem {
+        maxForks = params.low_memory ? 2 : Math.max(Runtime.runtime.availableProcessors() - 1, 1)
+    }
+}
+
 
 // WHETHER TO GENERATE A REPORT OF RUN STATISTICS
 report {