diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 511b3b1d3..0a0579764 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -28,7 +28,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core==2.2 + pip install nf-core==2.14.1 - name: Run nf-core lint env: diff --git a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index 9e605fcfa..000000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -ul-indent: - indent: 4 -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary diff --git a/.nf-core.yml b/.nf-core.yml index 3c4b505cb..a1298cc75 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -14,7 +14,6 @@ lint: - docs/images/nf-core-autometa_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.md - .github/ISSUE_TEMPLATE/feature_request.md - files_unchanged: - manifest - .github/CONTRIBUTING.md @@ -30,10 +29,11 @@ lint: - LICENSE - .github/PULL_REQUEST_TEMPLATE.md - lib/NfcoreTemplate.groovy - actions_ci: - .github/workflows/ci.yml - schema_lint: true template_strings: false - nextflow_config: false + nextflow_config: + - manifest.name + - manifest.homePage +repository_type: pipeline diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 000000000..d0e7ae589 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,9 @@ +email_template.html +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 000000000..c81f9a766 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/Dockerfile b/Dockerfile index edc2f042f..192632e9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ LABEL maintainer="jason.kwan@wisc.edu" # along with Autometa. If not, see . RUN apt-get update --allow-releaseinfo-change \ - && apt-get install -y procps make \ + && apt-get install -y procps make curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* @@ -28,6 +28,8 @@ COPY autometa-env.yml ./ RUN mamba env update -n base --file=autometa-env.yml \ && mamba clean --all -y +RUN mamba env update -n base --file=autometa-env.yml \ + && mamba clean --all -y COPY . 
/Autometa WORKDIR /Autometa @@ -42,6 +44,11 @@ RUN hmmpress -f autometa/databases/markers/bacteria.single_copy.hmm \ && autometa-config --section databases --option base --value ${DB_DIR} \ && echo "databases base directory set in ${DB_DIR}/" + +# make the /scratch/dbs directory available to anyone +RUN chmod -R 755 /scratch/dbs + + RUN echo "Testing autometa import" \ && python -c "import autometa" @@ -67,3 +74,5 @@ RUN echo "Checking autometa entrypoints" \ && autometa-binning-ldm-loginfo -h > /dev/null \ && autometa-benchmark -h > /dev/null \ && autometa-download-dataset -h > /dev/null + +ENV NUMBA_CACHE_DIR=/tmp diff --git a/autometa-env.yml b/autometa-env.yml index 6cc97229f..a63ea1d68 100644 --- a/autometa-env.yml +++ b/autometa-env.yml @@ -4,18 +4,23 @@ channels: - bioconda - defaults dependencies: + - aria2 - attrs # test-data requirement - bedtools - biopython>=1.82 - bowtie2 + - curl - diamond>=2.0 + - gzip - gdown - hmmer + - joblib>=1.1.0 # See https://stackoverflow.com/a/73830525/12671809 - numba>=0.47 - numpy>=1.13 - pandas>=1.5 - parallel - pip + - procps-ng # required by nextflow - prodigal # NOTE: 2.5 and 2.6 output format is different for sequence headers - python-annoy>=1.11 # required for trimap installation. - requests diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index ae7007f84..9fd37f56f 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -586,9 +586,12 @@ def embed( f"{method} not in embedding methods. Choices: {', '.join(choices)}" ) # PCA - n_samples, n_components = df.shape + # Drop any rows that all cols contain NaN. This may occur if the contig length is below the k-mer size X = df.dropna(axis="index", how="all").fillna(0).to_numpy() + n_samples, n_components = df.shape + + logger.warning(f"n_samples: {n_samples} n_components: {n_components}") # Set random state using provided seed random_state = np.random.RandomState(seed) if isinstance(pca_dimensions, str): @@ -599,11 +602,15 @@ def embed( f"pca_dimensions must be an integer! given: {pca_dimensions}" ) if n_components > pca_dimensions and pca_dimensions != 0: + if n_samples < pca_dimensions: + logging.error( + f"n_samples ({n_samples}) is less than pca_dimensions ({pca_dimensions}), lowering pca_dimensions to {min(n_samples, pca_dimensions)} ." + ) + pca_dimensions = min(n_samples, pca_dimensions) logger.debug( f"Performing decomposition with PCA (seed {seed}): {n_components} to {pca_dimensions} dims" ) X = PCA(n_components=pca_dimensions, random_state=random_state).fit_transform(X) - # X = PCA(n_components='mle').fit_transform(X) n_samples, n_components = X.shape logger.debug(f"{method}: {n_samples} data points and {n_components} dimensions") diff --git a/autometa/config/databases.py b/autometa/config/databases.py index 14ea42c3e..eb45c8779 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ # License: GNU Affero General Public License v3 or later # A copy of GNU AGPL v3 should have been included in this software package in LICENSE.txt. 
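The `kmers.py` hunk above adds two related safeguards to the embedding step: rows that are entirely NaN (contigs shorter than the k-mer size) are dropped before decomposition, and the requested `pca_dimensions` is lowered when there are fewer samples than requested components, since PCA cannot return more components than samples. The sketch below is a minimal standalone illustration of that guard, not Autometa's `embed()` itself; the helper name `reduce_dimensions` and the toy data are assumptions made for the example, while pandas, numpy, and `sklearn.decomposition.PCA` are the same libraries the patched code uses.

.. code-block:: python

    # Minimal sketch of the PCA-dimension guard added to autometa/common/kmers.py.
    # Illustrative only: the helper name and toy data are not part of Autometa.
    import logging

    import numpy as np
    import pandas as pd
    from sklearn.decomposition import PCA

    logger = logging.getLogger(__name__)


    def reduce_dimensions(df: pd.DataFrame, pca_dimensions: int, seed: int = 42) -> np.ndarray:
        # Drop rows where every column is NaN (e.g. contigs shorter than the k-mer size),
        # then fill any remaining NaNs with 0 before decomposition.
        X = df.dropna(axis="index", how="all").fillna(0).to_numpy()
        n_samples, n_components = X.shape
        if pca_dimensions and n_components > pca_dimensions:
            if n_samples < pca_dimensions:
                # PCA cannot return more components than samples, so clamp the target.
                logger.warning(
                    f"n_samples ({n_samples}) < pca_dimensions ({pca_dimensions}); "
                    f"lowering pca_dimensions to {n_samples}"
                )
                pca_dimensions = n_samples
            random_state = np.random.RandomState(seed)
            X = PCA(n_components=pca_dimensions, random_state=random_state).fit_transform(X)
        return X


    if __name__ == "__main__":
        # Three contigs but ten requested dimensions: the guard lowers the target to 3.
        toy = pd.DataFrame(np.random.rand(3, 136))
        print(reduce_dimensions(toy, pca_dimensions=10).shape)  # (3, 3)

Without the clamp, small test datasets (such as the minimal test data introduced elsewhere in this PR) would make `PCA(n_components=pca_dimensions)` raise, because scikit-learn requires `n_components <= min(n_samples, n_features)`.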
diff --git a/autometa/taxonomy/download_gtdb_files.py b/autometa/taxonomy/download_gtdb_files.py index ceabc7225..b914e1967 100644 --- a/autometa/taxonomy/download_gtdb_files.py +++ b/autometa/taxonomy/download_gtdb_files.py @@ -9,6 +9,8 @@ from tqdm import tqdm +from autometa.config.utilities import DEFAULT_FPATH + # Set up logger logger = logging.getLogger(__name__) @@ -312,3 +314,39 @@ def download_and_format(gtdb_host, gtdb_version, single_dir, force=False): "aa_reps_path": aa_reps_path, "combined_gtdb_fasta": combined_gtdb_fasta, } + + + +def main(): + import argparse + import logging as logger + + logger.basicConfig( + format="[%(asctime)s %(levelname)s] %(name)s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + level=logger.DEBUG, + ) + parser = argparse.ArgumentParser( + description="Download GTDB files", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--version", + help="GTDB version to download, 'latest' to get the latest version, otherwise specify a version number.", + default="220", + ) + parser.add_argument( + "--host", + help="GTDB host to download files from.", + default="data.gtdb.ecogenomic.org", + ) + parser.add_argument( + "--outdir", + help="Directory to save the downloaded files.", + required=True + ) + args = parser.parse_args() + download_and_format(gtdb_host=args.host, gtdb_version=args.version, single_dir=args.outdir) + +if __name__ == "__main__": + main() diff --git a/autometa/taxonomy/gtdb.py b/autometa/taxonomy/gtdb.py index 6af8fd92e..24ee04e55 100644 --- a/autometa/taxonomy/gtdb.py +++ b/autometa/taxonomy/gtdb.py @@ -63,7 +63,7 @@ def __init__(self, dbdir: str, verbose: bool = True, config=DEFAULT_CONFIG): self.names_fpath = os.path.join(dbdir, "names.dmp") self.merged_fpath = os.path.join(dbdir, "merged.dmp") self.delnodes_fpath = os.path.join(dbdir, "delnodes.dmp") - self.verify_databases() + # self.verify_databases() self.names = self.parse_names() self.nodes = self.parse_nodes() self.merged = self.parse_merged() diff --git a/autometa/validation/datasets.py b/autometa/validation/datasets.py index 3adc8f4bd..0261bd74d 100755 --- a/autometa/validation/datasets.py +++ b/autometa/validation/datasets.py @@ -63,7 +63,10 @@ def download( file_id = df.loc[(community_size, file_name), "file_id"] file_id_filepath = os.path.join(community_size_outdir, file_name) url = f"https://drive.google.com/uc?id={file_id}" - + # if the file already exists, skip downloading + if os.path.exists(file_id_filepath): + logger.info(f"File {file_name} already exists in {community_size_outdir}. 
Skipping download.") + continue gdown.download(url, file_id_filepath) diff --git a/bin/mock_data_report.R b/bin/mock_data_report.R new file mode 100755 index 000000000..a363f1c96 --- /dev/null +++ b/bin/mock_data_report.R @@ -0,0 +1,28 @@ +#!/usr/bin/env Rscript + +args = commandArgs(trailingOnly=TRUE) + +rmarkdown::render( + input=args[[1]], + params=list( + bins_path=args[[2]], + assembly_to_locus_path=args[[2]], + assembly_report_path=args[[3]], + genus=FALSE + ), + knit_root_dir=getwd(), + output_dir=getwd(), + output_file="mock_data_report_by_assembly.html" +) +rmarkdown::render( + input=args[[1]], + params=list( + bins_path= args[[2]], + assembly_to_locus_path = args[[2]], + assembly_report_path = args[[3]], + genus=TRUE + ), + knit_root_dir=getwd(), + output_dir=getwd(), + output_file="mock_data_report_by_genus.html" +) diff --git a/conf/base.config b/conf/base.config index 88616e3af..6601fcdd5 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,22 +26,22 @@ process { // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_low { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } + cpus = { check_max( 1 ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 8 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } @@ -53,4 +53,7 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/modules.config b/conf/modules.config index 239c6b059..6a911643e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,128 +1,183 @@ /* ======================================================================================== - Config file for defining DSL2 per module options + Config file for defining DSL2 per module options and publishing paths ======================================================================================== Available keys to override module options: - args = Additional arguments appended to command in module. - args2 = Second set of arguments appended to command in module (multi-tool modules). - args3 = Third set of arguments appended to command in module (multi-tool modules). - publish_dir = Directory to publish results. 
- publish_by_meta = Groovy list of keys available in meta map to append as directories to "publish_dir" path - If publish_by_meta = true - Value of ${meta['id']} is appended as a directory to "publish_dir" path - If publish_by_meta = ['id', 'custompath'] - If "id" is in meta map and "custompath" isn't then "${meta['id']}/custompath/" - is appended as a directory to "publish_dir" path - If publish_by_meta = false / null - No directories are appended to "publish_dir" path - publish_files = Groovy map where key = "file_ext" and value = "directory" to publish results for that file extension - The value of "directory" is appended to the standard "publish_dir" path as defined above. - If publish_files = null (unspecified) - All files are published. - If publish_files = false - No files are published. - suffix = File name suffix for output files. + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. ---------------------------------------------------------------------------------------- */ -params { - modules { - 'count_kmers_options' { - publish_by_meta = ['id'] - publish_dir = "count_kmer_analysis" - } - 'normalize_kmers_options' { - publish_by_meta = ['id'] - publish_dir = "normalize_kmer_analysis" - } - 'embed_kmers_options' { - publish_by_meta = ['id'] - publish_dir = "embed_kmer_analysis" - } - 'diamond_blastp_options' { - args = "--evalue 1e-5 --max-target-seqs 200 -b 6 --outfmt 6" - publish_by_meta = ['id'] - publish_dir = "diamond_blastp_results" - } - 'get_genomes_for_mock' { - args = "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt" - args2 = 'GCF_000734955.1|GCF_900448115.1|GCF_015751765.1' - publish_files = false - } - 'hmmsearch_options' { - args = "-Z 150 --cpu 1 --seed 42" - args2 = "" - } - 'hmmsearch_filter_options' { - args = "" - } - 'merge_hmmsearch_options'{ - publish_by_meta = ['id'] - publish_dir = "hmmsearch" - } - 'majority_vote_options' { - publish_by_meta = ['id'] - } - 'merge_kmers_embedded_options'{ - publish_by_meta = ['id'] - publish_dir = "kmers_embedded" - } - 'merge_kmers_normalized_options'{ - publish_by_meta = ['id'] - publish_dir = "kmers_normalized" - } - 'mock_data_report'{ - publish_by_meta = ['id'] - publish_dir = "mock_data_reports" - } - 'prodigal_options' { - publish_by_meta = ['id'] - args = "-p meta -m" - publish_dir = "prodigal" - } - 'diamond_makedb_options' { - publish_by_meta = ['id'] - args = "" - } - 'align_reads_options' { - args = "" - args2 = "-q --phred33 --very-sensitive --no-unal" - publish_by_meta = ['id'] - publish_dir = "align_reads" - } - 'samtools_viewsort_options' { - args = "" - args2 = "" - publish_by_meta = ['id'] - publish_dir = "samtools_sort" - } - 'bedtools_genomecov_options' { - args = "" - args2 = "" - publish_by_meta = ['id'] - publish_dir = "genome_coverage" - } - 'seqkit_split_options' { + + +process { + withName:'ALIGN_READS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + ext.args = '' + ext.args2 = '-q --phred33 --very-sensitive --no-unal' + } + withName:'BEDTOOLS_GENOMECOV'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'BINNING'{ + 
publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'BINNING_SUMMARY'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'COUNT_KMERS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}/${meta.taxon}" }, + mode: params.publish_dir_mode + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.tracedir}" }, + mode: 'copy', + pattern: '*_versions.yml' + ] + } + withName:'DIAMOND_BLASTP'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + ext.args = '--faster --evalue 1e-5 --max-hsps 1 --max-target-seqs 200 -b 6 --outfmt 6' + } + withName:'DIAMOND_MAKEDB'{ + storeDir = {"${params.nr_dmnd_dir}"} + } + withName:'DOWNLOAD_ACESSION2TAXID'{ + storeDir = {"${params.prot_accession2taxid_gz_dir}"} + } + withName:'DOWNLOAD_NR'{ + storeDir = {"${params.nr_dmnd_dir}"} + } + withName:'DOWNLOAD_TAXDUMP'{ + storeDir = {"${params.taxdump_tar_gz_dir}"} + } + withName:'EMBED_KMERS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}/${meta.taxon}" }, + mode: params.publish_dir_mode + ] + } + withName:'GET_GENOMES_FOR_MOCK'{ + storeDir = { "${params.outdir}/${meta.id}/mock_data/genomes"} + ext.args = 'https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt' + ext.args2 = 'GCF_000734955.1|GCF_900448115.1|GCF_015751765.1' + } + withName:'GTDB_MAKEDB'{ + storeDir = {"${params.gtdb_dir}"} + } + withName:'MAJORITY_VOTE'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'MARKERS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'MOCK_DATA_REPORT'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'NORMALIZE_KMERS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}/${meta.taxon}" }, + mode: params.publish_dir_mode + ] + } + withName:'PARSE_BED'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'PRODIGAL'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + ext.args = '-p meta -m' + } + withName:'REDUCE_LCA'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'SAMPLESHEET_CHECK'{ + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'SAMTOOLS_VIEW_AND_SORT'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + ext.args = '' + ext.args2 = '' + } + withName:'SEQKIT_FILTER'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: 
params.publish_dir_mode + ] + } + withName:'SPADES_KMER_COVERAGE'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode, + pattern: '*.coverages.tsv' + ] + } + withName:'SPLIT_KINGDOMS'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'RECRUIT'{ + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode + ] + } + withName:'TEST_DOWNLOAD'{ + storeDir = {"${params.prot_accession2taxid_gz_dir}"} + } +} + +/* + +'seqkit_split_options' { publish_by_meta = ['id'] - args = "" args2 = "--two-pass" } - 'spades_kmer_coverage' { - publish_by_meta = ['id'] - publish_files = ['*.coverages.tsv':''] - publish_dir = "coverage" - } - 'split_kingdoms_options' { - publish_by_meta = ['id'] - } - 'taxon_assignment' { - publish_by_meta = ['id'] - } - 'binning_options' { - publish_by_meta = ['id'] - publish_dir = "binning" - } - 'unclustered_recruitment_options' { - publish_by_meta = ['id'] - publish_dir = "unclustered_recruitment" - } - 'binning_summary_options' { - publish_by_meta = ['id'] - publish_dir = "binning_summary" - } - } -} + + + +*/ diff --git a/conf/test.config b/conf/test.config index 571e13bab..06dfca8fc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -7,8 +7,49 @@ ======================================================================================== Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run autometa -profile test, +example_dir="/tmp/autometa_test" +mkdir -p $example_dir $example_dir/database_directory $example_dir/output +cd $example_dir + +curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/combined_nucleotide.fna.gz -o $example_dir/combined_nucleotide.fna.gz +curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/reads_1.fastq.gz -o $example_dir/reads_1.fastq.gz +curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/reads_2.fastq.gz -o $example_dir/reads_2.fastq.gz +curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/database_directory/prot.accession2taxid.gz -o $example_dir/database_directory/prot.accession2taxid.gz +curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/database_directory/nr.dmnd -o $example_dir/database_directory/nr.dmnd + +# Create a sample sheet +sample_sheet="$example_dir/autometa_test_samplesheet.csv" +echo "sample,assembly,fastq_1,fastq_2,coverage_tab,cov_from_assembly" > $sample_sheet +echo "example_1,${example_dir}/combined_nucleotide.fna.gz,${example_dir}/reads_1.fastq.gz,${example_dir}/reads_2.fastq.gz,,0" >> $sample_sheet + +cd ~/Autometa + + nextflow run KwanLab/Autometa \ + -profile docker \ + --input $sample_sheet \ + --taxonomy_aware \ + --outdir ${example_dir}/output \ + --single_db_dir $example_dir/database_directory \ + --autometa_image_tag 'dev' \ + --use_gtdb \ + --gtdb_version '220' \ + --large_downloads_permission + -resume + +# or + + nextflow run KwanLab/Autometa \ + -profile docker \ + --input $sample_sheet \ + --taxonomy_aware \ + --outdir 
${example_dir}/output \ + --single_db_dir $example_dir/database_directory \ + --autometa_image_tag 'dev' \ + --large_downloads_permission \ + --max_memory '900.GB' \ + --max_cpus 90 \ + --max_time '20040.h' \ + -resume ---------------------------------------------------------------------------------------- */ @@ -22,10 +63,7 @@ params { max_memory = 6.GB max_time = 2.h - // Input data - // Specify the paths to your test data on nf-core/test-datasets - // Give any required params for the test so that command line flags are not needed - input = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/015/645/455/GCF_015645455.1_ASM1564545v1/GCF_015645455.1_ASM1564545v1_genomic.fna.gz' mock_test = true + debug = true } diff --git a/docker/modules/mock_data_reporter.Dockerfile b/docker/modules/mock_data_reporter.Dockerfile index 00e7b3b03..b72d322aa 100644 --- a/docker/modules/mock_data_reporter.Dockerfile +++ b/docker/modules/mock_data_reporter.Dockerfile @@ -1,4 +1,4 @@ -FROM rocker/rstudio:4.1.2 +FROM rocker/rstudio:4.2.2 # Not starting from r-base b/c pandoc, etc needed LABEL maintainer="jason.kwan@wisc.edu" @@ -17,9 +17,12 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \ libnetcdf-dev \ udunits-bin \ libudunits2-dev \ - curl + curl \ + procps # R packages -ENV R_PACKAGES='c("ggbeeswarm","data.table","plotly","crosstalk","DT","patchwork")' -RUN echo 'options("repos"="https://mran.microsoft.com/snapshot/2022-01-19")' >> /usr/local/lib/R/etc/Rprofile.site +ENV R_PACKAGES='c("rmarkdown", "data.table", "ggplot2", "plotly", "crosstalk", "magrittr", "DT", "ggbeeswarm", "patchwork", "htmltools")' + +# MRAN is going away. TODO: find a suitable replacement or snaphshot with renv or just cross fingers +# RUN echo 'options("repos"="https://mran.microsoft.com/snapshot/2023-03-03")' >> /usr/local/lib/R/etc/Rprofile.site RUN Rscript -e "install.packages(${R_PACKAGES}, Ncpus=parallel::detectCores())" diff --git a/docs/source/nextflow-workflow.rst b/docs/source/nextflow-workflow.rst index ad5790e2f..7dd5f210b 100644 --- a/docs/source/nextflow-workflow.rst +++ b/docs/source/nextflow-workflow.rst @@ -5,6 +5,72 @@ ======================= +Ultra-Quick Start +############# + +If you already have Nextflow and Docker installed the following commands will get you started. For detailed instructions see the sections following this. + + +.. 
code-block:: bash + + # change this to your desired directories + example_dir="/tmp/autometa_test" + single_database_dir="/tmp/autometa_test/database_directory" + + # make the needed subdirectories + mkdir -p $example_dir $example_dir/output + cd $example_dir + + # download small example data + curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/combined_nucleotide.fna.gz -o $example_dir/combined_nucleotide.fna.gz + curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/reads_1.fastq.gz -o $example_dir/reads_1.fastq.gz + curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/reads_2.fastq.gz -o $example_dir/reads_2.fastq.gz + curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/database_directory/prot.accession2taxid.gz -o $example_dir/database_directory/prot.accession2taxid.gz + curl -L -H "Accept: application/vnd.github.v3.raw" https://github.com/KwanLab/autometa_test_data/raw/refs/heads/main/minimal/database_directory/nr.dmnd -o $example_dir/database_directory/nr.dmnd + + # Create a sample sheet + sample_sheet="$example_dir/autometa_test_samplesheet.csv" + echo "sample,assembly,fastq_1,fastq_2,coverage_tab,cov_from_assembly" > $sample_sheet + echo "example_1,${example_dir}/combined_nucleotide.fna.gz,${example_dir}/reads_1.fastq.gz,${example_dir}/reads_2.fastq.gz,,0" >> $sample_sheet + + # Run Autometa without taxon splitting + nextflow run KwanLab/Autometa \ + -profile docker \ + --input $sample_sheet \ + --outdir ${example_dir}/output \ + --max_memory '16.GB' \ + --max_cpus 9 \ + --max_time '8.h' + + # Or use NCBI nr to split contigs by taxonomy + nextflow run KwanLab/Autometa \ + -profile docker \ + --input $sample_sheet \ + --taxonomy_aware \ + --outdir ${example_dir}/output \ + --single_db_dir ${single_database_dir} \ + --autometa_image_tag 'dev' \ + --large_downloads_permission \ + --max_memory '16.GB' \ + --max_cpus 9 \ + --max_time '8.h' + + # Or with GTDB refinement + nextflow run KwanLab/Autometa \ + -profile docker \ + --input $sample_sheet \ + --taxonomy_aware \ + --outdir ${example_dir}/output_gtdb \ + --single_db_dir ${single_database_dir} \ + --autometa_image_tag 'dev' \ + --use_gtdb \ + --gtdb_version '220' \ + --gtdb_dir ${single_database_dir} \ + --large_downloads_permission \ + --max_memory '16.GB' \ + --max_cpus 9 \ + --max_time '8.h' + Why nextflow? ############# @@ -227,12 +293,6 @@ Then copy the following code block into that new file ("agrp" is the slurm parti slurm { process.executor = "slurm" process.queue = "agrp" // <<-- change this to whatever your partition is called - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false executor { queueSize = 8 } @@ -609,7 +669,7 @@ may still use multiple cores. Databases ********* -Autometa uses the following NCBI databases throughout its pipeline: +When the Autometa workflow is run with the `--taxonomy_aware` flag it will use NCBI nr databases to help bin contigs. 
If the databases aren't present and you include the `--large_downloads_permission` flag, the workflow will download and format the following databases: - Non-redundant nr database - `ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz `_ @@ -618,18 +678,27 @@ Autometa uses the following NCBI databases throughout its pipeline: - nodes.dmp, names.dmp and merged.dmp - Found within - `ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz `_ -If you are running autometa for the first time you'll have to download these databases. -You may use ``autometa-update-databases --update-ncbi``. This will download the databases to the default path. You can check -the default paths using ``autometa-config --print``. If you need to change the default download directory you can use -``autometa-config --section databases --option ncbi --value ``. -See ``autometa-update-databases -h`` and ``autometa-config -h`` for full list of options. +Additionally, the NCBI-taxonomy based taxa assignments can be refined using GTDB. To do so you must use the flag `--use_gtdb` and, optionally, the version of GTDB you would like to use with the `--gtdb_version` flag. + +If the `--large_downloads_permission` is provided the workflow will handle the downloading and formatting of the following files; and you should let it because it isn't straightforward to do manually. + +- GTDB taxdump + - `https://github.com/shenwei356/gtdb-taxdump/releases `_ +- GTDB database + - e.g. `https://data.gtdb.ecogenomic.org/releases/release220/220.0/genomic_files_reps/gtdb_proteins_aa_reps_r220.tar.gz `_ In your ``nf-params.json`` file you also need to specify the directory where the different databases are present. -Make sure that the directory path contains the following databases: -- Diamond formatted nr file => nr.dmnd -- Extracted files from tarball taxdump.tar.gz -- prot.accession2taxid.gz +The easiest method is to just set `--single_db_dir` to the directory where all the databases will stored and let the workflow handle the rest. +If you want finer control you can direct the workflow to specific database directories using the following parameters: + +- `--nr_dmnd_dir` +- `--lca_dir` +- `--prot_accession2taxid_gz_dir` +- `--taxdump_tar_gz_dir` +- `--gtdb_dir` + + .. 
code-block:: diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 40ab65f20..b3d092f80 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -27,7 +27,7 @@ class NfcoreSchema { /* groovylint-disable-next-line UnusedPrivateMethodParameter */ public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Check for nextflow core params and unexpected params def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') @@ -135,7 +135,7 @@ class NfcoreSchema { } } - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Validate parameters against the schema InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 44551e0a3..000000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,270 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Check params.hostnames - // - public static void hostName(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (params.hostnames) { - try { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.info "=${colors.yellow}====================================================${colors.reset}=\n" + - "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" + - " ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" + - "=${colors.yellow}====================================================${colors.reset}=" - } - } - } - } catch (Exception e) { - log.warn "[$workflow.manifest.name] Could not determine 'hostname' - skipping check. Reason: ${e.message}." 
- } - } - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = 
logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - hostName(workflow, params, log) - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? 
'' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 18173e985..000000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." 
- return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } - - // - // Join module args with appropriate spacing - // - public static String joinModuleArgs(args_list) { - return ' ' + args_list.join(' ') - } -} diff --git a/lib/WorkflowAutometa.groovy b/lib/WorkflowAutometa.groovy deleted file mode 100755 index e66120fa0..000000000 --- a/lib/WorkflowAutometa.groovy +++ /dev/null @@ -1,59 +0,0 @@ -// -// This file holds several functions specific to the workflow/autometa.nf in the nf-core/autometa pipeline -// - -class WorkflowAutometa { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += " $group\n" - summary_section += " \n" - for (param in group_params.keySet()) { - summary_section += " $param${group_params.get(param) ?: 'N/A'}\n" - } - summary_section += " \n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "===================================================================================" - System.exit(1) - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index e34547e0a..000000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,94 +0,0 @@ -// -// This file holds several functions specific to the main.nf 
workflow in the nf-core/autometa pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nf-core launch KwanLab/Autometa" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check the hostnames against configured profiles - NfcoreTemplate.hostName(workflow, params, log) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - - // - // Get attribute from genome config file e.g. 
fasta - // - public static String getGenomeAttribute(params, attribute) { - def val = '' - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] - } - } - return val - } -} diff --git a/main.nf b/main.nf index c35c35414..24977c4f0 100644 --- a/main.nf +++ b/main.nf @@ -19,7 +19,6 @@ nextflow.enable.dsl = 2 ======================================================================================== */ -WorkflowMain.initialise(workflow, params, log) //////////////////////////////////////////////////// @@ -41,7 +40,7 @@ Results directory: ${params.outdir} ======================================================================================== */ -include { AUTOMETA } from './workflows/autometa.nf' addParams(single_db_dir: params.single_db_dir) +include { AUTOMETA } from './workflows/autometa.nf' /* ======================================================================================== diff --git a/modules.json b/modules.json index 229d1b13c..711e99009 100644 --- a/modules.json +++ b/modules.json @@ -1,13 +1,26 @@ { - "name": "nf-core/autometa", - "homePage": "https://github.com/nf-core/autometa", + "name": "autometa", + "homePage": "https://github.com/KwanLab/Autometa", "repos": { - "nf-core/modules": { - "bowtie2/align": { - "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" - }, - "prodigal": { - "git_sha": "e937c7950af70930d1f34bb961403d9d2aa81c7d" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bowtie2/align": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "prodigal": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + } + } } } } diff --git a/modules/local/align_reads.nf b/modules/local/align_reads.nf index 472a8a25f..64b20bd96 100644 --- a/modules/local/align_reads.nf +++ b/modules/local/align_reads.nf @@ -1,19 +1,11 @@ #!/usr/bin/env nextflow -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process ALIGN_READS { tag "Aligning reads to ${meta.id}" label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -24,28 +16,34 @@ process ALIGN_READS { tuple val(meta), path(metagenome), path(fwd_reads), path(rev_reads) output: - tuple val(meta), path("alignments.sam"), emit: sam - path "*.db*.bt2" , emit: bt2_db - path "*.version.txt" , emit: version + tuple val(meta), path("*.alignments.sam") , emit: sam + path "*.db*.bt2" , emit: bt2_db + path "versions.yml" , emit: versions when: - meta.cov_from_assembly.equals('0') + task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ bowtie2-build \\ - ${options.args} \\ + ${args} \\ ${metagenome} \\ - ${meta.id}.db + ${prefix}.db bowtie2 \\ -x ${meta.id}.db \\ - ${options.args2} \\ + ${args2} \\ -p ${task.cpus} \\ - -S alignments.sam \\ + -S ${prefix}.alignments.sam \\ -1 $fwd_reads \\ -2 $rev_reads - echo \$(bowtie2 --version 2>&1) | sed -n 's/^.*bowtie2-align-s version //p; s/ .*\$//' > bowtie2.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS """ } diff --git a/modules/local/analyze_kmers.nf b/modules/local/analyze_kmers.nf deleted file mode 100644 index 8de91e294..000000000 --- a/modules/local/analyze_kmers.nf +++ /dev/null @@ -1,46 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process ANALYZE_KMERS { - tag "Counting kmers for ${meta.id}" - label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"autometa" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/autometa" - } else { - container "jasonkwan/autometa:${params.autometa_image_tag}" - } - - input: - tuple val(meta), path(metagenome) - - output: - tuple val(meta), path("kmers.tsv") , emit: counts - tuple val(meta), path("kmers.normalized.tsv"), emit: normalized - tuple val(meta), path("kmers.embedded.tsv") , emit: embedded - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - """ - autometa-kmers \\ - --fasta ${metagenome} \\ - --kmers "kmers.tsv" \\ - --size "${params.kmer_size}" \\ - --norm-output "kmers.normalized.tsv" \\ - --norm-method "${params.norm_method}" \\ - --pca-dimensions "${params.pca_dimensions}" \\ - --embedding-output "kmers.embedded.tsv" \\ - --embedding-method "${params.embedding_method}" \\ - --embedding-dimensions "${params.embedding_dimensions}" \\ - --cpus "${task.cpus}" \\ - --seed 42 - - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt - """ -} diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 64e2241a0..cbfd55cf1 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process BEDTOOLS_GENOMECOV { tag "${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0" } else { @@ -21,20 +13,25 @@ process BEDTOOLS_GENOMECOV { tuple val(meta), path(bam) output: - tuple val(meta), path("alignments.bed"), emit: bed - path "*.version.txt" , emit: version + tuple val(meta), path("*alignments.bed"), emit: bed + path "versions.yml" , emit: versions when: meta.cov_from_assembly.equals('0') + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ bedtools \\ genomecov \\ -ibam ${bam} \\ - $options.args > alignments.bed + ${args} > ${prefix}.alignments.bed - bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS """ } diff --git a/modules/local/binning.nf b/modules/local/binning.nf index 877a65014..977d81de8 100644 --- a/modules/local/binning.nf +++ b/modules/local/binning.nf @@ -1,15 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process BINNING { tag "sample:${meta.id}, clustering:${params.clustering_method}, completeness:${params.completeness}, purity:${params.purity}, cov.std.dev.:${params.cov_stddev_limit}, gc.std.dev.:${params.gc_stddev_limit}" label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? 
"bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -20,24 +13,28 @@ process BINNING { errorStrategy { task.exitStatus in 204 ? 'ignore' : 'terminate' } input: - tuple val(meta), path(kmers), path(coverage), path(gc_content), path(markers), path(taxonomy) + tuple val(meta), path(kmers), path(markers), path(coverage), path(gc_content), path(taxonomy) output: - tuple val(meta), path("${params.kingdom}.binning.tsv.gz") , emit: binning - tuple val(meta), path("${params.kingdom}.binning.main.tsv.gz"), emit: main - path '*.version.txt' , emit: version + tuple val(meta), path("*.binning.tsv.gz") , emit: binning + tuple val(meta), path("*.binning.main.tsv.gz") , emit: main + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) taxonomy_call = params.taxonomy_aware ? "--taxonomy $taxonomy" : "" // https://github.com/nextflow-io/nextflow/issues/1694#issuecomment-683272275 + def prefix = task.ext.prefix ?: "${meta.id}" + def taxon = meta.taxon ?: "${meta.taxon}" """ autometa-binning \\ --kmers $kmers \\ --coverages $coverage \\ --gc-content $gc_content \\ --markers $markers \\ - --output-binning ${params.kingdom}.binning.tsv.gz \\ - --output-main ${params.kingdom}.binning.main.tsv.gz \\ + --output-binning ${prefix}.${taxon}.binning.tsv.gz \\ + --output-main ${prefix}.${taxon}.binning.main.tsv.gz \\ --clustering-method ${params.clustering_method} \\ --completeness ${params.completeness} \\ --purity ${params.purity} \\ @@ -47,8 +44,12 @@ process BINNING { --starting-rank ${params.binning_starting_rank} \\ --cpus ${task.cpus} \\ --rank-filter superkingdom \\ - --rank-name-filter ${params.kingdom} + --rank-name-filter ${taxon} \\ + --verbose - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/binning_summary.nf b/modules/local/binning_summary.nf index f0c8010db..db5dbeeda 100644 --- a/modules/local/binning_summary.nf +++ b/modules/local/binning_summary.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process BINNING_SUMMARY { tag "Gathering binning summary for ${meta.id}" label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -18,30 +10,34 @@ process BINNING_SUMMARY { } input: - tuple val(meta), path(binning_main), path(markers), path(metagenome) - val(binning_column) - path(ncbi) + tuple val(meta), path(binning_main), path(markers), path(metagenome), path(taxdump_files), val(dbtype), val(binning_column) output: - tuple val(meta), path("metabin_stats.tsv") , emit: stats - tuple val(meta), path("metabins") , emit: metabins - tuple val(meta), path("metabin_taxonomy.tsv"), emit: taxonomies, optional: true - path '*.version.txt' , emit: version + tuple val(meta), path("*metabin_stats.tsv") , emit: stats + tuple val(meta), path("*metabins") , emit: metabins + tuple val(meta), path("*metabin_taxonomy.tsv") , emit: taxonomies, optional: true + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-binning-summary \\ - --dbdir $ncbi \\ - --dbtype ncbi \\ + --dbdir . \\ + --dbtype ${dbtype} \\ --binning-main $binning_main \\ --markers $markers \\ --metagenome $metagenome \\ --binning-column $binning_column \\ - --output-stats "metabin_stats.tsv" \\ - --output-taxonomy "metabin_taxonomy.tsv" \\ - --output-metabins "metabins" - - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + --output-stats "${prefix}.metabin_stats.tsv" \\ + --output-taxonomy "${prefix}.metabin_taxonomy.tsv" \\ + --output-metabins "${prefix}.metabins" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/count_kmers.nf b/modules/local/count_kmers.nf index ff114fa0c..82ec8efcd 100644 --- a/modules/local/count_kmers.nf +++ b/modules/local/count_kmers.nf @@ -1,15 +1,9 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) process COUNT_KMERS { tag "Counting ${params.kmer_size}-mers for ${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? 
"autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa" } else { @@ -20,19 +14,25 @@ process COUNT_KMERS { tuple val(meta), path(metagenome) output: - tuple val(meta), path("kmers.tsv") , emit: counts - path '*.version.txt' , emit: version + tuple val(meta), path("*kmers.tsv") , emit: counts + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-kmers \\ --fasta $metagenome \\ - --kmers "kmers.tsv" \\ + --kmers "${prefix}.kmers.tsv" \\ --size "${params.kmer_size}" \\ --cpus "${task.cpus}" \\ --seed 42 - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/diamond_blastp.nf b/modules/local/diamond_blastp.nf index 20db06c68..cc78345ea 100644 --- a/modules/local/diamond_blastp.nf +++ b/modules/local/diamond_blastp.nf @@ -1,23 +1,14 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process DIAMOND_BLASTP { tag "Aligning ORFS in ${meta.id} against ${diamond_database}" label 'process_high' // Old diamond manual suggested *NOT* running in parallel... so we are setting maxForks to 1 here. - // TODO: There appears to be features for multiprocessing available now - // See: https://github.com/bbuchfink/diamond/wiki/6.-Distributed-computing maxForks 1 - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? 
"bioconda::diamond=2.0.14" : null) + conda "bioconda::diamond=2.1.10" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/diamond:2.0.14--hdcc8f71_0" + container "https://depot.galaxyproject.org/singularity/diamond:2.1.10--h43eeafb_2" } else { - container "quay.io/biocontainers/diamond:2.0.14--hdcc8f71_0" + container "quay.io/biocontainers/diamond:2.1.10--h43eeafb_2" } input: @@ -25,18 +16,25 @@ process DIAMOND_BLASTP { path(diamond_database) output: - tuple val(meta), path("blastp.tsv"), emit: diamond_results - path "*.version.txt" , emit: version + tuple val(meta), path("*blastp.tsv"), emit: diamond_results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - diamond blastp $options.args \\ + diamond blastp $args \\ --query ${protein_fasta} \\ --db ${diamond_database} \\ --threads ${task.cpus} \\ - --out blastp.tsv + --out ${prefix}.blastp.tsv - diamond version | sed 's/^.*diamond version //' > diamond.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS """ } diff --git a/modules/local/diamond_makedb.nf b/modules/local/diamond_makedb.nf index 629d24e12..24c3d36f4 100644 --- a/modules/local/diamond_makedb.nf +++ b/modules/local/diamond_makedb.nf @@ -1,17 +1,9 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -params.nr_dmnd_dir = null -options = initOptions(params.options) process DIAMOND_MAKEDB { tag ' Preparing Diamond database' label 'process_high' - storeDir "${params.nr_dmnd_dir}" - - conda (params.enable_conda ? 
"bioconda::diamond=2.0.9" : null) + conda "bioconda::diamond=2.0.9" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0" } else { @@ -23,17 +15,24 @@ process DIAMOND_MAKEDB { val(dbname) output: - path("*.dmnd"), emit: diamond_db - path "*.version.txt" , emit: version + path("*.dmnd") , emit: diamond_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def args = task.ext.args ?: '' """ diamond makedb --in ${fasta} \\ - $options.args \\ + $args \\ --threads ${task.cpus} \\ --db ${dbname} - diamond version | sed 's/^.*diamond version //' > diamond.version.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS """ } diff --git a/modules/local/embed_kmers.nf b/modules/local/embed_kmers.nf index 24b603747..145400777 100644 --- a/modules/local/embed_kmers.nf +++ b/modules/local/embed_kmers.nf @@ -1,15 +1,10 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process EMBED_KMERS { tag "PCA dims:${params.pca_dimensions}, dims:${params.embedding_dimensions}, method:${params.embedding_method}, sample:${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode +// errorStrategy ignore all + errorStrategy 'ignore' + conda "autometa" - conda (params.enable_conda ? "autometa" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa" } else { @@ -17,27 +12,33 @@ process EMBED_KMERS { } // Not enough contigs to perform embedding with current parameter settings... - errorStrategy { task.exitStatus in 153 ? 'ignore' : 'terminate' } + // errorStrategy { task.exitStatus in 153 ? 
'ignore' : 'terminate' } input: tuple val(meta), path(normalized) output: - tuple val(meta), path("kmers.embedded.tsv") , emit: embedded - path '*.version.txt' , emit: version + tuple val(meta), path("*kmers.embedded.tsv") , emit: embedded + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-kmers \\ --norm-output $normalized \\ --pca-dimensions "${params.pca_dimensions}" \\ - --embedding-output "kmers.embedded.tsv" \\ + --embedding-output "${prefix}.kmers.embedded.tsv" \\ --embedding-method "${params.embedding_method}" \\ --embedding-dimensions "${params.embedding_dimensions}" \\ --cpus "${task.cpus}" \\ --seed 42 - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/functions.nf b/modules/local/functions.nf deleted file mode 100644 index da9da093d..000000000 --- a/modules/local/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? 
path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/local/get_genomes_for_mock.nf b/modules/local/get_genomes_for_mock.nf index e7b8903bb..3d6c5c3e2 100644 --- a/modules/local/get_genomes_for_mock.nf +++ b/modules/local/get_genomes_for_mock.nf @@ -1,18 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process GET_GENOMES_FOR_MOCK { - def genome_count = options.args2.tokenize('|').size() - tag "fetching ${genome_count} genomes" - - storeDir = 'mock_data/genomes' cache 'lenient' - conda (params.enable_conda ? "bioconda::emboss=6.6.0" : null) - container "jasonkwan/autometa-nf-modules-get_genomes_for_mock:${params.autometa_image_tag}" + conda "bioconda::emboss=6.6.0" + container "jasonkwan/autometa-nf-modules-get_genomes_for_mock:main" output: path "metagenome.fna.gz", emit: metagenome @@ -21,23 +11,31 @@ process GET_GENOMES_FOR_MOCK { path "assembly_to_locus.txt", emit: assembly_to_locus path "assemblies.txt", emit: assemblies path "assembly_report.txt", emit: assembly_report + path "versions.yml" , emit: versions + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' """ - curl -s ${options.args} > assembly_report.txt + curl -s ${args} > assembly_report.txt - grep -E "${options.args2}" assembly_report.txt |\\ + grep -E "${args2}" assembly_report.txt |\\ awk -F '\\t' '{print \$20}' |\\ sed 's,https://,rsync://,' |\\ - xargs -n 1 -I {} \ - rsync -am \ - --exclude='*_rna_from_genomic.fna.gz' \ - --exclude='*_cds_from_genomic.fna.gz' \ - --include="*_genomic.fna.gz" \ - --include="*_protein.faa.gz" \ - --include='*/' \ + xargs -n 1 -I {} \\ + rsync -am \\ + --exclude='*_rna_from_genomic.fna.gz' \\ + --exclude='*_cds_from_genomic.fna.gz' \\ + --include="*_genomic.fna.gz" \\ + --include="*_protein.faa.gz" \\ + --include='*/' \\ --exclude='*' {} . # "clean_mock_data.sh" is here: ~/Autometa/bin/clean_mock_data.sh clean_mock_data.sh + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //' | sed 's/\s.*//') + END_VERSIONS """ } diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf deleted file mode 100644 index ccbf3f873..000000000 --- a/modules/local/get_software_versions.nf +++ /dev/null @@ -1,45 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -/* -This file is left in from the template, that's mainly used for QUAST (http://cab.spbu.ru/software/quast/). -There's a discussion that can be had later about incorporating that module fully or removing the remaining template that feeds into it -*/ - -process GET_SOFTWARE_VERSIONS { - label 'process_low' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) } - - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/python:3.8.3" - } else { - container "quay.io/biocontainers/python:3.8.3" - } - - cache false - - input: - path versions - - output: - path "software_versions.tsv" , emit: tsv - path 'software_versions_mqc.yaml', emit: yaml - path '*.version.txt' , emit: version - - script: - // Add soft-links to original FastQs for consistent naming in pipeline - def software = getSoftwareName(task.process) - """ - echo $workflow.manifest.version > pipeline.version.txt - echo $workflow.nextflow.version > nextflow.version.txt - scrape_software_versions.py &> software_versions_mqc.yaml - - echo "make linter happy" > ${software}.version.txt - """ -} diff --git a/modules/local/hmmer_hmmsearch.nf b/modules/local/hmmer_hmmsearch.nf index 7e49c6e2f..60972da59 100644 --- a/modules/local/hmmer_hmmsearch.nf +++ b/modules/local/hmmer_hmmsearch.nf @@ -9,19 +9,11 @@ the results of this process ======================= */ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process HMMER_HMMSEARCH { tag "Annotating ORFs in $meta.id" label 'process_medium' - // no publishdir - - conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null) + conda "bioconda::hmmer=3.3.2" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1" } else { @@ -34,20 +26,30 @@ process HMMER_HMMSEARCH { output: tuple val(meta), path("*.domtblout"), emit: domtblout - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def fastacmd = fasta.getExtension() == 'gz' ? 
"gunzip -c $fasta" : "cat $fasta" + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' """ + # hmmsearch can'ts use or pipe in gzipped fasta + + zcat "${fasta}" > temp.fa + hmmsearch \\ --domtblout "${hmm.simpleName}.domtblout" \\ - ${options.args} \\ - ${options.args2} \\ - $hmm \\ - $fasta > /dev/null 2>&1 - - echo \$(hmmalign -h | grep -o '^# HMMER [0-9.]*') | sed 's/^# HMMER *//' > HMMER.version.txt - """ + --cpu $task.cpus \\ + $args \\ + $args2 \\ + "${hmm}" \\ + temp.fa > /dev/null 2>&1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ } diff --git a/modules/local/hmmer_hmmsearch_filter.nf b/modules/local/hmmer_hmmsearch_filter.nf index 26d1d8cee..a306d6263 100644 --- a/modules/local/hmmer_hmmsearch_filter.nf +++ b/modules/local/hmmer_hmmsearch_filter.nf @@ -7,12 +7,6 @@ TODO: Not yet implemented */ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process HMMER_HMMSEARCH_FILTER { tag "Filtering marker hmms in ${meta.id}" label 'process_medium' @@ -20,9 +14,8 @@ process HMMER_HMMSEARCH_FILTER { // if ( params.num_splits < 2 ) { // if running in parallel, the results are published from the process // that merges the individual results from this process - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? "autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -35,11 +28,12 @@ process HMMER_HMMSEARCH_FILTER { output: tuple val(meta), path("markers.tsv"), emit: markers_tsv - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" """ autometa-hmmsearch-filter \\ --domtblout "$domtblout" \\ @@ -47,6 +41,9 @@ process HMMER_HMMSEARCH_FILTER { --seqdb "$fasta" \\ --out "markers.tsv" - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/length_table.nf b/modules/local/length_table.nf deleted file mode 100644 index a94c76833..000000000 --- a/modules/local/length_table.nf +++ /dev/null @@ -1,41 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process LENGTH_TABLE { - tag "${meta.id}" - label 'process_low' - - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::autometa" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" - } else { - container "jasonkwan/autometa:${params.autometa_image_tag}" - } - - input: - tuple val(meta), path(metagenome) - - output: - tuple val(meta), path("lengths.tsv"), emit: lengths - path '*.version.txt' , emit: version - - script: - def software = getSoftwareName(task.process) - """ - #!/usr/bin/env python - from Bio import SeqIO - import pandas as pd - - seqs = {record.id: len(record.seq) for record in SeqIO.parse(${metagenome}, "fasta")} - lengths = pd.Series(seqs, name="length") - lengths.index.name = "contig" - lengths.to_csv(lengths.tsv, sep="\t", index=True, header=True) - - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt - """ -} diff --git a/modules/local/majority_vote.nf b/modules/local/majority_vote.nf index 6271b7bd2..83a34a5d0 100644 --- a/modules/local/majority_vote.nf +++ b/modules/local/majority_vote.nf @@ -1,16 +1,9 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process MAJORITY_VOTE { tag "Performing taxon majority vote on ${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? "bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -19,21 +12,28 @@ process MAJORITY_VOTE { input: tuple val(meta), path(lca) - path(ncbi_tax_dir) + path taxdump_files // instead of passing to --dbdir, stage and pass '.' + val dbtype output: - tuple val(meta), path("votes.tsv"), emit: votes - path '*.version.txt' , emit: version + tuple val(meta), path("*votes.tsv") , emit: votes + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-taxonomy-majority-vote \\ --lca ${lca} \\ - --output votes.tsv \\ - --dbdir "${ncbi_tax_dir}" \\ - --dbtype ncbi - - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + --output ${prefix}.votes.tsv \\ + --dbdir . \\ + --dbtype ${dbtype} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/markers.nf b/modules/local/markers.nf index 5835735b7..25175761c 100644 --- a/modules/local/markers.nf +++ b/modules/local/markers.nf @@ -1,17 +1,11 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' -params.options = [:] -options = initOptions(params.options) // TODO: For faster results/less I/O this could be replaced with hmmsearch process MARKERS { - tag "Finding markers for ${meta.id}" + tag "Finding ${meta.taxon} markers for ${meta.id}" label "process_medium" - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -23,33 +17,33 @@ process MARKERS { input: tuple val(meta), path(orfs) - //path(hmmdb) currently only inside docker - //path(cutoffs) currently only inside docker output: - tuple val(meta), path("${params.kingdom}.markers.tsv"), emit: markers_tsv - tuple val(meta), path("${params.kingdom}.hmmscan.tsv"), emit: hmmscan_tsv - path '*.version.txt' , emit: version + tuple val(meta), path("*.markers.tsv") , emit: markers_tsv + tuple val(meta), path("*.hmmscan.tsv") , emit: hmmscan_tsv + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) - if (params.enable_conda) - """ - exit 1 - """ - else + def prefix = task.ext.prefix ?: "${meta.id}" + def kingdom = meta.taxon """ autometa-markers \\ --orfs $orfs \\ - --hmmscan ${params.kingdom}.hmmscan.tsv \\ - --out ${params.kingdom}.markers.tsv \\ - --kingdom ${params.kingdom} \\ + --hmmscan ${prefix}.${kingdom}.hmmscan.tsv \\ + --out ${prefix}.${kingdom}.markers.tsv \\ + --kingdom ${kingdom} \\ --parallel \\ --cpus ${task.cpus} \\ --seed 42 \\ - --hmmdb "/scratch/dbs/markers/${params.kingdom}.single_copy.hmm" \\ - --cutoffs "/scratch/dbs/markers/${params.kingdom}.single_copy.cutoffs" + --hmmdb "/scratch/dbs/markers/${kingdom}.single_copy.hmm" \\ + --cutoffs "/scratch/dbs/markers/${kingdom}.single_copy.cutoffs" - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/merge_fasta.nf b/modules/local/merge_fasta.nf index 461ca28d9..38e429611 100644 --- a/modules/local/merge_fasta.nf +++ b/modules/local/merge_fasta.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process MERGE_FASTA { tag "Merging ${meta.id} FASTA" label 'process_low' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::seqkit=0.16.1" : null) + conda "bioconda::seqkit=0.16.1" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/seqkit:0.16.1--h9ee0642_0" } else { @@ -23,14 +15,20 @@ process MERGE_FASTA { output: tuple val(meta), path("${meta.id}.${extension}"), emit: merged - path '*.version.txt' , emit: version + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) """ # If errors occur because of issues with symlinks, # try: cat * | seqkit sort -n > "${meta.id}.${extension}" seqkit sort -n * > "${meta.id}.${extension}" - seqkit version | sed 's/seqkit v//g' > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS """ } diff --git a/modules/local/merge_tsv.nf b/modules/local/merge_tsv.nf index b7ebccedb..ed17e58e0 100644 --- a/modules/local/merge_tsv.nf +++ b/modules/local/merge_tsv.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process MERGE_TSV_WITH_HEADERS { tag "Merging files from parallel split for ${meta.id}" label 'process_low' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE" @@ -26,8 +18,10 @@ process MERGE_TSV_WITH_HEADERS { tuple val(meta), path("${meta.id}.${extension}"), emit: merged_tsv + when: + task.ext.when == null || task.ext.when + script: - def software = getSoftwareName(task.process) """ awk 'FNR==1 && NR!=1{next;}{print}' *.tsv > "${meta.id}.${extension}" """ diff --git a/modules/local/mock_data_reporter.nf b/modules/local/mock_data_reporter.nf index 0c91af41d..cd471f68c 100644 --- a/modules/local/mock_data_reporter.nf +++ b/modules/local/mock_data_reporter.nf @@ -1,65 +1,31 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process MOCK_DATA_REPORT { tag 'Preparing mock data report' label 'process_low' - publishDir "${options.publish_dir}", mode: params.publish_dir_mode - - container "jasonkwan/autometa-nf-modules-mock_data_reporter:${params.autometa_image_tag}" + container "jasonkwan/autometa-nf-modules-mock_data_reporter:main" input: tuple val(meta), path(bins_path), path(assembly_to_locus_path), path(assembly_report_path) path(rmarkdown_file) output: - tuple val(meta), path("*.html"), emit: results + tuple val(meta), path("*.html") , emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: """ - #!/usr/bin/env Rscript - - packages <- c("markdown","data.table", "ggplot2", "plotly", "crosstalk", "magrittr", "DT", "stringi") - - for (i in packages) { - if (!requireNamespace(i)) { - install.packages(i) - } - library(i, character.only = T) - } - - rmarkdown::render( - input="${rmarkdown_file}", - params=list( - bins_path="${bins_path}", - assembly_to_locus_path="${assembly_to_locus_path}", - assembly_report_path="${assembly_report_path}", - genus=FALSE - ), - knit_root_dir=getwd(), - output_dir=getwd(), 
- output_file="mock_data_report_by_assembly.html" - ) + mock_data_report.R ${rmarkdown_file} ${bins_path} ${assembly_to_locus_path} ${assembly_report_path} - rmarkdown::render( - input="${rmarkdown_file}", - params=list( - bins_path= "${bins_path}", - assembly_to_locus_path = "${assembly_to_locus_path}", - assembly_report_path = "${assembly_report_path}", - genus=TRUE - ), - knit_root_dir=getwd(), - output_dir=getwd(), - output_file="mock_data_report_by_genus.html" - ) + cat <<-END_VERSIONS > versions.yml + "${task.process}": + R: 'For R and packages, see docker: jasonkwan/autometa-nf-modules-mock_data_reporter:main' + END_VERSIONS """ } diff --git a/modules/local/normalize_kmers.nf b/modules/local/normalize_kmers.nf index b559e52df..fcd1c0eec 100644 --- a/modules/local/normalize_kmers.nf +++ b/modules/local/normalize_kmers.nf @@ -1,15 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process NORMALIZE_KMERS { tag "method:${params.norm_method}, sample:${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - conda (params.enable_conda ? "autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa" } else { @@ -20,18 +13,24 @@ process NORMALIZE_KMERS { tuple val(meta), path(counts) output: - tuple val(meta), path("kmers.normalized.tsv"), emit: normalized - path '*.version.txt' , emit: version + tuple val(meta), path("*kmers.normalized.tsv"), emit: normalized + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-kmers \\ --kmers $counts \\ - --norm-output "kmers.normalized.tsv" \\ - --norm-method "${params.norm_method}" \\ + --norm-output ${prefix}.kmers.normalized.tsv \\ + --norm-method ${params.norm_method} \\ --seed 42 - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/parse_bed.nf b/modules/local/parse_bed.nf index c92a75560..49d1f03df 100644 --- a/modules/local/parse_bed.nf +++ b/modules/local/parse_bed.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process PARSE_BED { tag "$meta.id" label 'process_low' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0" } else { @@ -21,22 +13,26 @@ process PARSE_BED { tuple val(meta), path(bed) output: - tuple val(meta), path("coverage.tsv"), emit: coverage - path "*.version.txt" , emit: version + tuple val(meta), path("*coverage.tsv"), emit: coverage + path "versions.yml" , emit: versions when: meta.cov_from_assembly.equals('0') + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ # NOTE: Here we supply an argument to ibam to prevent raising an error # However, bed is the only arg required for nextflow since bed is generated from BEDTOOLS_GENOMECOV... autometa-bedtools-genomecov \\ --ibam . \\ --bed $bed \\ - --output coverage.tsv + --output ${prefix}.coverage.tsv - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/prepare_lca.nf b/modules/local/prepare_lca.nf index ce712cd3f..2d898f3a1 100644 --- a/modules/local/prepare_lca.nf +++ b/modules/local/prepare_lca.nf @@ -1,40 +1,42 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process PREPARE_LCA { - tag "Preparing db cache from ${blastdb_dir}" + tag "Preparing db cache for ${dbtype}" label 'process_medium' - conda (params.enable_conda ? "bioconda::autometa" : null) + conda "bioconda::autometa" + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { container "jasonkwan/autometa:${params.autometa_image_tag}" } - storeDir 'db/lca' - cache 'lenient' - input: - path(blastdb_dir) + path taxdump_files // instead of passing to --dbdir, stage and pass '.' + val dbtype output: path "cache" , emit: cache - path '*.version.txt' , emit: version + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + // storeDir = (dbtype == 'gtdb') ? params.gtdb_dir : (dbtype == 'ncbi' ? params.lca_dir : null) script: - def software = getSoftwareName(task.process) """ + # https://autometa.readthedocs.io/en/latest/scripts/taxonomy/lca.html autometa-taxonomy-lca \\ --blast . \\ --lca-output . \\ - --dbdir ${blastdb_dir} \\ - --dbtype ncbi \\ + --dbdir . 
\\ + --dbtype ${dbtype} \\ --cache cache \\ --only-prepare-cache - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/reduce_lca.nf b/modules/local/reduce_lca.nf index 031564557..2d47033bf 100644 --- a/modules/local/reduce_lca.nf +++ b/modules/local/reduce_lca.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process REDUCE_LCA { - tag "Finding LCA for ${meta.id}" + tag "Finding ${dbtype} LCA for ${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -19,27 +11,36 @@ process REDUCE_LCA { input: tuple val(meta), path(blast) - path(blastdb_dir) - path(lca_cache) + path taxdump_files // instead of passing to --dbdir, stage and pass '.' + path lca_cache + path prot_accession2taxid + val dbtype output: - tuple val(meta), path("lca.tsv"), emit: lca - path "lca_error_taxids.tsv" , emit: error_taxids - path "sseqid2taxid.tsv" , emit: sseqid_to_taxids - path '*.version.txt' , emit: version + tuple val(meta), path("*lca.tsv") , emit: lca + tuple val(meta), path("*lca_error_taxids.tsv") , emit: error_taxids + tuple val(meta), path("*sseqid2taxid.tsv") , emit: sseqid_to_taxids + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-taxonomy-lca \\ --blast ${blast} \\ - --dbdir ${blastdb_dir} \\ - --dbtype ncbi \\ + --dbdir . \\ + --dbtype ${dbtype} \\ --cache ${lca_cache} \\ - --lca-error-taxids lca_error_taxids.tsv \\ - --sseqid2taxid-output sseqid2taxid.tsv \\ - --lca-output lca.tsv - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + --lca-error-taxids ${prefix}.lca_error_taxids.tsv \\ + --sseqid2taxid-output ${prefix}.sseqid2taxid.tsv \\ + --lca-output ${prefix}.lca.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index fdd2ce933..2458abe2d 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,9 +2,8 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' - publishDir "${params.outdir}", mode: params.publish_dir_mode - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/python:3.8.3" } else { @@ -15,10 +14,19 @@ process SAMPLESHEET_CHECK { path samplesheet output: - path '*.csv' + path '*.csv' , emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: """ check_samplesheet.py $samplesheet samplesheet.valid.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | tail -n 1 | sed 's/^Python //') + END_VERSIONS """ } diff --git a/modules/local/samtools_view_sort.nf b/modules/local/samtools_view_sort.nf index af83e27d9..3c4d97728 100644 --- a/modules/local/samtools_view_sort.nf +++ b/modules/local/samtools_view_sort.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process SAMTOOLS_VIEW_AND_SORT { tag "$meta.id" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "bioconda::samtools=1.13" : null) + conda "bioconda::samtools=1.13" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/samtools:1.12--hd5e65b6_0" } else { @@ -21,18 +13,25 @@ process SAMTOOLS_VIEW_AND_SORT { tuple val(meta), path(sam) output: - tuple val(meta), path("alignments.bam"), emit: bam - path "*.version.txt" , emit: version + tuple val(meta), path("*.alignments.bam") , emit: bam + path "versions.yml" , emit: versions when: meta.cov_from_assembly.equals('0') + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ - samtools view ${options.args} -@ ${task.cpus} -bS ${sam} \\ - | samtools sort ${options.args2} -@ ${task.cpus} -o alignments.bam + samtools view ${args} -@ ${task.cpus} -bS ${sam} \\ + | samtools sort ${args2} -@ ${task.cpus} -o ${prefix}.alignments.bam - echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS """ } diff --git a/modules/local/seqkit_filter.nf b/modules/local/seqkit_filter.nf index 45d4bb9fe..e32bf7908 100644 --- a/modules/local/seqkit_filter.nf +++ b/modules/local/seqkit_filter.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process SEQKIT_FILTER { tag "Removing contigs < ${params.length_cutoff} bp, from ${meta.id}" label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 
"bioconda::seqkit=0.16.1" : null) + conda "bioconda::seqkit=0.16.1" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/seqkit:0.16.1--h9ee0642_0" } else { @@ -21,30 +13,38 @@ process SEQKIT_FILTER { tuple val(meta), path(metagenome) output: - tuple val(meta), path("filtered.fna") , emit: fasta - tuple val(meta), path("gc_content.tsv"), emit: gc_content - path '*.version.txt' , emit: version + tuple val(meta), path("*filtered.fna") , emit: fasta + tuple val(meta), path("*gc_content.tsv") , emit: gc_content + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) def metagenomecmd = metagenome.getExtension() == 'gz' ? "gunzip -c $metagenome" : "cat $metagenome" + def prefix = task.ext.prefix ?: "${meta.id}" """ # filter contigs by specified length + # `seqkit seq -i` "print ID instead of full head" ${metagenomecmd} | \\ - seqkit seq -j ${task.cpus} -m ${params.length_cutoff} | \\ - seqkit sort -n > "filtered.fna" + seqkit seq -i -j ${task.cpus} -m ${params.length_cutoff} | \\ + seqkit sort -n > "${prefix}.filtered.fna" # calculate gc content - seqkit fx2tab -j ${task.cpus} -n -lg "filtered.fna" > temp + seqkit fx2tab -j ${task.cpus} -n -lg "${prefix}.filtered.fna" > temp # Extract columns, create tsv awk '{FS="\\t"; OFS="\\t"; print \$1,\$3,\$2}' temp > temp2 - echo -e "contig\\tgc_content\\tlength" | cat - temp2 > "gc_content.tsv" + echo -e "contig\\tgc_content\\tlength" | cat - temp2 > "${prefix}.gc_content.tsv" # Remove temporary files rm temp rm temp2 - seqkit version | sed 's/seqkit v//g' > ${software}.version.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS """ } diff --git a/modules/local/seqkit_split.nf b/modules/local/seqkit_split.nf index 649e42178..1adbe3028 100644 --- a/modules/local/seqkit_split.nf +++ b/modules/local/seqkit_split.nf @@ -1,14 +1,9 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - +// Not implemented yet but thought was to split to parallelize prodigal process SEQKIT_SPLIT { tag "Splitting $meta.id for parallel processing" label 'process_medium' - conda (params.enable_conda ? "bioconda::seqkit=0.16.1" : null) + conda "bioconda::seqkit=0.16.1" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/seqkit:0.16.1--h9ee0642_0" } else { @@ -20,19 +15,25 @@ process SEQKIT_SPLIT { output: tuple val(meta), path("outfolder/*") , emit: fasta - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' """ seqkit \\ split \\ ${fasta} \\ - ${options.args} \\ - ${options.args2} \\ + ${args} \\ + ${args2} \\ -O outfolder - seqkit version | sed 's/seqkit v//g' > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS """ } diff --git a/modules/local/spades_kmer_coverage.nf b/modules/local/spades_kmer_coverage.nf index 6e803c6ef..d82cecfa3 100644 --- a/modules/local/spades_kmer_coverage.nf +++ b/modules/local/spades_kmer_coverage.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process SPADES_KMER_COVERAGE { tag "${meta.id}" label 'process_low' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE" } else { @@ -20,22 +12,24 @@ process SPADES_KMER_COVERAGE { input: tuple val(meta), path(metagenome) - output: - tuple val(meta), path("coverage.tsv") , emit: coverage - path '*.version.txt' , emit: version + tuple val(meta), path("*coverage.tsv") , emit: coverage + path 'versions.yml' , emit: versions when: meta.cov_from_assembly.equals('spades') script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ autometa-coverage \\ --assembly ${metagenome} \\ --from-spades \\ - --out "coverage.tsv" + --out "${prefix}.coverage.tsv" - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/split_kingdoms.nf b/modules/local/split_kingdoms.nf index 396cd828e..235feba6e 100644 --- a/modules/local/split_kingdoms.nf +++ b/modules/local/split_kingdoms.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process SPLIT_KINGDOMS { tag "Splitting votes into kingdoms for ${meta.id}" label 'process_medium' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "bioconda::autometa" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -19,26 +11,47 @@ process SPLIT_KINGDOMS { input: tuple val(meta), path(assembly), path(votes) - path(ncbi_tax_dir) + path taxdump_files // instead of passing to --dbdir, stage and pass '.' 
+ val dbtype output: - tuple val(meta), path("taxonomy.tsv"), emit: taxonomy - tuple val(meta), path("bacteria.fna"), emit: bacteria, optional: true - tuple val(meta), path("archaea.fna") , emit: archaea, optional: true - tuple val(meta), path("*.fna") , emit: kingdoms, optional: true - path '*.version.txt' , emit: version + tuple val(meta), path("${dbtype}/*.taxonomy.tsv") , emit: taxonomy + tuple val(meta), path("${dbtype}/*.fna") , emit: fna + tuple val(meta), path("${dbtype}/*.unclassified.fna") , emit: unclassified_fna, optional: true + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" """ + mkdir ${dbtype} autometa-taxonomy \\ --votes "${votes}" \\ - --output . \\ + --output "./${dbtype}" \\ --split-rank-and-write superkingdom \\ --assembly "${assembly}" \\ - --dbdir "${ncbi_tax_dir}" \\ - --dbtype ncbi - - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + --dbdir . \\ + --dbtype ${dbtype} + + # prefix all files in temp with the prefix + for file in ${dbtype}/*; do + mv "\$file" "${dbtype}/${prefix}.\$(basename \$file)" + done + + # Move .unclassified.fna files to a separate location for separate emitting + mkdir -p ${dbtype}_unclassified_fna + + for file in ${dbtype}/${prefix}.unclassified.*; do + if [ -e "\$file" ]; then + mv "\$file" ${dbtype}_unclassified_fna/ + fi + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/local/unclustered_recruitment.nf b/modules/local/unclustered_recruitment.nf index 460bf1b9a..bb0fa793b 100644 --- a/modules/local/unclustered_recruitment.nf +++ b/modules/local/unclustered_recruitment.nf @@ -1,16 +1,8 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - process RECRUIT { tag "sample:${meta.id}, classifier:${params.classification_method}, kmer dims:${params.classification_kmer_pca_dimensions}" label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? "autometa" : null) + conda "autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -21,16 +13,19 @@ process RECRUIT { errorStrategy { task.exitStatus in 204 ? 
'ignore' : 'terminate' } input: - tuple val(meta), path(kmers), path(coverage), path(binning), path(markers), path(taxonomy) + tuple val(meta), path(kmers), path(coverage), path(markers), path(taxonomy), path(binning) output: - tuple val(meta), path("${params.kingdom}.recruitment.tsv.gz") , emit: binning, optional: true - tuple val(meta), path("${params.kingdom}.recruitment.main.tsv.gz") , emit: main, optional: true - tuple val(meta), path("${params.kingdom}.recruitment.features.tsv.gz"), emit: features, optional: true - path '*.version.txt' , emit: version + tuple val(meta), path("${params.kingdom}.recruitment.tsv.gz") , emit: binning, optional: true + tuple val(meta), path("${params.kingdom}.recruitment.main.tsv.gz") , emit: main, optional: true + tuple val(meta), path("${params.kingdom}.recruitment.features.tsv.gz") , emit: features, optional: true + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def software = getSoftwareName(task.process) + def prefix = task.ext.prefix ?: "${meta.id}" if (!params.taxonomy_aware) """ autometa-unclustered-recruitment \\ @@ -41,27 +36,33 @@ process RECRUIT { --coverage $coverage \\ --binning $binning \\ --markers $markers \\ - --output-binning ${params.kingdom}.recruitment.tsv.gz \\ - --output-main ${params.kingdom}.recruitment.main.tsv.gz \\ - --output-features ${params.kingdom}.recruitment.features.tsv.gz + --output-binning ${prefix}.${params.kingdom}.recruitment.tsv.gz \\ + --output-main ${prefix}.${params.kingdom}.recruitment.main.tsv.gz \\ + --output-features ${prefix}.${params.kingdom}.recruitment.features.tsv.gz - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ else """ autometa-unclustered-recruitment \\ - --classifier ${params.classification_method} \\ - --kmer-dimensions ${params.classification_kmer_pca_dimensions} \\ + --classifier ${prefix}.${params.classification_method} \\ + --kmer-dimensions ${prefix}.${params.classification_kmer_pca_dimensions} \\ --seed 42 \\ --taxonomy $taxonomy \\ --kmers $kmers \\ --coverage $coverage \\ --binning $binning \\ --markers $markers \\ - --output-binning ${params.kingdom}.recruitment.tsv.gz \\ - --output-main ${params.kingdom}.recruitment.main.tsv.gz \\ - --output-features ${params.kingdom}.recruitment.features.tsv.gz + --output-binning ${prefix}.${params.kingdom}.recruitment.tsv.gz \\ + --output-main ${prefix}.${params.kingdom}.recruitment.main.tsv.gz \\ + --output-features ${prefix}.${params.kingdom}.recruitment.features.tsv.gz - autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + END_VERSIONS """ } diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 000000000..276f511e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,71 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' :
+        'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }"
+
+    input:
+    tuple val(meta) , path(reads)
+    tuple val(meta2), path(index)
+    val   save_unaligned
+    val   sort_bam
+
+    output:
+    tuple val(meta), path("*.bam")    , emit: bam
+    tuple val(meta), path("*.log")    , emit: log
+    tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true
+    path  "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ""
+    def args2 = task.ext.args2 ?: ""
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    def unaligned = ""
+    def reads_args = ""
+    if (meta.single_end) {
+        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ""
+        reads_args = "-U ${reads}"
+    } else {
+        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ""
+        reads_args = "-1 ${reads[0]} -2 ${reads[1]}"
+    }
+
+    def samtools_command = sort_bam ? 'sort' : 'view'
+
+    """
+    INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"`
+    [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"`
+    [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1
+
+    bowtie2 \\
+        -x \$INDEX \\
+        $reads_args \\
+        --threads $task.cpus \\
+        $unaligned \\
+        $args \\
+        2> ${prefix}.bowtie2.log \\
+        | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+
+    if [ -f ${prefix}.unmapped.fastq.1.gz ]; then
+        mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz
+    fi
+
+    if [ -f ${prefix}.unmapped.fastq.2.gz ]; then
+        mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml
new file mode 100644
index 000000000..c8e9a0012
--- /dev/null
+++ b/modules/nf-core/bowtie2/align/meta.yml
@@ -0,0 +1,67 @@
+name: bowtie2_align
+description: Align reads to a reference genome using bowtie2
+keywords:
+  - align
+  - map
+  - fasta
+  - fastq
+  - genome
+  - reference
+tools:
+  - bowtie2:
+      description: |
+        Bowtie 2 is an ultrafast and memory-efficient tool for aligning
+        sequencing reads to long reference sequences.
+      homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
+      documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
+      doi: 10.1038/nmeth.1923
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test', single_end:false ]
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 000000000..a941c4ee1 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "bioconda::multiqc=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versionss + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 000000000..60b546a01 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,34 @@ +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" + +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 000000000..da0334085 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all 
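CUSTOM_DUMPSOFTWAREVERSIONS expects a single collated file holding every per-module versions.yml. The conventional nf-core call site looks roughly like the sketch below; the include path and the ch_versions channel name are illustrative, not taken from this diff.

include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'

// ch_versions accumulates the versions.yml paths emitted by each module and subworkflow
CUSTOM_DUMPSOFTWAREVERSIONS(
    ch_versions.unique().collectFile( name: 'collated_versions.yml' )
)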
versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + Process Name + Software + Version + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + {process if (i == 0) else ''} + {tool} + {version} + + """ + ) + ) + html.append("") + html.append("") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/modules/bowtie2/align/functions.nf b/modules/nf-core/modules/bowtie2/align/functions.nf deleted file mode 100644 index da9da093d..000000000 --- a/modules/nf-core/modules/bowtie2/align/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// 
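The initOptions/saveFiles/getSoftwareName helpers being deleted here are superseded in DSL2 by task.ext plus a declarative publishDir, typically set once in conf/modules.config. A hedged sketch of that replacement pattern, with illustrative values:

process {
    publishDir = [
        path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
        mode: params.publish_dir_mode,
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }   // never publish versions.yml
    ]
}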
-def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf deleted file mode 100644 index ad6ed92eb..000000000 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ /dev/null @@ -1,71 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process BOWTIE2_ALIGN { - tag "$meta.id" - label 'process_high' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.2 bioconda::samtools=1.11 conda-forge::pigz=2.3.4' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:577a697be67b5ae9b16f637fd723b8263a3898b3-0" - } else { - container "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:577a697be67b5ae9b16f637fd723b8263a3898b3-0" - } - - input: - tuple val(meta), path(reads) - path index - - output: - tuple val(meta), path('*.bam'), emit: bam - tuple val(meta), path('*.log'), emit: log - path '*.version.txt' , emit: version - tuple val(meta), path('*fastq.gz'), optional:true, emit: fastq - - script: - def split_cpus = Math.floor(task.cpus/2) - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - if (meta.single_end) { - def unaligned = params.save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -U $reads \\ - --threads ${split_cpus} \\ - $unaligned \\ - $options.args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ ${split_cpus} $options.args2 -bhS -o ${prefix}.bam - - - echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//' > ${software}.version.txt - """ - } else { - def unaligned = params.save_unaligned ? 
"--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - --threads ${split_cpus} \\ - $unaligned \\ - $options.args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ ${split_cpus} $options.args2 -bhS -o ${prefix}.bam - - - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then - mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz - fi - if [ -f ${prefix}.unmapped.fastq.2.gz ]; then - mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz - fi - echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//' > ${software}.version.txt - """ - } -} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml deleted file mode 100644 index 9d9cd004b..000000000 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: bowtie2_align -description: Align reads to a reference genome using bowtie2 -keywords: - - align - - fasta - - genome - - reference -tools: - - bowtie2: - description: | - Bowtie 2 is an ultrafast and memory-efficient tool for aligning - sequencing reads to long reference sequences. - homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml - documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml - doi: 10.1038/nmeth.1923 -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - index: - type: file - description: Bowtie2 genome index files - pattern: "*.ebwt" -output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - fastq: - type: file - description: Unaligned FastQ files - pattern: "*.fastq.gz" - - log: - type: file - description: Aligment log - pattern: "*.log" -authors: - - "@joseespinosa" - - "@drpatelh" diff --git a/modules/nf-core/modules/prodigal/functions.nf b/modules/nf-core/modules/prodigal/functions.nf deleted file mode 100644 index da9da093d..000000000 --- a/modules/nf-core/modules/prodigal/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// 
-def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/modules/prodigal/main.nf b/modules/nf-core/modules/prodigal/main.nf deleted file mode 100644 index 36a13c5d1..000000000 --- a/modules/nf-core/modules/prodigal/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process PRODIGAL { - tag "Annotating $meta.id" - label 'process_low' - publishDir "${params.outdir}/${meta.id}", mode: params.publish_dir_mode - - - conda (params.enable_conda ? "bioconda::prodigal=2.6.3" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/prodigal:2.6.3--h516909a_2" - } else { - container "quay.io/biocontainers/prodigal:2.6.3--h516909a_2" - } - - input: - tuple val(meta), path(genome) - val(output_format) - - output: - tuple val(meta), path("orfs.${output_format}"), emit: gene_annotations - tuple val(meta), path("orfs.fna"), emit: nucleotide_fasta - tuple val(meta), path("orfs.faa"), emit: amino_acid_fasta - tuple val(meta), path("orfs_all.txt"), emit: all_gene_annotations - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - """ - prodigal -i ${genome} \\ - $options.args \\ - -f $output_format \\ - -d "orfs.fna" \\ - -o "orfs.${output_format}" \\ - -a "orfs.faa" \\ - -s "orfs_all.txt" - - echo \$(prodigal -v 2>&1) | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p' > ${software}.version.txt - """ -} diff --git a/modules/nf-core/modules/prodigal/meta.yml b/modules/nf-core/modules/prodigal/meta.yml deleted file mode 100644 index f20d878e0..000000000 --- a/modules/nf-core/modules/prodigal/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: prodigal -description: write your description here -keywords: - - sort -tools: - - prodigal: - description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a microbial (bacterial and archaeal) gene finding program - homepage: {} - documentation: {} - tool_dev_url: {} - doi: "" - licence: ["GPL v3"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -authors: - - "@grst" diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf new file mode 100644 index 000000000..8a2fe478b --- /dev/null +++ b/modules/nf-core/prodigal/main.nf @@ -0,0 +1,43 @@ +process PRODIGAL { + tag "$meta.id" + label 'process_single' + + conda "bioconda::autometa" + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" + } else { + container "jasonkwan/autometa:${params.autometa_image_tag}" + } + + input: + tuple val(meta), path(genome) + val(output_format) + + output: + tuple val(meta), path("${prefix}.${output_format}"), emit: gene_annotations + tuple val(meta), path("${prefix}.fna"), emit: nucleotide_fasta + tuple val(meta), path("${prefix}.faa"), emit: amino_acid_fasta + tuple val(meta), path("${prefix}_all.txt"), emit: all_gene_annotations + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + gzip -cdf ${genome} | prodigal \\ + $args \\ + -f $output_format \\ + -d "${prefix}.fna" \\ + -o "${prefix}.${output_format}" \\ + -a "${prefix}.faa" \\ + -s "${prefix}_all.txt" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml new file mode 100644 index 000000000..8cb3d12eb --- /dev/null +++ b/modules/nf-core/prodigal/meta.yml @@ -0,0 +1,55 @@ +name: prodigal +description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a microbial (bacterial and archaeal) gene finding program +keywords: + - sort +tools: + - prodigal: + description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a microbial (bacterial and archaeal) gene finding program + homepage: https://github.com/hyattpd/Prodigal + documentation: https://github.com/hyattpd/prodigal/wiki + tool_dev_url: https://github.com/hyattpd/Prodigal + doi: "10.1186/1471-2105-11-119" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - genome: + type: file + description: fasta/fasta.gz file + - output_format: + type: string + description: Output format ("gbk"/"gff"/"sqn"/"sco") + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
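The new PRODIGAL module decompresses its input on the fly (gzip -cdf) and takes the output format as a value input. A sketch of a call site; the include path and file names are hypothetical, and the format choices come from the module's meta.yml.

include { PRODIGAL } from '../modules/nf-core/prodigal/main'

workflow {
    genome_ch = Channel.of( [ [ id:'mock_data' ], file('metagenome.fna.gz') ] )

    PRODIGAL( genome_ch, 'gbk' )                    // output_format: one of gbk/gff/sqn/sco
    PRODIGAL.out.amino_acid_fasta.view()            // [ meta, <prefix>.faa ], used downstream for ORF/marker steps
}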
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - nucleotide_fasta: + type: file + description: nucleotide sequences file + pattern: "*.{fna}" + - amino_acid_fasta: + type: file + description: protein translations file + pattern: "*.{faa}" + - all_gene_annotations: + type: file + description: complete starts file + pattern: "*.{_all.txt}" + - gene_annotations: + type: file + description: gene annotations in output_format given as input + pattern: "*.{output_format}" + +authors: + - "@grst" diff --git a/nextflow.config b/nextflow.config index 9d9e7bad5..fca8f4d8f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,8 +4,7 @@ // ***************** manifest { - name = "autometa" - author = "Jason C. Kwan Lab" + name = "KwanLab/autometa" homePage = "https://github.com/KwanLab/Autometa" defaultBranch = "main" description = "Autometa: Automated Extraction of Microbial Genomes from Shotgun Metagenomes" @@ -46,13 +45,17 @@ params { */ taxonomy_aware = false - single_db_dir = null - nr_dmnd_dir = null - prot_accession2taxid_gz_dir = null - taxdump_tar_gz_dir = null + single_db_dir = "autometa_database_directory" + nr_dmnd_dir = "${params.single_db_dir}" + lca_dir = "${params.single_db_dir}" + prot_accession2taxid_gz_dir = "${params.single_db_dir}" + taxdump_tar_gz_dir = "${params.single_db_dir}" large_downloads_permission = false binning_starting_rank = "superkingdom" // choices: "superkingdom", "phylum", "class", "order", "family", "genus", "species" + gtdb_version = "220" + gtdb_dir = "${params.single_db_dir}/gtdb" + use_gtdb = false /* * ------------------------------------------------- * Binning Parameters @@ -70,9 +73,9 @@ params { classification_method = "decision_tree" classification_kmer_pca_dimensions = 50 completeness = 20.0 - purity = 90.0 - gc_stddev_limit = 25.0 - cov_stddev_limit = 5.0 + purity = 95.0 + gc_stddev_limit = 5.0 + cov_stddev_limit = 25.0 unclustered_recruitment = false /* * ------------------------------------------------- @@ -89,13 +92,11 @@ params { validate_params = true show_hidden_params = null schema_ignore_params = 'genomes,modules' - enable_conda = false singularity_pull_docker_container = null // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = [:] config_profile_description = null config_profile_contact = null config_profile_url = null @@ -103,12 +104,14 @@ params { // Max resource options // Defaults only, expecting to be overwritten - max_memory = '16.GB' - max_cpus = 4 - max_time = '240.h' - + max_memory = '200.GB' + max_cpus = 12 + max_time = '48.h' } +trace.overwrite = true +dag.overwrite = true + params.tracedir = "${params.outdir}/trace" @@ -131,7 +134,6 @@ profiles { standard { process.executor = "local" docker.enabled = true - docker.userEmulation = true singularity.enabled = false podman.enabled = false shifter.enabled = false @@ -141,31 +143,31 @@ profiles { process.executor = "slurm" // NOTE: You can determine your slurm partition (e.g. 
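With the database sub-directories now defaulting to ${params.single_db_dir}, pointing a run at an existing database location and raising the resource caps can be done from a small user config passed with -c. A hedged sketch; the paths and values are illustrative only.

// custom.config (hypothetical), supplied as: nextflow run ... -c custom.config
params {
    single_db_dir = '/data/autometa_databases'   // nr_dmnd_dir, lca_dir, prot_accession2taxid_gz_dir,
                                                 // taxdump_tar_gz_dir and gtdb_dir default to this in nextflow.config
    max_cpus      = 32
    max_memory    = '128.GB'
    max_time      = '72.h'
}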
process.queue) with the `sinfo` command process.queue = "queue" - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false executor { queueSize = 8 } } conda { - params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + createTimeout = '1 h' } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -198,6 +200,11 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + + apptainer.registry = 'quay.io' + docker.registry = 'registry.hub.docker.com' + podman.registry = 'quay.io' + singularity.registry = 'quay.io' } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -225,7 +232,7 @@ trace { } dag { enabled = true - file = "${params.outdir}/trace/pipeline_dag_${trace_timestamp}.svg" + file = "${params.outdir}/trace/pipeline_dag_${trace_timestamp}.html" } diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b4f2b2dd..8e38d3062 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,17 +10,13 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "publish_dir_mode" - ], + "required": ["input", "publish_dir_mode"], "properties": { "input": { "type": "string", "fa_icon": "fas fa-file-csv", "description": "Path to input comma-delimited sample sheet(s).", - "help_text": "Use this to specify your inputs' names, metagenomes, reads, coverage table and whether to calculate coverage from the assembly headers or from read alignments.\n\n\nSee https://autometa.readthedocs.io/en/latest/nextflow-workflow.html#sample-sheet-preparation for information on preparing an input sample sheet.", - "default": "" + "help_text": "Use this to specify your inputs' names, metagenomes, reads, coverage table and whether to calculate coverage from the assembly headers or from read alignments.\n\n\nSee https://autometa.readthedocs.io/en/latest/nextflow-workflow.html#sample-sheet-preparation for information on preparing an input sample sheet." 
}, "outdir": { "type": "string", @@ -110,25 +106,25 @@ }, "completeness": { "type": "number", - "default": 20, + "default": 20.0, "fa_icon": "fas fa-cogs", "description": "Minimum completeness needed to keep a cluster (default is at least 20% complete)" }, "purity": { "type": "number", - "default": 95, + "default": 95.0, "fa_icon": "fas fa-cogs", "description": "Minimum purity needed to keep a cluster (default is at least 95% pure)" }, "gc_stddev_limit": { "type": "number", - "default": 5, + "default": 5.0, "fa_icon": "fas fa-cogs", "description": "Maximum GC% standard deviation under which a cluster is kept (default is 5%)" }, "cov_stddev_limit": { "type": "number", - "default": 25, + "default": 25.0, "fa_icon": "fas fa-cogs", "description": "Maximum coverage standard deviation under which a cluster is kept (default is 25%)" }, @@ -183,14 +179,14 @@ "type": "string", "fa_icon": "fas fa-folder-open", "description": "Currently not used do not set", - "default": "Currently not used do not set", + "default": "autometa_database_directory", "hidden": true }, "taxdump_tar_gz_dir": { "type": "string", "fa_icon": "fas fa-folder-open", "description": "Currently not used do not set", - "default": "Currently not used do not set", + "default": "autometa_database_directory", "hidden": true }, "binning_starting_rank": { @@ -222,27 +218,22 @@ "properties": { "max_cpus": { "type": "integer", - "default": 4, + "default": 12, "fa_icon": "fas fa-microchip", "description": "Max cpus to use/request" }, "max_memory": { "type": "string", - "default": "16 GB", + "default": "200.GB", "fa_icon": "fas fa-memory", "description": "Max RAM to use/request" }, "max_time": { "type": "string", - "default": "240.h", + "default": "48.h", "fa_icon": "fas fa-clock", "description": "Max time a *single* process is allowed to run" }, - "enable_conda": { - "type": "boolean", - "fa_icon": "fas fa-snake", - "description": "Use conda?" 
- }, "use_run_name": { "type": "boolean", "hidden": true, @@ -269,11 +260,7 @@ "help_text": "Appends input to `jasonkwan/autometa`\n\njasonkwan/autometa:${params.autometa_image_tag}\"" } }, - "required": [ - "max_cpus", - "max_memory", - "max_time" - ] + "required": ["max_cpus", "max_memory", "max_time"] }, "generic_nf_core_options": { "title": "Generic nf-core options", @@ -350,11 +337,6 @@ "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true }, - "hostnames": { - "type": "string", - "default": "[binac:['.binac.uni-tuebingen.de'], cbe:['.cbe.vbc.ac.at'], cfc:['.hpc.uni-tuebingen.de'], crick:['.thecrick.org'], icr_davros:['.davros.compute.estate'], imperial:['.hpc.ic.ac.uk'], imperial_mb:['.hpc.ic.ac.uk'], genotoul:['.genologin1.toulouse.inra.fr', '.genologin2.toulouse.inra.fr'], genouest:['.genouest.org'], uppmax:['.uppmax.uu.se'], utd_ganymede:['ganymede.utdallas.edu'], utd_sysbio:['sysbio.utdallas.edu']]", - "hidden": true - }, "show_hidden_params": { "type": "string", "hidden": true @@ -364,9 +346,7 @@ "hidden": true } }, - "required": [ - "validate_params" - ] + "required": ["validate_params"] } }, "allOf": [ @@ -385,5 +365,26 @@ { "$ref": "#/definitions/generic_nf_core_options" } - ] + ], + "properties": { + "lca_dir": { + "type": "string", + "default": "autometa_database_directory" + }, + "gtdb_version": { + "type": "integer", + "default": 220 + }, + "gtdb_dir": { + "type": "string", + "default": "autometa_database_directory/gtdb" + }, + "use_gtdb": { + "type": "boolean" + }, + "schema_ignore_params": { + "type": "string", + "default": "genomes,modules" + } + } } diff --git a/setup.py b/setup.py index bd79d9947..fbe7337d1 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ def read(fname): "autometa-download-dataset = autometa.validation.datasets:main", "autometa-cami-format = autometa.validation.cami:main", "autometa-benchmark = autometa.validation.benchmark:main", + "autometa-download-gtdb = autometa.taxonomy.download_gtdb_files:main", "autometa = autometa.__main__:main", ] }, diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf index ddfd8aedf..2f9d9f918 100644 --- a/subworkflows/local/binning.nf +++ b/subworkflows/local/binning.nf @@ -1,73 +1,150 @@ -params.binning_options = [:] -params.binning_summary_options = [:] -params.taxdump_tar_gz_dir = [:] +include { BINNING } from '../../modules/local/binning' +include { RECRUIT } from '../../modules/local/unclustered_recruitment' +include { BINNING_SUMMARY } from '../../modules/local/binning_summary' -include { BIN_CONTIGS } from './../../modules/local/bin_contigs.nf' addParams( options: params.binning_options ) -include { BINNING_SUMMARY } from './../../modules/local/binning_summary.nf' addParams( options: params.binning_summary_options, taxdump_tar_gz_dir: params.taxdump_tar_gz_dir ) -workflow BINNING { - +workflow BIN { take: - metagenome - kmers_embedded - coverage - gc_content - markers - taxon_assignments - binning_column + filtered_metagenome_fasta + filtered_metagenome_gc_content + markers_ch + coverage_ch + taxonomy_results_ch + taxonomically_split_fna_ch + taxdump_files + dbtype main: - kmers_embedded - .join( - coverage - ) - .join( - gc_content - ) - .join( - markers - ) - .set{metagenome_annotations} - - if (params.taxonomy_aware) { - metagenome_annotations - .join( - taxon_assignments - ) - .set{binning_ch} - } else { - metagenome_annotations - .combine( - taxon_assignments - ) - .set{binning_ch} - } + ch_versions = Channel.empty() + + // has taxon: + // 
taxonomically_split_fna_ch + // markers_ch + // not has taxon: + // coverage_ch + // filtered_metagenome_gc_content + // taxonomy_results_ch + + // Transform taxonomic-specific channels (keep taxon info) + taxonomically_split_fna_ch = taxonomically_split_fna_ch + .map { meta, files -> + def key = [id: meta.id, taxon: meta.taxon] + [key, files] + } + + markers_ch = markers_ch + .map { meta, files -> + def key = [id: meta.id, taxon: meta.taxon] + [key, files] + } + + // Transform per-sample channels + coverage_ch = coverage_ch + .map { meta, files -> + [meta.id, files] + } + + filtered_metagenome_gc_content = filtered_metagenome_gc_content + .map { meta, files -> + [meta.id, files] + } + + taxonomy_results_ch = taxonomy_results_ch + .map { meta, files -> + [meta.id, files] + } + + // Create branched workflow + workflow_branch = taxonomically_split_fna_ch + .join(markers_ch) + .map { key, kmers, markers -> + // Use the full sample ID as the join key while preserving taxon info + [key.id, [id: key.id, taxon: key.taxon], kmers, markers] + } + .combine(coverage_ch, by: 0) + .combine(filtered_metagenome_gc_content, by: 0) + .combine(taxonomy_results_ch, by: 0) + .map { id, meta, kmers, markers, coverage, gc_content, taxonomy_results -> + // Final structure: [meta with taxon, files...] + [meta, kmers, markers, coverage, gc_content, taxonomy_results] + } + + // Set the output channel + workflow_branch.set { to_bin_ch } - BIN_CONTIGS ( - binning_ch + BINNING( + to_bin_ch ) - BIN_CONTIGS.out.main - .join( - markers - ).join( - metagenome + ch_versions = ch_versions.mix(BINNING.out.versions) + + if (params.unclustered_recruitment) { + // Prepare inputs for recruitment channel + + to_bin_ch + .join(BINNING.out.main) + .set{recruitment_ch} + + RECRUIT( + recruitment_ch ) - .set{binning_summary_ch} + ch_versions = ch_versions.mix(RECRUIT.out.versions) + + RECRUIT.out.main + .set{binning_results_ch} + binning_col = Channel.from("recruited_cluster") + } else { + binning_results_ch = BINNING.out.main + binning_col = Channel.from("cluster") + } + + + // Set inputs for binning summary + binning_results_ch + .map { meta, files -> [meta.subMap(['id']), meta, files] } + .join(markers_ch.map { meta, files -> [meta.subMap(['id']), files] }) + .join(filtered_metagenome_fasta.map { meta, files -> [meta.subMap(['id']), files] }) + .map { it.drop(1) } + .set{binning_summary_input_ch} + + if (params.debug) { + binning_results_ch.view { meta -> + println "binning_results_ch: ${meta}" + } + markers_ch.view { meta -> + println "markers_ch: ${meta}" + } + filtered_metagenome_fasta.view { meta -> + println "filtered_metagenome_fasta: ${meta}" + } + taxdump_files.view { meta -> + println "taxdump_files: ${meta}" + } + markers_ch.view { meta -> + println "markers_ch: ${meta}" + } + binning_col.view { meta -> + println "binning_col: ${meta}" + } + binning_summary_input_ch.view { meta -> + println "binning_summary_input_ch: ${meta}" + } + } - ncbi_tax_dir = file(params.taxdump_tar_gz_dir) + binning_summary_input_ch + .combine(taxdump_files.toList()) + .combine(dbtype) + .combine(binning_col) + .set{binning_summary_input_ch2} - BINNING_SUMMARY ( - binning_summary_ch, - binning_column, - ncbi_tax_dir + BINNING_SUMMARY( + binning_summary_input_ch2 ) + ch_versions = ch_versions.mix(BINNING_SUMMARY.out.versions) emit: - binning = BIN_CONTIGS.out.binning - binning_main = BIN_CONTIGS.out.main - summary_stats = BINNING_SUMMARY.out.stats - summary_taxa = BINNING_SUMMARY.out.taxonomies - metabins = BINNING_SUMMARY.out.metabins + 
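The wrangling above keys the taxon-split channels on both id and taxon while per-sample channels are keyed on id alone, then joins everything back onto one meta map. The same idiom in a tiny self-contained sketch, using toy values rather than the pipeline's channels:

workflow {
    per_taxon  = Channel.of( [ [ id:'s1', taxon:'bacteria' ], 'bacteria.kmers.tsv' ],
                             [ [ id:'s1', taxon:'archaea'  ], 'archaea.kmers.tsv'  ] )
    per_sample = Channel.of( [ [ id:'s1' ], 'coverage.tsv' ] )

    per_taxon
        .map { meta, kmers -> [ meta.id, meta, kmers ] }                       // expose the sample id as the join key
        .combine( per_sample.map { meta, cov -> [ meta.id, cov ] }, by: 0 )
        .map { id, meta, kmers, cov -> [ meta, kmers, cov ] }                  // drop the bare id, keep the full meta
        .view()
}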
binning_results = binning_results_ch + // binning_summary = BINNING_SUMMARY + versions = ch_versions } diff --git a/subworkflows/local/contig_coverage.nf b/subworkflows/local/calculate_coverage.nf similarity index 74% rename from subworkflows/local/contig_coverage.nf rename to subworkflows/local/calculate_coverage.nf index f52c7db4f..5769533bf 100644 --- a/subworkflows/local/contig_coverage.nf +++ b/subworkflows/local/calculate_coverage.nf @@ -1,39 +1,46 @@ +//TODO: These don't map to anything params.fwd_reads = null params.rev_reads = null params.se_reads = null -params.align_reads_options = [:] -params.samtools_viewsort_options = [:] -params.bedtools_genomecov_options = [:] +include { ALIGN_READS } from '../../modules/local/align_reads' +include { SAMTOOLS_VIEW_AND_SORT } from '../../modules/local/samtools_view_sort' +include { BEDTOOLS_GENOMECOV } from '../../modules/local/bedtools_genomecov' +include { PARSE_BED } from '../../modules/local/parse_bed' -include { ALIGN_READS } from '../../modules/local/align_reads' addParams( options: params.align_reads_options ) -include { SAMTOOLS_VIEW_AND_SORT } from '../../modules/local/samtools_view_sort' addParams( samtools_viewsort_options: params.samtools_viewsort_options ) -include { BEDTOOLS_GENOMECOV } from '../../modules/local/bedtools_genomecov' addParams( options: params.bedtools_genomecov_options ) -include { PARSE_BED } from '../../modules/local/parse_bed' addParams( ) - - -workflow CONTIG_COVERAGE { +workflow CALCULATE_COVERAGE { take: metagenome_reads_ch main: + ch_versions = Channel.empty() + ALIGN_READS( metagenome_reads_ch ) + ch_versions = ch_versions.mix(ALIGN_READS.out.versions) + SAMTOOLS_VIEW_AND_SORT( ALIGN_READS.out.sam ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_AND_SORT.out.versions) + BEDTOOLS_GENOMECOV( SAMTOOLS_VIEW_AND_SORT.out.bam ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions) + PARSE_BED(BEDTOOLS_GENOMECOV.out.bed) + ch_versions = ch_versions.mix(PARSE_BED.out.versions) emit: sam = ALIGN_READS.out.sam bam = SAMTOOLS_VIEW_AND_SORT.out.bam bed = BEDTOOLS_GENOMECOV.out.bed coverage = PARSE_BED.out.coverage + versions = ch_versions + } /* diff --git a/subworkflows/local/coverage.nf b/subworkflows/local/coverage.nf new file mode 100644 index 000000000..b7528dad7 --- /dev/null +++ b/subworkflows/local/coverage.nf @@ -0,0 +1,35 @@ +include { CALCULATE_COVERAGE } from './calculate_coverage' +include { SPADES_KMER_COVERAGE } from '../../modules/local/spades_kmer_coverage' + +workflow COVERAGE { + take: + filtered_metagenome_fasta + filtered_metagenome_fasta_and_reads + user_provided_coverage_table + + main: + // meta.cov_from_assembly.equals('0') + + ch_versions = Channel.empty() + + CALCULATE_COVERAGE(filtered_metagenome_fasta_and_reads) + ch_versions = ch_versions.mix(CALCULATE_COVERAGE.out.versions) + + SPADES_KMER_COVERAGE ( + filtered_metagenome_fasta, + ) + ch_versions = ch_versions.mix(SPADES_KMER_COVERAGE.out.versions) + + // https://nextflow-io.github.io/patterns/conditional-process/ + // basically "use input-table coverage, extracted spades coverage, or calculated coverage" + // TODO: this seems + user_provided_coverage_table + .mix(CALCULATE_COVERAGE.out.coverage) + .mix(SPADES_KMER_COVERAGE.out.coverage) + .set{coverage_ch} + + + emit: + coverage_ch = coverage_ch + versions = ch_versions +} diff --git a/subworkflows/local/functions.nf b/subworkflows/local/functions.nf deleted file mode 100644 index 4492f839c..000000000 --- a/subworkflows/local/functions.nf +++ /dev/null @@ -1,99 +0,0 @@ -/* 
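COVERAGE mixes three mutually exclusive coverage sources and relies on upstream routing so that each sample reaches only one of them (the meta.cov_from_assembly comment above hints at this). A hedged sketch of such routing with branch; the keys and values are assumptions based on that hint, not code from this PR.

workflow {
    samples = Channel.of( [ [ id:'s1', cov_from_assembly:'0'      ], file('s1.fna') ],
                          [ [ id:'s2', cov_from_assembly:'spades' ], file('s2.fna') ] )

    samples
        .branch { meta, fasta ->
            from_reads:  meta.cov_from_assembly == '0'        // compute coverage from read alignments
            from_spades: meta.cov_from_assembly == 'spades'   // parse coverage from SPAdes contig headers
        }
        .set { routed }

    routed.from_reads.view  { meta, fasta -> "align reads for ${meta.id}" }
    routed.from_spades.view { meta, fasta -> "parse headers for ${meta.id}" }
}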
-MIT License - -Copyright (c) 2018 nf-core - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? 
path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} - -/* - * Check file extension - */ -def hasExtension(it, extension) { - it.toString().toLowerCase().endsWith(extension.toLowerCase()) -} diff --git a/subworkflows/local/genome_coverage.nf b/subworkflows/local/genome_coverage.nf deleted file mode 100644 index 4843e04b0..000000000 --- a/subworkflows/local/genome_coverage.nf +++ /dev/null @@ -1,29 +0,0 @@ -params.bedtools_genomecov_options = [:] - -include { BEDTOOLS_GENOMECOV } from './../../modules/nf-core/modules/bedtools/genomecov.nf' addParams( options: params.bedtools_genomecov_options ) - -workflow GENOME_COVERAGE { - take: - bam // channel: [ val(meta), path(bam) ] - lengths // channel: [ val(meta), path(lengths) ] // https://bedtools.readthedocs.io/en/latest/content/general-usage.html#genome-file-format - - main: - bedtools_input_ch = bam.combine(lengths) - - BEDTOOLS_GENOMECOV ( - bedtools_input_ch - ) - - bam.out.bed - .combine(lengths) - .combine(BEDTOOLS_GENOMECOV.out.bed) - .set{parse_bed_input_ch} - - PARSE_BED ( - parse_bed_input_ch - ) - - emit: - bed = BEDTOOLS_GENOMECOV.out.bed - coverage = PARSE_BED.out.coverage -} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 7846ddd57..3c3e5dfbc 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -4,37 +4,41 @@ nextflow.enable.dsl=2 -params.options = [:] -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' addParams( options: params.options ) + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' workflow INPUT_CHECK { take: samplesheet // file: /path/to/samplesheet.csv main: + ch_versions = Channel.empty() + SAMPLESHEET_CHECK ( samplesheet ) + ch_versions = ch_versions.mix(SAMPLESHEET_CHECK.out.versions) // reads channel - SAMPLESHEET_CHECK.out + SAMPLESHEET_CHECK.out.csv .splitCsv ( header:true, sep:',' ) .map { create_fastq_channel(it) } .set { reads } // metagenome channel - SAMPLESHEET_CHECK.out + SAMPLESHEET_CHECK.out.csv .splitCsv ( header:true, sep:',' ) .map { create_metagenome_channel(it) } .set { metagenome } // coverage channel - SAMPLESHEET_CHECK.out + SAMPLESHEET_CHECK.out.csv .splitCsv ( header:true, sep:',' ) .map { create_coverage_channel(it) } .set { coverage } emit: - reads // channel: [ val(meta), [ reads ] ] - metagenome // channel: [ val(meta), [ assembly ]] - coverage // channel: [ val(meta), [ coverage ]] + reads = reads // channel: [ val(meta), [ reads ] ] + metagenome = metagenome // channel: [ val(meta), [ assembly ]] + coverage = coverage // channel: [ val(meta), [ coverage ]] + versions = ch_versions } diff --git a/subworkflows/local/kmers.nf b/subworkflows/local/kmers.nf index b9f9b057c..ca11dc1c6 100644 --- a/subworkflows/local/kmers.nf +++ b/subworkflows/local/kmers.nf @@ -1,19 +1,29 @@ -include { COUNT_KMERS as COUNT } from '../../modules/local/count_kmers' addParams( options: params.count_kmers_options ) -include { NORMALIZE_KMERS as NORMALIZE } from '../../modules/local/normalize_kmers' addParams( options: params.normalize_kmers_options ) -include { EMBED_KMERS as EMBED } from '../../modules/local/embed_kmers' addParams( 
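INPUT_CHECK parses the validated sample sheet three times with splitCsv plus a row-mapping function; the create_* helpers are presumably defined elsewhere in input_check.nf and are not shown in this diff. The idiom in isolation, with illustrative column names:

workflow {
    Channel
        .fromPath( 'samplesheet.csv' )
        .splitCsv( header:true, sep:',' )
        .map { row ->
            def meta = [ id: row.sample, cov_from_assembly: row.cov_from_assembly ]
            [ meta, file(row.assembly) ]     // shape of the "metagenome" channel: [ meta, assembly ]
        }
        .view()
}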
options: params.embed_kmers_options ) +include { COUNT_KMERS as COUNT } from '../../modules/local/count_kmers' +include { NORMALIZE_KMERS as NORMALIZE } from '../../modules/local/normalize_kmers' +include { EMBED_KMERS as EMBED } from '../../modules/local/embed_kmers' workflow KMERS { take: fasta main: + ch_versions = Channel.empty() + COUNT(fasta) + ch_versions = ch_versions.mix(COUNT.out.versions) + NORMALIZE(COUNT.out.counts) + ch_versions = ch_versions.mix(NORMALIZE.out.versions) + EMBED(NORMALIZE.out.normalized) + ch_versions = ch_versions.mix(EMBED.out.versions) + emit: counts = COUNT.out.counts normalized = NORMALIZE.out.normalized embedded = EMBED.out.embedded + versions = ch_versions + } /* diff --git a/subworkflows/local/lca.nf b/subworkflows/local/lca.nf index f5ced39f0..d697a9ce2 100644 --- a/subworkflows/local/lca.nf +++ b/subworkflows/local/lca.nf @@ -1,34 +1,43 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 -params.prepare_lca_options = [:] -params.reduce_lca_options = [:] - -include { PREPARE_LCA as PREP_DBS } from './../../modules/local/prepare_lca.nf' addParams( options: params.prepare_lca_options ) -include { REDUCE_LCA as REDUCE } from './../../modules/local/reduce_lca.nf' addParams( options: params.reduce_lca_options ) +include { PREPARE_LCA as PREP_DBS } from './../../modules/local/prepare_lca.nf' +include { REDUCE_LCA as REDUCE } from './../../modules/local/reduce_lca.nf' workflow LCA { take: blastp_results - blastp_dbdir + taxdump_files + prot_accession2taxid + dbtype main: + ch_versions = Channel.empty() + PREP_DBS( - blastp_dbdir + taxdump_files, + dbtype ) + ch_versions = ch_versions.mix(PREP_DBS.out.versions) + REDUCE( blastp_results, - blastp_dbdir, - PREP_DBS.out.cache + taxdump_files, + PREP_DBS.out.cache, + prot_accession2taxid.toList(), + dbtype ) + ch_versions = ch_versions.mix(REDUCE.out.versions) + emit: lca = REDUCE.out.lca error_taxid = REDUCE.out.error_taxids sseqid_to_taxids = REDUCE.out.sseqid_to_taxids cache = PREP_DBS.out.cache + versions = ch_versions } /* diff --git a/subworkflows/local/mock_data.nf b/subworkflows/local/mock_data.nf index eec9efd91..27cb0e4f0 100644 --- a/subworkflows/local/mock_data.nf +++ b/subworkflows/local/mock_data.nf @@ -1,13 +1,10 @@ -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.get_genomes_for_mock = [:] -include { GET_GENOMES_FOR_MOCK } from './../../modules/local/get_genomes_for_mock.nf' addParams( options: params.get_genomes_for_mock ) +include { GET_GENOMES_FOR_MOCK } from './../../modules/local/get_genomes_for_mock.nf' process SAMTOOLS_WGSIM { // This process is used to create simulated reads from an input FASTA file label 'process_low' - conda (params.enable_conda ? 
"bioconda::samtools=1.13" : null) + conda "bioconda::samtools=1.13" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/samtools:1.12--hd5e65b6_0" } else { @@ -15,58 +12,65 @@ process SAMTOOLS_WGSIM { } input: - path fasta + tuple val(meta), path(metagenome) output: - path("*.fastq"), emit: fastq - path "*.version.txt" , emit: version + tuple val(meta), path("reads_1.fastq"), path("reads_2.fastq"), emit: reads + path "versions.yml" , emit: versions """ # https://sarahpenir.github.io/bioinformatics/Simulating-Sequence-Reads-with-wgsim/ - wgsim -1 300 -2 300 -r 0 -R 0 -X 0 -e 0 ${fasta} reads_1.fastq reads_2.fastq + wgsim -1 300 -2 300 -r 0 -R 0 -X 0 -e 0 ${metagenome} reads_1.fastq reads_2.fastq + - echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > samtools.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS """ } workflow CREATE_MOCK { main: + ch_versions = Channel.empty() + // Download and format fasta files from specfied whole genome assemblies (genomes set from "get_genomes_for_mock" parameter in ~Autometa/conf/modules.config) GET_GENOMES_FOR_MOCK() - - // Create fake reads from input genome sequences - SAMTOOLS_WGSIM(GET_GENOMES_FOR_MOCK.out.metagenome) - + ch_versions = ch_versions.mix(GET_GENOMES_FOR_MOCK.out.versions) // Format everything with a meta map for use in the main Autometa pipeline - GET_GENOMES_FOR_MOCK.out.fake_spades_coverage + // see "create_" functions in ~subworkflows/local/input_check.nf + GET_GENOMES_FOR_MOCK.out.assembly_to_locus .map { row -> def meta = [:] meta.id = "mock_data" - meta.cov_from_assembly = "spades" return [ meta, row ] } - .set { ch_fasta } - GET_GENOMES_FOR_MOCK.out.assembly_to_locus + .set { assembly_to_locus } + GET_GENOMES_FOR_MOCK.out.assembly_report .map { row -> def meta = [:] meta.id = "mock_data" - meta.cov_from_assembly = "spades" return [ meta, row ] } - .set { assembly_to_locus } - GET_GENOMES_FOR_MOCK.out.assembly_report + .set { assembly_report } + + GET_GENOMES_FOR_MOCK.out.metagenome .map { row -> def meta = [:] meta.id = "mock_data" - meta.cov_from_assembly = "spades" return [ meta, row ] } - .set { assembly_report } + .set { metagenome } + + // Create fake reads from input genome sequences + SAMTOOLS_WGSIM(metagenome) + ch_versions = ch_versions.mix(SAMTOOLS_WGSIM.out.versions) emit: - fasta = ch_fasta - reads = SAMTOOLS_WGSIM.out.fastq + reads = SAMTOOLS_WGSIM.out.reads + fasta = metagenome assembly_to_locus = assembly_to_locus assembly_report = assembly_report + versions = ch_versions } diff --git a/subworkflows/local/prepare_gtdb.nf b/subworkflows/local/prepare_gtdb.nf new file mode 100644 index 000000000..3501a27a8 --- /dev/null +++ b/subworkflows/local/prepare_gtdb.nf @@ -0,0 +1,60 @@ + +include { DIAMOND_MAKEDB as GTDB_MAKEDB } from './../../modules/local/diamond_makedb.nf' + + +process DOWNLOAD_GTDB { + tag "Downloading GTDB database version ${params.gtdb_version}" + label 'process_low' + storeDir "${params.gtdb_dir}" + + conda "bioconda::autometa" + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" + } else { + container "jasonkwan/autometa:${params.autometa_image_tag}" + } + + output: + path 'autometa_formatted_gtdb-version-*.faa.gz' , emit: 
gtdb_formated_faa + path 'gtdb_taxdump-version-*/*' , emit: gtdb_taxdump_directory + path "versions.yml" , emit: versions + + script: + """ + autometa-download-gtdb --version $params.gtdb_version --outdir '.' + + rm gtdb-taxdump-version-*.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + gtdb: $params.gtdb_version + END_VERSIONS + """ +} + +workflow PREPARE_GTDB_DB { + + main: + ch_versions = Channel.empty() + + // if use_gtdb and large_downloads_permission is set to true, download the GTDB database + if (params.use_gtdb && params.large_downloads_permission) { + + DOWNLOAD_GTDB() + ch_versions = ch_versions.mix(DOWNLOAD_GTDB.out.versions) + + // get the single gtdb_formated_faa file and create the string e.g. autometa_formatted_gtdb-version-220.db from autometa_formatted_gtdb-version-220.faa.gz + dbname = DOWNLOAD_GTDB.out.gtdb_formated_faa.getName().replaceFirst(/\.gz$/, '').replaceFirst(/\.faa$/, '.dmnd') + GTDB_MAKEDB(DOWNLOAD_GTDB.out.gtdb_formated_faa, dbname) + ch_versions = ch_versions.mix(GTDB_MAKEDB.out.versions) + + } else { + println '\033[0;34m `--large_downloads_permission` is set to false. Skipping GTDB database download. \033[0m' + } + + emit: + diamond_db = GTDB_MAKEDB.out.diamond_db + gtdb_taxdump_directory = DOWNLOAD_GTDB.out.gtdb_taxdump_directory + versions = ch_versions +} diff --git a/subworkflows/local/prepare_ncbi_taxinfo.nf b/subworkflows/local/prepare_ncbi_taxinfo.nf index 5b6737393..8c406ca6a 100644 --- a/subworkflows/local/prepare_ncbi_taxinfo.nf +++ b/subworkflows/local/prepare_ncbi_taxinfo.nf @@ -1,11 +1,5 @@ // this file probably needs to be reevaluated, but from a python-first // perspective since the python code assumes file/directory structure -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -params.taxdump_tar_gz_dir = [:] -params.prot_accession2taxid_gz_dir = [:] -options = initOptions(params.options) process TEST_DOWNLOAD { // For development work so you don't download the entire prot.accession2taxid.gz database @@ -13,7 +7,7 @@ process TEST_DOWNLOAD { label 'process_low' storeDir "${params.prot_accession2taxid_gz_dir}" - conda (params.enable_conda ? "conda-forge::rsync=3.2.3" : null) + conda "conda-forge::rsync=3.2.3" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -23,6 +17,9 @@ process TEST_DOWNLOAD { output: path("prot.accession2taxid"), emit: singlefile + when: + task.ext.when == null || task.ext.when + script: """ # https://github.com/nextflow-io/nextflow/issues/1564 @@ -36,7 +33,7 @@ process DOWNLOAD_ACESSION2TAXID { label 'process_low' storeDir "${params.prot_accession2taxid_gz_dir}" - conda (params.enable_conda ? 
"conda-forge::rsync=3.2.3" : null) + conda "conda-forge::rsync=3.2.3" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -46,7 +43,10 @@ process DOWNLOAD_ACESSION2TAXID { output: // hack nf-core options.args3 and use for output name path "prot.accession2taxid.gz" , emit: accession2taxid - path "*.version.txt" , emit: version + path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ rsync -a \\ @@ -59,17 +59,18 @@ process DOWNLOAD_ACESSION2TAXID { md5sum -c *.md5 - rsync --version | head -n1 > rsync.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //' | sed 's/\s.*//') + END_VERSIONS """ } - process DOWNLOAD_TAXDUMP { tag "Downloading taxdump.tar.gz" label 'process_low' - storeDir "${params.taxdump_tar_gz_dir}" - conda (params.enable_conda ? "conda-forge::rsync=3.2.3" : null) + conda "conda-forge::rsync=3.2.3" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -77,8 +78,11 @@ process DOWNLOAD_TAXDUMP { } output: - path "*" , emit: taxdump_files - path "*.version.txt" , emit: version + path "*.dmp" , emit: taxdump_files + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: """ @@ -95,42 +99,53 @@ process DOWNLOAD_TAXDUMP { tar -xf taxdump.tar.gz rm taxdump.tar.gz - rsync --version | head -n1 > rsync.version.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //' | sed 's/\s.*//') + END_VERSIONS """ } - workflow PREPARE_TAXONOMY_DATABASES { main: + ch_versions = Channel.empty() + taxdump_dir = file(params.taxdump_tar_gz_dir) taxdump_dir_files = taxdump_dir.list() expected_files = ['citations.dmp', 'delnodes.dmp', 'division.dmp', 'gencode.dmp', 'merged.dmp', 'names.dmp', 'nodes.dmp'] - if (taxdump_dir_files.containsAll(expected_files)){ - taxdump_files = taxdump_dir_files + dmp_files = file("${params.taxdump_tar_gz_dir}/*.dmp") + taxonomy_files_exist = dmp_files.name.containsAll(expected_files) + + if (taxonomy_files_exist){ + taxdump_files = dmp_files } else { DOWNLOAD_TAXDUMP() + ch_versions = ch_versions.mix(DOWNLOAD_TAXDUMP.out.versions) DOWNLOAD_TAXDUMP.out.taxdump_files .set{taxdump_files} } - accession2taxid_dir = file(params.prot_accession2taxid_gz_dir) - accession2taxid_dir_files = accession2taxid_dir_files.list() - expected_files = ['prot.accession2taxid'] + taxonomy_files_exist2 = file("${params.prot_accession2taxid_gz_dir}/prot.accession2taxid.gz") - if (accession2taxid_dir_files.containsAll(expected_files)){ - prot_accession2taxid_ch = accession2taxid_dir_files + if (taxonomy_files_exist2.exists()){ + prot_accession2taxid_ch = taxonomy_files_exist2 } else if (params.debug){ TEST_DOWNLOAD().singlefile .set{prot_accession2taxid_ch} + } else { - DOWNLOAD_ACESSION2TAXID().accession2taxid + DOWNLOAD_ACESSION2TAXID() + DOWNLOAD_ACESSION2TAXID.out.accession2taxid .set{prot_accession2taxid_ch} + ch_versions = ch_versions.mix(DOWNLOAD_ACESSION2TAXID.out.versions) + } emit: - taxdump = taxdump_files + taxdump_files = taxdump_files prot_accession2taxid = prot_accession2taxid_ch + versions = ch_versions } diff --git a/subworkflows/local/prepare_nr.nf 
b/subworkflows/local/prepare_nr.nf index 0d1de4b68..36c58de06 100644 --- a/subworkflows/local/prepare_nr.nf +++ b/subworkflows/local/prepare_nr.nf @@ -1,20 +1,14 @@ -// Import generic module functions
-include { initOptions; saveFiles; getSoftwareName } from './functions'
-params.options = [:]
-options = initOptions(params.options)
-
-params.diamond_makedb_options = [:]
-params.nr_dmnd_dir = [:]
-
-include { DIAMOND_MAKEDB } from './../../modules/local/diamond_makedb.nf' addParams( options: params.diamond_makedb_options, nr_dmnd_dir: params.nr_dmnd_dir) +include { DIAMOND_MAKEDB } from './../../modules/local/diamond_makedb.nf' process DOWNLOAD_NR { tag "Downloading nr.gz (>100GB download. May take some time.)" label 'process_low' - storeDir "${params.nr_dmnd_dir}" + label 'process_long'
+
+ println '\033[0;34m Downloading nr.gz from NCBI, this may take a long time. \033[0m'

- conda (params.enable_conda ? "conda-forge::rsync=3.2.3" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -22,29 +16,34 @@ process DOWNLOAD_NR { } output: - path("nr.gz"), emit: singlefile + path("nr.gz") , emit: singlefile + path "versions.yml" , emit: versions
+
+ when: + task.ext.when == null || task.ext.when

 script: - """ - rsync -a \\ - --quiet \\ - 'rsync://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz' 'nr.gz' + def args = task.ext.args ?: '-x 8 -s 8'

- rsync -a \\ - --quiet \\ - 'rsync://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz.md5' 'nr.gz.md5' + """ + aria2c ${args} 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz' + aria2c 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz.md5'

 md5sum -c *.md5
+
+ cat <<-END_VERSIONS > versions.yml + "${task.process}": + aria2: \$(aria2c --version | head -n1 | sed 's/^aria2 version //' | sed 's/\s.*//') + END_VERSIONS """ } - process TEST_DOWNLOAD { // For development work so you don't download the entire nr.gz database - tag "Downloading first 10,000 lines of nr.gz" + tag "Generating a small nr.gz test set" label 'process_low' - conda (params.enable_conda ?
"conda-forge::rsync=3.2.3" : null) + conda "bioconda::autometa" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" } else { @@ -52,41 +51,85 @@ process TEST_DOWNLOAD { } output: - path("nr.gz"), emit: singlefile + path "nr.gz", emit: singlefile + + when: + task.ext.when == null || task.ext.when script: """ - # https://github.com/nextflow-io/nextflow/issues/1564 - trap 'echo OK; exit 0;' EXIT - curl -s ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz | zcat | head -n 10000 | gzip > nr.gz + +cat <<-END_VERSIONS > nr +>KJX92028.1 hypothetical protein TI39_contig5958g00003 [Zymoseptoria brevis] +MAWTRQLVPLMLLFCGAHGLQRSSTATDQLSNSALQALGSHADLAAFVNDVEAVPEIANVILAHRGITIMAPVDSAWLRV +DAIKRRNPAFLAWHIMNANVLTSDVPLVQYEQHPGITIPTFLSGSKNWTYSGEPASLISGGQSLTAITLKTEDNVIWVSG +ASNVSYIKQANISYDRGIIHKIDPALQFPTSAYETAFAVGLYSYCWAVFTAGLDQEIRRIPNSTFLLPINEAFHAALPFL +LGASREEFKRIVYRHVIPGRVLWSHEFYNASHETFEGSIVQIRGGNGRRWFVDDAMILDGSDKPLYNGVGHVVNKVLLPT +>EFG1759503.1 decarboxylating NADP(+)-dependent phosphogluconate dehydrogenase [Escherichia coli]EGJ4377881.1 decarboxylating NADP(+)-dependent phosphogluconate dehydrogenase [Escherichia coli] +LKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDG +EPCVTYIGADGAGHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFTKKDEDG +NYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKEQRVAASKVLSGPQAQPAGDKGEFIEKVRRALY +LGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIFRAGCIIRAQFLQKITDAYIENPQIANLLLAPYFKQIADNYQQALRE +VVAYAVQNGIPVPTFAAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>WP_198835266.1 pilus assembly protein [Paracoccus sp. IB05]MBJ2149627.1 pilus assembly protein [Paracoccus sp. IB05] +MTWRPLQRFLTRSDAAVTAEFVIVFPLVLALIFLIVFISMYISAASDLQQVVHELARYSYRYAGRPEANQLCATLERDAV +PILVNASLLLHPENFTLISCSPPQGPDRIIVITASYDFAGSFVQSVGRTLGLSIGTISRQSLFIP +>MBD3193859.1 hypothetical protein [Candidatus Lokiarchaeota archaeon]MBD3198741.1 hypothetical protein [Candidatus Lokiarchaeota archaeon] +MKKGFIVLILIALVSAGGLILFFYYSNDSGNGNFNTNSEKMIINHNHAHLEDFTSIPSEWIIAAKANLSIVYWHTSHGSQ +ITTGMSLLDAFMGDNDVYEFNNAGTGGALHYHEPSIDYSRRDLTGYTDQFDDETRTFLSSNPEYNVVIWSWCGLDKNNAS +INAYLTNMNQLESEYPNVHFVYMTAHLEGTGEDGDLHIYNQMIRRYCNKNNKTLYDFGDIESYNPENEYFLDRDANDGCY +YDSDGNGSLDANWATEWQSTHDGTHTYPNGGEWYDCSPAHSEAVNGNLKAYAAWYLFARLAGWNGT +>UMM52736.1 protein ORF58 [Lake sturgeon herpesvirus] +MGSMVKKRSRSLIPTSSITRWKTQSLKRPKATCASLRLTPRSTLSPQCHAGYGQSSPGANGLNRPVIDTWTRPSTAFGPS +TSLGWTPQTHIFLNGNFVSHTHGCSPAFFTATQHVNIVYNKKQQTSVFAPHLLPHKQIQSGTVLTDNNKFVTDKKKTFSV +QGVKNTRIEFTSLKNRSSNYTTNCRPLYQPAFQQFFELTGLCHGETSVTMSAMVVNNVNYTTCLYGLTNPFSFNFKICKD +HKKFHNTLFFPSVNLYKQAKGRQHQIFESRYINSQKIYPGDVNQFGFYLQTVVAQTEYDPCLNWYFCRHFEATKSFLNTP +NKTLILWFNERFYLAHPQVDIADPASYWPAYVTFMDLCVTPHLNHFIGFFSSGFGQYHNKNPEFIHLIPFLIFGAARGHN +QGLDLIASYAHRLSRLQRHESLLELRLILQIAVELLKNPQITLCDDPVRGMELSYPQSDDPDNDREKRAKKRRLVVVTKP +LCPPATVVRPLAGHQQSLVKKIQVYCQTCRRG +END_VERSIONS + +gzip nr + """ } workflow PREPARE_NR_DB { main: + ch_versions = Channel.empty() + + // TODO: this if/else can be simplified if (file("${params.nr_dmnd_dir}/nr.dmnd").exists()){ // skip huge download and db creation if nr.dmnd already exists out_ch = file("${params.nr_dmnd_dir}/nr.dmnd") } else if (file("${params.nr_dmnd_dir}/nr.gz").exists()){ // skip huge download if nr.gz already exists DIAMOND_MAKEDB(file("${params.nr_dmnd_dir}/nr.gz"), "nr") - DIAMOND_MAKEDB.out.diamond_db - .set{out_ch} + ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions) + out_ch = 
DIAMOND_MAKEDB.out.diamond_db + } else if (params.debug){ TEST_DOWNLOAD().singlefile .set{nr_db_ch} + DIAMOND_MAKEDB(nr_db_ch, "nr") - DIAMOND_MAKEDB.out.diamond_db - .set{out_ch} - } else { + ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions) + out_ch = DIAMOND_MAKEDB.out.diamond_db + + } else if (params.large_downloads_permission) { DOWNLOAD_NR().singlefile .set{nr_db_ch} + ch_versions = ch_versions.mix(DOWNLOAD_NR.out.versions) DIAMOND_MAKEDB(nr_db_ch, "nr") - DIAMOND_MAKEDB.out.diamond_db - .set{out_ch} + ch_versions = ch_versions.mix(DIAMOND_MAKEDB.out.versions) + out_ch = DIAMOND_MAKEDB.out.diamond_db + + } else { + println '\033[0;34m Neither nr.dmnd or nr.gz were found and `--large_downloads_permission` is set to false. \033[0m' } emit: diamond_db = out_ch + versions = ch_versions } diff --git a/subworkflows/local/process_metagenome.nf b/subworkflows/local/process_metagenome.nf new file mode 100644 index 000000000..4bb7a7da4 --- /dev/null +++ b/subworkflows/local/process_metagenome.nf @@ -0,0 +1,74 @@ +include { CREATE_MOCK } from './mock_data' +include { INPUT_CHECK } from './input_check' +include { SEQKIT_FILTER } from '../../modules/local/seqkit_filter' + +workflow PROCESS_METAGENOME { + + main: + + ch_versions = Channel.empty() + + // Samplesheet channel + Channel + .fromPath(params.input) + .set{samplesheet_ch} + + assembly_to_locus = Channel.empty() + assembly_report = Channel.empty() + + // Set the metagenome and coverage channels + if (params.mock_test){ + + CREATE_MOCK() + ch_versions = ch_versions.mix(CREATE_MOCK.out.versions) + + CREATE_MOCK.out.fasta + .set{metagenome_ch} + + Channel + .empty() + .set{user_provided_coverage_table} + + CREATE_MOCK.out.reads + .set{reads_ch} + + assembly_to_locus = CREATE_MOCK.out.assembly_to_locus + assembly_report = CREATE_MOCK.out.assembly_report + + } else { + + INPUT_CHECK(samplesheet_ch) + ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + + INPUT_CHECK.out.metagenome + .set{metagenome_ch} + + INPUT_CHECK.out.coverage + .set{user_provided_coverage_table} + + INPUT_CHECK.out.reads + .set{reads_ch} + + + } + + SEQKIT_FILTER( + metagenome_ch + ) + ch_versions = ch_versions.mix(SEQKIT_FILTER.out.versions) + + SEQKIT_FILTER.out.fasta + .join(reads_ch) + .set{combined_contigs_reads} + + emit: + raw_metagenome_fasta = metagenome_ch + filtered_metagenome_fasta = SEQKIT_FILTER.out.fasta + user_provided_coverage_table = user_provided_coverage_table + reads = reads_ch + filtered_metagenome_fasta_and_reads = combined_contigs_reads + filtered_metagenome_gc_content = SEQKIT_FILTER.out.gc_content + assembly_to_locus = assembly_to_locus + assembly_report = assembly_report + versions = ch_versions +} diff --git a/subworkflows/local/taxon_assignment.nf b/subworkflows/local/taxon_assignment.nf deleted file mode 100644 index f02714777..000000000 --- a/subworkflows/local/taxon_assignment.nf +++ /dev/null @@ -1,92 +0,0 @@ -params.prepare_lca_options = [:] -params.reduce_lca_options = [:] -params.majority_vote_options = [:] -params.split_kingdoms_options = [:] -params.nr_dmnd_dir = [:] -params.taxdump_tar_gz_dir = [:] -params.prot_accession2taxid_gz_dir = [:] -params.diamond_blastp_options = [:] - -params.debug = [:] -params.diamond_makedb_options = [:] -params.large_downloads_permission = [:] - - -include { PREPARE_NR_DB } from './prepare_nr.nf' addParams( debug: params.debug, diamond_makedb_options: params.diamond_makedb_options, nr_dmnd_dir: params.nr_dmnd_dir ) -include { PREPARE_TAXONOMY_DATABASES } from './prepare_ncbi_taxinfo.nf' 
addParams( debug: params.debug, taxdump_tar_gz_dir: params.taxdump_tar_gz_dir, prot_accession2taxid_gz_dir: params.prot_accession2taxid_gz_dir ) -include { LCA } from './lca.nf' addParams( prepare_lca_options: params.prepare_lca_options, reduce_lca_options: params.reduce_lca_options ) -include { MAJORITY_VOTE } from './../../modules/local/majority_vote.nf' addParams( options: params.majority_vote_options ) -include { SPLIT_KINGDOMS } from './../../modules/local/split_kingdoms.nf' addParams( options: params.split_kingdoms_options ) -include { DIAMOND_BLASTP } from './../../modules/local/diamond_blastp.nf' addParams( options: params.diamond_blastp_options ) - - -// Autometa taxon assignment workflow -workflow TAXON_ASSIGNMENT { - take: - metagenome - merged_prodigal - - main: - // check if user has given permission for large downloads - if (params.large_downloads_permission) { - // Download and prep necessary databases - PREPARE_NR_DB() - PREPARE_NR_DB.out.diamond_db - .set{diamond_db} - PREPARE_TAXONOMY_DATABASES() - PREPARE_TAXONOMY_DATABASES.out.taxdump - .set{ncbi_taxdump} - PREPARE_TAXONOMY_DATABASES.out.prot_accession2taxid - .set{prot_accession2taxid} - } else { - // check for nr.dmnd, if not found, check for nr.gz - // if nr.gz exists, create nr.dmnd - // if nr.gz also doesn't exist, stop the pipeline - if (!file("${params.nr_dmnd_dir}/nr.dmnd").exists()) { - if (file("${params.nr_dmnd_dir}/nr.gz").exists()) { - PREPARE_NR_DB() - PREPARE_NR_DB.out.diamond_db - .set{diamond_db} - } else { - throw new Exception("Neither nr.dmnd or nr.gz was found") - } - } else { - diamond_db = file("${params.nr_dmnd_dir}/nr.dmnd", checkIfExists: true) - } - } - - DIAMOND_BLASTP ( - merged_prodigal, - diamond_db - ) - - ncbi_tax_dir = file(params.taxdump_tar_gz_dir) - - LCA ( - DIAMOND_BLASTP.out.diamond_results, - ncbi_tax_dir - ) // output '${blast.simpleName}.lca.tsv' - - MAJORITY_VOTE ( - LCA.out.lca, - ncbi_tax_dir - ) //output ${lca.simpleName}.votes.tsv - - metagenome - .join( - MAJORITY_VOTE.out.votes - ) - .set{split_kingdoms_input} - - SPLIT_KINGDOMS ( - split_kingdoms_input, - ncbi_tax_dir - ) - - emit: - taxonomy = SPLIT_KINGDOMS.out.taxonomy - bacteria = SPLIT_KINGDOMS.out.bacteria - archaea = SPLIT_KINGDOMS.out.archaea - orf_votes = LCA.out.lca - contig_votes = MAJORITY_VOTE.out.votes -} diff --git a/subworkflows/local/taxon_assignment_gtdb.nf b/subworkflows/local/taxon_assignment_gtdb.nf new file mode 100644 index 000000000..3da416f5b --- /dev/null +++ b/subworkflows/local/taxon_assignment_gtdb.nf @@ -0,0 +1,109 @@ + +include { PREPARE_GTDB_DB } from './prepare_gtdb.nf' +include { TAXON_SPLIT } from './taxon_split.nf' + +process EXTRACT_ORFS { + tag "Extracting ORFs from taxon-assigned metagenome contigs" + label 'process_low' + + conda "bioconda::autometa" + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/autometa:2.2.0--pyh7cba7a3_0" + } else { + container "jasonkwan/autometa:${params.autometa_image_tag}" + } + + input: + tuple val(meta), path(contigs), path (orfs) + + output: + tuple val(meta), path("${meta.id}.gtdb_input.fna"), path("${meta.id}_gtdb_input_orfs.faa.gz"), emit: split_orfs + path "versions.yml", emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + grep -h ">" $contigs | \\ + sed 's/^>//' | \\ + cut -f1 -d" " | \\ + sed 's/\\\$/_/' | \\ + grep -f - $orfs |\\ + cut -f1 -d" " |\\ + sed 's/^>//' > orf_ids + + # Retrieve ORF seqs from ORF IDs + 
seqkit grep \ + --pattern-file orf_ids \ + --out-file ${meta.id}_gtdb_input_orfs.faa.gz \ + $orfs + + cat $contigs > ${meta.id}.gtdb_input.fna + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + autometa: \$(autometa --version | sed -e 's/autometa: //g') + gtdb: $params.gtdb_version + END_VERSIONS + """ +} + + +// Autometa taxon assignment workflow +workflow GTDB_TAXON_ASSIGNMENT { + take: + split_metagenome_contigs + orfs + + main: + ch_versions = Channel.empty() + dbtype_ch = channel.value( 'gtdb') + + PREPARE_GTDB_DB() + ch_versions = ch_versions.mix(PREPARE_GTDB_DB.out.versions) + + // combine contigs and orfs into one channel + split_metagenome_contigs + .filter { meta, path -> + meta.taxon in ['bacteria', 'archaea'] + } + .map { meta, path -> + def cleanMeta = meta.findAll { k,v -> k != 'taxon' } + [cleanMeta, path] + } + .groupTuple(by: 0) + .combine( + orfs, by: 0 + ) + .set { contigs_and_orfs_ch } + + + EXTRACT_ORFS(contigs_and_orfs_ch) + + prot_accession2taxid_ch =Channel.fromPath(file("$baseDir/assets/dummy_file.txt", checkIfExists: true )) + + + TAXON_SPLIT( + EXTRACT_ORFS.out.split_orfs, + PREPARE_GTDB_DB.out.diamond_db, + PREPARE_GTDB_DB.out.gtdb_taxdump_directory, + prot_accession2taxid_ch, + dbtype_ch + ) + ch_versions = ch_versions.mix(TAXON_SPLIT.out.versions) + + + TAXON_SPLIT.out.taxonomically_split_fna.view { meta -> + println "taxonomically_split_fnabro: ${meta}" + } + + emit: + taxonomy = TAXON_SPLIT.out.taxonomy + taxonomically_split_fna = TAXON_SPLIT.out.taxonomically_split_fna + lca = TAXON_SPLIT.out.lca + votes = TAXON_SPLIT.out.votes + taxdump_files = PREPARE_GTDB_DB.out.gtdb_taxdump_directory + dbtype = dbtype_ch + versions = ch_versions + +} + diff --git a/subworkflows/local/taxon_assignment_ncbi.nf b/subworkflows/local/taxon_assignment_ncbi.nf new file mode 100644 index 000000000..ee8a511b2 --- /dev/null +++ b/subworkflows/local/taxon_assignment_ncbi.nf @@ -0,0 +1,44 @@ + +include { PREPARE_NR_DB } from './prepare_nr.nf' +include { PREPARE_TAXONOMY_DATABASES } from './prepare_ncbi_taxinfo.nf' +include { TAXON_SPLIT } from './taxon_split.nf' + +// Autometa taxon assignment workflow +workflow NCBI_TAXON_ASSIGNMENT { + take: + filtered_metagenome_fasta + orfs + + main: + ch_versions = Channel.empty() + dbtype_ch = channel.value( 'ncbi') + + PREPARE_TAXONOMY_DATABASES() + ch_versions = ch_versions.mix(PREPARE_TAXONOMY_DATABASES.out.versions) + + PREPARE_NR_DB() + ch_versions = ch_versions.mix(PREPARE_NR_DB.out.versions) + + contigs_and_orfs = filtered_metagenome_fasta.join(orfs) + + TAXON_SPLIT( + contigs_and_orfs, + PREPARE_NR_DB.out.diamond_db, + PREPARE_TAXONOMY_DATABASES.out.taxdump_files, + PREPARE_TAXONOMY_DATABASES.out.prot_accession2taxid, + dbtype_ch + ) + + ch_versions = ch_versions.mix(TAXON_SPLIT.out.versions) + + emit: + taxonomy = TAXON_SPLIT.out.taxonomy + taxonomically_split_fna = TAXON_SPLIT.out.taxonomically_split_fna + lca = TAXON_SPLIT.out.lca + votes = TAXON_SPLIT.out.votes + taxdump_files = PREPARE_TAXONOMY_DATABASES.out.taxdump_files + dbtype = dbtype_ch + versions = ch_versions + +} + diff --git a/subworkflows/local/taxon_split.nf b/subworkflows/local/taxon_split.nf new file mode 100644 index 000000000..312897d6c --- /dev/null +++ b/subworkflows/local/taxon_split.nf @@ -0,0 +1,90 @@ + +include { PREPARE_NR_DB } from './prepare_nr.nf' +include { PREPARE_TAXONOMY_DATABASES } from './prepare_ncbi_taxinfo.nf' +include { LCA } from './lca.nf' +include { MAJORITY_VOTE } from './../../modules/local/majority_vote.nf' +include { 
SPLIT_KINGDOMS } from './../../modules/local/split_kingdoms.nf' +include { DIAMOND_BLASTP } from './../../modules/local/diamond_blastp.nf' + + + + +// Autometa taxon assignment workflow +workflow TAXON_SPLIT { + take: + contigs_and_orfs + diamond_db_ch + taxdump_ch + prot_accession2taxid_ch + dbtype_ch + + main: + ch_versions = Channel.empty() + + contigs_and_orfs.multiMap { meta, fna_file, orfs_file -> + fna: [meta, fna_file] + orfs: [meta, orfs_file] + }.set { result } + + DIAMOND_BLASTP ( + result.orfs, + diamond_db_ch + ) + ch_versions = ch_versions.mix(DIAMOND_BLASTP.out.versions) + + LCA ( + DIAMOND_BLASTP.out.diamond_results, + taxdump_ch, + prot_accession2taxid_ch, + dbtype_ch + ) + ch_versions = ch_versions.mix(LCA.out.versions) + + MAJORITY_VOTE ( + LCA.out.lca, + taxdump_ch, + dbtype_ch + ) + ch_versions = ch_versions.mix(MAJORITY_VOTE.out.versions) + + result.fna + .join( + MAJORITY_VOTE.out.votes + ) + .set{split_kingdoms_input} + + SPLIT_KINGDOMS ( + split_kingdoms_input, + taxdump_ch, + dbtype_ch + ) + + // Step 1: Generate combinations of meta and fna_file and flatten them correctly + // handle if multiple fna files are present + SPLIT_KINGDOMS.out.fna.map { meta, fna_file -> + fna_file = fna_file instanceof List ? fna_file : [fna_file] + [[meta], fna_file].combinations() + }.flatten().collate(2) // Creates pairs of [meta, fna_file] + .set { tempch1 } + + // Step 2: Map each pair to set the taxon correctly for each meta-fna_file pair + tempch1.map{ meta, fna_file -> + // Set the taxon by extracting it from the fna_file name + def new_meta = meta.clone() + new_meta.taxon = fna_file.getName().tokenize('.')[-2] + return [new_meta, fna_file] // Return a copy of meta to ensure independent taxon setting + } .set { taxonomically_split_fna_ch } + + + ch_versions = ch_versions.mix(SPLIT_KINGDOMS.out.versions) + + emit: + taxonomy = SPLIT_KINGDOMS.out.taxonomy + taxonomically_split_fna = taxonomically_split_fna_ch + lca = LCA.out.lca + votes = MAJORITY_VOTE.out.votes + taxdump_files = taxdump_ch + dbtype = dbtype_ch + versions = ch_versions + +} diff --git a/subworkflows/local/taxonomy_workflow.nf b/subworkflows/local/taxonomy_workflow.nf new file mode 100644 index 000000000..061bf749d --- /dev/null +++ b/subworkflows/local/taxonomy_workflow.nf @@ -0,0 +1,53 @@ + +include { NCBI_TAXON_ASSIGNMENT as NCBI } from './taxon_assignment_ncbi.nf' +include { GTDB_TAXON_ASSIGNMENT as GTDB_REFINEMENT } from './taxon_assignment_gtdb.nf' + +// Autometa taxon assignment workflow +workflow TAXONOMY_WORKFLOW { + take: + filtered_metagenome_fasta + merged_prodigal + + main: + ch_versions = Channel.empty() + + if (params.taxonomy_aware) { + NCBI( + filtered_metagenome_fasta, + merged_prodigal + ) + ch_versions = ch_versions.mix(NCBI.out.versions) + + if (params.use_gtdb) { + GTDB_REFINEMENT( + NCBI.out.taxonomically_split_fna, + merged_prodigal + ) + ch_versions = ch_versions.mix(GTDB_REFINEMENT.out.versions) + taxonomy = GTDB_REFINEMENT.out.taxonomy + taxonomically_split_fna_ch = GTDB_REFINEMENT.out.taxonomically_split_fna + orf_votes = GTDB_REFINEMENT.out.lca + contig_votes = GTDB_REFINEMENT.out.votes + taxdump_files = GTDB_REFINEMENT.out.taxdump_files + dbtype = GTDB_REFINEMENT.out.dbtype + + } else { + taxonomy = NCBI.out.taxonomy + taxonomically_split_fna_ch = NCBI.out.taxonomically_split_fna + orf_votes = NCBI.out.lca + contig_votes = NCBI.out.votes + taxdump_files = NCBI.out.taxdump_files + dbtype = NCBI.out.dbtype + } + } + + emit: + taxonomy = taxonomy + taxonomically_split_fna = 
taxonomically_split_fna_ch + orf_votes = orf_votes + contig_votes = contig_votes + taxdump_files = taxdump_files + dbtype = dbtype + versions = ch_versions +} + diff --git a/subworkflows/local/unclustered_recruitment.nf b/subworkflows/local/unclustered_recruitment.nf deleted file mode 100644 index afd656f21..000000000 --- a/subworkflows/local/unclustered_recruitment.nf +++ /dev/null @@ -1,70 +0,0 @@ -params.binning_options = [:] -params.unclustered_recruitment_options = [:] -params.binning_summary_options = [:] -params.taxdump_tar_gz_dir = [:] - -include { RECRUIT } from './../../modules/local/unclustered_recruitment.nf' addParams( options: params.unclustered_recruitment_options ) -include { BINNING_SUMMARY as UNCLUSTERED_BINNING_SUMMARY } from './../../modules/local/binning_summary.nf' addParams( options: params.binning_summary_options, taxdump_tar_gz_dir: params.taxdump_tar_gz_dir ) - - -workflow UNCLUSTERED_RECRUITMENT { - - take: - metagenome - kmers_normalized - coverage - markers - taxon_assignments - binning - - main: - - kmers_normalized - .join( - coverage - ).join( - binning //BINNING.out.binning - ).join( - markers - ) - .set{coverage_binningout_markers} - - if (params.taxonomy_aware) { - coverage_binningout_markers - .join( - taxon_assignments - ) - .set{unclustered_recruitment_ch} - } else { - coverage_binningout_markers - .combine( - taxon_assignments - ) - .set{unclustered_recruitment_ch} - } - - RECRUIT ( - unclustered_recruitment_ch - ) - - RECRUIT.out.main - .join( - markers - ).join( - metagenome - ) - .set{unclustered_recruitment_summary_ch} - - // UNCLUSTERED_BINNING_SUMMARY ( - // unclustered_recruitment_summary_ch, - // "recruited_cluster" - // ) - - emit: - recruitment = RECRUIT.out.binning - recruitment_main = RECRUIT.out.main - all_binning_results = binning | mix(RECRUIT.out) | collect - // unclustered_recruitment_summary_stats = UNCLUSTERED_BINNING_SUMMARY.out.stats - // unclustered_recruitment_summary_taxa = UNCLUSTERED_BINNING_SUMMARY.out.taxonomies - // unclustered_recruitment_metabins = UNCLUSTERED_BINNING_SUMMARY.out.metabins -} diff --git a/workflows/autometa.nf b/workflows/autometa.nf index ae84aa7c0..1a0280281 100644 --- a/workflows/autometa.nf +++ b/workflows/autometa.nf @@ -4,49 +4,14 @@ * ------------------------------------------------- */ -def modules = params.modules.clone() - -if (params.single_db_dir) { - internal_nr_dmnd_dir = params.single_db_dir - internal_prot_accession2taxid_gz_dir = params.single_db_dir - internal_taxdump_tar_gz_dir = params.single_db_dir -} -// TODO: when implementing the ability to set individual DB dirs -// just override e.g. 'internal_nr_dmnd_location' here so users can set -// 'single_db_dir' but also set individual other db paths if they have them -// e.g. if they have nr.dmnd but not the other files. - -if (params.large_downloads_permission) { - // TODO: check if files already exist, if they don't fail the pipeline early at this stage -} else { - // TODO: check if files exist, if they don't fail the pipeline early at this stage -} - -// if these are still null then it means they weren't set, so make them null. -// this only works because the markov models are inside the docker image. 
-// that needs to be changed in future versions - -if (!params.taxonomy_aware) { - single_db_dir = null - internal_nr_dmnd_dir = null - internal_prot_accession2taxid_gz_dir = null - internal_taxdump_tar_gz_dir = null -} - /* * ------------------------------------------------- * Import local modules * ------------------------------------------------- */ - -include { GET_SOFTWARE_VERSIONS } from '../modules/local/get_software_versions' addParams( options: [publish_files : ['csv':'']] ) -include { SEQKIT_FILTER } from '../modules/local/seqkit_filter' addParams( options: [publish_files : ['*':'']] ) -include { SPADES_KMER_COVERAGE as COV_FROM_SPADES } from '../modules/local/spades_kmer_coverage' addParams( options: modules['spades_kmer_coverage'] ) -include { MARKERS } from '../modules/local/markers' addParams( options: modules['seqkit_split_options'] ) -include { BINNING } from '../modules/local/binning' addParams( options: modules['binning_options'] ) -include { RECRUIT } from '../modules/local/unclustered_recruitment' addParams( options: modules['unclustered_recruitment_options']) -include { BINNING_SUMMARY } from '../modules/local/binning_summary' addParams( options: modules['binning_summary_options'] ) -include { MOCK_DATA_REPORT } from '../modules/local/mock_data_reporter' addParams( options: modules['mock_data_report'] ) +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MARKERS } from '../modules/local/markers' +include { MOCK_DATA_REPORT } from '../modules/local/mock_data_reporter' /* * ------------------------------------------------- @@ -56,7 +21,7 @@ include { MOCK_DATA_REPORT } from '../modules/local/mock_ // https://github.com/nf-core/modules/tree/master/modules // https://nf-co.re/tools/#modules // nf-core modules --help -include { PRODIGAL } from './../modules/nf-core/modules/prodigal/main' addParams( options: modules['prodigal_options'] ) +include { PRODIGAL } from './../modules/nf-core/prodigal/main.nf' /* * ------------------------------------------------- @@ -64,78 +29,28 @@ include { PRODIGAL } from './../modules/nf-core/modules/prodigal/main' addParam * ------------------------------------------------- */ -include { CREATE_MOCK } from '../subworkflows/local/mock_data' addParams( get_genomes_for_mock: modules['get_genomes_for_mock']) -include { INPUT_CHECK } from '../subworkflows/local/input_check' addParams( ) -include { CONTIG_COVERAGE as COVERAGE } from '../subworkflows/local/contig_coverage' addParams( align_reads_options: modules['align_reads_options'], samtools_viewsort_options: modules['samtools_viewsort_options'], bedtools_genomecov_options: modules['bedtools_genomecov_options']) -include { KMERS } from '../subworkflows/local/kmers' addParams( count_kmers_options: modules['count_kmers_options'], normalize_kmers_options: modules['normalize_kmers_options'], embed_kmers_options: modules['embed_kmers_options']) -include { TAXON_ASSIGNMENT } from '../subworkflows/local/taxon_assignment' addParams( options: modules['taxon_assignment'], majority_vote_options: modules['majority_vote_options'], split_kingdoms_options: modules['split_kingdoms_options'], nr_dmnd_dir: internal_nr_dmnd_dir, taxdump_tar_gz_dir: internal_taxdump_tar_gz_dir, prot_accession2taxid_gz_dir: internal_prot_accession2taxid_gz_dir, diamond_blastp_options: modules['diamond_blastp_options'], large_downloads_permission: params.large_downloads_permission ) +include { COVERAGE } from '../subworkflows/local/coverage' +include { KMERS } from 
'../subworkflows/local/kmers' +include { PROCESS_METAGENOME } from '../subworkflows/local/process_metagenome' +include { TAXONOMY_WORKFLOW } from '../subworkflows/local/taxonomy_workflow' +include { BIN } from '../subworkflows/local/binning' workflow AUTOMETA { - // Software versions channel - Channel - .empty() - .set{ch_software_versions} - // Samplesheet channel - Channel - .fromPath(params.input) - .set{samplesheet_ch} - - // Set the metagenome and coverage channels - if (params.mock_test){ - CREATE_MOCK() - CREATE_MOCK.out.fasta - .set{metagenome_ch} - Channel - .empty() - .set{coverage_tab_ch} - } else { - INPUT_CHECK(samplesheet_ch) - INPUT_CHECK.out.metagenome - .set{metagenome_ch} - INPUT_CHECK.out.coverage - .set{coverage_tab_ch} - } + ch_versions = Channel.empty() - SEQKIT_FILTER( - metagenome_ch - ) - SEQKIT_FILTER.out.fasta - .set{fasta_ch} - - /* - * ------------------------------------------------- - * Find coverage, currently only pulling from SPADES output - * ------------------------------------------------- - */ + PROCESS_METAGENOME() + ch_versions = ch_versions.mix(PROCESS_METAGENOME.out.versions) - - if (!params.mock_test) { - fasta_ch - .join(INPUT_CHECK.out.reads) - .set{coverage_input_ch} - } else { - Channel - .empty() - .set{coverage_input_ch} - } - - COVERAGE ( - coverage_input_ch + COVERAGE( + PROCESS_METAGENOME.out.filtered_metagenome_fasta, + PROCESS_METAGENOME.out.filtered_metagenome_fasta_and_reads, + PROCESS_METAGENOME.out.user_provided_coverage_table ) - COVERAGE.out.coverage - .set{contig_coverage_ch} + ch_versions = ch_versions.mix(COVERAGE.out.versions) - COV_FROM_SPADES ( - fasta_ch, - ) - COV_FROM_SPADES.out.coverage - .set{spades_kmer_coverage_ch} - // https://nextflow-io.github.io/patterns/index.html#_conditional_process_executions - contig_coverage_ch - .mix(spades_kmer_coverage_ch) - .mix(coverage_tab_ch) - .set{coverage_ch} + filtered_metagenome_fasta = PROCESS_METAGENOME.out.filtered_metagenome_fasta + coverage_ch = COVERAGE.out.coverage_ch /* * ------------------------------------------------- @@ -144,9 +59,10 @@ workflow AUTOMETA { */ PRODIGAL ( - fasta_ch, + filtered_metagenome_fasta, "gbk" ) + ch_versions = ch_versions.mix(PRODIGAL.out.versions) PRODIGAL.out.amino_acid_fasta .set{orfs_ch} @@ -158,26 +74,31 @@ workflow AUTOMETA { */ if (params.taxonomy_aware) { - TAXON_ASSIGNMENT ( - fasta_ch, + TAXONOMY_WORKFLOW ( + filtered_metagenome_fasta, orfs_ch ) - TAXON_ASSIGNMENT.out.taxonomy - .set{taxonomy_results} - if (params.kingdom.equals('bacteria')) { - TAXON_ASSIGNMENT.out.bacteria - .set{kmers_input_ch} - } else { - // params.kingdom.equals('archaea') - TAXON_ASSIGNMENT.out.archaea - .set{kmers_input_ch} - } + ch_versions = ch_versions.mix(TAXONOMY_WORKFLOW.out.versions) + + taxonomy_results = TAXONOMY_WORKFLOW.out.taxonomy + taxdump_files = TAXONOMY_WORKFLOW.out.taxdump_files + taxonomically_split_fna_ch = TAXONOMY_WORKFLOW.out.taxonomically_split_fna + } else { - fasta_ch - .set{kmers_input_ch} + filtered_metagenome_fasta + .map { meta, fna -> + def new_meta = meta.clone() + new_meta['taxon'] = 'unclassified' + return [new_meta, fna] + } + .set{taxonomically_split_fna_ch} + Channel .fromPath(file("$baseDir/assets/dummy_file.txt", checkIfExists: true )) .set{taxonomy_results} + Channel + .fromPath(file("$baseDir/assets/dummy_file.txt", checkIfExists: true )) + .set{taxdump_files} } /* @@ -186,106 +107,59 @@ workflow AUTOMETA { * ------------------------------------------------- */ - KMERS( - kmers_input_ch - ) - KMERS.out.normalized - 
.set{kmers_normalized_ch} - - KMERS.out.embedded - .set{kmers_embedded_ch} - + KMERS( taxonomically_split_fna_ch ) + ch_versions = ch_versions.mix(KMERS.out.versions) // -------------------------------------------------------------------------------- // Run hmmscan and look for marker genes in contig orfs // -------------------------------------------------------------------------------- - - MARKERS( - orfs_ch - ) - MARKERS.out.markers_tsv - .set{markers_ch} - - // Prepare inputs for binning channel - kmers_embedded_ch - .join(coverage_ch) - .join(SEQKIT_FILTER.out.gc_content) - .join(markers_ch) - .set{binning_ch} - if (params.taxonomy_aware) { - binning_ch - .join(taxonomy_results) - .set{binning_ch} - } else { - binning_ch - .combine(taxonomy_results) - .set{binning_ch} - } - - BINNING( - binning_ch - ) - - if (params.unclustered_recruitment) { - // Prepare inputs for recruitment channel - kmers_normalized_ch - .join(coverage_ch) - .join(BINNING.out.main) - .join(markers_ch) - .set{recruitment_ch} - if (params.taxonomy_aware) { - recruitment_ch - .join(taxonomy_results) - .set{recruitment_ch} - } else { - recruitment_ch - .combine(taxonomy_results) - .set{recruitment_ch} + Channel + .fromList(['bacteria', 'archaea']) + .set { kingdoms } + + // Ensure orfs_ch is defined before using + orfs_ch + .combine(kingdoms) + .map { pair -> + def (meta, orfs_file, kingdom) = pair // Correctly extract values from pair + def new_meta = meta.clone() + new_meta['taxon'] = kingdom + return [new_meta, orfs_file] } - RECRUIT( - recruitment_ch - ) - RECRUIT.out.main - .set{binning_results_ch} - Channel - .value("recruited_cluster") - .set{binning_col} - } else { - BINNING.out.main - .set{binning_results_ch} - Channel - .value("cluster") - .set{binning_col} - } + .set { orfs_taxon_ch } - // Set inputs for binning summary - binning_results_ch - .join(markers_ch) - .join(fasta_ch) - .set{binning_summary_ch} + MARKERS( orfs_taxon_ch ) - if (params.single_db_dir) { - ncbi = file(params.single_db_dir) - } else { - ncbi = file("$baseDir/assets/dummy_file.txt") - } + ch_versions = ch_versions.mix(MARKERS.out.versions) - BINNING_SUMMARY( - binning_summary_ch, - binning_col, - ncbi, - ) + markers_ch = MARKERS.out.markers_tsv - if (params.mock_test){ - binning_results_ch - .join(CREATE_MOCK.out.assembly_to_locus) - .join(CREATE_MOCK.out.assembly_report) - .set { mock_input_ch } + BIN( + taxonomically_split_fna_ch, + PROCESS_METAGENOME.out.filtered_metagenome_gc_content, + markers_ch, + coverage_ch, + taxonomy_results, + KMERS.out.embedded, + taxdump_files, + TAXONOMY_WORKFLOW.out.dbtype + ) - MOCK_DATA_REPORT( - mock_input_ch, - file("$baseDir/lib/mock_data_report.Rmd") - ) - } + // if (params.mock_test){ + // BIN.out.binning_results + // .join(PROCESS_METAGENOME.out.assembly_to_locus) + // .join(PROCESS_METAGENOME.out.assembly_report) + // .set { mock_input_ch } + + // MOCK_DATA_REPORT( + // mock_input_ch, + // file("$baseDir/lib/mock_data_report.Rmd") + // ) + // ch_versions = ch_versions.mix(MOCK_DATA_REPORT.out.versions) + // } + + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) }
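Note on the kingdom fan-out added to workflows/autometa.nf above: `combine` crosses each `[meta, orfs]` tuple with the kingdom list, and `map` stamps a cloned meta map with a `taxon` key so every kingdom receives an independent copy of the metadata. Below is a minimal standalone sketch of that pattern; the script name, sample ID, and file name are invented for illustration and only the combine/map/clone logic mirrors the workflow.

// kingdom_fanout_sketch.nf -- illustrative only; channel contents are hypothetical
nextflow.enable.dsl = 2

workflow {

    // Hypothetical ORF channel shaped like PRODIGAL.out.amino_acid_fasta: [ meta, orfs ]
    Channel
        .of( [ [id: 'sample1'], file('sample1.orfs.faa') ] )
        .set { orfs_ch }

    Channel
        .fromList( ['bacteria', 'archaea'] )
        .set { kingdoms }

    orfs_ch
        .combine(kingdoms)                  // emits [ meta, orfs, 'bacteria' ] and [ meta, orfs, 'archaea' ]
        .map { meta, orfs, kingdom ->
            def new_meta = meta.clone()     // shallow copy so each kingdom carries its own meta map
            new_meta.taxon = kingdom
            [ new_meta, orfs ]
        }
        .view()                             // [ [id:sample1, taxon:bacteria], ... ] then [ [id:sample1, taxon:archaea], ... ]
}

Cloning the meta map is the important step: Groovy maps are passed by reference, so without `clone()` both kingdom tuples would share one mutable map and the last `taxon` assignment would overwrite the first.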