Merged
21 changes: 11 additions & 10 deletions Mad4Hatter.README.md
@@ -6,30 +6,31 @@ This workflow runs the entire MAD4HatTeR pipeline, processing amplicon sequencing

| Input Name | Description | Type | Required | Default |
|--------------------------|----------------------------------------------------------------------------------------------------------------------------------|---------------|----------|-------------------------------|
| pools | List of pool or panel names. Options: MAD4HatTeR ["D1.1","R1.1","R2.1","R1.2","v1","v2"], PfPHAST ["M1.1","M2.1","M1.addon"], Other ["4cast","ama1"] | Array[String] | Yes | - |
| sequencer | The sequencer used to produce your data [Options: miseq, nextseq] | String | Yes | - |
| forward_fastqs | List of forward FASTQ files. Order must match reverse_fastqs | Array[File] | Yes | - |
| reverse_fastqs | List of reverse FASTQ files. Order must match forward_fastqs | Array[File] | Yes | - |
| output_directory | Folder name for outputs | String | Yes | - |
| amplicon_info_files | Amplicon info file(s) to define the panel information. If not provided, the pre-defined panel info for the pools will be used. | Array[File] | No | - |
| targeted_reference_files | Targeted reference file(s). If not provided, the pre-defined reference sequences for the pools will be used. | Array[File] | No | - |
| refseq_fasta | Path to targeted reference sequences. If not provided, the pre-defined reference sequences for the pools will be used. | File | No | - |
| genome | Path to genome file. If not provided, the pre-defined reference sequences for the pools will be used. | File | No | - |
| omega_a | Level of statistical evidence required for DADA2 to infer a new ASV | Float | No | 0.000...001 |
| dada2_pool | Pooling method for DADA2 to process ASVs [Options: pseudo, true, false] | String | No | pseudo |
| band_size | Limit on net cumulative number of insertions in DADA2 | Int | No | 16 |
| max_ee | Limit on number of expected errors within a read in DADA2 | Int | No | 3 |
| max_mismatch | Maximum number of mismatches allowed in the overlap when merging read pairs in DADA2 | Int | No | 0 |
| cutadapt_minlen | Minimum length for cutadapt | Int | No | 100 |
| gtrim | If true, --nextseq-trim will be used to trim trailing Gs in cutadapt. | Boolean | No | false |
| quality_score | The quality score threshold to apply in cutadapt. | Int | No | 20 |
| allowed_errors | Allowed errors for cutadapt | Int | No | 0 |
| just_concatenate | If true, concatenate read pairs instead of merging them in DADA2 | Boolean | No | false |
| mask_tandem_repeats | Mask tandem repeats | Boolean | No | true |
| mask_homopolymers | Mask homopolymers | Boolean | No | true |
| masked_fasta | Masked FASTA file | File | No | - |
| principal_resmarkers | Principal resistance markers file | File | No | - |
| resmarkers_info_tsv | Resistance markers info TSV file | File | No | - |
| dada2_additional_memory | Additional memory (in GB) to be added to the provided memory used in the DADA2 runtime configuration | Int | No | 0 |
| dada2_runtime_size | DADA2 runtime size [small, medium, large]. Should be based on the size of the input dataset. Will be calculated if not provided | String | No | - |
| docker_image | The Docker image to use | String | No | eppicenter/mad4hatter:develop |

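As a usage sketch, the inputs above can be collected into a Cromwell-style inputs JSON. The file names and pool choices below are hypothetical, and the key prefix assumes the workflow is invoked as `MAD4HatTeR`:

```python
import json

# Minimal inputs sketch for the MAD4HatTeR workflow (hypothetical paths;
# keys are qualified with the workflow name, as Cromwell expects).
inputs = {
    "MAD4HatTeR.pools": ["D1.1", "R1.1", "R2.1"],
    "MAD4HatTeR.forward_fastqs": ["sample1_R1.fastq.gz"],
    "MAD4HatTeR.reverse_fastqs": ["sample1_R2.fastq.gz"],
    "MAD4HatTeR.output_directory": "results",
    # Optional knobs shown at their documented defaults.
    "MAD4HatTeR.gtrim": False,
    "MAD4HatTeR.quality_score": 20,
}

print(json.dumps(inputs, indent=2))
```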
## Pipeline Outputs
11 changes: 8 additions & 3 deletions Mad4Hatter.wdl
@@ -21,7 +21,6 @@ import "modules/local/error_with_message.wdl" as ErrorWithMessage
workflow MAD4HatTeR {
input {
Array[String] pools
String sequencer # The sequencer used to produce your data
Array[File] forward_fastqs # List of forward fastqs. Must be in correct order.
Array[File] reverse_fastqs # List of reverse fastqs. Must be in correct order.
Array[File]? amplicon_info_files
@@ -32,7 +31,10 @@ workflow MAD4HatTeR {
String dada2_pool = "pseudo" # Pooling method for DADA2 to process ASVs [Options: pseudo (default), true, false]
Int band_size = 16 # Limit on the net cumulative number of insertions of one sequence relative to the other in DADA2
Int max_ee = 3 # Limit on number of expected errors within a read during filtering and trimming within DADA2
Int max_mismatch = 0 # Maximum number of mismatches allowed when merging read pairs in DADA2 (0 = no mismatches)
Int cutadapt_minlen = 100
Boolean gtrim = false
Int quality_score = 20
Int allowed_errors = 0
Boolean just_concatenate = true
Boolean mask_tandem_repeats = true
@@ -94,11 +96,12 @@ workflow MAD4HatTeR {

# Determine final amplicon info files to use. If provided, use those; otherwise, use from config.
Array[File] amplicon_info_files_final = select_first([amplicon_info_files, get_amplicon_and_targeted_ref_from_config.amplicon_info_files])
Array[String] final_pools = select_first([get_amplicon_and_targeted_ref_from_config.updated_pool_names, pools])

# Generate final amplicon info
call GenerateAmpliconInfoWf.generate_amplicon_info {
input:
pools = pools,
pools = final_pools,
docker_image = docker_image,
amplicon_info_files = amplicon_info_files_final
}
@@ -110,8 +113,9 @@ workflow MAD4HatTeR {
amplicon_info_ch = generate_amplicon_info.amplicon_info_ch,
forward_fastqs = forward_fastqs,
reverse_fastqs = reverse_fastqs,
sequencer = sequencer,
cutadapt_minlen = cutadapt_minlen,
gtrim = gtrim,
quality_score = quality_score,
allowed_errors = allowed_errors,
docker_image = docker_image
}
@@ -126,6 +130,7 @@ workflow MAD4HatTeR {
band_size = band_size,
omega_a = omega_a,
max_ee = max_ee,
max_mismatch = max_mismatch,
just_concatenate = just_concatenate,
additional_memory = dada2_additional_memory,
dada2_runtime_size = dada2_runtime_size,
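The `select_first([...])` expressions above implement "prefer the user-supplied value, fall back to the config-derived one". A Python sketch of the same semantics:

```python
def select_first(options):
    """Return the first non-None option, mirroring WDL's select_first.
    Raises if every option is None, as WDL would fail the workflow."""
    for opt in options:
        if opt is not None:
            return opt
    raise ValueError("select_first: all options were None")

# The user supplied nothing, so the config-derived files win.
user_files = None
config_files = ["amplicon_info_files/D1.1.tsv"]
final = select_first([user_files, config_files])
print(final)  # → ['amplicon_info_files/D1.1.tsv']
```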
4 changes: 3 additions & 1 deletion Mad4HatterPostProcessing.wdl
@@ -46,10 +46,12 @@ workflow Mad4HatterPostProcessing {
# Determine final amplicon info files to use. If provided, use those; otherwise, use from config.
Array[File] amplicon_info_files_final = select_first([amplicon_info_files, get_amplicon_and_targeted_ref_from_config.amplicon_info_files])
Array[File]? targeted_reference_files_final = select_first([targeted_reference_files, get_amplicon_and_targeted_ref_from_config.targeted_reference_files])
Array[String] final_pools = select_first([get_amplicon_and_targeted_ref_from_config.updated_pool_names, pools])


call GenerateAmpliconInfo.generate_amplicon_info {
input:
pools = pools,
pools = final_pools,
docker_image = docker_image,
amplicon_info_files = amplicon_info_files_final
}
3 changes: 2 additions & 1 deletion Mad4HatterQcOnly.README.md
@@ -10,8 +10,9 @@ This workflow runs quality control _only_ on the selected samples.
| **amplicon_info_files** | The TSVs that contain amplicon information. | Array[File] | Yes | - |
| **forward_fastqs** | List of forward fastqs. Must be in correct order. | Array[File] | Yes | - |
| **reverse_fastqs** | List of reverse fastqs. Must be in correct order. | Array[File] | Yes | - |
| **sequencer** | The name of the sequencer that was used to process the samples. | String | Yes | - |
| **cutadapt_minlen** | The minimum length used for cutadapt. Optional. | Int | No | 100 |
| **gtrim** | If true, --nextseq-trim will be used to trim trailing Gs in cutadapt. | Boolean | No | false |
| **quality_score** | The quality score threshold to apply in cutadapt. | Int | No | 20 |
| **allowed_errors** | The number of errors allowed to be encountered in cutadapt. Optional. | Int | No | 0 |
| **docker_image** | Specifies a custom Docker image to use. Optional. | String | No | eppicenter/mad4hatter:develop |

9 changes: 6 additions & 3 deletions Mad4HatterQcOnly.wdl
@@ -10,8 +10,9 @@ workflow Mad4HatterQcOnly {
Array[File]? amplicon_info_files
Array[File] forward_fastqs
Array[File] reverse_fastqs
String sequencer
Int cutadapt_minlen = 100
Boolean gtrim = false
Int quality_score = 20
Int allowed_errors = 0
# TODO: Pin the specific docker image version here when first release is ready
String docker_image = "eppicenter/mad4hatter:develop"
@@ -31,10 +32,11 @@

# Determine final amplicon info files to use. If provided, use those; otherwise, use from config.
Array[File] amplicon_info_files_final = select_first([amplicon_info_files, get_amplicon_and_targeted_ref_from_config.amplicon_info_files])
Array[String] final_pools = select_first([get_amplicon_and_targeted_ref_from_config.updated_pool_names, pools])

call GenerateAmpliconInfo.generate_amplicon_info {
input:
pools = pools,
pools = final_pools,
docker_image = docker_image,
amplicon_info_files = amplicon_info_files_final
}
@@ -44,8 +46,9 @@
amplicon_info_ch = generate_amplicon_info.amplicon_info_ch,
forward_fastqs = forward_fastqs,
reverse_fastqs = reverse_fastqs,
sequencer = sequencer,
cutadapt_minlen = cutadapt_minlen,
gtrim = gtrim,
quality_score = quality_score,
allowed_errors = allowed_errors,
docker_image = docker_image
}
6 changes: 4 additions & 2 deletions modules/local/cutadapt.wdl
@@ -7,7 +7,8 @@ task cutadapt {
File forward_fastq
File reverse_fastq
Int cutadapt_minlen
String sequencer
Boolean gtrim
Int quality_score
Int allowed_errors
Int cpus = 1
String docker_image
@@ -22,7 +23,8 @@
-r ~{rev_primers} \
-f ~{fwd_primers} \
-m ~{cutadapt_minlen} \
-s ~{sequencer} \
-g ~{gtrim} \
-q ~{quality_score} \
-e ~{allowed_errors} \
-c ~{cpus} \
-o $OUTPUT_DIR
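The task forwards `-g`, `-q`, and `-e` to the pipeline's wrapper script rather than to cutadapt directly. A hedged sketch of how such a wrapper might translate `gtrim`/`quality_score` into real cutadapt options (`--nextseq-trim` and `-q`); the function name is illustrative:

```python
def build_quality_args(gtrim: bool, quality_score: int) -> list[str]:
    """Translate the workflow's gtrim/quality_score inputs into cutadapt
    quality-trimming options. --nextseq-trim additionally removes the
    trailing G runs produced by two-color chemistry (NextSeq/NovaSeq);
    plain -q does not. This mirrors what the pipeline's wrapper script
    is assumed to do, not its actual code."""
    if gtrim:
        return [f"--nextseq-trim={quality_score}"]
    return ["-q", str(quality_score)]

print(build_quality_args(False, 20))  # → ['-q', '20']
print(build_quality_args(True, 20))   # → ['--nextseq-trim=20']
```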
2 changes: 2 additions & 0 deletions modules/local/dada2_analysis.wdl
@@ -9,6 +9,7 @@ task dada2_analysis {
Int band_size
Float omega_a
Int max_ee
Int max_mismatch
Boolean just_concatenate
Int additional_memory
String? dada2_runtime_size
@@ -86,6 +87,7 @@
--band-size ~{band_size} \
--omega-a ~{omega_a} \
--maxEE ~{max_ee} \
--max-mismatch ~{max_mismatch} \
--cores ~{n_cores} \
~{if just_concatenate then "--concat-non-overlaps" else ""}
echo "$(timestamp) : DADA2 processing complete."
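The new `max_mismatch` input caps how many bases may disagree in the overlap when DADA2 merges read pairs (it is passed through to `mergePairs`' `maxMismatch`). A toy Python illustration of the acceptance rule, not DADA2's actual alignment code:

```python
def merge_ok(fwd_overlap: str, rev_overlap: str, max_mismatch: int = 0) -> bool:
    """Accept a forward/reverse merge only if the aligned overlap has at
    most max_mismatch disagreeing bases. A toy stand-in for DADA2's
    mergePairs maxMismatch check."""
    mismatches = sum(a != b for a, b in zip(fwd_overlap, rev_overlap))
    return mismatches <= max_mismatch

print(merge_ok("ACGTACGT", "ACGTACGT"))     # → True  (identical overlap)
print(merge_ok("ACGTACGT", "ACGTACGA"))     # → False (1 mismatch, limit 0)
print(merge_ok("ACGTACGT", "ACGTACGA", 1))  # → True  (1 mismatch allowed)
```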
33 changes: 32 additions & 1 deletion modules/local/get_amplicon_and_targeted_ref_from_config.wdl
@@ -32,14 +32,39 @@ task get_amplicon_and_targeted_ref_from_config {
missing_pools = []

logging.info("Processing requested pools: ~{sep=',' pools}")

pool_name_mapping = {
'1A': 'D1.1',
'1B': 'R1.1',
'2' : 'R2.1',
'5' : 'R1.2',
'D1' : 'D1.1',
'R1' : 'R1.2',
'R2' : 'R2.1',
'M1' : 'M1.1',
'M2' : 'M2.1',
}

updated_pool_names = []
for pool in "~{sep=',' pools}".split(","):
if pool in pool_config['pool_options']:
amplicon_info_paths.append(pool_config['pool_options'][pool]["amplicon_info_path"])
targeted_reference_paths.append(pool_config['pool_options'][pool]["targeted_reference_path"])
if pool in pool_name_mapping:
updated_pool_names.append(pool_name_mapping[pool])
else:
updated_pool_names.append(pool)
else:
missing_pools.append(pool)
if missing_pools:
raise ValueError(f"The following pools are not available in the config: {', '.join(missing_pools)}")
missing = ', '.join(missing_pools)
error_message = (
f"ERROR: The following pools were requested but not found in the configuration: {missing}.\n"
f"If you are using custom (bespoke) pools, you MUST provide the corresponding files manually:\n"
f" - `--amplicon_info` must be specified\n"
f" - and if running Mad4Hatter or Mad4hatterPostProcessing, EITHER `--refseq_fasta` OR `--genome` must also be provided."
)
raise ValueError(error_message)

logging.info("Copying amplicon info and targeted reference files to output directories")
os.makedirs("amplicon_info_files", exist_ok=True)
@@ -60,12 +85,18 @@
output_path = os.path.join("targeted_reference_files", output_name)
shutil.copy2(reference_file, output_path)
logging.info(f"Copied reference file to: {output_path}")

logging.info("Writing updated pool names to output file")
with open("updated_pool_names.txt", "w") as f:
for pool_name in updated_pool_names:
f.write(pool_name + "\n")
CODE
>>>

output {
Array[File] amplicon_info_files = glob("amplicon_info_files/*")
Array[File] targeted_reference_files = glob("targeted_reference_files/*")
Array[String] updated_pool_names = read_lines("updated_pool_names.txt")
}

runtime {
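The embedded Python above normalizes legacy pool names before the rest of the pipeline runs: configured pools with a legacy alias are renamed, other configured pools pass through unchanged, and unconfigured pools raise an error. A standalone sketch of that resolution logic (the configured-pool set here is illustrative):

```python
# Legacy pool aliases, as added in get_amplicon_and_targeted_ref_from_config.
POOL_NAME_MAPPING = {
    "1A": "D1.1", "1B": "R1.1", "2": "R2.1", "5": "R1.2",
    "D1": "D1.1", "R1": "R1.2", "R2": "R2.1", "M1": "M1.1", "M2": "M2.1",
}

def normalize_pools(pools, configured):
    """Map legacy pool names to current ones; reject unconfigured pools."""
    missing = [p for p in pools if p not in configured]
    if missing:
        raise ValueError(
            "The following pools were requested but not found in the "
            f"configuration: {', '.join(missing)}"
        )
    return [POOL_NAME_MAPPING.get(p, p) for p in pools]

configured = {"1A", "D1.1", "R2.1"}
print(normalize_pools(["1A", "R2.1"], configured))  # → ['D1.1', 'R2.1']
```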
6 changes: 4 additions & 2 deletions workflows/demultiplex_amplicons.wdl
@@ -8,8 +8,9 @@ workflow demultiplex_amplicons {
File amplicon_info_ch
Array[File] forward_fastqs
Array[File] reverse_fastqs
String sequencer
Int cutadapt_minlen
Boolean gtrim
Int quality_score
Int allowed_errors
String docker_image
}
@@ -30,7 +31,8 @@
forward_fastq = read_pair.left,
reverse_fastq = read_pair.right,
cutadapt_minlen = cutadapt_minlen,
sequencer = sequencer,
gtrim = gtrim,
quality_score = quality_score,
allowed_errors = allowed_errors,
docker_image = docker_image
}
2 changes: 2 additions & 0 deletions workflows/denoise_amplicons_1.wdl
@@ -10,6 +10,7 @@ workflow denoise_amplicons_1 {
Int band_size
Float omega_a
Int max_ee
Int max_mismatch
Boolean just_concatenate
Int additional_memory
String? dada2_runtime_size
@@ -24,6 +25,7 @@
band_size = band_size,
omega_a = omega_a,
max_ee = max_ee,
max_mismatch = max_mismatch,
just_concatenate = just_concatenate,
additional_memory = additional_memory,
dada2_runtime_size = dada2_runtime_size,