diff --git a/Microarray/Agilent_1-channel/Array_Annotations/Agilent_array_annotations.csv b/Microarray/Agilent_1-channel/Array_Annotations/Agilent_array_annotations.csv
new file mode 100644
index 00000000..072e0c5c
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Array_Annotations/Agilent_array_annotations.csv
@@ -0,0 +1,2 @@
+array_design,annot_type,annot_filename,download_link,download_date
+AGILENT SurePrint G3 GE 8x60k v3,agilent,072363_D_AA_20240521.txt,https://earray.chem.agilent.com/earray/array/displayViewArrayDesign.do?eArrayAction=view&arraydesignid=ADID40392,2024-11-15
diff --git a/Microarray/Agilent_1-channel/Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md b/Microarray/Agilent_1-channel/Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md
new file mode 100644
index 00000000..17a151fd
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md
@@ -0,0 +1,1515 @@
+# GeneLab bioinformatics processing pipeline for Agilent 1-channel microarray data <!-- omit in toc -->
+
+> **This page holds an overview and instructions for how GeneLab processes Agilent 1-channel microarray datasets\*. Exact processing commands and GL-DPPD-7112 version used for specific datasets are provided with their processed data in the [Open Science Data Repository (OSDR)](https://osdr.nasa.gov/bio/repo/).**  
+>
+
+---
+
+**Date:** March XX, 2025
+**Revision:** -A
+**Document Number:** GL-DPPD-7112-A
+
+**Submitted by:**  
+Crystal Han (GeneLab Data Processing Team)
+
+**Approved by:**  
+Samrawit Gebre (OSDR Project Manager)
+Lauren Sanders (OSDR Project Scientist)
+Amanda Saravia-Butler (GeneLab Science Lead)
+Barbara Novak (GeneLab Data Processing Lead)
+
+---
+
+## Updates from previous version
+
+Updated [Ensembl Reference Files](../../../GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv) to the following releases:
+- Animals: Ensembl release 112
+- Plants: Ensembl plants release 59
+- Bacteria: Ensembl bacteria release 59
+
+Software Updates:
+
+| Program | Previous Version | New Version    |
+|:--------|:-----------------|:---------------|
+|R|4.1.3|4.4.2|
+|DT|0.26|0.33|
+|dplyr|1.0.10|1.1.4|
+|stringr|1.5.0|1.5.1|
+|R.utils|2.12.2|2.12.3|
+|limma|3.50.3|3.62.2|
+|glue|1.6.2|1.8.0|
+|ggplot2|3.4.0|3.5.1|
+|biomaRt|2.50.0|2.62.0|
+|matrixStats|0.63.0|1.5.0|
+|dp_tools|1.3.4|1.3.5|
+|Quarto|1.2.313|1.6.40|
+
+Custom Annotations
+
+- Added ability to use custom gene annotations when annotations are not available in Biomart or Ensembl FTP for *Arabidopsis thaliana*, see [Step 7](#7-probe-annotations)
+
+---
+
+# Table of contents <!-- omit in toc -->
+
+- [Software used](#software-used)
+- [General processing overview with example commands](#general-processing-overview-with-example-commands)
+    - [1. Create Sample RunSheet](#1-create-sample-runsheet)
+    - [2. Load Data](#2-load-data)
+      - [2a. Load Libraries and Define Input Parameters](#2a-load-libraries-and-define-input-parameters)
+      - [2b. Define Custom Functions](#2b-define-custom-functions)
+      - [2c. Load Metadata and Raw Data](#2c-load-metadata-and-raw-data)
+      - [2d. Load Annotation Metadata](#2d-load-annotation-metadata)
+    - [3. Raw Data Quality Assessment](#3-raw-data-quality-assessment)
+      - [3a. Density Plot](#3a-density-plot)
+      - [3b. Pseudo Image Plots](#3b-pseudo-image-plots)
+      - [3c. MA Plots](#3c-ma-plots)
+      - [3d. Foreground-Background Plots](#3d-foreground-background-plots)
+      - [3e. Boxplots](#3e-boxplots)
+    - [4. Background Correction](#4-background-correction)
+    - [5. Between Array Normalization](#5-between-array-normalization)
+    - [6. Normalized Data Quality Assessment](#6-normalized-data-quality-assessment)
+      - [6a. Density Plot](#6a-density-plot)
+      - [6b. Pseudo Image Plots](#6b-pseudo-image-plots)
+      - [6c. MA Plots](#6c-ma-plots)
+      - [6d. Boxplots](#6d-boxplots)
+    - [7. Probe Annotations](#7-probe-annotations)
+      - [7a. Get Probe Annotations](#7a-get-probe-annotations)
+      - [7b. Summarize Gene Mapping](#7b-summarize-gene-mapping)
+      - [7c. Generate Annotated Raw and Normalized Expression Tables](#7c-generate-annotated-raw-and-normalized-expression-tables)
+    - [8. Perform Probe Differential Expression (DE)](#8-perform-probe-differential-expression-de)
+      - [8a. Generate Design Matrix](#8a-generate-design-matrix)
+      - [8b. Perform Individual Probe Level DE](#8b-perform-individual-probe-level-de)
+      - [8c. Add Annotation and Stats Columns and Format DE Table](#8c-add-annotation-and-stats-columns-and-format-de-table)
+
+---
+
+# Software used  
+
+|Program|Version|Relevant Links|
+|:------|:------:|:-------------|
+|R|4.4.2|[https://www.r-project.org/](https://www.r-project.org/)|
+|DT|0.33|[https://github.com/rstudio/DT](https://github.com/rstudio/DT)|
+|dplyr|1.1.4|[https://dplyr.tidyverse.org](https://dplyr.tidyverse.org)|
+|stringr|1.5.1|[https://stringr.tidyverse.org](https://stringr.tidyverse.org)|
+|R.utils|2.12.3|[https://github.com/HenrikBengtsson/R.utils](https://github.com/HenrikBengtsson/R.utils)|
+|limma|3.62.2|[https://bioconductor.org/packages/3.14/bioc/html/limma.html](https://bioconductor.org/packages/3.14/bioc/html/limma.html)|
+|glue|1.8.0|[https://glue.tidyverse.org](https://glue.tidyverse.org)|
+|ggplot2|3.5.1|[https://ggplot2.tidyverse.org](https://ggplot2.tidyverse.org)|
+|biomaRt|2.62.0|[https://bioconductor.org/packages/3.14/bioc/html/biomaRt.html](https://bioconductor.org/packages/3.14/bioc/html/biomaRt.html)|
+|matrixStats|1.5.0|[https://github.com/HenrikBengtsson/matrixStats](https://github.com/HenrikBengtsson/matrixStats)|
+|statmod|1.5.0|[https://github.com/cran/statmod](https://github.com/cran/statmod)|
+|dp_tools|1.3.5|[https://github.com/J-81/dp_tools](https://github.com/J-81/dp_tools)|
+|singularity|3.9|[https://sylabs.io](https://sylabs.io)|
+|Quarto|1.6.40|[https://quarto.org](https://quarto.org)|
+
+---
+
+# General processing overview with example commands  
+
+> Exact processing commands and output files listed in **bold** below are included with each Microarray processed dataset in the [Open Science Data Repository (OSDR)](https://osdr.nasa.gov/bio/repo/).
+
+---
+
+## 1. Create Sample RunSheet
+
+> Notes: 
+> - Rather than running the commands below to create the runsheet needed for processing, the runsheet may also be created manually by following the [file specification](../Workflow_Documentation/NF_MAAgilent1ch/examples/runsheet/README.md).
+> 
+> - These command line tools are part of the [dp_tools](https://github.com/J-81/dp_tools) program.
+
+```bash
+### Download the *ISA.zip file from the GeneLab Repository ###
+
+dpt-get-isa-archive \
+ --accession OSD-###
+
+### Parse the metadata from the *ISA.zip file to create a sample runsheet ###
+
+dpt-isa-to-runsheet --accession OSD-### \
+ --config-type microarray \
+ --config-version Latest \
+ --isa-archive *ISA.zip
+```
+
+**Parameter Definitions:**
+
+- `--accession OSD-###` - OSD accession ID (replace ### with the OSD number being processed), used to retrieve the urls for the ISA archive and raw expression files hosted on the GeneLab Repository
+- `--config-type` - Instructs the script to extract the metadata required for `microarray` processing from the ISA archive
+- `--config-version` - Specifies the `dp-tools` configuration version to use, a value of `Latest` will specify the most recent version
+- `--isa-archive` - Specifies the *ISA.zip file for the respective GLDS dataset, downloaded in the `dpt-get-isa-archive` command
+
+
+**Input Data:**
+
+- No input data required but the OSD accession ID needs to be indicated, which is used to download the respective ISA archive 
+
+**Output Data:**
+
+- *ISA.zip (compressed ISA directory containing Investigation, Study, and Assay (ISA) metadata files for the respective OSD dataset, used to define sample groups - the *ISA.zip file is located in the [OSD repository](https://osdr.nasa.gov/bio/repo/search?q=&data_source=cgene,alsda&data_type=study) under 'Study Files' -> 'metadata')
+
+- **{OSD-Accession-ID}_microarray_v{version}_runsheet.csv** (table containing metadata required for processing, version denotes the dp_tools schema used to specify the metadata to extract from the ISA archive)
+
+<br>
+
+---
+
+## 2. Load Data
+
+> Note: Steps 2 - 8 are done in R
+
+<br>
+
+### 2a. Load Libraries and Define Input Parameters
+
+```R
+### Install R packages if not already installed ###
+
+install.packages("tidyverse")
+install.packages("R.utils")
+install.packages("glue")
+install.packages("matrixStats")
+install.packages("statmod")
+if (!require("BiocManager", quietly = TRUE))
+    install.packages("BiocManager")
+BiocManager::install(version = "3.14")
+BiocManager::install("limma")
+BiocManager::install("biomaRt")
+
+
+### Import libraries ###
+
+library(tidyverse)
+library(dplyr)
+library(stringr)
+library(R.utils)
+library(glue)
+library(matrixStats)
+library(limma)
+library(biomaRt)
+library(statmod)
+
+
+# Define path to runsheet
+runsheet <- "/path/to/runsheet/{OSD-Accession-ID}_microarray_v{version}_runsheet.csv"
+
+## Set up output structure
+
+# Output Constants
+DIR_RAW_DATA <- "00-RawData"
+DIR_NORMALIZED_EXPRESSION <- "01-limma_NormExp"
+DIR_DGE <- "02-limma_DGE"
+
+dir.create(DIR_RAW_DATA)
+dir.create(DIR_NORMALIZED_EXPRESSION)
+dir.create(DIR_DGE)
+```
+
+<br>
+
+### 2b. Define Custom Functions
+
+#### all_true()
+<details>
+  <summary>wraps R <code>base::all()</code> function; overrides default behavior for empty input vector</summary>
+
+  ```R
+  all_true <- function(i_vector) {
+    if ( length(i_vector) == 0 ) {
+      stop(paste("Input vector is length zero"))
+    }
+    all(i_vector)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `i_vector=` - a vector of logical values
+
+  **Returns:** a logical of length 1; `TRUE` if all values are true, `FALSE` otherwise; stops and returns an error if input vector is empty
+</details>
+
+#### runsheet_paths_are_URIs()
+<details>
+  <summary>tests if paths provided in runsheet dataframe are URIs</summary>
+
+  ```R
+  runsheet_paths_are_URIs <- function(df_runsheet) {
+    all_true(stringr::str_starts(df_runsheet$`Array Data File Path`, "https"))
+  }
+  ```
+
+  **Custom Functions Used:**
+  - [all_true()](#all_true)
+
+  **Function Parameter Definitions:**
+  - `df_runsheet=` - a dataframe containing the sample runsheet information
+
+  **Returns:** a logical of length 1; `TRUE` if all values in the `Array Data File Path` of the runsheet start with "https", `FALSE` otherwise; stops and returns an error if input vector is empty
+</details>
+
+#### download_files_from_runsheet()
+<details>
+  <summary>downloads the raw data files</summary>
+
+  ```R
+  download_files_from_runsheet <- function(df_runsheet) {
+    urls <- df_runsheet$`Array Data File Path`
+    destinationFiles <- df_runsheet$`Array Data File Name`
+
+    mapply(function(url, destinationFile) {
+      print(paste0("Downloading from '", url, "' TO '", destinationFile, "'"))
+      if ( file.exists(destinationFile ) ) {
+        warning(paste( "Using Existing File:", destinationFile ))
+      } else {
+        download.file(url, destinationFile)
+      }
+    }, urls, destinationFiles)
+
+    destinationFiles # Return these paths
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `df_runsheet=` - a dataframe containing the sample runsheet information
+
+  **Returns:** a list of filenames that were downloaded; same as the `Array Data File Name` in the sample runsheet
+</details>
+
+#### fetch_organism_specific_annotation_table()
+<details>
+  <summary>determines the organism specific annotation file to use based on the provided organism name</summary>
+
+  ```R
+  fetch_organism_specific_annotation_table <- function(organism, annotation_table_link) {
+    # Uses the latest GeneLab annotations table to find the organism specific annotation file path and ensembl version
+    # Raises an exception if the organism does not have an associated annotation file or ensembl version yet
+    
+    all_organism_table <- read.csv(annotation_table_link)
+
+    annotation_table <- all_organism_table %>% dplyr::filter(species == organism)
+
+    # Guard clause: Ensure annotation_table populated
+    # Else: raise exception for unsupported organism
+    if (nrow(annotation_table) == 0 || annotation_table$genelab_annots_link == "" || is.na(annotation_table$ensemblVersion)) {
+      stop(glue::glue("Organism supplied '{organism}' is not supported. See the following url for supported organisms: {annotation_table_link}.  Supported organisms will correspond to a row based on the 'species' column and include a url in the 'genelab_annots_link' column of that row and a version number in the 'ensemblVersion' column."))
+    }
+
+    return(annotation_table)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `organism=` - a string containing the name of the organism (as found in the species column of the GeneLab annotation table)
+  - `annotation_table_link=` - a string specifying the URL or path to latest GeneLab Annotations file, see [GL-DPPD-7110-A_annotations.csv](../../../GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv)
+
+  **Returns:** a dataframe containing all rows in the GeneLab annotations file that match the specified organism
+</details>
+
+#### agilent_image_plot()
+<details>
+  <summary>plots pseudo images of each array</summary>
+
+  ```R
+  agilent_image_plot <- function(eListRaw, transform_func = identity) {
+    # Adapted from this discussion: https://support.bioconductor.org/p/15523/
+    copy_raw_data <- eListRaw
+    copy_raw_data$genes$Block <- 1 # Agilent arrays only have one block
+    names(copy_raw_data$genes)[2] <- "Column"
+    copy_raw_data$printer <- limma::getLayout(copy_raw_data$genes)
+
+    r <- copy_raw_data$genes$Row
+    c <- copy_raw_data$genes$Column
+    nr <- max(r)
+    nc <- max(c)
+    y <- rep(NA,nr*nc)
+    i <- (r-1)*nc+c
+    for ( array_i in seq(colnames(copy_raw_data$E)) ) {
+      y[i] <- transform_func(copy_raw_data$E[,array_i])
+      limma::imageplot(y,copy_raw_data$printer, main = rownames(copy_raw_data$targets)[array_i])
+    }
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `eListRaw=` - R object containing expression matrix to be plotted
+  - `transform_func=identity` - function used to transform expression matrix before plotting
+
+  **Returns:** pseudo images of each array
+</details>
+
+#### boxplot_expression_safe_margin()
+<details>
+  <summary>plots boxplot of expression data for each array</summary>
+
+  ```R
+  boxplot_expression_safe_margin <- function(data, transform_func = identity, xlab = "Log2 Intensity") {
+    # Basic box plot
+    df_data <- as.data.frame(transform_func(data$E))
+    ggplot2::ggplot(stack(df_data), ggplot2::aes(x=values, y=ind)) + 
+      ggplot2::geom_boxplot() + 
+      ggplot2::scale_y_discrete(limits=rev) +
+      ggplot2::labs(y= "Sample Name", x = xlab)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `data=` - R object containing expression matrix to be plotted
+  - `transform_func=identity` - function used to transform expression matrix before plotting
+  - `xlab="Log2 Intensity"` - string containing x-axis label for the plot
+
+  **Returns:** boxplot of expression data for each array
+</details>
+
+#### shortened_organism_name()
+<details>
+  <summary>shortens organism names, for example 'Homo Sapiens' to 'hsapiens'</summary>
+
+  ```R
+  shortened_organism_name <- function(long_name) {
+    #' Convert organism names like 'Homo Sapiens' into 'hsapiens'
+    tokens <- long_name %>% stringr::str_split(" ", simplify = TRUE)
+    genus_name <- tokens[1]
+
+    species_name <- tokens[2]
+
+    short_name <- stringr::str_to_lower(paste0(substr(genus_name, start = 1, stop = 1), species_name))
+
+    return(short_name)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `long_name=` - a string containing the long name of the organism
+
+  **Returns:** a string containing the short name of the organism
+</details>
+
+#### get_biomart_attribute()
+<details>
+  <summary>retrieves resolved biomart attribute source from runsheet dataframe</summary>
+
+  ```R
+  get_biomart_attribute <- function(df_rs) {
+    # check if runsheet has 'biomart_attribute' column
+    if ( !is.null(df_rs$`biomart_attribute`) ) {
+      print("Using attribute name sourced from runsheet")
+      # Format according to biomart needs
+      formatted_value <- unique(df_rs$`biomart_attribute`) %>% 
+                          stringr::str_replace_all(" ","_") %>% # Replace all spaces with underscore
+                          stringr::str_to_lower() # Lower casing only
+      return(formatted_value)
+    } else {
+      stop("ERROR: Could not find 'biomart_attribute' in runsheet")
+    }
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `df_rs=` - a dataframe containing the sample runsheet information
+
+  **Returns:** a string containing the formatted value from the `biomart_attribute` column of the runsheet, with all spaces converted to underscores and uppercase converted to lowercase; if no `biomart_attribute` exists in the runsheet, stop and return an error
+</details>
+
+#### get_ensembl_genomes_mappings_from_ftp()
+<details>
+  <summary>obtains mapping table directly from ftp; useful when biomart live service no longer exists for desired version</summary>
+
+  ```R
+  get_ensembl_genomes_mappings_from_ftp <- function(organism, ensembl_genomes_portal, ensembl_genomes_version, biomart_attribute) {
+    request_url <- glue::glue("https://ftp.ebi.ac.uk/ensemblgenomes/pub/{ensembl_genomes_portal}/release-{ensembl_genomes_version}/mysql/{ensembl_genomes_portal}_mart_{ensembl_genomes_version}/{organism}_eg_gene__efg_{biomart_attribute}__dm.txt.gz")
+
+    print(glue::glue("Mappings file URL: {request_url}"))
+
+    # Create a temporary file name
+    temp_file <- tempfile(fileext = ".gz")
+
+    # Download the gzipped table file using the download.file function
+    download.file(url = request_url, destfile = temp_file, method = "libcurl") # Use 'libcurl' to support ftps
+
+    # Uncompress the file
+    uncompressed_temp_file <- tempfile()
+    gzcon <- gzfile(temp_file, "rt")
+    content <- readLines(gzcon)
+    writeLines(content, uncompressed_temp_file)
+    close(gzcon)
+
+
+    # Load the data into a dataframe
+    mapping <- read.table(uncompressed_temp_file, # Read the uncompressed file
+                          # Add column names as follows: MAPID, TAIR, PROBEID
+                          col.names = c("MAPID", "ensembl_gene_id", biomart_attribute),
+                          header = FALSE, # No header in original table
+                          sep = "\t") # Tab separated
+
+    # Clean up temporary files
+    unlink(temp_file)
+    unlink(uncompressed_temp_file)
+
+    return(mapping)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `organism=` - a string containing the name of the organism (formatted using `shortened_organism_name()`)
+  - `ensembl_genomes_portal=` - a string containing the name of the genomes portal, for example 'plants'
+  - `ensembl_genomes_version=` - a string containing the version of Ensembl to use
+  - `biomart_attribute=` - a string containing the biomart attribute (formatted using `get_biomart_attribute()`)
+
+  **Returns:** a dataframe containing the mapping between Ensembl ID and probe ID, as obtained via FTP
+</details>
+
+#### list_to_unique_piped_string()
+<details>
+  <summary>converts character vector into string denoting unique elements separated by '|' characters</summary>
+
+  ```R
+  list_to_unique_piped_string <- function(str_list) {
+    #! Convert vector of multi-mapped genes to string separated by '|' characters
+    #! e.g. c("GO1","GO2","GO2","G03") -> "GO1|GO2|GO3"
+    return(toString(unique(str_list)) %>% stringr::str_replace_all(pattern = stringr::fixed(", "), replacement = "|"))
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `str_list=` - vector of character elements
+
+  **Returns:** a string containing the unique elements from `str_list` concatenated together, separated by '|' characters
+</details>
+
+#### runsheet_to_design_matrix()
+<details>
+  <summary>loads the GeneLab runsheet into a list of dataframes</summary>
+
+  ```R
+  runsheet_to_design_matrix <- function(runsheet_path) {
+      # Pull all factors for each sample in the study from the runsheet created in Step 1
+      df = read.csv(runsheet_path)
+      # get only Factor Value columns
+      factors = as.data.frame(df[,grep("Factor.Value", colnames(df), ignore.case=TRUE)])
+      colnames(factors) = paste("factor",1:dim(factors)[2], sep= "_")
+      
+      # Load metadata from runsheet csv file
+      compare_csv = data.frame(sample_id = df[,c("Sample.Name")], factors)
+
+      # Create data frame containing all samples and respective factors
+      study <- as.data.frame(compare_csv[,2:dim(compare_csv)[2]])
+      colnames(study) <- colnames(compare_csv)[2:dim(compare_csv)[2]]
+      rownames(study) <- compare_csv[,1] 
+      
+      # Format groups and indicate the group that each sample belongs to
+      if (dim(study)[2] >= 2){
+          group<-apply(study,1,paste,collapse = " & ") # concatenate multiple factors into one condition per sample
+      } else{
+          group<-study[,1]
+      }
+      group_names <- paste0("(",group,")",sep = "") # human readable group names
+      group <- sub("^BLOCKER_", "",  make.names(paste0("BLOCKER_", group))) # group naming compatible with R models, this maintains the default behaviour of make.names with the exception that 'X' is never prepended to group namesnames(group) <- group_names
+      names(group) <- group_names
+
+      # Format contrasts table, defining pairwise comparisons for all groups
+      contrast.names <- combn(levels(factor(names(group))),2) # generate matrix of pairwise group combinations for comparison
+      contrasts <- apply(contrast.names, MARGIN=2, function(col) sub("^BLOCKER_", "",  make.names(paste0("BLOCKER_", stringr::str_sub(col, 2, -2)))))
+      contrast.names <- c(paste(contrast.names[1,],contrast.names[2,],sep = "v"),paste(contrast.names[2,],contrast.names[1,],sep = "v")) # format combinations for output table files names
+      contrasts <- cbind(contrasts,contrasts[c(2,1),])
+      colnames(contrasts) <- contrast.names
+      sampleTable <- data.frame(condition=factor(group))
+      rownames(sampleTable) <- df[,c("Sample.Name")]
+
+      condition <- sampleTable[,'condition']
+      names_mapping <- as.data.frame(cbind(safe_name = as.character(condition), original_name = group_names))
+
+      design <- model.matrix(~ 0 + condition)
+      design_data <- list( matrix = design, mapping = names_mapping, groups = as.data.frame( cbind(sample = df[,c("Sample.Name")], group = group_names) ), contrasts = contrasts )
+      return(design_data)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `runsheet_path=` - a string containing the path to the runsheet generated in [Step 1](#1-create-sample-runsheet)
+
+  **Returns:** a list of R objects containing the sample information and metadata
+  - `design_data$matrix` - a design (or model) matrix describing the conditions in the dataset
+  - `design_data$mapping` - a dataframe mapping the human-readable group names to the names of the conditions modified for use in R
+  - `design_data$groups` - a dataframe of group names and contrasts for each sample
+  - `design_data$contrasts` - a matrix of all pairwise comparisons of the groups
+</details>
+
+#### lm_fit_pairwise()
+<details>
+  <summary>performs all pairwise comparisons using <code>limma::lmFit()</code></summary>
+
+  ```R
+  lm_fit_pairwise <- function(norm_data, design) {
+      # Approach based on limma manual section 17.4 (version 3.52.4)
+      fit <- limma::lmFit(norm_data, design)
+
+      # Create Contrast Model
+      fit.groups <- colnames(fit$design)[which(fit$assign == 1)]
+      combos <- combn(fit.groups,2)
+      contrasts<-c(paste(combos[1,],combos[2,],sep = "-"),paste(combos[2,],combos[1,],sep = "-")) # format combinations for limma:makeContrasts
+      cont.matrix <- limma::makeContrasts(contrasts=contrasts,levels=design)
+      contrast.fit <- limma::contrasts.fit(fit, cont.matrix)
+
+      contrast.fit <- limma::eBayes(contrast.fit,trend=TRUE,robust=TRUE)
+      return(contrast.fit)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `norm_data=` - an R object containing log-ratios or log-expression values for a series of arrays, with rows corresponding to genes and columns to samples
+  - `design=` - the design matrix of the microarray experiment, with rows corresponding to samples and columns to coefficients to be estimated
+
+  **Returns:** an R object of class `MArrayLM`
+</details>
+
+#### reformat_names()
+<details>
+  <summary>reformats column names for consistency across DE analyses tables within GeneLab</summary>
+
+  ```R
+  reformat_names <- function(colname, group_name_mapping) {
+    new_colname <- colname  %>% 
+                    stringr::str_replace(pattern = "^P.value.adj.condition", replacement = "Adj.p.value_") %>%
+                    stringr::str_replace(pattern = "^P.value.condition", replacement = "P.value_") %>%
+                    stringr::str_replace(pattern = "^Coef.condition", replacement = "Log2fc_") %>% # This is the Log2FC as per: https://rdrr.io/bioc/limma/man/writefit.html
+                    stringr::str_replace(pattern = "^t.condition", replacement = "T.stat_") %>%
+                    stringr::str_replace(pattern = "^Genes\\.", replacement = "") %>%
+                    stringr::str_replace(pattern = ".condition", replacement = "v")
+    
+    # remap to group names before make.names was applied
+    unique_group_name_mapping <- unique(group_name_mapping) %>% arrange(-nchar(safe_name))
+    for ( i in seq(nrow(unique_group_name_mapping)) ) {
+      safe_name <- unique_group_name_mapping[i,]$safe_name
+      original_name <- unique_group_name_mapping[i,]$original_name
+      new_colname <- new_colname %>% stringr::str_replace(pattern = stringr::fixed(safe_name), replacement = original_name)
+    }
+
+    return(new_colname)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `colnames=` - a character vector containing the column names to reformat
+  - `group_name_mapping=` - a dataframe mapping the original human-readable group names to the R modified safe names
+
+  **Returns:** a character vector containing the formatted column names
+</details>
+
+#### generate_prefixed_column_order()
+<details>
+  <summary>creates a vector of column names based on subject and given prefixes; used for both contrasts and groups column name generation</summary>
+
+  ```R
+  generate_prefixed_column_order <- function(subjects, prefixes) {
+    # Track order of columns
+    final_order = c()
+
+    # For each contrast
+    for (subject in subjects) {
+      # Generate column names for each prefix and append to final_order
+      for (prefix in prefixes) {
+        final_order <- append(final_order, glue::glue("{prefix}{subject}"))
+      }
+    }
+    return(final_order)
+  }
+  ```
+
+  **Function Parameter Definitions:**
+  - `subjects` - a character vector containing subject strings to add prefixes to 
+  - `prefixes` - a character vector of prefixes to add to the beginning of each subject string
+
+  **Returns:** a character vector with all possible combinations of prefix + subject
+</details>
+
+<br>
+
+### 2c. Load Metadata and Raw Data
+
+```R
+# fileEncoding removes strange characters from the column names
+df_rs <- read.csv(runsheet, check.names = FALSE, fileEncoding = 'UTF-8-BOM') 
+
+if ( runsheet_paths_are_URIs(df_rs) ) {
+  print("Determined Raw Data Locations are URIS")
+  local_paths <- download_files_from_runsheet(df_rs)
+} else {
+  print("Or Determined Raw Data Locations are local paths")
+  local_paths <- df_rs$`Array Data File Path`
+}
+
+# uncompress files if needed
+if ( all_true(stringr::str_ends(local_paths, ".gz")) ) {
+  print("Determined these files are gzip compressed... uncompressing now")
+  # This does the uncompression
+  lapply(local_paths, R.utils::gunzip, remove = FALSE, overwrite = TRUE)
+  # This removes the .gz extension to get the uncompressed filenames
+  local_paths <- vapply(local_paths, 
+                        stringr::str_replace, # Run this function against each item in 'local_paths'
+                        FUN.VALUE = character(1),  # Execpt an character vector as a return
+                        USE.NAMES = FALSE,  # Don't use the input to assign names for the returned list
+                        pattern = ".gz$", # first argument for applied function
+                        replacement = ""  # second argument for applied function
+                        )
+}
+
+df_local_paths <- data.frame(`Sample Name` = df_rs$`Sample Name`, `Local Paths` = local_paths, check.names = FALSE)
+
+# Load raw data into R object
+raw_data <- limma::read.maimages(df_local_paths$`Local Paths`, 
+                                 source = "agilent",  # Specify platform
+                                 green.only = TRUE, # Specify one-channel design
+                                 names = df_local_paths$`Sample Name` # Map column names as Sample Names (instead of default filenames)
+                                 )
+
+# Handle raw data which lacks certain replaceable column data
+
+## This likely arises as Agilent Feature Extraction (the process that generates the raw data files on OSDR) 
+##   gives some user flexibilty in what probe column to ouput
+
+## Missing ProbeUID "Unique integer for each unique probe in a design"
+### Source: https://www.agilent.com/cs/library/usermanuals/public/GEN-MAN-G4460-90057.pdf Page 178
+### Remedy: Assign unique integers for each probe
+
+if ( !("ProbeUID" %in% colnames(raw_data$genes)) ) {
+  # Assign unique integers for each probe
+  print("Assigning `ProbeUID` as original files did not include them")
+  raw_data$genes$ProbeUID <- seq_len(nrow(raw_data$genes))
+}
+
+# Summarize raw data
+print(paste0("Number of Arrays: ", dim(raw_data)[2]))
+print(paste0("Number of Probes: ", dim(raw_data)[1]))
+```
+
+**Custom Functions Used:**
+
+- [all_true()](#all_true)
+- [runsheet_paths_are_URIs()](#runsheet_paths_are_URIs)
+- [download_files_from_runsheet()](#download_files_from_runsheet)
+
+**Input Data:**
+
+- `runsheet` (Path to runsheet, output from [Step 1](#1-create-sample-runsheet))
+
+**Output Data:**
+
+- `df_rs` (R dataframe containing information from the runsheet)
+- `raw_data` (R object containing raw microarray data)
+
+    > Note: The raw data R object will be used to generate quality assessment (QA) plots in the next step.
+
+<br>
+
+### 2d. Load Annotation Metadata
+
+```R
+# If using custom annotation, local_annotation_dir is path to directory containing annotation file and annotation_config_path is path/url to config file
+local_annotation_dir <- NULL # <path/to/custom_annotation>
+annotation_config_path <- NULL # <path/to/config_file>
+
+annotation_table_link <- "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/GL_RefAnnotTable-A_1.1.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv"
+
+annotation_table <- fetch_organism_specific_annotation_table(unique(df_rs$organism))
+
+annotation_file_path <- annotation_table$genelab_annots_link
+ensembl_version <- as.character(annotation_table$ensemblVersion)
+```
+
+**Custom Functions Used:**
+
+- [fetch_organism_specific_annotation_table()](#fetch_organism_specific_annotation_table)
+
+**Input Data:**
+
+- `local_annotation_dir` (Path to local annotation directory if using custom annotations, see [Step 7a](#7a-get-probe-annotations))
+
+    > Note: If not using custom annotations, leave `local_annotation_dir` as `NULL`.
+
+- `annotation_config_path` (URL or path to annotation config file if using custom annotations, see [Step 7a](#7a-get-probe-annotations))
+
+    > Note: If not using custom annotations, leave `annotation_config_path` as `NULL`.
+
+- `df_rs$organism` (organism specified in the runsheet created in [Step 1](#1-create-sample-runsheet))
+- `annotation_table_link` (URL or path to latest GeneLab Annotations file, see [GL-DPPD-7110-A_annotations.csv](../../../GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv))
+
+**Output Data:**
+
+- `annotation_file_path` (reference organism annotation file url indicated in the 'genelab_annots_link' column of the GeneLab Annotations file provided in `annotation_table_link`)
+- `ensembl_version` (reference organism Ensembl version indicated in the 'ensemblVersion' column of the GeneLab Annotations file provided in `annotation_table_link`)
+
+<br>
+
+---
+
+## 3. Raw Data Quality Assessment
+
+<br>
+
+### 3a. Density Plot
+
+```R
+# Plot settings
+par(
+  xpd = TRUE # Ensure legend can extend past plot area
+)
+
+number_of_sets = ceiling(dim(raw_data)[2] / 30) # Set of 30 samples, used to scale plot
+
+limma::plotDensities(raw_data, 
+                     log = TRUE, 
+                     legend = FALSE)
+legend("topright", legend = colnames(raw_data),
+        lty = 1, # Solid line
+        col = 1:ncol(raw_data), # Ensure legend color is in sync with plot
+        ncol = number_of_sets, # Set number of columns by number of sets
+        cex = max(0.5, 1 + 0.2 - (number_of_sets*0.2)) # Reduce scale by 20% for each column beyond 1, minimum of 0.5
+      )
+```
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- Plot containing the density of raw intensities for each array (raw intensity values with background intensity values subtracted; lack of overlap indicates a need for normalization)
+
+<br>
+
+### 3b. Pseudo Image Plots
+
+```R
+agilent_image_plot(raw_data, transform_func = function(expression_matrix) log2(expression_matrix + 1))
+```
+
+**Custom Functions Used:**
+
+- [agilent_image_plot()](#agilent_image_plot)
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- Pseudo images of each array before background correction and normalization 
+
+<br>
+
+### 3c. MA Plots
+
+```R
+for ( array_i in seq(colnames(raw_data$E)) ) {
+  sample_name <- rownames(raw_data$targets)[array_i]
+  limma::plotMA(raw_data,array=array_i,xlab="Average log-expression",ylab="Expression log-ratio (this sample vs. others)", main = sample_name, status=raw_data$genes$ControlType)
+}
+```
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- M (log ratio of the subject array vs a pseudo-reference, the mean of all other arrays) vs. A (average log expression) plot for each array before background correction and normalization (negative and positive control probes are in green and red, respectively)
+
+<br>
+
+### 3d. Foreground-Background Plots
+
+```R
+for ( array_i in seq(colnames(raw_data$E)) ) {
+  sample_name <- rownames(raw_data$targets)[array_i]
+  limma::plotFB(raw_data, array = array_i, xlab = "log2 Background", ylab = "log2 Foreground", main = sample_name) 
+}
+```
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- Foreground vs. background expression plot for each array before background correction and normalization 
+
+<br>
+
+### 3e. Boxplots
+
+```R
+boxplot_expression_safe_margin(raw_data, transform_func = log2)
+```
+
+**Custom Functions Used:**
+
+- [boxplot_expression_safe_margin()](#boxplot_expression_safe_margin)
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- Boxplot of raw expression data for each array before background correction and normalization 
+
+<br>
+
+---
+
+## 4. Background Correction
+
+```R
+background_corrected_data <- limma::backgroundCorrect(raw_data, method = "normexp")
+```
+
+**Input Data:**
+
+- `raw_data` (raw data R object created in [Step 2c](#2c-load-metadata-and-raw-data) above)
+
+**Output Data:**
+
+- `background_corrected_data` (R object containing background-corrected microarray data)
+
+  >   
+  > Note: Background correction was performed using the `normexp` method as recommended by [Ritchie, M.E., et al.](http://bioinformatics.oxfordjournals.org/content/23/20/2700), which performs background correction and quantile normalization using the control probes by utilizing the `normexp.fit.control` function to estimate the parameters required by normal+exponential(normexp) convolution model with the help of negative control probes, followed by the `normexp.signal` function to perform the background correction.
+
+<br>
+
+---
+
+## 5. Between Array Normalization
+
+```R
+# Normalize background-corrected data using the quantile method
+norm_data <- limma::normalizeBetweenArrays(background_corrected_data, method = "quantile")
+
+# Summarize background-corrected and normalized data
+print(paste0("Number of Arrays: ", dim(norm_data)[2]))
+print(paste0("Number of Probes: ", dim(norm_data)[1]))
+```
+
+**Input Data:**
+
+- `background_corrected_data` (R object containing background-corrected microarray data created in [Step 4](#4-background-correction) above)
+
+**Output Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data)
+ 
+  >   
+  > Note: Normalization was performed using the `quantile` method, which forces the entire empirical distribution of all arrays to be identical
+
+<br>
+
+---
+
+## 6. Normalized Data Quality Assessment
+
+<br>
+
+### 6a. Density Plot
+
+```R
+# Plot settings
+par(
+  xpd = TRUE # Ensure legend can extend past plot area
+)
+
+number_of_sets = ceiling(dim(norm_data)[2] / 30) # Set of 30 samples, used to scale plot
+
+limma::plotDensities(norm_data, 
+                     log = TRUE, 
+                     legend = FALSE)
+legend("topright", legend = colnames(norm_data),
+        lty = 1, # Solid line
+        col = 1:ncol(norm_data), # Ensure legend color is in sync with plot
+        ncol = number_of_sets, # Set number of columns by number of sets
+        cex = max(0.5, 1 + 0.2 - (number_of_sets*0.2)) # Reduce scale by 20% for each column beyond 1, minimum of 0.5
+      )
+```
+
+**Input Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization) above)
+
+**Output Data:**
+
+- Plot containing the density of background-corrected and normalized intensities for each array (near complete overlap is expected after normalization)
+
+<br>
+
+### 6b. Pseudo Image Plots
+
+```R
+agilent_image_plot(norm_data, 
+                 transform_func = function(expression_matrix) log2(2**expression_matrix + 1) # Compute as log2 of normalized expression after adding a +1 offset to prevent negative values in the pseudoimage
+                 )
+```
+
+**Custom Functions Used:**
+
+- [agilent_image_plot()](#agilent_image_plot)
+
+**Input Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization) above)
+
+**Output Data:**
+
+- Pseudo images of each array after background correction and normalization 
+
+<br>
+
+### 6c. MA Plots
+
+```R
+for ( array_i in seq(colnames(norm_data$E)) ) {
+  sample_name <- rownames(norm_data$targets)[array_i]
+  limma::plotMA(norm_data,array=array_i,xlab="Average log-expression",ylab="Expression log-ratio (this sample vs. others)", main = sample_name, status=norm_data$genes$ControlType)
+}
+```
+
+**Input Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization) above)
+
+**Output Data:**
+
+- M (log ratio of the subject array vs a pseudo-reference, the mean of all other arrays) vs. A (average log expression) plot for each array after background correction and normalization (negative and positive control probes are in green and red, respectively)
+
+<br>
+
+### 6d. Boxplots
+
+```R
+boxplot_expression_safe_margin(norm_data)
+```
+
+**Custom Functions Used:**
+
+- [boxplot_expression_safe_margin()](#boxplot_expression_safe_margin)
+
+**Input Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization) above)
+
+**Output Data:**
+
+- Boxplot of expression data for each array after background correction and normalization 
+
+<br>
+
+---
+
+## 7. Probe Annotations
+
+<br>
+
+### 7a. Get Probe Annotations
+
+```R
+organism <- shortened_organism_name(unique(df_rs$organism))
+annot_key <- ifelse(organism %in% c("athaliana"), 'TAIR', 'ENSEMBL')
+
+if (organism %in% c("athaliana")) {
+  ENSEMBL_VERSION = ensembl_version
+  ensembl_genomes_portal = "plants"
+  print(glue::glue("Using ensembl genomes ftp to get specific version of probe id mapping table. Ensembl genomes portal: {ensembl_genomes_portal}, version: {ENSEMBL_VERSION}"))
+  expected_attribute_name <- get_biomart_attribute(df_rs)
+  df_mapping <- get_ensembl_genomes_mappings_from_ftp(
+    organism = organism,
+    ensembl_genomes_portal = ensembl_genomes_portal,
+    ensembl_genomes_version = ENSEMBL_VERSION,
+    biomart_attribute = expected_attribute_name
+    )
+
+  # TAIR from the mapping tables tend to be in the format 'AT1G01010.1' but the raw data has 'AT1G01010'
+  # So here we remove the '.NNN' from the mapping table where .NNN is any number
+  df_mapping$ensembl_gene_id <- stringr::str_replace_all(df_mapping$ensembl_gene_id, "\\.\\d+$", "")
+
+  use_custom_annot <- FALSE
+} else {
+  # Use biomart from main Ensembl website which archives keep each release on the live service
+  # locate dataset
+  expected_dataset_name <- shortened_organism_name(unique(df_rs$organism)) %>% stringr::str_c("_gene_ensembl")
+  print(paste0("Expected dataset name: '", expected_dataset_name, "'"))
+
+  expected_attribute_name <- get_biomart_attribute(df_rs)
+  print(paste0("Expected attribute name: '", expected_attribute_name, "'"))
+
+  # Specify Ensembl version used in current GeneLab reference annotations
+  ENSEMBL_VERSION <- ensembl_version
+
+  print(glue::glue("Using Ensembl biomart to get specific version of mapping table. Ensembl version: {ENSEMBL_VERSION}"))
+
+  # Check if organism/array design is supported in biomart
+  use_custom_annot <- TRUE
+
+  ensembl <- biomaRt::useEnsembl(biomart = "genes", version = ENSEMBL_VERSION)
+  ensembl_datasets <- biomaRt::listDatasets(ensembl)
+  if (expected_dataset_name %in% ensembl_datasets$dataset) {
+    ensembl <- biomaRt::useEnsembl(biomart = "genes", dataset = expected_dataset_name, version = ENSEMBL_VERSION)
+    ensembl_attributes <- biomaRt::listAttributes(ensembl)
+    if (expected_attribute_name %in% ensembl_attributes$name) {
+      use_custom_annot <- FALSE
+    }
+  }
+
+  if (use_custom_annot) {
+    unloadNamespace("biomaRt")
+  } else {
+    print(ensembl)
+
+    probe_ids <- unique(norm_data$genes$ProbeName)
+
+    # Create probe map
+    # Run Biomart Queries in chunks to prevent request timeouts
+    #   Note: If timeout is occuring (possibly due to larger load on biomart), reduce chunk size
+    CHUNK_SIZE= 1500
+    probe_id_chunks <- split(probe_ids, ceiling(seq_along(probe_ids) / CHUNK_SIZE))
+    df_mapping <- data.frame()
+    for (i in seq_along(probe_id_chunks)) {
+      probe_id_chunk <- probe_id_chunks[[i]]
+      print(glue::glue("Running biomart query chunk {i} of {length(probe_id_chunks)}. Total probes IDS in query ({length(probe_id_chunk)})"))
+      chunk_results <- biomaRt::getBM(
+          attributes = c(
+              expected_attribute_name,
+              "ensembl_gene_id"
+              ), 
+              filters = expected_attribute_name, 
+              values = probe_id_chunk, 
+              mart = ensembl)
+
+      if (nrow(chunk_results) > 0) {
+        df_mapping <- df_mapping %>% dplyr::bind_rows(chunk_results)
+      }
+      
+      Sys.sleep(10) # Slight break between requests to prevent back-to-back requests
+    }
+  }
+}
+
+# At this point, we have df_mapping from either the biomart live service or the ensembl genomes ftp archive depending on the organism
+# If no df_mapping obtained (e.g., not supported in biomart), use custom annotations; otherwise, merge in-house annotations to df_mapping
+
+if (use_custom_annot) {
+  expected_attribute_name <- 'ProbeName'
+
+  annot_type <- 'NO_CUSTOM_ANNOT'
+  if (!is.null(local_annotation_dir) && !is.null(annotation_config_path)) {
+    config_df <- read.csv(annotation_config_path, row.names=1)
+    if (unique(df_rs$`biomart_attribute`) %in% row.names(config_df)) {
+      annot_config <- config_df[unique(df_rs$`biomart_attribute`), ]
+      annot_type <- annot_config$annot_type[[1]]
+    } else {
+      warning(paste0("No entry for '", unique(df_rs$`biomart_attribute`), "' in provided config file: ", annotation_config_path))
+    }
+  } else {
+    warning("Need to provide both local_annotation_dir and annotation_config_path to use custom annotation.")
+  }
+
+  if (annot_type == 'agilent') {
+    unique_probe_ids <- read.delim(
+      file.path(local_annotation_dir, annot_config$annot_filename[[1]]),
+      header = TRUE, na.strings = c('NA', '')
+    )[c('ProbeID', 'EnsemblID', 'GeneSymbol', 'GeneName', 'RefSeqAccession', 'EntrezGeneID', 'GO')]
+
+    stopifnot(nrow(unique_probe_ids) == length(unique(unique_probe_ids$ProbeID)))
+
+    # Clean columns
+    unique_probe_ids$GO <- purrr::map_chr(stringr::str_extract_all(unique_probe_ids$GO, 'GO:\\d{7}'), ~paste0(unique(.), collapse = '|')) %>% stringr::str_replace('^$', NA_character_)
+
+    names(unique_probe_ids) <- c('ProbeName', 'ENSEMBL', 'SYMBOL', 'GENENAME', 'REFSEQ', 'ENTREZID', 'GOSLIM_IDS')
+
+    unique_probe_ids$STRING_id <- NA_character_
+
+    gene_col <- 'ENSEMBL'
+    if (sum(!is.na(unique_probe_ids$ENTREZID)) > sum(!is.na(unique_probe_ids$ENSEMBL))) {
+      gene_col <- 'ENTREZID'
+    }
+    if (sum(!is.na(unique_probe_ids$SYMBOL)) > max(sum(!is.na(unique_probe_ids$ENTREZID)), sum(!is.na(unique_probe_ids$ENSEMBL)))) {
+      gene_col <- 'SYMBOL'
+    }
+
+    unique_probe_ids <- unique_probe_ids %>%
+                        dplyr::mutate( 
+                          count_gene_mappings = 1 + stringr::str_count(get(gene_col), stringr::fixed("|")),
+                          gene_mapping_source = gene_col
+                        )
+  } else if (annot_type == 'custom') {
+    unique_probe_ids <- read.csv(
+      file.path(local_annotation_dir, annot_config$annot_filename[[1]]),
+      header = TRUE, na.strings = c('NA', '')
+    )
+  } else {
+    annot_cols <- c('ProbeName', 'ENTREZID', 'SYMBOL', 'GENENAME', 'ENSEMBL', 'REFSEQ', 'GOSLIM_IDS', 'STRING_id', 'count_gene_mappings', 'gene_mapping_source')
+    unique_probe_ids <- setNames(data.frame(matrix(NA_character_, nrow = 1, ncol = length(annot_cols))), annot_cols)
+  }
+} else {
+  annot <- read.table(
+      as.character(annotation_file_path),
+      sep = "\t",
+      header = TRUE,
+      quote = "",
+      comment.char = ""
+  )
+
+  unique_probe_ids <- df_mapping %>% 
+                        dplyr::mutate(dplyr::across(!!sym(expected_attribute_name), as.character)) %>% # Ensure probe ids treated as character type
+                        dplyr::group_by(!!sym(expected_attribute_name)) %>% 
+                        dplyr::summarise(
+                          ENSEMBL = list_to_unique_piped_string(ensembl_gene_id)
+                          ) %>%
+                        # Count number of ensembl IDS mapped
+                        dplyr::mutate( 
+                          count_gene_mappings = 1 + stringr::str_count(ENSEMBL, stringr::fixed("|")),
+                          gene_mapping_source = annot_key
+                        ) %>%
+                        dplyr::left_join(annot, by = c("ENSEMBL" = annot_key))
+}
+
+norm_data$genes <- norm_data$genes %>% 
+  dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
+  dplyr::mutate( count_gene_mappings := ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+  dplyr::mutate( gene_mapping_source := unique(unique_probe_ids$gene_mapping_source) )
+```
+
+**Custom Functions Used:**
+
+- [shortened_organism_name()](#shortened_organism_name)
+- [get_biomart_attribute()](#get_biomart_attribute)
+- [get_ensembl_genomes_mappings_from_ftp()](#get_ensembl_genomes_mappings_from_ftp)
+- [list_to_unique_piped_string()](#list_to_unique_piped_string)
+
+**Input Data:**
+
+- `df_rs$organism` (organism specified in the runsheet created in [Step 1](#1-create-sample-runsheet))
+- `df_rs$biomart_attribute` (array design biomart identifier specified in the runsheet created in [Step 1](#1-create-sample-runsheet))
+- `annotation_file_path` (reference organism annotation file url indicated in the 'genelab_annots_link' column of the GeneLab Annotations file provided in `annotation_table_link`, output from [Step 2d](#2d-load-annotation-metadata))
+- `ensembl_version` (reference organism Ensembl version indicated in the 'ensemblVersion' column of the GeneLab Annotations file provided in `annotation_table_link`, output from [Step 2d](#2d-load-annotation-metadata))
+- `annot_key` (keytype to join annotation table and microarray probes, dependent on organism, e.g. mus musculus uses 'ENSEMBL')
+- `local_annotation_dir` (path to local annotation directory if using custom annotations, output from [Step 2d](#2d-load-annotation-metadata))
+- `annotation_config_path` (URL or path to annotation config file if using custom annotations, output from [Step 2d](#2d-load-annotation-metadata))
+
+  > Note: See [Agilent_array_annotations.csv](../Array_Annotations/Agilent_array_annotations.csv) for the latest config file used at GeneLab. This file can also be created manually by following the [file specification](../Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/README.md).
+
+- `norm_data$genes` (Manufacturer's probe metadata, including probe IDs and sequence position gene annotations associated with the `norm_data` R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization))
+
+**Output Data:**
+
+- `unique_probe_ids` (R object containing probe ID to gene annotation mappings)
+- `norm_data$genes` (Probe metadata, updated to include gene annotations specified by [Biomart](https://bioconductor.org/packages/3.14/bioc/html/biomaRt.html) or custom annotations)
+
+<br>
+
+### 7b. Summarize Gene Mapping
+
+```R
+# Pie Chart with Percentages
+slices <- c(
+    'Control probes' = nrow(norm_data$gene %>% dplyr::filter(ControlType != 0) %>% dplyr::distinct(ProbeName)), 
+    'Unique Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings == 1) %>% dplyr::distinct(ProbeName)), 
+    'Multi Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings > 1) %>% dplyr::distinct(ProbeName)), 
+    'No Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings == 0) %>% dplyr::distinct(ProbeName))
+)
+pct <- round(slices/sum(slices)*100)
+chart_names <- names(slices)
+chart_names <- glue::glue("{names(slices)} ({slices})") # add count to labels
+chart_names <- paste(chart_names, pct) # add percents to labels
+chart_names <- paste(chart_names,"%",sep="") # ad % to labels
+pie(slices,labels = chart_names, col=rainbow(length(slices)),
+    main=glue::glue("Mapping to Primary Keytype\n {nrow(norm_data$gene %>% dplyr::distinct(ProbeName))} Total Unique Probes")
+    )
+
+original_mapping_rate = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(ProbeName != SystematicName) %>% dplyr::distinct(ProbeName))
+print(glue::glue("Original Manufacturer Reported Mapping Count: {original_mapping_rate}"))
+print(glue::glue("Unique Mapping Count: {slices[['Unique Mapping']]}"))
+```
+
+**Input Data:**
+
+- `norm_data$genes` (Probe metadata, updated to include gene annotations specified by [Biomart](https://bioconductor.org/packages/3.14/bioc/html/biomaRt.html) or custom annotations, output from [Step 7a](#7a-get-probe-annotations) above)
+
+**Output Data:**
+
+- A pie chart denoting the gene mapping rates for each unique probe ID
+- A printout denoting the count of unique mappings for both the original manufacturer mapping and the gene mapping
+
+<br>
+
+### 7c. Generate Annotated Raw and Normalized Expression Tables
+
+```R
+## Reorder columns before saving to file
+ANNOTATIONS_COLUMN_ORDER = c(
+  annot_key,
+  "SYMBOL",
+  "GENENAME",
+  "REFSEQ",
+  "ENTREZID",
+  "STRING_id",
+  "GOSLIM_IDS"
+)
+
+PROBE_INFO_COLUMN_ORDER = c(
+  "ProbeUID",
+  "ProbeName",
+  "count_gene_mappings",
+  "gene_mapping_source"
+)
+SAMPLE_COLUMN_ORDER <- df_rs$`Sample Name`
+
+## Generate raw intensity matrix that includes annotations
+raw_data_matrix <- background_corrected_data$genes %>% 
+                    dplyr::select(ProbeUID, ProbeName) %>%
+                    dplyr::bind_cols(background_corrected_data$E) %>% 
+                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
+                    dplyr::mutate( count_gene_mappings = ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+                    dplyr::mutate( gene_mapping_source = unique(unique_probe_ids$gene_mapping_source) )
+
+## Perform reordering
+FINAL_COLUMN_ORDER <- c(
+  ANNOTATIONS_COLUMN_ORDER, 
+  PROBE_INFO_COLUMN_ORDER, 
+  SAMPLE_COLUMN_ORDER
+  )
+
+raw_data_matrix <- raw_data_matrix %>% 
+  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
+
+write.csv(raw_data_matrix, file.path(DIR_RAW_DATA, "raw_intensities_GLmicroarray.csv"), row.names = FALSE)
+
+## Generate normalized expression matrix that includes annotations
+norm_data_matrix <- norm_data$genes %>% 
+                    dplyr::select(ProbeUID, ProbeName) %>%
+                    dplyr::bind_cols(norm_data$E) %>% 
+                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
+                    dplyr::mutate( count_gene_mappings = ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+                    dplyr::mutate( gene_mapping_source = unique(unique_probe_ids$gene_mapping_source) )
+
+norm_data_matrix <- norm_data_matrix %>% 
+  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
+
+write.csv(norm_data_matrix, file.path(DIR_NORMALIZED_EXPRESSION, "normalized_expression_GLmicroarray.csv"), row.names = FALSE)
+```
+
+**Input Data:**
+
+- `df_rs` (R dataframe containing information from the runsheet, output from [Step 2c](#2c-load-metadata-and-raw-data))
+- `annot_key` (keytype to join annotation table and microarray probes, dependent on organism, e.g. mus musculus uses 'ENSEMBL', defined in [Step 7a](#7a-get-probe-annotations))
+- `background_corrected_data` (R object containing background-corrected microarray data, output from [Step 4](#4-background-correction))
+- `norm_data` (R object containing background-corrected and normalized microarray data, output from [Step 5](#5-between-array-normalization))
+- `unique_probe_ids` (R object containing probe ID to gene annotation mappings, output from [Step 7a](#7a-get-probe-annotations))
+
+**Output Data:**
+
+- **raw_intensities_GLmicroarray.csv** (table containing the background corrected unnormalized intensity values for each sample including gene annotations)
+- **normalized_expression_GLmicroarray.csv** (table containing the background corrected, normalized intensity values for each sample including gene annotations)
+
+## 8. Perform Probe Differential Expression (DE)
+
+> Note: Run differential expression analysis only if there are at least 2 replicates per factor group.
+
+<br>
+
+### 8a. Generate Design Matrix
+
+```R
+# Loading metadata from runsheet csv file
+design_data <- runsheet_to_design_matrix(runsheet)
+design <- design_data$matrix
+
+# Write SampleTable.csv and contrasts.csv file
+write.csv(design_data$groups, file.path(DIR_DGE, "SampleTable_GLmicroarray.csv"), row.names = FALSE)
+write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"))
+```
+
+**Custom Functions Used:**
+
+- [runsheet_to_design_matrix()](#runsheet_to_design_matrix)
+
+**Input Data:**
+
+- `runsheet` (Path to runsheet, output from [Step 1](#1-create-sample-runsheet))
+
+**Output Data:**
+
+- `design_data` (a list of R objects containing the sample information and metadata
+  - `design_data$matrix` - a design (or model) matrix describing the conditions in the dataset
+  - `design_data$mapping` - a dataframe mapping the human-readable group names to the names of the conditions modified for use in R
+  - `design_data$groups` - a dataframe of group names and contrasts for each sample
+  - `design_data$contrasts` - a matrix of all pairwise comparisons of the groups)
+- `design` (R object containing the limma study design matrix, indicating the group that each sample belongs to)
+- **SampleTable_GLmicroarray.csv** (table containing samples and their respective groups)
+- **contrasts_GLmicroarray.csv** (table containing all pairwise comparisons)
+
+<br>
+
+### 8b. Perform Individual Probe Level DE
+
+```R
+# Calculate results
+res <- lm_fit_pairwise(norm_data, design)
+
+# Print DE table, without filtering
+limma::write.fit(res, adjust = 'BH', 
+                file = "INTERIM.csv",
+                row.names = FALSE,
+                quote = TRUE,
+                sep = ",")
+```
+
+**Custom Functions Used:**
+
+- [lm_fit_pairwise()](#lm_fit_pairwise)
+
+**Input Data:**
+
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization))
+- `design` (R object containing the limma study design matrix, indicating the group that each sample belongs to, created in [Step 8a](#8a-generate-design-matrix) above)
+
+**Output Data:**
+
+- INTERIM.csv (Statistical values from individual probe level DE analysis, including:
+  - Log2fc between all pairwise comparisons
+  - T statistic for all pairwise comparison tests
+  - P value for all pairwise comparison tests)
+
+<br>
+
+### 8c. Add Annotation and Stats Columns and Format DE Table
+
+```R
+## Reformat Table for consistency across DE analyses tables within GeneLab ##
+
+# Read in DE table 
+df_interim <- read.csv("INTERIM.csv")
+
+print("Remove extra columns from final table")
+
+# These columns are data mapped to column PROBEID as per the original Manufacturer and can be linked as needed
+colnames_to_remove = c(
+  "Genes.Row",
+  "Genes.Col",
+  "Genes.Start",
+  "Genes.Sequence",
+  "Genes.ControlType",
+  "Genes.GeneName",
+  "Genes.SystematicName",
+  "Genes.Description",
+  "AveExpr" # Replaced by 'All.mean' column
+)
+
+df_interim <- df_interim %>% dplyr::select(-any_of(colnames_to_remove))
+
+df_interim <- df_interim %>% dplyr::rename_with(reformat_names, .cols = matches('\\.condition|^Genes\\.'), group_name_mapping = design_data$mapping)
+
+
+# Concatenate expression values for each sample
+df_interim <- df_interim %>% dplyr::bind_cols(norm_data$E)
+
+
+## Add Group Wise Statistics ##
+
+# Group mean and standard deviations for normalized expression values are computed and added to the table
+
+unique_groups <- unique(design_data$group$group)
+for ( i in seq_along(unique_groups) ) {
+  current_group <- unique_groups[i]
+  current_samples <- design_data$group %>% 
+                      dplyr::group_by(group) %>%
+                      dplyr::summarize(
+                        samples = sort(unique(sample))
+                      ) %>%
+                      dplyr::filter(
+                        group == current_group
+                      ) %>% 
+                      dplyr::pull()
+                    
+  print(glue::glue("Computing mean and standard deviation for Group {i} of {length(unique_groups)}"))
+  print(glue::glue("Group: {current_group}"))
+  print(glue::glue("Samples in Group: '{toString(current_samples)}'"))
+  
+  df_interim <- df_interim %>% 
+    dplyr::mutate( 
+      "Group.Mean_{current_group}" := rowMeans(dplyr::select(., all_of(current_samples))),
+      "Group.Stdev_{current_group}" := matrixStats::rowSds(as.matrix(dplyr::select(., all_of(current_samples)))),
+      ) %>% 
+    dplyr::ungroup() %>%
+    as.data.frame()
+}
+
+df_interim <- df_interim %>% 
+  dplyr::mutate( 
+    "All.mean" := rowMeans(dplyr::select(., all_of(SAMPLE_COLUMN_ORDER))),
+    "All.stdev" := matrixStats::rowSds(as.matrix(dplyr::select(., all_of(SAMPLE_COLUMN_ORDER)))),
+    ) %>% 
+  dplyr::ungroup() %>%
+  as.data.frame()
+
+STAT_COLUMNS_ORDER <- generate_prefixed_column_order(
+  subjects = colnames(design_data$contrasts),
+  prefixes = c(
+    "Log2fc_",
+    "T.stat_",
+    "P.value_",
+    "Adj.p.value_"
+    )
+  )
+ALL_SAMPLE_STATS_COLUMNS_ORDER <- c(
+  "All.mean",
+  "All.stdev",
+  "F",
+  "F.p.value"
+)
+
+GROUP_MEAN_STDEV_COLUMNS_ORDER <- generate_prefixed_column_order(
+  subjects = unique(design_data$groups$group),
+  prefixes = c(
+    "Group.Mean_",
+    "Group.Stdev_"
+  )
+)
+
+FINAL_COLUMN_ORDER <- c(
+  ANNOTATIONS_COLUMN_ORDER, 
+  PROBE_INFO_COLUMN_ORDER, 
+  SAMPLE_COLUMN_ORDER, 
+  STAT_COLUMNS_ORDER, 
+  ALL_SAMPLE_STATS_COLUMNS_ORDER, 
+  GROUP_MEAN_STDEV_COLUMNS_ORDER
+  )
+
+## Assert final column order includes all columns from original table
+if (!setequal(FINAL_COLUMN_ORDER, colnames(df_interim))) {
+  write.csv(FINAL_COLUMN_ORDER, "FINAL_COLUMN_ORDER.csv")
+  NOT_IN_DF_INTERIM <- paste(setdiff(FINAL_COLUMN_ORDER, colnames(df_interim)), collapse = ":::")
+  NOT_IN_FINAL_COLUMN_ORDER <- paste(setdiff(colnames(df_interim), FINAL_COLUMN_ORDER), collapse = ":::")
+  stop(glue::glue("Column reordering attempt resulted in different sets of columns than original. Names unique to 'df_interim': {NOT_IN_FINAL_COLUMN_ORDER}. Names unique to 'FINAL_COLUMN_ORDER': {NOT_IN_DF_INTERIM}."))
+}
+
+## Perform reordering
+df_interim <- df_interim %>% dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
+
+# Save to file
+write.csv(df_interim, file.path(DIR_DGE, "differential_expression_GLmicroarray.csv"), row.names = FALSE)
+```
+
+**Custom Functions Used:**
+
+- [reformat_names()](#reformat_names)
+- [generate_prefixed_column_order()](#generate_prefixed_column_order)
+
+**Input Data:**
+
+- `design_data` (a list of R objects containing the sample information and metadata, output from [Step 8a](#8a-generate-design-matrix) above)
+- INTERIM.csv (Statistical values from individual probe level DE analysis, output from [Step 8b](#8b-perform-individual-probe-level-de) above)
+- `norm_data` (R object containing background-corrected and normalized microarray data created in [Step 5](#5-between-array-normalization))
+
+**Output Data:**
+
+- **differential_expression_GLmicroarray.csv** (table containing normalized expression for each sample, group statistics, Limma probe DE results for each pairwise comparison, and gene annotations)
+
+
+> All steps of the Microarray pipeline are performed using R markdown and the completed R markdown is rendered (via Quarto) as an html file (**NF_MAAgilent1ch_v\*_GLmicroarray.html**) and published in the [Open Science Data Repository (OSDR)](https://osdr.nasa.gov/bio/repo/) for the respective dataset.
diff --git a/Microarray/Agilent_1-channel/README.md b/Microarray/Agilent_1-channel/README.md
index 79e11ec4..23fbb213 100644
--- a/Microarray/Agilent_1-channel/README.md
+++ b/Microarray/Agilent_1-channel/README.md
@@ -1,7 +1,7 @@
 # GeneLab bioinformatics processing pipeline for Agilent 1-channel microarray data
 
 
-> **The document [`GL-DPPD-7112.md`](Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md) holds an overview and example commands for how GeneLab processes Agilent 1-channel microarray datasets. See the [Repository Links](#repository-links) descriptions below for more information. Processed data output files and processing code is provided for each GLDS dataset along with the processed data in the [Open Science Data Repository (OSDR)](https://osdr.nasa.gov/bio/repo/).**  
+> **The document [`GL-DPPD-7112-A.md`](Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md) holds an overview and example commands for how GeneLab processes Agilent 1-channel microarray datasets. See the [Repository Links](#repository-links) descriptions below for more information. Processed data output files and processing code is provided for each GLDS dataset along with the processed data in the [Open Science Data Repository (OSDR)](https://osdr.nasa.gov/bio/repo/).**  
 
 --- 
 
@@ -20,6 +20,10 @@
 
   - Contains instructions for installing and running the GeneLab MAAgilent1ch workflow
 
+* [**Array_Annotations**](Array_Annotations)
+
+  - Contains the custom annotations table used in the GeneLab NF_MAAgilent1ch
+
 ---
 **Developed by:**  
 Jonathan Oribello  
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/CHANGELOG.md b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/CHANGELOG.md
index d4beb45b..4d7c4001 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/CHANGELOG.md
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.5](https://github.com/nasa/GeneLab_Data_Processing/tree/NF_MAAgilent1ch_1.0.5/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch) - 2025-03-03
+
+### Added
+
+- Support for custom annotations, see [specification](examples/annotations/README.md)
+- Add option to skip differential expression analysis (`--skipDE`) ([#104](https://github.com/nasa/GeneLab_Data_Processing/issues/104))
+
 ## [1.0.4](https://github.com/nasa/GeneLab_Data_Processing/tree/NF_MAAgilent1ch_1.0.4/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch) - 2024-10-02
 
 ### Added
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md
index 83298da5..9844cd84 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/README.md
@@ -4,7 +4,7 @@
 
 ### Implementation Tools <!-- omit in toc -->
 
-The current GeneLab Agilent 1 Channel Microarray consensus processing pipeline (NF_MAAgilent1ch), [GL-DPPD-7112](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md), is implemented as a [Nextflow](https://nextflow.io/) DSL2 workflow and utilizes [Singularity](https://docs.sylabs.io/guides/3.10/user-guide/introduction.html) to run all tools in containers. This workflow (NF_MAAgilent1ch) is run using the command line interface (CLI) of any unix-based system.  While knowledge of creating workflows in Nextflow is not required to run the workflow as is, [the Nextflow documentation](https://nextflow.io/docs/latest/index.html) is a useful resource for users who want to modify and/or extend this workflow.   
+The current GeneLab Agilent 1 Channel Microarray consensus processing pipeline (NF_MAAgilent1ch), [GL-DPPD-7112-A](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md), is implemented as a [Nextflow](https://nextflow.io/) DSL2 workflow and utilizes [Singularity](https://docs.sylabs.io/guides/3.10/user-guide/introduction.html) to run all tools in containers. This workflow (NF_MAAgilent1ch) is run using the command line interface (CLI) of any unix-based system.  While knowledge of creating workflows in Nextflow is not required to run the workflow as is, [the Nextflow documentation](https://nextflow.io/docs/latest/index.html) is a useful resource for users who want to modify and/or extend this workflow.   
 
 ### Workflow & Subworkflows <!-- omit in toc -->
 
@@ -14,7 +14,7 @@ The current GeneLab Agilent 1 Channel Microarray consensus processing pipeline (
 
 ---
 The NF_MAAgilent1ch workflow is composed of three subworkflows as shown in the image above.
-Below is a description of each subworkflow and the additional output files generated that are not already indicated in the [GL-DPPD-7112 pipeline document](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md):
+Below is a description of each subworkflow and the additional output files generated that are not already indicated in the [GL-DPPD-7112-A pipeline document](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md):
 
 1. **Analysis Staging Subworkflow**
 
@@ -25,7 +25,7 @@ Below is a description of each subworkflow and the additional output files gener
 2. **Agilent 1 Channel Microarray Processing Subworkflow**
 
    - Description:
-     - This subworkflow uses the staged raw data and metadata parameters from the Analysis Staging Subworkflow to generate processed data using the [GL-DPPD-7112 pipeline](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md).
+     - This subworkflow uses the staged raw data and metadata parameters from the Analysis Staging Subworkflow to generate processed data using the [GL-DPPD-7112-A pipeline](../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md).
 
 1. **V&V Pipeline Subworkflow**
 
@@ -93,9 +93,9 @@ We recommend installing Singularity on a system wide level as per the associated
 All files required for utilizing the NF_MAAgilent1ch GeneLab workflow for processing Agilent 1 Channel Microarray data are in the [workflow_code](workflow_code) directory. To get a copy of latest NF_MAAgilent1ch version on to your system, the code can be downloaded as a zip file from the release page then unzipped after downloading by running the following commands: 
 
 ```bash
-wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_MAAgilent1ch_1.0.4/NF_MAAgilent1ch_1.0.4.zip
+wget https://github.com/nasa/GeneLab_Data_Processing/releases/download/NF_MAAgilent1ch_1.0.5/NF_MAAgilent1ch_1.0.5.zip
 
-unzip NF_MAAgilent1ch_1.0.4.zip
+unzip NF_MAAgilent1ch_1.0.5.zip
 ```
 
 <br>
@@ -104,7 +104,7 @@ unzip NF_MAAgilent1ch_1.0.4.zip
 
 ### 3. Run the Workflow
 
-While in the location containing the `NF_MAAgilent1ch_1.0.4` directory that was downloaded in [step 2](#2-download-the-workflow-files), you are now able to run the workflow. Below are three examples of how to run the NF_MAAgilent1ch workflow:
+While in the location containing the `NF_MAAgilent1ch_1.0.5` directory that was downloaded in [step 2](#2-download-the-workflow-files), you are now able to run the workflow. Below are three examples of how to run the NF_MAAgilent1ch workflow:
 > Note: Nextflow commands use both single hyphen arguments (e.g. -help) that denote general nextflow arguments and double hyphen arguments (e.g. --ensemblVersion) that denote workflow specific parameters.  Take care to use the proper number of hyphens for each argument.
 
 <br>
@@ -112,7 +112,7 @@ While in the location containing the `NF_MAAgilent1ch_1.0.4` directory that was
 #### 3a. Approach 1: Run the workflow on a GeneLab Agilent 1 Channel Microarray dataset
 
 ```bash
-nextflow run NF_MAAgilent1ch_1.0.4/main.nf \ 
+nextflow run NF_MAAgilent1ch_1.0.5/main.nf \ 
    -profile singularity \
    --osdAccession OSD-548 \
    --gldsAccession GLDS-548 
@@ -125,7 +125,7 @@ nextflow run NF_MAAgilent1ch_1.0.4/main.nf \
 > Note: Specifications for creating a runsheet manually are described [here](examples/runsheet/README.md).
 
 ```bash
-nextflow run NF_MAAgilent1ch_1.0.4/main.nf \ 
+nextflow run NF_MAAgilent1ch_1.0.5/main.nf \ 
    -profile singularity \
    --runsheetPath </path/to/runsheet> 
 ```
@@ -134,7 +134,7 @@ nextflow run NF_MAAgilent1ch_1.0.4/main.nf \
 
 **Required Parameters For All Approaches:**
 
-* `NF_MAAgilent1ch_1.0.4/main.nf` - Instructs Nextflow to run the NF_MAAgilent1ch workflow 
+* `NF_MAAgilent1ch_1.0.5/main.nf` - Instructs Nextflow to run the NF_MAAgilent1ch workflow 
 
 * `-profile` - Specifies the configuration profile(s) to load, `singularity` instructs Nextflow to setup and use singularity for all software called in the workflow
 
@@ -166,7 +166,7 @@ nextflow run NF_MAAgilent1ch_1.0.4/main.nf \
 All parameters listed above and additional optional arguments for the NF_MAAgilent1ch workflow, including debug related options that may not be immediately useful for most users, can be viewed by running the following command:
 
 ```bash
-nextflow run NF_MAAgilent1ch_1.0.4/main.nf --help
+nextflow run NF_MAAgilent1ch_1.0.5/main.nf --help
 ```
 
 See `nextflow run -h` and [Nextflow's CLI run command documentation](https://nextflow.io/docs/latest/cli.html#run) for more options and details common to all nextflow workflows.
@@ -180,11 +180,11 @@ See `nextflow run -h` and [Nextflow's CLI run command documentation](https://nex
 All R code steps and output are rendered within a Quarto document yielding the following:
 
    - Output:
-     - NF_MAAgilent1ch_1.0.4.html (html report containing executed code and output including QA plots)
+     - NF_MAAgilent1ch_1.0.5.html (html report containing executed code and output including QA plots)
   
 
 The outputs from the Analysis Staging and V&V Pipeline Subworkflows are described below:
-> Note: The outputs from the Agilent 1 Channel Microarray Processing Subworkflow are documented in the [GL-DPPD-7112.md](../../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md) processing protocol.
+> Note: The outputs from the Agilent 1 Channel Microarray Processing Subworkflow are documented in the [GL-DPPD-7112-A.md](../../../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md) processing protocol.
 
 **Analysis Staging Subworkflow**
 
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/README.md b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/README.md
new file mode 100644
index 00000000..10a81ae2
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/README.md
@@ -0,0 +1,20 @@
+# Custom Annotations Specification
+
+## Description
+
+* If using custom gene annotations when processing Agilent 1-channel datasets through GeneLab's Agilent 1-channel processing pipeline, a csv config file must be provided as specified below.
+* See [Agilent_array_annotations.csv](../Array_Annotations/Agilent_array_annotations.csv) for the latest config file used at GeneLab.
+
+
+## Example
+
+- [config.csv](config.csv)
+
+
+## Required columns
+
+| Column Name | Type | Description | Example |
+|:------------|:-----|:------------|:--------|
+| array_design | string | A bioMart attribute identifier denoting the microarray probe/probeset attribute used for annotation mapping. | AGILENT SurePrint G3 GE 8x60k v3 |
+| annot_type | string | Used to determine how the custom annotations are parsed before merging to the data. Currently, only the below are supported: <ul><li>`agilent`: Annotations file is expected to be in the AA (All Annotations) format by [Agilent](https://earray.chem.agilent.com/earray/)</li><li>`custom`: Annotations file is merged as is, expected to have the following columns: `ProbesetID`, `ENTREZID`, `SYMBOL`, `GENENAME`, `ENSEMBL`, `REFSEQ`, `GOSLIM_IDS`, `STRING_id`, `count_gene_mappings`, `gene_mapping_source`</li></ul> | agilent |
+| annot_filename | string | Name of the custom annotations file. | 072363_D_AA_20240521.txt |
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/config.csv b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/config.csv
new file mode 100644
index 00000000..3ff24708
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/examples/annotations/config.csv
@@ -0,0 +1,2 @@
+array_design,annot_type,annot_filename
+AGILENT SurePrint G3 GE 8x60k v3,agilent,072363_D_AA_20240521.txt
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/Agile1CMP.qmd b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/Agile1CMP.qmd
index 73e555d8..6a1166ed 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/Agile1CMP.qmd
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/Agile1CMP.qmd
@@ -1,6 +1,6 @@
 ---
 title: "Agilent 1 Channel Processing"
-subtitle: "Workflow Version: NF_MAAgilent1ch_1.0.4"
+subtitle: "`r paste0('Workflow Version: NF_MAAgilent1ch_', params$workflow_version)`"
 date: now
 title-block-banner: true
 format:
@@ -14,13 +14,16 @@ format:
         number-sections: true
 
 params:
+  workflow_version: NULL
   id: NULL # str, used to name output files
   runsheet: NULL # str, path to runsheet
   biomart_attribute: NULL # str, used as a fallback value if 'Array Design REF' column is not found in the runsheet
-  annotation_file_path: NULL # str, Annotation file from 'genelab_annots_link' column of https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110_annotations.csv
-  organism: NULL # str, Used to determine primary keytype
+  annotation_file_path: NULL # str, Annotation file from 'genelab_annots_link' column of GeneLab Annotations file
+  ensembl_version: NULL # str, Used to determine ensembl version
+  local_annotation_dir: NULL
+  annotation_config_path: NULL
   DEBUG_limit_biomart_query: NULL # int, If supplied, only the first n probeIDs are queried
-  
+  run_DE: 'true'
 ---
 
 ## Validate Parameters <!-- non DPPD -->
@@ -34,6 +37,10 @@ if (is.null(params$runsheet)) {
 
 runsheet = params$runsheet # <path/to/runsheet>
 
+# If using custom annotation, local_annotation_dir is path to directory containing annotation file and annotation_config_path is path/url to config file
+local_annotation_dir <- params$local_annotation_dir # <path/to/annotation_dir>
+annotation_config_path <- params$annotation_config_path # <path/to/config_file>
+
 message(params)
 
 ## Set up output structure
@@ -56,32 +63,12 @@ print("Loading Runsheet...") # NON_DPPD
 # fileEncoding removes strange characters from the column names
 df_rs <- read.csv(runsheet, check.names = FALSE, fileEncoding = 'UTF-8-BOM')
 
-## Determines the organism specific annotation file to use based on the organism in the runsheet
-fetch_organism_specific_annotation_file_path <- function(organism) {
-  # Uses the GeneLab GL-DPPD-7110_annotations.csv file to find the organism specific annotation file path
-  # Raises an exception if the organism does not have an associated annotation file yet
-  
-
-  all_organism_table <- read.csv("https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110_annotations.csv")
-
-  annotation_file_path <- all_organism_table %>% dplyr::filter(species == organism) %>% dplyr::pull(genelab_annots_link)
-
-  # Guard clause: Ensure annotation_file_path populated
-  # Else: raise exception for unsupported organism
-  if (length(annotation_file_path) == 0) {
-    stop(glue::glue("Organism supplied '{organism}' is not supported. See the following url for supported organisms: https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110_annotations.csv.  Supported organisms will correspond to a row based on the 'species' column and include a url in the 'genelab_annots_link' column of that row"))
-  }
-
-  return(annotation_file_path)
-}
-annotation_file_path <- fetch_organism_specific_annotation_file_path(unique(df_rs$organism))
-
 # NON_DPPD:START
 print("Here is the embedded runsheet")
 DT::datatable(df_rs)
 # NON_DPPD:END
 print("Loading Raw Data...") # NON_DPPD
-allTrue <- function(i_vector) {
+all_true <- function(i_vector) {
   if ( length(i_vector) == 0 ) {
     stop(paste("Input vector is length zero"))
   }
@@ -89,13 +76,13 @@ allTrue <- function(i_vector) {
 }
 
 # Define paths to raw data files
-runsheetPathsAreURIs <- function(df_runsheet) {
-  allTrue(stringr::str_starts(df_runsheet$`Array Data File Path`, "https"))
+runsheet_paths_are_URIs <- function(df_runsheet) {
+  all_true(stringr::str_starts(df_runsheet$`Array Data File Path`, "https"))
 }
 
 
 # Download raw data files
-downloadFilesFromRunsheet <- function(df_runsheet) {
+download_files_from_runsheet <- function(df_runsheet) {
   urls <- df_runsheet$`Array Data File Path`
   destinationFiles <- df_runsheet$`Array Data File Name`
 
@@ -111,9 +98,9 @@ downloadFilesFromRunsheet <- function(df_runsheet) {
   destinationFiles # Return these paths
 }
 
-if ( runsheetPathsAreURIs(df_rs) ) {
+if ( runsheet_paths_are_URIs(df_rs) ) {
   print("Determined Raw Data Locations are URIS")
-  local_paths <- downloadFilesFromRunsheet(df_rs)
+  local_paths <- download_files_from_runsheet(df_rs)
 } else {
   print("Or Determined Raw Data Locations are local paths")
   local_paths <- df_rs$`Array Data File Path`
@@ -121,7 +108,7 @@ if ( runsheetPathsAreURIs(df_rs) ) {
 
 
 # uncompress files if needed
-if ( allTrue(stringr::str_ends(local_paths, ".gz")) ) {
+if ( all_true(stringr::str_ends(local_paths, ".gz")) ) {
   print("Determined these files are gzip compressed... uncompressing now")
   # This does the uncompression
   lapply(local_paths, R.utils::gunzip, remove = FALSE, overwrite = TRUE)
@@ -174,9 +161,12 @@ message(paste0("Number of Probes: ", dim(raw_data)[1])) # NON_DPPD
 DT::datatable(raw_data$targets, caption = "Sample to File Mapping")
 DT::datatable(head(raw_data$genes, n = 20), caption = "First 20 rows of raw data file embedded probes to genes table")
 # NON_DPPD:END
+
+annotation_file_path <- params$annotation_file_path
+ensembl_version <- params$ensembl_version
 ```
 
-## QA For Raw Data
+## Raw Data Quality Assessment
 
 ### Density Plot
 
@@ -213,7 +203,7 @@ legend("topright", legend = colnames(raw_data),
 #| layout-ncol: 2
 #| column: screen-inset-right # Allow images to flow all the way to the right
 #| fig-align: left
-agilentImagePlot <- function(eListRaw, transform_func = identity) {
+agilent_image_plot <- function(eListRaw, transform_func = identity) {
   # Adapted from this discussion: https://support.bioconductor.org/p/15523/
   copy_raw_data <- eListRaw
   copy_raw_data$genes$Block <- 1 # Agilent arrays only have one block
@@ -232,7 +222,7 @@ agilentImagePlot <- function(eListRaw, transform_func = identity) {
   }
 }
 
-agilentImagePlot(raw_data, transform_func = function(expression_matrix) log2(expression_matrix + 1))
+agilent_image_plot(raw_data, transform_func = function(expression_matrix) log2(expression_matrix + 1))
 ```
 
 ### MA Plots
@@ -272,7 +262,7 @@ for ( array_i in seq(colnames(raw_data$E)) ) {
 #| fig-align: left
 #| fig-width: 14
 #| fig-height: !expr max(3, ncol(raw_data) * 0.2)
-boxplotExpressionSafeMargin <- function(data, transform_func = identity, xlab = "Log2 Intensity") {
+boxplot_expression_safe_margin <- function(data, transform_func = identity, xlab = "Log2 Intensity") {
   # NON_DPPD:START
   #' plot boxplots of expression values
   #'
@@ -287,7 +277,7 @@ boxplotExpressionSafeMargin <- function(data, transform_func = identity, xlab =
     ggplot2::labs(y= "Sample Name", x = xlab)
 }
 
-boxplotExpressionSafeMargin(raw_data, transform_func = log2)
+boxplot_expression_safe_margin(raw_data, transform_func = log2)
 ```
 
 ## Background Correction
@@ -353,7 +343,7 @@ legend("topright", legend = colnames(norm_data),
 #| layout-ncol: 2
 #| column: screen-inset-right # Allow images to flow all the way to the right
 #| fig-align: left
-agilentImagePlot(norm_data, 
+agilent_image_plot(norm_data, 
                  transform_func = function(expression_matrix) log2(2**expression_matrix + 1) # Compute as log2 of normalized expression after adding a +1 offset to prevent negative values in the pseudoimage
                  )
 ```
@@ -380,20 +370,17 @@ for ( array_i in seq(colnames(norm_data$E)) ) {
 #| fig-align: left
 #| fig-width: 14
 #| fig-height: !expr max(3, ncol(norm_data) * 0.2)
-boxplotExpressionSafeMargin(norm_data)
+boxplot_expression_safe_margin(norm_data)
 ```
 
 
+## Probe Annotations
 
-## Perform Probe Differential Expression
-
-### Probe Differential Expression (DE)
-
-#### Add Probe Annotations
+### Get Probe Annotations
 
 ``` {r retrieve-probe-annotations}
 #| message: false
-shortenedOrganismName <- function(long_name) {
+shortened_organism_name <- function(long_name) {
   #' Convert organism names like 'Homo Sapiens' into 'hsapiens'
   tokens <- long_name %>% stringr::str_split(" ", simplify = TRUE)
   genus_name <- tokens[1]
@@ -405,7 +392,7 @@ shortenedOrganismName <- function(long_name) {
   return(short_name)
 }
 
-getBioMartAttribute <- function(df_rs) {
+get_biomart_attribute <- function(df_rs) {
   #' Returns resolved biomart attribute source from runsheet
   # NON_DPPD:START
   #' this either comes from the runsheet or as a fall back, the parameters injected during render
@@ -460,122 +447,205 @@ get_ensembl_genomes_mappings_from_ftp <- function(organism, ensembl_genomes_port
   return(mapping)
 }
 
+# Convert list of multi-mapped genes to string
+list_to_unique_piped_string <- function(str_list) {
+  #! convert lists into strings denoting unique elements separated by '|' characters
+  #! e.g. c("GO1","GO2","GO2","G03") -> "GO1|GO2|GO3"
+  return(toString(unique(str_list)) %>% stringr::str_replace_all(pattern = stringr::fixed(", "), replacement = "|"))
+}
+
 
-organism <- shortenedOrganismName(unique(df_rs$organism))
+organism <- shortened_organism_name(unique(df_rs$organism))
+annot_key <- ifelse(organism %in% c("athaliana"), 'TAIR', 'ENSEMBL')
 
 if (organism %in% c("athaliana")) {
-  ensembl_genomes_version = "54"
+  ENSEMBL_VERSION = ensembl_version
   ensembl_genomes_portal = "plants"
-  print(glue::glue("Using ensembl genomes ftp to get specific version of probe id mapping table. Ensembl genomes portal: {ensembl_genomes_portal}, version: {ensembl_genomes_version}"))
-  expected_attribute_name <- getBioMartAttribute(df_rs)
+  print(glue::glue("Using ensembl genomes ftp to get specific version of probe id mapping table. Ensembl genomes portal: {ensembl_genomes_portal}, version: {ENSEMBL_VERSION}"))
+  expected_attribute_name <- get_biomart_attribute(df_rs)
   df_mapping <- get_ensembl_genomes_mappings_from_ftp(
     organism = organism,
     ensembl_genomes_portal = ensembl_genomes_portal,
-    ensembl_genomes_version = ensembl_genomes_version,
+    ensembl_genomes_version = ENSEMBL_VERSION,
     biomart_attribute = expected_attribute_name
     )
 
   # TAIR from the mapping tables tend to be in the format 'AT1G01010.1' but the raw data has 'AT1G01010'
   # So here we remove the '.NNN' from the mapping table where .NNN is any number
   df_mapping$ensembl_gene_id <- stringr::str_replace_all(df_mapping$ensembl_gene_id, "\\.\\d+$", "")
+
+  use_custom_annot <- FALSE
 } else {
   # Use biomart from main Ensembl website which archives keep each release on the live service
   # locate dataset
-  expected_dataset_name <- shortenedOrganismName(unique(df_rs$organism)) %>% stringr::str_c("_gene_ensembl")
+  expected_dataset_name <- shortened_organism_name(unique(df_rs$organism)) %>% stringr::str_c("_gene_ensembl")
   print(paste0("Expected dataset name: '", expected_dataset_name, "'"))
   message(paste0("Expected dataset name: '", expected_dataset_name, "'")) # NON_DPPD
 
+  expected_attribute_name <- get_biomart_attribute(df_rs)
+  print(paste0("Expected attribute name: '", expected_attribute_name, "'"))
+  message(paste0("Expected attribute name: '", expected_attribute_name, "'")) # NON_DPPD
 
   # Specify Ensembl version used in current GeneLab reference annotations
-  ENSEMBL_VERSION <- '107'
+  ENSEMBL_VERSION <- ensembl_version
   print(paste0("Searching for Ensembl Version: ", ENSEMBL_VERSION)) # NON_DPPD
 
   print(glue::glue("Using Ensembl biomart to get specific version of mapping table. Ensembl version: {ENSEMBL_VERSION}"))
 
-  ensembl <- biomaRt::useEnsembl(biomart = "genes", 
-                                dataset = expected_dataset_name,
-                                version = ENSEMBL_VERSION)
-  print(ensembl)
+  # Check if organism/array design is supported in biomart
+  use_custom_annot <- TRUE
 
-  expected_attribute_name <- getBioMartAttribute(df_rs)
-  print(paste0("Expected attribute name: '", expected_attribute_name, "'"))
-  message(paste0("Expected attribute name: '", expected_attribute_name, "'")) # NON_DPPD
+  ensembl <- biomaRt::useEnsembl(biomart = "genes", version = ENSEMBL_VERSION)
+  ensembl_datasets <- biomaRt::listDatasets(ensembl)
+  if (expected_dataset_name %in% ensembl_datasets$dataset) {
+    ensembl <- biomaRt::useEnsembl(biomart = "genes", dataset = expected_dataset_name, version = ENSEMBL_VERSION)
+    ensembl_attributes <- biomaRt::listAttributes(ensembl)
+    if (expected_attribute_name %in% ensembl_attributes$name) {
+      use_custom_annot <- FALSE
+    }
+  }
+
+  if (use_custom_annot) {
+    unloadNamespace("biomaRt")
+  } else {
+    print(ensembl)
 
-  probe_ids <- unique(norm_data$genes$ProbeName)
+    probe_ids <- unique(norm_data$genes$ProbeName)
 
-  # DEBUG:START
-  if ( is.integer(params$DEBUG_limit_biomart_query) ) {
-    warning(paste("DEBUG MODE: Limiting query to", params$DEBUG_limit_biomart_query, "entries"))
-    message(paste("DEBUG MODE: Limiting query to", params$DEBUG_limit_biomart_query, "entries"))
-    probe_ids <- probe_ids[1:params$DEBUG_limit_biomart_query]
-  }
-  # DEBUG:END
-
-  # Create probe map
-  # Run Biomart Queries in chunks to prevent request timeouts
-  #   Note: If timeout is occuring (possibly due to larger load on biomart), reduce chunk size
-  CHUNK_SIZE= 1500
-  probe_id_chunks <- split(probe_ids, ceiling(seq_along(probe_ids) / CHUNK_SIZE))
-  df_mapping <- data.frame()
-  for (i in seq_along(probe_id_chunks)) {
-    probe_id_chunk <- probe_id_chunks[[i]]
-    print(glue::glue("Running biomart query chunk {i} of {length(probe_id_chunks)}. Total probes IDS in query ({length(probe_id_chunk)})"))
-    message(glue::glue("Running biomart query chunk {i} of {length(probe_id_chunks)}. Total probes IDS in query ({length(probe_id_chunk)})")) # NON_DPPD
-    chunk_results <- biomaRt::getBM(
-        attributes = c(
-            expected_attribute_name,
-            "ensembl_gene_id"
-            ), 
-            filters = expected_attribute_name, 
-            values = probe_id_chunk, 
-            mart = ensembl)
-
-    if (nrow(chunk_results) > 0) {
-      df_mapping <- df_mapping %>% dplyr::bind_rows(chunk_results)
+    # DEBUG:START
+    if ( is.integer(params$DEBUG_limit_biomart_query) ) {
+      warning(paste("DEBUG MODE: Limiting query to", params$DEBUG_limit_biomart_query, "entries"))
+      message(paste("DEBUG MODE: Limiting query to", params$DEBUG_limit_biomart_query, "entries"))
+      probe_ids <- probe_ids[1:params$DEBUG_limit_biomart_query]
+    }
+    # DEBUG:END
+
+    # Create probe map
+    # Run Biomart Queries in chunks to prevent request timeouts
+    #   Note: If timeout is occuring (possibly due to larger load on biomart), reduce chunk size
+    CHUNK_SIZE= 1500
+    probe_id_chunks <- split(probe_ids, ceiling(seq_along(probe_ids) / CHUNK_SIZE))
+    df_mapping <- data.frame()
+    for (i in seq_along(probe_id_chunks)) {
+      probe_id_chunk <- probe_id_chunks[[i]]
+      print(glue::glue("Running biomart query chunk {i} of {length(probe_id_chunks)}. Total probes IDS in query ({length(probe_id_chunk)})"))
+      message(glue::glue("Running biomart query chunk {i} of {length(probe_id_chunks)}. Total probes IDS in query ({length(probe_id_chunk)})")) # NON_DPPD
+      chunk_results <- biomaRt::getBM(
+          attributes = c(
+              expected_attribute_name,
+              "ensembl_gene_id"
+              ), 
+              filters = expected_attribute_name, 
+              values = probe_id_chunk, 
+              mart = ensembl)
+
+      if (nrow(chunk_results) > 0) {
+        df_mapping <- df_mapping %>% dplyr::bind_rows(chunk_results)
+      }
+      
+      Sys.sleep(10) # Slight break between requests to prevent back-to-back requests
     }
-    
-    Sys.sleep(10) # Slight break between requests to prevent back-to-back requests
   }
 }
 
 # At this point, we have df_mapping from either the biomart live service or the ensembl genomes ftp archive depending on the organism
-```
+# If no df_mapping obtained (e.g., not supported in biomart), use custom annotations; otherwise, merge in-house annotations to df_mapping
 
-``` {r reformat-merge-probe-annotations}
+if (use_custom_annot) {
+  expected_attribute_name <- 'ProbeName'
 
-# Convert list of multi-mapped genes to string
-listToUniquePipedString <- function(str_list) {
-  #! convert lists into strings denoting unique elements separated by '|' characters
-  #! e.g. c("GO1","GO2","GO2","G03") -> "GO1|GO2|GO3"
-  return(toString(unique(str_list)) %>% stringr::str_replace_all(pattern = stringr::fixed(", "), replacement = "|"))
-}
+  annot_type <- 'NO_CUSTOM_ANNOT'
+  if (!is.null(local_annotation_dir) && !is.null(annotation_config_path)) {
+    config_df <- read.csv(annotation_config_path, row.names=1)
+    if (unique(df_rs$`biomart_attribute`) %in% row.names(config_df)) {
+      annot_config <- config_df[unique(df_rs$`biomart_attribute`), ]
+      annot_type <- annot_config$annot_type[[1]]
+    } else {
+      warning(paste0("No entry for '", unique(df_rs$`biomart_attribute`), "' in provided config file: ", annotation_config_path))
+    }
+  } else {
+    warning("Need to provide both local_annotation_dir and annotation_config_path to use custom annotation.")
+  }
+
+  if (annot_type == 'agilent') {
+    unique_probe_ids <- read.delim(
+      file.path(local_annotation_dir, annot_config$annot_filename[[1]]),
+      header = TRUE, na.strings = c('NA', '')
+    )[c('ProbeID', 'EnsemblID', 'GeneSymbol', 'GeneName', 'RefSeqAccession', 'EntrezGeneID', 'GO')]
+
+    stopifnot(nrow(unique_probe_ids) == length(unique(unique_probe_ids$ProbeID)))
+
+    # Clean columns
+    unique_probe_ids$GO <- purrr::map_chr(stringr::str_extract_all(unique_probe_ids$GO, 'GO:\\d{7}'), ~paste0(unique(.), collapse = '|')) %>% stringr::str_replace('^$', NA_character_)
+
+    names(unique_probe_ids) <- c('ProbeName', 'ENSEMBL', 'SYMBOL', 'GENENAME', 'REFSEQ', 'ENTREZID', 'GOSLIM_IDS')
+
+    unique_probe_ids$STRING_id <- NA_character_
+
+    gene_col <- 'ENSEMBL'
+    if (sum(!is.na(unique_probe_ids$ENTREZID)) > sum(!is.na(unique_probe_ids$ENSEMBL))) {
+      gene_col <- 'ENTREZID'
+    }
+    if (sum(!is.na(unique_probe_ids$SYMBOL)) > max(sum(!is.na(unique_probe_ids$ENTREZID)), sum(!is.na(unique_probe_ids$ENSEMBL)))) {
+      gene_col <- 'SYMBOL'
+    }
 
-unique_probe_ids <- df_mapping %>% 
-                      # note: '!!sym(VAR)' syntax allows usage of variable 'VAR' in dplyr functions due to NSE. ref: https://dplyr.tidyverse.org/articles/programming.html # NON_DPPD
-                      dplyr::group_by(!!sym(expected_attribute_name)) %>% 
-                      dplyr::summarise(
-                        ENSEMBL = listToUniquePipedString(ensembl_gene_id)
+    unique_probe_ids <- unique_probe_ids %>%
+                        dplyr::mutate( 
+                          count_gene_mappings = 1 + stringr::str_count(get(gene_col), stringr::fixed("|")),
+                          gene_mapping_source = gene_col
+                        )
+  } else if (annot_type == 'custom') {
+    unique_probe_ids <- read.csv(
+      file.path(local_annotation_dir, annot_config$annot_filename[[1]]),
+      header = TRUE, na.strings = c('NA', '')
+    )
+  } else {
+    annot_cols <- c('ProbeName', 'ENTREZID', 'SYMBOL', 'GENENAME', 'ENSEMBL', 'REFSEQ', 'GOSLIM_IDS', 'STRING_id', 'count_gene_mappings', 'gene_mapping_source')
+    unique_probe_ids <- setNames(data.frame(matrix(NA_character_, nrow = 1, ncol = length(annot_cols))), annot_cols)
+  }
+} else {
+  annot <- read.table(
+      as.character(annotation_file_path),
+      sep = "\t",
+      header = TRUE,
+      quote = "",
+      comment.char = ""
+  )
+
+  unique_probe_ids <- df_mapping %>% 
+                        # note: '!!sym(VAR)' syntax allows usage of variable 'VAR' in dplyr functions due to NSE. ref: https://dplyr.tidyverse.org/articles/programming.html # NON_DPPD
+                        dplyr::mutate(dplyr::across(!!sym(expected_attribute_name), as.character)) %>% # Ensure probe ids treated as character type
+                        dplyr::group_by(!!sym(expected_attribute_name)) %>% 
+                        dplyr::summarise(
+                          ENSEMBL = list_to_unique_piped_string(ensembl_gene_id)
+                          ) %>%
+                        # Count number of ensembl IDS mapped
+                        dplyr::mutate( 
+                          count_gene_mappings = 1 + stringr::str_count(ENSEMBL, stringr::fixed("|")),
+                          gene_mapping_source = annot_key
                         ) %>%
-                      # Count number of ensembl IDS mapped
-                      dplyr::mutate( 
-                        count_ENSEMBL_mappings = 1 + stringr::str_count(ENSEMBL, stringr::fixed("|"))
-                      )
+                        dplyr::left_join(annot, by = c("ENSEMBL" = annot_key))
+}
+```
 
+``` {r reformat-merge-probe-annotations}
 norm_data$genes <- norm_data$genes %>% 
   dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
-  dplyr::mutate( count_ENSEMBL_mappings = ifelse(is.na(ENSEMBL), 0, count_ENSEMBL_mappings) )
+  dplyr::mutate( count_gene_mappings := ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+  dplyr::mutate( gene_mapping_source := unique(unique_probe_ids$gene_mapping_source) )
 ```
 
-### Summarize Biomart Mapping vs. Manufacturer Mapping
+### Summarize Gene Mapping
 
 ``` {r summarize-remapping-vs-original-mapping}
 #| message = FALSE
 # Pie Chart with Percentages
 slices <- c(
     'Control probes' = nrow(norm_data$gene %>% dplyr::filter(ControlType != 0) %>% dplyr::distinct(ProbeName)), 
-    'Unique Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_ENSEMBL_mappings == 1) %>% dplyr::distinct(ProbeName)), 
-    'Multi Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_ENSEMBL_mappings > 1) %>% dplyr::distinct(ProbeName)), 
-    'No Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_ENSEMBL_mappings == 0) %>% dplyr::distinct(ProbeName))
+    'Unique Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings == 1) %>% dplyr::distinct(ProbeName)), 
+    'Multi Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings > 1) %>% dplyr::distinct(ProbeName)), 
+    'No Mapping' = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(count_gene_mappings == 0) %>% dplyr::distinct(ProbeName))
 )
 pct <- round(slices/sum(slices)*100)
 chart_names <- names(slices)
@@ -583,20 +653,81 @@ chart_names <- glue::glue("{names(slices)} ({slices})") # add count to labels
 chart_names <- paste(chart_names, pct) # add percents to labels
 chart_names <- paste(chart_names,"%",sep="") # ad % to labels
 pie(slices,labels = chart_names, col=rainbow(length(slices)),
-    main=glue::glue("Biomart Mapping to Ensembl Primary Keytype\n {nrow(norm_data$gene %>% dplyr::distinct(ProbeName))} Total Unique Probes")
+    main=glue::glue("Mapping to Primary Keytype\n {nrow(norm_data$gene %>% dplyr::distinct(ProbeName))} Total Unique Probes")
     )
 
 original_mapping_rate = nrow(norm_data$gene %>% dplyr::filter(ControlType == 0) %>% dplyr::filter(ProbeName != SystematicName) %>% dplyr::distinct(ProbeName))
 print(glue::glue("Original Manufacturer Reported Mapping Count: {original_mapping_rate}"))
-print(glue::glue("Biomart Unique Mapping Count: {slices[['Unique Mapping']]}"))
+print(glue::glue("Unique Mapping Count: {slices[['Unique Mapping']]}"))
 message(glue::glue("Original Manufacturer Reported Mapping Rate: {original_mapping_rate}")) # NON_DPPD
-message(glue::glue("Biomart Unique Mapping Rate: {slices[['Unique Mapping']]}")) # NON_DPPD
+message(glue::glue("Unique Mapping Rate: {slices[['Unique Mapping']]}")) # NON_DPPD
+```
+
+### Generate Annotated Raw and Normalized Expression Tables
+
+```{r save-tables}
+## Reorder columns before saving to file
+ANNOTATIONS_COLUMN_ORDER = c(
+  annot_key,
+  "SYMBOL",
+  "GENENAME",
+  "REFSEQ",
+  "ENTREZID",
+  "STRING_id",
+  "GOSLIM_IDS"
+)
+
+PROBE_INFO_COLUMN_ORDER = c(
+  "ProbeUID",
+  "ProbeName",
+  "count_gene_mappings",
+  "gene_mapping_source"
+)
+SAMPLE_COLUMN_ORDER <- df_rs$`Sample Name`
+
+## Generate raw intensity matrix that includes annotations
+raw_data_matrix <- background_corrected_data$genes %>% 
+                    dplyr::select(ProbeUID, ProbeName) %>%
+                    dplyr::bind_cols(background_corrected_data$E) %>% 
+                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
+                    dplyr::mutate( count_gene_mappings = ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+                    dplyr::mutate( gene_mapping_source = unique(unique_probe_ids$gene_mapping_source) )
+
+## Perform reordering
+FINAL_COLUMN_ORDER <- c(
+  ANNOTATIONS_COLUMN_ORDER, 
+  PROBE_INFO_COLUMN_ORDER, 
+  SAMPLE_COLUMN_ORDER
+  )
+
+raw_data_matrix <- raw_data_matrix %>% 
+  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
+
+write.csv(raw_data_matrix, file.path(DIR_RAW_DATA, "raw_intensities_GLmicroarray.csv"), row.names = FALSE)
+
+## Generate normalized expression matrix that includes annotations
+norm_data_matrix <- norm_data$genes %>% 
+                    dplyr::select(ProbeUID, ProbeName) %>%
+                    dplyr::bind_cols(norm_data$E) %>% 
+                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
+                    dplyr::mutate( count_gene_mappings = ifelse(is.na(count_gene_mappings), 0, count_gene_mappings) ) %>%
+                    dplyr::mutate( gene_mapping_source = unique(unique_probe_ids$gene_mapping_source) )
+
+norm_data_matrix <- norm_data_matrix %>% 
+  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
+
+write.csv(norm_data_matrix, file.path(DIR_NORMALIZED_EXPRESSION, "normalized_expression_GLmicroarray.csv"), row.names = FALSE)
 ```
 
+## Perform Probe Differential Expression (DE)
+
 ### Generate Design Matrix
 
 ``` {r generate-design-matrix}
-runsheetToDesignMatrix <- function(runsheet_path) {
+#| include: !expr params$run_DE
+#| eval: !expr params$run_DE
+
+runsheet_to_design_matrix <- function(runsheet_path) {
     df = read.csv(runsheet_path)
     # get only Factor Value columns
     factors = as.data.frame(df[,grep("Factor.Value", colnames(df), ignore.case=TRUE)])
@@ -639,7 +770,7 @@ runsheetToDesignMatrix <- function(runsheet_path) {
 
 
 # Loading metadata from runsheet csv file
-design_data <- runsheetToDesignMatrix(runsheet)
+design_data <- runsheet_to_design_matrix(runsheet)
 design <- design_data$matrix
 
 # Write SampleTable.csv and contrasts.csv file
@@ -650,7 +781,10 @@ write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"
 ### Perform Individual Probe Level DE
 
 ``` {r perform-probe-differential-expression}
-lmFitPairwise <- function(norm_data, design) {
+#| include: !expr params$run_DE
+#| eval: !expr params$run_DE
+
+lm_fit_pairwise <- function(norm_data, design) {
     #' Perform all pairwise comparisons
 
     #' Approach based on limma manual section 17.4 (version 3.52.4)
@@ -669,7 +803,7 @@ lmFitPairwise <- function(norm_data, design) {
 }
 
 # Calculate results
-res <- lmFitPairwise(norm_data, design)
+res <- lm_fit_pairwise(norm_data, design)
 DT::datatable(limma::topTable(res)) # NON_DPPD
 
 # Print DE table, without filtering
@@ -680,16 +814,34 @@ limma::write.fit(res, adjust = 'BH',
                 sep = ",")
 ```
 
-### Add Additional Columns and Format DE Table 
+### Add Annotation and Stats Columns and Format DE Table
 
-``` {r add-additional-columns-and-format-de-table}
-#| warning: false
+``` {r save-de-table}
 #| message: false
+#| include: !expr params$run_DE
+#| eval: !expr params$run_DE
 ## Reformat Table for consistency across DE analyses tables within GeneLab ##
 
 # Read in DE table 
 df_interim <- read.csv("INTERIM.csv")
 
+print("Remove extra columns from final table")
+
+# These columns are data mapped to column PROBEID as per the original Manufacturer and can be linked as needed
+colnames_to_remove = c(
+  "Genes.Row",
+  "Genes.Col",
+  "Genes.Start",
+  "Genes.Sequence",
+  "Genes.ControlType",
+  "Genes.GeneName",
+  "Genes.SystematicName",
+  "Genes.Description",
+  "AveExpr" # Replaced by 'All.mean' column
+)
+
+df_interim <- df_interim %>% dplyr::select(-any_of(colnames_to_remove))
+
 # Reformat column names
 reformat_names <- function(colname, group_name_mapping) {
   # NON_DPPD:START
@@ -708,10 +860,7 @@ reformat_names <- function(colname, group_name_mapping) {
                   stringr::str_replace(pattern = "^P.value.condition", replacement = "P.value_") %>%
                   stringr::str_replace(pattern = "^Coef.condition", replacement = "Log2fc_") %>% # This is the Log2FC as per: https://rdrr.io/bioc/limma/man/writefit.html
                   stringr::str_replace(pattern = "^t.condition", replacement = "T.stat_") %>%
-                  stringr::str_replace(pattern = stringr::fixed("Genes.ProbeName"), replacement = "ProbeName") %>% 
-                  stringr::str_replace(pattern = stringr::fixed("Genes.count_ENSEMBL_mappings"), replacement = "count_ENSEMBL_mappings") %>% 
-                  stringr::str_replace(pattern = stringr::fixed("Genes.ProbeUID"), replacement = "ProbeUID") %>% 
-                  stringr::str_replace(pattern = stringr::fixed("Genes.ENSEMBL"), replacement = "ENSEMBL") %>% 
+                  stringr::str_replace(pattern = "^Genes\\.", replacement = "") %>%
                   stringr::str_replace(pattern = ".condition", replacement = "v")
   
   # remap to group names before make.names was applied
@@ -771,85 +920,14 @@ for ( i in seq_along(unique_groups) ) {
 ## Compute all sample mean and standard deviation
 message(glue::glue("Computing mean and standard deviation for all samples"))
 # NON_DPPD:END
-all_samples <- design_data$group %>% dplyr::pull(sample)
 df_interim <- df_interim %>% 
   dplyr::mutate( 
-    "All.mean" := rowMeans(dplyr::select(., all_of(all_samples))),
-    "All.stdev" := matrixStats::rowSds(as.matrix(dplyr::select(., all_of(all_samples)))),
+    "All.mean" := rowMeans(dplyr::select(., all_of(SAMPLE_COLUMN_ORDER))),
+    "All.stdev" := matrixStats::rowSds(as.matrix(dplyr::select(., all_of(SAMPLE_COLUMN_ORDER)))),
     ) %>% 
   dplyr::ungroup() %>%
   as.data.frame()
 
-print("Remove extra columns from final table")
-
-# These columns are data mapped to column PROBEID as per the original Manufacturer and can be linked as needed
-colnames_to_remove = c(
-  "Genes.Row",
-  "Genes.Col",
-  "Genes.Start",
-  "Genes.Sequence",
-  "Genes.ControlType",
-  "Genes.ProbeName",
-  "Genes.GeneName",
-  "Genes.SystematicName",
-  "Genes.Description",
-  "AveExpr" # Replaced by 'All.mean' column
-  # "Genes.count_ENSEMBL_mappings", Keep this
-)
-
-df_interim <- df_interim %>% dplyr::select(-any_of(colnames_to_remove))
-
-## Concatenate annotations for genes (for uniquely mapped probes) ##
-### Read in annotation table for the appropriate organism ###
-annot <- read.table(
-            annotation_file_path,
-            sep = "\t",
-            header = TRUE,
-            quote = "",
-            comment.char = "",
-        )
-
-# Join annotation table and uniquely mapped data
-
-# Determine appropriate keytype
-map_primary_keytypes <- c(
-  'Caenorhabditis elegans' = 'ENSEMBL',
-  'Danio rerio' = 'ENSEMBL',
-  'Drosophila melanogaster' = 'ENSEMBL',
-  'Rattus norvegicus' = 'ENSEMBL',
-  'Saccharomyces cerevisiae' = 'ENSEMBL',
-  'Homo sapiens' = 'ENSEMBL',
-  'Mus musculus' = 'ENSEMBL',
-  'Arabidopsis thaliana' = 'TAIR'
-)
-
-df_interim <- merge(
-                annot,
-                df_interim,
-                by.x = map_primary_keytypes[[unique(df_rs$organism)]],
-                by.y = "ENSEMBL",
-                # ensure all original dge rows are kept.
-                # If unmatched in the annotation database, then fill missing with NAN
-                all.y = TRUE
-            )
-
-## Reorder columns before saving to file
-ANNOTATIONS_COLUMN_ORDER = c(
-  map_primary_keytypes[[unique(df_rs$organism)]],
-  "SYMBOL",
-  "GENENAME",
-  "REFSEQ",
-  "ENTREZID",
-  "STRING_id",
-  "GOSLIM_IDS"
-)
-
-PROBE_INFO_COLUMN_ORDER = c(
-  "ProbeUID",
-  "ProbeName",
-  "count_ENSEMBL_mappings"
-)
-SAMPLE_COLUMN_ORDER <- all_samples
 generate_prefixed_column_order <- function(subjects, prefixes) {
   #' Return a vector of columns based on subject and given prefixes
   #'  Used for both contrasts and groups column name generation
@@ -882,26 +960,21 @@ ALL_SAMPLE_STATS_COLUMNS_ORDER <- c(
   "F.p.value"
 )
 
-GROUP_MEAN_COLUMNS_ORDER <- generate_prefixed_column_order(
-  subjects = unique(design_data$groups$group),
-  prefixes = c(
-    "Group.Mean_"
-    )
-  )
-GROUP_STDEV_COLUMNS_ORDER <- generate_prefixed_column_order(
+GROUP_MEAN_STDEV_COLUMNS_ORDER <- generate_prefixed_column_order(
   subjects = unique(design_data$groups$group),
   prefixes = c(
+    "Group.Mean_",
     "Group.Stdev_"
-    )
   )
+)
+
 FINAL_COLUMN_ORDER <- c(
   ANNOTATIONS_COLUMN_ORDER, 
   PROBE_INFO_COLUMN_ORDER, 
   SAMPLE_COLUMN_ORDER, 
   STAT_COLUMNS_ORDER, 
   ALL_SAMPLE_STATS_COLUMNS_ORDER, 
-  GROUP_MEAN_COLUMNS_ORDER,
-  GROUP_STDEV_COLUMNS_ORDER
+  GROUP_MEAN_STDEV_COLUMNS_ORDER
   )
 
 ## Assert final column order includes all columns from original table
@@ -917,74 +990,6 @@ df_interim <- df_interim %>% dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
 
 # Save to file
 write.csv(df_interim, file.path(DIR_DGE, "differential_expression_GLmicroarray.csv"), row.names = FALSE)
-
-### Generate and export PCA table for GeneLab visualization plots
-## Only use positive expression values, negative values can make up a small portion ( < 0.5% ) of normalized expression values and cannot be log transformed
-exp_raw <- log2(norm_data$E) # negatives get converted to NA
-exp_raw <- na.omit(norm_data$E)
-PCA_raw <- prcomp(t(exp_raw), scale = FALSE)
-write.csv(PCA_raw$x,
-          file.path(DIR_DGE, "visualization_PCA_table_GLmicroarray.csv")
-          )
-
-## Generate raw intensity matrix that includes annotations
-raw_data_matrix <- background_corrected_data$genes %>% 
-                    dplyr::select(ProbeUID, ProbeName) %>%
-                    dplyr::bind_cols(background_corrected_data$E) %>% 
-                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
-                    dplyr::mutate( count_ENSEMBL_mappings = ifelse(is.na(ENSEMBL), 0, count_ENSEMBL_mappings) )
-
-raw_data_matrix_annotated <- merge(
-                annot,
-                raw_data_matrix,
-                by.x = map_primary_keytypes[[unique(df_rs$organism)]],
-                by.y = "ENSEMBL",
-                # ensure all original dge rows are kept.
-                # If unmatched in the annotation database, then fill missing with NAN
-                all.y = TRUE
-            )
-
-## Perform reordering
-FINAL_COLUMN_ORDER <- c(
-  ANNOTATIONS_COLUMN_ORDER, 
-  PROBE_INFO_COLUMN_ORDER, 
-  SAMPLE_COLUMN_ORDER
-  )
-
-raw_data_matrix_annotated <- raw_data_matrix_annotated %>% 
-  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
-
-write.csv(raw_data_matrix_annotated, file.path(DIR_RAW_DATA, "raw_intensities_GLmicroarray.csv"), row.names = FALSE)
-
-## Generate normalized expression matrix that includes annotations
-norm_data_matrix <- norm_data$genes %>% 
-                    dplyr::select(ProbeUID, ProbeName) %>%
-                    dplyr::bind_cols(norm_data$E) %>% 
-                    dplyr::left_join(unique_probe_ids, by = c("ProbeName" = expected_attribute_name ) ) %>%
-                    dplyr::mutate( count_ENSEMBL_mappings = ifelse(is.na(ENSEMBL), 0, count_ENSEMBL_mappings) )
-
-norm_data_matrix_annotated <- merge(
-                annot,
-                norm_data_matrix,
-                by.x = map_primary_keytypes[[unique(df_rs$organism)]],
-                by.y = "ENSEMBL",
-                # ensure all original dge rows are kept.
-                # If unmatched in the annotation database, then fill missing with NAN
-                all.y = TRUE
-            )
-
-
-## Perform reordering
-FINAL_COLUMN_ORDER <- c(
-  ANNOTATIONS_COLUMN_ORDER, 
-  PROBE_INFO_COLUMN_ORDER, 
-  SAMPLE_COLUMN_ORDER
-  )
-
-norm_data_matrix_annotated <- norm_data_matrix_annotated %>% 
-  dplyr::relocate(dplyr::all_of(FINAL_COLUMN_ORDER))
-
-write.csv(norm_data_matrix_annotated, file.path(DIR_NORMALIZED_EXPRESSION, "normalized_expression_GLmicroarray.csv"), row.names = FALSE)
 ```
 
 ## Version Reporting <!-- non DPPD -->
@@ -1053,11 +1058,11 @@ get_versions <- function() {
 ## Note Libraries that were NOT used during processing
 versions_buffer <- get_versions()
 
-if (organism %in% c("athaliana")) {
+if (organism %in% c("athaliana") || use_custom_annot) {
   versions_buffer <- glue::glue_collapse(c(
     versions_buffer,
     glue::glue("- name: biomaRt"),
-    glue::glue("  version: (Not used for plant datasets)"),
+    glue::glue("  version: (Not used for this dataset)"),
     glue::glue("  homepage: https://bioconductor.org/packages/3.14/bioc/html/biomaRt.html"),
     glue::glue("  workflow task: PROCESS_AGILE1CH")
     ), sep = "\n")
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/checks.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/checks.py
index 6e1d3167..707300f9 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/checks.py
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/checks.py
@@ -242,8 +242,8 @@ def utils_common_constraints_on_dataframe(
         col_constraints = col_constraints.copy()
 
         # limit to only columns of interest
-        query_df = df[col_set]
-        for (colname, colseries) in query_df.iteritems():
+        query_df = df[list(col_set)]
+        for (colname, colseries) in query_df.items():
             # check non null constraint
             if col_constraints.pop("nonNull", False) and nonNull(colseries) == False:
                 issues["Failed non null constraint"].append(colname)
@@ -321,7 +321,7 @@ def check_dge_table_sample_columns_constraints(
 ) -> FlagEntry:
     MINIMUM_COUNT = 0
     # data specific preprocess
-    df_dge = pd.read_csv(dge_table)[samples]
+    df_dge = pd.read_csv(dge_table)[list(samples)]
 
     schema = pa.DataFrameSchema({
         sample: pa.Column(float)
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/config.yaml b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/config.yaml
index f581d824..de550d19 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/config.yaml
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/config.yaml
@@ -78,6 +78,7 @@ Staging:
         - ISA Field Name:
             - Label
             - Parameter Value[label]
+            - Parameter Value[Label]
           ISA Table Source: Sample
           Runsheet Column Name: Label
           Processing Usage: >-
@@ -259,16 +260,6 @@ data assets:
             
     resource categories: *DGEAnalysisData
 
-  viz PCA table:
-    processed location: 
-      - *DGEDataDir
-      - "visualization_PCA_table_GLmicroarray.csv"
-
-    tags:
-      - processed
-            
-    resource categories: *neverPublished
-
 data asset sets:
   # These assets are not generated in the workflow, but are generated after the workflow
   PUTATIVE: []
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/protocol.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/protocol.py
index 6c0561bd..b3df9a71 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/protocol.py
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel/protocol.py
@@ -415,33 +415,6 @@ def validate(
                         """)
                     )
 
-            with vp.component_start(
-                name="Viz Tables",
-                description="Extended from the dge tables",
-            ):
-                with vp.payload(
-                    payloads=[
-                        {
-                            "samples": lambda: set(dataset.samples),
-                            "pca_table": lambda: dataset.data_assets[
-                                "viz PCA table"
-                            ].path,
-                        }
-                    ]
-                ):
-                    vp.add(
-                        bulkRNASeq.checks.check_viz_pca_table_index_and_columns_exist,
-                        full_description=textwrap.dedent(f"""
-                                - Check: Ensure all samples (row-indices) present and columns PC1, PC2 and PC3 are present
-                                    - Reason:
-                                        - PCA table should include all samples and PC1, PC2, PC3 (for 3D PCA viz)
-                                    - Potential Source of Problems:
-                                        - Bug in processing script
-                                    - Flag Condition:
-                                        - Green: All samples and all columns present
-                                        - Halt: At least one sample or column is missing
-                            """)
-                        )
         with vp.component_start(
             name="Processing Report",
             description="",
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/__init__.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/__init__.py
new file mode 100644
index 00000000..5faa427c
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/__init__.py
@@ -0,0 +1,9 @@
+from pathlib import Path
+
+# Import for access at the module level
+from . import checks
+from . import protocol
+from . import schemas
+
+# Set config path
+config = Path(__file__).parent / "config.yaml"
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/checks.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/checks.py
new file mode 100644
index 00000000..707300f9
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/checks.py
@@ -0,0 +1,618 @@
+import string
+from pathlib import Path
+import logging
+import enum
+from typing import Union
+import itertools
+from statistics import mean
+
+from loguru import logger as log
+import pandas as pd
+import pandera as pa
+
+from dp_tools.core.check_model import FlagCode, FlagEntry, FlagEntryWithOutliers
+from dp_tools.core.entity_model import Dataset
+
+class GroupFormatting(enum.Enum):
+    r_make_names = enum.auto()
+    ampersand_join = enum.auto()
+
+def check_contrasts_table_headers(contrasts_table: Path, runsheet: Path) -> FlagEntry:
+    # data specific preprocess
+    expected_groups = utils_runsheet_to_expected_groups(runsheet, map_to_lists=True)
+    expected_comparisons = [
+        "v".join(paired_groups)
+        for paired_groups in itertools.permutations(expected_groups, 2)
+    ]
+    df_contrasts = pd.read_csv(contrasts_table, index_col=0)
+
+    # check logic
+    differences = set(expected_comparisons).symmetric_difference(
+        set(df_contrasts.columns)
+    )
+    if not differences:
+        code = FlagCode.GREEN
+        message = f"Contrasts header includes expected comparisons as determined runsheet Factor Value Columns: {sorted(set(expected_comparisons))}"
+    else:
+        code = FlagCode.HALT
+        message = f"Contrasts header does not match expected comparisons as determined runsheet Factor Value Columns: {sorted(differences)}"
+    return {"code": code, "message": message}
+
+def check_metadata_attributes_exist(
+    dataset: Dataset, expected_attrs: list[str]
+) -> FlagEntry:
+    missing_metadata_fields = list(set(expected_attrs) - set(dataset.metadata))
+
+    # check if any missing_metadata_fields are present
+    # check logic
+    if not missing_metadata_fields:
+        code = FlagCode.GREEN
+        message = f"All expected metadata keys found: Expected {expected_attrs}, Found {sorted(set(dataset.metadata))}"
+    else:
+        code = FlagCode.HALT
+        message = f"Missing dataset metadata (source from Runsheet): {sorted(missing_metadata_fields)}"
+    return {"code": code, "message": message}
+
+def check_raw_intensities_table(
+    raw_intensities: Path, samples: list[str]
+) -> FlagEntry:
+    schema = pa.DataFrameSchema(
+        columns = {sample: pa.Column(float, pa.Check.ge(0)) for sample in samples}
+    )
+
+    log.trace(schema)
+
+    df = pd.read_csv(raw_intensities)
+
+    try:
+        schema.validate(df, lazy=True)
+        error_message = None
+    except pa.errors.SchemaErrors as err:
+        log.trace(err)
+        error_message = err.schema_errors
+    if error_message is None:
+        code = FlagCode.GREEN
+        message = (
+            f"Table conforms to schema: {repr(schema)}"
+        )
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"{error_message}"
+        )
+    return {"code": code, "message": message}
+
+def check_normalized_expression_table(
+    normalized_expression: Path, samples: list[str]
+) -> FlagEntry:
+    schema = pa.DataFrameSchema(
+        columns = {sample: pa.Column(float) for sample in samples}
+    )
+
+    df = pd.read_csv(normalized_expression)
+
+    try:
+        schema.validate(df, lazy=True)
+        error_message = None
+    except pa.errors.SchemaErrors as err:
+        log.trace(err)
+        error_message = err.schema_errors
+    if error_message is None:
+        code = FlagCode.GREEN
+        message = (
+            f"Table conforms to schema: {repr(schema)}"
+        )
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"{error_message}"
+        )
+    return {"code": code, "message": message}
+
+def check_viz_table_columns_constraints(
+    dge_table: Path, runsheet: Path, **_
+) -> FlagEntry:
+    # data specific preprocess
+    expected_groups = utils_runsheet_to_expected_groups(runsheet, map_to_lists=True)
+    expected_comparisons = [
+        "v".join(paired_groups)
+        for paired_groups in itertools.permutations(expected_groups, 2)
+    ]
+    viz_pairwise_columns_constraints = (
+        (
+            {f"Log2_Adj.p.value_{comp}" for comp in expected_comparisons},
+            {"nonNull": False},
+        ),
+        (
+            {f"Sig.1_{comp}" for comp in expected_comparisons},
+            {"allowedValues": [False, True], "nonNull": False},
+        ),
+        (
+            {f"Sig.05_{comp}" for comp in expected_comparisons},
+            {"allowedValues": [False, True], "nonNull": False},
+        ),
+        (
+            {f"Log2_P.value_{comp}" for comp in expected_comparisons},
+            {"nonNegative": False, "nonNull": False},
+        ),
+        (
+            {f"Updown_{comp}" for comp in expected_comparisons},
+            {"allowedValues": [1, -1], "nonNull": True},
+        ),
+    )
+
+    df_viz = pd.read_csv(dge_table)
+
+    # issue trackers
+    # here: {prefix+constraint: [failed_columns]}
+    issues: dict[str, list[str]] = dict()
+
+    issues = utils_common_constraints_on_dataframe(
+        df_viz, viz_pairwise_columns_constraints
+    )
+
+    # check logic
+    if not any([issue_type for issue_type in issues.values()]):
+        code = FlagCode.GREEN
+        message = (
+            f"All values in columns met constraint: {viz_pairwise_columns_constraints}"
+        )
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"Issues found {issues} that"
+            f"fail the contraint: {viz_pairwise_columns_constraints}."
+        )
+    return {"code": code, "message": message}
+
+def utils_runsheet_to_expected_groups(
+    runsheet: Path,
+    formatting: GroupFormatting = GroupFormatting.ampersand_join,
+    limit_to_samples: list = None,
+    map_to_lists: bool = False,
+) -> Union[dict[str, str], dict[str, list[str]]]:
+    df_rs = (
+        pd.read_csv(runsheet, index_col="Sample Name", dtype=str)
+        .filter(regex="^Factor Value\[.*\]")
+        .sort_index()
+    )  # using only Factor Value columns
+
+    if limit_to_samples:
+        df_rs = df_rs.filter(items=limit_to_samples, axis="rows")
+
+    match formatting:
+        case GroupFormatting.r_make_names:
+            expected_conditions_based_on_runsheet = (
+                df_rs.apply(lambda x: "...".join(x), axis="columns")
+                .apply(r_style_make_names)  # join factors with '...'
+                .to_dict()
+            )  # reformat entire group in the R style
+        case GroupFormatting.ampersand_join:
+            expected_conditions_based_on_runsheet = df_rs.apply(
+                lambda x: f"({' & '.join(x)})", axis="columns"
+            ).to_dict()
+        case _:
+            raise ValueError(
+                f"Formatting method invalid, must be one of the following: {list(GroupFormatting)}"
+            )
+
+    # convert from {sample: group} dict
+    #         to {group: [samples]} dict
+    if map_to_lists:
+        unique_groups = set(expected_conditions_based_on_runsheet.values())
+        reformatted_dict: dict[str, list[str]] = dict()
+        for query_group in unique_groups:
+            reformatted_dict[query_group] = [
+                sample
+                for sample, group in expected_conditions_based_on_runsheet.items()
+                if group == query_group
+            ]
+        expected_conditions_based_on_runsheet: dict[str, list[str]] = reformatted_dict
+
+    return expected_conditions_based_on_runsheet
+
+## Dataframe and Series specific helper functions
+def nonNull(df: pd.DataFrame) -> bool:
+    # negation since it checks if any are null
+    return ~df.isnull().any(axis=None)
+
+
+def nonNegative(df: pd.DataFrame) -> bool:
+    """This ignores null values, use nonNull to validate that condition"""
+    return ((df >= 0) | (df.isnull())).all(axis=None)
+
+
+def onlyAllowedValues(df: pd.DataFrame, allowed_values: list) -> bool:
+    """This ignores null values, use nonNull to validate that condition"""
+    return ((df.isin(allowed_values)) | (df.isnull())).all(axis=None)
+
+
+def utils_common_constraints_on_dataframe(
+    df: pd.DataFrame, constraints: tuple[tuple[set, dict], ...]
+) -> dict:
+
+    issues: dict[str, list[str]] = {
+        "Failed non null constraint": list(),
+        "Failed non negative constraint": list(),
+    }
+
+    for (col_set, col_constraints) in constraints:
+        # this will avoid overriding the original constraints dictionary
+        # which is likely used in the check message
+        col_constraints = col_constraints.copy()
+
+        # limit to only columns of interest
+        query_df = df[list(col_set)]
+        for (colname, colseries) in query_df.items():
+            # check non null constraint
+            if col_constraints.pop("nonNull", False) and nonNull(colseries) == False:
+                issues["Failed non null constraint"].append(colname)
+            # check non negative constraint
+            if (
+                col_constraints.pop("nonNegative", False)
+                and nonNegative(colseries) == False
+            ):
+                issues["Failed non negative constraint"].append(colname)
+            # check allowed values constraint
+            if allowedValues := col_constraints.pop("allowedValues", False):
+                if onlyAllowedValues(colseries, allowedValues) == False:
+                    issues["Failed non negative constraint"].append(colname)
+
+            # raise exception if there are unhandled constraint keys
+            if col_constraints:
+                raise ValueError(f"Unhandled constraint types: {col_constraints}")
+
+    return issues
+
+def check_dge_table_log2fc_within_reason(
+    dge_table: Path, runsheet: Path, **_
+) -> FlagEntry:
+    """ Note: This function assumes the normalized expression values are log2 transformed
+    """
+    LOG2FC_CROSS_METHOD_PERCENT_DIFFERENCE_THRESHOLD = 1  # Percent
+    PERCENT_ROWS_WITHIN_TOLERANCE = 99.9  # Percent
+
+    # data specific preprocess
+    expected_groups = utils_runsheet_to_expected_groups(runsheet, map_to_lists=True)
+    expected_comparisons = [
+        "v".join(paired_groups)
+        for paired_groups in itertools.permutations(expected_groups, 2)
+    ]
+    df_dge = pd.read_csv(dge_table)
+
+    # Track error messages
+    error_list: list[tuple[str,float]] = list()
+    for comparison in expected_comparisons:
+        query_column = f"Log2fc_{comparison}"
+        group1_mean_col = (
+            "Group.Mean_" + comparison.split(")v(")[0] + ")"
+        )  # Uses parens and adds them back to prevent slicing on 'v' within factor names
+        group2_mean_col = "Group.Mean_" + "(" + comparison.split(")v(")[1]
+        print(df_dge[group1_mean_col].describe())
+        print(df_dge[group2_mean_col].describe())
+        computed_log2fc = df_dge[group1_mean_col] - df_dge[group2_mean_col]
+        abs_percent_difference = abs(
+            ((computed_log2fc - df_dge[query_column]) / df_dge[query_column]) * 100
+        )
+        percent_within_tolerance = (
+            mean(
+                abs_percent_difference
+                < LOG2FC_CROSS_METHOD_PERCENT_DIFFERENCE_THRESHOLD
+            )
+            * 100
+        )
+
+        if percent_within_tolerance < PERCENT_ROWS_WITHIN_TOLERANCE: # add current query column to error list
+            error_list.append((query_column,percent_within_tolerance))
+
+    # inplace sort error list for deterministic order
+    error_list.sort()
+    if error_list == list():
+        code = FlagCode.GREEN
+        message = f"All log2fc values recomputed and consistent (within {LOG2FC_CROSS_METHOD_PERCENT_DIFFERENCE_THRESHOLD})"
+    else:
+        code = FlagCode.HALT
+        message = f"At least one log2fc values recomputed and is not consistent (within {LOG2FC_CROSS_METHOD_PERCENT_DIFFERENCE_THRESHOLD}). These columns were flagged: {error_list}"
+
+    return {"code": code, "message": message}
+
+def check_dge_table_sample_columns_constraints(
+    dge_table: Path, samples: set[str], **_
+) -> FlagEntry:
+    MINIMUM_COUNT = 0
+    # data specific preprocess
+    df_dge = pd.read_csv(dge_table)[list(samples)]
+
+    schema = pa.DataFrameSchema({
+        sample: pa.Column(float)
+        for sample in samples
+    })
+
+    try:
+        schema.validate(df_dge, lazy=True)
+        error_cases = None
+        error_data = None
+    except pa.errors.SchemaErrors as err:
+        error_cases = err.failure_cases
+        error_data = err.data
+    if error_cases == error_data == None:
+        code = FlagCode.GREEN
+        message = (
+            f"All values in columns: {samples} met constraints: {repr(schema)}"
+        )
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"{error_cases}:::{error_data}"
+        )
+    return {"code": code, "message": message}
+
+def check_dge_table_group_statistical_columns_constraints(
+    dge_table: Path, runsheet: Path, **_
+) -> FlagEntry:
+    expected_groups = utils_runsheet_to_expected_groups(runsheet, map_to_lists=True)
+    expected_comparisons = [
+        "v".join(paired_groups)
+        for paired_groups in itertools.permutations(expected_groups, 2)
+    ]
+
+    resolved_constraints = (
+        ({f"Log2fc_{comp}" for comp in expected_comparisons}, {"nonNull": True}),
+        ({f"T.stat_{comp}" for comp in expected_comparisons}, {"nonNull": True}),
+        # can be removed from analysis before p-value and adj-p-value assessed
+        # ref: https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
+        (
+            {f"P.value_{comp}" for comp in expected_comparisons},
+            {"nonNegative": True, "nonNull": False},
+        ),
+        (
+            {f"Adj.p.value_{comp}" for comp in expected_comparisons},
+            {"nonNegative": True, "nonNull": False},
+        ),
+    )
+
+    df_dge = pd.read_csv(dge_table)
+
+    # issue trackers
+    # here: {prefix+constraint: [failed_columns]}
+    issues: dict[str, list[str]] = dict()
+
+    issues = utils_common_constraints_on_dataframe(df_dge, resolved_constraints)
+
+    # check logic
+    if not any([issue_type for issue_type in issues.values()]):
+        code = FlagCode.GREEN
+        message = f"All values in columns met constraint: {resolved_constraints}"
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"Issues found {issues} that" f"fail the contraint: {resolved_constraints}."
+        )
+    return {"code": code, "message": message}
+
+def enclose_in_parens(s: str) -> str:
+    """Recreates R's make.names function for individual strings.
+    This function is often used to create syntactically valid names in R which are then saved in R outputs.
+    Source: https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/make.names
+
+    Args:
+        s (str): A string to convert
+
+    Returns:
+        str: A string enclosed in parentheses
+    """
+    return f"({s})"
+
+def r_style_make_names(s: str) -> str:
+    """Recreates R's make.names function for individual strings.
+    This function is often used to create syntactically valid names in R which are then saved in R outputs.
+    Source: https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/make.names
+
+    Args:
+        s (str): A string to convert
+
+    Returns:
+        str: A string converted in the same way as R's make.names function
+    """
+    EXTRA_WHITELIST_CHARACTERS = "_ΩπϴλθijkuΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω_µ" # Note: there are two "μμ" like characters one is greek letter mu, the other is the micro sign
+    VALID_CHARACTERS = string.ascii_letters + string.digits + "." + EXTRA_WHITELIST_CHARACTERS
+    REPLACEMENT_CHAR = "."
+    new_string_chars = list()
+    for char in s:
+        if char in VALID_CHARACTERS:
+            new_string_chars.append(char)
+        else:
+            new_string_chars.append(REPLACEMENT_CHAR)
+    return "".join(new_string_chars)
+
+def check_if_valid_extensions(file, valid_ext):
+    """description
+    params (description + data type)
+    return (description + data type)"""
+    pass
+def check_factor():
+    """description"""
+    pass
+
+def check_if_extensions_valid(file, valid_ext):
+    """ Description of the function
+    Description Line 2
+
+    :param file: Input raw data file
+    :type file: Path
+    :param valid_ext: Extensions that are allow for the raw data files
+    :type valid_ext: list[str]
+    :return: A required fields-only flag entry dictionary
+    :rtype: FlagEntry
+    """
+    pass # Does nothing (... is equivalent)
+
+def check_factor_values_in_runsheet():
+    """ Description of the function
+    Description Line 2
+
+    :param file: Input raw data file
+    :type file: Path
+    :param valid_ext: Extensions that are allow for the raw data files
+    :type valid_ext: list[str]
+    :return: A required fields-only flag entry dictionary
+    :rtype: FlagEntry
+    """
+    pass # Does nothing (... is equivalent)
+
+def check_sample_table_against_runsheet(
+    runsheet: Path, sampleTable: Path, all_samples_required: bool
+) -> FlagEntry:
+    """Check the sample table includes all samples as denoted in the runsheet.
+
+    Args:
+        runsheet (Path): csv file used for processing, the index denotes all samples
+        sampleTable (Path): csv file that pairs each sample with resolved experimental group (called condition within the table)
+        all_samples_required (bool): denotes if all samples must be shared or if a subset of samples from the runsheet is okay.
+
+    Returns:
+        FlagEntry: A check result
+    """
+    # data specific preprocess
+    df_rs = pd.read_csv(runsheet, index_col="Sample Name").sort_index()
+    df_sample = pd.read_csv(sampleTable, index_col="sample").sort_index()
+
+    extra_samples: dict[str, set[str]] = {
+        "unique_to_runsheet": set(df_rs.index) - set(df_sample.index),
+        "unique_to_sampleTable": set(df_sample.index) - set(df_rs.index),
+    }
+
+    # check logic
+    if any(
+        [
+            (extra_samples["unique_to_runsheet"] and all_samples_required),
+            (extra_samples["unique_to_sampleTable"]),
+        ]
+    ):
+        code = FlagCode.HALT
+        message = f"Samples mismatched: {[f'{entry}:{v}' for entry, v  in extra_samples.items() if v]}"
+    else:
+        code = FlagCode.GREEN
+        message = f"All samples accounted for based on runsheet (All samples required?: {all_samples_required})"
+    return {"code": code, "message": message}
+
+def check_dge_table_fixed_statistical_columns_constraints(
+    dge_table: Path, **_
+) -> FlagEntry:
+    # data specific preprocess
+    fixed_stats_columns = (
+        ({"All.mean", "All.stdev"}, {"nonNull": True, "nonNegative": True}),
+        ({"F.p.value"}, {"nonNull": False, "nonNegative": True}),
+    )
+
+    df_dge = pd.read_csv(dge_table)
+
+    # issue trackers
+    # here: {prefix+constraint: [failed_columns]}
+    issues: dict[str, list[str]] = dict()
+
+    issues = utils_common_constraints_on_dataframe(df_dge, fixed_stats_columns)
+
+    # check logic
+    if not any([issue_type for issue_type in issues.values()]):
+        code = FlagCode.GREEN
+        message = f"All values in columns met constraint: {fixed_stats_columns}"
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"Issues found {issues} that" f"fail the constraint: {fixed_stats_columns}."
+        )
+    return {"code": code, "message": message}
+
+def check_dge_table_comparison_statistical_columns_exist(
+    dge_table: Path, runsheet: Path, **_
+) -> FlagEntry:
+    # data specific preprocess
+    COMPARISON_PREFIXES = ["Log2fc_", "T.stat_", "P.value_", "Adj.p.value_"]
+    expected_groups = utils_runsheet_to_expected_groups(runsheet, map_to_lists=True)
+    expected_comparisons = [
+        "v".join(paired_groups)
+        for paired_groups in itertools.permutations(expected_groups, 2)
+    ]
+    expected_columns = {
+        "".join(comb)
+        for comb in itertools.product(COMPARISON_PREFIXES, expected_comparisons)
+    }
+    df_dge_columns = set(pd.read_csv(dge_table).columns)
+    missing_cols = expected_columns - df_dge_columns
+
+    # check logic
+    if not missing_cols:
+        code = FlagCode.GREEN
+        message = f"All comparison summary statistic columns (Prefixes: {COMPARISON_PREFIXES}) present. {sorted(list(expected_columns))}"
+    else:
+        code = FlagCode.HALT
+        message = f"Missing these comparison summary statistic columns (Prefixes: {COMPARISON_PREFIXES}): {sorted(list(missing_cols))}"
+    return {"code": code, "message": message}
+
+def check_dge_table_fixed_statistical_columns_exist(dge_table: Path, **_) -> FlagEntry:
+    # data specific preprocess
+    fixed_stats_columns = {
+        "All.mean": {"nonNull": True, "nonNegative": True},
+        "All.stdev": {"nonNull": True, "nonNegative": True},
+        "F.p.value": {"nonNull": False, "nonNegative": True},
+    }
+    expected_columns = set(fixed_stats_columns)
+    df_dge_columns = set(pd.read_csv(dge_table).columns)
+    missing_cols = expected_columns - df_dge_columns
+
+    # check logic
+    if not missing_cols:
+        code = FlagCode.GREEN
+        message = f"All dataset summary stat columns present. {sorted(list(expected_columns))}"
+    else:
+        code = FlagCode.HALT
+        message = (
+            f"Missing these dataset summary stat columns: {sorted(list(missing_cols))}"
+        )
+    return {"code": code, "message": message}
+
+def check_sample_table_for_correct_group_assignments(
+    runsheet: Path, sampleTable: Path
+) -> FlagEntry:
+    """Check the sample table is assigned to the correct experimental group.
+    An experimental group is defined by the Factor Value columns found in the runsheet.
+
+    Args:
+        runsheet (Path): csv file used for processing, includes metadata used for experimental group designation
+        sampleTable (Path): csv file that pairs each sample with resolved experimental group (called condition within the table)
+
+    Returns:
+        FlagEntry: A check result
+    """
+    df_sample = pd.read_csv(sampleTable, index_col=0).sort_index()
+    # data specific preprocess
+    df_rs = (
+        pd.read_csv(runsheet, index_col="Sample Name", dtype=str) # Ensure no factor value columns are misinterpreted as numeric
+        .filter(regex="^Factor Value\[.*\]")
+        .loc[df_sample.index]  # ensure only sampleTable groups are checked
+        .sort_index()
+    )  # using only Factor Value columns
+
+    # TODO: refactor with utils_runsheet_to_expected_groups
+    expected_conditions_based_on_runsheet = df_rs.apply(
+        lambda x: " & ".join(x), axis="columns"
+    ).apply(  # join factors with '...'
+        enclose_in_parens
+    )  # reformat entire group in the R style
+
+    mismatched_rows = expected_conditions_based_on_runsheet != df_sample["group"]
+
+    # check logic
+    if not any(mismatched_rows):
+        code = FlagCode.GREEN
+        message = f"Conditions are formatted and assigned correctly based on runsheet for all {len(df_sample)} samples in sample table: {list(df_sample.index)}"
+    else:
+        code = FlagCode.HALT
+        mismatch_description = (
+            df_sample[mismatched_rows]["group"]
+            + " <--SAMPLETABLE : RUNSHEET--> "
+            + expected_conditions_based_on_runsheet[mismatched_rows]
+        ).to_dict()
+        message = f"Mismatch in expected conditions based on runsheet for these rows: {mismatch_description}"
+    return {"code": code, "message": message}
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/config.yaml b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/config.yaml
new file mode 100644
index 00000000..8c3d11de
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/config.yaml
@@ -0,0 +1,238 @@
+# TOP LEVEL
+NAME: "microarray"
+VERSION: "0"
+
+# anchors for reuse
+_anchors:
+  RawDataDir: &RawDataDir "00-RawData"
+  NormExpDir: &NormExpDir "01-limma_NormExp"
+  DGEDataDir: &DGEDataDir "02-limma_DGE"
+  neverPublished: &neverPublished
+    subcategory: null
+    subdirectory: null
+    publish to repo: false
+    include subdirectory in table: false
+    table order: -1
+
+# configuration for microarray data
+Staging:
+  General:
+    Required Metadata:
+      From ISA:
+        - ISA Field Name: Study Assay Measurement Type
+          ISA Table Source: Investigation
+          Investigation Subtable: STUDY ASSAYS
+          Runsheet Column Name: Study Assay Measurement
+          Processing Usage: >-
+            Mapping to the appropriate processing pipeline for the assay.
+          Example: transcription profiling
+
+        - ISA Field Name: Study Assay Technology Type
+          ISA Table Source: Investigation
+          Investigation Subtable: STUDY ASSAYS
+          Runsheet Column Name: Study Assay Technology Type
+          Processing Usage: >-
+            Mapping to the appropriate processing pipeline for the assay.
+          Example: DNA microarray
+
+        - ISA Field Name: Study Assay Technology Platform
+          ISA Table Source: Investigation
+          Investigation Subtable: STUDY ASSAYS
+          Runsheet Column Name: Study Assay Technology Platform
+          Processing Usage: >-
+            Mapping to the appropriate processing pipeline for the assay.
+          Example: Affymetrix
+
+        - ISA Field Name: 
+            - Characteristics[Organism]
+            - Characteristics[organism]
+          ISA Table Source: Sample
+          Runsheet Column Name: organism
+          Processing Usage: >-
+            Mapping to the appropriate alignment reference and annotation databases.
+          Example: Arabidopsis thaliana
+
+        - ISA Field Name: 
+            - Array Design REF
+          ISA Table Source: Assay
+          Runsheet Column Name: biomart_attribute
+          Processing Usage: >-
+            Used for identifying biomart attribute name for biomart mapping purposes
+          Example: agilent_wholegenome_4x44k_v1
+
+        - ISA Field Name: Source Name
+          ISA Table Source: Sample
+          Runsheet Column Name: Source Name
+          Processing Usage: >-
+            The source entity that sample is derived from.
+          Example: GSM502538 1
+
+        - ISA Field Name: Sample Name
+          ISA Table Source: Assay
+          Runsheet Column Name: sample_name
+          Runsheet Index: true
+          Processing Usage: >-
+            Sample name is used as a unique sample identifier during processing
+          Example: Atha_Col-0_Root_WT_Ctrl_45min_Rep1_GSM502538
+
+        - ISA Field Name:
+            - Label
+            - Parameter Value[label]
+            - Parameter Value[Label]
+          ISA Table Source: Sample
+          Runsheet Column Name: Label
+          Processing Usage: >-
+            Used to determine if the dataset is one or two channel.
+          Example: Cy3
+
+        - ISA Field Name: Array Data File
+          ISA Table Source: Assay
+          Runsheet Column Name: Array Data File Name
+          Processing Usage: >-
+            Name of the raw data file(s). In some cases, this file is a 
+            dataset-wise zip folder containing all raw data files.  When this occurs, 
+            the ISA Field Named 'Comment: Array Data File Name' designates the
+            raw data file mapping between contents of the dataset-wise zip.
+            Note: Actual locations are denoted in 'array_data_file_path'
+          Example: Atha_Col-0_HypocotylCC_WT_GC_Rep2_GSM2152575_raw.CEL.gz
+    
+        - ISA Field Name: Array Data File
+          ISA Table Source: Assay
+          Runsheet Column Name: Array Data File Path
+          GLDS URL Mapping: true
+          Processing Usage: >-
+            Actual locations of the raw data files.  Can be either a local path or a uri.
+          Example: 'https://genelab-data.ndc.nasa.gov/genelab/static/media/dataset/GLDS-205/...'
+
+        - ISA Field Name: Comment[Array Data File Name]
+          ISA Table Source: Assay
+          Runsheet Column Name: Comment[Array Data File Name]
+          Value If Not Found: "N/A"
+          Processing Usage: >-
+            Actual locations of the raw data files.  Can be either a local path or a uri.
+          Example: 'https://genelab-data.ndc.nasa.gov/genelab/static/media/dataset/GLDS-205/...'
+    
+        - ISA Field Name: Hybridization Assay Name
+          ISA Table Source: Assay
+          Runsheet Column Name: Hybridization Assay Name
+          Processing Usage: >-
+            Used to map samples to specific hybridization 
+            (e.g. two channels where each channel is a different sample) 
+            Also used to determine if separate channel files exist for two channel studies.
+          Example: GSM502538
+    
+        - ISA Field Name: Factor Value[{factor_name}]
+          ISA Table Source: [Assay, Sample]
+          Runsheet Column Name: Factor Value[{factor_name}]
+          Matches Multiple Columns: true
+          Match Regex: "Factor Value\\[.*\\]"
+          Append Column Following: "Unit"
+          Processing Usage: >-
+            Factor values in a study. Used to assign experimental groups for each sample.
+            Note: On the runsheet, a subsequent 'Unit' Column value will be 
+            suffix-concatenated if it exists.
+          Example: Basal Control
+    
+        - ISA Field Name: Unit
+          ISA Table Source: [Assay, Sample]
+          Runsheet Column Name: null
+          Matches Multiple Columns: true
+          Autoload: false # handled by factor value loading above
+          Processing Usage: >-
+            Unit to be suffix-concatenated onto prior Factor value columns.
+          Example: day
+
+      From User:
+        - Runsheet Column Name: GLDS
+          Processing Usage: >-
+            The GLDS accession number
+          Example: GLDS-205
+  
+        - Runsheet Column Name: array_data_file_path
+          Processing Usage: >-
+            The location of the array data file. Can be either a url or local path.
+            Note: For GLDS raw data assets, either the filelisting json API or the OpenAPI
+            may be used to retrieve urls given the array data filename (sourced from ISA archive).
+          Example: /some/local/path OR https://genelab-data.ndc.nasa.gov/genelab/static/media/dataset/GLDS-123_microarray_E-MTAB-3289.raw.1.zip?version=1
+  
+        - Runsheet Column Name: is_array_data_file_compressed
+          Processing Usage: >-
+            Denotes if raw data files are compressed.
+            Note: This is determined by '.gz' extension during 
+            runsheet generation.
+          Example: 'TRUE'
+  
+        - Runsheet Column Name: version
+          Processing Usage: >-
+            The ISA archive version number.
+            Note: Retrieved from the GeneLab
+          Example: 2
+
+ISA Meta:
+  Valid Study Assay Technology And Measurement Types:
+    - measurement: "transcription profiling"
+      technology: "DNA microarray"
+    - measurement: "transcription profiling"
+      technology: "microarray"
+
+  # this is prepended to all file names in the curation assay table
+  Global file prefix: "{datasystem}_array_"
+
+  # configuration related to updating investigation file
+  # each must refer to a STUDY PROCESS in the 'ISA_investigation.yaml' file
+  # LEADCAP_organism should be the studied organisms scientific name with a leading cap
+  Post Processing Add Study Protocol: 
+    GeneLab Agilent 1 Channel data processing protocol::{LEADCAP_organism} V1
+
+data assets:
+  runsheet:
+    processed location: 
+      - "Metadata"
+      - "{dataset}_microarray_v0_runsheet.csv"
+
+    tags:
+      - raw
+
+    resource categories: *neverPublished
+
+  raw intensities table:
+    processed location: 
+      - *RawDataDir
+      - "raw_intensities_GLmicroarray.csv"
+
+    tags:
+      - processed
+            
+    resource categories:
+      subcategory: Raw Intensities Table
+      subdirectory: ""
+      publish to repo: true
+      include subdirectory in table: false
+      table order: 1
+
+  normalized expression table:
+    processed location: 
+      - *NormExpDir
+      - "normalized_expression_GLmicroarray.csv"
+
+    tags:
+      - processed
+            
+    resource categories:
+      subcategory: Normalized Expression Table
+      subdirectory: ""
+      publish to repo: true
+      include subdirectory in table: false
+      table order: 2
+
+data asset sets:
+  # These assets are not generated in the workflow, but are generated after the workflow
+  PUTATIVE: []
+  glds metadata:
+    - "runsheet"
+  processed:
+    - "viz PCA table"
+    - "DE annotated extended for viz table"
+    - "DE contrasts table"
+    - "DE annotated table"
+    - "sample table"
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/protocol.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/protocol.py
new file mode 100644
index 00000000..7ee08a29
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/protocol.py
@@ -0,0 +1,309 @@
+from pathlib import Path
+import re
+from typing import Union
+import yaml
+import logging
+import textwrap
+
+from dp_tools.core.entity_model import Dataset
+
+log = logging.getLogger(__name__)
+
+from dp_tools.core.check_model import ValidationProtocol, FlagCode
+# ORDER is intentional to allow shared name functions to overload in favor of microarray
+from dp_tools import bulkRNASeq
+
+from . import checks
+
+CONFIG = {
+    "Metadata-check_metadata_attributes_exist": {
+        "expected_attrs": ["biomart_attribute", "organism"]
+    },
+}
+
+
+def validate(
+    dataset: Dataset,
+    config_path: Path = None,
+    run_args: dict = None,
+    report_args: dict = None,
+    protocol_args: dict = None,
+    defer_run: bool = False,
+) -> Union[ValidationProtocol, ValidationProtocol.Report]:
+
+    if config_path is not None:
+        with open(config_path, "r") as f:
+            config = yaml.safe_load(f)
+    else:
+        config = CONFIG
+
+    if run_args is None:
+        run_args = dict()
+
+    if report_args is None:
+        report_args = dict()
+
+    if protocol_args is None:
+        protocol_args = dict()
+    # init validation protocol
+    vp = ValidationProtocol(**protocol_args)
+    # fmt: on
+    with vp.component_start(
+        name=dataset.name,
+        description="Validate microarray processed data",
+    ):
+        with vp.component_start(
+            name="Metadata", description="Metadata file validation"
+        ):
+            with vp.payload(payloads=[{"dataset": dataset}]):
+                vp.add(
+                    checks.check_metadata_attributes_exist,
+                    config=config["Metadata-check_metadata_attributes_exist"],
+                    full_description=textwrap.dedent(f"""
+                        - Check: Runsheet includes required metadata columns, {config["Metadata-check_metadata_attributes_exist"]["expected_attrs"]}
+                            - Reason:
+                                - 'Organism' column is used to map the appropriate annotation table
+                                - 'biomart_attribute' column is used to map the appropriate biomart attribute for initial Ensembl based gene mapping
+                            - Potential Source of Problems:
+                                - Runsheet does not include these columns. If automatically generated using 'dp_tools', report this to the maintainer of 'dp_tools'.  If manually generated, ensure {config["Metadata-check_metadata_attributes_exist"]["expected_attrs"]} columns are populated.
+                            - Flag Condition:
+                                - Green: Columns {config["Metadata-check_metadata_attributes_exist"]["expected_attrs"]} exist
+                                - Halt: Columns {config["Metadata-check_metadata_attributes_exist"]["expected_attrs"]} do not exist
+                    """)
+                )
+                vp.add_manual(
+                    description = "Manually validate runsheet against ISA assay table",
+                    start_instructions = "Open runsheet (in Metadata folder). Open OSD webpage, navigate to microarray assay table",
+                    pass_fail_questions = [
+                        "Does the runsheet open?",
+                        "Is the number of samples in parity?",
+                        "Do the file extensions in the 'Array Data File Name' column end in '.txt' or '.txt.gz'?",
+                        "Do all factor values exist in both tables. Does the runsheet have units included as appropriate?",
+                    ]
+                )
+        with vp.component_start(
+            name="Raw Intensities",
+            description="",
+        ):
+            with vp.payload(
+                payloads=[
+                    {
+                        "raw_intensities": lambda: dataset.data_assets["raw intensities table"].path,
+                        "samples": dataset.samples
+                    }
+                ]
+            ):
+                vp.add(
+                    checks.check_raw_intensities_table,
+                    full_description=textwrap.dedent(f"""
+                        - Check: Ensure raw intensities table has all samples and values within [0,+inf)
+                            - Reason:
+                                - Part of processing output
+                            - Potential Source of Problems:
+                                - Bug in processing script or malformed raw data files
+                            - Flag Condition:
+                                - Green: All conditions met
+                                - Halt: At least one condition failed
+                    """)
+                    )
+            with vp.payload(
+                payloads=[
+                    {
+                        "organism": lambda: dataset.metadata["organism"],
+                        "samples": lambda: set(dataset.samples),
+                        "dge_table": lambda: dataset.data_assets[
+                            "raw intensities table"
+                        ].path,
+                        "runsheet": lambda: dataset.data_assets["runsheet"].path,
+                    }
+                ]
+            ):
+                vp.add(
+                    bulkRNASeq.checks.check_dge_table_annotation_columns_exist,
+                    full_description=textwrap.dedent(f"""
+                            - Check: Ensure ['SYMBOL','GENENAME','REFSEQ','ENTREZID','STRING_id','GOSLIMS_IDS','ENSEMBL'] columns exist (note: 'ENSEMBL' is replaced by 'TAIR' for Arabidospis)
+                                - Reason:
+                                    - These columns should be populated during annotation for all single gene mapping probes
+                                - Potential Source of Problems:
+                                    - Bug in processing script during annotation step
+                                - Flag Condition:
+                                    - Green: All columns present
+                                    - Halt: At least one column is missing
+                        """)
+                    )
+        with vp.component_start(
+            name="Normalized expression",
+            description="",
+        ):
+            with vp.payload(
+                payloads=[
+                    {
+                        "normalized_expression": lambda: dataset.data_assets["normalized expression table"].path,
+                        "samples": dataset.samples
+                    }
+                ]
+            ):
+                vp.add(
+                    checks.check_normalized_expression_table,
+                    full_description=textwrap.dedent(f"""
+                        - Check: Ensure normalized expression table has all samples and values within (-inf,+inf)
+                            - Reason:
+                                - Part of processing output. Note: Values are log2 transformed
+                            - Potential Source of Problems:
+                                - Bug in processing script
+                            - Flag Condition:
+                                - Green: All conditions met
+                                - Halt: At least one condition failed
+                    """)
+                    )
+            with vp.payload(
+                payloads=[
+                    {
+                        "organism": lambda: dataset.metadata["organism"],
+                        "samples": lambda: set(dataset.samples),
+                        "dge_table": lambda: dataset.data_assets[
+                            "normalized expression table"
+                        ].path,
+                        "runsheet": lambda: dataset.data_assets["runsheet"].path,
+                    }
+                ]
+            ):
+                vp.add(
+                    bulkRNASeq.checks.check_dge_table_annotation_columns_exist,
+                    full_description=textwrap.dedent(f"""
+                            - Check: Ensure ['SYMBOL','GENENAME','REFSEQ','ENTREZID','STRING_id','GOSLIMS_IDS','ENSEMBL'] columns exist (note: 'ENSEMBL' is replaced by 'TAIR' for Arabidospis)
+                                - Reason:
+                                    - These columns should be populated during annotation for all single gene mapping probes
+                                - Potential Source of Problems:
+                                    - Bug in processing script during annotation step
+                                - Flag Condition:
+                                    - Green: All columns present
+                                    - Halt: At least one column is missing
+                        """)
+                    )
+        with vp.component_start(
+            name="Processing Report",
+            description="",
+        ):
+            vp.add_manual(
+                description = "Loading report",
+                start_instructions = "Load html report into web browser",
+                pass_fail_questions = [
+                    "Does the report render without issue?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 2 Load Metadata and Raw Data",
+                start_instructions = "Navigate to section 2",
+                pass_fail_questions = [
+                    "Does the content of the table match the runsheet?",
+                    "Do the number of entries in the embedded runsheet match the number of samples/runsheet-rows?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 3 QA For Raw Data - Density Plots",
+                start_instructions = "Navigate to section 3 - Density Plots",
+                pass_fail_questions = [
+                    "Is every sample included in the legend?",
+                    "Are axes and axe titles clear?",
+                ],
+                pass_flag_questions = [
+                    "Are all lines have similar one or two peak shape, each line not likely overlapping?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 3 QA For Raw Data - Pseudo Image Plots",
+                start_instructions = "Navigate to section 3 - Pseudo Image Plots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                    "Is each plot white-blue gradient?",
+                ],
+                pass_flag_questions = [
+                    "Are there clear physical streaks, bright (i.e. white) circles or other features?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 3 QA For Raw Data - MA Plots",
+                start_instructions = "Navigate to section 3 - MA Plots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Are positive control points distributed from left end to right end of MA cloud",
+                    "Are negative control points concentrated at left end of MA cloud?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 3 QA For Raw Data - Foreground Background Plots",
+                start_instructions = "Navigate to section 3 - Foreground Background Plots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Are the vast majority (i.e. 99.9%) of points are above the blue diagonal line",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 3 QA For Raw Data - Boxplots",
+                start_instructions = "Navigate to section 3 - Boxplots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Do the boxplots have overlapping distributions?",
+                ]
+            )
+            ###################################################
+            ### Normalized data plots
+            ###################################################
+            vp.add_manual(
+                description = "Section 6 Normalized Data Quality Assessment - Density Plots",
+                start_instructions = "Navigate to section 6 - Density Plots",
+                pass_fail_questions = [
+                    "Is every sample included in the legend?",
+                    "Are axes and axe titles clear?",
+                ],
+                pass_flag_questions = [
+                    "Are all lines nearly fully overlapping?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 6 Normalized Data Quality Assessment - Pseudo Image Plots",
+                start_instructions = "Navigate to section 6 - Pseudo Image Plots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Is each plot white-blue gradient?",
+                    "Are there clear physical streaks, bright (i.e. white) circles or other features?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 6 Normalized Data Quality Assessment - MA Plots",
+                start_instructions = "Navigate to section 6 - MA Plots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Are positive control points distributed from left end to right end of MA 'cloud'",
+                    "Are negative control points concentrated at left end of MA 'cloud'?",
+                ]
+            )
+            vp.add_manual(
+                description = "Section 6 Normalized Data Quality Assessment - Boxplots",
+                start_instructions = "Navigate to section 6 - Boxplots",
+                pass_fail_questions = [
+                    "Does every sample have its own plot?",
+                ],
+                pass_flag_questions = [
+                    "Do the boxplots have largely the same distributions? (i.e. are the boxes all nearly horizontally aligned)",
+                ]
+            )
+    # return protocol object without running or generating a report
+    if defer_run:
+        return vp
+
+    vp.run(**run_args)
+
+    # return report
+    return vp.report(**report_args, combine_with_flags=dataset.loaded_assets_dicts)
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/schemas.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/schemas.py
new file mode 100644
index 00000000..9db06de0
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/bin/dp_tools__agilent_1_channel_skipDE/schemas.py
@@ -0,0 +1,29 @@
+""" Schemas for validation 
+Uses Schema to allow usage of validation functions
+"""
+import pandera as pa
+
+check_single_value = pa.Check(
+    lambda x: len(x.unique()) == 1,
+    title="Check that all values in the column are identical",
+    description="Useful for columns that contain dataset level metadata like organism and paired_end.",
+    error="Dataset level columns do NOT contain one unique value"
+    )
+
+runsheet = pa.DataFrameSchema(
+        columns={
+            "Original Sample Name": pa.Column(str),
+            "Study Assay Measurement": pa.Column(str),
+            "Study Assay Technology Type": pa.Column(str),
+            "Study Assay Technology Platform": pa.Column(str),
+            "Source Name": pa.Column(str),
+            "Label": pa.Column(str),
+            "Hybridization Assay Name": pa.Column(str),
+            "Array Data File Name": pa.Column(str),
+            "Array Data File Path": pa.Column(str),
+            "Original Sample Name": pa.Column(str),
+            "organism": pa.Column(str, check_single_value),
+        },
+        # define checks at the DataFrameSchema-level
+        # checks=check_read2_path_populated_if_paired_end
+    )
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/default.config b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/default.config
index d4217d6a..c636a6ea 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/default.config
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/default.config
@@ -12,6 +12,7 @@ params {
   Parameters that CAN be overwritten
   */
   runsheetPath = false
+  referenceStorePath = './References' // Path to custom references
   biomart_attribute = false // Must be supplied if runsheet 'Array design REF' column doesn't indicate it
   outputDir = "." // the location for the output from the pipeline (also includes raw data and metadata)
   publish_dir_mode = "link" // method for creating publish directory.  Default here for hardlink
@@ -22,11 +23,13 @@ params {
   */
   // For now, this particular is good for all organisms listed on the file.
   annotation_file_path = "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110_annotations.csv"
+  annotation_config_path = "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/master/Microarray/Agilent_1-channel/Array_Annotations/Agilent_array_annotations.csv"
 
   /*
   DEBUG related parameters, not likely useful in production
   */
   skipVV = false // if true, VV will not be performed
+  skipDE = false // if true, DE will not be performed
   limit_biomart_query = false // if set to a value, that value is the maximum number of biomart probe IDs to query
   max_flag_code = 80 // Maximum flag value allowed, exceeding this value during V&V will cause the workflow to halt
 
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/software/by_docker_image.config b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/software/by_docker_image.config
index b9ceb8a9..1a8e1480 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/software/by_docker_image.config
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/config/software/by_docker_image.config
@@ -1,9 +1,9 @@
 // Config that specifies packaged conda yml files for each process
 process {
     withName: 'AGILE1CH' {
-        container = "quay.io/j_81/gl_images:NF_AffyMP-A_1.0.0-RC7"
+        container = "quay.io/nasa_genelab/gl-microarray:1.0.0"
     }
     withName: 'RUNSHEET_FROM_GLDS|VV_AGILE1CH|GENERATE_MD5SUMS|UPDATE_ISA_TABLES|GENERATE_SOFTWARE_TABLE' {
-        container = "quay.io/j_81/dp_tools:1.3.1"
+        container = "quay.io/nasa_genelab/dp_tools:1.3.5"
     }
 }
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/main.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/main.nf
index d2c4fcd5..ddea4730 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/main.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/main.nf
@@ -10,6 +10,7 @@ include { VV_AGILE1CH } from './modules/VV_AGILE1CH.nf'
 include { AGILE1CH } from './modules/AGILE1CH.nf'
 include { RUNSHEET_FROM_GLDS } from './modules/RUNSHEET_FROM_GLDS.nf'
 include { GENERATE_SOFTWARE_TABLE } from './modules/GENERATE_SOFTWARE_TABLE'
+include { DUMP_META } from './modules/DUMP_META'
 
 /**************************************************
 * HELP MENU  **************************************
@@ -33,6 +34,7 @@ if (params.help) {
   println("                        the GLDS accession id to process through the NF_MAAgilent1ch.")
   println("  --runsheetPath        Use a local runsheet instead one automatically generated from a GLDS ISA archive.")
   println("  --skipVV              Skip automated V&V. Default: false")
+  println("  --skipDE              Skip DE. Default: false")
   println("  --outputDir           Directory to save staged raw files and processed files. Default: <launch directory>")
   exit 0
   }
@@ -76,15 +78,16 @@ workflow {
       channel.fromPath( "${ projectDir }/bin/Agile1CMP.qmd" ),
       ch_runsheet,
       PARSE_ANNOTATION_TABLE.out.annotations_db_url,
-      ch_meta | map { it.organism },
-      params.limit_biomart_query
+      PARSE_ANNOTATION_TABLE.out.reference_version_and_source,
+      params.limit_biomart_query,
+      params.skipDE
     )
 
     VV_AGILE1CH( 
       ch_runsheet, 
       AGILE1CH.out.de,
       params.skipVV,
-      "${ projectDir }/bin/dp_tools__agilent_1_channel" // dp_tools plugin
+      "${ projectDir }/bin/${ params.skipDE ? 'dp_tools__agilent_1_channel_skipDE' : 'dp_tools__agilent_1_channel' }" // dp_tools plugin
       )
 
     // Software Version Capturing
@@ -100,9 +103,13 @@ workflow {
 
     GENERATE_SOFTWARE_TABLE(
       ch_software_versions | unique | collectFile(newLine: true, sort: true, cache: false),
-      ch_runsheet | splitCsv(header: true, quote: '"') | first | map{ row -> row['Array Data File Name'] }
+      ch_runsheet | splitCsv(header: true, quote: '"') | first | map{ row -> row['Array Data File Name'] },
+      params.skipDE
     )
 
+    // export meta for post processing usage
+    ch_meta | DUMP_META
+
     emit:
       meta = ch_meta 
       runsheet = ch_runsheet
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/AGILE1CH.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/AGILE1CH.nf
index cdd1780b..eec53231 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/AGILE1CH.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/AGILE1CH.nf
@@ -8,19 +8,13 @@ process AGILE1CH {
     path(qmd) // quarto qmd file to render
     path(runsheet_csv) // runsheet to supply as parameter
     path(annotation_file_path) // runsheet to supply as parameter
-    val(organism) // runsheet to supply as parameter
+    tuple val(ensemblVersion), val(ensemblSource)
     val(limit_biomart_query) // DEBUG option, limits biomart queries to the number specified if not set to false
+    val(skipDE) // whether to skip DE
 
   output:
     path("NF_MAAgilent1ch_v${workflow.manifest.version}_GLmicroarray.html"), emit: report
 
-    tuple path("02-limma_DGE/contrasts_GLmicroarray.csv"),
-          path("02-limma_DGE/SampleTable_GLmicroarray.csv"),
-          path("02-limma_DGE/differential_expression_GLmicroarray.csv"),
-          path("02-limma_DGE/visualization_PCA_table_GLmicroarray.csv"),
-          path("01-limma_NormExp/normalized_expression_GLmicroarray.csv"),
-          path("00-RawData/raw_intensities_GLmicroarray.csv"), emit: de_all_files
-
     tuple path("02-limma_DGE"),
           path("01-limma_NormExp"),
           path("00-RawData"), emit: de
@@ -29,14 +23,19 @@ process AGILE1CH {
 
   script:
     def limit_biomart_query_parameter = limit_biomart_query ? "-P DEBUG_limit_biomart_query:${limit_biomart_query}" : ''
+    def run_DE = skipDE ? "-P run_DE:'false'" : ''
     """
         export HOME=\$PWD;
         
         quarto render \$PWD/${qmd} \
+            -P 'workflow_version:${workflow.manifest.version}' \
             -P 'runsheet:${runsheet_csv}' \
             -P 'annotation_file_path:${annotation_file_path}' \
-            -P 'organism:${organism}' \
-            ${limit_biomart_query_parameter}
+            -P 'ensembl_version:${ensemblVersion}' \
+            -P 'local_annotation_dir:${params.referenceStorePath}' \
+            -P 'annotation_config_path:${params.annotation_config_path}' \
+            ${limit_biomart_query_parameter} \
+            ${run_DE}
 
         # Rename report
         mv Agile1CMP.html NF_MAAgilent1ch_v${workflow.manifest.version}_GLmicroarray.html
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/main.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/main.nf
new file mode 100644
index 00000000..3be63b64
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/main.nf
@@ -0,0 +1,16 @@
+process DUMP_META {
+  publishDir "${ params.outputDir }/${ params.gldsAccession }/GeneLab",
+    mode: params.publish_dir_mode
+  
+  input:
+    val(meta)
+  
+  output:
+    path("meta.sh")
+  
+  script:
+  """
+  # Write the meta file
+  reformat_meta.sh '${ meta }' > meta.sh
+  """
+}
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/resources/usr/bin/reformat_meta.sh b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/resources/usr/bin/reformat_meta.sh
new file mode 100644
index 00000000..69855303
--- /dev/null
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/DUMP_META/resources/usr/bin/reformat_meta.sh
@@ -0,0 +1,28 @@
+input=$1
+
+# Remove leading and trailing brackets
+input=$(echo "$input" | sed 's/^\[//; s/\]$//')
+
+# Convert the input to KEY="VALUE" format with sanitized keys
+interim_output=$(echo "$input" | sed -E 's/:/=/g' | tr ',' '\n')
+
+while IFS='=' read -r key value; do
+    # Remove leading and ending quotes from the value, if present
+    value=$(echo "$value" | sed 's/^"\(.*\)"$/\1/')
+
+    # Wrap the value in double quotes
+    value="'$value'"
+
+    # Remove leading spaces
+    key=$(echo "$key" | sed 's/^ *//g')
+
+    # Sanitize the key to follow valid Bash variable name format
+    # Convert upper case to lower case, replace spaces with underscores, and remove square brackets
+    sanitized_key=$(echo "$key" | tr '[:upper:] ' '[:lower:]_' | sed 's/[][]//g')
+
+
+    # Append the key-value pair to the formatted output
+    formatted_output+="$sanitized_key=$value\n"
+done <<< "$interim_output"
+
+echo -e "$formatted_output"
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/main.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/main.nf
index 6939155d..811d463b 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/main.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/main.nf
@@ -6,12 +6,13 @@ process GENERATE_SOFTWARE_TABLE {
   input:
     path("software_versions.yaml")
     val(filename)
+    val(skipDE)
   
   output:
     path("software_versions_GLmicroarray.md")
   
   script:
     """
-    SoftwareYamlToMarkdownTable.py software_versions.yaml \"$filename\"
+    SoftwareYamlToMarkdownTable.py software_versions.yaml \"$filename\" $skipDE
     """
 }
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/resources/usr/bin/SoftwareYamlToMarkdownTable.py b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/resources/usr/bin/SoftwareYamlToMarkdownTable.py
index 40516f5b..ebbea763 100755
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/resources/usr/bin/SoftwareYamlToMarkdownTable.py
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/GENERATE_SOFTWARE_TABLE/resources/usr/bin/SoftwareYamlToMarkdownTable.py
@@ -42,7 +42,8 @@
 @click.command()
 @click.argument("input_yaml", type=click.Path(exists=True))
 @click.argument("filename")
-def yamlToMarkdown(input_yaml: Path, filename: str):
+@click.argument("skip_de", type=click.BOOL)
+def yamlToMarkdown(input_yaml: Path, filename: str, skip_de: bool):
     """ Using a software versions """
     with open(input_yaml, "r") as f:
         data = yaml.safe_load(f)
@@ -54,6 +55,10 @@ def yamlToMarkdown(input_yaml: Path, filename: str):
     if not filename.endswith('.gz'):
         AGILENT_SOFTWARE_DPPD.remove('r.utils')
 
+    if skip_de:
+        AGILENT_SOFTWARE_DPPD.remove('matrixstats')
+        AGILENT_SOFTWARE_DPPD.remove('statmod')
+
     # Filter to direct software used (i.e. exclude dependencies of the software)
     df = df.loc[df["name"].str.lower().isin(AGILENT_SOFTWARE_DPPD)]
 
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/main.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/main.nf
index b899e81b..d63e95af 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/main.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/main.nf
@@ -5,13 +5,14 @@ process GENERATE_PROTOCOL {
 
   input:
     path("software_versions_GLmicroarray.md")
-    val(organism)
+    path("meta.sh")
+    val(skipDE)
   
   output:
     path("PROTOCOL_GLmicroarray.txt")
   
   script:
   """
-  generate_protocol.sh $workflow.manifest.version \"$organism\"
+  generate_protocol.sh $workflow.manifest.version $skipDE
   """
 }
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh
index b52a0ad1..6b8db2f7 100755
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/POST_PROCESSING/GENERATE_PROTOCOL/resources/usr/bin/generate_protocol.sh
@@ -25,8 +25,8 @@ done < <(sed -n '/|/p' "$software_versions_file" | sed 's/^ *|//;s/|$//')
 # Print the extracted versions
 env | grep "_VERSION"
 
-# Get organism
-organism=$2
+# Determine mapped sections
+source meta.sh
 
 # List of organisms
 organism_list=("Homo sapiens" "Mus musculus" "Rattus norvegicus" "Drosophila melanogaster" "Caenorhabditis elegans" "Danio rerio" "Saccharomyces cerevisiae")
@@ -34,6 +34,8 @@ organism_list=("Homo sapiens" "Mus musculus" "Rattus norvegicus" "Drosophila mel
 # Check the value of 'organism' variable and set 'GENE_MAPPING_STEP' accordingly
 if [[ $organism == "Arabidopsis thaliana" ]]; then
     GENE_MAPPING_STEP="Ensembl gene ID mappings were retrieved for each probe using the Plants Ensembl database ftp server (plants.ensembl.org, release 54)."
+elif [[ $biomart_attribute == "AGILENT SurePrint G3 GE 8x60k v3" ]]; then
+    GENE_MAPPING_STEP="Gene annotations were retrieved for each probe from Agilent (https://earray.chem.agilent.com/earray/array/displayViewArrayDesign.do?eArrayAction=view&arraydesignid=ADID40392, created May 2024, accessed November 2024)."
 elif [[ " ${organism_list[*]} " == *"${organism//\"/}"* ]]; then
     GENE_MAPPING_STEP="Ensembl gene ID mappings were retrieved for each probe using biomaRt (version ${biomaRt_VERSION}), Ensembl database (ensembl.org, release 107)."
 else
@@ -61,8 +63,22 @@ else
     GENE_ANNOTATION_DB="TBD"
 fi
 
+# Check if DGE was performed
+if $2; then
+    DE_STEP=""
+else
+    DE_STEP="Differential expression analysis was performed in R (version ${R_VERSION}) using limma (version ${limma_VERSION}); all groups were compared pairwise for each probe to generate a moderated t-statistic and associated p- and adjusted p-value."
+fi
+
+# Gene annotations
+if [[ $biomart_attribute == "AGILENT SurePrint G3 GE 8x60k v3" ]]; then
+    ANNOT_STEP=""  # Already covered in GENE_MAPPING_STEP
+else
+    ANNOT_STEP="Gene annotations were assigned using the custom annotation tables generated in-house as detailed in GL-DPPD-7110 ([https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md]), with STRINGdb (version 2.8.4), PANTHER.db (version 1.0.11), and ${GENE_ANNOTATION_DB} (version 3.15.0)."
+fi
+
 # Read the template file
-template="Data were processed as described in GL-DPPD-7112 ([https://github.com/nasa/GeneLab_Data_Processing/blob/master/Microarray/Agilent_1-channel/Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md]), using NF_MAAgilent1ch version $1 ([https://github.com/nasa/GeneLab_Data_Processing/tree/NF_MAAgilent1ch_$1/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch]). In short, a RunSheet containing raw data file location and processing metadata from the study's *ISA.zip file was generated using dp_tools (version ${dp_tools_VERSION}). The raw array data files were loaded into R (version ${R_VERSION}) using limma (version ${limma_VERSION}). Raw data quality assurance density, pseudo image, MA, and foreground-background plots were generated using limma (version ${limma_VERSION}), and boxplots were generated using ggplot2 (version ${ggplot2_VERSION}). The raw intensity data was background corrected and normalized across arrays via the limma (version ${limma_VERSION}) quantile method. Normalized data quality assurance density, pseudo image, and MA plots were generated using limma (version ${limma_VERSION}), and boxplots were generated using ggplot2 (version ${ggplot2_VERSION}). ${GENE_MAPPING_STEP} Differential expression analysis was performed in R (version ${R_VERSION}) using limma (version ${limma_VERSION}); all groups were compared pairwise for each probe to generate a moderated t-statistic and associated p- and adjusted p-value. Gene annotations were assigned using the custom annotation tables generated in-house as detailed in GL-DPPD-7110 ([https://github.com/nasa/GeneLab_Data_Processing/blob/GL_RefAnnotTable_1.0.0/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110.md]), with STRINGdb (version 2.8.4), PANTHER.db (version 1.0.11), and ${GENE_ANNOTATION_DB} (version 3.15.0)."
+template="Data were processed as described in GL-DPPD-7112-A ([https://github.com/nasa/GeneLab_Data_Processing/blob/master/Microarray/Agilent_1-channel/Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md]), using NF_MAAgilent1ch version $1 ([https://github.com/nasa/GeneLab_Data_Processing/tree/NF_MAAgilent1ch_$1/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch]). In short, a RunSheet containing raw data file location and processing metadata from the study's *ISA.zip file was generated using dp_tools (version ${dp_tools_VERSION}). The raw array data files were loaded into R (version ${R_VERSION}) using limma (version ${limma_VERSION}). Raw data quality assurance density, pseudo image, MA, and foreground-background plots were generated using limma (version ${limma_VERSION}), and boxplots were generated using ggplot2 (version ${ggplot2_VERSION}). The raw intensity data was background corrected and normalized across arrays via the limma (version ${limma_VERSION}) quantile method. Normalized data quality assurance density, pseudo image, and MA plots were generated using limma (version ${limma_VERSION}), and boxplots were generated using ggplot2 (version ${ggplot2_VERSION}). ${GENE_MAPPING_STEP} ${DE_STEP} ${ANNOT_STEP}"
 
 # Output the filled template
 echo "$template" > PROTOCOL_GLmicroarray.txt
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/VV_AGILE1CH.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/VV_AGILE1CH.nf
index e1eeaee2..e43ec0f2 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/VV_AGILE1CH.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/modules/VV_AGILE1CH.nf
@@ -6,13 +6,13 @@ process VV_AGILE1CH {
     saveAs: { "VV_Logs/VV_log_${ task.process.replace(":","-") }_GLmicroarray.tsv.MANUAL_CHECKS_PENDING" }
   // V&V'ed data publishing
   publishDir "${ params.outputDir }/${ params.gldsAccession }",
-    pattern: '00-RawData',
+    pattern: '00-RawData/**',
     mode: params.publish_dir_mode
   publishDir "${ params.outputDir }/${ params.gldsAccession }",
-    pattern: '01-limma_NormExp',
+    pattern: '01-limma_NormExp/**',
     mode: params.publish_dir_mode
   publishDir "${ params.outputDir }/${ params.gldsAccession }",
-    pattern: '02-limma_DGE',
+    pattern: '02-limma_DGE/**',
     mode: params.publish_dir_mode
   publishDir "${ params.outputDir }/${ params.gldsAccession }",
     pattern: 'Metadata/**',
@@ -22,18 +22,15 @@ process VV_AGILE1CH {
 
   input:
     path("VV_INPUT/Metadata/*") // While files from processing are staged, we instead want to use the files located in the publishDir for QC
-    tuple path("VV_INPUT/02-limma_DGE"),
-          path("VV_INPUT/01-limma_NormExp"),
-          path("VV_INPUT/00-RawData")
-     // "While files from processing are staged, we instead want to use the files located in the publishDir for QC
+    path("VV_INPUT/*") // "While files from processing are staged, we instead want to use the files located in the publishDir for QC
     val(skipVV) // Skips running V&V but will still publish the files
-    path("dp_tools__agilent_1_channel")
+    path(dp_tools_path)
   
   output:
     path("Metadata/*_runsheet.csv"), emit: VVed_runsheet
-    tuple path("02-limma_DGE"),
-          path("01-limma_NormExp"),
-          path("00-RawData"), emit: VVed_de_data
+    path("02-limma_DGE/*"), emit: VVed_DGE, optional: true
+    path("01-limma_NormExp/*"), emit: VVed_NormExp
+    path("00-RawData/*"), emit: VVed_rawData
     path("VV_report_GLmicroarray.tsv.MANUAL_CHECKS_PENDING"), optional: params.skipVV, emit: log
     path("versions.yml"), emit: versions
 
@@ -45,7 +42,7 @@ process VV_AGILE1CH {
 
     # Run V&V unless user requests to skip V&V
     if ${ !skipVV} ; then
-      dpt validation run dp_tools__agilent_1_channel . Metadata/*_runsheet.csv
+      dpt validation run ${ dp_tools_path } . Metadata/*_runsheet.csv
       mv VV_report.tsv.MANUAL_CHECKS_PENDING VV_report_GLmicroarray.tsv.MANUAL_CHECKS_PENDING
     fi
 
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config
index d0d705ee..efa5fdf4 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/nextflow.config
@@ -41,11 +41,11 @@ profiles {
 
 manifest {
     homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/Microarray/Agilent_1-channel'
-    description = 'Agilent 1 Channel Microarray Workflow for Document GL-DPPD-7112'
+    description = 'Agilent 1 Channel Microarray Workflow for Document GL-DPPD-7112-A'
     mainScript = 'main.nf'
     defaultBranch = 'main'
-    nextflowVersion = '>=23.10.1'
-    version = '1.0.4'
+    nextflowVersion = '>=24.10.5'
+    version = '1.0.5'
 }
 
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/post_processing.nf b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/post_processing.nf
index 21203fc4..64d791fa 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/post_processing.nf
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/NF_MAAgilent1ch/workflow_code/post_processing.nf
@@ -51,18 +51,20 @@ workflow {
     ch_processed_directory = Channel.fromPath("${ params.outputDir }/${ params.gldsAccession }", checkIfExists: true)
     ch_runsheet = Channel.fromPath("${ params.outputDir }/${ params.gldsAccession }/Metadata/*_runsheet.csv", checkIfExists: true)
     ch_software_versions = Channel.fromPath("${ params.outputDir }/${ params.gldsAccession }/GeneLab/software_versions_GLmicroarray.md", checkIfExists: true)
+    ch_processing_meta = Channel.fromPath("${ params.outputDir }/${ params.gldsAccession }/GeneLab/meta.sh", checkIfExists: true)
     GENERATE_MD5SUMS(      
       ch_processed_directory, 
       ch_runsheet,       
-      "${ projectDir }/bin/dp_tools__agilent_1_channel" // dp_tools plugin
+      "${ projectDir }/bin/${ params.skipDE ? 'dp_tools__agilent_1_channel_skipDE' : 'dp_tools__agilent_1_channel' }" // dp_tools plugin
     )
     UPDATE_ISA_TABLES(
       ch_processed_directory, 
       ch_runsheet,       
-      "${ projectDir }/bin/dp_tools__agilent_1_channel" // dp_tools plugin
+      "${ projectDir }/bin/${ params.skipDE ? 'dp_tools__agilent_1_channel_skipDE' : 'dp_tools__agilent_1_channel' }" // dp_tools plugin
     )
     GENERATE_PROTOCOL(
       ch_software_versions,
-      ch_runsheet | splitCsv(header: true, quote: '"') | first | map{ row -> row['organism'] }
+      ch_processing_meta,
+      params.skipDE
     )
 }
\ No newline at end of file
diff --git a/Microarray/Agilent_1-channel/Workflow_Documentation/README.md b/Microarray/Agilent_1-channel/Workflow_Documentation/README.md
index 1b90bac6..a20ca913 100644
--- a/Microarray/Agilent_1-channel/Workflow_Documentation/README.md
+++ b/Microarray/Agilent_1-channel/Workflow_Documentation/README.md
@@ -6,7 +6,7 @@
 
 |Pipeline Version|Current Workflow Version (for respective pipeline version)|Nextflow Version|
 |:---------------|:---------------------------------------------------------|:---------------|
-|*[GL-DPPD-7112.md](../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112.md)|[NF_MAAgilent1ch_1.0.4](NF_MAAgilent1ch)|23.10.1|
+|*[GL-DPPD-7112-A.md](../Pipeline_GL-DPPD-7112_Versions/GL-DPPD-7112-A.md)|[NF_MAAgilent1ch_1.0.5](NF_MAAgilent1ch)|24.10.5|
 
 *Current GeneLab Pipeline/Workflow Implementation