diff --git a/R/accessory.R b/R/accessory.R
index 0ffc434..29c952f 100644
--- a/R/accessory.R
+++ b/R/accessory.R
@@ -910,6 +910,8 @@ checkListIsSubset <- function(test_list,
 #' sample sheet variables to be used as blocking factors
 #' @param convert_to_list Convert output to a list as used internally by
 #' shinyngs?
+#' @param validate_design Validate design matrix (check for NAs, full rank,
+#' numeric columns, special characters)? Set to FALSE to skip these checks.
 #'
 #' @return output Validated contrasts data frame
 #' @export
@@ -921,7 +923,8 @@ read_contrasts <-
            reference_column = "reference",
            target_column = "target",
            blocking_column = "blocking",
-           convert_to_list = FALSE) {
+           convert_to_list = FALSE,
+           validate_design = TRUE) {
 
     # Read the contrasts depending on the file format (CSV or YAML)
     if (grepl("\\.csv$", filename)) {
@@ -1029,37 +1032,51 @@ read_contrasts <-
       formula_vars <- all.vars(as.formula(contrasts$formula[i]))
     }
 
-    design_cols <- unique(na.omit(c(contrasts[[variable_column]][i], blocking_vars, formula_vars)))
-    design_matrix <- samples[, design_cols, drop = FALSE]
-
-    # Ensure there are no NA values in the design matrix.
-    if (any(is.na(design_matrix))) {
-      stop("NA values found in one or more design matrix columns.")
-    }
-
-    # Check that the design matrix is full rank.
-    mm <- model.matrix(~ . - 1, data = design_matrix)
-    if (qr(mm)$rank < ncol(mm)) {
-      stop(paste("Design matrix is not full rank.", "Model matrix columns:", paste(colnames(mm), collapse = ", "), "\n"))
-    }
-
-    # Warn about continuous covariates in the design matrix columns.
-    for (col in design_cols) {
-      if (is.numeric(samples[[col]])) {
-        warning(paste("Column", col, "is numeric and may be treated as continuous."))
+    if (validate_design) {
+      design_cols <- unique(na.omit(c(contrasts[[variable_column]][i], blocking_vars, formula_vars)))
+
+      # Drop the samples this contrast excludes, so every check below sees only the retained rows.
+      contrast_samples <- samples
+      if ("exclude_samples_col" %in% colnames(contrasts) && "exclude_samples_values" %in% colnames(contrasts)) {
+        if (!is.na(contrasts$exclude_samples_col[i]) && !is.na(contrasts$exclude_samples_values[i])) {
+          exclude_col <- contrasts$exclude_samples_col[i]
+          exclude_vals <- simpleSplit(contrasts$exclude_samples_values[i], ";")
+          contrast_samples <- samples[!samples[[exclude_col]] %in% exclude_vals, , drop = FALSE]
+        }
       }
-    }
-
-    # Check that values in design matrix columns do not contain disallowed special characters.
-    for (col in design_cols) {
-      vals <- as.character(samples[[col]])
-      for (sc in c("/", "\\\\")) { # Default special characters: c("/", "\\\\")
-        if (any(grepl(sc, vals))) {
-          warning(paste("Column", col, "contains special character", sc,
-                        "which may cause issues downstream."))
+
+      design_matrix <- contrast_samples[, design_cols, drop = FALSE]
+
+      # Ensure there are no NA values in the design matrix.
+      if (any(is.na(design_matrix))) {
+        stop("NA values found in one or more design matrix columns.")
+      }
+
+      # Check that the design matrix is full rank.
+      mm <- model.matrix(~ . - 1, data = design_matrix)
+      if (qr(mm)$rank < ncol(mm)) {
+        stop(paste("Design matrix is not full rank.", "Model matrix columns:", paste(colnames(mm), collapse = ", "), "\n"))
+      }
+
+      # Warn about continuous covariates in the design matrix columns (retained samples only).
+      for (col in design_cols) {
+        if (is.numeric(contrast_samples[[col]])) {
+          warning(paste("Column", col, "is numeric and may be treated as continuous."))
+        }
+      }
+
+      # Check that retained values in design matrix columns do not contain disallowed special characters.
+      for (col in design_cols) {
+        vals <- as.character(contrast_samples[[col]])
+        for (sc in c("/", "\\\\")) { # Default special characters: c("/", "\\\\")
+          if (any(grepl(sc, vals))) {
+            warning(paste("Column", col, "contains special character", sc,
+                          "which may cause issues downstream."))
+          }
         }
       }
     }
+
     var <- contrasts[i, variable_column]
     ref <- contrasts[i, reference_column]
     tgt <- contrasts[i, target_column]
diff --git a/man/read_contrasts.Rd b/man/read_contrasts.Rd
index c4c0e55..3585785 100644
--- a/man/read_contrasts.Rd
+++ b/man/read_contrasts.Rd
@@ -11,7 +11,8 @@ read_contrasts(
   reference_column = "reference",
   target_column = "target",
   blocking_column = "blocking",
-  convert_to_list = FALSE
+  convert_to_list = FALSE,
+  validate_design = TRUE
 )
 }
 \arguments{
@@ -33,10 +34,21 @@ sample sheet variables to be used as blocking factors}
 
 \item{convert_to_list}{Convert output to a list as used internally by
 shinyngs?}
+
+\item{validate_design}{Validate design matrix (check for NAs, full rank,
+numeric columns, special characters)? Set to FALSE to skip these checks.}
 }
 \value{
 output Validated contrasts data frame
 }
 \description{
-Read and validate a contrasts file against sample metadata
+Checks:
+1. No duplicate contrast IDs. Ensure that the required columns (variable, reference, target) are present.
+2. Values in the contrast variable column exist as column names in the sample metadata.
+3. If blocking factors are supplied, checks that they are present in the sample metadata.
+4. Design matrix is full rank.
+5. Warn about continuous covariates (e.g. numeric patient IDs treated as continuous).
+6. Values of specified columns don't contain special characters.
+7. Verify that the specified reference and target values exist in the corresponding sample metadata column.
+8. Issue a warning if the reference and target levels are identical.
 }