Skip to content

Commit 030bae3

Browse files
authored
Merge pull request #169 from Alamar-Biosciences/release/v1.4.1
Release v1.4.1
2 parents 23d74f5 + 959832a commit 030bae3

File tree

11 files changed

+1030
-250
lines changed

11 files changed

+1030
-250
lines changed

DESCRIPTION

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: NULISAseqR
33
Title: Analysis of NULISAseq Data
4-
Version: 1.4.0
4+
Version: 1.4.1
55
Authors@R:
66
person("Alamar Biosciences Bioinformatics Team", , , "bioinfo@alamarbio.com", role = c("aut", "cre", "cph"),
77
comment = "Contributors: Dwight Kuo, Joanne C. Beer, Eliza Chai, Sumedh Sankhe, Kasun Buddika")
@@ -36,12 +36,11 @@ Imports: circlize,
3636
lmerTest,
3737
logger,
3838
openxlsx,
39-
pander,
4039
parallelly,
4140
pheatmap,
4241
plotly,
4342
purrr,
44-
qs,
43+
qs2,
4544
ragg,
4645
RColorBrewer,
4746
reactable,

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ RUN apt-get update && \
1515
rm -rf /var/lib/apt/lists/*
1616

1717
RUN install.r future pagedown uuid XML xml2 ggrepel emmeans fields optparse pheatmap rcompanion \
18-
corrplot pander gdata dplyr testthat qs lmerTest logger devtools remotes knitr tidyverse \
18+
corrplot gdata dplyr testthat qs2 lmerTest logger devtools remotes knitr tidyverse \
1919
ggplot2 htmltools DT plotly lubridate scales httr stringr ggpubr openxlsx renv coin \
2020
fs reactable janitor future.apply future.callr showtext svglite patchwork
2121

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
# NULISAseqR 1.4.1 (2026-01-16)
2+
3+
## Changes
4+
5+
### Enhancements
6+
* **importNULISAseq()** - Improved robustness and flexibility for handling NULISAseq data files:
7+
- Added validation of internal `AUTO_PLATE` IDs with duplicate detection before processing
8+
- Enhanced parameter mapping for `excludeSamples`, `excludeTargets`, and control parameters (`IC`, `IPC`, `SC`, `NC`, `Bridge`, `Calibrator`) using prioritized keys (user-provided plate names, internal IDs, or fallback names)
9+
- Improved error handling with clear messages when duplicate plate IDs are detected with named exclusions
10+
* **get_internal_plate_id()** - New utility function to extract `AUTO_PLATE` ID from NULISAseq XML file headers
11+
112
# NULISAseqR 1.4.0 (2026-01-11)
213

314
## Overview

R/generate_pca.R

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,17 @@
2323
#' should match the existing row names of \code{data}; defaults to \code{NULL} (all targets).
2424
#' @param shape_by Character string specifying the column name from \code{sampleInfo}
2525
#' to use for point shapes; defaults to \code{NULL}.
26-
#' @param encircle Logical indicating whether to draw ellipses around groups;
26+
#' @param ellipse Logical indicating whether to draw ellipses around groups;
2727
#' defaults to \code{TRUE}.
28-
#' @param encircleFill Logical indicating whether to fill the ellipses;
28+
#' @param ellipseType Character string specifying the type of ellipse. Options include
29+
#' \code{"t"} for t-distribution and \code{"norm"} for normal distribution;
30+
#' defaults to \code{"t"}.
31+
#' @param ellipseAlpha Numeric value between 0 and 1 for ellipse transparency;
32+
#' defaults to \code{0.15}.
33+
#' @param ellipseFill Logical indicating whether to fill the ellipses;
2934
#' defaults to \code{TRUE}.
35+
#' @param ellipseLineSize Numeric value for the ellipse border line width;
36+
#' defaults to \code{0} (no border).
3037
#' @param sample_colors Named vector of custom colors for sample groups.
3138
#' Names should match the levels in \code{annotate_sample_by}; defaults to \code{NULL}.
3239
#' @param components Integer vector of length 2 specifying which principal components to plot.
@@ -134,8 +141,11 @@ generate_pca <- function(data,
134141
sample_subset = NULL,
135142
target_subset = NULL,
136143
shape_by = NULL,
137-
encircle = TRUE,
138-
encircleFill = TRUE,
144+
ellipse = TRUE,
145+
ellipseType = "t",
146+
ellipseAlpha = 0.15,
147+
ellipseFill = TRUE,
148+
ellipseLineSize = 0,
139149
sample_colors = NULL,
140150
components = c(1, 2),
141151
output_dir = NULL,
@@ -266,8 +276,11 @@ generate_pca <- function(data,
266276
vline = 0,
267277
pointSize = 2,
268278
legendPosition = 'right',
269-
encircle = encircle,
270-
encircleFill = encircleFill,
279+
ellipse = ellipse,
280+
ellipseType = ellipseType,
281+
ellipseAlpha = ellipseAlpha,
282+
ellipseFill = ellipseFill,
283+
ellipseLineSize = ellipseLineSize,
271284
...)
272285

273286
# Add title if specified

R/mergeNULISAseq.R

Lines changed: 124 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -926,22 +926,71 @@ importNULISAseq <- function(files,
926926
}
927927
}
928928

929+
# Validate AUTO_PLATE IDs and handle named list parameters
930+
# Pre-scan all files for their internal AUTO_PLATE IDs
931+
internal_ids <- sapply(files, get_internal_plate_id, USE.NAMES = FALSE)
932+
933+
# Check for duplicates
934+
has_duplicates <- any(duplicated(internal_ids[!is.na(internal_ids)]))
935+
936+
# Check if user is trying to use ANY named list parameters
937+
# All of these parameters support named list format and would be affected by duplicate IDs
938+
is_named_list <- function(param) {
939+
is.list(param) && !is.null(names(param))
940+
}
941+
942+
any_named_params <- is_named_list(IPC) || is_named_list(SC) || is_named_list(NC) ||
943+
is_named_list(Bridge) || is_named_list(Calibrator) ||
944+
is_named_list(excludeSamples) || is_named_list(excludeTargets)
945+
946+
if (has_duplicates && any_named_params && !user_provided_plateName) {
947+
# Find which ID is duplicated for a better error message
948+
# Exclude NA values to avoid reporting NA as a duplicate ID
949+
dup_id <- internal_ids[duplicated(internal_ids) & !is.na(internal_ids)]
950+
951+
# Identify which parameters are using named lists
952+
named_param_list <- c(
953+
if (is_named_list(IPC)) "IPC",
954+
if (is_named_list(SC)) "SC",
955+
if (is_named_list(NC)) "NC",
956+
if (is_named_list(Bridge)) "Bridge",
957+
if (is_named_list(Calibrator)) "Calibrator",
958+
if (is_named_list(excludeSamples)) "excludeSamples",
959+
if (is_named_list(excludeTargets)) "excludeTargets"
960+
)
961+
962+
stop(sprintf(
963+
"Error: Duplicate AUTO_PLATE ID '%s' found across files.\nNamed list parameter(s) provided: %s\nUser must manually define 'plateName' to correctly map these parameters to the intended files.",
964+
paste(unique(dup_id), collapse = ", "),
965+
paste(named_param_list, collapse = ", ")
966+
))
967+
}
968+
969+
# Define keys for parameter mapping
970+
# Priority: 1. User plateName, 2. Internal IDs, 3. Fallback to Plate_XX naming
971+
keys_for_matching <- if (user_provided_plateName) {
972+
plateName
973+
} else if (!has_duplicates && !any(is.na(internal_ids))) {
974+
internal_ids
975+
} else {
976+
# Default fallback if internal IDs are missing or messy
977+
format_width <- if(length(files) > 99) 3 else 2
978+
paste0('Plate_', formatC(seq_along(files), width=format_width, format='d', flag='0'))
979+
}
980+
929981
# Check IC only accept vector
930982
if(!is.null(IC) && !is.vector(IC)) {
931983
stop("IC must be the same across plates/runs. IC must be NULL or a vector")
932984
}
933985

934-
# Process all parameters that can be named lists
935-
param_names_for_processing <- if (user_provided_plateName) plateName else seq_along(files)
936-
937-
IC <- process_named_param(IC, param_names_for_processing) # should be a vector, same IC across plates
938-
IPC <- process_named_param(IPC, param_names_for_processing)
939-
SC <- process_named_param(SC, param_names_for_processing)
940-
NC <- process_named_param(NC, param_names_for_processing)
941-
Bridge <- process_named_param(Bridge, param_names_for_processing)
942-
Calibrator <- process_named_param(Calibrator, param_names_for_processing)
943-
excludeSamples <- process_named_param(excludeSamples, param_names_for_processing)
944-
excludeTargets <- process_named_param(excludeTargets, param_names_for_processing)
986+
IC <- process_named_param(IC, keys_for_matching) # should be a vector, same IC across plates
987+
IPC <- process_named_param(IPC, keys_for_matching)
988+
SC <- process_named_param(SC, keys_for_matching)
989+
NC <- process_named_param(NC, keys_for_matching)
990+
Bridge <- process_named_param(Bridge, keys_for_matching)
991+
Calibrator <- process_named_param(Calibrator, keys_for_matching)
992+
excludeSamples <- process_named_param(excludeSamples, keys_for_matching)
993+
excludeTargets <- process_named_param(excludeTargets, keys_for_matching)
945994

946995
if (verbose) {
947996
logger::log_info("Processing {length(files)} NULISAseq files.")
@@ -958,7 +1007,6 @@ importNULISAseq <- function(files,
9581007
runs <- list()
9591008
for (i in seq_along(files)) {
9601009
if (verbose) message(sprintf("Loading file %d/%d: %s", i, length(files), basename(files[i])))
961-
plateID_to_pass <- if (user_provided_plateName) plateName[i] else NULL
9621010
tryCatch({
9631011
runs[[i]] <- loadNULISAseq(
9641012
file = files[i],
@@ -969,7 +1017,7 @@ importNULISAseq <- function(files,
9691017
Bridge = pick_param(Bridge, i),
9701018
Calibrator = pick_param(Calibrator, i),
9711019
sample_group_covar = sample_group_covar,
972-
plateID = plateID_to_pass, # Pass user-provided plateID or NULL
1020+
plateID = if (user_provided_plateName) plateName[i] else NULL, # Pass user-provided plateID or NULL
9731021
excludeSamples = pick_param(excludeSamples, i),
9741022
excludeTargets = pick_param(excludeTargets, i),
9751023
...
@@ -2381,27 +2429,27 @@ format_wide_to_long <- function(merged, AQ = FALSE, exclude_sample_cols = "plate
23812429
return(final_data)
23822430
}
23832431

2384-
#' An helper function that processes parameters that can be specified
2432+
#' A helper function that processes parameters that can be specified
23852433
#' as named lists (per plate), vectors (applied to all plates), or NULL
2386-
#'
2434+
#'
23872435
#' @param param The parameter to process. Can be:
23882436
#' - `NULL`: No action for any plate
23892437
#' - A vector: Applied to all plates
2390-
#' - A named list: Specific values for specific plates, names must match plate_names,
2391-
#' length of plate_names must match length of list
2392-
#' @param param Character string of the parameter
2438+
#' - A named list: Specific values for specific plates. Names must be valid plate names
2439+
#' from `plate_names`. Partial lists are allowed - plates not specified will receive `NULL`.
23932440
#' @param plate_names Character vector of plate names to match against
23942441
#'
23952442
#' @return A named list with one element per plate, where:
23962443
#' - Names correspond to `plate_names`
23972444
#' - Values are either the plate-specific parameter or `NULL`
2398-
#'
2445+
#'
23992446
#' @details
24002447
#' This function handles the following input patterns:
24012448
#' \itemize{
24022449
#' \item{\code{param = NULL}: Returns `NULL`}
24032450
#' \item{\code{param = c("value1", "value2")}: Returns a named list where all plates get the vector}
2404-
#' \item{\code{param = list("Plate_01" = "value1", "Plate_02" = "value2")}: Returns a named list with plate-specific values}
2451+
#' \item{\code{param = list("Plate_01" = "value1", "Plate_02" = "value2")}: Returns a named list
2452+
#' with plate-specific values. Plates not specified in the list will receive `NULL`.}
24052453
#' }
24062454
#'
24072455
#' @keywords internal
@@ -2424,31 +2472,26 @@ process_named_param <- function(param, plate_names) {
24242472
if (is.null(names(param))) {
24252473
stop(sprintf("'%s' must be a named list. Unnamed lists are not accepted.", param_name))
24262474
}
2427-
2428-
# Validate exact match
2429-
if (!identical(sort(names(param)), sort(plate_names))) {
2430-
invalid_names <- setdiff(names(param), plate_names)
2431-
missing_names <- setdiff(plate_names, names(param))
2432-
2433-
error_parts <- character()
2434-
if (length(invalid_names) > 0) {
2435-
error_parts <- c(error_parts, sprintf("invalid names: %s", paste(invalid_names, collapse = ", ")))
2436-
}
2437-
if (length(missing_names) > 0) {
2438-
error_parts <- c(error_parts, sprintf("missing names: %s", paste(missing_names, collapse = ", ")))
2439-
}
2440-
2441-
stop(sprintf("'%s' names do not match plate_names. %s",
2442-
param_name, paste(error_parts, collapse = "; ")))
2443-
}
2444-
2445-
if (length(param) != length(plate_names)) {
2446-
stop(sprintf("'%s' length (%d) must match plate_names length (%d)",
2447-
param_name, length(param), length(plate_names)))
2475+
2476+
# Check for invalid names (names that don't exist in plate_names)
2477+
invalid_names <- setdiff(names(param), plate_names)
2478+
if (length(invalid_names) > 0) {
2479+
stop(sprintf("'%s' contains invalid plate names: %s. Valid names are: %s",
2480+
param_name,
2481+
paste(invalid_names, collapse = ", "),
2482+
paste(plate_names, collapse = ", ")))
24482483
}
2449-
2450-
# Reorder to match plate_names order
2451-
result <- lapply(plate_names, function(plate) param[[plate]])
2484+
2485+
# Allow partial named lists - fill in NULL for missing plates
2486+
# This is useful for excludeSamples/excludeTargets where users only want to
2487+
# exclude from specific plates
2488+
result <- lapply(plate_names, function(plate) {
2489+
if (plate %in% names(param)) {
2490+
param[[plate]]
2491+
} else {
2492+
NULL
2493+
}
2494+
})
24522495
names(result) <- plate_names
24532496
return(result)
24542497
}
@@ -2501,3 +2544,40 @@ remove_ic_from_long <- function(df, ic_targets) {
25012544
# Handle factor columns by converting to character for comparison
25022545
return(df[!as.character(df$Target) %in% ic_targets, ])
25032546
}
2547+
2548+
#' Extract AUTO_PLATE ID from a NULISA XML File
2549+
#'
2550+
#' Scans the beginning of an XML file to retrieve the value of the `AUTO_PLATE`
2551+
#' attribute. This is used for early validation of plate identities before
2552+
#' performing a full data load.
2553+
#'
2554+
#' @param file_path Character string. The path to the XML file.
2555+
#'
2556+
#' @return A character string containing the plate ID if found; otherwise, returns `NA`.
2557+
#'
2558+
#' @details
2559+
#' To optimize performance, the function only reads the first 1000 lines
2560+
#' of the file, assuming metadata is located in the header section.
2561+
#'
2562+
#' @keywords internal
2563+
get_internal_plate_id <- function(file_path) {
2564+
# Read only the beginning of the file to save memory/time
2565+
# Adjust n if the AUTO_PLATE tag appears very late in your XML
2566+
lines <- tryCatch(
2567+
suppressWarnings(readLines(file_path, n = 1000, warn = FALSE)),
2568+
error = function(e) NULL
2569+
)
2570+
if (is.null(lines)) {
2571+
return(NA)
2572+
}
2573+
content <- paste(lines, collapse = " ")
2574+
2575+
# Regex to find AUTO_PLATE="AnythingInsideQuotes"
2576+
match <- regmatches(content, regexec('AUTO_PLATE="([^"]+)"', content))
2577+
2578+
if (length(match[[1]]) > 1) {
2579+
return(match[[1]][2]) # Return the captured group (the ID)
2580+
} else {
2581+
return(NA) # Not found
2582+
}
2583+
}

0 commit comments

Comments
 (0)