@@ -926,22 +926,71 @@ importNULISAseq <- function(files,
926926 }
927927 }
928928
929+ # Validate AUTO_PLATE IDs and handle named list parameters
930+ # Pre-scan all files for their internal AUTO_PLATE IDs
931+ internal_ids <- sapply(files , get_internal_plate_id , USE.NAMES = FALSE )
932+
933+ # Check for duplicates
934+ has_duplicates <- any(duplicated(internal_ids [! is.na(internal_ids )]))
935+
936+ # Check if user is trying to use ANY named list parameters
937+ # All of these parameters support named list format and would be affected by duplicate IDs
938+ is_named_list <- function (param ) {
939+ is.list(param ) && ! is.null(names(param ))
940+ }
941+
942+ any_named_params <- is_named_list(IPC ) || is_named_list(SC ) || is_named_list(NC ) ||
943+ is_named_list(Bridge ) || is_named_list(Calibrator ) ||
944+ is_named_list(excludeSamples ) || is_named_list(excludeTargets )
945+
946+ if (has_duplicates && any_named_params && ! user_provided_plateName ) {
947+ # Find which ID is duplicated for a better error message
948+ # Exclude NA values to avoid reporting NA as a duplicate ID
949+ dup_id <- internal_ids [duplicated(internal_ids ) & ! is.na(internal_ids )]
950+
951+ # Identify which parameters are using named lists
952+ named_param_list <- c(
953+ if (is_named_list(IPC )) " IPC" ,
954+ if (is_named_list(SC )) " SC" ,
955+ if (is_named_list(NC )) " NC" ,
956+ if (is_named_list(Bridge )) " Bridge" ,
957+ if (is_named_list(Calibrator )) " Calibrator" ,
958+ if (is_named_list(excludeSamples )) " excludeSamples" ,
959+ if (is_named_list(excludeTargets )) " excludeTargets"
960+ )
961+
962+ stop(sprintf(
963+ " Error: Duplicate AUTO_PLATE ID '%s' found across files.\n Named list parameter(s) provided: %s\n User must manually define 'plateName' to correctly map these parameters to the intended files." ,
964+ paste(unique(dup_id ), collapse = " , " ),
965+ paste(named_param_list , collapse = " , " )
966+ ))
967+ }
968+
969+ # Define keys for parameter mapping
970+ # Priority: 1. User plateName, 2. Internal IDs, 3. Fallback to Plate_XX naming
971+ keys_for_matching <- if (user_provided_plateName ) {
972+ plateName
973+ } else if (! has_duplicates && ! any(is.na(internal_ids ))) {
974+ internal_ids
975+ } else {
976+ # Default fallback if internal IDs are missing or messy
977+ format_width <- if (length(files ) > 99 ) 3 else 2
978+ paste0(' Plate_' , formatC(seq_along(files ), width = format_width , format = ' d' , flag = ' 0' ))
979+ }
980+
929981 # Check IC only accept vector
930982 if (! is.null(IC ) && ! is.vector(IC )) {
931983 stop(" IC must be the same across plates/runs. IC must be NULL or a vector" )
932984 }
933985
934- # Process all parameters that can be named lists
935- param_names_for_processing <- if (user_provided_plateName ) plateName else seq_along(files )
936-
937- IC <- process_named_param(IC , param_names_for_processing ) # should be a vector, same IC across plates
938- IPC <- process_named_param(IPC , param_names_for_processing )
939- SC <- process_named_param(SC , param_names_for_processing )
940- NC <- process_named_param(NC , param_names_for_processing )
941- Bridge <- process_named_param(Bridge , param_names_for_processing )
942- Calibrator <- process_named_param(Calibrator , param_names_for_processing )
943- excludeSamples <- process_named_param(excludeSamples , param_names_for_processing )
944- excludeTargets <- process_named_param(excludeTargets , param_names_for_processing )
986+ IC <- process_named_param(IC , keys_for_matching ) # should be a vector, same IC across plates
987+ IPC <- process_named_param(IPC , keys_for_matching )
988+ SC <- process_named_param(SC , keys_for_matching )
989+ NC <- process_named_param(NC , keys_for_matching )
990+ Bridge <- process_named_param(Bridge , keys_for_matching )
991+ Calibrator <- process_named_param(Calibrator , keys_for_matching )
992+ excludeSamples <- process_named_param(excludeSamples , keys_for_matching )
993+ excludeTargets <- process_named_param(excludeTargets , keys_for_matching )
945994
946995 if (verbose ) {
947996 logger :: log_info(" Processing {length(files)} NULISAseq files." )
@@ -958,7 +1007,6 @@ importNULISAseq <- function(files,
9581007 runs <- list ()
9591008 for (i in seq_along(files )) {
9601009 if (verbose ) message(sprintf(" Loading file %d/%d: %s" , i , length(files ), basename(files [i ])))
961- plateID_to_pass <- if (user_provided_plateName ) plateName [i ] else NULL
9621010 tryCatch({
9631011 runs [[i ]] <- loadNULISAseq(
9641012 file = files [i ],
@@ -969,7 +1017,7 @@ importNULISAseq <- function(files,
9691017 Bridge = pick_param(Bridge , i ),
9701018 Calibrator = pick_param(Calibrator , i ),
9711019 sample_group_covar = sample_group_covar ,
972- plateID = plateID_to_pass , # Pass user-provided plateID or NULL
1020+ plateID = if ( user_provided_plateName ) plateName [ i ] else NULL , # Pass user-provided plateID or NULL
9731021 excludeSamples = pick_param(excludeSamples , i ),
9741022 excludeTargets = pick_param(excludeTargets , i ),
9751023 ...
@@ -2381,27 +2429,27 @@ format_wide_to_long <- function(merged, AQ = FALSE, exclude_sample_cols = "plate
23812429 return (final_data )
23822430}
23832431
2384- # ' An helper function that processes parameters that can be specified
2432+ # ' A helper function that processes parameters that can be specified
23852433# ' as named lists (per plate), vectors (applied to all plates), or NULL
2386- # '
2434+ # '
23872435# ' @param param The parameter to process. Can be:
23882436# ' - `NULL`: No action for any plate
23892437# ' - A vector: Applied to all plates
2390- # ' - A named list: Specific values for specific plates, names must match plate_names,
2391- # ' length of plate_names must match length of list
2392- # ' @param param Character string of the parameter
2438+ # ' - A named list: Specific values for specific plates. Names must be valid plate names
2439+ # ' from `plate_names`. Partial lists are allowed - plates not specified will receive `NULL`.
23932440# ' @param plate_names Character vector of plate names to match against
23942441# '
23952442# ' @return A named list with one element per plate, where:
23962443# ' - Names correspond to `plate_names`
23972444# ' - Values are either the plate-specific parameter or `NULL`
2398- # '
2445+ # '
23992446# ' @details
24002447# ' This function handles the following input patterns:
24012448# ' \itemize{
24022449# ' \item{\code{param = NULL}: Returns `NULL`}
24032450# ' \item{\code{param = c("value1", "value2")}: Returns a named list where all plates get the vector}
2404- # ' \item{\code{param = list("Plate_01" = "value1", "Plate_02" = "value2")}: Returns a named list with plate-specific values}
2451+ # ' \item{\code{param = list("Plate_01" = "value1", "Plate_02" = "value2")}: Returns a named list
2452+ # ' with plate-specific values. Plates not specified in the list will receive `NULL`.}
24052453# ' }
24062454# '
24072455# ' @keywords internal
@@ -2424,31 +2472,26 @@ process_named_param <- function(param, plate_names) {
24242472 if (is.null(names(param ))) {
24252473 stop(sprintf(" '%s' must be a named list. Unnamed lists are not accepted." , param_name ))
24262474 }
2427-
2428- # Validate exact match
2429- if (! identical(sort(names(param )), sort(plate_names ))) {
2430- invalid_names <- setdiff(names(param ), plate_names )
2431- missing_names <- setdiff(plate_names , names(param ))
2432-
2433- error_parts <- character ()
2434- if (length(invalid_names ) > 0 ) {
2435- error_parts <- c(error_parts , sprintf(" invalid names: %s" , paste(invalid_names , collapse = " , " )))
2436- }
2437- if (length(missing_names ) > 0 ) {
2438- error_parts <- c(error_parts , sprintf(" missing names: %s" , paste(missing_names , collapse = " , " )))
2439- }
2440-
2441- stop(sprintf(" '%s' names do not match plate_names. %s" ,
2442- param_name , paste(error_parts , collapse = " ; " )))
2443- }
2444-
2445- if (length(param ) != length(plate_names )) {
2446- stop(sprintf(" '%s' length (%d) must match plate_names length (%d)" ,
2447- param_name , length(param ), length(plate_names )))
2475+
2476+ # Check for invalid names (names that don't exist in plate_names)
2477+ invalid_names <- setdiff(names(param ), plate_names )
2478+ if (length(invalid_names ) > 0 ) {
2479+ stop(sprintf(" '%s' contains invalid plate names: %s. Valid names are: %s" ,
2480+ param_name ,
2481+ paste(invalid_names , collapse = " , " ),
2482+ paste(plate_names , collapse = " , " )))
24482483 }
2449-
2450- # Reorder to match plate_names order
2451- result <- lapply(plate_names , function (plate ) param [[plate ]])
2484+
2485+ # Allow partial named lists - fill in NULL for missing plates
2486+ # This is useful for excludeSamples/excludeTargets where users only want to
2487+ # exclude from specific plates
2488+ result <- lapply(plate_names , function (plate ) {
2489+ if (plate %in% names(param )) {
2490+ param [[plate ]]
2491+ } else {
2492+ NULL
2493+ }
2494+ })
24522495 names(result ) <- plate_names
24532496 return (result )
24542497 }
@@ -2501,3 +2544,40 @@ remove_ic_from_long <- function(df, ic_targets) {
25012544 # Handle factor columns by converting to character for comparison
25022545 return (df [! as.character(df $ Target ) %in% ic_targets , ])
25032546}
2547+
2548+ # ' Extract AUTO_PLATE ID from a NULISA XML File
2549+ # '
2550+ # ' Scans the beginning of an XML file to retrieve the value of the `AUTO_PLATE`
2551+ # ' attribute. This is used for early validation of plate identities before
2552+ # ' performing a full data load.
2553+ # '
2554+ # ' @param file_path Character string. The path to the XML file.
2555+ # '
2556+ # ' @return A character string containing the plate ID if found; otherwise, returns `NA`.
2557+ # '
2558+ # ' @details
2559+ # ' To optimize performance, the function only reads the first 1000 lines
2560+ # ' of the file, assuming metadata is located in the header section.
2561+ # '
2562+ # ' @keywords internal
2563+ get_internal_plate_id <- function (file_path ) {
2564+ # Read only the beginning of the file to save memory/time
2565+ # Adjust n if the AUTO_PLATE tag appears very late in your XML
2566+ lines <- tryCatch(
2567+ suppressWarnings(readLines(file_path , n = 1000 , warn = FALSE )),
2568+ error = function (e ) NULL
2569+ )
2570+ if (is.null(lines )) {
2571+ return (NA )
2572+ }
2573+ content <- paste(lines , collapse = " " )
2574+
2575+ # Regex to find AUTO_PLATE="AnythingInsideQuotes"
2576+ match <- regmatches(content , regexec(' AUTO_PLATE="([^"]+)"' , content ))
2577+
2578+ if (length(match [[1 ]]) > 1 ) {
2579+ return (match [[1 ]][2 ]) # Return the captured group (the ID)
2580+ } else {
2581+ return (NA ) # Not found
2582+ }
2583+ }
0 commit comments