From 0942ac05be37f988adb5d3c63c812c69a1419548 Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Mon, 23 Feb 2026 19:44:18 +0000 Subject: [PATCH 1/7] Reduce cyclomatic complexity of try_merge_additional_files() Co-authored-by: sbfnk --- R/globals.R | 2 +- R/load-survey-utils.R | 254 +++++++++++++++++--------------- man/get_mergeable_files.Rd | 12 ++ man/resolve_longitudinal_key.Rd | 13 ++ man/try_merge_one_file.Rd | 23 +++ 5 files changed, 187 insertions(+), 117 deletions(-) create mode 100644 man/get_mergeable_files.Rd create mode 100644 man/resolve_longitudinal_key.Rd create mode 100644 man/try_merge_one_file.Rd diff --git a/R/globals.R b/R/globals.R index b58a03d0..ca9aa85e 100644 --- a/R/globals.R +++ b/R/globals.R @@ -88,7 +88,7 @@ utils::globalVariables(c( "title", # <.list_surveys> "creator", # <.list_surveys> "identifier.2", # <.list_surveys> - "..merge_id", # + "..merge_id", # "..main_id", # "..original.lower.age.limit", # "..segment", # diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index fcfd5521..70348665 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -112,6 +112,130 @@ join_compatible_files <- function(survey_files, contact_data) { ) } +#' Identify which survey files share columns with a main table +#' @autoglobal +#' @keywords internal +get_mergeable_files <- function(survey_files, contact_data, main_cols) { + can_merge <- vapply( + survey_files, + function(x) { + any(colnames(contact_data[[x]]) %in% main_cols) + }, + TRUE + ) + names(can_merge[can_merge]) +} + +#' Resolve the unique key for a merged data.table with duplicates +#' +#' Validates a user-provided participant_key or auto-detects one via +#' find_unique_key(). +#' @autoglobal +#' @keywords internal +resolve_longitudinal_key <- function(merged, participant_key = NULL) { + if (!is.null(participant_key)) { + missing_cols <- setdiff(participant_key, names(merged)) + if ( + length(missing_cols) == 0 && + anyDuplicated(merged, by = participant_key) == 0L + ) { + return(participant_key) + } + } + find_unique_key(merged, "part_id") +} + +#' Try merging a single additional file into a main survey table +#' +#' @return A list with components: merged (data.table or NULL), detected_key +#' (character vector or NULL), file (the file that was merged, or NULL). +#' @autoglobal +#' @keywords internal +try_merge_one_file <- function( + file, + type, + main_survey, + contact_data, + participant_key = NULL, + call = rlang::caller_env() +) { + contact_data[[file]] <- contact_data[[file]][, + ..merge_id := seq_len(.N) + ] + common_id <- intersect( + colnames(contact_data[[file]]), + colnames(main_survey) + ) + merged <- tryCatch( + { + merge( + main_survey, + contact_data[[file]], + by = common_id, + all.x = TRUE + ) + }, + error = function(cond) { + if (!grepl("cartesian", cond$message, fixed = TRUE)) { + cli::cli_abort( + "Merge failed for {.file {basename(file)}} on \\ + {.val {common_id}}: {cond$message}", + call = call + ) + } + NULL + } + ) + + if (is.null(merged)) { + return(list(merged = NULL, detected_key = NULL, file = NULL)) + } + + has_duplicates <- anyDuplicated(merged[, "..main_id", with = FALSE]) > 0 + accept_merge <- !has_duplicates + detected_key <- NULL + + if (has_duplicates && type == "contact") { + return(list(merged = NULL, detected_key = NULL, file = NULL)) + } + + if (has_duplicates) { + unique_key <- resolve_longitudinal_key(merged, participant_key) + if (!is.null(unique_key)) { + accept_merge <- TRUE + merged[, ("..main_id") := seq_len(.N)] + detected_key <- unique_key + } + } + + if (!accept_merge) { + return(list(merged = NULL, detected_key = NULL, file = NULL)) + } + + ## Issue warnings about match quality + matched_main <- sum(!is.na(merged[["..merge_id"]])) + unmatched_main <- nrow(merged) - matched_main + if (unmatched_main > 0) { + cli::cli_warn( + "Only {matched_main} matching value{?s} in {.val {common_id}} \\ + column{?s} when pulling {.file {basename(file)}} into \\ + {.val {type}} survey.", + call = call + ) + } + unmatched_merge <- nrow(contact_data[[file]]) - matched_main + if (unmatched_merge > 0) { + cli::cli_warn( + "{unmatched_merge} row{?s} could not be matched when pulling \\ + {.file {basename(file)}} into {.val {type}} survey.", + call = call + ) + } + merged[, ("..merge_id") := NULL] + + list(merged = merged, detected_key = detected_key, file = file) +} + ## lastly, merge in any additional files that can be merged #' @autoglobal try_merge_additional_files <- function( @@ -122,140 +246,40 @@ try_merge_additional_files <- function( participant_key = NULL, call = rlang::caller_env() ) { - # Track the observation key for participants (returned to caller) observation_key <- NULL for (type in main_types) { - # Track final detected key for this type (to show one message at end) final_detected_key <- NULL - - main_cols <- colnames(main_surveys[[type]]) - can_merge <- vapply( - survey_files, - function(x) { - any(colnames(contact_data[[x]]) %in% main_cols) - }, - TRUE + merge_files <- get_mergeable_files( + survey_files, contact_data, colnames(main_surveys[[type]]) ) - merge_files <- survey_files[can_merge] + while (length(merge_files) > 0) { merged_files <- NULL for (file in merge_files) { - contact_data[[file]] <- contact_data[[file]][, - ..merge_id := seq_len(.N) - ] - common_id <- intersect( - colnames(contact_data[[file]]), - colnames(main_surveys[[type]]) - ) - merged <- tryCatch( - { - merge( - main_surveys[[type]], - contact_data[[file]], - by = common_id, - all.x = TRUE - ) - }, - error = function(cond) { - if (!grepl("cartesian", cond$message, fixed = TRUE)) { - cli::cli_abort( - "Merge failed for {.file {basename(file)}} on \\ - {.val {common_id}}: {cond$message}", - call = call - ) - } - NULL - } + result <- try_merge_one_file( + file, type, main_surveys[[type]], contact_data, + participant_key = participant_key, call = call ) - - if (is.null(merged)) { - next + if (!is.null(result$merged)) { + main_surveys[[type]] <- result$merged + merged_files <- c(merged_files, result$file) } - - # Check if merge created duplicates (longitudinal data case) - has_duplicates <- anyDuplicated(merged[, "..main_id", with = FALSE]) > 0 - - # If duplicates exist, check if there's a valid unique key - # (this handles longitudinal surveys where sday files create multiple - # rows per participant) - accept_merge <- !has_duplicates - if (has_duplicates && type == "contact") { - # No methodology for longitudinal contacts - reject merge with - # duplicates - next - } - if (has_duplicates) { - # Only participants reach here (contacts exit early above) - # Use user-specified key if valid, else auto-detect - if (!is.null(participant_key)) { - # Check if all key columns exist in merged data - missing_cols <- setdiff(participant_key, names(merged)) - if ( - length(missing_cols) == 0 && - anyDuplicated(merged, by = participant_key) == 0L - ) { - # User's key works - unique_key <- participant_key - } else { - # Key doesn't work or columns missing - auto-detect - unique_key <- find_unique_key(merged, "part_id") - } - } else { - unique_key <- find_unique_key(merged, "part_id") - } - - if (!is.null(unique_key)) { - accept_merge <- TRUE - # Update ..main_id to reflect the new unique key - merged[, ("..main_id") := seq_len(.N)] - final_detected_key <- unique_key - } - } - - if (accept_merge) { - ## we're keeping the merge; now check for any warnings to issue - matched_main <- sum(!is.na(merged[["..merge_id"]])) - unmatched_main <- nrow(merged) - matched_main - if (unmatched_main > 0) { - cli::cli_warn( - "Only {matched_main} matching value{?s} in {.val {common_id}} \\ - column{?s} when pulling {.file {basename(file)}} into \\ - {.val {type}} survey.", - call = call - ) - } - unmatched_merge <- nrow(contact_data[[file]]) - matched_main - if (unmatched_merge > 0) { - cli::cli_warn( - "{unmatched_merge} row{?s} could not be matched when pulling \\ - {.file {basename(file)}} into {.val {type}} survey.", - call = call - ) - } - merged[, ("..merge_id") := NULL] - main_surveys[[type]] <- merged - merged_files <- c(merged_files, file) + if (!is.null(result$detected_key)) { + final_detected_key <- result$detected_key } } survey_files <- setdiff(survey_files, merged_files) - main_cols <- colnames(main_surveys[[type]]) - can_merge <- vapply( - survey_files, - function(x) { - any(colnames(contact_data[[x]]) %in% main_cols) - }, - TRUE - ) if (is.null(merged_files)) { merge_files <- NULL } else { - merge_files <- names(can_merge[can_merge]) + merge_files <- get_mergeable_files( + survey_files, contact_data, colnames(main_surveys[[type]]) + ) } } # Show one message about detected longitudinal data (if not suppressed) - # Show if: we detected a key AND user's key doesn't match user_key_matches <- !is.null(participant_key) && setequal(final_detected_key, participant_key) if (!is.null(final_detected_key) && !user_key_matches) { @@ -275,8 +299,6 @@ try_merge_additional_files <- function( ) } - # Store the observation key for participants (excluding part_id since - # that's always the participant identifier after internal renaming) if (type == "participant" && !is.null(final_detected_key)) { obs_cols <- setdiff(final_detected_key, "part_id") if (length(obs_cols) > 0) { diff --git a/man/get_mergeable_files.Rd b/man/get_mergeable_files.Rd new file mode 100644 index 00000000..b50f174a --- /dev/null +++ b/man/get_mergeable_files.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load-survey-utils.R +\name{get_mergeable_files} +\alias{get_mergeable_files} +\title{Identify which survey files share columns with a main table} +\usage{ +get_mergeable_files(survey_files, contact_data, main_cols) +} +\description{ +Identify which survey files share columns with a main table +} +\keyword{internal} diff --git a/man/resolve_longitudinal_key.Rd b/man/resolve_longitudinal_key.Rd new file mode 100644 index 00000000..d4c492bb --- /dev/null +++ b/man/resolve_longitudinal_key.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load-survey-utils.R +\name{resolve_longitudinal_key} +\alias{resolve_longitudinal_key} +\title{Resolve the unique key for a merged data.table with duplicates} +\usage{ +resolve_longitudinal_key(merged, participant_key = NULL) +} +\description{ +Validates a user-provided participant_key or auto-detects one via +find_unique_key(). +} +\keyword{internal} diff --git a/man/try_merge_one_file.Rd b/man/try_merge_one_file.Rd new file mode 100644 index 00000000..be59f5d2 --- /dev/null +++ b/man/try_merge_one_file.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load-survey-utils.R +\name{try_merge_one_file} +\alias{try_merge_one_file} +\title{Try merging a single additional file into a main survey table} +\usage{ +try_merge_one_file( + file, + type, + main_survey, + contact_data, + participant_key = NULL, + call = rlang::caller_env() +) +} +\value{ +A list with components: merged (data.table or NULL), detected_key +(character vector or NULL), file (the file that was merged, or NULL). +} +\description{ +Try merging a single additional file into a main survey table +} +\keyword{internal} From 4600007089fad6955abcbf490be83ce3b32544ad Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Mon, 23 Feb 2026 19:44:33 +0000 Subject: [PATCH 2/7] Add NEWS entry for extracted helpers Co-authored-by: sbfnk --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 42a231a4..b7adedbf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -51,6 +51,10 @@ data. The `wpp2017` package is now a suggested dependency rather than a required import (#258). +* Reduced cyclomatic complexity of `try_merge_additional_files()` by extracting + `get_mergeable_files()`, `resolve_longitudinal_key()`, and + `try_merge_one_file()` helpers (#289). + * `get_survey()`, `download_survey()`, `list_surveys()`, `get_citation()`, and `survey_countries()` now warn unconditionally when called. These functions were soft-deprecated in 0.5.0 and users should switch to the From 08a2f5b2769be27a0dea14252ba50fbbf3654646 Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Mon, 23 Feb 2026 22:53:48 +0000 Subject: [PATCH 3/7] Fix formatting and unmatched-merge count in try_merge_one_file Co-authored-by: sbfnk --- R/load-survey-utils.R | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index 70348665..1d27fe85 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -223,7 +223,8 @@ try_merge_one_file <- function( call = call ) } - unmatched_merge <- nrow(contact_data[[file]]) - matched_main + matched_merge <- uniqueN(merged[["..merge_id"]], na.rm = TRUE) + unmatched_merge <- nrow(contact_data[[file]]) - matched_merge if (unmatched_merge > 0) { cli::cli_warn( "{unmatched_merge} row{?s} could not be matched when pulling \\ @@ -251,15 +252,21 @@ try_merge_additional_files <- function( for (type in main_types) { final_detected_key <- NULL merge_files <- get_mergeable_files( - survey_files, contact_data, colnames(main_surveys[[type]]) + survey_files, + contact_data, + colnames(main_surveys[[type]]) ) while (length(merge_files) > 0) { merged_files <- NULL for (file in merge_files) { result <- try_merge_one_file( - file, type, main_surveys[[type]], contact_data, - participant_key = participant_key, call = call + file, + type, + main_surveys[[type]], + contact_data, + participant_key = participant_key, + call = call ) if (!is.null(result$merged)) { main_surveys[[type]] <- result$merged @@ -274,7 +281,9 @@ try_merge_additional_files <- function( merge_files <- NULL } else { merge_files <- get_mergeable_files( - survey_files, contact_data, colnames(main_surveys[[type]]) + survey_files, + contact_data, + colnames(main_surveys[[type]]) ) } } From 5130230731fb7ecd7485294478ee0b3618961005 Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 09:29:55 +0000 Subject: [PATCH 4/7] Simplify helpers and reduce try_merge_additional_files to complexity 12 Co-authored-by: sbfnk --- NEWS.md | 7 +- R/load-survey-utils.R | 215 +++++++++++++++++++------------- man/get_mergeable_files.Rd | 12 -- man/resolve_longitudinal_key.Rd | 13 -- man/try_merge_one_file.Rd | 23 ---- 5 files changed, 131 insertions(+), 139 deletions(-) delete mode 100644 man/get_mergeable_files.Rd delete mode 100644 man/resolve_longitudinal_key.Rd delete mode 100644 man/try_merge_one_file.Rd diff --git a/NEWS.md b/NEWS.md index b7adedbf..bc68458a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -52,8 +52,11 @@ required import (#258). * Reduced cyclomatic complexity of `try_merge_additional_files()` by extracting - `get_mergeable_files()`, `resolve_longitudinal_key()`, and - `try_merge_one_file()` helpers (#289). + helper functions (#289). + +* Fixed unmatched-merge warning count when merging files with duplicate keys; + previously, the count could be wrong (or negative) due to counting join pairs + rather than distinct matched rows (#289). * `get_survey()`, `download_survey()`, `list_surveys()`, `get_citation()`, and `survey_countries()` now warn unconditionally when called. These functions diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index 1d27fe85..f1cfef20 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -113,8 +113,7 @@ join_compatible_files <- function(survey_files, contact_data) { } #' Identify which survey files share columns with a main table -#' @autoglobal -#' @keywords internal +#' @noRd get_mergeable_files <- function(survey_files, contact_data, main_cols) { can_merge <- vapply( survey_files, @@ -130,8 +129,7 @@ get_mergeable_files <- function(survey_files, contact_data, main_cols) { #' #' Validates a user-provided participant_key or auto-detects one via #' find_unique_key(). -#' @autoglobal -#' @keywords internal +#' @noRd resolve_longitudinal_key <- function(merged, participant_key = NULL) { if (!is.null(participant_key)) { missing_cols <- setdiff(participant_key, names(merged)) @@ -147,10 +145,10 @@ resolve_longitudinal_key <- function(merged, participant_key = NULL) { #' Try merging a single additional file into a main survey table #' -#' @return A list with components: merged (data.table or NULL), detected_key -#' (character vector or NULL), file (the file that was merged, or NULL). +#' @return A list with components: merged (data.table or NULL) and detected_key +#' (character vector or NULL). #' @autoglobal -#' @keywords internal +#' @noRd try_merge_one_file <- function( file, type, @@ -159,6 +157,8 @@ try_merge_one_file <- function( participant_key = NULL, call = rlang::caller_env() ) { + null_result <- list(merged = NULL, detected_key = NULL) + contact_data[[file]] <- contact_data[[file]][, ..merge_id := seq_len(.N) ] @@ -188,31 +188,39 @@ try_merge_one_file <- function( ) if (is.null(merged)) { - return(list(merged = NULL, detected_key = NULL, file = NULL)) + return(null_result) } has_duplicates <- anyDuplicated(merged[, "..main_id", with = FALSE]) > 0 - accept_merge <- !has_duplicates detected_key <- NULL - if (has_duplicates && type == "contact") { - return(list(merged = NULL, detected_key = NULL, file = NULL)) - } - if (has_duplicates) { - unique_key <- resolve_longitudinal_key(merged, participant_key) - if (!is.null(unique_key)) { - accept_merge <- TRUE - merged[, ("..main_id") := seq_len(.N)] - detected_key <- unique_key + if (type == "contact") { + return(null_result) + } + detected_key <- resolve_longitudinal_key(merged, participant_key) + if (is.null(detected_key)) { + return(null_result) } + merged[, ("..main_id") := seq_len(.N)] } - if (!accept_merge) { - return(list(merged = NULL, detected_key = NULL, file = NULL)) - } + warn_merge_quality(merged, contact_data[[file]], common_id, file, type, call) + merged[, ("..merge_id") := NULL] - ## Issue warnings about match quality + list(merged = merged, detected_key = detected_key) +} + +#' Warn about unmatched rows after a merge +#' @noRd +warn_merge_quality <- function( + merged, + file_data, + common_id, + file, + type, + call +) { matched_main <- sum(!is.na(merged[["..merge_id"]])) unmatched_main <- nrow(merged) - matched_main if (unmatched_main > 0) { @@ -224,7 +232,7 @@ try_merge_one_file <- function( ) } matched_merge <- uniqueN(merged[["..merge_id"]], na.rm = TRUE) - unmatched_merge <- nrow(contact_data[[file]]) - matched_merge + unmatched_merge <- nrow(file_data) - matched_merge if (unmatched_merge > 0) { cli::cli_warn( "{unmatched_merge} row{?s} could not be matched when pulling \\ @@ -232,9 +240,87 @@ try_merge_one_file <- function( call = call ) } - merged[, ("..merge_id") := NULL] +} + +#' Try merging all compatible files into a single main survey table +#' +#' Iteratively merges files that share columns with the main table, repeating +#' until no further merges are possible. +#' @return A list with merged (the updated main survey), detected_key, and +#' remaining survey_files. +#' @noRd +merge_all_files <- function( + type, + main_survey, + survey_files, + contact_data, + participant_key = NULL, + call = rlang::caller_env() +) { + detected_key <- NULL + merge_files <- get_mergeable_files( + survey_files, contact_data, colnames(main_survey) + ) + + while (length(merge_files) > 0) { + merged_files <- NULL + for (file in merge_files) { + result <- try_merge_one_file( + file, + type, + main_survey, + contact_data, + participant_key = participant_key, + call = call + ) + if (!is.null(result$merged)) { + main_survey <- result$merged + merged_files <- c(merged_files, file) + } + if (!is.null(result$detected_key)) { + detected_key <- result$detected_key + } + } + survey_files <- setdiff(survey_files, merged_files) + if (is.null(merged_files)) break + merge_files <- get_mergeable_files( + survey_files, contact_data, colnames(main_survey) + ) + } + + list( + merged = main_survey, + detected_key = detected_key, + survey_files = survey_files + ) +} + +#' Inform user about detected longitudinal data +#' @noRd +inform_longitudinal_key <- function( + detected_key, + participant_key = NULL, + call = rlang::caller_env() +) { + if (is.null(detected_key)) return(invisible(NULL)) + user_key_matches <- !is.null(participant_key) && + setequal(detected_key, participant_key) + if (user_key_matches) return(invisible(NULL)) - list(merged = merged, detected_key = detected_key, file = file) + key_code <- paste0( + "c(", + paste0("\"", detected_key, "\"", collapse = ", "), + ")" + ) + cli::cli_inform( + c( + "Detected longitudinal data with unique key: {.val {detected_key}}.", + "*" = "Will treat individuals with the same {.val part_id} as unique.", + i = "To suppress this message, use: \\ + {.code load_survey(..., participant_key = {key_code})}" + ), + call = call + ) } ## lastly, merge in any additional files that can be merged @@ -250,81 +336,32 @@ try_merge_additional_files <- function( observation_key <- NULL for (type in main_types) { - final_detected_key <- NULL - merge_files <- get_mergeable_files( + result <- merge_all_files( + type, + main_surveys[[type]], survey_files, contact_data, - colnames(main_surveys[[type]]) + participant_key = participant_key, + call = call ) + main_surveys[[type]] <- result$merged[, ..main_id := NULL] + survey_files <- result$survey_files - while (length(merge_files) > 0) { - merged_files <- NULL - for (file in merge_files) { - result <- try_merge_one_file( - file, - type, - main_surveys[[type]], - contact_data, - participant_key = participant_key, - call = call - ) - if (!is.null(result$merged)) { - main_surveys[[type]] <- result$merged - merged_files <- c(merged_files, result$file) - } - if (!is.null(result$detected_key)) { - final_detected_key <- result$detected_key - } - } - survey_files <- setdiff(survey_files, merged_files) - if (is.null(merged_files)) { - merge_files <- NULL - } else { - merge_files <- get_mergeable_files( - survey_files, - contact_data, - colnames(main_surveys[[type]]) - ) - } - } - - # Show one message about detected longitudinal data (if not suppressed) - user_key_matches <- !is.null(participant_key) && - setequal(final_detected_key, participant_key) - if (!is.null(final_detected_key) && !user_key_matches) { - key_code <- paste0( - "c(", - paste0("\"", final_detected_key, "\"", collapse = ", "), - ")" - ) - cli::cli_inform( - c( - "Detected longitudinal data with unique key: {.val {final_detected_key}}.", - "*" = "Will treat individuals with the same {.val part_id} as unique.", - i = "To suppress this message, use: \\ - {.code load_survey(..., participant_key = {key_code})}" - ), - call = call - ) - } + inform_longitudinal_key(result$detected_key, participant_key, call) - if (type == "participant" && !is.null(final_detected_key)) { - obs_cols <- setdiff(final_detected_key, "part_id") + if (type == "participant" && !is.null(result$detected_key)) { + obs_cols <- setdiff(result$detected_key, "part_id") if (length(obs_cols) > 0) { observation_key <- obs_cols } } - - main_surveys[[type]] <- main_surveys[[type]][, ..main_id := NULL] } - if (length(survey_files) > 0) { - for (file in survey_files) { - cli::cli_warn( - message = "Could not merge {.file {file}}.", - call = call - ) - } + for (file in survey_files) { + cli::cli_warn( + message = "Could not merge {.file {file}}.", + call = call + ) } list( diff --git a/man/get_mergeable_files.Rd b/man/get_mergeable_files.Rd deleted file mode 100644 index b50f174a..00000000 --- a/man/get_mergeable_files.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load-survey-utils.R -\name{get_mergeable_files} -\alias{get_mergeable_files} -\title{Identify which survey files share columns with a main table} -\usage{ -get_mergeable_files(survey_files, contact_data, main_cols) -} -\description{ -Identify which survey files share columns with a main table -} -\keyword{internal} diff --git a/man/resolve_longitudinal_key.Rd b/man/resolve_longitudinal_key.Rd deleted file mode 100644 index d4c492bb..00000000 --- a/man/resolve_longitudinal_key.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load-survey-utils.R -\name{resolve_longitudinal_key} -\alias{resolve_longitudinal_key} -\title{Resolve the unique key for a merged data.table with duplicates} -\usage{ -resolve_longitudinal_key(merged, participant_key = NULL) -} -\description{ -Validates a user-provided participant_key or auto-detects one via -find_unique_key(). -} -\keyword{internal} diff --git a/man/try_merge_one_file.Rd b/man/try_merge_one_file.Rd deleted file mode 100644 index be59f5d2..00000000 --- a/man/try_merge_one_file.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load-survey-utils.R -\name{try_merge_one_file} -\alias{try_merge_one_file} -\title{Try merging a single additional file into a main survey table} -\usage{ -try_merge_one_file( - file, - type, - main_survey, - contact_data, - participant_key = NULL, - call = rlang::caller_env() -) -} -\value{ -A list with components: merged (data.table or NULL), detected_key -(character vector or NULL), file (the file that was merged, or NULL). -} -\description{ -Try merging a single additional file into a main survey table -} -\keyword{internal} From 0bfde9c131f0b352f0959a61c8a98f25fc82c46d Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 09:47:58 +0000 Subject: [PATCH 5/7] Address review: fix formatting, warn on ignored participant_key Co-authored-by: sbfnk --- R/load-survey-utils.R | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index f1cfef20..9595e58d 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -120,7 +120,7 @@ get_mergeable_files <- function(survey_files, contact_data, main_cols) { function(x) { any(colnames(contact_data[[x]]) %in% main_cols) }, - TRUE + logical(1) ) names(can_merge[can_merge]) } @@ -139,6 +139,10 @@ resolve_longitudinal_key <- function(merged, participant_key = NULL) { ) { return(participant_key) } + cli::cli_warn( + "Provided {.arg participant_key} {.val {participant_key}} did not \\ + uniquely identify rows; auto-detecting a key instead." + ) } find_unique_key(merged, "part_id") } @@ -259,11 +263,13 @@ merge_all_files <- function( ) { detected_key <- NULL merge_files <- get_mergeable_files( - survey_files, contact_data, colnames(main_survey) + survey_files, + contact_data, + colnames(main_survey) ) while (length(merge_files) > 0) { - merged_files <- NULL + merged_files <- character(0) for (file in merge_files) { result <- try_merge_one_file( file, @@ -282,9 +288,13 @@ merge_all_files <- function( } } survey_files <- setdiff(survey_files, merged_files) - if (is.null(merged_files)) break + if (length(merged_files) == 0) { + break + } merge_files <- get_mergeable_files( - survey_files, contact_data, colnames(main_survey) + survey_files, + contact_data, + colnames(main_survey) ) } @@ -302,14 +312,18 @@ inform_longitudinal_key <- function( participant_key = NULL, call = rlang::caller_env() ) { - if (is.null(detected_key)) return(invisible(NULL)) + if (is.null(detected_key)) { + return(invisible(NULL)) + } user_key_matches <- !is.null(participant_key) && setequal(detected_key, participant_key) - if (user_key_matches) return(invisible(NULL)) + if (user_key_matches) { + return(invisible(NULL)) + } key_code <- paste0( "c(", - paste0("\"", detected_key, "\"", collapse = ", "), + paste0('"', detected_key, '"', collapse = ", "), ")" ) cli::cli_inform( From 6702271345c69f050652eb7a8a20f9d39e7fc8e2 Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:15:38 +0000 Subject: [PATCH 6/7] Distinguish warning messages for invalid participant_key, consistent := style Co-authored-by: sbfnk --- R/load-survey-utils.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index 9595e58d..28f9b8cb 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -139,10 +139,17 @@ resolve_longitudinal_key <- function(merged, participant_key = NULL) { ) { return(participant_key) } - cli::cli_warn( - "Provided {.arg participant_key} {.val {participant_key}} did not \\ - uniquely identify rows; auto-detecting a key instead." - ) + if (length(missing_cols) > 0) { + cli::cli_warn( + "Provided {.arg participant_key} contains column{?s} not found in \\ + merged data: {.val {missing_cols}}; auto-detecting a key instead." + ) + } else { + cli::cli_warn( + "Provided {.arg participant_key} {.val {participant_key}} did not \\ + uniquely identify rows; auto-detecting a key instead." + ) + } } find_unique_key(merged, "part_id") } @@ -164,7 +171,7 @@ try_merge_one_file <- function( null_result <- list(merged = NULL, detected_key = NULL) contact_data[[file]] <- contact_data[[file]][, - ..merge_id := seq_len(.N) + ("..merge_id") := seq_len(.N) ] common_id <- intersect( colnames(contact_data[[file]]), @@ -358,7 +365,7 @@ try_merge_additional_files <- function( participant_key = participant_key, call = call ) - main_surveys[[type]] <- result$merged[, ..main_id := NULL] + main_surveys[[type]] <- result$merged[, ("..main_id") := NULL] survey_files <- result$survey_files inform_longitudinal_key(result$detected_key, participant_key, call) From f8d9de11e13fa96b44c6fef0789c399b02566387 Mon Sep 17 00:00:00 2001 From: sbfnk-bot <242615673+sbfnk-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:26:49 +0000 Subject: [PATCH 7/7] Thread call param through resolve_longitudinal_key, use basename consistently Co-authored-by: sbfnk --- R/load-survey-utils.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/R/load-survey-utils.R b/R/load-survey-utils.R index 28f9b8cb..0baa7787 100644 --- a/R/load-survey-utils.R +++ b/R/load-survey-utils.R @@ -130,7 +130,11 @@ get_mergeable_files <- function(survey_files, contact_data, main_cols) { #' Validates a user-provided participant_key or auto-detects one via #' find_unique_key(). #' @noRd -resolve_longitudinal_key <- function(merged, participant_key = NULL) { +resolve_longitudinal_key <- function( + merged, + participant_key = NULL, + call = rlang::caller_env() +) { if (!is.null(participant_key)) { missing_cols <- setdiff(participant_key, names(merged)) if ( @@ -142,12 +146,14 @@ resolve_longitudinal_key <- function(merged, participant_key = NULL) { if (length(missing_cols) > 0) { cli::cli_warn( "Provided {.arg participant_key} contains column{?s} not found in \\ - merged data: {.val {missing_cols}}; auto-detecting a key instead." + merged data: {.val {missing_cols}}; auto-detecting a key instead.", + call = call ) } else { cli::cli_warn( "Provided {.arg participant_key} {.val {participant_key}} did not \\ - uniquely identify rows; auto-detecting a key instead." + uniquely identify rows; auto-detecting a key instead.", + call = call ) } } @@ -209,7 +215,7 @@ try_merge_one_file <- function( if (type == "contact") { return(null_result) } - detected_key <- resolve_longitudinal_key(merged, participant_key) + detected_key <- resolve_longitudinal_key(merged, participant_key, call) if (is.null(detected_key)) { return(null_result) } @@ -380,7 +386,7 @@ try_merge_additional_files <- function( for (file in survey_files) { cli::cli_warn( - message = "Could not merge {.file {file}}.", + message = "Could not merge {.file {basename(file)}}.", call = call ) }