UCD-SERG · Copilot · Jan 21, 2026 · Jan 21, 2026 · Jan 21, 2026 · Jan 27, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: serocalculator
 Title: Estimating Infection Rates from Serological Data
-Version: 1.4.0.9003
+Version: 1.4.0.9004
 Authors@R: c(
     person("Kristina", "Lai", , "[email protected]", role = c("aut", "cre")),
     person("Chris", "Orwa", role = "aut"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,11 @@ S3method(autoplot,summary.seroincidence.by)
 S3method(compare_seroincidence,default)
 S3method(compare_seroincidence,seroincidence)
 S3method(compare_seroincidence,seroincidence.by)
+S3method(compute_icc,default)
+S3method(compute_icc,seroincidence)
+S3method(compute_icc,seroincidence.by)
+S3method(print,icc_seroincidence)
+S3method(print,icc_seroincidence.by)
 S3method(print,seroincidence)
 S3method(print,seroincidence.by)
 S3method(print,summary.pop_data)
@@ -26,6 +31,7 @@ export(as_sr_params)
 export(autoplot)
 export(check_pop_data)
 export(compare_seroincidence)
+export(compute_icc)
 export(count_strata)
 export(est.incidence)
 export(est.incidence.by)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,12 @@
 # serocalculator (development version)
 
+## New features
+
+* Added `compute_icc()` function to calculate Intraclass Correlation Coefficient (ICC) for seroincidence estimates from clustered sampling designs
+  - Computes ICC using design effect (DEFF) method
+  - Works with both single and stratified seroincidence estimates
+  - Includes print methods for formatted output
+
 # serocalculator 1.4.0
 
 ## New features

diff --git a/R/compute_cluster_robust_var.R b/R/compute_cluster_robust_var.R
@@ -0,0 +1,119 @@
+#' Compute cluster-robust variance for seroincidence estimates
+#'
+#' @description
+#' Computes cluster-robust (sandwich) variance estimates for seroincidence
+#' parameter estimates when data come from a clustered sampling design.
+#' This adjusts the standard errors to account for within-cluster correlation.
+#'
+#' @param fit a `seroincidence` object from [est_seroincidence()]
+#' @param cluster_var name(s) of the cluster variable(s) in the data.
+#'   Can be a single variable or vector of variables for multi-level clustering.
+#' @param stratum_var optional name of the stratum variable
+#'
+#' @return variance of log(lambda) accounting for clustering
+#' @keywords internal
+#' @noRd
+.compute_cluster_robust_var <- function(
+    fit,
+    cluster_var,
+    stratum_var = NULL) {
+  # Extract stored data (already split by antigen_iso)
+  pop_data_list <- attr(fit, "pop_data")
+  sr_params_list <- attr(fit, "sr_params")
+  noise_params_list <- attr(fit, "noise_params")
+  antigen_isos <- attr(fit, "antigen_isos")
+
+  # Get MLE estimate
+  log_lambda_mle <- fit$estimate
+
+  # Combine pop_data list back into a single data frame
+  # to get cluster info
+  pop_data_combined <- do.call(rbind, pop_data_list)
+
+  # Compute score (gradient) using numerical differentiation
+  # The score is the derivative of log-likelihood w.r.t. log(lambda)
+  epsilon <- 1e-6
+
+  # For each observation, compute the contribution to the score
+  # We need to identify which cluster each observation belongs to
+
+  # Handle multiple clustering levels by creating composite cluster ID
+  if (length(cluster_var) == 1) {
+    cluster_ids <- pop_data_combined[[cluster_var]]
+  } else {
+    # Create composite cluster ID from multiple variables
+    cluster_ids <- interaction(
+      pop_data_combined[, cluster_var, drop = FALSE],
+      drop = TRUE,
+      sep = "_"
+    )
+  }
+
+  # Get unique clusters
+  unique_clusters <- unique(cluster_ids)
+  n_clusters <- length(unique_clusters)
+
+  # Compute cluster-level scores
+  cluster_scores <- numeric(n_clusters)
+
+  for (i in seq_along(unique_clusters)) {
+    cluster_id <- unique_clusters[i]
+
+    # Get observations in this cluster
+    cluster_mask <- cluster_ids == cluster_id
+
+    # Create temporary pop_data with only this cluster
+    pop_data_cluster <- pop_data_combined[cluster_mask, , drop = FALSE]
+
+    # Split by antigen
+    pop_data_cluster_list <- split(
+      pop_data_cluster,
+      pop_data_cluster$antigen_iso
+    )
+
+    # Ensure all antigen_isos are represented
+    # (add empty data frames if missing)
+    for (ag in antigen_isos) {
+      if (!ag %in% names(pop_data_cluster_list)) {
+        # Create empty data frame with correct structure
+        pop_data_cluster_list[[ag]] <- pop_data_list[[ag]][0, , drop = FALSE]
+      }
+    }
+
+    # Compute log-likelihood for this cluster at MLE
+    ll_cluster_mle <- -(.nll(
+      log.lambda = log_lambda_mle,
+      pop_data = pop_data_cluster_list,
+      antigen_isos = antigen_isos,
+      curve_params = sr_params_list,
+      noise_params = noise_params_list,
+      verbose = FALSE
+    ))
+
+    # Compute log-likelihood at MLE + epsilon
+    ll_cluster_plus <- -(.nll(
+      log.lambda = log_lambda_mle + epsilon,
+      pop_data = pop_data_cluster_list,
+      antigen_isos = antigen_isos,
+      curve_params = sr_params_list,
+      noise_params = noise_params_list,
+      verbose = FALSE
+    ))
+
+    # Numerical derivative (score for this cluster)
+    cluster_scores[i] <- (ll_cluster_plus - ll_cluster_mle) / epsilon
+  }
+
+  # Compute B matrix (middle of sandwich)
+  # B = sum of outer products of cluster scores
+  b_matrix <- sum(cluster_scores^2) # nolint: object_name_linter
+
+  # Get Hessian (already computed by nlm)
+  h_matrix <- fit$hessian # nolint: object_name_linter
+
+  # Sandwich variance: V = H^(-1) * B * H^(-1)
+  # Since we have a scalar parameter, this simplifies to:
+  var_log_lambda_robust <- b_matrix / (h_matrix^2)
+
+  return(var_log_lambda_robust)
+}
diff --git a/R/compute_icc.R b/R/compute_icc.R
@@ -0,0 +1,79 @@
+#' Calculate Intraclass Correlation Coefficient (ICC) for seroincidence
+#'
+#' @description
+#' Computes the Intraclass Correlation Coefficient (ICC) for a seroincidence
+#' estimate from a clustered sampling design. The ICC measures the proportion
+#' of total variance that is due to between-cluster variation, indicating
+#' how correlated observations within the same cluster are.
+#'
+#' The ICC is estimated using the design effect (DEFF):
+#' \deqn{ICC = \frac{DEFF - 1}{\bar{m} - 1}}
+#' where DEFF is the ratio of cluster-robust to standard variance, and
+#' \eqn{\bar{m}} is the average cluster size.
+#'
+#' @param fit a `seroincidence` object from [est_seroincidence()] or a
+#'   `seroincidence.by` object from [est_seroincidence_by()] that was
+#'   fitted with clustering (i.e., with `cluster_var` specified)
+#'
+#' @return
+#' * For `seroincidence` objects: A list with components `icc`, `deff`,
+#'   `avg_cluster_size`, `min_cluster_size`, `max_cluster_size`, `n_clusters`,
+#'   `cluster_var`, and `antigen_isos`
+#' * For `seroincidence.by` objects: A data frame with one row per stratum
+#'   containing the same information plus stratum identifiers
+#'
+#' @export
+#' @example inst/examples/exm-compute_icc.R
+compute_icc <- function(fit) {
+  UseMethod("compute_icc")
+}
+
+#' @export
+compute_icc.seroincidence <- function(fit) {
+  return(.compute_icc_single(fit))
+}
+
+#' @export
+compute_icc.seroincidence.by <- function(fit) {
+  # Process each stratum
+  icc_results <- lapply(names(fit), function(stratum_name) {
+    stratum_fit <- fit[[stratum_name]]
+    .icc_single_to_df_row(stratum_name, stratum_fit)
+  })
+
+  # Combine into a data frame
+  result_df <- do.call(rbind, icc_results)
+
+  # Add strata information from attributes
+  strata_info <- attr(fit, "Strata")
+  if (!is.null(strata_info)) {
+    result_df <- merge(
+      strata_info,
+      result_df,
+      by = "Stratum",
+      all.y = TRUE
+    )
+
+    # Reorder columns to put Stratum and strata variables first
+    strata_cols <- setdiff(names(strata_info), "Stratum")
+    other_cols <- setdiff(
+      names(result_df),
+      c("Stratum", strata_cols)
+    )
+    result_df <- result_df[, c("Stratum", strata_cols, other_cols)]
+  }
+
+  class(result_df) <- c("icc_seroincidence.by", "data.frame")
+  return(result_df)
+}
+
+#' @export
+compute_icc.default <- function(fit) {
+  cli::cli_abort(c(
+    "x" = paste(
+      "{.arg fit} must be a {.cls seroincidence} or",
+      "{.cls seroincidence.by} object from {.fun est_seroincidence}",
+      "or {.fun est_seroincidence_by}."
+    )
+  ))
+}
diff --git a/R/compute_icc_single.R b/R/compute_icc_single.R
@@ -0,0 +1,88 @@
+#' Internal function to compute ICC for a single seroincidence object
+#' @noRd
+#' @keywords internal
+.compute_icc_single <- function(fit) {
+  # Check that clustering was used
+  cluster_var <- attr(fit, "cluster_var")
+  if (is.null(cluster_var)) {
+    cli::cli_abort(c(
+      "x" = paste(
+        "ICC can only be computed for models fitted with clustering.",
+        "Please specify {.arg cluster_var} in {.fun est_seroincidence}."
+      )
+    ))
+  }
+
+  # Check for multiple clustering levels
+  if (length(cluster_var) > 1) {
+    cli::cli_abort(c(
+      "x" = paste(
+        "ICC calculation only allowed for one level of clustering.",
+        "{.arg cluster_var} has {length(cluster_var)} levels:",
+        "{cluster_var}. Please use a single cluster variable."
+      )
+    ))
+  }
+
+  stratum_var <- attr(fit, "stratum_var")
+  antigen_isos <- attr(fit, "antigen_isos")
+
+  # Get the combined population data
+  pop_data_list <- attr(fit, "pop_data")
+  pop_data_combined <- do.call(rbind, pop_data_list)
+
+  # Get cluster information
+  cluster_ids <- pop_data_combined[[cluster_var]]
+  unique_clusters <- unique(cluster_ids)
+  n_clusters <- length(unique_clusters)
+
+  # Calculate cluster sizes (number of observations per cluster)
+  cluster_sizes <- table(cluster_ids)
+  avg_cluster_size <- mean(cluster_sizes)
+  min_cluster_size <- min(cluster_sizes)
+  max_cluster_size <- max(cluster_sizes)
+
+  # Compute standard variance (from Hessian)
+  var_standard <- 1 / fit$hessian |> as.numeric()
+
+  # Compute cluster-robust variance
+  var_robust <- .compute_cluster_robust_var(
+    fit = fit,
+    cluster_var = cluster_var,
+    stratum_var = stratum_var
+  ) |> as.numeric()
+
+  # Calculate design effect
+  deff <- var_robust / var_standard
+
+  # Calculate ICC using design effect
+  # DEFF = 1 + (m̄ - 1) × ICC
+  # ICC = (DEFF - 1) / (m̄ - 1)
+  if (avg_cluster_size > 1) {
+    icc <- (deff - 1) / (avg_cluster_size - 1)
+  } else {
+    cli::cli_warn(c(
+      "!" = paste(
+        "Average cluster size is 1; ICC cannot be computed.",
+        "Returning NA."
+      )
+    ))
+    icc <- NA_real_
+  }
+
+  # Return results as a list
+  result <- list(
+    icc = icc,
+    deff = deff,
+    avg_cluster_size = avg_cluster_size,
+    min_cluster_size = min_cluster_size,
+    max_cluster_size = max_cluster_size,
+    n_clusters = n_clusters,
+    cluster_var = cluster_var,
+    antigen_isos = paste(antigen_isos, collapse = ", ")
+  )
+
+  class(result) <- c("icc_seroincidence", "list")
+
+  return(result)
+}
diff --git a/R/icc_single_to_df_row.R b/R/icc_single_to_df_row.R
@@ -0,0 +1,23 @@
+#' Helper function to convert single seroincidence ICC result to data frame row
+#' @noRd
+#' @keywords internal
+.icc_single_to_df_row <- function(stratum_name, stratum_fit) {
+  icc_res <- .compute_icc_single(stratum_fit)
+
+  # Add stratum identifier
+  icc_res$Stratum <- stratum_name
+
+  # Convert to data frame row
+  data.frame(
+    Stratum = icc_res$Stratum,
+    icc = icc_res$icc,
+    deff = icc_res$deff,
+    avg_cluster_size = icc_res$avg_cluster_size,
+    min_cluster_size = icc_res$min_cluster_size,
+    max_cluster_size = icc_res$max_cluster_size,
+    n_clusters = icc_res$n_clusters,
+    cluster_var = icc_res$cluster_var,
+    antigen_isos = icc_res$antigen_isos,
+    stringsAsFactors = FALSE
+  )
+}