Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: serocalculator
Title: Estimating Infection Rates from Serological Data
Version: 1.4.0.9003
Version: 1.4.0.9004
Authors@R: c(
person("Kristina", "Lai", , "[email protected]", role = c("aut", "cre")),
person("Chris", "Orwa", role = "aut"),
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ S3method(autoplot,summary.seroincidence.by)
S3method(compare_seroincidence,default)
S3method(compare_seroincidence,seroincidence)
S3method(compare_seroincidence,seroincidence.by)
S3method(compute_icc,default)
S3method(compute_icc,seroincidence)
S3method(compute_icc,seroincidence.by)
S3method(print,icc_seroincidence)
S3method(print,icc_seroincidence.by)
S3method(print,seroincidence)
S3method(print,seroincidence.by)
S3method(print,summary.pop_data)
Expand All @@ -26,6 +31,7 @@ export(as_sr_params)
export(autoplot)
export(check_pop_data)
export(compare_seroincidence)
export(compute_icc)
export(count_strata)
export(est.incidence)
export(est.incidence.by)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# serocalculator (development version)

## New features

* Added `compute_icc()` function to calculate Intraclass Correlation Coefficient (ICC) for seroincidence estimates from clustered sampling designs
- Computes ICC using design effect (DEFF) method
- Works with both single and stratified seroincidence estimates
- Includes print methods for formatted output

# serocalculator 1.4.0

## New features
Expand Down
119 changes: 119 additions & 0 deletions R/compute_cluster_robust_var.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#' Compute cluster-robust variance for seroincidence estimates
#'
#' @description
#' Computes cluster-robust (sandwich) variance estimates for seroincidence
#' parameter estimates when data come from a clustered sampling design.
#' This adjusts the standard errors to account for within-cluster correlation.
#'
#' @param fit a `seroincidence` object from [est_seroincidence()]
#' @param cluster_var name(s) of the cluster variable(s) in the data.
#' Can be a single variable or vector of variables for multi-level clustering.
#' @param stratum_var optional name of the stratum variable
#'
#' @return variance of log(lambda) accounting for clustering
#' @keywords internal
#' @noRd
.compute_cluster_robust_var <- function(
fit,
cluster_var,
stratum_var = NULL) {
# Extract stored data (already split by antigen_iso)
pop_data_list <- attr(fit, "pop_data")
sr_params_list <- attr(fit, "sr_params")
noise_params_list <- attr(fit, "noise_params")
antigen_isos <- attr(fit, "antigen_isos")

# Get MLE estimate
log_lambda_mle <- fit$estimate

# Combine pop_data list back into a single data frame
# to get cluster info
pop_data_combined <- do.call(rbind, pop_data_list)

# Compute score (gradient) using numerical differentiation
# The score is the derivative of log-likelihood w.r.t. log(lambda)
epsilon <- 1e-6

# For each observation, compute the contribution to the score
# We need to identify which cluster each observation belongs to

# Handle multiple clustering levels by creating composite cluster ID
if (length(cluster_var) == 1) {
cluster_ids <- pop_data_combined[[cluster_var]]
} else {
# Create composite cluster ID from multiple variables
cluster_ids <- interaction(
pop_data_combined[, cluster_var, drop = FALSE],
drop = TRUE,
sep = "_"
)
}

# Get unique clusters
unique_clusters <- unique(cluster_ids)
n_clusters <- length(unique_clusters)

# Compute cluster-level scores
cluster_scores <- numeric(n_clusters)

for (i in seq_along(unique_clusters)) {
cluster_id <- unique_clusters[i]

# Get observations in this cluster
cluster_mask <- cluster_ids == cluster_id

# Create temporary pop_data with only this cluster
pop_data_cluster <- pop_data_combined[cluster_mask, , drop = FALSE]

# Split by antigen
pop_data_cluster_list <- split(
pop_data_cluster,
pop_data_cluster$antigen_iso
)

# Ensure all antigen_isos are represented
# (add empty data frames if missing)
for (ag in antigen_isos) {
if (!ag %in% names(pop_data_cluster_list)) {
# Create empty data frame with correct structure
pop_data_cluster_list[[ag]] <- pop_data_list[[ag]][0, , drop = FALSE]
}
}

# Compute log-likelihood for this cluster at MLE
ll_cluster_mle <- -(.nll(
log.lambda = log_lambda_mle,
pop_data = pop_data_cluster_list,
antigen_isos = antigen_isos,
curve_params = sr_params_list,
noise_params = noise_params_list,
verbose = FALSE
))

# Compute log-likelihood at MLE + epsilon
ll_cluster_plus <- -(.nll(
log.lambda = log_lambda_mle + epsilon,
pop_data = pop_data_cluster_list,
antigen_isos = antigen_isos,
curve_params = sr_params_list,
noise_params = noise_params_list,
verbose = FALSE
))

# Numerical derivative (score for this cluster)
cluster_scores[i] <- (ll_cluster_plus - ll_cluster_mle) / epsilon
}

# Compute B matrix (middle of sandwich)
# B = sum of outer products of cluster scores
b_matrix <- sum(cluster_scores^2) # nolint: object_name_linter

# Get Hessian (already computed by nlm)
h_matrix <- fit$hessian # nolint: object_name_linter

# Sandwich variance: V = H^(-1) * B * H^(-1)
# Since we have a scalar parameter, this simplifies to:
var_log_lambda_robust <- b_matrix / (h_matrix^2)

return(var_log_lambda_robust)
}
79 changes: 79 additions & 0 deletions R/compute_icc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#' Calculate Intraclass Correlation Coefficient (ICC) for seroincidence
#'
#' @description
#' Computes the Intraclass Correlation Coefficient (ICC) for a seroincidence
#' estimate from a clustered sampling design. The ICC measures the proportion
#' of total variance that is due to between-cluster variation, indicating
#' how correlated observations within the same cluster are.
#'
#' The ICC is estimated using the design effect (DEFF):
#' \deqn{ICC = \frac{DEFF - 1}{\bar{m} - 1}}
#' where DEFF is the ratio of cluster-robust to standard variance, and
#' \eqn{\bar{m}} is the average cluster size.
#'
#' @param fit a `seroincidence` object from [est_seroincidence()] or a
#' `seroincidence.by` object from [est_seroincidence_by()] that was
#' fitted with clustering (i.e., with `cluster_var` specified)
#'
#' @return
#' * For `seroincidence` objects: A list with components `icc`, `deff`,
#' `avg_cluster_size`, `min_cluster_size`, `max_cluster_size`, `n_clusters`,
#' `cluster_var`, and `antigen_isos`
#' * For `seroincidence.by` objects: A data frame with one row per stratum
#' containing the same information plus stratum identifiers
#'
#' @export
#' @example inst/examples/exm-compute_icc.R
compute_icc <- function(fit) {
UseMethod("compute_icc")
}

#' @export
compute_icc.seroincidence <- function(fit) {
return(.compute_icc_single(fit))
}

#' @export
compute_icc.seroincidence.by <- function(fit) {
# Process each stratum
icc_results <- lapply(names(fit), function(stratum_name) {
stratum_fit <- fit[[stratum_name]]
.icc_single_to_df_row(stratum_name, stratum_fit)
})

# Combine into a data frame
result_df <- do.call(rbind, icc_results)

# Add strata information from attributes
strata_info <- attr(fit, "Strata")
if (!is.null(strata_info)) {
result_df <- merge(
strata_info,
result_df,
by = "Stratum",
all.y = TRUE
)

# Reorder columns to put Stratum and strata variables first
strata_cols <- setdiff(names(strata_info), "Stratum")
other_cols <- setdiff(
names(result_df),
c("Stratum", strata_cols)
)
result_df <- result_df[, c("Stratum", strata_cols, other_cols)]
}

class(result_df) <- c("icc_seroincidence.by", "data.frame")
return(result_df)
}

#' @export
compute_icc.default <- function(fit) {
cli::cli_abort(c(
"x" = paste(
"{.arg fit} must be a {.cls seroincidence} or",
"{.cls seroincidence.by} object from {.fun est_seroincidence}",
"or {.fun est_seroincidence_by}."
)
))
}
88 changes: 88 additions & 0 deletions R/compute_icc_single.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#' Internal function to compute ICC for a single seroincidence object
#' @noRd
#' @keywords internal
.compute_icc_single <- function(fit) {
# Check that clustering was used
cluster_var <- attr(fit, "cluster_var")
if (is.null(cluster_var)) {
cli::cli_abort(c(
"x" = paste(
"ICC can only be computed for models fitted with clustering.",
"Please specify {.arg cluster_var} in {.fun est_seroincidence}."
)
))
}

# Check for multiple clustering levels
if (length(cluster_var) > 1) {
cli::cli_abort(c(
"x" = paste(
"ICC calculation only allowed for one level of clustering.",
"{.arg cluster_var} has {length(cluster_var)} levels:",
"{cluster_var}. Please use a single cluster variable."
)
))
}

stratum_var <- attr(fit, "stratum_var")
antigen_isos <- attr(fit, "antigen_isos")

# Get the combined population data
pop_data_list <- attr(fit, "pop_data")
pop_data_combined <- do.call(rbind, pop_data_list)

# Get cluster information
cluster_ids <- pop_data_combined[[cluster_var]]
unique_clusters <- unique(cluster_ids)
n_clusters <- length(unique_clusters)

# Calculate cluster sizes (number of observations per cluster)
cluster_sizes <- table(cluster_ids)
avg_cluster_size <- mean(cluster_sizes)
min_cluster_size <- min(cluster_sizes)
max_cluster_size <- max(cluster_sizes)

# Compute standard variance (from Hessian)
var_standard <- 1 / fit$hessian |> as.numeric()

# Compute cluster-robust variance
var_robust <- .compute_cluster_robust_var(
fit = fit,
cluster_var = cluster_var,
stratum_var = stratum_var
) |> as.numeric()

# Calculate design effect
deff <- var_robust / var_standard

# Calculate ICC using design effect
# DEFF = 1 + (m̄ - 1) × ICC
# ICC = (DEFF - 1) / (m̄ - 1)
if (avg_cluster_size > 1) {
icc <- (deff - 1) / (avg_cluster_size - 1)
} else {
cli::cli_warn(c(
"!" = paste(
"Average cluster size is 1; ICC cannot be computed.",
"Returning NA."
)
))
icc <- NA_real_
}

# Return results as a list
result <- list(
icc = icc,
deff = deff,
avg_cluster_size = avg_cluster_size,
min_cluster_size = min_cluster_size,
max_cluster_size = max_cluster_size,
n_clusters = n_clusters,
cluster_var = cluster_var,
antigen_isos = paste(antigen_isos, collapse = ", ")
)

class(result) <- c("icc_seroincidence", "list")

return(result)
}
23 changes: 23 additions & 0 deletions R/icc_single_to_df_row.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Helper function to convert single seroincidence ICC result to data frame row
#' @noRd
#' @keywords internal
.icc_single_to_df_row <- function(stratum_name, stratum_fit) {
icc_res <- .compute_icc_single(stratum_fit)

# Add stratum identifier
icc_res$Stratum <- stratum_name

# Convert to data frame row
data.frame(
Stratum = icc_res$Stratum,
icc = icc_res$icc,
deff = icc_res$deff,
avg_cluster_size = icc_res$avg_cluster_size,
min_cluster_size = icc_res$min_cluster_size,
max_cluster_size = icc_res$max_cluster_size,
n_clusters = icc_res$n_clusters,
cluster_var = icc_res$cluster_var,
antigen_isos = icc_res$antigen_isos,
stringsAsFactors = FALSE
)
}
Loading