Commit cdff82c
committed: plot
1 parent e00c659 commit cdff82c

File tree: 6 files changed (+83, -29 lines)

DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 Package: svyTable1
 Title: Create Survey-Weighted Descriptive Statistics and Diagnostic Tables
-Version: 0.5.0
+Version: 0.6.0
 Authors@R: c(person("Ehsan", "Karim",
     email = "[email protected]",
     role = c("aut", "cre")),

NAMESPACE

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,9 @@ importFrom(WeightedROC,WeightedAUC)
 importFrom(WeightedROC,WeightedROC)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
+importFrom(graphics,abline)
+importFrom(graphics,plot)
+importFrom(graphics,title)
 importFrom(magrittr,"%>%")
 importFrom(stats,coef)
 importFrom(stats,confint)
@@ -20,6 +23,7 @@ importFrom(stats,plogis)
 importFrom(stats,quantile)
 importFrom(stats,residuals)
 importFrom(stats,vcov)
+importFrom(stats,weights)
 importFrom(survey,SE)
 importFrom(survey,degf)
 importFrom(survey,regTermTest)

R/svyAUC.R

Lines changed: 48 additions & 14 deletions
@@ -1,21 +1,23 @@
-#' Calculate a Design-Correct AUC for a Survey Model
+#' Calculate and Optionally Plot a Design-Correct AUC for a Survey Model
 #'
 #' @description
 #' This function calculates the Area Under the Curve (AUC) and its design-correct
-#' standard error and 95% confidence interval for a survey logistic regression
-#' model. It correctly accounts for strata and clusters by using a
-#' replicate-weights survey design object.
+#' standard error and 95% confidence interval. It can also generate a plot of
+#' the weighted ROC curve.
 #'
 #' @param fit A fitted model object of class `svyglm`.
 #' @param design A replicate-weights survey design object, typically created with `as.svrepdesign`.
+#' @param plot A logical value. If `TRUE`, an ROC curve is plotted. Defaults to `FALSE`.
 #'
 #' @return
-#' A `data.frame` containing the AUC point estimate, its standard error (SE),
-#' and the lower and upper bounds of the 95% confidence interval.
+#' If `plot = FALSE` (default), returns a `data.frame` with the AUC, SE, and 95% CI.
+#' If `plot = TRUE`, invisibly returns a list containing the summary `data.frame`
+#' and another `data.frame` with the ROC curve coordinates (TPR and FPR).
 #'
 #' @importFrom survey withReplicates SE
 #' @importFrom WeightedROC WeightedROC WeightedAUC
-#' @importFrom stats model.frame model.matrix coef plogis
+#' @importFrom stats model.frame model.matrix coef plogis weights
+#' @importFrom graphics plot abline title
 #'
 #' @export
 #'
@@ -57,7 +59,7 @@
 #' print(auc_results)
 #' }
 #' }
-svyAUC <- function(fit, design) {
+svyAUC <- function(fit, design, plot = FALSE) {
 
   # Input Validation
   if (!inherits(design, "svyrep.design")) {
@@ -69,6 +71,7 @@ svyAUC <- function(fit, design) {
 
   outcome_name <- all.vars(fit$formula[[2]])[1]
 
+  # Define the statistic function to be used with replicates
   auc_statistic <- function(weights, data) {
     model_formula <- formula(fit)
     mf <- model.frame(model_formula, data)
@@ -98,26 +101,55 @@ svyAUC <- function(fit, design) {
     WeightedROC::WeightedAUC(roc_curve)
   }
 
+  # Run the calculation across all replicate weights
   result <- survey::withReplicates(
     design,
     theta = auc_statistic,
     return.replicates = TRUE
   )
 
+  # Manually calculate the confidence interval
   auc_estimate <- result$theta
   se <- survey::SE(result)
-  # ci <- stats::confint(result)
 
-  output <- data.frame(
+  summary_df <- data.frame(
     AUC = auc_estimate,
     SE = se,
-    # CI_Lower = ci[1],
-    # CI_Upper = ci[2]
     CI_Lower = auc_estimate - 1.96 * se,
     CI_Upper = auc_estimate + 1.96 * se
   )
+  rownames(summary_df) <- NULL
 
-  rownames(output) <- NULL
-  return(output)
+  # --- PLOTTING LOGIC ---
+  if (plot) {
+    # Calculate ROC curve points using the full-sample weights
+    full_weights <- weights(design, "sampling")
+
+    predictions <- predict(fit, newdata = design$variables, type = "response")
+    outcome <- design$variables[[outcome_name]]
+    if (is.factor(outcome)) {
+      outcome <- as.numeric(outcome) - 1
+    }
+
+    roc_curve_points <- WeightedROC::WeightedROC(
+      guess = predictions,
+      label = outcome,
+      weight = full_weights
+    )
+
+    plot(roc_curve_points$FPR, roc_curve_points$TPR,
+         type = "l",
+         xlab = "1 - Specificity (FPR)",
+         ylab = "Sensitivity (TPR)",
+         main = "Survey-Weighted ROC Curve"
+    )
+    abline(0, 1, lty = 2)
+    title(sub = paste0("AUC = ", round(summary_df$AUC, 3)), adj = 1)
+
+    invisible(list(summary = summary_df, roc_data = roc_curve_points))
+
+  } else {
+    return(summary_df)
+  }
 }
 
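With these changes, `svyAUC()` has two return shapes depending on `plot`. A minimal usage sketch, assuming the `fit_obesity_rep` model and `rep_design` replicate-weights design constructed in the package vignette:

```r
# Default: a one-row data.frame with AUC, SE, CI_Lower, CI_Upper
auc_tab <- svyAUC(fit_obesity_rep, rep_design)
print(auc_tab)

# With plot = TRUE, a base-graphics ROC curve is drawn and a list is
# returned invisibly, so assign it to keep the pieces for later use
auc_list <- svyAUC(fit_obesity_rep, rep_design, plot = TRUE)
auc_list$summary                            # the same one-row summary table
head(auc_list$roc_data[, c("FPR", "TPR")])  # weighted ROC curve coordinates
```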
man/svyAUC.Rd

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default.

vignettes/references.bib

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ @misc{nhanes_reliability_estimates
   author = {Centers for Disease Control and Prevention},
   title = {NHANES Tutorials: Reliability of Estimates Module},
   organization = {National Center for Health Statistics},
-  year = {2005},
+  year = {2025},
   url = {https://wwwn.cdc.gov/nchs/nhanes/tutorials/reliabilityofestimates.aspx},
   note = {Accessed: October 12, 2025}
 }

vignettes/using-svyTable1.Rmd

Lines changed: 20 additions & 6 deletions
@@ -294,9 +294,6 @@ The package also includes `svygof()` to perform the Archer-Lemeshow goodness-of-
 
 ```{r gof-test}
 # We use the same model and design from the regression diagnostics example
-# fit_obesity <- svyglm(...)
-# adult_design_complete <- svydesign(...)
-
 # 1. Run the goodness-of-fit test
 gof_results <- svygof(fit_obesity, adult_design_complete)
 
@@ -307,6 +304,8 @@ knitr::kable(
 )
 ```
 
+This significant p-value provides evidence of poor fit: the model does not accurately predict the outcome across the risk groups, suggesting it may be mis-specified or missing important variables or interactions.
+
 ## Design-Correct AUC for Model Performance
 
 To evaluate the predictive performance of a model, you can calculate the Area Under the Curve (AUC) using `svyAUC()`. This function correctly accounts for the complex survey design (strata, clusters, and weights) by using a replicate-weights design object, which provides a more accurate estimate of the AUC's variance and confidence interval.
@@ -324,13 +323,28 @@ fit_obesity_rep <- svyglm(
 )
 
 # 3. Calculate the design-correct AUC
-auc_results <- svyAUC(fit_obesity_rep, rep_design)
+auc_results_list <- svyAUC(fit_obesity_rep, rep_design, plot = TRUE)
 
-# 4. Display the results
+# 4. Display the summary table from the returned list
 knitr::kable(
-  auc_results,
+  auc_results_list$summary,
   caption = "Design-Correct AUC for Obesity Model"
 )
+
+# 5. Use the roc_data component to build a custom ggplot
+library(ggplot2)
+ggplot(auc_results_list$roc_data, aes(x = FPR, y = TPR)) +
+  geom_line(color = "blue", size = 1) +
+  geom_abline(linetype = "dashed") +
+  labs(
+    title = "Survey-Weighted ROC Curve",
+    subtitle = paste0("AUC = ", round(auc_results_list$summary$AUC, 3)),
+    x = "1 - Specificity (FPR)",
+    y = "Sensitivity (TPR)"
+  ) +
+  theme_minimal()
 ```
 
+An AUC of 0.5 represents a model that discriminates no better than random chance. The model's AUC of 0.587 is very close to this baseline, indicating poor discrimination: the model is not effective at distinguishing individuals who are obese from those who are not.
+
 ## References
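Because `svyAUC()` also returns the design-based confidence interval, the interpretation above can be checked directly against the chance value of 0.5. A short sketch, assuming the `auc_results_list` object created in the vignette chunk above:

```r
# Does the design-based 95% CI for the AUC exclude the chance value of 0.5?
auc_ci <- auc_results_list$summary
if (auc_ci$CI_Lower > 0.5) {
  message("Discrimination is better than chance; 95% CI: [",
          round(auc_ci$CI_Lower, 3), ", ", round(auc_ci$CI_Upper, 3), "]")
} else {
  message("The 95% CI includes 0.5: no evidence of discrimination beyond chance.")
}
```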
