Commit cdff82c
committed: plot
1 parent e00c659 commit cdff82c

File tree: 6 files changed (+83, -29 lines)

DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 Package: svyTable1
 Title: Create Survey-Weighted Descriptive Statistics and Diagnostic Tables
-Version: 0.5.0
+Version: 0.6.0
 Authors@R: c(person("Ehsan", "Karim",
     email = "[email protected]",
     role = c("aut", "cre")),

NAMESPACE

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,9 @@ importFrom(WeightedROC,WeightedAUC)
 importFrom(WeightedROC,WeightedROC)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
+importFrom(graphics,abline)
+importFrom(graphics,plot)
+importFrom(graphics,title)
 importFrom(magrittr,"%>%")
 importFrom(stats,coef)
 importFrom(stats,confint)
@@ -20,6 +23,7 @@ importFrom(stats,plogis)
 importFrom(stats,quantile)
 importFrom(stats,residuals)
 importFrom(stats,vcov)
+importFrom(stats,weights)
 importFrom(survey,SE)
 importFrom(survey,degf)
 importFrom(survey,regTermTest)

R/svyAUC.R

Lines changed: 48 additions & 14 deletions
@@ -1,21 +1,23 @@
-#' Calculate a Design-Correct AUC for a Survey Model
+#' Calculate and Optionally Plot a Design-Correct AUC for a Survey Model
 #'
 #' @description
 #' This function calculates the Area Under the Curve (AUC) and its design-correct
-#' standard error and 95% confidence interval for a survey logistic regression
-#' model. It correctly accounts for strata and clusters by using a
-#' replicate-weights survey design object.
+#' standard error and 95% confidence interval. It can also generate a plot of
+#' the weighted ROC curve.
 #'
 #' @param fit A fitted model object of class `svyglm`.
 #' @param design A replicate-weights survey design object, typically created with `as.svrepdesign`.
+#' @param plot A logical value. If `TRUE`, an ROC curve is plotted. Defaults to `FALSE`.
 #'
 #' @return
-#' A `data.frame` containing the AUC point estimate, its standard error (SE),
-#' and the lower and upper bounds of the 95% confidence interval.
+#' If `plot = FALSE` (default), returns a `data.frame` with the AUC, SE, and 95% CI.
+#' If `plot = TRUE`, invisibly returns a list containing the summary `data.frame`
+#' and another `data.frame` with the ROC curve coordinates (TPR and FPR).
 #'
 #' @importFrom survey withReplicates SE
 #' @importFrom WeightedROC WeightedROC WeightedAUC
-#' @importFrom stats model.frame model.matrix coef plogis
+#' @importFrom stats model.frame model.matrix coef plogis weights
+#' @importFrom graphics plot abline title
 #'
 #' @export
 #'
@@ -57,7 +59,7 @@
 #' print(auc_results)
 #' }
 #' }
-svyAUC <- function(fit, design) {
+svyAUC <- function(fit, design, plot = FALSE) {
 
   # Input Validation
   if (!inherits(design, "svyrep.design")) {
@@ -69,6 +71,7 @@ svyAUC <- function(fit, design) {
 
   outcome_name <- all.vars(fit$formula[[2]])[1]
 
+  # Define the statistic function to be used with replicates
   auc_statistic <- function(weights, data) {
     model_formula <- formula(fit)
     mf <- model.frame(model_formula, data)
@@ -98,26 +101,55 @@ svyAUC <- function(fit, design) {
     WeightedROC::WeightedAUC(roc_curve)
   }
 
+  # Run the calculation across all replicate weights
   result <- survey::withReplicates(
     design,
     theta = auc_statistic,
     return.replicates = TRUE
   )
 
+  # Manually calculate the confidence interval
   auc_estimate <- result$theta
   se <- survey::SE(result)
-  # ci <- stats::confint(result)
 
-  output <- data.frame(
+  summary_df <- data.frame(
     AUC = auc_estimate,
     SE = se,
-    # CI_Lower = ci[1],
-    # CI_Upper = ci[2]
     CI_Lower = auc_estimate - 1.96 * se,
     CI_Upper = auc_estimate + 1.96 * se
   )
+  rownames(summary_df) <- NULL
 
-  rownames(output) <- NULL
-  return(output)
+  # --- PLOTTING LOGIC ---
+  if (plot) {
+    # Calculate ROC curve points using the full-sample weights
+    full_weights <- weights(design, "sampling")
+
+    predictions <- predict(fit, newdata = design$variables, type = "response")
+    outcome <- design$variables[[outcome_name]]
+    if (is.factor(outcome)) {
+      outcome <- as.numeric(outcome) - 1
+    }
+
+    roc_curve_points <- WeightedROC::WeightedROC(
+      guess = predictions,
+      label = outcome,
+      weight = full_weights
+    )
+
+    plot(roc_curve_points$FPR, roc_curve_points$TPR,
+         type = "l",
+         xlab = "1 - Specificity (FPR)",
+         ylab = "Sensitivity (TPR)",
+         main = "Survey-Weighted ROC Curve"
+    )
+    abline(0, 1, lty = 2)
+    title(sub = paste0("AUC = ", round(summary_df$AUC, 3)), adj = 1)
+
+    invisible(list(summary = summary_df, roc_data = roc_curve_points))
+
+  } else {
+    return(summary_df)
+  }
 }
 
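With these changes, `svyAUC()` has two return shapes depending on `plot`. A minimal usage sketch, assuming the `fit_obesity_rep` model and `rep_design` replicate-weights design constructed in the package vignette:

```r
# Default: a one-row data.frame with AUC, SE, CI_Lower, CI_Upper
auc_tab <- svyAUC(fit_obesity_rep, rep_design)
print(auc_tab)

# With plot = TRUE, a base-graphics ROC curve is drawn and a list is
# returned invisibly, so assign it to keep the pieces for later use
auc_list <- svyAUC(fit_obesity_rep, rep_design, plot = TRUE)
auc_list$summary                            # the same one-row summary table
head(auc_list$roc_data[, c("FPR", "TPR")])  # weighted ROC curve coordinates
```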
man/svyAUC.Rd

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default.

vignettes/references.bib

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ @misc{nhanes_reliability_estimates
   author = {Centers for Disease Control and Prevention},
   title = {NHANES Tutorials: Reliability of Estimates Module},
   organization = {National Center for Health Statistics},
-  year = {2005},
+  year = {2025},
   url = {https://wwwn.cdc.gov/nchs/nhanes/tutorials/reliabilityofestimates.aspx},
   note = {Accessed: October 12, 2025}
 }

vignettes/using-svyTable1.Rmd

Lines changed: 20 additions & 6 deletions
@@ -294,9 +294,6 @@ The package also includes `svygof()` to perform the Archer-Lemeshow goodness-of-
 
 ```{r gof-test}
 # We use the same model and design from the regression diagnostics example
-# fit_obesity <- svyglm(...)
-# adult_design_complete <- svydesign(...)
-
 # 1. Run the goodness-of-fit test
 gof_results <- svygof(fit_obesity, adult_design_complete)
 
@@ -307,6 +304,8 @@ knitr::kable(
 )
 ```
 
+This significant p-value provides evidence of poor fit: the model does not accurately predict the outcome across the risk groups, suggesting it may be mis-specified or missing important variables or interactions.
+
 ## Design-Correct AUC for Model Performance
 
 To evaluate the predictive performance of a model, you can calculate the Area Under the Curve (AUC) using `svyAUC()`. This function correctly accounts for the complex survey design (strata, clusters, and weights) by using a replicate-weights design object, which provides a more accurate estimate of the AUC's variance and confidence interval.
@@ -324,13 +323,28 @@ fit_obesity_rep <- svyglm(
 )
 
 # 3. Calculate the design-correct AUC
-auc_results <- svyAUC(fit_obesity_rep, rep_design)
+auc_results_list <- svyAUC(fit_obesity_rep, rep_design, plot = TRUE)
 
-# 4. Display the results
+# 4. Display the summary table from the returned list
 knitr::kable(
-  auc_results,
+  auc_results_list$summary,
   caption = "Design-Correct AUC for Obesity Model"
 )
+
+# 5. Use the roc_data component to build a custom ggplot
+library(ggplot2)
+ggplot(auc_results_list$roc_data, aes(x = FPR, y = TPR)) +
+  geom_line(color = "blue", size = 1) +
+  geom_abline(linetype = "dashed") +
+  labs(
+    title = "Survey-Weighted ROC Curve",
+    subtitle = paste0("AUC = ", round(auc_results_list$summary$AUC, 3)),
+    x = "1 - Specificity (FPR)",
+    y = "Sensitivity (TPR)"
+  ) +
+  theme_minimal()
 ```
 
+An AUC of 0.5 represents a model that discriminates no better than random chance. The model's AUC of 0.587 is very close to this baseline, indicating poor discrimination: the model is not effective at distinguishing individuals who are obese from those who are not.
+
 ## References
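Because `svyAUC()` also returns the design-based confidence interval, the interpretation above can be checked directly against the chance value of 0.5. A short sketch, assuming the `auc_results_list` object created in the vignette chunk above:

```r
# Does the design-based 95% CI for the AUC exclude the chance value of 0.5?
auc_ci <- auc_results_list$summary
if (auc_ci$CI_Lower > 0.5) {
  message("Discrimination is better than chance; 95% CI: [",
          round(auc_ci$CI_Lower, 3), ", ", round(auc_ci$CI_Upper, 3), "]")
} else {
  message("The 95% CI includes 0.5: no evidence of discrimination beyond chance.")
}
```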
