Skip to content

Commit 65a8ccd

Browse files
committed
gof + auc
1 parent 1c28cee commit 65a8ccd

File tree

9 files changed

+500
-19
lines changed

9 files changed

+500
-19
lines changed

.Rhistory

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,3 @@
1-
paste(missing_vars, collapse = ", ")))
2-
}
3-
# --- Helper function for formatting ---
4-
format_num <- function(n, is_weighted) {
5-
if (is_weighted) n <- round(n)
6-
if (commas) return(format(n, big.mark = ","))
7-
return(as.character(n))
8-
}
9-
# --- Table Generation ---
10-
if (nrow(df) == 0) return(data.frame(Error = "Input data has 0 rows"))
11-
if(!is.factor(df[[strata_var]])) df[[strata_var]] <- factor(df[[strata_var]])
121
df[[strata_var]] <- droplevels(df[[strata_var]])
132
strata_levels <- levels(df[[strata_var]])
143
unweighted_n_overall <- nrow(df)
@@ -510,3 +499,14 @@ devtools::build_vignettes()
510499
devtools::check()
511500
# This will automatically change 0.2.0 to 0.3.0
512501
usethis::use_version("minor")
502+
devtools::document()
503+
# In your R console
504+
devtools::install()
505+
# In your R console
506+
devtools::install()
507+
# In your R console
508+
devtools::install()
509+
# In your R console
510+
devtools::install()
511+
devtools::build_vignettes()
512+
devtools::install()

DESCRIPTION

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
Package: svyTable1
2-
Title: Create Survey-Weighted Descriptive Statistics Tables
3-
Version: 0.4.0
2+
Title: Create Survey-Weighted Descriptive Statistics and Diagnostic Tables
3+
Version: 0.5.0
44
Authors@R: c(person("Ehsan", "Karim",
55
email = "[email protected]",
66
role = c("aut", "cre")),
77
person("Esteban", "Valencia",
88
comment = "Provided feedback on generalizing the svydiag function, tested installation issues and fixed a bug regarding effective sample size calculation.",
99
role = "ctb"))
10-
Description: A simple tool to create 'Table 1' summaries from complex
11-
survey data, with options for weighted, unweighted, and mixed displays.
10+
Description: A tool to create publication-ready descriptive summary tables
11+
from complex survey data. It also provides a suite of functions to
12+
evaluate survey-weighted regression models, including coefficient
13+
diagnostics, goodness-of-fit tests, and design-correct AUC calculations.
1214
License: MIT + file LICENSE
1315
Encoding: UTF-8
1416
RoxygenNote: 7.3.3
@@ -17,7 +19,8 @@ Imports:
1719
dplyr,
1820
tibble,
1921
magrittr,
20-
stats
22+
stats,
23+
WeightedROC
2124
Suggests:
2225
knitr,
2326
rmarkdown,

NAMESPACE

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export("%>%")
4+
export(svyAUC)
45
export(svydiag)
6+
export(svygof)
57
export(svytable1)
68
import(stats)
9+
importFrom(WeightedROC,WeightedAUC)
10+
importFrom(WeightedROC,WeightedROC)
711
importFrom(dplyr,mutate)
812
importFrom(dplyr,select)
913
importFrom(magrittr,"%>%")
1014
importFrom(stats,coef)
1115
importFrom(stats,confint)
16+
importFrom(stats,fitted)
17+
importFrom(stats,model.frame)
18+
importFrom(stats,model.matrix)
19+
importFrom(stats,plogis)
20+
importFrom(stats,quantile)
21+
importFrom(stats,residuals)
1222
importFrom(stats,vcov)
1323
importFrom(survey,SE)
1424
importFrom(survey,degf)
25+
importFrom(survey,regTermTest)
1526
importFrom(survey,svyby)
1627
importFrom(survey,svyciprop)
28+
importFrom(survey,svydesign)
29+
importFrom(survey,svyglm)
1730
importFrom(survey,svymean)
1831
importFrom(survey,svytable)
1932
importFrom(survey,svyvar)
33+
importFrom(survey,withReplicates)
2034
importFrom(tibble,tibble)

R/svyAUC.R

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#' Calculate a Design-Correct AUC for a Survey Model
2+
#'
3+
#' @description
4+
#' This function calculates the Area Under the Curve (AUC) and its design-correct
5+
#' standard error and 95% confidence interval for a survey logistic regression
6+
#' model. It correctly accounts for strata and clusters by using a
7+
#' replicate-weights survey design object.
8+
#'
9+
#' @param fit A fitted model object of class `svyglm`.
10+
#' @param design A replicate-weights survey design object, typically created with `as.svrepdesign`.
11+
#'
12+
#' @return
13+
#' A `data.frame` containing the AUC point estimate, its standard error (SE),
14+
#' and the lower and upper bounds of the 95% confidence interval.
15+
#'
16+
#' @importFrom survey withReplicates SE
17+
#' @importFrom WeightedROC WeightedROC WeightedAUC
18+
#' @importFrom stats model.frame model.matrix coef plogis
19+
#'
20+
#' @export
21+
#'
22+
#' @examples
23+
#' \dontrun{
24+
#' # Ensure required packages are loaded
25+
#' if (requireNamespace("survey", quietly = TRUE) &&
26+
#' requireNamespace("NHANES", quietly = TRUE) &&
27+
#' requireNamespace("dplyr", quietly = TRUE)) {
28+
#'
29+
#' # 1. Prepare Data
30+
#' data(NHANESraw, package = "NHANES")
31+
#' nhanes_data <- NHANESraw %>%
32+
#' dplyr::filter(Age >= 20) %>%
33+
#' dplyr::mutate(ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
34+
#' levels = c("Not Obese", "Obese"))) %>%
35+
#' dplyr::filter(complete.cases(ObeseStatus, Age, Gender, Race1,
36+
#' WTMEC2YR, SDMVPSU, SDMVSTRA))
37+
#'
38+
#' # 2. Create a replicate design object
39+
#' std_design <- survey::svydesign(
40+
#' ids = ~SDMVPSU,
41+
#' strata = ~SDMVSTRA,
42+
#' weights = ~WTMEC2YR,
43+
#' nest = TRUE,
44+
#' data = nhanes_data
45+
#' )
46+
#' rep_design <- survey::as.svrepdesign(std_design)
47+
#'
48+
#' # 3. Fit a survey logistic regression model using the replicate design
49+
#' fit_obesity_rep <- survey::svyglm(
50+
#' ObeseStatus ~ Age + Gender + Race1,
51+
#' design = rep_design,
52+
#' family = quasibinomial()
53+
#' )
54+
#'
55+
#' # 4. Calculate the design-correct AUC
56+
#' auc_results <- svyAUC(fit_obesity_rep, rep_design)
57+
#' print(auc_results)
58+
#' }
59+
#' }
60+
svyAUC <- function(fit, design) {
61+
62+
# Input Validation
63+
if (!inherits(design, "svyrep.design")) {
64+
stop("Error: This function requires a replicate-weights survey design object (created with as.svrepdesign).")
65+
}
66+
if (!inherits(fit, "svyglm")) {
67+
stop("Error: This function is designed for 'svyglm' model objects.")
68+
}
69+
70+
outcome_name <- all.vars(fit$formula[[2]])[1]
71+
72+
auc_statistic <- function(weights, data) {
73+
model_formula <- formula(fit)
74+
mf <- model.frame(model_formula, data)
75+
mm <- model.matrix(model_formula, mf)
76+
beta <- coef(fit)
77+
eta <- mm %*% beta
78+
predictions <- as.vector(plogis(eta))
79+
80+
outcome <- data[[outcome_name]]
81+
if(is.factor(outcome)) {
82+
outcome <- as.numeric(outcome) - 1
83+
}
84+
85+
local_data <- data.frame(
86+
predictions = predictions,
87+
outcome = outcome,
88+
w = weights
89+
)
90+
91+
local_data <- local_data[local_data$w > 0 & !is.na(local_data$w), ]
92+
93+
roc_curve <- WeightedROC::WeightedROC(
94+
guess = local_data$predictions,
95+
label = local_data$outcome,
96+
weight = local_data$w
97+
)
98+
WeightedROC::WeightedAUC(roc_curve)
99+
}
100+
101+
result <- survey::withReplicates(
102+
design,
103+
theta = auc_statistic,
104+
return.replicates = TRUE
105+
)
106+
107+
auc_estimate <- result$theta
108+
se <- survey::SE(result)
109+
ci <- stats::confint(result)
110+
111+
output <- data.frame(
112+
AUC = auc_estimate,
113+
SE = se,
114+
CI_Lower = ci[1],
115+
CI_Upper = ci[2]
116+
)
117+
118+
rownames(output) <- NULL
119+
return(output)
120+
}
121+

R/svygof.R

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#' Archer-Lemeshow Goodness-of-Fit Test for Survey Models
2+
#'
3+
#' @description
4+
#' Performs an Archer-Lemeshow goodness-of-fit (GOF) test for logistic
5+
#' regression models fitted with complex survey data. This test is an extension
6+
#' of the Hosmer-Lemeshow test for survey designs.
7+
#'
8+
#' @details
9+
#' The function automates the process of calculating residuals and fitted values,
10+
#' creating groups (deciles by default) based on fitted probabilities,
11+
#' building a new survey design with these variables, and running a final
12+
#' Wald test. A non-significant p-value (e.g., p > 0.05) suggests no evidence
13+
#' of a poor fit.
14+
#'
15+
#' @param fit A fitted model object of class `svyglm`.
16+
#' @param design A survey design object of class `svydesign` or `svyrep.design`
17+
#' that was used to fit the model.
18+
#' @param G An integer specifying the number of groups to create based on
19+
#' fitted probabilities. Defaults to 10 (deciles).
20+
#'
21+
#' @return
22+
#' A `data.frame` containing the F-statistic, the numerator (df1) and
23+
#' denominator (df2) degrees of freedom, and the p-value for the test.
24+
#'
25+
#' @source
26+
#' The implementation is a formalized function based on the script and discussion
27+
#' in the R-help mailing list archives: \url{https://stat.ethz.ch/pipermail/r-help/2016-November/443223.html}
28+
#'
29+
#' @importFrom survey svydesign svyglm regTermTest
30+
#' @importFrom stats residuals fitted quantile
31+
#'
32+
#' @export
33+
#'
34+
#' @examples
35+
#' \dontrun{
36+
#' # Ensure required packages are loaded
37+
#' if (requireNamespace("survey", quietly = TRUE) &&
38+
#' requireNamespace("NHANES", quietly = TRUE) &&
39+
#' requireNamespace("dplyr", quietly = TRUE)) {
40+
#'
41+
#' # 1. Prepare Data
42+
#' data(NHANESraw, package = "NHANES")
43+
#' nhanes_data <- NHANESraw %>%
44+
#' dplyr::filter(Age >= 20) %>%
45+
#' dplyr::mutate(ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
46+
#' levels = c("Not Obese", "Obese"))) %>%
47+
#' dplyr::filter(complete.cases(ObeseStatus, Age, Gender, Race1,
48+
#' WTMEC2YR, SDMVPSU, SDMVSTRA))
49+
#'
50+
#' # 2. Create a replicate design object
51+
#' std_design <- survey::svydesign(
52+
#' ids = ~SDMVPSU,
53+
#' strata = ~SDMVSTRA,
54+
#' weights = ~WTMEC2YR,
55+
#' nest = TRUE,
56+
#' data = nhanes_data
57+
#' )
58+
#' rep_design <- survey::as.svrepdesign(std_design)
59+
#'
60+
#' # 3. Fit a survey logistic regression model using the replicate design
61+
#' fit_obesity_rep <- survey::svyglm(
62+
#' ObeseStatus ~ Age + Gender + Race1,
63+
#' design = rep_design,
64+
#' family = quasibinomial()
65+
#' )
66+
#'
67+
#' # 4. Calculate the design-correct AUC
68+
#' auc_results <- svyAUC(fit_obesity_rep, rep_design)
69+
#' print(auc_results)
70+
#' }
71+
#' }
72+
svygof <- function(fit, design, G = 10) {
73+
74+
# Get residuals and fitted values from the model
75+
resids <- stats::residuals(fit, type = "response")
76+
fitted_vals <- stats::fitted(fit)
77+
78+
# Create a data frame of model results, using row names to link back
79+
model_data <- data.frame(
80+
.id = names(resids),
81+
r = resids,
82+
f = fitted_vals
83+
)
84+
85+
# Use the data directly from the design object, which is the most reliable source
86+
data_with_res <- design$variables
87+
data_with_res$.id <- rownames(data_with_res)
88+
data_with_res <- merge(data_with_res, model_data, by = ".id", all.x = TRUE)
89+
90+
# Create G groups based on fitted values
91+
breaks <- stats::quantile(data_with_res$f, probs = seq(0, 1, 1 / G), na.rm = TRUE)
92+
unique_breaks <- unique(breaks)
93+
data_with_res$g <- cut(data_with_res$f, breaks = unique_breaks, include.lowest = TRUE)
94+
95+
# Rebuild the design object using its internal components
96+
new_design <- survey::svydesign(
97+
ids = design$cluster,
98+
strata = design$strata,
99+
weights = design$weights,
100+
data = data_with_res,
101+
nest = isTRUE(design$nest)
102+
)
103+
104+
# Run the test
105+
decile_model <- survey::svyglm(r ~ g, design = new_design, na.action = na.omit)
106+
test_result <- survey::regTermTest(decile_model, ~g)
107+
108+
# Return a tidy data frame
109+
output <- data.frame(
110+
F_statistic = test_result$Ftest[1],
111+
df1 = test_result$df,
112+
df2 = test_result$ddf,
113+
p_value = test_result$p
114+
)
115+
116+
return(output)
117+
}

0 commit comments

Comments
 (0)