ehsanx
diff --git a/‎.Rhistory‎
Lines changed: 11 additions & 11 deletions b/‎.Rhistory‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 8 additions & 5 deletions b/‎DESCRIPTION‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 14 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎R/svyAUC.R‎
Lines changed: 121 additions & 0 deletions b/‎R/svyAUC.R‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎R/svygof.R‎
Lines changed: 117 additions & 0 deletions b/‎R/svygof.R‎
Lines changed: 117 additions & 0 deletions
@@ -1,14 +1,3 @@
-paste(missing_vars, collapse = ", ")))
-}
-# --- Helper function for formatting ---
-format_num <- function(n, is_weighted) {
-if (is_weighted) n <- round(n)
-if (commas) return(format(n, big.mark = ","))
-return(as.character(n))
-}
-# --- Table Generation ---
-if (nrow(df) == 0) return(data.frame(Error = "Input data has 0 rows"))
-if(!is.factor(df[[strata_var]])) df[[strata_var]] <- factor(df[[strata_var]])
 df[[strata_var]] <- droplevels(df[[strata_var]])
 strata_levels <- levels(df[[strata_var]])
 unweighted_n_overall <- nrow(df)
@@ -510,3 +499,14 @@ devtools::build_vignettes()
 devtools::check()
 # This will automatically change 0.2.0 to 0.3.0
 usethis::use_version("minor")
+devtools::document()
+# In your R console
+devtools::install()
+# In your R console
+devtools::install()
+# In your R console
+devtools::install()
+# In your R console
+devtools::install()
+devtools::build_vignettes()
+devtools::install()
@@ -1,14 +1,16 @@
 Package: svyTable1
-Title: Create Survey-Weighted Descriptive Statistics Tables
-Version: 0.4.0
+Title: Create Survey-Weighted Descriptive Statistics and Diagnostic Tables
+Version: 0.5.0
 Authors@R: c(person("Ehsan", "Karim",
                     email = "[email protected]",
                     role = c("aut", "cre")),
              person("Esteban", "Valencia",
                     comment = "Provided feedback on generalizing the svydiag function, tested installation issues and fixed a bug regarding effective sample size calculation.",
                     role = "ctb"))
-Description: A simple tool to create 'Table 1' summaries from complex 
-    survey data, with options for weighted, unweighted, and mixed displays.
+Description: A tool to create publication-ready descriptive summary tables 
+    from complex survey data. It also provides a suite of functions to 
+    evaluate survey-weighted regression models, including coefficient 
+    diagnostics, goodness-of-fit tests, and design-correct AUC calculations.
 License: MIT + file LICENSE
 Encoding: UTF-8
 RoxygenNote: 7.3.3
@@ -17,7 +19,8 @@ Imports:
     dplyr,
     tibble,
     magrittr,
-    stats
+    stats,
+    WeightedROC
 Suggests: 
     knitr,
     rmarkdown,
 
@@ -1,20 +1,34 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(svyAUC)
 export(svydiag)
+export(svygof)
 export(svytable1)
 import(stats)
+importFrom(WeightedROC,WeightedAUC)
+importFrom(WeightedROC,WeightedROC)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(magrittr,"%>%")
 importFrom(stats,coef)
 importFrom(stats,confint)
+importFrom(stats,fitted)
+importFrom(stats,model.frame)
+importFrom(stats,model.matrix)
+importFrom(stats,plogis)
+importFrom(stats,quantile)
+importFrom(stats,residuals)
 importFrom(stats,vcov)
 importFrom(survey,SE)
 importFrom(survey,degf)
+importFrom(survey,regTermTest)
 importFrom(survey,svyby)
 importFrom(survey,svyciprop)
+importFrom(survey,svydesign)
+importFrom(survey,svyglm)
 importFrom(survey,svymean)
 importFrom(survey,svytable)
 importFrom(survey,svyvar)
+importFrom(survey,withReplicates)
 importFrom(tibble,tibble)
@@ -0,0 +1,121 @@
+#' Calculate a Design-Correct AUC for a Survey Model
+#'
+#' @description
+#' This function calculates the Area Under the Curve (AUC) and its design-correct
+#' standard error and 95% confidence interval for a survey logistic regression
+#' model. It correctly accounts for strata and clusters by using a
+#' replicate-weights survey design object.
+#'
+#' @details
+#' The coefficients of `fit` are held fixed: the model is not refitted for each
+#' set of replicate weights. Predicted probabilities are computed from
+#' `coef(fit)` and only the observation weights change when the weighted ROC
+#' curve is rebuilt for a replicate. Rows that `model.frame()` drops because of
+#' missing values in the model variables, and rows whose weight is zero or
+#' missing, are excluded from the AUC calculation.
+#'
+#' @param fit A fitted model object of class `svyglm`.
+#' @param design A replicate-weights survey design object, typically created with `as.svrepdesign`.
+#'
+#' @return
+#' A `data.frame` containing the AUC point estimate, its standard error (SE),
+#' and the lower and upper bounds of the 95% confidence interval.
+#'
+#' @importFrom survey withReplicates SE
+#' @importFrom WeightedROC WeightedROC WeightedAUC
+#' @importFrom stats model.frame model.matrix coef plogis
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # Ensure required packages are loaded
+#' if (requireNamespace("survey", quietly = TRUE) &&
+#'     requireNamespace("NHANES", quietly = TRUE) &&
+#'     requireNamespace("dplyr", quietly = TRUE)) {
+#'
+#'   # 1. Prepare Data
+#'   data(NHANESraw, package = "NHANES")
+#'   nhanes_data <- NHANESraw %>%
+#'     dplyr::filter(Age >= 20) %>%
+#'     dplyr::mutate(ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
+#'                                        levels = c("Not Obese", "Obese"))) %>%
+#'     dplyr::filter(complete.cases(ObeseStatus, Age, Gender, Race1,
+#'                                  WTMEC2YR, SDMVPSU, SDMVSTRA))
+#'
+#'   # 2. Create a replicate design object
+#'   std_design <- survey::svydesign(
+#'     ids = ~SDMVPSU,
+#'     strata = ~SDMVSTRA,
+#'     weights = ~WTMEC2YR,
+#'     nest = TRUE,
+#'     data = nhanes_data
+#'   )
+#'   rep_design <- survey::as.svrepdesign(std_design)
+#'
+#'   # 3. Fit a survey logistic regression model using the replicate design
+#'   fit_obesity_rep <- survey::svyglm(
+#'     ObeseStatus ~ Age + Gender + Race1,
+#'     design = rep_design,
+#'     family = quasibinomial()
+#'   )
+#'
+#'   # 4. Calculate the design-correct AUC
+#'   auc_results <- svyAUC(fit_obesity_rep, rep_design)
+#'   print(auc_results)
+#' }
+#' }
+svyAUC <- function(fit, design) {
+
+  # Input Validation
+  if (!inherits(design, "svyrep.design")) {
+    stop("Error: This function requires a replicate-weights survey design object (created with as.svrepdesign).")
+  }
+  if (!inherits(fit, "svyglm")) {
+    stop("Error: This function is designed for 'svyglm' model objects.")
+  }
+
+  # These do not depend on the replicate weights, so compute them once
+  # instead of once per replicate inside the statistic.
+  outcome_name <- all.vars(fit$formula[[2]])[1]
+  model_formula <- stats::formula(fit)
+  beta <- stats::coef(fit)
+
+  # Statistic evaluated by withReplicates(): weighted AUC for one weight vector.
+  auc_statistic <- function(weights, data) {
+    mf <- stats::model.frame(model_formula, data)
+
+    # model.frame() drops rows with missing model variables and records their
+    # positions in the "na.action" attribute. The outcome and the weights must
+    # be restricted to the same rows, otherwise their lengths differ from the
+    # length of the predictions.
+    keep <- seq_len(nrow(data))
+    dropped <- attr(mf, "na.action")
+    if (length(dropped) > 0) {
+      keep <- keep[-as.integer(dropped)]
+    }
+
+    mm <- stats::model.matrix(model_formula, mf)
+    eta <- mm %*% beta
+    predictions <- as.vector(stats::plogis(eta))
+
+    # Recode the outcome to numeric 0/1 (first factor level / FALSE -> 0).
+    outcome <- data[[outcome_name]][keep]
+    if (is.factor(outcome)) {
+      outcome <- as.numeric(outcome) - 1
+    } else if (is.logical(outcome)) {
+      outcome <- as.numeric(outcome)
+    }
+
+    local_data <- data.frame(
+      predictions = predictions,
+      outcome = outcome,
+      w = weights[keep]
+    )
+
+    # Observations with zero or missing weight do not contribute to this
+    # replicate.
+    local_data <- local_data[local_data$w > 0 & !is.na(local_data$w), ]
+
+    roc_curve <- WeightedROC::WeightedROC(
+      guess = local_data$predictions,
+      label = local_data$outcome,
+      weight = local_data$w
+    )
+    WeightedROC::WeightedAUC(roc_curve)
+  }
+
+  result <- survey::withReplicates(
+    design,
+    theta = auc_statistic,
+    return.replicates = TRUE
+  )
+
+  se <- survey::SE(result)
+  ci <- stats::confint(result)
+
+  # as.numeric() strips the attributes that result$theta carries so the
+  # returned data frame holds plain numeric columns.
+  output <- data.frame(
+    AUC = as.numeric(result$theta),
+    SE = as.numeric(se),
+    CI_Lower = as.numeric(ci[1]),
+    CI_Upper = as.numeric(ci[2])
+  )
+
+  rownames(output) <- NULL
+  return(output)
+}
+
@@ -0,0 +1,117 @@
+#' Archer-Lemeshow Goodness-of-Fit Test for Survey Models
+#'
+#' @description
+#' Performs an Archer-Lemeshow goodness-of-fit (GOF) test for logistic
+#' regression models fitted with complex survey data. This test is an extension
+#' of the Hosmer-Lemeshow test for survey designs.
+#'
+#' @details
+#' The function automates the process of calculating residuals and fitted values,
+#' creating groups (deciles by default) based on fitted probabilities,
+#' attaching these variables to a copy of the supplied survey design, and
+#' running a final Wald test. Residuals are matched to the rows of the design
+#' by row name, so observations dropped from the model because of missing
+#' values are left out of the test. A non-significant p-value (e.g., p > 0.05)
+#' suggests no evidence of a poor fit.
+#'
+#' @param fit A fitted model object of class `svyglm`.
+#' @param design A survey design object of class `svydesign` or `svyrep.design`
+#'   that was used to fit the model.
+#' @param G An integer specifying the number of groups to create based on
+#'   fitted probabilities. Defaults to 10 (deciles).
+#'
+#' @return
+#' A `data.frame` containing the F-statistic, the numerator (df1) and
+#' denominator (df2) degrees of freedom, and the p-value for the test.
+#'
+#' @source
+#' The implementation is a formalized function based on the script and discussion
+#' in the R-help mailing list archives: \url{https://stat.ethz.ch/pipermail/r-help/2016-November/443223.html}
+#'
+#' @importFrom survey svydesign svyglm regTermTest
+#' @importFrom stats residuals fitted quantile
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # Ensure required packages are loaded
+#' if (requireNamespace("survey", quietly = TRUE) &&
+#'     requireNamespace("NHANES", quietly = TRUE) &&
+#'     requireNamespace("dplyr", quietly = TRUE)) {
+#'
+#'   # 1. Prepare Data
+#'   data(NHANESraw, package = "NHANES")
+#'   nhanes_data <- NHANESraw %>%
+#'     dplyr::filter(Age >= 20) %>%
+#'     dplyr::mutate(ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
+#'                                        levels = c("Not Obese", "Obese"))) %>%
+#'     dplyr::filter(complete.cases(ObeseStatus, Age, Gender, Race1,
+#'                                  WTMEC2YR, SDMVPSU, SDMVSTRA))
+#'
+#'   # 2. Create a survey design object
+#'   std_design <- survey::svydesign(
+#'     ids = ~SDMVPSU,
+#'     strata = ~SDMVSTRA,
+#'     weights = ~WTMEC2YR,
+#'     nest = TRUE,
+#'     data = nhanes_data
+#'   )
+#'
+#'   # 3. Fit a survey logistic regression model using the design
+#'   fit_obesity <- survey::svyglm(
+#'     ObeseStatus ~ Age + Gender + Race1,
+#'     design = std_design,
+#'     family = quasibinomial()
+#'   )
+#'
+#'   # 4. Run the Archer-Lemeshow goodness-of-fit test
+#'   gof_results <- svygof(fit_obesity, std_design)
+#'   print(gof_results)
+#' }
+#' }
+svygof <- function(fit, design, G = 10) {
+
+  # Input Validation
+  if (!inherits(fit, "svyglm")) {
+    stop("Error: This function is designed for 'svyglm' model objects.")
+  }
+  if (!inherits(design, c("survey.design", "svyrep.design"))) {
+    stop("Error: 'design' must be a survey design object of class 'svydesign' or 'svyrep.design'.")
+  }
+  if (!is.data.frame(design$variables)) {
+    stop("Error: 'design' must store its data in memory (design$variables is not a data frame).")
+  }
+  if (!is.numeric(G) || length(G) != 1 || is.na(G) || G < 2 || G != round(G)) {
+    stop("Error: 'G' must be a single whole number greater than or equal to 2.")
+  }
+
+  # Get residuals and fitted values from the model
+  resids <- stats::residuals(fit, type = "response")
+  fitted_vals <- stats::fitted(fit)
+
+  # Map every row of the design data to its position in the model results.
+  # Matching by row name keeps the design's row order intact (merge() would
+  # re-sort the rows and break the link to clusters, strata and weights) and
+  # yields NA for rows the model dropped because of missing values.
+  n_obs <- nrow(design$variables)
+  if (!is.null(names(resids))) {
+    idx <- match(rownames(design$variables), names(resids))
+  } else if (length(resids) == n_obs) {
+    idx <- seq_len(n_obs)
+  } else {
+    stop("Error: Could not align model residuals with the rows of 'design'.")
+  }
+  if (all(is.na(idx))) {
+    stop("Error: No model observations could be matched to 'design'. Was the model fitted with this design?")
+  }
+
+  r_aligned <- unname(resids[idx])
+  f_aligned <- unname(fitted_vals[idx])
+
+  # Create G groups based on fitted values
+  breaks <- stats::quantile(f_aligned, probs = seq(0, 1, length.out = G + 1),
+                            na.rm = TRUE)
+  unique_breaks <- unique(breaks)
+  if (length(unique_breaks) < 3) {
+    stop("Error: Fitted values have too few distinct quantiles to form at least two groups.")
+  }
+  g_aligned <- cut(f_aligned, breaks = unique_breaks, include.lowest = TRUE)
+
+  # Attach the new variables to a copy of the supplied design rather than
+  # rebuilding it, so weights, strata, clusters, fpc and replicate weights are
+  # all preserved. Dotted names avoid clobbering columns in the user's data.
+  new_design <- design
+  new_design$variables$.gof_resid <- r_aligned
+  new_design$variables$.gof_group <- g_aligned
+
+  # Run the test
+  decile_model <- survey::svyglm(.gof_resid ~ .gof_group, design = new_design,
+                                 na.action = stats::na.omit)
+  test_result <- survey::regTermTest(decile_model, ~.gof_group)
+
+  # Return a tidy data frame
+  output <- data.frame(
+    F_statistic = as.numeric(test_result$Ftest)[1],
+    df1 = test_result$df,
+    df2 = test_result$ddf,
+    p_value = as.numeric(test_result$p)[1],
+    row.names = NULL
+  )
+
+  return(output)
+}