ehsanx
diff --git a/‎NAMESPACE‎
Lines changed: 9 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎R/svyglmdiag.R‎
Lines changed: 139 additions & 0 deletions b/‎R/svyglmdiag.R‎
Lines changed: 139 additions & 0 deletions
diff --git a/‎R/utils-pipe.R‎
Lines changed: 15 additions & 0 deletions b/‎R/utils-pipe.R‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 68 additions & 14 deletions b/‎README.md‎
Lines changed: 68 additions & 14 deletions
diff --git a/‎man/pipe.Rd‎
Lines changed: 9 additions & 0 deletions b/‎man/pipe.Rd‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎man/svyglmdiag.Rd‎
Lines changed: 101 additions & 0 deletions b/‎man/svyglmdiag.Rd‎
Lines changed: 101 additions & 0 deletions
@@ -1,11 +1,20 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
+export(svyglmdiag)
 export(svytable1)
 import(stats)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(magrittr,"%>%")
+importFrom(stats,confint)
+importFrom(stats,setNames)
 importFrom(survey,SE)
 importFrom(survey,degf)
 importFrom(survey,svyby)
 importFrom(survey,svyciprop)
 importFrom(survey,svymean)
 importFrom(survey,svytable)
 importFrom(survey,svyvar)
+importFrom(tibble,as_tibble)
@@ -0,0 +1,139 @@
+#' Perform Reliability Diagnostics on Survey Regression Models
+#'
+#' @description
+#' This function takes a fitted survey regression model object (e.g., from `svyglm`)
+#' and produces a tibble with key reliability and diagnostic metrics for each
+#' coefficient.
+#'
+#' @details
+#' The output provides a comprehensive overview to help assess the stability and
+#' precision of each regression coefficient. The metrics include:
+#' \itemize{
+#'   \item \strong{Standard Error (SE)}: A measure of the estimate's precision. Smaller is better.
+#'   \item \strong{p-value}: The probability of observing a test statistic at least as extreme as the one obtained if the true coefficient were zero.
+#'   \item \strong{Confidence Interval (CI) Width}: A wide CI indicates greater uncertainty.
+#'   \item \strong{Relative Standard Error (RSE)}: Calculated as `(SE / |Estimate|) * 100`.
+#' }
+#'
+#' \strong{Note on RSE}: While included for comparative purposes, the use of RSE to
+#' evaluate the reliability of regression coefficients is not recommended by
+#' agencies like NCHS/CDC. Coefficients near zero can have an extremely large RSE
+#' even if precisely estimated. It is better to rely on the standard error,
+#' p-value, and confidence interval width for reliability assessment.
+#'
+#' @param fit A fitted model object, typically of class `svyglm`.
+#' @param p_threshold A numeric value (between 0 and 1) for the significance threshold. Defaults to `0.05`.
+#' @param rse_threshold A numeric value for flagging high Relative Standard Error (RSE). Defaults to `30`.
+#'
+#' @return
+#' A `tibble` containing the following columns:
+#' \itemize{
+#'   \item \code{Term}: The name of the regression coefficient.
+#'   \item \code{Estimate}: The coefficient's point estimate (e.g., on the log-odds scale for logistic models).
+#'   \item \code{SE}: The standard error of the estimate.
+#'   \item \code{p.value}: The p-value for the coefficient.
+#'   \item \code{is_significant}: A logical flag, `TRUE` if `p.value` is less than `p_threshold`.
+#'   \item \code{CI_Lower}: The lower bound of the 95% confidence interval.
+#'   \item \code{CI_Upper}: The upper bound of the 95% confidence interval.
+#'   \item \code{CI_Width}: The absolute width of the confidence interval (`CI_Upper - CI_Lower`).
+#'   \item \code{RSE_percent}: The Relative Standard Error, as a percentage.
+#'   \item \code{is_rse_high}: A logical flag, `TRUE` if `RSE_percent` is greater than or equal to `rse_threshold`.
+#' }
+#'
+#' @importFrom dplyr mutate bind_cols select
+#' @importFrom tibble as_tibble
+#' @importFrom stats confint setNames
+#'
+#' @export
+#'
+#' @examples
+#' # Ensure required packages are loaded
+#' if (requireNamespace("survey", quietly = TRUE) &&
+#'     requireNamespace("NHANES", quietly = TRUE) &&
+#'     requireNamespace("dplyr", quietly = TRUE)) {
+#'
+#'   # 1. Prepare Data using the NHANES example
+#'   data(NHANESraw, package = "NHANES")
+#'   nhanes_adults_with_na <- NHANESraw %>%
+#'     dplyr::filter(Age >= 20) %>%
+#'     dplyr::mutate(
+#'       ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
+#'                            levels = c("Not Obese", "Obese")),
+#'       Race1 = factor(Race1)
+#'     )
+#'
+#'   # Create a complete-case design object for the regression model
+#'   nhanes_complete <- nhanes_adults_with_na[complete.cases(
+#'     nhanes_adults_with_na[, c("ObeseStatus", "Age", "Race1")]
+#'   ), ]
+#'
+#'   adult_design_complete <- survey::svydesign(
+#'     id = ~SDMVPSU,
+#'     strata = ~SDMVSTRA,
+#'     weights = ~WTMEC2YR,
+#'     nest = TRUE,
+#'     data = nhanes_complete
+#'   )
+#'
+#'   # 2. Fit a survey-weighted logistic regression model
+#'   fit <- survey::svyglm(
+#'     ObeseStatus ~ Age + Race1,
+#'     design = adult_design_complete,
+#'     family = quasibinomial()
+#'   )
+#'
+#'   # 3. Get the reliability diagnostics table
+#'   diagnostics_table <- svyglmdiag(fit)
+#'
+#'   # Print the resulting table
+#'   print(diagnostics_table)
+#'
+#'   # For a publication-ready table, pipe the result to kable()
+#'   if (requireNamespace("knitr", quietly = TRUE)) {
+#'     knitr::kable(diagnostics_table,
+#'                  caption = "Reliability Diagnostics for NHANES Obesity Model",
+#'                  digits = 3)
+#'   }
+#' }
+
+svyglmdiag <- function(fit, p_threshold = 0.05, rse_threshold = 30) {
+  # Builds a one-row-per-coefficient tibble from summary(fit) and confint(fit),
+  # then derives the CI width, the relative standard error, and threshold flags.
+
+  # --- Input validation ---
+  # Non-svyglm input only triggers a warning; the computation is still attempted.
+  if (!inherits(fit, "svyglm")) {
+    warning("This function is designed for 'svyglm' objects. Results may be unexpected.")
+  }
+  # Fail early on unusable thresholds so the flag columns are always well defined.
+  if (!is.numeric(p_threshold) || length(p_threshold) != 1L ||
+      is.na(p_threshold) || p_threshold < 0 || p_threshold > 1) {
+    stop("`p_threshold` must be a single number between 0 and 1.", call. = FALSE)
+  }
+  if (!is.numeric(rse_threshold) || length(rse_threshold) != 1L ||
+      is.na(rse_threshold)) {
+    stop("`rse_threshold` must be a single non-missing number.", call. = FALSE)
+  }
+
+  # 1. Coefficient table and confidence intervals from the fitted model
+  coef_matrix <- summary(fit)$coefficients
+  ci_matrix <- stats::confint(fit)
+
+  # 2. One row per coefficient, with the term name kept as a column
+  reliability_df <- tibble::as_tibble(coef_matrix, rownames = "Term")
+  names(reliability_df) <- c("Term", "Estimate", "SE", "t.value", "p.value")
+
+  # 3. Line up the CI rows with the coefficient rows by term name.
+  #    bind_cols() pairs rows purely by position, so a row that is missing or
+  #    ordered differently in one of the two tables would otherwise attach an
+  #    interval to the wrong term.
+  ci_terms <- rownames(ci_matrix)
+  if (!is.null(ci_terms) && all(reliability_df$Term %in% ci_terms)) {
+    ci_matrix <- ci_matrix[reliability_df$Term, , drop = FALSE]
+  } else if (NROW(ci_matrix) != nrow(reliability_df)) {
+    stop("Confidence intervals could not be matched to the model coefficients.",
+         call. = FALSE)
+  }
+  ci_df <- stats::setNames(
+    tibble::as_tibble(ci_matrix),
+    c("CI_Lower", "CI_Upper")
+  )
+
+  # 4. Add CIs, calculate metrics, add flags, and order the output columns.
+  #    The t statistic is dropped from the returned table.
+  reliability_df %>%
+    dplyr::bind_cols(ci_df) %>%
+    dplyr::mutate(
+      RSE_percent = (SE / abs(Estimate)) * 100,
+      CI_Width = CI_Upper - CI_Lower,
+      is_significant = p.value < p_threshold,
+      is_rse_high = RSE_percent >= rse_threshold
+    ) %>%
+    dplyr::select(
+      Term,
+      Estimate,
+      SE,
+      p.value,
+      is_significant,
+      CI_Lower,
+      CI_Upper,
+      CI_Width,
+      RSE_percent,
+      is_rse_high
+    )
+}
@@ -0,0 +1,15 @@
+#' Pipe operator
+#'
+#' See \code{magrittr::\link[magrittr]{\%>\%}} for details.
+#'
+#' @name %>%
+#' @rdname pipe
+#' @keywords internal
+#' @export
+#' @importFrom magrittr %>%
+NULL
+
+# These are column names that svyglmdiag() refers to unquoted inside
+# dplyr::mutate() / dplyr::select(). Declaring them here keeps R CMD check from
+# reporting them as undefined global variables.
+utils::globalVariables(c(
+  "Estimate", "CI_Upper", "CI_Lower", "p.value", "RSE_percent", "Term",
+  "is_significant", "CI_Width", "is_rse_high"
+))
@@ -15,6 +15,7 @@ The package was developed to simplify a common task in epidemiology and public h
 - **Built-in Reliability Checks:** Automatically apply NCHS Data Presentation Standards for Proportions to flag or suppress unreliable estimates.
 - **Flexible Output Modes:** Easily switch between `"mixed"`, `"weighted"`, and `"unweighted"` summaries.
 - **Readability:** Option to format large numbers with commas for improved readability.
+- **Regression Diagnostics:** Includes the `svyglmdiag()` helper function to assess the reliability of coefficients from `svyglm()` models.
 
 ---
 
@@ -131,20 +132,73 @@ knitr::kable(results_list$reliability_metrics)
 
 ---
 
-## 📊 Example Output
-
-| Variable | Level | Overall | Missing | Not Obese | Obese |
-|-----------|--------|----------|----------|------------|--------|
-| n |  | 11,778 | 547 | 7,073 | 4,158 |
-| Age | Mean (SD) | 47.18 (16.89) | 56.29 (19.15) | 46.45 (17.32) | 48.25 (15.87) |
-| Race1 | Black | 2,577 (11.4%) | 108 (12.1%) | 1,296 (9.1%) | 1,173 (15.8%) |
-| Hispanic | 1,210 (5.8%) | 62 (2.9%) | 714 (5.7%) | 434 (6.0%) |
-| ... | ... | ... | ... | ... | ... |
-| TotChol | Mean (SD) | 5.07 (1.07) | 5.00 (1.42) | 5.07 (1.08) | 5.06 (1.04) |
-| Missing, n (%) | 1,169 (5.6%) | 426 (15.5%) | 480 (5.5%) | 263 (5.4%) |
-| SmokeNow | No | 2,779 (24.2%) | 142 (29.1%) | 1,580 (23.2%) | 1,057 (26.0%) |
-| Yes | 2,454 (20.1%) | 102 (19.5%) | 1,594 (21.4%) | 758 (17.6%) |
-| Missing | 6,545 (55.7%) | 303 (51.4%) | 3,899 (55.4%) | 2,343 (56.4%) |
+## 📊 Example Output 1
+
+Example `svytable1` output table from Example C with the reliability checks applied. 
+
+|Variable |Level |Overall |Missing |Not Obese |Obese |
+|:---|:---|:---|:---|:---|:---|
+|n | |11,778 |547 |7,073 |4,158 |
+|Age |Mean (SD) |47.18 (16.89) |56.29 (19.15) |46.45 (17.32) |48.25 (15.87) |
+|Race1 |Black |2,577 (11.4%) |108 (12.1%) |1,296 (9.1%) |1,173 (15.8%) |
+| |Hispanic |1,210 (5.8%) |* |714 (5.7%) |434 (6.0%) |
+| |Mexican |1,680 (8.2%) |* |920 (7.3%) |685 (9.8%) |
+| |White |5,017 (67.2%) |235 (69.6%) |3,114 (68.6%) |1,668 (64.4%) |
+| |Other |1,294 (7.4%) |67 (6.0%) |1,029 (9.3%) |198 (4.0%) |
+|Education |8th Grade |1,321 (6.1%) |* |770 (5.9%) |472 (6.3%) |
+| |9 - 11th Grade |1,787 (11.8%) |84 (16.8%) |1,021 (11.3%) |682 (12.5%) |
+| |High School |2,595 (21.5%) |121 (21.1%) |1,496 (20.2%) |978 (23.8%) |
+| |Some College |3,399 (31.3%) |144 (33.4%) |1,968 (29.4%) |1,287 (34.5%) |
+| |College Grad |2,656 (29.3%) |* |1,805 (33.0%) |735 (22.8%) |
+| |Missing |20 (0.1%) |* |* |* |
+|TotChol |Mean (SD) |5.07 (1.07) |5.00 (1.42) |5.07 (1.08) |5.06 (1.04) |
+| |Missing, n (%) |1,169 (5.6%) |426 (15.5%) |480 (5.5%) |263 (5.4%) |
+|SmokeNow |No |2,779 (24.2%) |142 (29.1%) |1,580 (23.2%) |1,057 (26.0%) |
+| |Yes |2,454 (20.1%) |102 (19.5%) |1,594 (21.4%) |758 (17.6%) |
+| |Missing |6,545 (55.7%) |303 (51.4%) |3,899 (55.4%) |2,343 (56.4%) |
+
+
+
+---
+
+#### Example D: Reliability Checks for Regression Models
+
+Beyond descriptive tables, the package provides `svyglmdiag()` to assess the reliability of coefficients from a survey-weighted regression model. It calculates key metrics like p-values, standard errors, and confidence interval widths.
+
+```r
+# 1. Fit a logistic regression model using the complete-case design
+fit_obesity <- svyglm(
+  ObeseStatus ~ Age + Gender + Race1,
+  design = adult_design_complete,
+  family = quasibinomial()
+)
+
+# 2. Get the reliability diagnostics table for the model
+diagnostics_table <- svyglmdiag(fit_obesity)
+
+# 3. Display the diagnostics table
+knitr::kable(
+  diagnostics_table,
+  caption = "Table 3: Reliability Diagnostics for Obesity Model Coefficients",
+  digits = 3
+)
+```
+---
+
+## 📊 Example Output 2
+
+Example output table for Example D, which demonstrates the `svyglmdiag()` function.
+
+|Term | Estimate| SE| p.value|is_significant | CI_Lower| CI_Upper| CI_Width| RSE_percent|is_rse_high |
+|:---|---:|---:|---:|:---|---:|---:|---:|---:|:---|
+|(Intercept) | -0.381| 0.109| 0.002|TRUE | -0.604| -0.158| 0.445| 28.486|FALSE |
+|Age | 0.008| 0.002| 0.000|TRUE | 0.005| 0.012| 0.007| 20.782|FALSE |
+|Gendermale | -0.061| 0.057| 0.294|FALSE | -0.179| 0.056| 0.236| 93.470|TRUE |
+|Race1Hispanic | -0.493| 0.103| 0.000|TRUE | -0.704| -0.282| 0.422| 20.870|FALSE |
+|Race1Mexican | -0.225| 0.087| 0.016|TRUE | -0.403| -0.046| 0.357| 38.733|TRUE |
+|Race1White | -0.654| 0.081| 0.000|TRUE | -0.821| -0.488| 0.334| 12.421|FALSE |
+|Race1Other | -1.351| 0.131| 0.000|TRUE | -1.620| -1.082| 0.538| 9.707|FALSE |
+
 
 ---