version 0.4.0

ehsanx · ehsanx · commit 1c28cee98510 · 2025-10-13T13:27:50.000-07:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,12 @@
 Package: svyTable1
 Title: Create Survey-Weighted Descriptive Statistics Tables
-Version: 0.3.0
-Author: Ehsan Karim <ehsan.karim@gmail.com>
-Maintainer: Ehsan Karim <ehsan.karim@gmail.com>
+Version: 0.4.0
+Authors@R: c(person("Ehsan", "Karim",
+                    email = "ehsan.karim@gmail.com",
+                    role = c("aut", "cre")),
+             person("Esteban", "Valencia",
+                    comment = "Provided feedback on generalizing the svydiag function, tested installation issues and fixed a bug regarding effective sample size calculation.",
+                    role = "ctb"))
 Description: A simple tool to create 'Table 1' summaries from complex 
     survey data, with options for weighted, unweighted, and mixed displays.
 License: MIT + file LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,20 +1,20 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(svyglmdiag)
+export(svydiag)
 export(svytable1)
 import(stats)
-importFrom(dplyr,bind_cols)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(magrittr,"%>%")
+importFrom(stats,coef)
 importFrom(stats,confint)
-importFrom(stats,setNames)
+importFrom(stats,vcov)
 importFrom(survey,SE)
 importFrom(survey,degf)
 importFrom(survey,svyby)
 importFrom(survey,svyciprop)
 importFrom(survey,svymean)
 importFrom(survey,svytable)
 importFrom(survey,svyvar)
-importFrom(tibble,as_tibble)
+importFrom(tibble,tibble)
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,8 @@
+# svyTable1 0.4.0
+
+## MAJOR IMPROVEMENTS
+
+* `svyglmdiag()` has been renamed to the more general `svydiag()` and now
+    supports additional models such as `svycoxph`. A general installation
+    issue and a bug in the effective sample size calculation have also been
+    fixed. (Thanks to Esteban Valencia)
diff --git a/R/svydiag.R b/R/svydiag.R
@@ -1,9 +1,9 @@
 #' Perform Reliability Diagnostics on Survey Regression Models
 #'
 #' @description
-#' This function takes a fitted survey regression model object (e.g., from `svyglm`)
-#' and produces a tibble with key reliability and diagnostic metrics for each
-#' coefficient.
+#' This function takes a fitted survey regression model object (e.g., from `svyglm`
+#' or `svycoxph`) and produces a tibble with key reliability and diagnostic
+#' metrics for each coefficient.
 #'
 #' @details
 #' The output provides a comprehensive overview to help assess the stability and
@@ -21,7 +21,7 @@
 #' even if precisely estimated. It is better to rely on the standard error,
 #' p-value, and confidence interval width for reliability assessment.
 #'
-#' @param fit A fitted model object, typically of class `svyglm`.
+#' @param fit A fitted model object from the `survey` package, such as `svyglm` or `svycoxph`.
 #' @param p_threshold A numeric value (between 0 and 1) for the significance threshold. Defaults to `0.05`.
 #' @param rse_threshold A numeric value for flagging high Relative Standard Error (RSE). Defaults to `30`.
 #'
@@ -40,9 +40,9 @@
 #'   \item \code{is_rse_high}: A logical flag, `TRUE` if `RSE_percent` is greater than or equal to `rse_threshold`.
 #' }
 #'
-#' @importFrom dplyr mutate bind_cols select
-#' @importFrom tibble as_tibble
-#' @importFrom stats confint setNames
+#' @importFrom dplyr mutate select
+#' @importFrom tibble tibble
+#' @importFrom stats confint coef vcov
 #'
 #' @export
 #'
@@ -82,8 +82,8 @@
 #'     family = quasibinomial()
 #'   )
 #'
-#'   # 3. Get the reliability diagnostics table
-#'   diagnostics_table <- svyglmdiag(fit)
+#'   # 3. Get the reliability diagnostics table using the new function
+#'   diagnostics_table <- svydiag(fit)
 #'
 #'   # Print the resulting table
 #'   print(diagnostics_table)
@@ -96,25 +96,31 @@
 #'   }
 #' }
 
-svyglmdiag <- function(fit, p_threshold = 0.05, rse_threshold = 30) {
+svydiag <- function(fit, p_threshold = 0.05, rse_threshold = 30) {
 
-  # --- Input validation ---
-  if (!inherits(fit, "svyglm")) {
-    warning("This function is designed for 'svyglm' objects. Results may be unexpected.")
-  }
+  # 1. Robustly extract key model components using accessor functions
+  s_fit <- summary(fit)
+  estimates <- stats::coef(fit)
+  se <- sqrt(diag(stats::vcov(fit)))
+  conf_int <- stats::confint(fit)
 
-  # 1. Get the standard model summary and confidence intervals
-  summary_fit <- summary(fit)
-  conf_int_fit <- stats::confint(fit)
+  # P-values are most reliably extracted from the summary coefficient table.
+  # This assumes the p-value is the last column, which is standard for most
+  # survey models (svyglm, svycoxph, etc.).
+  p_vals <- s_fit$coefficients[, ncol(s_fit$coefficients)]
 
   # 2. Combine these into a single, informative table
-  reliability_df <- tibble::as_tibble(summary_fit$coefficients, rownames = "Term")
-  names(reliability_df) <- c("Term", "Estimate", "SE", "t.value", "p.value")
+  reliability_df <- tibble::tibble(
+    Term = names(estimates),
+    Estimate = estimates,
+    SE = se,
+    p.value = p_vals,
+    CI_Lower = conf_int[, 1],
+    CI_Upper = conf_int[, 2]
+  )
 
-  # 3. Add CIs, calculate metrics, and add flags
+  # 3. Calculate derived metrics, add flags, and finalize the output
   reliability_df <- reliability_df %>%
-    dplyr::bind_cols(tibble::as_tibble(conf_int_fit) %>%
-                       stats::setNames(c("CI_Lower", "CI_Upper"))) %>%
     dplyr::mutate(
       RSE_percent = (SE / abs(Estimate)) * 100,
       CI_Width = CI_Upper - CI_Lower,
diff --git a/R/svytable1.R b/R/svytable1.R
@@ -7,7 +7,7 @@
 #' Data Presentation Standards.
 #'
 #' @param design A survey design object created by the survey package.
-#' @param strata_var A string with the name of the stratification variable.
+#' @param strata_var A string with the name of the stratification variable. If this variable contains NA values, they will be automatically grouped into a separate 'Missing' stratum in the output table.
 #' @param table_vars A character vector of variable names to summarize.
 #' @param mode A string specifying the output type: "mixed" (default), "weighted", or "unweighted".
 #' @param commas Logical; if TRUE (default), large numbers in counts are formatted with commas.
@@ -229,7 +229,7 @@ svytable1 <- function(design, strata_var, table_vars,
               ci_low <- metrics$ci_low[level_index]; ci_high <- metrics$ci_high[level_index]
               pct_val <- metrics$prop[level_index]; se <- metrics$se[level_index]
 
-              effective_n <- if(!is.na(deff) && deff > 0) n / deff else 0
+              effective_n <- if(!is.na(deff)) n / max(1, deff) else 0
               ciw <- ci_high - ci_low
               rciw <- if(!is.na(pct_val) && pct_val > 0) (ciw / pct_val) * 100 else Inf
               rse <- if(!is.na(pct_val) && pct_val > 0) (se / pct_val) * 100 else Inf
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@ The package was developed to simplify a common task in epidemiology and public h
 - **Built-in Reliability Checks:** Automatically apply NCHS Data Presentation Standards for Proportions to flag or suppress unreliable estimates.
 - **Flexible Output Modes:** Easily switch between `"mixed"`, `"weighted"`, and `"unweighted"` summaries.
 - **Readability:** Option to format large numbers with commas for improved readability.
-- **Regression Diagnostics**: Includes the `svyglmdiag()` helper function to assess the reliability of coefficients from `svyglm()` models.
+- **Regression Diagnostics**: Includes the `svydiag()` helper function to assess the reliability of coefficients from survey regression models such as `svyglm()` and `svycoxph()`.
 
 ---
 
@@ -25,7 +25,8 @@ You can install the development version of **svyTable1** from GitHub with:
 
 ```r
 # install.packages("devtools")
-devtools::install_github("ehsanx/svyTable1", build_vignettes = TRUE)
+# dependencies = TRUE also installs the suggested packages needed to build the vignettes
+devtools::install_github("ehsanx/svyTable1", build_vignettes = TRUE, dependencies = TRUE)
 ```
 
 ---
@@ -163,7 +164,7 @@ Example `svytable1` output table from Example C with the reliability checks appl
 
 #### Example D: Reliability Checks for Regression Models
 
-Beyond descriptive tables, the package provides `svyglmdiag()` to assess the reliability of coefficients from a survey-weighted regression model. It calculates key metrics like p-values, standard errors, and confidence interval widths.
+Beyond descriptive tables, the package provides `svydiag()` to assess the reliability of coefficients from a survey-weighted regression model. It calculates key metrics like p-values, standard errors, and confidence interval widths.
 
 ```r
 # 1. Fit a logistic regression model using the complete-case design
@@ -174,7 +175,7 @@ fit_obesity <- svyglm(
 )
 
 # 2. Get the reliability diagnostics table for the model
-diagnostics_table <- svyglmdiag(fit_obesity)
+diagnostics_table <- svydiag(fit_obesity)
 
 # 3. Display the diagnostics table
 knitr::kable(
@@ -187,7 +188,7 @@ knitr::kable(
 
 ## 📊 Example Output 2
 
-Example output table for Example D, which demonstrates the `svyglmdiag()` function.
+Example output table for Example D, which demonstrates the `svydiag()` function.
 
 |Term | Estimate| SE| p.value|is_significant | CI_Lower| CI_Upper| CI_Width| RSE_percent|is_rse_high |
 |:---|---:|---:|---:|:---|---:|---:|---:|---:|:---|
diff --git a/man/svydiag.Rd b/man/svydiag.Rd
diff --git a/man/svytable1.Rd b/man/svytable1.Rd
diff --git a/vignettes/using-svyTable1.Rmd b/vignettes/using-svyTable1.Rmd
@@ -91,6 +91,8 @@ knitr::kable(
 )
 ```
 
+It's important to note that `svytable1()` automatically detects missing (`NA`) values in the stratification variable. It treats these observations as a distinct group, creating a separate 'Missing' column in the table to ensure all data is accounted for.
+
 #### Example B: Summarizing Complete Data
 
 ```{r}
@@ -231,13 +233,13 @@ An RSE of **30%** has historically been a common cutoff for determining if an es
 
 ## Extending Reliability Checks to Regression Models
 
-While `svytable1()` focuses on descriptive statistics, a common next step in analysis is fitting a regression model. Assessing the reliability of regression coefficients is just as important as checking descriptive estimates. To support this workflow, the `svyTable1` package now includes `svyglmdiag()`, a helper function for diagnosing the stability of coefficients from `svyglm()` models.
+While `svytable1()` focuses on descriptive statistics, a common next step in analysis is fitting a regression model. Assessing the reliability of regression coefficients is just as important as checking descriptive estimates. To support this workflow, the `svyTable1` package now includes `svydiag()`, a helper function for diagnosing the stability of coefficients from survey regression models such as `svyglm()` and `svycoxph()`.
 
 The function provides key metrics recommended for this purpose, such as the **p-value**, **standard error**, and **confidence interval width**. It also includes the Relative Standard Error (RSE) for comparison, though it is not the recommended primary check for regression coefficients due to its tendency to be misleading for estimates near zero.
 
 ### Example: Running Diagnostics on a Survey-Weighted Model
 
-Let's fit a logistic regression model to predict obesity (`ObeseStatus`) using the complete-case NHANES data we prepared earlier. We'll then use `svyglmdiag()` to assess the reliability of our model's coefficients.
+Let's fit a logistic regression model to predict obesity (`ObeseStatus`) using the complete-case NHANES data we prepared earlier. We'll then use `svydiag()` to assess the reliability of our model's coefficients.
 
 ```{r reg-diagnostics}
 # 1. Fit a survey-weighted logistic regression model
@@ -249,7 +251,7 @@ fit_obesity <- svyglm(
 )
 
 # 2. Run the diagnostics function on the fitted model
-diagnostics_table <- svyglmdiag(fit_obesity)
+diagnostics_table <- svydiag(fit_obesity)
 
 # 3. Display the diagnostics table
 knitr::kable(
@@ -261,7 +263,7 @@ knitr::kable(
 
 ## Interpreting the Regression Diagnostics Table
 
-The output from `svyglmdiag()` provides a clear, term-by-term *report card* for your regression model. It helps you evaluate the reliability and interpretability of each coefficient.
+The output from `svydiag()` provides a clear, term-by-term *report card* for your regression model. It helps you evaluate the reliability and interpretability of each coefficient.
 
 ### Key Columns
 
@@ -283,7 +285,7 @@ The output from `svyglmdiag()` provides a clear, term-by-term *report card* for
 
 ---
 
-Overall, `svyglmdiag()` complements traditional regression output by making it easier to identify which predictors are both statistically meaningful and statistically stable under complex survey design.
+Overall, `svydiag()` complements traditional regression output by making it easier to identify which predictors are both statistically meaningful and statistically stable under complex survey design.
 
 
 ## References