Skip to content

Commit f8ad00a

Browse files
committed
v3
1 parent 8c00069 commit f8ad00a

File tree

8 files changed

+431
-20
lines changed

8 files changed

+431
-20
lines changed

NAMESPACE

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export("%>%")
4+
export(svyglmdiag)
35
export(svytable1)
46
import(stats)
7+
importFrom(dplyr,bind_cols)
8+
importFrom(dplyr,mutate)
9+
importFrom(dplyr,select)
10+
importFrom(magrittr,"%>%")
11+
importFrom(stats,confint)
12+
importFrom(stats,setNames)
513
importFrom(survey,SE)
614
importFrom(survey,degf)
715
importFrom(survey,svyby)
816
importFrom(survey,svyciprop)
917
importFrom(survey,svymean)
1018
importFrom(survey,svytable)
1119
importFrom(survey,svyvar)
20+
importFrom(tibble,as_tibble)

R/svyglmdiag.R

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#' Perform Reliability Diagnostics on Survey Regression Models
2+
#'
3+
#' @description
4+
#' This function takes a fitted survey regression model object (e.g., from `svyglm`)
5+
#' and produces a tibble with key reliability and diagnostic metrics for each
6+
#' coefficient.
7+
#'
8+
#' @details
9+
#' The output provides a comprehensive overview to help assess the stability and
10+
#' precision of each regression coefficient. The metrics include:
11+
#' \itemize{
12+
#' \item \strong{Standard Error (SE)}: A measure of the estimate's precision. Smaller is better.
13+
#' \item \strong{p-value}: The probability of observing a test statistic at least as extreme as the one obtained, if the true coefficient were zero.
14+
#' \item \strong{Confidence Interval (CI) Width}: A wide CI indicates greater uncertainty.
15+
#' \item \strong{Relative Standard Error (RSE)}: Calculated as `(SE / |Estimate|) * 100`.
16+
#' }
17+
#'
18+
#' \strong{Note on RSE}: While included for comparative purposes, the use of RSE to
19+
#' evaluate the reliability of regression coefficients is not recommended by
20+
#' agencies like NCHS/CDC. Coefficients near zero can have an extremely large RSE
21+
#' even if precisely estimated. It is better to rely on the standard error,
22+
#' p-value, and confidence interval width for reliability assessment.
23+
#'
24+
#' @param fit A fitted model object, typically of class `svyglm`.
25+
#' @param p_threshold A numeric value (between 0 and 1) for the significance threshold. Defaults to `0.05`.
26+
#' @param rse_threshold A numeric value for flagging high Relative Standard Error (RSE). Defaults to `30`.
27+
#'
28+
#' @return
29+
#' A `tibble` containing the following columns:
30+
#' \itemize{
31+
#' \item \code{Term}: The name of the regression coefficient.
32+
#' \item \code{Estimate}: The coefficient's point estimate (e.g., on the log-odds scale for logistic models).
33+
#' \item \code{SE}: The standard error of the estimate.
34+
#' \item \code{p.value}: The p-value for the coefficient.
35+
#' \item \code{is_significant}: A logical flag, `TRUE` if `p.value` is less than `p_threshold`.
36+
#' \item \code{CI_Lower}: The lower bound of the 95% confidence interval.
37+
#' \item \code{CI_Upper}: The upper bound of the 95% confidence interval.
38+
#' \item \code{CI_Width}: The absolute width of the confidence interval (`CI_Upper - CI_Lower`).
39+
#' \item \code{RSE_percent}: The Relative Standard Error, as a percentage.
40+
#' \item \code{is_rse_high}: A logical flag, `TRUE` if `RSE_percent` is greater than or equal to `rse_threshold`.
41+
#' }
42+
#'
43+
#' @importFrom dplyr mutate bind_cols select
44+
#' @importFrom tibble as_tibble
45+
#' @importFrom stats confint setNames
46+
#'
47+
#' @export
48+
#'
49+
#' @examples
50+
#' # Ensure required packages are loaded
51+
#' if (requireNamespace("survey", quietly = TRUE) &&
52+
#' requireNamespace("NHANES", quietly = TRUE) &&
53+
#' requireNamespace("dplyr", quietly = TRUE)) {
54+
#'
55+
#' # 1. Prepare Data using the NHANES example
56+
#' data(NHANESraw, package = "NHANES")
57+
#' nhanes_adults_with_na <- NHANESraw %>%
58+
#' dplyr::filter(Age >= 20) %>%
59+
#' dplyr::mutate(
60+
#' ObeseStatus = factor(ifelse(BMI >= 30, "Obese", "Not Obese"),
61+
#' levels = c("Not Obese", "Obese")),
62+
#' Race1 = factor(Race1)
63+
#' )
64+
#'
65+
#' # Create a complete-case design object for the regression model
66+
#' nhanes_complete <- nhanes_adults_with_na[complete.cases(
67+
#' nhanes_adults_with_na[, c("ObeseStatus", "Age", "Race1")]
68+
#' ), ]
69+
#'
70+
#' adult_design_complete <- survey::svydesign(
71+
#' id = ~SDMVPSU,
72+
#' strata = ~SDMVSTRA,
73+
#' weights = ~WTMEC2YR,
74+
#' nest = TRUE,
75+
#' data = nhanes_complete
76+
#' )
77+
#'
78+
#' # 2. Fit a survey-weighted logistic regression model
79+
#' fit <- survey::svyglm(
80+
#' ObeseStatus ~ Age + Race1,
81+
#' design = adult_design_complete,
82+
#' family = quasibinomial()
83+
#' )
84+
#'
85+
#' # 3. Get the reliability diagnostics table
86+
#' diagnostics_table <- svyglmdiag(fit)
87+
#'
88+
#' # Print the resulting table
89+
#' print(diagnostics_table)
90+
#'
91+
#' # For a publication-ready table, pass the result to kable()
92+
#' if (requireNamespace("knitr", quietly = TRUE)) {
93+
#' knitr::kable(diagnostics_table,
94+
#' caption = "Reliability Diagnostics for NHANES Obesity Model",
95+
#' digits = 3)
96+
#' }
97+
#' }
98+
99+
svyglmdiag <- function(fit, p_threshold = 0.05, rse_threshold = 30) {

  # --- Input validation ---
  # A non-svyglm object is tolerated but flagged: the steps below only need
  # summary() and confint() to work on `fit`.
  if (!inherits(fit, "svyglm")) {
    warning("This function is designed for 'svyglm' objects. Results may be unexpected.")
  }

  # Thresholds must be single, non-missing numbers. Without these checks a
  # character threshold such as "0.05" would be compared as text, and an NA
  # threshold would turn every flag into NA, both without any error.
  if (!is.numeric(p_threshold) || length(p_threshold) != 1L ||
        is.na(p_threshold) || p_threshold < 0 || p_threshold > 1) {
    stop("`p_threshold` must be a single number between 0 and 1.",
         call. = FALSE)
  }
  if (!is.numeric(rse_threshold) || length(rse_threshold) != 1L ||
        is.na(rse_threshold)) {
    stop("`rse_threshold` must be a single non-missing number.",
         call. = FALSE)
  }

  # 1. Get the standard model summary and confidence intervals.
  #    confint() is called without a `level`, so the method's default applies.
  summary_fit <- summary(fit)
  conf_int_fit <- stats::confint(fit)

  # 2. Turn the coefficient table into a tibble, moving the row names into a
  #    `Term` column. The remaining columns are renamed by position.
  #    NOTE(review): assumes the coefficient table has exactly four columns
  #    (estimate, standard error, test statistic, p-value) -- confirm for
  #    non-svyglm inputs.
  reliability_df <- tibble::as_tibble(summary_fit$coefficients, rownames = "Term")
  names(reliability_df) <- c("Term", "Estimate", "SE", "t.value", "p.value")

  # Confidence bounds as a two-column tibble. They are attached by row
  # position, so confint() must return one row per coefficient in the same
  # order as the summary table.
  ci_df <- stats::setNames(
    tibble::as_tibble(conf_int_fit),
    c("CI_Lower", "CI_Upper")
  )

  # 3. Add CIs, calculate metrics, add flags, and keep the final columns.
  reliability_df %>%
    dplyr::bind_cols(ci_df) %>%
    dplyr::mutate(
      # Relative standard error in percent; an estimate of exactly zero
      # yields Inf (or NaN when SE is also zero).
      RSE_percent = (SE / abs(Estimate)) * 100,
      CI_Width = CI_Upper - CI_Lower,
      is_significant = p.value < p_threshold,
      is_rse_high = RSE_percent >= rse_threshold
    ) %>%
    # Reorder and select the final columns for a clean output
    # (the test statistic column is dropped here).
    dplyr::select(
      Term,
      Estimate,
      SE,
      p.value,
      is_significant,
      CI_Lower,
      CI_Upper,
      CI_Width,
      RSE_percent,
      is_rse_high
    )
}

R/utils-pipe.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
NULL

# Column names that svyglmdiag() refers to unquoted inside dplyr verbs.
# Registering them here keeps R CMD check from reporting them as undefined
# global variables.
utils::globalVariables(
  c(
    "Estimate",
    "CI_Upper",
    "CI_Lower",
    "p.value",
    "RSE_percent",
    "Term",
    "is_significant",
    "CI_Width",
    "is_rse_high"
  )
)

README.md

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ The package was developed to simplify a common task in epidemiology and public h
1515
- **Built-in Reliability Checks:** Automatically apply NCHS Data Presentation Standards for Proportions to flag or suppress unreliable estimates.
1616
- **Flexible Output Modes:** Easily switch between `"mixed"`, `"weighted"`, and `"unweighted"` summaries.
1717
- **Readability:** Option to format large numbers with commas for improved readability.
18+
- **Regression Diagnostics:** Includes the `svyglmdiag()` helper function to assess the reliability of coefficients from `svyglm()` models.
1819

1920
---
2021

@@ -131,20 +132,73 @@ knitr::kable(results_list$reliability_metrics)
131132

132133
---
133134

134-
## 📊 Example Output
135-
136-
| Variable | Level | Overall | Missing | Not Obese | Obese |
137-
|-----------|--------|----------|----------|------------|--------|
138-
| n | | 11,778 | 547 | 7,073 | 4,158 |
139-
| Age | Mean (SD) | 47.18 (16.89) | 56.29 (19.15) | 46.45 (17.32) | 48.25 (15.87) |
140-
| Race1 | Black | 2,577 (11.4%) | 108 (12.1%) | 1,296 (9.1%) | 1,173 (15.8%) |
141-
| Hispanic | 1,210 (5.8%) | 62 (2.9%) | 714 (5.7%) | 434 (6.0%) |
142-
| ... | ... | ... | ... | ... | ... |
143-
| TotChol | Mean (SD) | 5.07 (1.07) | 5.00 (1.42) | 5.07 (1.08) | 5.06 (1.04) |
144-
| Missing, n (%) | 1,169 (5.6%) | 426 (15.5%) | 480 (5.5%) | 263 (5.4%) |
145-
| SmokeNow | No | 2,779 (24.2%) | 142 (29.1%) | 1,580 (23.2%) | 1,057 (26.0%) |
146-
| Yes | 2,454 (20.1%) | 102 (19.5%) | 1,594 (21.4%) | 758 (17.6%) |
147-
| Missing | 6,545 (55.7%) | 303 (51.4%) | 3,899 (55.4%) | 2,343 (56.4%) |
135+
## 📊 Example Output 1
136+
137+
Example `svytable1` output table from Example C with the reliability checks applied.
138+
139+
|Variable |Level |Overall |Missing |Not Obese |Obese |
140+
|:---|:---|:---|:---|:---|:---|
141+
|n | |11,778 |547 |7,073 |4,158 |
142+
|Age |Mean (SD) |47.18 (16.89) |56.29 (19.15) |46.45 (17.32) |48.25 (15.87) |
143+
|Race1 |Black |2,577 (11.4%) |108 (12.1%) |1,296 (9.1%) |1,173 (15.8%) |
144+
| |Hispanic |1,210 (5.8%) |* |714 (5.7%) |434 (6.0%) |
145+
| |Mexican |1,680 (8.2%) |* |920 (7.3%) |685 (9.8%) |
146+
| |White |5,017 (67.2%) |235 (69.6%) |3,114 (68.6%) |1,668 (64.4%) |
147+
| |Other |1,294 (7.4%) |67 (6.0%) |1,029 (9.3%) |198 (4.0%) |
148+
|Education |8th Grade |1,321 (6.1%) |* |770 (5.9%) |472 (6.3%) |
149+
| |9 - 11th Grade |1,787 (11.8%) |84 (16.8%) |1,021 (11.3%) |682 (12.5%) |
150+
| |High School |2,595 (21.5%) |121 (21.1%) |1,496 (20.2%) |978 (23.8%) |
151+
| |Some College |3,399 (31.3%) |144 (33.4%) |1,968 (29.4%) |1,287 (34.5%) |
152+
| |College Grad |2,656 (29.3%) |* |1,805 (33.0%) |735 (22.8%) |
153+
| |Missing |20 (0.1%) |* |* |* |
154+
|TotChol |Mean (SD) |5.07 (1.07) |5.00 (1.42) |5.07 (1.08) |5.06 (1.04) |
155+
| |Missing, n (%) |1,169 (5.6%) |426 (15.5%) |480 (5.5%) |263 (5.4%) |
156+
|SmokeNow |No |2,779 (24.2%) |142 (29.1%) |1,580 (23.2%) |1,057 (26.0%) |
157+
| |Yes |2,454 (20.1%) |102 (19.5%) |1,594 (21.4%) |758 (17.6%) |
158+
| |Missing |6,545 (55.7%) |303 (51.4%) |3,899 (55.4%) |2,343 (56.4%) |
159+
160+
161+
162+
---
163+
164+
#### Example D: Reliability Checks for Regression Models
165+
166+
Beyond descriptive tables, the package provides `svyglmdiag()` to assess the reliability of coefficients from a survey-weighted regression model. It calculates key metrics like p-values, standard errors, and confidence interval widths.
167+
168+
```r
169+
# 1. Fit a logistic regression model using the complete-case design
170+
fit_obesity <- svyglm(
171+
ObeseStatus ~ Age + Gender + Race1,
172+
design = adult_design_complete,
173+
family = quasibinomial()
174+
)
175+
176+
# 2. Get the reliability diagnostics table for the model
177+
diagnostics_table <- svyglmdiag(fit_obesity)
178+
179+
# 3. Display the diagnostics table
180+
knitr::kable(
181+
diagnostics_table,
182+
caption = "Table 3: Reliability Diagnostics for Obesity Model Coefficients",
183+
digits = 3
184+
)
185+
```
186+
---
187+
188+
## 📊 Example Output 2
189+
190+
Example output table for Example D, which demonstrates the `svyglmdiag()` function.
191+
192+
|Term | Estimate| SE| p.value|is_significant | CI_Lower| CI_Upper| CI_Width| RSE_percent|is_rse_high |
193+
|:---|---:|---:|---:|:---|---:|---:|---:|---:|:---|
194+
|(Intercept) | -0.381| 0.109| 0.002|TRUE | -0.604| -0.158| 0.445| 28.486|FALSE |
195+
|Age | 0.008| 0.002| 0.000|TRUE | 0.005| 0.012| 0.007| 20.782|FALSE |
196+
|Gendermale | -0.061| 0.057| 0.294|FALSE | -0.179| 0.056| 0.236| 93.470|TRUE |
197+
|Race1Hispanic | -0.493| 0.103| 0.000|TRUE | -0.704| -0.282| 0.422| 20.870|FALSE |
198+
|Race1Mexican | -0.225| 0.087| 0.016|TRUE | -0.403| -0.046| 0.357| 38.733|TRUE |
199+
|Race1White | -0.654| 0.081| 0.000|TRUE | -0.821| -0.488| 0.334| 12.421|FALSE |
200+
|Race1Other | -1.351| 0.131| 0.000|TRUE | -1.620| -1.082| 0.538| 9.707|FALSE |
201+
148202

149203
---
150204

man/pipe.Rd

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/svyglmdiag.Rd

Lines changed: 101 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)