Add Kish-method to rescale_weights#575
Conversation
|
@bwiernik This is the relevant part of the code: .rescale_weights_kish <- function(nest, probability_weights, data_tmp, data, by, weight_non_na) {
p_weights <- data_tmp[[probability_weights]]
# design effect according to Kish
deff <- mean(p_weights^2) / (mean(p_weights)^2)
# rescale weights, so their mean is 1
z_weights <- p_weights * (1 / mean(p_weights))
# divide weights by design effect
data$pweights <- NA_real_
data$pweights[weight_non_na] <- z_weights / deff
# return result
data
} |
library(datawizard)
data(nhanes_sample)
x1 <- rescale_weights(nhanes_sample, "SDMVSTRA", "WTINT2YR")
x2 <- rescale_weights(nhanes_sample, "SDMVSTRA", "WTINT2YR", method = "klish")
# two variants of Carle weights
sum(x1$pweights_a)
#> [1] 2992
sum(x1$pweights_b)
#> [1] 2244.715
# klish weights
sum(x2$pweights)
#> [1] 2162.54
# Rescaled weights
head(cbind(Carle_A = x1$pweights_a, Carle_B = x1$pweights_b, Klish = x2$pweights), 15)
#> Carle_A Carle_B Klish
#> [1,] 1.5733612 1.2005159 1.3952529
#> [2,] 0.6231745 0.5246593 0.5661343
#> [3,] 0.8976966 0.5439111 0.3805718
#> [4,] 0.7083628 0.5498944 0.5003582
#> [5,] 0.4217782 0.3119698 0.2108234
#> [6,] 0.6877550 0.5155503 0.4036216
#> [7,] 1.8855878 1.4637614 1.3319014
#> [8,] 1.2947757 1.0900898 1.1762627
#> [9,] 0.7072244 0.5231011 0.3535021
#> [10,] 0.7600105 0.5937167 0.5703616
#> [11,] 0.5465290 0.4242645 0.3860455
#> [12,] 0.3560001 0.2702126 0.2686613
#> [13,] 1.4453354 1.1713903 1.0993270
#> [14,] 1.2947757 1.0900898 1.1762627
#> [15,] 1.4853544 1.1274198 1.1209469Created on 2024-12-18 with reprex v2.1.1 |
|
@etiennebacher Regardless of whether we add the Kish method or not, we should consider changing the names of the returned columns |
|
No opinions regarding the name change but it's a good time to do it before 1.0 so feel free to go ahead with this. |
|
Here are some comparisons: library(datawizard)
data(nhanes_sample)
# compare different methods, using multilevel-Poisson regression
d <- rescale_weights(nhanes_sample, "SDMVSTRA", "WTINT2YR")
result1 <- lme4::glmer(
total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),
family = poisson(),
data = d,
weights = pweights_a
)
result2 <- lme4::glmer(
total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),
family = poisson(),
data = d,
weights = pweights_b
)
d <- rescale_weights(
nhanes_sample,
probability_weights = "WTINT2YR",
method = "kish"
)
result3 <- lme4::glmer(
total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),
family = poisson(),
data = d,
weights = pweights
)
result4 <- lme4::glmer(
total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),
family = poisson(),
data = d
)
parameters::compare_parameters(
list(result1, result2, result3, result4),
exponentiate = TRUE,
column_names = c("Carle (A)", "Carle (B)", "Kish", "unweighted")
) |> print(table_width = Inf)
#> Number of weighted observations differs from number of unweighted
#> observations.
#> Parameter | Carle (A) | Carle (B) | Kish | unweighted
#> -------------------------------------------------------------------------------------------------------
#> (Intercept) | 12.20 (10.52, 14.14) | 11.95 (10.27, 13.92) | 11.72 (9.89, 13.87) | 13.62 (12.52, 14.83)
#> RIAGENDR [2] | 0.41 ( 0.40, 0.42) | 0.42 ( 0.40, 0.43) | 0.42 (0.41, 0.43) | 0.35 ( 0.34, 0.36)
#> age [log] | 1.69 ( 1.63, 1.75) | 1.66 ( 1.60, 1.73) | 1.66 (1.59, 1.73) | 1.49 ( 1.44, 1.54)
#> RIDRETH1 [2] | 0.90 ( 0.84, 0.97) | 0.90 ( 0.83, 0.98) | 0.98 (0.90, 1.07) | 0.95 ( 0.88, 1.02)
#> RIDRETH1 [3] | 1.19 ( 1.14, 1.24) | 1.21 ( 1.16, 1.27) | 1.23 (1.17, 1.29) | 1.22 ( 1.19, 1.26)
#> RIDRETH1 [4] | 2.16 ( 2.07, 2.26) | 2.16 ( 2.06, 2.28) | 2.11 (1.99, 2.23) | 2.32 ( 2.25, 2.40)
#> RIDRETH1 [5] | 1.01 ( 0.95, 1.07) | 1.05 ( 0.97, 1.12) | 1.09 (1.01, 1.18) | 1.05 ( 0.99, 1.11)
#> -------------------------------------------------------------------------------------------------------
#> Observations | 2617 | 1965 | 1903 | 2595Created on 2024-12-18 with reprex v2.1.1 |
Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
|
@bwiernik would you like to review this PR, or do you trust me/us? ;-) |
bwiernik
left a comment
There was a problem hiding this comment.
Thanks for your patience. A few wording clarifications and I think it would be good to support by for Kish. Otherwise looks good
|
will fix failing tests and add some more tests later. |
No description provided.