Skip to content

Commit 8370ee5

Browse files
committed
R2 incorrect for methods that use update(model, ~1) and have missing data
Fixes #803
1 parent 9d7d968 commit 8370ee5

File tree

6 files changed

+105
-12
lines changed

6 files changed

+105
-12
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: performance
33
Title: Assessment of Regression Models Performance
4-
Version: 0.13.0.1
4+
Version: 0.13.0.2
55
Authors@R:
66
c(person(given = "Daniel",
77
family = "Lüdecke",

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
the full model), or can return singularity checks for each random effects term
99
separately.
1010

11+
## Bug fixes
12+
13+
* Fixed issue with wrong computation of pseudo-R2 for some models where the
14+
base-model (null model) was updated using the original data, which could
15+
include missing values. Now the model frame is used, ensuring the correct
16+
number of observations in the returned base-model, thus calculating the correct
17+
log-likelihood and returning the correct pseudo-R2.
18+
1119
# performance 0.13.0
1220

1321
## Breaking changes

R/r2_coxsnell.R

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,25 +218,45 @@ r2_coxsnell.svycoxph <- function(model, ...) {
218218

219219
#' @export
220220
r2_coxsnell.multinom <- function(model, ...) {
221-
l_base <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
221+
l_base <- insight::get_loglikelihood(stats::update(
222+
model,
223+
~1,
224+
trace = FALSE,
225+
data = insight::get_data(model, source = "mf")
226+
))
222227
.r2_coxsnell(model, l_base)
223228
}
224229

225230
#' @export
226231
r2_coxsnell.clm2 <- function(model, ...) {
227-
l_base <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
232+
l_base <- insight::get_loglikelihood(stats::update(
233+
model,
234+
location = ~1,
235+
scale = ~1,
236+
data = insight::get_data(model, source = "mf")
237+
))
228238
.r2_coxsnell(model, l_base)
229239
}
230240

231241
#' @export
232242
r2_coxsnell.bayesx <- function(model, ...) {
233-
junk <- utils::capture.output(l_base <- insight::get_loglikelihood(stats::update(model, ~1))) # nolint
243+
junk <- utils::capture.output(
244+
l_base <- insight::get_loglikelihood(stats::update(
245+
model,
246+
~1,
247+
data = insight::get_data(model, source = "mf")
248+
))
249+
)
234250
.r2_coxsnell(model, l_base)
235251
}
236252

237253
#' @export
238254
r2_coxsnell.clm <- function(model, ...) {
239-
l_base <- insight::get_loglikelihood(stats::update(model, ~1))
255+
l_base <- insight::get_loglikelihood(stats::update(
256+
model,
257+
~1,
258+
data = insight::get_data(model, source = "mf")
259+
))
240260
# if no loglik, return NA
241261
if (length(as.numeric(l_base)) == 0) {
242262
return(NULL)

R/r2_mcfadden.R

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,11 @@ r2_mcfadden.glm <- function(model, verbose = TRUE, ...) {
7373
return(NULL)
7474
}
7575

76-
l_null <- insight::get_loglikelihood(stats::update(model, ~1))
76+
l_null <- insight::get_loglikelihood(stats::update(
77+
model,
78+
~1,
79+
data = insight::get_data(model, source = "mf")
80+
))
7781
.r2_mcfadden(model, l_null)
7882
}
7983

@@ -162,21 +166,35 @@ r2_mcfadden.vglm <- function(model, ...) {
162166
insight::format_error("Can't get log-likelihood when `summ` is not zero.")
163167
}
164168

165-
l_null <- insight::get_loglikelihood(stats::update(model, ~1))
169+
l_null <- insight::get_loglikelihood(stats::update(
170+
model,
171+
~1,
172+
data = insight::get_data(model, source = "mf")
173+
))
166174
.r2_mcfadden(model, l_null)
167175
}
168176

169177

170178
#' @export
171179
r2_mcfadden.clm2 <- function(model, ...) {
172-
l_null <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
180+
l_null <- insight::get_loglikelihood(stats::update(
181+
model,
182+
location = ~1,
183+
scale = ~1,
184+
data = insight::get_data(model, source = "mf")
185+
))
173186
.r2_mcfadden(model, l_null)
174187
}
175188

176189

177190
#' @export
178191
r2_mcfadden.multinom <- function(model, ...) {
179-
l_null <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
192+
l_null <- insight::get_loglikelihood(stats::update(
193+
model,
194+
~1,
195+
trace = FALSE,
196+
data = insight::get_data(model, source = "mf")
197+
))
180198
.r2_mcfadden(model, l_null)
181199
}
182200

R/r2_nagelkerke.R

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,19 +170,33 @@ r2_nagelkerke.negbinmfx <- r2_nagelkerke.logitmfx
170170

171171
#' @export
172172
r2_nagelkerke.multinom <- function(model, ...) {
173-
l_base <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
173+
l_base <- insight::get_loglikelihood(stats::update(
174+
model,
175+
~1,
176+
trace = FALSE,
177+
data = insight::get_data(model, source = "mf")
178+
))
174179
.r2_nagelkerke(model, l_base)
175180
}
176181

177182
#' @export
178183
r2_nagelkerke.clm2 <- function(model, ...) {
179-
l_base <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
184+
l_base <- insight::get_loglikelihood(stats::update(
185+
model,
186+
location = ~1,
187+
scale = ~1,
188+
data = insight::get_data(model, source = "mf")
189+
))
180190
.r2_nagelkerke(model, l_base)
181191
}
182192

183193
#' @export
184194
r2_nagelkerke.clm <- function(model, ...) {
185-
l_base <- insight::get_loglikelihood(stats::update(model, ~1))
195+
l_base <- insight::get_loglikelihood(stats::update(
196+
model,
197+
~1,
198+
data = insight::get_data(model, source = "mf")
199+
))
186200
# if no loglik, return NA
187201
if (length(as.numeric(l_base)) == 0) {
188202
return(NULL)

tests/testthat/test-r2_nagelkerke.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,36 @@ test_that("r2_nagelkerke", {
1717
}
1818
)
1919
})
20+
21+
test_that("r2_nagelkerke, multinom, correct base-model with NA", {
22+
skip_on_cran()
23+
skip_if_not_installed("nnet")
24+
25+
n_obs <- 1000
26+
softmax <- function(x){
27+
exp(x - max(x)) / sum(exp(x - max(x)))
28+
}
29+
sample_y <- function(x){
30+
sample(1:3, size = 1, prob = softmax(c(0.25 * x, -0.1 * x, 0 * x)))
31+
}
32+
set.seed(123)
33+
sim_df <- data.frame(x = rnorm(n_obs, 0, 1), y = NA)
34+
35+
for(i in 1:nrow(sim_df)){
36+
sim_df$y[i] <- sample_y(sim_df$x[i])
37+
}
38+
39+
sim_df$x[1:500] <- NA
40+
sim_df2 <- sim_df[!is.na(sim_df$x),]
41+
42+
m1 <- nnet::multinom(y ~ x, data = sim_df, trace = FALSE)
43+
m2 <- nnet::multinom(y ~ x, data = sim_df2, trace = FALSE)
44+
45+
out1 <- r2_nagelkerke(m1)
46+
out2 <- r2_nagelkerke(m2)
47+
expect_equal(out1, out2, tolerance = 1e-4, ignore_attr = TRUE)
48+
49+
out1 <- r2_mcfadden(m1)
50+
out2 <- r2_mcfadden(m2)
51+
expect_equal(out1$R2, out2$R2, tolerance = 1e-4, ignore_attr = TRUE)
52+
})

0 commit comments

Comments
 (0)