-
Notifications
You must be signed in to change notification settings - Fork 11
Open
Description
The problem
I am having trouble with grouped multiple regressions (several predictors, one response) using `modeldb::linear_regression_db()`.
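I believe the last error below (the "'a' (10 x 9) must be square" failure) comes from pattern matching with `grepl`, which misfires when a predictor (x variable) name contains the response (y variable) name, e.g. `lag_mpg` contains `mpg`.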
Reproducible example
library(dplyr)
library(purrr)
mtcars %>%
arrow::to_duckdb() %>%
group_by(am) %>%
modeldb::linear_regression_db(mpg, auto_count = TRUE)
#> # A tibble: 2 × 11
#> am `(Intercept)` cyl disp hp drat wt qsec vs gear
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 8.64 -0.534 -0.0203 0.0622 0.592 1.95 -0.884 0.739 8.65
#> 2 1 -138. -1.28 0.180 -0.160 -4.95 -10.5 8.09 0.943 12.3
#> # ℹ 1 more variable: carb <dbl>
mtcars %>%
group_by(am) %>%
dplyr::reframe(
reg = list(lm(mpg ~ ., data = dplyr::pick(dplyr::everything()))),
) %>%
.$reg %>%
map(coefficients) %>%
bind_rows()
#> # A tibble: 2 × 10
#> `(Intercept)` cyl disp hp drat wt qsec vs gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 8.64 -0.534 -0.0203 0.0622 0.592 1.95 -0.884 0.739 8.65 -4.81
#> 2 -138. -1.28 0.180 -0.160 -4.95 -10.5 8.09 0.943 12.3 4.69
mtcars %>%
group_by(cyl) %>%
modeldb::linear_regression_db(mpg, auto_count = TRUE)
#> Error in `map()`:
#> ℹ In index: 2.
#> Caused by error in `solve.default()`:
#> ! system is computationally singular: reciprocal condition number = 9.04685e-21
#> Backtrace:
#> ▆
#> 1. ├─mtcars %>% group_by(cyl) %>% ...
#> 2. ├─modeldb::linear_regression_db(., mpg, auto_count = TRUE)
#> 3. │ └─modeldb:::mlr(...)
#> 4. │ └─purrr::map(seq_len(vars_count + 1), ~as.numeric(solve(xm[[.x]], ym[[.x]])))
#> 5. │ └─purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 6. │ ├─purrr:::with_indexed_errors(...)
#> 7. │ │ └─base::withCallingHandlers(...)
#> 8. │ ├─purrr:::call_with_cleanup(...)
#> 9. │ └─modeldb (local) .f(.x[[i]], ...)
#> 10. │ ├─base::solve(xm[[.x]], ym[[.x]])
#> 11. │ └─base::solve.default(xm[[.x]], ym[[.x]])
#> 12. └─base::.handleSimpleError(...)
#> 13. └─purrr (local) h(simpleError(msg, call))
#> 14. └─cli::cli_abort(...)
#> 15. └─rlang::abort(...)
mtcars %>%
group_by(cyl) %>%
dplyr::reframe(
reg = list(lm(mpg ~ ., data = dplyr::pick(dplyr::everything()))),
) %>%
.$reg %>%
map(coefficients) %>%
bind_rows()
#> # A tibble: 3 × 10
#> `(Intercept)` disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 60.9 -0.345 -0.0332 -4.19 4.48 -0.106 -3.64 -6.33 4.07 3.22
#> 2 32.8 0.0746 -0.0425 1.52 5.12 -2.33 -1.75 NA NA NA
#> 3 6.25 -0.0234 0.152 -5.74 -0.726 1.36 NA 4.87 NA -4.77
mtcars %>%
arrow::to_duckdb() %>%
dbplyr::window_order(mpg) %>%
mutate(lag_mpg = lag(mpg)) %>%
filter(!is.na(lag_mpg)) %>%
group_by(am) %>%
modeldb::linear_regression_db(mpg, auto_count = TRUE)
#> Warning in matrix(as.numeric(.x), nrow = matrix_size): data length [81] is not
#> a sub-multiple or multiple of the number of rows [10]
#> Warning in matrix(as.numeric(.x), nrow = matrix_size): data length [81] is not
#> a sub-multiple or multiple of the number of rows [10]
#> Error in `map()`:
#> ℹ In index: 1.
#> Caused by error in `solve.default()`:
#> ! 'a' (10 x 9) must be square
#> Backtrace:
#> ▆
#> 1. ├─... %>% ...
#> 2. ├─modeldb::linear_regression_db(., mpg, auto_count = TRUE)
#> 3. │ └─modeldb:::mlr(...)
#> 4. │ └─purrr::map(seq_len(vars_count + 1), ~as.numeric(solve(xm[[.x]], ym[[.x]])))
#> 5. │ └─purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 6. │ ├─purrr:::with_indexed_errors(...)
#> 7. │ │ └─base::withCallingHandlers(...)
#> 8. │ ├─purrr:::call_with_cleanup(...)
#> 9. │ └─modeldb (local) .f(.x[[i]], ...)
#> 10. │ ├─base::solve(xm[[.x]], ym[[.x]])
#> 11. │ └─base::solve.default(xm[[.x]], ym[[.x]])
#> 12. └─base::.handleSimpleError(...)
#> 13. └─purrr (local) h(simpleError(msg, call))
#> 14. └─cli::cli_abort(...)
#> 15. └─rlang::abort(...)
mtcars %>%
mutate(lag_mpg = lag(mpg)) %>%
filter(!is.na(lag_mpg)) %>%
arrange(mpg) %>%
group_by(am) %>%
dplyr::reframe(
reg = list(lm(mpg ~ ., data = dplyr::pick(dplyr::everything()))),
) %>%
.$reg %>%
map(coefficients) %>%
bind_rows()
#> # A tibble: 2 × 11
#> `(Intercept)` cyl disp hp drat wt qsec vs gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 7.83 -0.533 -0.0209 0.0644 0.509 2.15 -0.932 0.739 8.98 -4.93
#> 2 -178. -4.36 0.328 -0.226 -5.72 -18.9 11.2 -0.715 15.0 7.49
#> # ℹ 1 more variable: lag_mpg <dbl>
Created on 2025-01-05 with reprex v2.0.2
Metadata
Metadata
Assignees
Labels
No labels