Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ VignetteBuilder:
Config/Needs/dev: devtools, lifecycle, readr, glue, RcppTOML, smvr
Config/Needs/lint: fs, lintr
Config/Needs/website: etiennebacher/altdoc, future.apply
Config/polars/lib-version: 1.7.1-rc.1
Config/polars/lib-version: 1.9.0-rc.1
Config/testthat/edition: 3
Config/testthat/parallel: true
Config/testthat/start-first: lazyframe-frame, *-s3-base, polars_options,
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

## polars (development version)

### New features

- `as_polars_series()` gains a `factor_as_enum` argument for R factors: if `TRUE`,
the factor is converted to a Polars Enum instead of a Categorical. The default
can be changed globally (for instance to convert all factors in an R
`data.frame` to Enums with `as_polars_df()`) with
`options(polars.factor_as_enum = TRUE)` (#1723).

- Converting a Polars Enum to an R factor now preserves all levels, in the order
defined by the Enum, even if they don't appear in the data (#1723).

## polars 1.8.0

This is an update that corresponds to Python Polars 1.37.1.
Expand Down
15 changes: 13 additions & 2 deletions R/as_polars_series.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,22 @@ as_polars_series.raw <- function(x, name = NULL, ...) {
wrap()
}

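#' @param factor_as_enum `r lifecycle::badge("experimental")` If `FALSE`
#' (default), R factors are converted to Polars [Categorical][pl__Categorical].
#' If `TRUE`, they are converted to Polars [Enum][pl__Enum], using the levels of
#' the factor as the Enum categories. When this argument is not supplied, the
#' value of the `polars.factor_as_enum` option is used instead if it is set.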
#' @rdname as_polars_series
#' @export
as_polars_series.factor <- function(x, name = NULL, ...) {
as_polars_series.factor <- function(x, name = NULL, ..., factor_as_enum = FALSE) {
if (missing(factor_as_enum)) {
opt <- getOption("polars.factor_as_enum")
if (!is.null(opt)) {
factor_as_enum <- opt
}
}
opt <- getOption("polars.factor_as_enum")

PlRSeries$new_str(name %||% "", as.character(x))$cast(
pl$Categorical()$`_dt`,
if (isTRUE(factor_as_enum)) pl$Enum(levels(x))$`_dt` else pl$Categorical()$`_dt`,
strict = TRUE
) |>
wrap()
Expand Down
7 changes: 6 additions & 1 deletion R/polars_options.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#' `compat_level` argument. See the documentation of those functions for details.
#' * for all `to_r_vector.*` options, see arguments of [to_r_vector()][series__to_r_vector].
#' * `df_knitr_print` (TODO: possible values??)
#' * `factor_as_enum`: if `FALSE` (default), R factors are converted to Polars'
#' [Categorical][pl__Categorical]. If `TRUE`, they are converted to Polars'
#' [Enum][pl__Enum].
#'
#' @return
#' `polars_options()` returns a named list where the names are option names and
Expand Down Expand Up @@ -47,7 +50,8 @@ polars_options <- function() {
to_r_vector.decimal = getOption("polars.to_r_vector.decimal", "double"),
to_r_vector.as_clock_class = getOption("polars.to_r_vector.as_clock_class", FALSE),
to_r_vector.ambiguous = getOption("polars.to_r_vector.ambiguous", "raise"),
to_r_vector.non_existent = getOption("polars.to_r_vector.non_existent", "raise")
to_r_vector.non_existent = getOption("polars.to_r_vector.non_existent", "raise"),
factor_as_enum = getOption("polars.factor_as_enum", FALSE)
)

arg_match_compat_level(out[["compat_level"]], arg_nm = "compat_level")
Expand All @@ -74,6 +78,7 @@ polars_options <- function() {
c("raise", "null"),
arg_nm = "to_r_vector.non_existent"
)
check_bool(out[["factor_as_enum"]], arg = "factor_as_enum")
structure(out, class = "polars_options_list")
}

Expand Down
2 changes: 2 additions & 0 deletions R/series-to_r_vector.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' - Decimal: [double].
#' - String: [character].
#' - Categorical: [factor].
#' - Enum: [factor], all categories (levels) are preserved, even if unobserved in
#' the data.
#' - Date: [Date] or [data.table::IDate][data.table::IDateTime],
#' depending on the `date` argument.
#' - Time: [hms::hms] or [data.table::ITime][data.table::IDateTime],
Expand Down
6 changes: 5 additions & 1 deletion man/as_polars_series.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/polars_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/series__to_r_vector.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "r-polars"
version = "1.7.1-rc.1"
version = "1.9.0-rc.1"
edition = "2024"
rust-version = "1.89.0"
publish = false
Expand Down
32 changes: 31 additions & 1 deletion src/rust/src/series/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl PlRSeries {
series.cast(&DataType::Float64).unwrap().f64().unwrap(),
))),
DataType::Float64 => Ok(<Sexp>::from(Wrap(series.f64().unwrap()))),
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
DataType::Categorical(_, _) => {
let r_func: FunctionSexp =
<Sexp>::from(savvy::eval_parse_text("as.factor")?).try_into()?;
let chr_vec =
Expand All @@ -159,6 +159,36 @@ impl PlRSeries {
let _ = args.add("x", chr_vec);
Ok(r_func.call(args)?.into())
}
DataType::Enum(_, mapping) => {
// Build factor manually to preserve Enum's level order
// Convert to physical representation and cast to u32
let phys = series.to_physical_repr();
let phys_u32 = phys.cast(&DataType::UInt32).map_err(RPolarsErr::from)?;
let phys_ca = phys_u32.u32().map_err(RPolarsErr::from)?;
let len = phys_ca.len();

// Get physical codes (0-indexed) and convert to R's 1-indexed
let mut codes = OwnedIntegerSexp::new(len)?;
for (i, opt_v) in phys_ca.into_iter().enumerate() {
match opt_v {
Some(v) => codes.set_elt(i, (v + 1) as i32)?,
None => codes.set_na(i)?,
}
}

// Get categories from Enum mapping and set as levels
let categories = unsafe {
StringChunked::from_chunks(
PlSmallStr::from_static("category"),
vec![mapping.to_arrow(true)],
)
};
let levels: Sexp = Wrap(&categories).into();
codes.set_attrib("levels", levels)?;
codes.set_class(["factor"])?;

Ok(codes.into())
}
DataType::List(inner) => unsafe {
let len = series.len();
let mut list = OwnedListSexp::new(len, false)?;
Expand Down
24 changes: 24 additions & 0 deletions tests/testthat/_snaps/as_polars_series.md
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,30 @@
null
]

# use option to convert factor to enum

Code
as_polars_series(factor(1:2))
Output
shape: (2,)
Series: '' [enum]
[
"1"
"2"
]

---

Code
as_polars_series(factor(1:2), factor_as_enum = FALSE)
Output
shape: (2,)
Series: '' [cat]
[
"1"
"2"
]

# as_polars_series.default throws an error

Code
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/polars_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@
Error in `polars_options()`:
! `to_r_vector.non_existent` must be one of "raise" or "null", not "foo".

# options are validated by polars_options() polars.factor_as_enum

Code
print(polars_options())
Condition
Error in `polars_options()`:
! `factor_as_enum` must be `TRUE` or `FALSE`, not the string "foo".

# options for to_r_vector() works: polars.to_r_vector.uint8 = integer

Code
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-as_polars_series.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ patrick::with_parameters_test_that(
}
)

test_that("use option to convert factor to enum", {
  # Turn the global option on for the duration of this test only; withr
  # restores the previous value when the test block exits.
  withr::local_options(polars.factor_as_enum = TRUE)

  # With no explicit argument, the global option decides: expect an Enum.
  expect_snapshot(as_polars_series(factor(1:2)))

  # An explicit argument takes precedence over the option: expect a Categorical.
  expect_snapshot(as_polars_series(factor(1:2), factor_as_enum = FALSE))
})

test_that("as_polars_series.default throws an error", {
x <- 1
class(x) <- "foo"
Expand Down
33 changes: 33 additions & 0 deletions tests/testthat/test-dataframe-s3-base.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,36 @@ patrick::with_parameters_test_that(
expect_snapshot(dat[[value]], error = TRUE)
}
)

test_that("as.data.frame() and as.list() keep all levels of Enums", {
  # Levels are deliberately not in alphabetical order, and "c" never occurs in
  # the data, so both level order and unobserved levels are exercised.
  enum_levels <- c("b", "a", "c")
  enum_df <- pl$DataFrame(
    x = factor(c("a", "b"), levels = enum_levels)
  )$cast(x = pl$Enum(enum_levels))

  # Export through as.data.frame()
  exported_df <- as.data.frame(enum_df)
  expect_equal(levels(exported_df[["x"]]), enum_levels)

  # Export through as.list() with plain R vectors instead of Series
  exported_list <- as.list(enum_df, as_series = FALSE)
  expect_equal(levels(exported_list[["x"]]), enum_levels)

  # Re-casting to an Enum with a different category order changes the levels
  recast_df <- enum_df$cast(x = pl$Enum(c("c", "b", "a")))
  expect_equal(
    levels(as.data.frame(recast_df)[["x"]]),
    c("c", "b", "a")
  )
})
1 change: 1 addition & 0 deletions tests/testthat/test-polars_options.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ patrick::with_parameters_test_that(
"polars.to_r_vector.as_clock_class",
"polars.to_r_vector.ambiguous",
"polars.to_r_vector.non_existent",
"polars.factor_as_enum",
),
{
withr::with_options(
Expand Down
9 changes: 0 additions & 9 deletions tools/lib-sums.tsv

This file was deleted.

Loading