Skip to content

Commit 6faea76

Browse files
authored
GH-47257: [R] Fix truncation of time variables to work with numeric subseconds time with hms bindings (#47278)
### Rationale for this change Subsecond times truncated ### What changes are included in this PR? Work with times as milliseconds ### Are these changes tested? Yes ### Are there any user-facing changes? Yes * GitHub Issue: #47257 Authored-by: Nic Crane <[email protected]> Signed-off-by: Nic Crane <[email protected]>
1 parent 7c26ab5 commit 6faea76

File tree

2 files changed

+31
-19
lines changed

2 files changed

+31
-19
lines changed

r/R/dplyr-funcs-datetime.R

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ register_bindings_datetime_rounding <- function() {
831831
register_bindings_hms <- function() {
832832
numeric_to_time32 <- function(x) {
833833
# The only numeric which can be cast to time32 is int32 so double cast to make sure
834-
cast(cast(x, int32()), time32(unit = "s"))
834+
cast(cast(x, int32()), time32(unit = "ms"))
835835
}
836836

837837
datetime_to_time32 <- function(datetime) {
@@ -850,14 +850,14 @@ register_bindings_hms <- function() {
850850
abort("All arguments must be numeric or NA_real_")
851851
}
852852

853-
total_secs <- seconds +
854-
Expression$create("multiply_checked", minutes, 60) +
855-
Expression$create("multiply_checked", hours, 3600) +
856-
Expression$create("multiply_checked", days, 86400)
853+
total_ms <- Expression$create("multiply_checked", seconds, 1000) +
854+
Expression$create("multiply_checked", minutes, 60000) +
855+
Expression$create("multiply_checked", hours, 3600000) +
856+
Expression$create("multiply_checked", days, 86400000)
857857

858-
return(numeric_to_time32(total_secs))
858+
return(numeric_to_time32(total_ms))
859859
},
860-
notes = "subsecond times not supported"
860+
notes = "nanosecond times not supported"
861861
)
862862

863863
register_binding(
@@ -868,20 +868,22 @@ register_bindings_hms <- function() {
868868
}
869869

870870
if (call_binding("is.numeric", x)) {
871-
return(numeric_to_time32(x))
871+
return(numeric_to_time32(Expression$create("multiply_checked", x, 1000)))
872872
}
873873

874874
if (call_binding("is.character", x)) {
875-
dash <- call_binding("gsub", ":", "-", x)
876-
as_date_time_string <- call_binding("str_c", "1970-01-01", dash, sep = "-")
877-
as_date_time <- Expression$create(
875+
# Parse time strings using strptime with a time format
876+
# Since strptime expects full datetime strings, we need to prefix with a dummy date
877+
# Note: Arrow's strptime doesn't support subsecond precision for time parsing
878+
prefixed_string <- call_binding("str_c", "1970-01-01 ", x)
879+
parsed_datetime <- Expression$create(
878880
"strptime",
879-
as_date_time_string,
880-
options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)
881+
prefixed_string,
882+
options = list(format = "%Y-%m-%d %H:%M:%S", unit = 3L)
881883
)
882-
return(datetime_to_time32(as_date_time))
884+
return(datetime_to_time32(parsed_datetime))
883885
}
884886
},
885-
notes = "subsecond times not supported"
887+
notes = "subsecond precision not supported for character input"
886888
)
887889
}

r/tests/testthat/test-dplyr-funcs-datetime.R

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3769,28 +3769,38 @@ test_that("hms::hms", {
37693769
test_that("hms::as_hms", {
37703770
test_df <- tibble(
37713771
hms_string = c("0:7:45", "12:34:56"),
3772+
hms_subsec_string = c("0:7:45.1234", "12:34:56.12345"),
37723773
int = c(30L, 75L),
37733774
integerish_dbl = c(31, 76),
37743775
dbl = c(31.2, 76.4),
3775-
datetime = as.POSIXct(c(1645243500, 1745243500), tz = "UTC", origin = "1970-01-01")
3776+
nanosecs = c(31.21234564, 76.412345),
3777+
datetime = as.POSIXct(c(1645243500.123, 1745243500.123), tz = "UTC", origin = "1970-01-01")
37763778
)
37773779

37783780
compare_dplyr_binding(
37793781
.input %>%
37803782
mutate(
3781-
x2 = hms::as_hms(int),
3782-
x3 = hms::as_hms(integerish_dbl),
3783+
x1 = hms::as_hms(int),
3784+
x2 = hms::as_hms(integerish_dbl),
3785+
x3 = hms::as_hms(dbl),
37833786
x4 = hms::as_hms(datetime)
37843787
) %>%
37853788
collect(),
37863789
test_df
37873790
)
37883791

3792+
# can only do millisecond precision
37893793
expect_error(
3790-
arrow_table(test_df) %>% mutate(y = hms::as_hms(dbl)) %>% collect(),
3794+
arrow_table(test_df) %>% mutate(y = hms::as_hms(nanosecs)) %>% collect(),
37913795
"was truncated converting to int32"
37923796
)
37933797

3798+
# no subsecond precision with character strings
3799+
expect_error(
3800+
arrow_table(test_df) %>% mutate(y = hms::as_hms(hms_subsec_string)) %>% collect(),
3801+
"Failed to parse string"
3802+
)
3803+
37943804
skip_if_not_available("utf8proc")
37953805

37963806
compare_dplyr_binding(

0 commit comments

Comments
 (0)