Document and test rounding that can occur when parsing (#212)

DavisVaughan · web-flow · commit 700ba688513b · 2021-04-08T12:45:35.000-04:00
* Add parsing tests related to parsing into less precise containers

* Document rounding process when parsing
diff --git a/R/date.R b/R/date.R
@@ -908,6 +908,10 @@ date_set_zone.Date <- function(x, zone) {
 #' _`date_parse()` ignores both the `%z` and `%Z` commands,_ as clock treats
 #' Date as a _naive_ type, with a yet-to-be-specified time zone.
 #'
+#' If parsing a string with sub-daily components, such as hours, minutes or
+#' seconds, note that the conversion to Date will round those components to
+#' the nearest day. See the examples for a way to control this.
+#'
 #' @inheritParams zoned-parsing
 #'
 #' @return A Date.
@@ -931,6 +935,29 @@ date_set_zone.Date <- function(x, zone) {
 #' # A neat feature of `date_parse()` is the ability to parse
 #' # the ISO year-week-day format
 #' date_parse("2020-W01-2", format = "%G-W%V-%u")
+#'
+#' # ---------------------------------------------------------------------------
+#' # Rounding of sub-daily components
+#'
+#' # Note that parsing a string with time components into a Date will round
+#' # those components to the nearest day
+#' x <- c("2019-01-01 11", "2019-01-01 12")
+#'
+#' # Hour 12 rounds up to the next day
+#' date_parse(x, format = "%Y-%m-%d %H")
+#'
+#' # If you don't like this, one option is to just not parse the time component
+#' date_parse(x, format = "%Y-%m-%d")
+#'
+#' # A more general option is to parse the full string as a naive-time,
+#' # then round manually
+#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H", precision = "hour")
+#' nt
+#'
+#' nt <- time_point_floor(nt, "day")
+#' nt
+#'
+#' as.Date(nt)
 date_parse <- function(x, ..., format = NULL, locale = clock_locale()) {
   x <- naive_time_parse(x, ..., format = format, precision = "day", locale = locale)
   as.Date(x)
diff --git a/R/posixt.R b/R/posixt.R
@@ -1052,6 +1052,10 @@ date_set_zone.POSIXt <- function(x, zone) {
 #' `NA`s, or completely fails to parse, then no time zone will be able to be
 #' determined. In that case, the result will use `"UTC"`.
 #'
+#' If manually parsing sub-second components, be aware that they will be
+#' automatically rounded to the nearest second when converting them to POSIXct.
+#' See the examples for a way to control this.
+#'
 #' @inheritParams zoned-parsing
 #' @inheritParams as-zoned-time-naive-time
 #'
@@ -1105,6 +1109,30 @@ date_set_zone.POSIXt <- function(x, zone) {
 #'   "1970-10-25 01:00:00 EST"
 #' )
 #' date_time_parse_abbrev(abbrev_times, "America/New_York")
+#'
+#' # ---------------------------------------------------------------------------
+#' # Rounding of sub-second components
+#'
+#' # Generally, if you have a string with sub-second components, they will
+#' # be ignored when parsing into a date-time
+#' x <- c("2019-01-01 00:00:01.1", "2019-01-01 00:00:01.7")
+#'
+#' date_time_parse(x, "America/New_York")
+#'
+#' # If you manually try and parse those sub-second components with `%4S` to
+#' # read 2 digits of seconds, 1 decimal point, and 1 fractional digit, the
+#' # fractional component will be rounded to the nearest second
+#' date_time_parse(x, "America/New_York", format = "%Y-%m-%d %H:%M:%4S")
+#'
+#' # If you don't like this, parse the full string as a naive-time,
+#' # then round manually and convert to a POSIXct
+#' nt <- naive_time_parse(x, format = "%Y-%m-%d %H:%M:%S", precision = "millisecond")
+#' nt
+#'
+#' nt <- time_point_floor(nt, "second")
+#' nt
+#'
+#' as.POSIXct(nt, "America/New_York")
 NULL
 
 #' @rdname date-time-parse
diff --git a/man/date-time-parse.Rd b/man/date-time-parse.Rd
diff --git a/man/date_parse.Rd b/man/date_parse.Rd
diff --git a/tests/testthat/test-date.R b/tests/testthat/test-date.R
@@ -255,9 +255,7 @@ test_that("`%z` and `%Z` commands are ignored", {
   )
 })
 
-# TODO: We probably don't want this:
-# https://github.com/HowardHinnant/date/issues/657
-test_that("parsing into a less precise time point rounds rather than floors", {
+test_that("parsing into a date if you requested to parse time components rounds the time (#207)", {
   expect_identical(
     date_parse("2019-12-31 11:59:59", format = "%Y-%m-%d %H:%M:%S"),
     as.Date("2019-12-31")
diff --git a/tests/testthat/test-gregorian-year-month-day.R b/tests/testthat/test-gregorian-year-month-day.R
@@ -312,6 +312,24 @@ test_that("parsing NA returns NA", {
   )
 })
 
+test_that("parsing doesn't round parsed components more precise than the resulting container (#207)", {
+  # With year-month-day, only the year/month/day components are extracted at the end,
+  # the hour component isn't touched
+  expect_identical(
+    year_month_day_parse("2019-12-31 12", format = "%Y-%m-%d %H", precision = "day"),
+    year_month_day(2019, 12, 31)
+  )
+})
+
+test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", {
+  # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up immediately
+  # after parsing the `%S` command, not at the very end
+  expect_identical(
+    year_month_day_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%7S", precision = "millisecond"),
+    year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond")
+  )
+})
+
 # ------------------------------------------------------------------------------
 # calendar_group()
 
diff --git a/tests/testthat/test-naive-time.R b/tests/testthat/test-naive-time.R
@@ -272,6 +272,37 @@ test_that("%Z is completely ignored", {
   )
 })
 
+test_that("parsing rounds parsed components more precise than the resulting container (#207)", {
+  expect_identical(
+    naive_time_parse("2019-12-31 11", format = "%Y-%m-%d %H", precision = "day"),
+    as_naive_time(year_month_day(2019, 12, 31))
+  )
+  expect_identical(
+    naive_time_parse("2019-12-31 12", format = "%Y-%m-%d %H", precision = "day"),
+    as_naive_time(year_month_day(2020, 1, 1))
+  )
+
+  # If you don't try and parse them, it won't round
+  expect_identical(
+    naive_time_parse("2019-12-31 12", format = "%Y-%m-%d", precision = "day"),
+    as_naive_time(year_month_day(2019, 12, 31))
+  )
+})
+
+test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", {
+  # Default N for milliseconds is 6, so `%6S` (2 for seconds, 1 for decimal, 3 for subseconds)
+  expect_identical(
+    naive_time_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%S", precision = "millisecond"),
+    as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 123, subsecond_precision = "millisecond"))
+  )
+
+  # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up
+  expect_identical(
+    naive_time_parse("2019-01-01 01:01:01.1238", format = "%Y-%m-%d %H:%M:%7S", precision = "millisecond"),
+    as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond"))
+  )
+})
+
 # ------------------------------------------------------------------------------
 # format()
 
diff --git a/tests/testthat/test-zoned-time.R b/tests/testthat/test-zoned-time.R
@@ -230,6 +230,34 @@ test_that("`x` is translated to UTF-8", {
   )
 })
 
+test_that("leftover subseconds result in a parse failure", {
+  x <- "2019-01-01 01:01:01.1238-05:00[America/New_York]"
+
+  # This is fine
+  expect_identical(
+    zoned_time_parse_complete(x, precision = "microsecond"),
+    as_zoned_time(as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 123800, subsecond_precision = "microsecond")), "America/New_York")
+  )
+
+  # This defaults to `%6S`, which parses `01.123` then stops,
+  # leaving an `8` for `%z` to parse, resulting in a failure. Because everything
+  # fails, we get a UTC time zone.
+  expect_identical(
+    expect_warning(zoned_time_parse_complete(x, precision = "millisecond"), class = "clock_warning_parse_failures"),
+    as_zoned_time(naive_seconds(NA) + duration_milliseconds(NA), zone = "UTC")
+  )
+})
+
+test_that("parsing rounds parsed subsecond components more precise than the resulting container (#207)", {
+  x <- "2019-01-01 01:01:01.1238-05:00[America/New_York]"
+
+  # Requesting `%7S` parses the full `01.1238`, and the `1238` portion is rounded up
+  expect_identical(
+    zoned_time_parse_complete(x, precision = "millisecond", format = "%Y-%m-%d %H:%M:%7S%Ez[%Z]"),
+    as_zoned_time(as_naive_time(year_month_day(2019, 1, 1, 1, 1, 1, 124, subsecond_precision = "millisecond")), "America/New_York")
+  )
+})
+
 # ------------------------------------------------------------------------------
 # zoned_time_parse_abbrev()