Rdatatable
diff --git a/‎.dev/CRAN_Release.cmd‎
Lines changed: 16 additions & 13 deletions b/‎.dev/CRAN_Release.cmd‎
Lines changed: 16 additions & 13 deletions
diff --git a/‎.github/workflows/R-CMD-check.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/R-CMD-check.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitlab-ci.yml‎
Lines changed: 1 addition & 0 deletions b/‎.gitlab-ci.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎GOVERNANCE.md‎
Lines changed: 1 addition & 1 deletion b/‎GOVERNANCE.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 2 additions & 1 deletion b/‎NAMESPACE‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎NEWS.md‎
Lines changed: 29 additions & 8 deletions b/‎NEWS.md‎
Lines changed: 29 additions & 8 deletions
diff --git a/‎R/IDateTime.R‎
Lines changed: 10 additions & 3 deletions b/‎R/IDateTime.R‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎R/between.R‎
Lines changed: 2 additions & 2 deletions b/‎R/between.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/data.table.R‎
Lines changed: 9 additions & 5 deletions b/‎R/data.table.R‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎R/setops.R‎
Lines changed: 4 additions & 4 deletions b/‎R/setops.R‎
Lines changed: 4 additions & 4 deletions
@@ -584,26 +584,29 @@ bunzip2 inst/tests/*.Rraw.bz2  # decompress *.Rraw again so as not to commit com
 # Many thanks!
 # Best, Tyson
 # ------------------------------------------------------------
-# DO NOT commit or push to GitHub. Leave 4 files (.dev/CRAN_Release.cmd, DESCRIPTION, NEWS and init.c) edited and not committed. Include these in a single and final bump commit below.
-# DO NOT even use a PR. Because PRs build binaries and we don't want any binary versions of even release numbers available from anywhere other than CRAN.
+
 # Leave milestone open with a 'release checks' issue open. Keep updating status there.
 # ** If on EC2, shutdown instance. Otherwise get charged for potentially many days/weeks idle time with no alerts **
 # If it's evening, SLEEP.
 # It can take a few days for CRAN's checks to run. If any issues arise, backport locally. Resubmit the same even version to CRAN.
 # CRAN's first check is automatic and usually received within an hour. WAIT FOR THAT EMAIL.
 # When CRAN's email contains "Pretest results OK pending a manual inspection" (or similar), or if not and it is known why not and ok, then bump dev.
 
-###### Bump dev for NON-PATCH RELEASE
-# 0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone.
-# 1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd
-# 2. Bump minor version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git.
-# 3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on <today>)" on the released heading.
-# 4. Bump minor version in dllVersion() in init.c
-# 5. Bump 3 minor version numbers in Makefile
-# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.16.99 to 1.16.99 inc below, 1.16.0 to 1.17.0 above, 1.15.0 to 1.16.0 below
-# 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
-# 8. Push to master with this consistent commit message: "1.17.0 on CRAN. Bump to 1.17.99"
-# 9. Take sha from the previous step and run `git tag 1.17.0 96c..sha..d77` then `git push origin 1.16.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
+###### After submission for NON-PATCH RELEASE
+# 0. Start a new branch `cran-x.y.0` with the code as submitted to CRAN
+#    - Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd
+#    - The branch should have one commit with precisely these 4 files being edited
+# 1. Follow up on this branch with a commit bumping to the next dev version, using a consistent commit message like: "1.17.0 on CRAN. Bump to 1.17.99"
+#    - Bump minor version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git.
+#    - Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on <today>)" on the released heading.
+#    - Bump minor version in dllVersion() in init.c
+#    - Bump 3 minor version numbers in Makefile
+#    - Search and replace this .dev/CRAN_Release.cmd to update 1.16.99 to 1.17.99 inc below, 1.16.0 to 1.17.0 above, 1.15.0 to 1.16.0 below
+#    - Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
+# 2. Ideally, no PRs are reviewed while a CRAN submission is pending. Any reviews that do happen MUST target this branch, NOT master!
+# 3. Once the submission lands on CRAN, merge this branch WITHOUT SQUASHING!
+# 4. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone.
+# 5. Take SHA from the "...on CRAN. Bump to ..." commit and run `git tag 1.17.0 96c..sha..d77` then `git push origin 1.17.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
 ######
 
 ###### Branching policy for PATCH RELEASE
 
@@ -36,6 +36,7 @@ jobs:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      _R_CHECK_RD_CHECKRD_MINLEVEL_: -Inf
 
     steps:
       - uses: actions/checkout@v4
 
@@ -114,6 +114,7 @@ test-lin-rel:
   variables:
     _R_CHECK_FORCE_SUGGESTS_: "TRUE"
     OPENBLAS_MAIN_FREE: "1"
+    _R_CHECK_RD_CHECKRD_MINLEVEL_: "-Inf"
   script:
     - *install-deps
     - echo 'CFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
 
@@ -108,7 +108,7 @@ Please also make a note in the change log under [`# Governance history`](#govern
 
 # Finances and Funding
 
-data.table is a [NumFOCUS](https://numfocus.org/) project.  Donations to the data.table can be made at [https://numfocus.org/project/data-table]([https://numfocus.org/donate-to-data-table](https://app.hubspot.com/payments/FFWKWTTvKFdzqH?referrer=PAYMENT_LINK))
+data.table is a [NumFOCUS](https://numfocus.org/) project. Donations to data.table can be made at [https://numfocus.org/project/data-table](https://app.hubspot.com/payments/FFWKWTTvKFdzqH?referrer=PAYMENT_LINK).
 
 *NumFOCUS is a 501(c)(3) non-profit charity in the United States; as such, donations to NumFOCUS are tax-deductible as allowed by law. As with any donation, you should consult with your personal tax adviser or the IRS about your particular tax situation.*
 
 
@@ -153,8 +153,9 @@ if (getRversion() >= "3.6.0") {
 
 # IDateTime support:
 export(as.IDate,as.ITime,IDateTime)
-export(second,minute,hour,yday,wday,mday,week,isoweek,month,quarter,year,yearmon,yearqtr)
+export(second,minute,hour,yday,wday,mday,week,isoweek,isoyear,month,quarter,year,yearmon,yearqtr)
 
+if (getRversion() >= "4.3.0") S3method(chooseOpsMethod, IDate)
 S3method("[", ITime)
 S3method("+", IDate)
 S3method("-", IDate)
 
@@ -10,7 +10,16 @@
 
 ### NEW FEATURES
 
-1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
+1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
+
+    ```r
+    DT = data.table(a=c(1L, 2L, 1L), b=c(3L, 1L, 2L))
+    sort_by(DT, ~a + b)
+    #    a b
+    # 1: 1 2
+    # 2: 1 3
+    # 3: 2 1
+    ```
 
 2. `melt()` now supports using `patterns()` with `id.vars`, [#6867](https://github.com/Rdatatable/data.table/issues/6867). Thanks to Toby Dylan Hocking for the suggestion and PR.
 
@@ -56,6 +65,10 @@
 
 13. New `mergelist()` and `setmergelist()` similarly work _a la_ `Reduce()` to recursively merge a `list` of data.tables, [#599](https://github.com/Rdatatable/data.table/issues/599). Different join modes (_left_, _inner_, _full_, _right_, _semi_, _anti_, and _cross_) are supported through the `how` argument; duplicate handling goes through the `mult` argument. `setmergelist()` carefully avoids copies where one is not needed, e.g. in a 1:1 left join. Thanks Patrick Nicholson for the FR (in 2013!), @jangorecki for the PR, and @MichaelChirico for extensive reviews and fine-tuning.
 
+14. `fcoalesce()` and `setcoalesce()` gain a `nan` argument to control whether `NaN` values should be treated as missing (`nan=NA`, the default) or non-missing (`nan=NaN`), [#4567](https://github.com/Rdatatable/data.table/issues/4567). This provides full compatibility with `nafill()` behavior. Thanks to @ethanbsmith for the feature request and @Mukulyadav2004 for the implementation.
+
+15. New function `isoyear()` has been implemented as a complement to `isoweek()`, returning the ISO 8601 year corresponding to a given date, [#7154](https://github.com/Rdatatable/data.table/issues/7154). Thanks to @ben-schwen and @MichaelChirico for the suggestion and @venom1204 for the implementation.
+
 ### BUG FIXES
 
 1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.
@@ -74,7 +87,7 @@
 
 8. A data.table with a column of class `vctrs_list_of` (from package {vctrs}) prints as expected, [#5948](https://github.com/Rdatatable/data.table/issues/5948). Before, they could be printed messily, e.g. printing every entry in a nested data.frame. Thanks @jesse-smith for the report, @DavisVaughan and @r2evans for contributing, and @MichaelChirico for the PR.
 
-9.  Fixed incorrect sorting of merges where the first column of a key is a factor with non-`sort()`-ed levels (e.g. `factor(1:2, 2:1)` and it is joined to a character column, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix.
+9. Fixed incorrect sorting of merges where the first column of a key is a factor with non-`sort()`-ed levels (e.g. `factor(1:2, 2:1)`) and it is joined to a character column, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report, Benjamin Schwendinger for the fix, and @MichaelChirico for a follow-up fix caught by revdep testing.
 
 10. Spurious warnings from internal code in `cube()`, `rollup()`, and `groupingsets()` are no longer surfaced to the caller, [#6964](https://github.com/Rdatatable/data.table/issues/6964). Thanks @ferenci-tamas for the report and @venom1204 for the fix.
  
@@ -86,6 +99,12 @@
 
 14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
 
+15. `split.data.table()` output list elements retain the S3 class of the generating data.table, e.g. in `l=split(x, ...)` if `x` has class `my_class`, so will `l[[1]]` and so on, [#7105](https://github.com/Rdatatable/data.table/issues/7105). Thanks @m-muecke for the bug report and @MichaelChirico for the fix.
+
+16. `between()` is now more robust with `integer64` arguments. Combining small integer `x` with certain large `integer64` bounds no longer misinterprets the bounds as `double`; if a `double` bound cannot be losslessly converted into `integer64` for comparison with `integer64` `x`, an error is signalled instead of returning a wrong answer with a warning; [#7164](https://github.com/Rdatatable/data.table/issues/7164). Thanks @aitap for the bug report and the fix.
+
+17. `t1 - t2`, where one is an `IDate` and the other is a `Date`, is now consistent with the case where both are `IDate` or both are `Date`, [#4749](https://github.com/Rdatatable/data.table/issues/4749). Thanks @George9000 for the report and @MichaelChirico for the fix.
+
 ### NOTES
 
 1. The following in-progress deprecations have proceeded:
@@ -107,21 +126,23 @@
 
 5. A GitHub Actions workflow is now in place to warn the entire maintainer team, as well as any contributor following the GitHub repository, when the package is at risk of archival on CRAN, [#7008](https://github.com/Rdatatable/data.table/issues/7008). Thanks @tdhock for the original report and @Bisaloo and @TysonStanley for the fix.
 
-# data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
+6. Using a double vector in `set()`'s `i=` and/or `j=` no longer throws a warning about preferring integer, [#6594](https://github.com/Rdatatable/data.table/issues/6594). While it may improve efficiency to use integer, there's no guarantee it's an improvement and the difference is likely to be minimal. The coercion will still be reported under `datatable.verbose=TRUE`. For package/production use cases, static analyzers such as `lintr::implicit_integer_linter()` can also report when numeric literals should be rewritten as integer literals.
+
+## data.table [v1.17.8](https://github.com/Rdatatable/data.table/milestone/41) (6 July 2025)
 
 1. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070).
 
-# data.table [v1.17.6](https://github.com/Rdatatable/data.table/milestone/40) (15 June 2025)
+## data.table [v1.17.6](https://github.com/Rdatatable/data.table/milestone/40) (15 June 2025)
 
 1. On a heavily loaded machine, a `forder` thread could try to perform a zero-length copy from a null pointer, which was de-facto harmless but is against the C standard and was caught by additional CRAN checks, [#7051](https://github.com/Rdatatable/data.table/issues/7051). Thanks to @helske for the report and @aitap for the PR.
 
-# data.table [v1.17.4](https://github.com/Rdatatable/data.table/milestone/39) (25 May 2025)
+## data.table [v1.17.4](https://github.com/Rdatatable/data.table/milestone/39) (25 May 2025)
 
 1. The C code now avoids passing invalid data pointers from 0-length vectors to `memcpy()`, which previously caused undefined behaviour. Thanks to Prof. Brian D. Ripley for the report and Michael Chirico for the fix, [#6911](https://github.com/Rdatatable/data.table/pull/6911).
 
-# data.table [v1.17.2](https://github.com/Rdatatable/data.table/milestone/38) (7 May 2025)
+## data.table [v1.17.2](https://github.com/Rdatatable/data.table/milestone/38) (7 May 2025)
 
-## BUG FIXES
+### BUG FIXES
 
 1. `fwrite(compress="gzip")` once again produces a gzip header when the column names are missing or disabled, [#6852](https://github.com/Rdatatable/data.table/issues/6852). Thanks @maxscheiber for the report and @aitap for the fix.
 
@@ -137,7 +158,7 @@
 
 7. `as.data.table()` now properly handles keys: specifying keys sets them, omitting keys preserves existing ones, and setting `key=NULL` clears them, [#6859](https://github.com/Rdatatable/data.table/issues/6859). Thanks @brookslogan for the report and @Mukulyadav2004 for the fix.
 
-## NOTES
+### NOTES
 
 1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.
 
 
@@ -91,6 +91,8 @@ round.IDate = function(x, digits=c("weeks", "months", "quarters", "years"), ...)
           years = ISOdate(year(x), 1L, 1L)))
 }
 
+chooseOpsMethod.IDate = function(x, y, mx, my, cl, reverse) inherits(y, "Date")
+
 #Adapted from `+.Date`
 `+.IDate` = function(e1, e2) {
   if (nargs() == 1L)
@@ -115,7 +117,7 @@ round.IDate = function(x, digits=c("weeks", "months", "quarters", "years"), ...)
   if (storage.mode(e1) != "integer")
     internal_error("storage mode of IDate is somehow no longer integer") # nocov
   if (nargs() == 1L)
-    stopf("unary - is not defined for \"IDate\" objects")
+    stopf('unary - is not defined for "IDate" objects')
   if (inherits(e2, "difftime"))
     internal_error("difftime objects may not be subtracted from IDate, but Ops dispatch should have intervened to prevent this") # nocov
 
@@ -127,7 +129,12 @@ round.IDate = function(x, digits=c("weeks", "months", "quarters", "years"), ...)
     # ii) .Date was newly exposed in R some time after 3.4.4
   }
   ans = as.integer(unclass(e1) - unclass(e2))
-  if (!inherits(e2, "Date")) setattr(ans, "class", c("IDate", "Date"))
+  if (inherits(e2, "Date")) {
+    setattr(ans, "class", "difftime")
+    setattr(ans, "units", "days")
+  } else {
+    setattr(ans, "class", c("IDate", "Date"))
+  }
   ans
 }
 
@@ -355,7 +362,7 @@ isoweek = function(x) as.integer(format(as.IDate(x), "%V"))
 #  nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
 #  year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
 #  1L + (nearest_thurs - year_start) %/% 7L
-
+isoyear = function(x) as.integer(format(as.IDate(x), "%G"))
 
 month   = function(x) convertDate(as.IDate(x), "month")
 quarter = function(x) convertDate(as.IDate(x), "quarter")
 
@@ -30,8 +30,8 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE,
   }
   if (is.i64(x)) {
     if (!requireNamespace("bit64", quietly=TRUE)) stopf("trying to use integer64 class when 'bit64' package is not installed") # nocov
-    if (!is.i64(lower) && is.numeric(lower)) lower = bit64::as.integer64(lower)
-    if (!is.i64(upper) && is.numeric(upper)) upper = bit64::as.integer64(upper)
+    if (!is.i64(lower) && (is.integer(lower) || fitsInInt64(lower))) lower = bit64::as.integer64(lower)
+    if (!is.i64(upper) && (is.integer(upper) || fitsInInt64(upper))) upper = bit64::as.integer64(upper)
   }
   is.supported = function(x) is.numeric(x) || is.character(x) || is.px(x)
   if (is.supported(x) && is.supported(lower) && is.supported(upper)) {
 
@@ -2047,8 +2047,9 @@ replace_dot_alias = function(e) {
   if (!.Call(CisOrderedSubset, irows, nrow(x)))
     return(NULL)
 
-  # see #1010. don't set key when i has no key, but irows is ordered and !roll
-  if (roll && length(irows) != 1L)
+  # see #1010. don't set key when i has no key, but irows is ordered and isFALSE(roll)
+  #   NB: roll could still be a string like 'nearest', #7146
+  if (!is.character(roll) && roll && length(irows) != 1L)
     return(NULL)
 
   new_key <- head(x_key, key_length)
@@ -2491,7 +2492,7 @@ Ops.data.table = function(e1, e2 = NULL)
 }
 
 split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TRUE, flatten = TRUE, ..., verbose = getOption("datatable.verbose")) {
-  if (!is.data.table(x)) stopf("x argument must be a data.table")
+  if (!is.data.table(x)) internal_error("x argument to split.data.table must be a data.table") # nocov
   stopifnot(is.logical(drop), is.logical(sorted), is.logical(keep.by),  is.logical(flatten))
   # split data.frame way, using `f` and not `by` argument
   if (!missing(f)) {
@@ -2566,8 +2567,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
   setattr(ll, "names", nm)
   # handle nested split
   if (flatten || length(by) == 1L) {
-    for (x in ll) .Call(C_unlock, x)
-    lapply(ll, setDT)
+    for (xi in ll) .Call(C_unlock, xi)
+    out = lapply(ll, setDT)
+    # TODO(#2000): just let setDT handle this
+    if (!identical(old_class <- class(x), c("data.table", "data.frame"))) for (xi in out) setattr(xi, "class", old_class)
+    out
     # alloc.col could handle DT in list as done in: c9c4ff80bdd4c600b0c4eff23b207d53677176bd
   } else if (length(by) > 1L) {
     lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)
 
@@ -160,8 +160,8 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu
       return(sprintf(
         "%s. 'target': %s. 'current': %s.",
         gettext("Datasets have different keys"),
-        if(length(k1)) brackify(k1) else gettextf("has no key"),
-        if(length(k2)) brackify(k2) else gettextf("has no key")
+        if(length(k1)) brackify(k1) else gettext("has no key"),
+        if(length(k2)) brackify(k2) else gettext("has no key")
       ))
     }
     # check index
@@ -171,8 +171,8 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu
       return(sprintf(
         "%s. 'target': %s. 'current': %s.",
         gettext("Datasets have different indices"),
-        if(length(i1)) brackify(i1) else gettextf("has no index"),
-        if(length(i2)) brackify(i2) else gettextf("has no index")
+        if(length(i1)) brackify(i1) else gettext("has no index"),
+        if(length(i2)) brackify(i2) else gettext("has no index")
       ))
     }
Original file line number	Diff line number	Diff line change
`@@ -30,8 +30,8 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE,`
`30`	`30`	`}`
`31`	`31`	`if (is.i64(x)) {`
`32`	`32`	`if (!requireNamespace("bit64", quietly=TRUE)) stopf("trying to use integer64 class when 'bit64' package is not installed") # nocov`
`33`		`- if (!is.i64(lower) && is.numeric(lower)) lower = bit64::as.integer64(lower)`
`34`		`- if (!is.i64(upper) && is.numeric(upper)) upper = bit64::as.integer64(upper)`
	`33`	`+ if (!is.i64(lower) && (is.integer(lower) \|\| fitsInInt64(lower))) lower = bit64::as.integer64(lower)`
	`34`	`+ if (!is.i64(upper) && (is.integer(upper) \|\| fitsInInt64(upper))) upper = bit64::as.integer64(upper)`
`35`	`35`	`}`
`36`	`36`	`is.supported = function(x) is.numeric(x) \|\| is.character(x) \|\| is.px(x)`
`37`	`37`	`if (is.supported(x) && is.supported(lower) && is.supported(upper)) {`