Skip to content
Merged
27 changes: 26 additions & 1 deletion .ci/atime/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,31 @@ for (extra.arg in extra.args.6107){
extra.test.list[[sprintf("fread(%s) improved in #6107", extra.arg)]] <- this.test
}

# Test case adapted from https://github.com/Rdatatable/data.table/pull/4386#issue-602528139 which is where the performance was improved.
# Register one forderv() performance test per retGrp setting.
# The "T"/"F" labels are only used to build the test names; the logical value
# substituted into the timed expression comes from an explicit mapping, so it
# does not depend on the reassignable T/F shorthands being left untouched.
for (retGrp_chr in c("T", "F")) {
  extra.test.list[[sprintf(
    "forderv(retGrp=%s) improved in #4386", retGrp_chr
  )]] <- list(
    # setup is run for each data size N and is not part of the timed expression.
    setup = quote({
      dt <- data.table(group = rep(1:2, length.out = N))
    }),
    # substitute() replaces RETGRP with the literal TRUE/FALSE for this test.
    expr = substitute({
      # Required for test, un-documented, comments in forder.c say it is for debugging only.
      # Without this option set to TRUE the performance improvement is not visible.
      old.opt <- options(datatable.forder.auto.index = TRUE)
      data.table:::forderv(dt, "group", retGrp = RETGRP)
      options(old.opt) # so the option does not affect other tests.
    }, list(RETGRP = switch(retGrp_chr, T = TRUE, F = FALSE))),
    ## From ?bench::mark, "Each expression will always run at least twice,
    ## once to measure the memory allocation and store results
    ## and one or more times to measure timing."
    ## So for atime(times=10) that means 11 times total.
    ## First time for memory allocation measurement,
    ## (also sets the index of dt in this example),
    ## then 10 more times for time measurement.
    ## Timings should be constant if the cached index is used (Fast),
    ## and (log-)linear if the index is re-computed (Slow).
    Slow = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/b0efcf59442a7d086c6df17fa6a45c81b082322e) in the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved.
    Fast = "ffe431fbc1fe2d52ed9499f78e7e16eae4d71a93" # Last commit of the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved.
  )
}

# A list of performance tests.
#
# See documentation in https://github.com/Rdatatable/data.table/wiki/Performance-testing for best practices.
Expand Down Expand Up @@ -207,6 +232,6 @@ test.list <- atime::atime_test_list(
expr = data.table:::melt(DT, measure.vars = measure.vars),
Slow = "fd24a3105953f7785ea7414678ed8e04524e6955", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/ed72e398df76a0fcfd134a4ad92356690e4210ea) of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue
Fast = "ed72e398df76a0fcfd134a4ad92356690e4210ea"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue

tests=extra.test.list)
# nolint end: undesirable_operator_linter.
Loading