You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Test case adapted from https://github.com/Rdatatable/data.table/issues/6105#issue-2268691745 which is where the issue was reported.
# https://github.com/Rdatatable/data.table/pull/6107 fixed performance across 3 ways to specify a column as Date, and we test each individually.
# Each element is the source text of one extra argument; the loop that follows
# iterates over these strings (presumably splicing each into the timed call --
# confirm against the loop body).
extra.args.6107 <- c(
  "colClasses=list(Date='date')",
  "colClasses='Date'",
  "select=list(Date='date')")
# Starts empty; presumably filled with one test per extra argument by the loop
# that follows -- confirm against the loop body.
extra.test.list <- list()
8
+
for (extra.arginextra.args.6107){
9
+
this.test<-atime::atime_test(
10
+
setup= {
11
+
set.seed(1)
12
+
DT= data.table(date=.Date(sample(20000, N, replace=TRUE)))
13
+
tmp_csv= tempfile()
14
+
fwrite(DT, tmp_csv)
15
+
},
16
+
Slow="e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
17
+
Fast="a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
"forderv(retGrp=%s) improved in #4386", retGrp_chr
25
+
)]] <-list(
26
+
setup= quote({
27
+
dt<- data.table(group= rep(1:2, l=N))
28
+
}),
29
+
expr= substitute({
30
+
old.opt<- options(datatable.forder.auto.index=TRUE) # required for test, un-documented, comments in forder.c say it is for debugging only.
31
+
data.table:::forderv(dt, "group", retGrp=RETGRP)
32
+
options(old.opt) # so the option does not affect other tests.
33
+
}, list(RETGRP=eval(str2lang(retGrp_chr)))),
34
+
## From ?bench::mark, "Each expression will always run at least twice,
35
+
## once to measure the memory allocation and store results
36
+
## and one or more times to measure timing."
37
+
## So for atime(times=10) that means 11 times total.
38
+
## First time for memory allocation measurement,
39
+
## (also sets the index of dt in this example),
40
+
## then 10 more times for time measurement.
41
+
## Timings should be constant if the cached index is used (Fast),
42
+
## and (log-)linear if the index is re-computed (Slow).
43
+
Slow="b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/b0efcf59442a7d086c6df17fa6a45c81b082322e) in the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved.
44
+
Fast="ffe431fbc1fe2d52ed9499f78e7e16eae4d71a93"# Last commit of the PR (https://github.com/Rdatatable/data.table/pull/4386/commits) where the performance was improved.
45
+
)
46
+
1
47
# A list of performance tests.
2
48
#
49
+
# See documentation in https://github.com/Rdatatable/data.table/wiki/Performance-testing for best practices.
50
+
#
3
51
# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments:
4
52
# - N: A numeric sequence of data sizes to vary.
5
53
# - setup: An expression evaluated for every data size before measuring time/memory.
@@ -17,6 +65,8 @@
17
65
# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information.
# Common N and pkg.edit.fun are defined here, and inherited in all test cases below which do not re-define them.
69
+
N= as.integer(10^seq(1, 7, by=0.25)),
20
70
# A function to customize R package metadata and source files to facilitate version-specific installation and testing.
21
71
#
22
72
# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
Regression="b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression
88
137
Fixed="9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440)
89
138
90
-
# Test based on: https://github.com/Rdatatable/data.table/issues/5424
91
-
# Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491
Before="be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
106
-
Regression="e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
107
-
Fixed="58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)
153
+
Before="be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR (https://github.com/Rdatatable/data.table/pull/4491/commits) that introduced the issue
154
+
Regression="e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR (https://github.com/Rdatatable/data.table/pull/4491/commits) that introduced the issue
155
+
Fixed="58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR (https://github.com/Rdatatable/data.table/pull/5463/commits) that fixed the regression
Slow="c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801)
122
-
Fast="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
123
-
)
168
+
Slow="c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) in the PR (https://github.com/Rdatatable/data.table/pull/5427/commits) that fixes the issue
169
+
Fast="af48a805e7a5026a0c2d0a7fd9b587fea5cfa3c4"), # Last commit in the PR (https://github.com/Rdatatable/data.table/pull/5427/commits) that fixes the issue
170
+
171
+
# Test case adapted from https://github.com/Rdatatable/data.table/issues/4200#issuecomment-645980224 which is where the issue was reported.
172
+
# Fixed in https://github.com/Rdatatable/data.table/pull/4558
Before="7a9eaf62ede487625200981018d8692be8c6f134", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/515de90a6068911a148e54343a3503043b8bb87c) in the PR (https://github.com/Rdatatable/data.table/pull/4164/commits) that introduced the regression
183
+
Regression="c152ced0e5799acee1589910c69c1a2c6586b95d", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/15f0598b9828d3af2eb8ddc9b38e0356f42afe4f) in the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
184
+
Fixed="f750448a2efcd258b3aba57136ee6a95ce56b302"), # Second commit of the PR (https://github.com/Rdatatable/data.table/pull/4558/commits) that fixes the regression
185
+
# Issue with sorting again when already sorted, as reported in https://github.com/Rdatatable/data.table/issues/4498
# Test case adapted from https://github.com/Rdatatable/data.table/pull/4501#issue-625311918 which is the fix PR.
"DT[,.SD] improved in #4501" = atime::atime_test(
  # setup builds a one-column character table of N random values and keys it
  # on V1, so the timed expression always starts from an already-keyed table.
  setup = {
    set.seed(1)
    L = as.data.table(as.character(rnorm(N, 1, 0.5)))
    setkey(L, V1)
  },
  ## New DT can safely retain key.
  expr = data.table:::`[.data.table`(L, , .SD),
  Fast = "353dc7a6b66563b61e44b2fa0d7b73a0f97ca461", # Close-to-last merge commit in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue
  Slow = "3ca83738d70d5597d9e168077f3768e32569c790", # Circa 2024 master parent of close-to-last merge commit (https://github.com/Rdatatable/data.table/commit/353dc7a6b66563b61e44b2fa0d7b73a0f97ca461) in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue
  Slower = "cacdc92df71b777369a217b6c902c687cf35a70d"), # Circa 2020 parent of the first commit (https://github.com/Rdatatable/data.table/commit/74636333d7da965a11dad04c322c752a409db098) in the PR (https://github.com/Rdatatable/data.table/pull/4501/commits) that fixes the issue
199
+
200
+
# Test case adapted from https://github.com/Rdatatable/data.table/issues/6286#issue-2412141289 which is where the issue was reported.
201
+
# Fixed in https://github.com/Rdatatable/data.table/pull/6296
202
+
"DT[by,verbose=TRUE] improved in #6296"=atime::atime_test(
Slow="a01f00f7438daf4612280d6886e6929fa8c8f76e", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/fc0c1e76408c34a8482f16f7421d262c7f1bde32) in the PR (https://github.com/Rdatatable/data.table/pull/6296/commits) that fixes the issue
209
+
Fast="f248bbe6d1204dfc8def62328788eaadcc8e17a1"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6296) that fixes the issue
210
+
211
+
# Test case adapted from https://github.com/Rdatatable/data.table/issues/5492#issue-1416598382 which is where the issue was reported,
212
+
# and from https://github.com/Rdatatable/data.table/pull/5493#issue-1416656788 which is the fix PR.
Slow="0895fa247afcf6b38044bd5f56c0d209691ddb31", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/93ce3ce1373bf733ebd2036e2883d2ffe377ab58) in the PR (https://github.com/Rdatatable/data.table/pull/5493/commits) that fixes the issue
220
+
Fast="2d1a0575f87cc50e90f64825c30d7a6cb6b05dd7"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5493) that fixes the issue
221
+
222
+
# Test case created directly using the atime code below (not adapted from any other benchmark), based on the issue/fix PR https://github.com/Rdatatable/data.table/pull/5054#issue-930603663 "melt should be more efficient when there are missing input columns."
Slow="fd24a3105953f7785ea7414678ed8e04524e6955", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/ed72e398df76a0fcfd134a4ad92356690e4210ea) of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue
234
+
Fast="ed72e398df76a0fcfd134a4ad92356690e4210ea"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5054) that fixes the issue
0 commit comments