Skip to content

Commit 53908cb

Browse files
Merge branch 'master' into staticBufferRemoval
2 parents 5b4cfc7 + f032d66 commit 53908cb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+858
-824
lines changed

.ci/atime/tests.R

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ for (extra.arg in extra.args.6107){
1313
tmp_csv = tempfile()
1414
fwrite(DT, tmp_csv)
1515
},
16+
FasterIO = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
1617
Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1718
Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1819
this.test$expr = str2lang(sprintf("data.table::fread(tmp_csv, %s)", extra.arg))
@@ -128,6 +129,18 @@ test.list <- atime::atime_test_list(
128129
paste0('useDynLib(', new.Package_))
129130
},
130131

132+
# Constant overhead improvement https://github.com/Rdatatable/data.table/pull/6925
133+
# Test case adapted from https://github.com/Rdatatable/data.table/pull/7022#discussion_r2107900643
134+
"fread disk overhead improved in #6925" = atime::atime_test(
135+
N = 2^seq(0, 20), # smaller N because we are doing multiple fread calls.
136+
setup = {
137+
fwrite(iris[1], iris.csv <- tempfile())
138+
},
139+
expr = replicate(N, data.table::fread(iris.csv)),
140+
Fast = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
141+
Slow = "e25ea80b793165094cea87d946d2bab5628f70a6" # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/60a01fa65191c44d7997de1843e9a1dfe5be9f72)
142+
),
143+
131144
# Performance regression discussed in https://github.com/Rdatatable/data.table/issues/4311
132145
# Test case adapted from https://github.com/Rdatatable/data.table/pull/4440#issuecomment-632842980 which is the fix PR.
133146
"shallow regression fixed in #4440" = atime::atime_test(
@@ -177,8 +190,9 @@ test.list <- atime::atime_test_list(
177190
# Fixed in https://github.com/Rdatatable/data.table/pull/4558
178191
"DT[by] fixed in #4558" = atime::atime_test(
179192
setup = {
193+
N9 <- as.integer(N * 0.9)
180194
d <- data.table(
181-
id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))),
195+
id = sample(c(seq.int(N9), sample(N9, N-N9, TRUE))),
182196
v1 = sample(5L, N, TRUE),
183197
v2 = sample(5L, N, TRUE)
184198
)
@@ -251,5 +265,15 @@ test.list <- atime::atime_test_list(
251265
Before = "f339aa64c426a9cd7cf2fcb13d91fc4ed353cd31", # Parent of the first commit https://github.com/Rdatatable/data.table/commit/fcc10d73a20837d0f1ad3278ee9168473afa5ff1 in the PR https://github.com/Rdatatable/data.table/pull/6393/commits with major change to fwrite with gzip.
252266
PR = "3630413ae493a5a61b06c50e80d166924d2ef89a"), # Close-to-last merge commit in the PR.
253267

254-
tests=extra.test.list)
268+
# Test case created directly using the atime code below (not adapted from any other benchmark), based on the PR, Removes unnecessary data.table call from as.data.table.array https://github.com/Rdatatable/data.table/pull/7010
269+
"as.data.table.array improved in #7010" = atime::atime_test(
270+
setup = {
271+
dims = c(N, 1, 1)
272+
arr = array(seq_len(prod(dims)), dim=dims)
273+
},
274+
expr = data.table:::as.data.table.array(arr, na.rm=FALSE),
275+
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
276+
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
277+
278+
tests=extra.test.list)
255279
# nolint end: undesirable_operator_linter.

.ci/linters/cocci/malloc_return_value_cast.cocci

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@ expression E;
44
@@
55
- (T)
66
malloc(E)
7+
8+
- (T)
9+
calloc(_, E)
10+
11+
- (T)
12+
realloc(_, E)

.github/CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ See [`?test`](https://rdatatable.gitlab.io/data.table/reference/test.html).
7676
1. **[Squashing Github pull requests into a single commit](http://eli.thegreenplace.net/2014/02/19/squashing-github-pull-requests-into-a-single-commit)**.
7777
1. **[Github help](https://help.github.com/articles/using-pull-requests/)** - you'll need the *fork and pull* model.
7878

79+
#### Performance testing
80+
81+
If your PR may have an effect on time/memory usage, please consider adding a performance test, either in the same PR, or a follow-up PR. Note that first-time contributors _must_ do so in a follow-up PR, since the tests are only run on PRs from branches created directly in the Rdatatable/data.table repo. See the [Performance testing](https://github.com/Rdatatable/data.table/wiki/Performance-testing) wiki page for details.
82+
7983
Minimal first time PR
8084
---------------------
8185

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,5 +101,6 @@ Authors@R: c(
101101
person("Vijay", "Lulla", role="ctb"),
102102
person("Aljaž", "Sluga", role="ctb"),
103103
person("Bill", "Evans", role="ctb"),
104-
person("Reino", "Bruner", role="ctb")
104+
person("Reino", "Bruner", role="ctb"),
105+
person(comment=c(github="@badasahog"), role="ctb")
105106
)

0 commit comments

Comments
 (0)