Skip to content

Commit 3b14dc3

Browse files
Merge branch 'master' into fix_filesize_to_str
2 parents 0ea8f93 + db834d4 commit 3b14dc3

File tree

16 files changed

+348
-301
lines changed

16 files changed

+348
-301
lines changed

.ci/.lintr.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ linters = c(dt_linters, all_linters(
1616
# system_time_linter = undesirable_function_linter(c(
1717
# system.time = "Only run timings in benchmark.Rraw"
1818
# )),
19+
undesirable_function_linter(c(
20+
cat = "Use catf to enable translations",
21+
message = "Use messagef to avoid fragmented translations.",
22+
warning = "Use warningf to avoid fragmented translations.",
23+
stop = "Use stopf to avoid fragmented translations.",
24+
NULL
25+
)),
1926
# undesirable_function_linter(modify_defaults(
2027
# default_undesirable_functions,
2128
# ifelse = "Use fifelse instead.",

.ci/atime/tests.R

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ for (extra.arg in extra.args.6107){
1313
tmp_csv = tempfile()
1414
fwrite(DT, tmp_csv)
1515
},
16+
FasterIO = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
1617
Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1718
Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1819
this.test$expr = str2lang(sprintf("data.table::fread(tmp_csv, %s)", extra.arg))
@@ -128,6 +129,18 @@ test.list <- atime::atime_test_list(
128129
paste0('useDynLib(', new.Package_))
129130
},
130131

132+
# Constant overhead improvement https://github.com/Rdatatable/data.table/pull/6925
133+
# Test case adapted from https://github.com/Rdatatable/data.table/pull/7022#discussion_r2107900643
134+
"fread disk overhead improved in #6925" = atime::atime_test(
135+
N = 2^seq(0, 20), # smaller N because we are doing multiple fread calls.
136+
setup = {
137+
fwrite(iris[1], iris.csv <- tempfile())
138+
},
139+
expr = replicate(N, data.table::fread(iris.csv)),
140+
Fast = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
141+
Slow = "e25ea80b793165094cea87d946d2bab5628f70a6" # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/60a01fa65191c44d7997de1843e9a1dfe5be9f72)
142+
),
143+
131144
# Performance regression discussed in https://github.com/Rdatatable/data.table/issues/4311
132145
# Test case adapted from https://github.com/Rdatatable/data.table/pull/4440#issuecomment-632842980 which is the fix PR.
133146
"shallow regression fixed in #4440" = atime::atime_test(
@@ -177,8 +190,9 @@ test.list <- atime::atime_test_list(
177190
# Fixed in https://github.com/Rdatatable/data.table/pull/4558
178191
"DT[by] fixed in #4558" = atime::atime_test(
179192
setup = {
193+
N9 <- as.integer(N * 0.9)
180194
d <- data.table(
181-
id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))),
195+
id = sample(c(seq.int(N9), sample(N9, N-N9, TRUE))),
182196
v1 = sample(5L, N, TRUE),
183197
v2 = sample(5L, N, TRUE)
184198
)
@@ -251,5 +265,15 @@ test.list <- atime::atime_test_list(
251265
Before = "f339aa64c426a9cd7cf2fcb13d91fc4ed353cd31", # Parent of the first commit https://github.com/Rdatatable/data.table/commit/fcc10d73a20837d0f1ad3278ee9168473afa5ff1 in the PR https://github.com/Rdatatable/data.table/pull/6393/commits with major change to fwrite with gzip.
252266
PR = "3630413ae493a5a61b06c50e80d166924d2ef89a"), # Close-to-last merge commit in the PR.
253267

254-
tests=extra.test.list)
268+
# Test case created directly using the atime code below (not adapted from any other benchmark), based on the PR, Removes unnecessary data.table call from as.data.table.array https://github.com/Rdatatable/data.table/pull/7010
269+
"as.data.table.array improved in #7010" = atime::atime_test(
270+
setup = {
271+
dims = c(N, 1, 1)
272+
arr = array(seq_len(prod(dims)), dim=dims)
273+
},
274+
expr = data.table:::as.data.table.array(arr, na.rm=FALSE),
275+
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
276+
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
277+
278+
tests=extra.test.list)
255279
# nolint end: undesirable_operator_linter.

.dev/CRAN_Release.cmd

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,6 @@ grep "PROTECT_PTR" ./src/*.c
9595
# No use of long long, instead use int64_t. TODO
9696
# grep "long long" ./src/*.c
9797

98-
// No use of llu, lld, zd or zu
99-
grep -nE "(llu|lld|zd|zu)" src/*.[hc]
100-
// Comment moved here from fread.c on 19 Nov 2019
101-
// [Moved from fread.c on 19 Nov 2019] On Windows variables of type `size_t` cannot be printed
102-
// with "%zu" in the `snprintf()` function. For those variables we used to cast them into
103-
// `unsigned long long int` before printing, and defined (llu) to make the cast shorter.
104-
// We're now observing warnings from gcc-8 with -Wformat-extra-args, #4062. So
105-
// now we're more strict and cast to [u]int64_t and use PRIu64/PRId64 from <inttypes.h>
106-
// In many cases the format specifier is passed to our own macro (e.g. DTPRINT) or to Rprintf(),
107-
// error() etc, and even if they don't call sprintf() now, they could in future.
108-
10998
# No tabs in C or R code (sorry, Richard Hendricks)
11099
grep -P "\t" ./R/*.R
111100
grep -P "\t" ./src/*.c

.dev/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
# data.table developer
22

3+
Inside this repository we provide some tools to improves development experience. Most notable is the `cc()` helper function that recompiles C sources, reloads R sources, and runs tests.
4+
5+
Typical development workflow will then look like:
6+
7+
0. `git checkout -b [branch]`
8+
1. edit package files
9+
2. run `R`
10+
3. call `cc(TRUE)`
11+
4. (if needed) go to point 1.
12+
13+
Once we (and tests) are satisfied with changes, we then run complete package checks:
14+
15+
0. in shell terminal
16+
1. run `make build`
17+
2. run `make check`
18+
3. (optionally) run on `r-devel`, e.g. `R=~/build/R-devel/bin/R make check`
19+
4. (optionally) run on ancient R, e.g. `R=~/build/R-340/bin/R make check`
20+
5. `git commit -m '[changes description]'`
21+
6. `git push [remote] [branch]`
22+
323
## Setup
424

525
To use the optional helper function `cc()`, one needs to set up the project path and source `.dev/cc.R` to use `cc()` conveniently. This works through creating an additional `.Rprofile` in the `data.table` directory.

.github/CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ See [`?test`](https://rdatatable.gitlab.io/data.table/reference/test.html).
7676
1. **[Squashing Github pull requests into a single commit](http://eli.thegreenplace.net/2014/02/19/squashing-github-pull-requests-into-a-single-commit)**.
7777
1. **[Github help](https://help.github.com/articles/using-pull-requests/)** - you'll need the *fork and pull* model.
7878

79+
#### Performance testing
80+
81+
If your PR may have an effect on time/memory usage, please consider adding a performance test, either in the same PR, or a follow-up PR. Note that first-time contributors _must_ do so in a follow-up PR, since the tests are only run on PRs from branches created directly in the Rdatatable/data.table repo. See the [Performance testing](https://github.com/Rdatatable/data.table/wiki/Performance-testing) wiki page for details.
82+
7983
Minimal first time PR
8084
---------------------
8185

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Authors@R: c(
5656
person("Martin","Morgan", role="ctb"),
5757
person("Michael","Quinn", role="ctb"),
5858
person("@javrucebo","", role="ctb"),
59-
person("@marc-outins","", role="ctb"),
59+
person("Marc","Halperin", role="ctb"),
6060
person("Roy","Storey", role="ctb"),
6161
person("Manish","Saraswat", role="ctb"),
6262
person("Morgan","Jacob", role="ctb"),
@@ -101,5 +101,6 @@ Authors@R: c(
101101
person("Vijay", "Lulla", role="ctb"),
102102
person("Aljaž", "Sluga", role="ctb"),
103103
person("Bill", "Evans", role="ctb"),
104-
person("Reino", "Bruner", role="ctb")
104+
person("Reino", "Bruner", role="ctb"),
105+
person(comment=c(github="@badasahog"), role="ctb")
105106
)

0 commit comments

Comments
 (0)