Skip to content

Commit 13a516c

Browse files
Merge branch 'master' into fix/6898-ignore-tzone
2 parents 3d915e5 + f3f7ce0 commit 13a516c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+1042
-1043
lines changed

.ci/.lintr.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ linters = c(dt_linters, all_linters(
1616
# system_time_linter = undesirable_function_linter(c(
1717
# system.time = "Only run timings in benchmark.Rraw"
1818
# )),
19+
undesirable_function_linter(c(
20+
cat = "Use catf to enable translations",
21+
message = "Use messagef to avoid fragmented translations.",
22+
warning = "Use warningf to avoid fragmented translations.",
23+
stop = "Use stopf to avoid fragmented translations.",
24+
NULL
25+
)),
1926
# undesirable_function_linter(modify_defaults(
2027
# default_undesirable_functions,
2128
# ifelse = "Use fifelse instead.",

.ci/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Test jobs:
1212
- `test-lin-rel-cran` - `--as-cran` on Linux, strict test for final status of `R CMD check`.
1313
- `test-lin-dev-gcc-strict-cran` - `--as-cran` on Linux, `r-devel` built with `-enable-strict-barrier --disable-long-double`, test for compilation warnings, test for new NOTEs/WARNINGs from `R CMD check`.
1414
- `test-lin-dev-clang-cran` - same as `gcc-strict` job but R built with `clang` and no `--enable-strict-barrier --disable-long-double` flags.
15-
- `test-lin-ancient-cran` - Stated R dependency version (currently 3.3.0) on Linux.
15+
- `test-lin-ancient-cran` - Stated R dependency version (currently 3.4.0) on Linux.
1616
- `test-lin-dev-san` - `r-devel` on Linux built with `clang -fsanitize=address,undefined` (including LeakSanitizer), test for sanitizer output in tests and examples.
1717
- `test-win-rel` - `r-release` on Windows.
1818
- `test-win-dev` - `r-devel` on Windows.

.ci/atime/tests.R

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ for (extra.arg in extra.args.6107){
1313
tmp_csv = tempfile()
1414
fwrite(DT, tmp_csv)
1515
},
16+
FasterIO = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
1617
Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1718
Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
1819
this.test$expr = str2lang(sprintf("data.table::fread(tmp_csv, %s)", extra.arg))
@@ -128,6 +129,18 @@ test.list <- atime::atime_test_list(
128129
paste0('useDynLib(', new.Package_))
129130
},
130131

132+
# Constant overhead improvement https://github.com/Rdatatable/data.table/pull/6925
133+
# Test case adapted from https://github.com/Rdatatable/data.table/pull/7022#discussion_r2107900643
134+
"fread disk overhead improved in #6925" = atime::atime_test(
135+
N = 2^seq(0, 20), # smaller N because we are doing multiple fread calls.
136+
setup = {
137+
fwrite(iris[1], iris.csv <- tempfile())
138+
},
139+
expr = replicate(N, data.table::fread(iris.csv)),
140+
Fast = "60a01fa65191c44d7997de1843e9a1dfe5be9f72", # First commit of the PR (https://github.com/Rdatatable/data.table/pull/6925/commits) that reduced time usage
141+
Slow = "e25ea80b793165094cea87d946d2bab5628f70a6" # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/60a01fa65191c44d7997de1843e9a1dfe5be9f72)
142+
),
143+
131144
# Performance regression discussed in https://github.com/Rdatatable/data.table/issues/4311
132145
# Test case adapted from https://github.com/Rdatatable/data.table/pull/4440#issuecomment-632842980 which is the fix PR.
133146
"shallow regression fixed in #4440" = atime::atime_test(
@@ -177,8 +190,9 @@ test.list <- atime::atime_test_list(
177190
# Fixed in https://github.com/Rdatatable/data.table/pull/4558
178191
"DT[by] fixed in #4558" = atime::atime_test(
179192
setup = {
193+
N9 <- as.integer(N * 0.9)
180194
d <- data.table(
181-
id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))),
195+
id = sample(c(seq.int(N9), sample(N9, N-N9, TRUE))),
182196
v1 = sample(5L, N, TRUE),
183197
v2 = sample(5L, N, TRUE)
184198
)
@@ -251,5 +265,15 @@ test.list <- atime::atime_test_list(
251265
Before = "f339aa64c426a9cd7cf2fcb13d91fc4ed353cd31", # Parent of the first commit https://github.com/Rdatatable/data.table/commit/fcc10d73a20837d0f1ad3278ee9168473afa5ff1 in the PR https://github.com/Rdatatable/data.table/pull/6393/commits with major change to fwrite with gzip.
252266
PR = "3630413ae493a5a61b06c50e80d166924d2ef89a"), # Close-to-last merge commit in the PR.
253267

254-
tests=extra.test.list)
268+
# Test case created directly using the atime code below (not adapted from any other benchmark), based on the PR, Removes unnecessary data.table call from as.data.table.array https://github.com/Rdatatable/data.table/pull/7010
269+
"as.data.table.array improved in #7010" = atime::atime_test(
270+
setup = {
271+
dims = c(N, 1, 1)
272+
arr = array(seq_len(prod(dims)), dim=dims)
273+
},
274+
expr = data.table:::as.data.table.array(arr, na.rm=FALSE),
275+
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
276+
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
277+
278+
tests=extra.test.list)
255279
# nolint end: undesirable_operator_linter.

.ci/linters/cocci/malloc_return_value_cast.cocci

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@ expression E;
44
@@
55
- (T)
66
malloc(E)
7+
8+
- (T)
9+
calloc(_, E)
10+
11+
- (T)
12+
realloc(_, E)

.dev/CRAN_Release.cmd

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,6 @@ grep "PROTECT_PTR" ./src/*.c
9595
# No use of long long, instead use int64_t. TODO
9696
# grep "long long" ./src/*.c
9797

98-
// No use of llu, lld, zd or zu
99-
grep -nE "(llu|lld|zd|zu)" src/*.[hc]
100-
// Comment moved here from fread.c on 19 Nov 2019
101-
// [Moved from fread.c on 19 Nov 2019] On Windows variables of type `size_t` cannot be printed
102-
// with "%zu" in the `snprintf()` function. For those variables we used to cast them into
103-
// `unsigned long long int` before printing, and defined (llu) to make the cast shorter.
104-
// We're now observing warnings from gcc-8 with -Wformat-extra-args, #4062. So
105-
// now we're more strict and cast to [u]int64_t and use PRIu64/PRId64 from <inttypes.h>
106-
// In many cases the format specifier is passed to our own macro (e.g. DTPRINT) or to Rprintf(),
107-
// error() etc, and even if they don't call sprintf() now, they could in future.
108-
10998
# No tabs in C or R code (sorry, Richard Hendricks)
11099
grep -P "\t" ./R/*.R
111100
grep -P "\t" ./src/*.c
@@ -235,23 +224,23 @@ system.time(test.data.table(script="*.Rraw")) # apx 8h = froll 3h + nafill 1m +
235224

236225

237226
###############################################
238-
# R 3.3.0 (stated dependency)
227+
# R 3.4.0 (stated dependency)
239228
###############################################
240229

241230
### ONE TIME BUILD
242231
sudo apt-get -y build-dep r-base
243232
cd ~/build
244-
wget http://cran.stat.ucla.edu/src/base/R-3/R-3.3.0.tar.gz
245-
tar xvf R-3.3.0.tar.gz
246-
cd R-3.3.0
233+
wget http://cran.stat.ucla.edu/src/base/R-3/R-3.4.0.tar.gz
234+
tar xvf R-3.4.0.tar.gz
235+
cd R-3.4.0
247236
CFLAGS="-fcommon" FFLAGS="-fallow-argument-mismatch" ./configure --without-recommended-packages
248237
make
249-
alias R330=~/build/R-3.3.0/bin/R
238+
alias R340=~/build/R-3.4.0/bin/R
250239
### END ONE TIME BUILD
251240

252241
cd ~/GitHub/data.table
253-
R330 CMD INSTALL ./data.table_1.16.99.tar.gz
254-
R330
242+
R340 CMD INSTALL ./data.table_1.16.99.tar.gz
243+
R340
255244
require(data.table)
256245
test.data.table(script="*.Rraw")
257246

.dev/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
# data.table developer
22

3+
Inside this repository we provide some tools to improves development experience. Most notable is the `cc()` helper function that recompiles C sources, reloads R sources, and runs tests.
4+
5+
Typical development workflow will then look like:
6+
7+
0. `git checkout -b [branch]`
8+
1. edit package files
9+
2. run `R`
10+
3. call `cc(TRUE)`
11+
4. (if needed) go to point 1.
12+
13+
Once we (and tests) are satisfied with changes, we then run complete package checks:
14+
15+
0. in shell terminal
16+
1. run `make build`
17+
2. run `make check`
18+
3. (optionally) run on `r-devel`, e.g. `R=~/build/R-devel/bin/R make check`
19+
4. (optionally) run on ancient R, e.g. `R=~/build/R-340/bin/R make check`
20+
5. `git commit -m '[changes description]'`
21+
6. `git push [remote] [branch]`
22+
323
## Setup
424

525
To use the optional helper function `cc()`, one needs to set up the project path and source `.dev/cc.R` to use `cc()` conveniently. This works through creating an additional `.Rprofile` in the `data.table` directory.

.devcontainer/r-ancient-gcc/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM registry.gitlab.com/jangorecki/dockerfiles/r-3.3.0
1+
FROM registry.gitlab.com/jangorecki/dockerfiles/r-3.4.0
22

33
RUN apt-get -qq update \
44
&& apt-get install -y --no-install-recommends git

.github/CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ See [`?test`](https://rdatatable.gitlab.io/data.table/reference/test.html).
7676
1. **[Squashing Github pull requests into a single commit](http://eli.thegreenplace.net/2014/02/19/squashing-github-pull-requests-into-a-single-commit)**.
7777
1. **[Github help](https://help.github.com/articles/using-pull-requests/)** - you'll need the *fork and pull* model.
7878

79+
#### Performance testing
80+
81+
If your PR may have an effect on time/memory usage, please consider adding a performance test, either in the same PR, or a follow-up PR. Note that first-time contributors _must_ do so in a follow-up PR, since the tests are only run on PRs from branches created directly in the Rdatatable/data.table repo. See the [Performance testing](https://github.com/Rdatatable/data.table/wiki/Performance-testing) wiki page for details.
82+
7983
Minimal first time PR
8084
---------------------
8185

.github/workflows/R-CMD-check-occasional.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
fail-fast: false
1616
matrix:
1717
os: [macOS-latest, windows-latest, ubuntu-latest]
18-
r: ['devel', 'release', '3.3', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3']
18+
r: ['devel', 'release', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3']
1919
locale: ['en_US.utf8', 'zh_CN.utf8', 'lv_LV.utf8'] # Chinese for translations, Latvian for collate order (#3502)
2020
exclude:
2121
# only run non-English locale CI on Ubuntu
@@ -28,8 +28,6 @@ jobs:
2828
- os: windows-latest
2929
locale: 'lv_LV.utf8'
3030
# macOS/arm64 only available for R>=4.1.0
31-
- os: macOS-latest
32-
r: '3.3'
3331
- os: macOS-latest
3432
r: '3.4'
3533
- os: macOS-latest

.gitlab-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,13 @@ test-lin-dev-clang-cran:
203203
# stated dependency on R
204204
test-lin-ancient-cran:
205205
<<: *test-lin
206-
image: registry.gitlab.com/rdatatable/dockerfiles/r-3.3.0
206+
image: registry.gitlab.com/rdatatable/dockerfiles/r-3.4.0
207207
variables:
208208
_R_CHECK_FORCE_SUGGESTS_: "FALSE" # can be removed if all dependencies are available (knitr, xts, etc.)
209209
script:
210210
- *install-deps
211211
# knitr requires evaluate, which requires R 3.6.0.
212-
# Restore checking vignettes if upgrading our R dependency means knitr can be installed.
212+
# Restore checking vignettes if upgrading our R dependency means knitr can be installed, or when we switch to litedown.
213213
- R CMD check --no-manual --no-build-vignettes --ignore-vignettes $(ls -1t data.table_*.tar.gz | head -n 1)
214214

215215
# run the main checks with Address(+Leak),UBSanitizer enabled

0 commit comments

Comments
 (0)