Commit 4ffc262 ("2nd pass")

Parent: a01ac2f

9 files changed, 17 insertions(+), 17 deletions(-)


.dev/CRAN_Release.cmd

Lines changed: 2 additions & 2 deletions
@@ -562,9 +562,9 @@ ls -1 *.tar.gz | grep -E 'Chicago|dada2|flowWorkspace|LymphoSeq' | TZ='UTC' para
 # 3) dllVersion() at the end of init.c
 # DO NOT push to GitHub's master branch. Prevents even a slim possibility of user getting premature version.
 # Even release numbers must have been obtained from CRAN and only CRAN. There were too many support problems in the past before this procedure was brought in.
-du -k inst/tests # 1.5MB before
+du -k inst/tests # 1.5MiB before
 bzip2 inst/tests/*.Rraw # compress *.Rraw just for release to CRAN; do not commit compressed *.Rraw to git
-du -k inst/tests # 0.75MB after
+du -k inst/tests # 0.75MiB after
 R CMD build .
 export GITHUB_PAT="f1c.. github personal access token ..7ad"
 Rdevel -q -e "packageVersion('xml2')" # ensure installed
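
Aside on the unit change driving this commit: `du -k` counts 1024-byte blocks, so the sizes in these comments are binary units and MiB is the accurate spelling. A quick arithmetic check, in illustrative R (not part of the release script; the 1536 reading is hypothetical):

bytes = 1536 * 1024  # a `du -k` total of 1536 means 1536 KiB
bytes / 1024^2       # 1.5   -> "1.5MiB", matching the new comment
bytes / 1000^2       # ~1.57 -> the old "1.5MB" was the looser decimal unit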

.dev/revdep.R

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ options(repos = BiocManager::repositories())
 
 options(warn=1) # warning at the time so we can more easily see what's going on package by package when we scroll through output
 cat("options()$timeout==", options()$timeout," set by R_DEFAULT_INTERNET_TIMEOUT in .dev/.bash_aliases revdepsh\n",sep="")
-# R's default is 60. Before Dec 2020, we used 300 but that wasn't enough to download Bioc package BSgenome.Hsapiens.UCSC.hg19 (677GB) which is
+# R's default is 60. Before Dec 2020, we used 300 but that wasn't enough to download Bioc package BSgenome.Hsapiens.UCSC.hg19 (677GiB) which is
 # suggested by CRAN package CNVScope which imports data.table. From Dec 2020 we use 3600.
 
 if (is.null(utils::old.packages(.libPaths()[2]))) {
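
The comment above concerns R's download timeout. A minimal sketch of the same setting applied directly in a session (illustrative; the revdep scripts set it via the R_DEFAULT_INTERNET_TIMEOUT environment variable instead):

getOption("timeout")     # 60 in a vanilla R session, per the comment above
options(timeout = 3600)  # the value used since Dec 2020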

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ test-lin-rel-cran:
     _R_CHECK_CRAN_INCOMING_: "TRUE" ## stricter --as-cran checks should run in dev pipelines continuously (not sure what they are though)
     _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" ## Other than no URL checking (takes many minutes) or 'Days since last update 0' NOTEs needed, #3284
     _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## bytes
-    _R_CHECK_PKG_SIZES_THRESHOLD_: "10" ## MB 'checking installed package size' NOTE increased due to po
+    _R_CHECK_PKG_SIZES_THRESHOLD_: "10" ## MiB 'checking installed package size' NOTE increased due to po
   script:
     - *install-deps
     - echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
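
These _R_CHECK_* settings are plain environment variables that R CMD check reads. A sketch of mirroring the CI values in a local R session before launching check from that same session, e.g. via system() (illustrative only; the pipeline sets them in the YAML above):

Sys.setenv(
  "_R_CHECK_CRAN_INCOMING_" = "TRUE",
  "_R_CHECK_CRAN_INCOMING_REMOTE_" = "FALSE",
  "_R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_" = "7500000",  # bytes
  "_R_CHECK_PKG_SIZES_THRESHOLD_" = "10"                    # MiB, per the change above
)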

DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ Title: Extension of `data.frame`
 Depends: R (>= 3.4.0)
 Imports: methods
 Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown
-Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development.
+Description: Fast aggregation of large data (e.g. 100GiB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development.
 License: MPL-2.0 | file LICENSE
 URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table
 BugReports: https://github.com/Rdatatable/data.table/issues
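
A toy-scale sketch of the grouped aggregation the Description advertises (the names g, v and total are illustrative, not from the package):

library(data.table)
DT = data.table(g = sample(letters, 1e6, TRUE), v = runif(1e6))
DT[, .(total = sum(v)), by = g]  # grouped sum; the same syntax scales to RAM-sized data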

R/data.table.R

Lines changed: 1 addition & 1 deletion
@@ -2237,7 +2237,7 @@ tail.data.table = function(x, n=6L, ...) {
 
 "[<-.data.table" = function(x, i, j, value) {
   # [<- is provided for consistency, but := is preferred as it allows by group and by reference to subsets of columns
-  # with no copy of the (very large, say 10GB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change.
+  # with no copy of the (very large, say 10GiB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change.
   if (!cedta()) {
     x = if (nargs()<4L) `[<-.data.frame`(x, i, value=value)
         else `[<-.data.frame`(x, i, j, value)
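
To make the comment concrete, a small sketch contrasting the two forms (toy table; column names illustrative):

library(data.table)
DT = data.table(x = 1:5, y = 6:10)
DT[x > 2L, y := 0L]  # := assigns by reference within the subset; no column is copied
DT[2, "y"] <- 99L    # [<- also works, but copies, which is why := is preferred above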

R/duplicated.R

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon
 
 # Test for #2013 unique() memory efficiency improvement in v1.10.5
 # set.seed(1)
-# Create unique 7.6GB DT on 16GB laptop
+# Create unique 7.6GiB DT on 16GiB laptop
 # DT = data.table(
 #   A = sample(1e8, 2e8, TRUE),
 #   B = sample(1e8, 2e8, TRUE),
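
The commented-out test above exercises unique() at 7.6GiB scale; the same calls at toy scale, using the by argument visible in the hunk header:

library(data.table)
DT = data.table(A = sample(5L, 20L, TRUE), B = sample(5L, 20L, TRUE))
unique(DT)            # unique rows judged on all columns (the default)
unique(DT, by = "A")  # uniqueness judged on column A only; first occurrence kept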

README.md

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ pay for developer time, professional services, travel, workshops, and a variety
 * fast and friendly delimited **file reader**: **[`?fread`](https://rdatatable.gitlab.io/data.table/reference/fread.html)**, see also [convenience features for _small_ data](https://github.com/Rdatatable/data.table/wiki/Convenience-features-of-fread)
 * fast and feature rich delimited **file writer**: **[`?fwrite`](https://rdatatable.gitlab.io/data.table/reference/fwrite.html)**
 * low-level **parallelism**: many common operations are internally parallelized to use multiple CPU threads
-* fast and scalable aggregations; e.g. 100GB in RAM (see [benchmarks](https://duckdblabs.github.io/db-benchmark/) on up to **two billion rows**)
+* fast and scalable aggregations; e.g. 100GiB in RAM (see [benchmarks](https://duckdblabs.github.io/db-benchmark/) on up to **two billion rows**)
 * fast and feature rich joins: **ordered joins** (e.g. rolling forwards, backwards, nearest and limited staleness), **[overlapping range joins](https://github.com/Rdatatable/data.table/wiki/talks/EARL2014_OverlapRangeJoin_Arun.pdf)** (similar to `IRanges::findOverlaps`), **[non-equi joins](https://github.com/Rdatatable/data.table/wiki/talks/ArunSrinivasanUseR2016.pdf)** (i.e. joins using operators `>, >=, <, <=`), **aggregate on join** (`by=.EACHI`), **update on join**
 * fast add/update/delete columns **by reference** by group using no copies at all
 * fast and feature rich **reshaping** data: **[`?dcast`](https://rdatatable.gitlab.io/data.table/reference/dcast.data.table.html)** (_pivot/wider/spread_) and **[`?melt`](https://rdatatable.gitlab.io/data.table/reference/melt.data.table.html)** (_unpivot/longer/gather_)
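
A minimal sketch of the reshaping pair the last bullet names (toy data; column names illustrative):

library(data.table)
DT = data.table(id = 1:2, a = 3:4, b = 5:6)
long = melt(DT, id.vars = "id")  # unpivot/longer: one row per id/variable pair
dcast(long, id ~ variable)       # pivot/wider: back to the original shape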

inst/tests/benchmark.Rraw

Lines changed: 8 additions & 8 deletions
@@ -24,7 +24,7 @@ test(476, nrow(as.matrix(ans)), 2L*N)
 
 # Test that as.list.data.table no longer copies via unclass, so speeding up sapply(DT,class) and lapply(.SD,...) etc, #2000
 N = 1e6
-DT = data.table(a=1:N,b=1:N,c=1:N,d=1:N) # 15MB in dev testing, but test with N=1e7
+DT = data.table(a=1:N,b=1:N,c=1:N,d=1:N) # 15MiB in dev testing, but test with N=1e7
 test(603, system.time(sapply(DT,class))["user.self"] < 0.1)
 
 
@@ -96,7 +96,7 @@ local({
 
 # fwrite showProgress test 1735. Turned off as too long/big for CRAN.
 if (FALSE) {
-  N = 6e8 # apx 6GB
+  N = 6e8 # apx 6GiB
   DT = data.table(C1=sample(100000,N,replace=TRUE), C2=sample(paste0(LETTERS,LETTERS,LETTERS), N, replace=TRUE))
   gc()
   d = "/dev/shm/"
@@ -232,15 +232,15 @@ DT = data.table(A=rep(1:2,c(100000,1)), B=runif(100001))
 before = gc()["Vcells",2]
 for (i in 1:50) DT[, sum(B), by=A]
 after = gc()["Vcells",2]
-test(1157, after < before+3) # +3 = 3MB
-# Before the patch, Vcells grew dramatically from 6MB to 60MB. Now stable at 6MB. Increase 50 to 1000 and it grew to over 1GB for this case.
+test(1157, after < before+3) # +3 = 3MiB
+# Before the patch, Vcells grew dramatically from 6MiB to 60MiB. Now stable at 6MiB. Increase 50 to 1000 and it grew to over 1GiB for this case.
 
 # Similar for when dogroups writes less rows than allocated, #2648.
 DT = data.table(k = 1:50, g = 1:20, val = rnorm(1e4))
 before = gc()["Vcells",2]
 for (i in 1:50) DT[ , unlist(.SD), by = 'k']
 after = gc()["Vcells",2]
-test(1158, after < before+3) # 177.6MB => 179.2MB. Needs to be +3 now from v1.9.8 with alloccol up from 100 to 1024
+test(1158, after < before+3) # 177.6MiB => 179.2MiB. Needs to be +3 now from v1.9.8 with alloccol up from 100 to 1024
 
 # fix DT[TRUE, :=] using too much working memory for i, #1249
 if (!inherits(try(Rprofmem(NULL), silent=TRUE), "try-error")) { # in case R not compiled with memory profiling enabled
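
Tests 1157 and 1158 above share one leak-test pattern: snapshot gc()'s Vcells figure, run the suspect operation in a loop, snapshot again, and assert bounded growth. Condensed (reusing the DT with columns A and B defined in the hunk header):

before = gc()["Vcells", 2]          # the '(Mb)' column, after a collection
for (i in 1:50) DT[, sum(B), by=A]  # the operation under suspicion
after = gc()["Vcells", 2]
stopifnot(after < before + 3)       # ~3MiB slack; a real leak grows far more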
@@ -311,7 +311,7 @@ unlink(f)
 # test no memory leak, #2191 and #2284
 # These take a few seconds each, and it's important to run these on CRAN to check no leak
 gc(); before = gc()["Vcells","(Mb)"]
-for (i in 1:2000) { DT = data.table(1:3); rm(DT) } # in 1.8.2 would leak 3MB
+for (i in 1:2000) { DT = data.table(1:3); rm(DT) } # in 1.8.2 would leak 3MiB
 gc(); after = gc()["Vcells","(Mb)"]
 test(861, after < before+0.5) # close to 0.0 difference, but 0.5 for safe margin
 gc(); before = gc()["Vcells","(Mb)"]
@@ -327,7 +327,7 @@ test(863, after < before+0.5)
 
 # fread should use multiple threads on single column input.
 # tests 2 threads; the very reasonable limit on CRAN
-# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MB currently)
+# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MiB currently)
 if (getDTthreads() == 1L) {
   cat("Test 1760 not run because this session either has no OpenMP or has been limited to one thread (e.g. under UBSAN and ASAN)\n")
 } else {
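
For reference, the guard above uses data.table's thread controls:

library(data.table)
getDTthreads()    # how many OpenMP threads data.table will use in this session
setDTthreads(2L)  # cap at 2 threads, the limit the comment calls reasonable on CRAN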
@@ -369,7 +369,7 @@ for(i in 1:100) {
 gc() # extra gc() (i.e. two including the one on next line) seems to reduce `after`
 # from 29.7 to 27.2 (exactly `before`). Keeping the extra gc() as no harm.
 after = sum(gc()[, 2])
-test(1912.1, after < before + 10) # 10MB very wide margin. With the gc race, heap usage grew much more which is all we're testing here (no blow up).
+test(1912.1, after < before + 10) # 10MiB very wide margin. With the gc race, heap usage grew much more which is all we're testing here (no blow up).
 #
 before = sum(gc()[, 2])
 fff = function(aref) {

inst/tests/tests.Rraw

Lines changed: 1 addition & 1 deletion
@@ -5468,7 +5468,7 @@ test(1333.2, fread('A,B\nfoo,1\n"Analyst\\" ,2\nbar,3', strip.white=FALSE), data
 test(1334, fread('A,B\nfoo,1\n"Analyst\\" ,",2\nbar,3'), data.table(A=c('foo', 'Analyst\\" ,', 'bar'), B=1:3))
 test(1335, fread('A,B\nfoo,1\n"Analyst\\\\",2\nbar,3'), data.table(A=c('foo','Analyst\\\\','bar'), B=1:3))
 
-# data from 12GB file in comments on http://stackoverflow.com/a/23858323/403310 ...
+# data from 12GiB file in comments on http://stackoverflow.com/a/23858323/403310 ...
 # note that read.csv gets this wrong and puts jacoleman high school into the previous field, then fills the rest of the line silently.
 cat('A,B,C,D,E,F
 "12",0,"teacher private nfp\\\\\\\\"",""jacoleman high school","",""
