Skip to content

Commit 1e2e3ba

Browse files
Use IEC memory unit abbreviations (#7126)
* first pass * 2nd pass * finish * j/k, now finished * Corrected Bioc package size * fix test broken on head * revert GiB->GB in DESCRIPTION * might as well keep README in sync too
1 parent 4773ef9 commit 1e2e3ba

20 files changed

+61
-59
lines changed

.dev/CRAN_Release.cmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -562,9 +562,9 @@ ls -1 *.tar.gz | grep -E 'Chicago|dada2|flowWorkspace|LymphoSeq' | TZ='UTC' para
562562
# 3) dllVersion() at the end of init.c
563563
# DO NOT push to GitHub's master branch. Prevents even a slim possibility of user getting premature version.
564564
# Even release numbers must have been obtained from CRAN and only CRAN. There were too many support problems in the past before this procedure was brought in.
565-
du -k inst/tests # 1.5MB before
565+
du -k inst/tests # 1.5MiB before
566566
bzip2 inst/tests/*.Rraw # compress *.Rraw just for release to CRAN; do not commit compressed *.Rraw to git
567-
du -k inst/tests # 0.75MB after
567+
du -k inst/tests # 0.75MiB after
568568
R CMD build .
569569
export GITHUB_PAT="f1c.. github personal access token ..7ad"
570570
Rdevel -q -e "packageVersion('xml2')" # ensure installed

.dev/revdep.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ options(repos = BiocManager::repositories())
6767

6868
options(warn=1) # warning at the time so we can more easily see what's going on package by package when we scroll through output
6969
cat("options()$timeout==", options()$timeout," set by R_DEFAULT_INTERNET_TIMEOUT in .dev/.bash_aliases revdepsh\n",sep="")
70-
# R's default is 60. Before Dec 2020, we used 300 but that wasn't enough to download Bioc package BSgenome.Hsapiens.UCSC.hg19 (677GB) which is
70+
# R's default is 60. Before Dec 2020, we used 300 but that wasn't enough to download Bioc package BSgenome.Hsapiens.UCSC.hg19 (677MiB) which is
7171
# suggested by CRAN package CNVScope which imports data.table. From Dec 2020 we use 3600.
7272

7373
if (is.null(utils::old.packages(.libPaths()[2]))) {

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ test-lin-rel-cran:
151151
_R_CHECK_CRAN_INCOMING_: "TRUE" ## stricter --as-cran checks should run in dev pipelines continuously (not sure what they are though)
152152
_R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" ## Other than no URL checking (takes many minutes) or 'Days since last update 0' NOTEs needed, #3284
153153
_R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## bytes
154-
_R_CHECK_PKG_SIZES_THRESHOLD_: "10" ## MB 'checking installed package size' NOTE increased due to po
154+
_R_CHECK_PKG_SIZES_THRESHOLD_: "10" ## MiB 'checking installed package size' NOTE increased due to po
155155
script:
156156
- *install-deps
157157
- echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars

R/data.table.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2237,7 +2237,7 @@ tail.data.table = function(x, n=6L, ...) {
22372237

22382238
"[<-.data.table" = function(x, i, j, value) {
22392239
# [<- is provided for consistency, but := is preferred as it allows by group and by reference to subsets of columns
2240-
# with no copy of the (very large, say 10GB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change.
2240+
# with no copy of the (very large, say 10GiB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change.
22412241
if (!cedta()) {
22422242
x = if (nargs()<4L) `[<-.data.frame`(x, i, value=value)
22432243
else `[<-.data.frame`(x, i, j, value)

R/duplicated.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon
4949

5050
# Test for #2013 unique() memory efficiency improvement in v1.10.5
5151
# set.seed(1)
52-
# Create unique 7.6GB DT on 16GB laptop
52+
# Create unique 7.6GiB DT on 16GiB laptop
5353
# DT = data.table(
5454
# A = sample(1e8, 2e8, TRUE),
5555
# B = sample(1e8, 2e8, TRUE),

R/test.data.table.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,13 +277,13 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
277277
y = head(order(-diff(timings$RSS)), 10L)
278278
ans = timings[, diff := c(NA_real_, round(diff(RSS), 1L))][y + 1L]
279279
ans[, time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here
280-
catf("10 largest RAM increases (MB); see plot for cumulative effect (if any)\n")
280+
catf("10 largest RAM increases (MiB); see plot for cumulative effect (if any)\n")
281281
print(ans, class=FALSE)
282282
get("dev.new")(width=14.0, height=7.0)
283283
get("par")(mfrow=1:2)
284-
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim[0]=0 for context"), ylab="RSS (MB)", ylim=c(0.0, max(timings$RSS)))
284+
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim[0]=0 for context"), ylab="RSS (MiB)", ylim=c(0.0, max(timings$RSS)))
285285
get("mtext")(lastRSS<-as.integer(ceiling(last(timings$RSS))), side=4L, at=lastRSS, las=1L, font=2L)
286-
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim=range for inspection"), ylab="RSS (MB)")
286+
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim=range for inspection"), ylab="RSS (MiB)")
287287
get("mtext")(lastRSS, side=4L, at=lastRSS, las=1L, font=2L)
288288
}
289289

@@ -316,7 +316,7 @@ INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw
316316

317317
gc_mem = function() {
318318
# nocov start
319-
# gc reports memory in MB
319+
# gc reports memory in MiB
320320
m = colSums(gc()[, c(2L, 4L, 6L)])
321321
names(m) = c("GC_used", "GC_gc_trigger", "GC_max_used")
322322
m

R/utils.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,10 @@ edit.data.table = function(name, ...) {
212212

213213
rss = function() { #5515 #5517
214214
# nocov start
215-
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB
215+
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KiB
216216
ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_)
217217
if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case
218-
round(ans / 1024.0, 1L) # return MB
218+
round(ans / 1024.0, 1L) # return MiB
219219
# nocov end
220220
}
221221

inst/tests/benchmark.Rraw

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ test(476, nrow(as.matrix(ans)), 2L*N)
2424

2525
# Test that as.list.data.table no longer copies via unclass, so speeding up sapply(DT,class) and lapply(.SD,...) etc, #2000
2626
N = 1e6
27-
DT = data.table(a=1:N,b=1:N,c=1:N,d=1:N) # 15MB in dev testing, but test with N=1e7
27+
DT = data.table(a=1:N,b=1:N,c=1:N,d=1:N) # 15MiB in dev testing, but test with N=1e7
2828
test(603, system.time(sapply(DT,class))["user.self"] < 0.1)
2929

3030

@@ -96,7 +96,7 @@ local({
9696

9797
# fwrite showProgress test 1735. Turned off as too long/big for CRAN.
9898
if (FALSE) {
99-
N = 6e8 # apx 6GB
99+
N = 6e8 # apx 6GiB
100100
DT = data.table(C1=sample(100000,N,replace=TRUE), C2=sample(paste0(LETTERS,LETTERS,LETTERS), N, replace=TRUE))
101101
gc()
102102
d = "/dev/shm/"
@@ -232,15 +232,15 @@ DT = data.table(A=rep(1:2,c(100000,1)), B=runif(100001))
232232
before = gc()["Vcells",2]
233233
for (i in 1:50) DT[, sum(B), by=A]
234234
after = gc()["Vcells",2]
235-
test(1157, after < before+3) # +3 = 3MB
236-
# Before the patch, Vcells grew dramatically from 6MB to 60MB. Now stable at 6MB. Increase 50 to 1000 and it grew to over 1GB for this case.
235+
test(1157, after < before+3) # +3 = 3MiB
236+
# Before the patch, Vcells grew dramatically from 6MiB to 60MiB. Now stable at 6MiB. Increase 50 to 1000 and it grew to over 1GiB for this case.
237237

238238
# Similar for when dogroups writes less rows than allocated, #2648.
239239
DT = data.table(k = 1:50, g = 1:20, val = rnorm(1e4))
240240
before = gc()["Vcells",2]
241241
for (i in 1:50) DT[ , unlist(.SD), by = 'k']
242242
after = gc()["Vcells",2]
243-
test(1158, after < before+3) # 177.6MB => 179.2MB. Needs to be +3 now from v1.9.8 with alloccol up from 100 to 1024
243+
test(1158, after < before+3) # 177.6MiB => 179.2MiB. Needs to be +3 now from v1.9.8 with alloccol up from 100 to 1024
244244

245245
# fix DT[TRUE, :=] using too much working memory for i, #1249
246246
if (!inherits(try(Rprofmem(NULL), silent=TRUE), "try-error")) { # in case R not compiled with memory profiling enabled
@@ -311,7 +311,7 @@ unlink(f)
311311
# test no memory leak, #2191 and #2284
312312
# These take a few seconds each, and it's important to run these on CRAN to check no leak
313313
gc(); before = gc()["Vcells","(Mb)"]
314-
for (i in 1:2000) { DT = data.table(1:3); rm(DT) } # in 1.8.2 would leak 3MB
314+
for (i in 1:2000) { DT = data.table(1:3); rm(DT) } # in 1.8.2 would leak 3MiB
315315
gc(); after = gc()["Vcells","(Mb)"]
316316
test(861, after < before+0.5) # close to 0.0 difference, but 0.5 for safe margin
317317
gc(); before = gc()["Vcells","(Mb)"]
@@ -327,7 +327,7 @@ test(863, after < before+0.5)
327327

328328
# fread should use multiple threads on single column input.
329329
# tests 2 threads; the very reasonable limit on CRAN
330-
# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MB currently)
330+
# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MiB currently)
331331
if (getDTthreads() == 1L) {
332332
cat("Test 1760 not run because this session either has no OpenMP or has been limited to one thread (e.g. under UBSAN and ASAN)\n")
333333
} else {
@@ -369,7 +369,7 @@ for(i in 1:100) {
369369
gc() # extra gc() (i.e. two including the one on next line) seems to reduce `after`
370370
# from 29.7 to 27.2 (exactly `before`). Keeping the extra gc() as no harm.
371371
after = sum(gc()[, 2])
372-
test(1912.1, after < before + 10) # 10MB very wide margin. With the gc race, heap usage grew much more which is all we're testing here (no blow up).
372+
test(1912.1, after < before + 10) # 10MiB very wide margin. With the gc race, heap usage grew much more which is all we're testing here (no blow up).
373373
#
374374
before = sum(gc()[, 2])
375375
fff = function(aref) {

inst/tests/tests.Rraw

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2290,7 +2290,7 @@ test(754.04, DT[, b := a][3, b := 6L], data.table(a=INT(4,2,3),b=INT(4,2,6)))
22902290
test(754.05, DT[, a := as.numeric(a), verbose=TRUE], output="Direct plonk.*no copy")
22912291
RHS = as.integer(DT$a)
22922292
test(754.06, DT[, a:= RHS, verbose=TRUE], output="RHS for item 1 has been duplicated")
2293-
if (getRversion() >= "3.5.0") { # TODO(R>=3.5.0): test unconditionally
2293+
if (base::getRversion() >= "3.5.0") { # TODO(R>=3.5.0): test unconditionally
22942294
# Expand ALTREPS in assign.c, #5400
22952295
# String conversion gets deferred
22962296
## first, a regression test of R itself -- we want to make sure our own test continues to be useful & testing its intended purpose
@@ -5468,7 +5468,7 @@ test(1333.2, fread('A,B\nfoo,1\n"Analyst\\" ,2\nbar,3', strip.white=FALSE), data
54685468
test(1334, fread('A,B\nfoo,1\n"Analyst\\" ,",2\nbar,3'), data.table(A=c('foo', 'Analyst\\" ,', 'bar'), B=1:3))
54695469
test(1335, fread('A,B\nfoo,1\n"Analyst\\\\",2\nbar,3'), data.table(A=c('foo','Analyst\\\\','bar'), B=1:3))
54705470

5471-
# data from 12GB file in comments on http://stackoverflow.com/a/23858323/403310 ...
5471+
# data from 12GiB file in comments on http://stackoverflow.com/a/23858323/403310 ...
54725472
# note that read.csv gets this wrong and puts jacoleman high school into the previous field, then fills the rest of the line silently.
54735473
cat('A,B,C,D,E,F
54745474
"12",0,"teacher private nfp\\\\\\\\"",""jacoleman high school","",""
@@ -9770,7 +9770,7 @@ test(1640.2, x[y, c(.SD, .(x.aa=x.aa)), on=c(aa="bb")], data.table(aa=3:5, cc=c(
97709770
nq_fun = function(n=100L) {
97719771
i1 = sample(sample.int(n, 10L), n, TRUE)
97729772
i2 = sample.int(n, n, TRUE) - as.integer(n/2) # this used to be type numeric before #5517 which didn't seem intentional
9773-
i3 = sample.int(2e6, n, TRUE) - as.integer(1e6) # used to sample from -1e6:1e6 which if allocated would be 8MB, #5517
9773+
i3 = sample.int(2e6, n, TRUE) - as.integer(1e6) # used to sample from -1e6:1e6 which if allocated would be 8MiB, #5517
97749774
i4 = sample(c(NA_integer_, sample.int(n*2L, 10L, FALSE)-n), n, TRUE)
97759775

97769776
d1 = sample(rnorm(10L), n, TRUE)
@@ -9861,7 +9861,7 @@ y = na.omit(dt2)
98619861

98629862
if (.Machine$sizeof.pointer>4) {
98639863

9864-
# temporarily off due to hitting 2GB limit on 32bit, #2767
9864+
# temporarily off due to hitting 2GiB limit on 32bit, #2767
98659865
# turn off temporarily using FALSE when using valgrind, too, as very slow
98669866

98679867
set.seed(1509611616L)
@@ -11964,7 +11964,7 @@ test(1800.2, fread("A\n1e55555555\n-1e+234056\n2e-59745"), data.table(A=c("1e555
1196411964
#
1196511965
# Tests thanks to Pasha copied verbatim from his PR#2200
1196611966
#
11967-
# Test files with "round" sizes (different multiples of 2, from 512B to 64KB)
11967+
# Test files with "round" sizes (different multiples of 2, from 512B to 64KiB)
1196811968
for (mul in c(16, 128, 512, 1024, 2048)) {
1196911969
ff = file(f<-tempfile(), open="wb")
1197011970
cat(strrep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), file=ff)
@@ -12943,7 +12943,7 @@ test(1903.2, fread(",A,B\n1,0,1\n2,0,1\n3,1,1\n", logical01=TRUE), data.table(V1
1294312943
txt = 'A, B, C\n17, 34, 2.3\n3., NA, 1\nNA , 2, NA \n0,0.1,0'
1294412944
test(1904.1, fread(txt, na.strings="NA", verbose=TRUE),
1294512945
ans <- data.table(A=c(17,3,NA,0), B=c(34,NA,2,0.1), C=c(2.3,1.0,NA,0.0)),
12946-
output = c("Number of sampling jump points = 1 because.*Reading 1 chunks \\(0 swept\\) of 1.000MB \\(each chunk 4 rows\\) using 1 thread.*Rereading 0 columns"))
12946+
output = c("Number of sampling jump points = 1 because.*Reading 1 chunks \\(0 swept\\) of 1.000MiB \\(each chunk 4 rows\\) using 1 thread.*Rereading 0 columns"))
1294712947
test(1904.2, fread(txt, na.strings=c("NA", " ")), ans, warning='na.strings\\[2\\]==" " consists only of whitespace, ignoring. Since strip.white=TRUE.*use.*"".*<NA>')
1294812948
test(1904.3, fread(txt, na.strings=c("NA", "")), ans)
1294912949
test(1904.4, fread(txt, na.strings=c("NA", "", " ")), ans, warning='na.strings\\[3\\]==" ".*only.*whitespace.*will already be read as <NA>')
@@ -17973,7 +17973,7 @@ DT = data.table(x = sample(letters[1:5], 20, TRUE),
1797317973
c = sample(c(0+3i,1,-1-1i,NA), 20, TRUE),
1797417974
l = sample(c(TRUE, FALSE, NA), 20, TRUE),
1797517975
r = as.raw(sample(1:5, 20, TRUE)))
17976-
load(testDir("test2224.Rdata")) # 47KB array 24x8 where each cell contains a length-20 result
17976+
load(testDir("test2224.Rdata")) # 47KiB array 24x8 where each cell contains a length-20 result
1797717977
if (test_bit64) {
1797817978
DT[, i64:=as.integer64(sample(c(-2L,0L,2L,NA), 20, TRUE))]
1797917979
} else {
@@ -17984,7 +17984,7 @@ for (col in names(DT)[-1]) {
1798417984
for (n in list(1, 5, -1, -5, c(1,2), c(-1,1))) {
1798517985
for (type in c('lag','lead','shift','cyclic')) {
1798617986
# fill is tested by group in tests 2218.*; see comments in #5205
17987-
# sapply(sapply()) changed to for(for(for())) to save 29MB, #5517
17987+
# sapply(sapply()) changed to for(for(for())) to save 29MiB, #5517
1798817988
test(2224.1+i/10000, # 192 tests here when test_bit64=TRUE; 168 when FALSE
1798917989
EVAL(sprintf("DT[, shift(%s, %d, type='%s'), by=x]$V1", col, n, type)),
1799017990
ans[[i]])

man/datatable-optimize.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ old = options(datatable.optimize = Inf)
110110
set.seed(1L)
111111
DT = lapply(1:20, function(x) sample(c(-100:100), 5e6L, TRUE))
112112
setDT(DT)[, id := sample(1e5, 5e6, TRUE)]
113-
print(object.size(DT), units="Mb") # 400MB, not huge, but will do
113+
print(object.size(DT), units="MiB") # 400MiB, not huge, but will do
114114
115115
# 'order' optimisation
116116
options(datatable.optimize = 1L) # optimisation 'on'

0 commit comments

Comments (0)