Skip to content

Commit a5fae71

Browse files
finish
1 parent 4ffc262 commit a5fae71

File tree

10 files changed

+24
-24
lines changed

10 files changed

+24
-24
lines changed

R/test.data.table.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,13 +277,13 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
277277
y = head(order(-diff(timings$RSS)), 10L)
278278
ans = timings[, diff := c(NA_real_, round(diff(RSS), 1L))][y + 1L]
279279
ans[, time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here
280-
catf("10 largest RAM increases (MB); see plot for cumulative effect (if any)\n")
280+
catf("10 largest RAM increases (MiB); see plot for cumulative effect (if any)\n")
281281
print(ans, class=FALSE)
282282
get("dev.new")(width=14.0, height=7.0)
283283
get("par")(mfrow=1:2)
284-
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim[0]=0 for context"), ylab="RSS (MB)", ylim=c(0.0, max(timings$RSS)))
284+
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim[0]=0 for context"), ylab="RSS (MiB)", ylim=c(0.0, max(timings$RSS)))
285285
get("mtext")(lastRSS<-as.integer(ceiling(last(timings$RSS))), side=4L, at=lastRSS, las=1L, font=2L)
286-
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim=range for inspection"), ylab="RSS (MB)")
286+
get("plot")(timings$RSS, main=paste(basename(fn),"\nylim=range for inspection"), ylab="RSS (MiB)")
287287
get("mtext")(lastRSS, side=4L, at=lastRSS, las=1L, font=2L)
288288
}
289289

@@ -316,7 +316,7 @@ INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw
316316

317317
gc_mem = function() {
318318
# nocov start
319-
# gc reports memory in MB
319+
# gc reports memory in MiB
320320
m = colSums(gc()[, c(2L, 4L, 6L)])
321321
names(m) = c("GC_used", "GC_gc_trigger", "GC_max_used")
322322
m

R/utils.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,10 @@ edit.data.table = function(name, ...) {
212212

213213
rss = function() { #5515 #5517
214214
# nocov start
215-
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB
215+
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KiB
216216
ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_)
217217
if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case
218-
round(ans / 1024.0, 1L) # return MB
218+
round(ans / 1024.0, 1L) # return MiB
219219
# nocov end
220220
}
221221

inst/tests/tests.Rraw

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9770,7 +9770,7 @@ test(1640.2, x[y, c(.SD, .(x.aa=x.aa)), on=c(aa="bb")], data.table(aa=3:5, cc=c(
97709770
nq_fun = function(n=100L) {
97719771
i1 = sample(sample.int(n, 10L), n, TRUE)
97729772
i2 = sample.int(n, n, TRUE) - as.integer(n/2) # this used to be type numeric before #5517 which didn't seem intentional
9773-
i3 = sample.int(2e6, n, TRUE) - as.integer(1e6) # used to sample from -1e6:1e6 which if allocated would be 8MB, #5517
9773+
i3 = sample.int(2e6, n, TRUE) - as.integer(1e6) # used to sample from -1e6:1e6 which if allocated would be ~7.6MiB (8MB), #5517
97749774
i4 = sample(c(NA_integer_, sample.int(n*2L, 10L, FALSE)-n), n, TRUE)
97759775

97769776
d1 = sample(rnorm(10L), n, TRUE)
@@ -9861,7 +9861,7 @@ y = na.omit(dt2)
98619861

98629862
if (.Machine$sizeof.pointer>4) {
98639863

9864-
# temporarily off due to hitting 2GB limit on 32bit, #2767
9864+
# temporarily off due to hitting 2GiB limit on 32bit, #2767
98659865
# turn off temporarily using FALSE when using valgrind, too, as very slow
98669866

98679867
set.seed(1509611616L)
@@ -11964,7 +11964,7 @@ test(1800.2, fread("A\n1e55555555\n-1e+234056\n2e-59745"), data.table(A=c("1e555
1196411964
#
1196511965
# Tests thanks to Pasha copied verbatim from his PR#2200
1196611966
#
11967-
# Test files with "round" sizes (different multiples of 2, from 512B to 64KB)
11967+
# Test files with "round" sizes (different powers of 2, from 512B to 64KiB)
1196811968
for (mul in c(16, 128, 512, 1024, 2048)) {
1196911969
ff = file(f<-tempfile(), open="wb")
1197011970
cat(strrep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), file=ff)
@@ -12943,7 +12943,7 @@ test(1903.2, fread(",A,B\n1,0,1\n2,0,1\n3,1,1\n", logical01=TRUE), data.table(V1
1294312943
txt = 'A, B, C\n17, 34, 2.3\n3., NA, 1\nNA , 2, NA \n0,0.1,0'
1294412944
test(1904.1, fread(txt, na.strings="NA", verbose=TRUE),
1294512945
ans <- data.table(A=c(17,3,NA,0), B=c(34,NA,2,0.1), C=c(2.3,1.0,NA,0.0)),
12946-
output = c("Number of sampling jump points = 1 because.*Reading 1 chunks \\(0 swept\\) of 1.000MB \\(each chunk 4 rows\\) using 1 thread.*Rereading 0 columns"))
12946+
output = c("Number of sampling jump points = 1 because.*Reading 1 chunks \\(0 swept\\) of 1.000MiB \\(each chunk 4 rows\\) using 1 thread.*Rereading 0 columns"))
1294712947
test(1904.2, fread(txt, na.strings=c("NA", " ")), ans, warning='na.strings\\[2\\]==" " consists only of whitespace, ignoring. Since strip.white=TRUE.*use.*"".*<NA>')
1294812948
test(1904.3, fread(txt, na.strings=c("NA", "")), ans)
1294912949
test(1904.4, fread(txt, na.strings=c("NA", "", " ")), ans, warning='na.strings\\[3\\]==" ".*only.*whitespace.*will already be read as <NA>')
@@ -17973,7 +17973,7 @@ DT = data.table(x = sample(letters[1:5], 20, TRUE),
1797317973
c = sample(c(0+3i,1,-1-1i,NA), 20, TRUE),
1797417974
l = sample(c(TRUE, FALSE, NA), 20, TRUE),
1797517975
r = as.raw(sample(1:5, 20, TRUE)))
17976-
load(testDir("test2224.Rdata")) # 47KB array 24x8 where each cell contains a length-20 result
17976+
load(testDir("test2224.Rdata")) # 47KiB array 24x8 where each cell contains a length-20 result
1797717977
if (test_bit64) {
1797817978
DT[, i64:=as.integer64(sample(c(-2L,0L,2L,NA), 20, TRUE))]
1797917979
} else {
@@ -17984,7 +17984,7 @@ for (col in names(DT)[-1]) {
1798417984
for (n in list(1, 5, -1, -5, c(1,2), c(-1,1))) {
1798517985
for (type in c('lag','lead','shift','cyclic')) {
1798617986
# fill is tested by group in tests 2218.*; see comments in #5205
17987-
# sapply(sapply()) changed to for(for(for())) to save 29MB, #5517
17987+
# sapply(sapply()) changed to for(for(for())) to save 29MiB, #5517
1798817988
test(2224.1+i/10000, # 192 tests here when test_bit64=TRUE; 168 when FALSE
1798917989
EVAL(sprintf("DT[, shift(%s, %d, type='%s'), by=x]$V1", col, n, type)),
1799017990
ans[[i]])

src/forder.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max
318318
savetl(s); // afterwards. From R 2.14.0, tl is initialized to 0, prior to that it was random so this step saved too much.
319319
// now save unique SEXP in ustr so i) we can loop through them afterwards and reset TRUELENGTH to 0 and ii) sort uniques when sorting too
320320
if (ustr_alloc<=ustr_n) {
321-
ustr_alloc = (ustr_alloc==0) ? 16384 : ustr_alloc*2; // small initial guess, negligible time to alloc 128KB (32 pages)
321+
ustr_alloc = (ustr_alloc==0) ? 16384 : ustr_alloc*2; // small initial guess, negligible time to alloc 128KiB (32 pages)
322322
if (ustr_alloc>n) ustr_alloc = n; // clamp at n. Reaches n when fully unique (no dups)
323323
ustr = realloc(ustr, sizeof(SEXP) * ustr_alloc);
324324
if (ustr==NULL) STOP(_("Unable to realloc %d * %d bytes in range_str"), ustr_alloc, (int)sizeof(SEXP)); // # nocov

src/fread.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,7 +1420,7 @@ int freadMain(freadMainArgs _args) {
14201420

14211421
// No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage
14221422
// Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried).
1423-
// TO DO?: MAP_HUGETLB for Linux but seems to need admin to setup first. My Hugepagesize is 2MB (>>2KB, so promising)
1423+
// TO DO?: MAP_HUGETLB for Linux but seems to need admin to setup first. My Hugepagesize is 2MiB (>>2KiB, so promising)
14241424
// https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
14251425
mmp = mmap(NULL, fileSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); // COW for last page lastEOLreplaced
14261426
#ifdef __EMSCRIPTEN__
@@ -1901,7 +1901,7 @@ int freadMain(freadMainArgs _args) {
19011901

19021902
const ptrdiff_t jump0size = firstJumpEnd - pos; // the size in bytes of the first 100 lines from the start (jump point 0)
19031903
// how many places in the file to jump to and test types there (the very end is added as 11th or 101th)
1904-
// not too many though so as not to slow down wide files; e.g. 10,000 columns. But for such large files (50GB) it is
1904+
// not too many though so as not to slow down wide files; e.g. 10,000 columns. But for such large files (50GiB) it is
19051905
// worth spending a few extra seconds sampling 10,000 rows to decrease a chance of costly reread even further.
19061906
nJumps = 1;
19071907
const ptrdiff_t sz = eof - pos;
@@ -2254,10 +2254,10 @@ int freadMain(freadMainArgs _args) {
22542254
int buffGrown = 0;
22552255
// chunkBytes is the distance between each jump point; it decides the number of jumps
22562256
// We may want each chunk to write to its own page of the final column, hence 1000*maxLen
2257-
// For the 44GB file with 12875 columns, the max line len is 108,497. We may want each chunk to write to its
2257+
// For the 44GiB file with 12875 columns, the max line len is 108,497. We may want each chunk to write to its
22582258
// own page (4k) of the final column, hence 1000 rows of the smallest type (4 byte int) is just
22592259
// under 4096 to leave space for R's header + malloc's header.
2260-
size_t chunkBytes = umax((uint64_t)(1000 * meanLineLen), 1ULL * 1024 * 1024/*MB*/);
2260+
size_t chunkBytes = umax((uint64_t)(1000 * meanLineLen), 1ULL * 1024 * 1024/*MiB*/);
22612261
// Index of the first jump to read. May be modified if we ever need to restart
22622262
// reading from the middle of the file.
22632263
int jump0 = 0;

src/fsort.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ int qsort_cmp(const void *a, const void *b) {
9393
uint64_t x = qsort_data[*(int *)a];
9494
uint64_t y = qsort_data[*(int *)b];
9595
// return x-y; would like this, but this is long and the cast to int return may not preserve sign
96-
// We have long vectors in mind (1e10(74GB), 1e11(740GB)) where extreme skew may feasibly mean the largest count
96+
// We have long vectors in mind (1e10(74GiB), 1e11(740GiB)) where extreme skew may feasibly mean the largest count
9797
// is greater than 2^32. The first split is (currently) 16 bits so should be very rare but to be safe keep 64bit counts.
9898
return (x<y)-(x>y); // largest first in a safe branchless way casting long to int
9999
}
@@ -233,7 +233,7 @@ SEXP fsort(SEXP x, SEXP verboseArg) {
233233
// This assignment to ans is not random access as it may seem, but cache efficient by
234234
// design since target pages are written to contiguously. MSBsize * 4k < cache.
235235
// TODO: therefore 16 bit MSB seems too big for this step. Time this step and reduce 16 a lot.
236-
// 20MB cache / nth / 4k => MSBsize=160
236+
// 20MiB cache / nth / 4k => MSBsize=160
237237
source++;
238238
}
239239
}

src/fwrite.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ void fwriteMain(fwriteMainArgs args)
791791
}
792792
char *buffPool = malloc(alloc_size);
793793
if (!buffPool) {
794-
STOP(_("Unable to allocate %zu MB * %d thread buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), // # nocov
794+
STOP(_("Unable to allocate %zu MiB * %d thread buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), // # nocov
795795
buffSize / MEGA, nth, errno, strerror(errno)); // # nocov
796796
}
797797

src/fwrite.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ typedef struct fwriteMainArgs
109109
// iff scipen >= 3=8-5
110110
bool squashDateTime;
111111
bool append;
112-
int buffMB; // [1-1024] default 8MB
112+
int buffMB; // [1-1024] default 8MiB
113113
int nth;
114114
bool showProgress;
115115
bool is_gzip;

src/fwriteR.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ SEXP fwriteR(
163163
SEXP logical01_Arg, // TRUE|FALSE
164164
SEXP scipen_Arg,
165165
SEXP dateTimeAs_Arg, // 0=ISO(yyyy-mm-dd),1=squash(yyyymmdd),2=epoch,3=write.csv
166-
SEXP buffMB_Arg, // [1-1024] default 8MB
166+
SEXP buffMB_Arg, // [1-1024] default 8MiB
167167
SEXP nThread_Arg,
168168
SEXP showProgress_Arg,
169169
SEXP is_gzip_Arg,

vignettes/datatable-keys-fast-subset.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,10 +416,10 @@ N = 2e7L
416416
DT = data.table(x = sample(letters, N, TRUE),
417417
y = sample(1000L, N, TRUE),
418418
val = runif(N))
419-
print(object.size(DT), units = "Mb")
419+
print(object.size(DT), units = "MiB")
420420
```
421421

422-
`DT` is ~380MB. It is not really huge, but this will do to illustrate the point.
422+
`DT` is ~380MiB. It is not really huge, but this will do to illustrate the point.
423423

424424
From what we have seen in the Introduction to data.table section, we can subset those rows where columns `x = "g"` and `y = 877` as follows:
425425

0 commit comments

Comments
 (0)