Skip to content

Commit 4421958

Browse files
committed
initial internal framework for benchmarking
1 parent 1c7e6c4 commit 4421958

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

R/benchmark.data.table.R

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
benchmark.data.table = function(script="benchmarks.Rraw", pct=100, th=2, rbin="Rscript", desc=character()) {
2+
stopifnot(length(script)==1L)
3+
# make revision && make build && make install && R -q -e 'data.table:::benchmark.data.table()'
4+
fn = setNames(file.path("inst","benchmarks", script), script) ## this path only for development, finally use system.file(package="data.table", "benchmarks", script)
5+
desc = if (length(desc)) paste0(" ", desc) else ""
6+
cat("benchmark.data.table() running: ", names(fn), "\n", sep="")
7+
init = proc.time()[[3L]]
8+
for (p in pct) {
9+
cmd = sprintf("R_DATATABLE_NUM_PROCS_PERCENT=%s R_DATATABLE_NUM_THREADS=%s %s %s%s", p, th, rbin, fn, desc)
10+
system(cmd)
11+
}
12+
t = proc.time()[[3L]]
13+
cat("Benchmarks in ", names(fn), " completed in ", trunc(t-init), "s\n", sep="")
14+
invisible(TRUE)
15+
}
16+
17+
omp = function() {
18+
omp = capture.output(th<-getDTthreads(verbose=TRUE))
19+
val = function(x) tail(strsplit(x, " ", fixed=TRUE)[[1L]], 1L)
20+
l = list(
21+
procs = val(grep("omp_get_num_procs", omp, value=TRUE)),
22+
threads = val(grep("omp_get_max_threads", omp, value=TRUE)),
23+
dt_procs_pct = val(grep("R_DATATABLE_NUM_PROCS_PERCENT", omp, value=TRUE)),
24+
dt_threads = val(grep("R_DATATABLE_NUM_THREADS", omp, value=TRUE)),
25+
th = th
26+
)
27+
suppressWarnings(lapply(l, as.integer))
28+
}
29+
benchmarkEnv = function(args) {
30+
rbin = args[1L]
31+
args = args[-1L]
32+
file.i = which(substr(args, 1L, 6L)=="--file")
33+
rargs = paste(args[seq_len(file.i-1L)], collapse=",")
34+
script = substr(args[file.i], 8L, nchar(args[file.i]))
35+
args.i = which(substr(args, 1L, 6L)=="--args")
36+
if (length(args.i) && length(args)>args.i) {
37+
script_desc = paste(args[(args.i+1L):length(args)], collapse=",")
38+
} else script_desc = NA_character_
39+
cc = readLines(system.file(package="data.table", "cc", mustWork=TRUE))
40+
l = list(
41+
batch = as.integer(Sys.time()),
42+
nodename = Sys.info()[["nodename"]],
43+
rbin = rbin,
44+
rargs = rargs,
45+
script = script,
46+
rver = base::getRversion(),
47+
package = "data.table",
48+
ver = packageVersion("data.table"),
49+
git = unname(read.dcf(system.file("DESCRIPTION", package="data.table", mustWork=TRUE), fields="Revision")[, "Revision"]),
50+
cc = sub("CC=", "", cc[1L], fixed=TRUE),
51+
cflags = sub("CFLAGS=", "", cc[2L], fixed=TRUE),
52+
script_desc = script_desc
53+
)
54+
c(l, omp())
55+
}
56+
funArgs = function(x) {
57+
stopifnot(is.list(x))
58+
nam = names(x)
59+
val = vapply(x, deparse, width.cutoff=500L, "")
60+
if (is.null(nam)) return(paste(val, collapse=","))
61+
nam[nzchar(nam)] = paste0(nam[nzchar(nam)],"=")
62+
paste(paste0(nam, val), collapse=",")
63+
}
64+
benchmark = function(num, expr, desc=NA_character_) {
65+
ts = as.numeric(Sys.time())
66+
sub.expr = substitute(expr)
67+
stopifnot(is.call(sub.expr))
68+
t = system.time(expr)
69+
t = `names<-`(as.list(t), gsub(".", "_", names(t), fixed=TRUE))
70+
l = list(num=num, timestamp=ts, fun=as.character(sub.expr[[1L]]), args=funArgs(as.list(sub.expr)[-1L]), desc=desc)
71+
l = lapply(c(l, getOption("datatable.env"), t), format, scientific=FALSE)
72+
setDF(l)
73+
setcolorder(l, c(
74+
"nodename","batch","timestamp",
75+
"rbin","rargs","rver",
76+
"script","script_desc",
77+
"package","ver","git","cc","cflags",
78+
"procs","threads","dt_procs_pct","dt_threads","th",
79+
"fun","args","desc",
80+
"user_self","sys_self","elapsed","user_child","sys_child"
81+
))
82+
if (num>0) fwrite(l, "benchmarks.csv", append=TRUE, row.names=FALSE)
83+
invisible(TRUE)
84+
}

inst/benchmarks/benchmarks.Rraw

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
library(data.table)
2+
benchmark = data.table:::benchmark
3+
options(datatable.env = data.table:::benchmarkEnv(commandArgs()))
4+
5+
# add timing test for many .SD cols #3797
6+
mt = rep(rownames(mtcars)[1:25],20)
7+
st = rep(state.name,10)
8+
DT = data.table(mt=mt, st=st, matrix(sample(1:(30000L*500),30000*500,replace=TRUE), nrow=500,ncol=30000), key='mt')
9+
options(datatable.optimize=0L)
10+
benchmark(1.01, DT[,.SD,by=st], desc="optimize=0L")
11+
options(datatable.optimize=Inf)
12+
benchmark(1.02, DT[,.SD,by=st], desc="optimize=Inf")

0 commit comments

Comments
 (0)