Skip to content

Commit 8998942

Browse files
author
dj44vuri
committed
manual test for different variants of simulate choices
1 parent eb1e946 commit 8998942

File tree

1 file changed

+84
-41
lines changed

1 file changed

+84
-41
lines changed
Lines changed: 84 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,135 @@
11
# ─────────────────────────────────────────────────────────────────────────────
2-
# BENCHMARK: add NEW3 using dt[, .. := .., by=group]
2+
# BENCHMARK in pure data.table, no back-and-forth to data.frame
33
# ─────────────────────────────────────────────────────────────────────────────
44

5-
library(formula.tools); library(compiler)
6-
library(dplyr); library(purrr)
7-
library(data.table); library(tictoc)
5+
library(formula.tools) # lhs() / rhs()
6+
library(compiler) # cmpfun()
7+
library(purrr) # map(), map2()
8+
library(data.table) # as.data.table(), .GRP, rbindlist(), setcolorder()
9+
library(tictoc) # tic()/toc()
810

911
# 1) Prepare data ------------------------------------------------------------
1012
set.seed(2025)
11-
n <- 1e7
13+
n <- 1e8
1214
ngrp <- 10
1315
df <- data.frame(
14-
price = runif(n, 5, 100),
15-
quality = runif(n, 1, 5),
16+
price = runif(n, 5, 100),
17+
quality = runif(n, 1, 5),
1618
group = rep(1:ngrp, length.out = n)
1719
)
18-
bprice <- -0.2; bquality <- 0.8
20+
bprice <- -0.2
21+
bquality <- 0.8
22+
1923
single_group <- list(
2024
V1 = V1 ~ bprice * price + bquality * quality,
2125
V2 = V2 ~ 0
2226
)
2327
utility_list <- rep(list(single_group), ngrp)
2428

2529
# 2) Helpers ---------------------------------------------------------------
26-
by_formula <- function(eq) pick(everything()) %>% transmute(!!lhs(eq) := !!rhs(eq))
27-
compile_one <- function(fm){
28-
nm <- as.character(lhs(fm)); rhs <- rhs(fm)
29-
fn <- eval(bquote(function(df) with(df, .(rhs))))
30+
31+
compile_one <- function(fm) {
32+
nm <- as.character(lhs(fm))
33+
rhs <- rhs(fm)
34+
fn <- eval(bquote(function(d) with(d, .(rhs))))
3035
list(name = nm, fun = cmpfun(fn))
3136
}
32-
compile_utility_list <- function(u) lapply(u, function(fl){
33-
tmp <- lapply(fl, compile_one)
34-
setNames(lapply(tmp, `[[`, "fun"),
35-
vapply(tmp, `[[`, "", "name"))
36-
})
3737

38-
# 3) Methods --------------------------------------------------------------
38+
compile_utility_list <- function(ul) {
39+
lapply(ul, function(fl) {
40+
tmp <- lapply(fl, compile_one)
41+
setNames(lapply(tmp, `[[`, "fun"),
42+
vapply(tmp, `[[`, "", "name"))
43+
})
44+
}
45+
46+
# 3) Four methods, all returning data.tables -------------------------------
3947

4048
old_group <- function(data, utility) {
4149
tic("OLD-GROUP")
42-
subs <- split(data, data$group)
43-
subs2 <- map2(utility, subs, ~ mutate(.y, map_dfc(.x, by_formula)))
44-
out <- bind_rows(subs2)
50+
subs <- split(data, data$group)
51+
out <- map2(utility, subs, ~ {
52+
dt <- as.data.table(.y)
53+
for (fm in .x) {
54+
dt[, as.character(lhs(fm)) := eval(rhs(fm), dt)]
55+
}
56+
dt
57+
})
58+
res <- rbindlist(out)
4559
toc(log = FALSE)
46-
out
60+
res
4761
}
4862

4963
new1_group <- function(data, utility) {
5064
ufuns <- compile_utility_list(utility)
5165
tic("NEW1-GROUP")
5266
subs <- split(data, data$group)
53-
subs2 <- map2(ufuns, subs, ~ bind_cols(.y, lapply(.x, function(f) f(.y))))
54-
out <- bind_rows(subs2)
67+
out <- map2(ufuns, subs, ~ {
68+
dt <- as.data.table(.y)
69+
for (nm in names(.x)) dt[, (nm) := .x[[nm]](dt)]
70+
dt
71+
})
72+
res <- rbindlist(out)
5573
toc(log = FALSE)
56-
out
74+
res
5775
}
5876

5977
new2_group <- function(data, utility) {
60-
ufuns <- compile_utility_list(utility)
61-
dt <- setDT(data)
78+
ufuns <- compile_utility_list(utility)
79+
dt <- as.data.table(data)
80+
groups <- sort(unique(dt$group))
81+
gm <- setNames(seq_along(groups), groups)
82+
6283
tic("NEW2-GROUP (loop)")
63-
for (g in seq_along(ufuns)) {
64-
fns <- ufuns[[g]]
84+
for (g in groups) {
85+
idx <- gm[as.character(g)]
86+
fns <- ufuns[[idx]]
6587
dt[group == g, (names(fns)) := lapply(fns, function(f) f(.SD))]
6688
}
6789
toc(log = FALSE)
68-
as.data.frame(dt)
90+
dt
6991
}
7092

7193
new3_group <- function(data, utility) {
72-
ufuns <- compile_utility_list(utility)
73-
dt <- setDT(data)
74-
varnames <- names(ufuns[[1]])
75-
tic("NEW3-GROUP (by=group)")
76-
dt[, (varnames) := lapply(ufuns[[.BY$group]], function(f) f(.SD)), by = group]
94+
ufuns <- compile_utility_list(utility)
95+
dt <- as.data.table(data)
96+
97+
# figure which columns your utilities will actually use
98+
sdcols <- intersect(
99+
names(dt),
100+
unique(unlist(lapply(utility, function(fl) {
101+
unlist(lapply(fl, function(fm) all.vars(rhs(fm))))
102+
})))
103+
)
104+
varn <- names(ufuns[[1]])
105+
106+
tic("NEW3-GROUP (single dt[ , by=group ])")
107+
out <- dt[
108+
,
109+
c(.SD,
110+
setNames(lapply(ufuns[[.GRP]], function(f) f(.SD)), varn)
111+
),
112+
by = group,
113+
.SDcols = sdcols
114+
]
77115
toc(log = FALSE)
78-
as.data.frame(dt)
116+
117+
# reorder columns to exactly: (sdcols, group, new vars)
118+
setcolorder(out, c(sdcols, "group", varn))
119+
out
79120
}
80121

81-
# 4) Run & validate ---------------------------------------------------------
122+
# 4) Run & compare ---------------------------------------------------------
123+
82124
res_old <- old_group(df, utility_list)
83125
res_new1 <- new1_group(df, utility_list)
84126
res_new2 <- new2_group(df, utility_list)
85127
res_new3 <- new3_group(df, utility_list)
86128

129+
# numeric‐accuracy check (ignores type/class)
87130
stopifnot(
88-
identical(res_old, res_new1),
89-
identical(res_new1, res_new2),
90-
identical(res_new2, res_new3)
131+
isTRUE(all.equal(res_old, res_new1)),
132+
isTRUE(all.equal(res_new1, res_new2)),
133+
isTRUE(all.equal(res_new2, res_new3))
91134
)
92-
message("✅ All four methods agree on results.")
135+
message("✅ All four methods agree (within numeric tolerance).")

0 commit comments

Comments
 (0)