Skip to content

Commit 4f81368

Browse files
committed
started adding shapley sensi
1 parent 831cd69 commit 4f81368

File tree

4 files changed

+267
-70
lines changed

4 files changed

+267
-70
lines changed
2.06 KB
Binary file not shown.

Tests/testingStuff/shapley.R

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
library(ggplot2)
2+
path <- paste0(system.file("examples", package = "tsf"), "/IDABatch.csv")
3+
lowerBounds <- c(
4+
kG = 1000,
5+
I0 = 0,
6+
IHD = 0,
7+
ID = 0
8+
)
9+
upperBounds <- c(
10+
kG = 10^8,
11+
I0 = 100,
12+
IHD = 10^7,
13+
ID = 10^7
14+
)
15+
additionalParameters <- c(
16+
host = 1e-6,
17+
dye = 1e-6,
18+
kHD = 3e6
19+
)
20+
res <- tsf::batch(
21+
"ida",
22+
lowerBounds, upperBounds,
23+
path, additionalParameters,
24+
ngen = 100,
25+
num_cores = 6,
26+
num_rep = 30
27+
)
28+
params <- Reduce(rbind, res[[1]][[2]])
29+
params
30+
save(params, file = "./Tests/testingStuff/parameters.RData")
31+
32+
load("./Tests/testingStuff/parameters.RData")
33+
datasets <- tsf:::importDataBatch(path)
34+
env <- new.env()
35+
env$h0 <- 1e-6
36+
env$d0 <- 1e-6
37+
env$ga <- datasets[[1]][, 1]
38+
env$signal <- datasets[[1]][, 2]
39+
env$kd <- 3e6
40+
env$error_calc_fct <- tsf:::rel_err
41+
42+
make_joint_sampler_kde <- function(df, lb, ub) {
43+
eps <- 1e-6
44+
p <- ncol(df)
45+
to_unit <- function(X) {
46+
U <- sweep(X, 2, lb, "-")
47+
U <- sweep(U, 2, (ub - lb), "/")
48+
pmin(pmax(U, eps), 1 - eps)
49+
}
50+
to_param <- function(U) {
51+
U <- sweep(U, 2, (ub - lb), "*")
52+
sweep(U, 2, lb, "+")
53+
}
54+
U <- to_unit(as.matrix(df))
55+
Z <- qlogis(U)
56+
kde_obj <- ks::kde(Z)
57+
function(n) {
58+
Znew <- ks::rkde(n = n, fhat = kde_obj)
59+
Unew <- plogis(Znew)
60+
Xnew <- to_param(Unew)
61+
Xdf <- as.data.frame(Xnew)
62+
colnames(Xdf) <- colnames(df)
63+
Xdf
64+
}
65+
}
66+
sobolVariance_dep <- function(parameter_df, lossFct, env, lb, ub, parameterNames, runAsShiny) {
67+
n <- 10000 # For large values package RANN is required
68+
nboot <- 100
69+
70+
joint_sampler <- make_joint_sampler_kde(parameter_df, lb, ub)
71+
X <- joint_sampler(n)
72+
names(X) <- parameterNames
73+
74+
sobolFun <- function(X) {
75+
p <- NULL
76+
if (is.data.frame(X) || is.matrix(X)) {
77+
return(sapply(1:nrow(X), function(x) {
78+
lossFct(as.numeric(X[x, ]), env, FALSE)
79+
}))
80+
} else {
81+
p <- as.numeric(X)
82+
}
83+
lossFct(p, env, FALSE)
84+
}
85+
sh <- sensitivity::shapleysobol_knn(model = sobolFun, X = X, nboot = nboot)
86+
df <- data.frame(sh$conf_int); df$x <- row.names(df)
87+
p <- ggplot(data = df, aes(x = x, y = original)) +
88+
geom_point() +
89+
geom_errorbar(aes(ymin = min..c.i., ymax = max..c.i.)) +
90+
labs(
91+
x = NULL,
92+
y = "Explained fraction of variance (Shapley effects)"
93+
)
94+
list(sh, p)
95+
}
96+
97+
set.seed(1234)
98+
res_sens1 <- sobolVariance_dep(
99+
params[, 1:4], tsf:::lossFctIDA, env,
100+
lowerBounds, upperBounds,
101+
names(lowerBounds), FALSE
102+
)
103+
res_sens1[[1]]
104+
res_sens1[[2]]
105+
106+
set.seed(1235)
107+
res_sens2 <- sobolVariance_dep(
108+
params[, 1:4], tsf:::lossFctIDA, env,
109+
lowerBounds, upperBounds,
110+
names(lowerBounds), FALSE
111+
)
112+
res_sens2[[1]]
113+
res_sens2[[2]]

tsf/R/Utils_App.R

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -286,74 +286,6 @@ is_integer <- function(x) {
286286
return(is.numeric(x) && identical(round(x), x))
287287
}
288288

289-
# NOTE: Using Contour Levels for Significant Model Regions
290-
# Significant Model Regions (SMR)
291-
kde4d_intern <- function(df) {
292-
mins <- apply(df, 2, min)
293-
maxs <- apply(df, 2, max)
294-
res <- ks::kde(df, xmin = mins, xmax = maxs)
295-
grid_points <- expand.grid(res$eval.points)
296-
joint_densities <- as.vector(res$estimate)
297-
density_data <- cbind(grid_points, joint_density = joint_densities)
298-
return(density_data)
299-
}
300-
301-
kde4d_with_smr <- function(df, prob = 0.95) {
302-
df <- df[, 1:4]
303-
res <- ks::kde(df)
304-
level <- paste0(prob * 100, "%")
305-
density_threshold <- res$cont[level]
306-
grid_points <- expand.grid(res$eval.points)
307-
densities <- as.vector(res$estimate)
308-
significant_points <- grid_points[densities >= density_threshold, ]
309-
mode_index <- which.max(densities)
310-
mode <- grid_points[mode_index, ]
311-
mode <- ifelse(mode < 0, 0, mode) |> as.numeric()
312-
CIs <- apply(significant_points, 2, range)
313-
lc <- CIs[1, ]
314-
lc <- ifelse(lc < 0, 0, lc)
315-
uc <- CIs[2, ]
316-
uc <- ifelse(uc < 0, 0, uc)
317-
res <- kde4d_intern(df)
318-
df <- lapply(1:4, function(x) {
319-
i <- parent.frame()$i[]
320-
data.frame(x = res[, i], y = res[, 5])
321-
})
322-
return(list(
323-
mode = mode,
324-
lower_ci = lc,
325-
upper_ci = uc,
326-
df = df
327-
))
328-
}
329-
330-
jkd <- function(df) {
331-
res <- kde4d_with_smr(df)
332-
res$mode
333-
res$lower_ci
334-
res$upper_ci
335-
res <- lapply(1:4, function(idx) {
336-
mode <- res$mode[idx]
337-
l <- res$lower_ci[idx]
338-
u <- res$upper_ci[idx]
339-
df_temp <- data.frame(
340-
values = c(mode, l, u),
341-
type = c("mode", "lower", "upper")
342-
)
343-
names(df_temp)[1] <- names(df)[idx]
344-
return(df_temp)
345-
})
346-
res <- lapply(res, function(x) {
347-
x[, 1]
348-
})
349-
res <- Reduce(rbind, res) |> as.data.frame()
350-
res <- cbind(names(df)[1:4], res)
351-
names(res) <- c("Parameter", "mode", "lower", "upper")
352-
row.names(res) <- NULL
353-
return(res)
354-
}
355-
356-
357289
# download file
358290
# ========================================================================================
359291
download_file <- function(model, file, result_val) {
Lines changed: 154 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,157 @@
1+
# TODO: update the plot and the text
2+
3+
`%||%` <- function(a, b) if (!is.null(a)) a else b
4+
5+
kde4d_intern <- function(df, n_samp = 20000) {
6+
stopifnot(ncol(df) == 4)
7+
colnames(df) <- colnames(df) %||% paste0("V", seq_len(ncol(df)))
8+
H <- ks::Hpi(as.matrix(df))
9+
fit <- ks::kde(as.matrix(df), H = H)
10+
draws <- as.data.frame(ks::rkde(n = n_samp, fhat = fit))
11+
colnames(draws) <- colnames(df)
12+
marginals <- lapply(seq_len(ncol(draws)), function(j) {
13+
d <- density(draws[[j]], n = 512)
14+
data.frame(x = d$x, y = d$y)
15+
})
16+
names(marginals) <- colnames(draws)
17+
list(fit = fit, draws = draws, marginals = marginals)
18+
}
19+
kde4d_with_smr <- function(df, prob = 0.95, n_samp = 20000) {
20+
stopifnot(ncol(df) == 4)
21+
ki <- kde4d_intern(df, n_samp = n_samp)
22+
fit <- ki$fit
23+
Xs <- ki$draws
24+
grid <- expand.grid(fit$eval.points)
25+
dens <- as.vector(fit$estimate)
26+
mode <- as.numeric(grid[which.max(dens), ])
27+
names(mode) <- colnames(df)
28+
alpha <- (1 - prob) / 2
29+
qs <- t(apply(Xs, 2, quantile, probs = c(alpha, 1 - alpha), na.rm = TRUE))
30+
colnames(qs) <- c("lower", "upper")
31+
rownames(qs) <- colnames(df)
32+
list(
33+
mode = mode,
34+
lower_ci = qs[, "lower"],
35+
upper_ci = qs[, "upper"],
36+
df = ki$marginals
37+
)
38+
}
39+
40+
# # NOTE: Using Contour Levels for Significant Model Regions
41+
# # Significant Model Regions (SMR)
42+
# kde4d_intern <- function(df) {
43+
# mins <- apply(df, 2, min)
44+
# maxs <- apply(df, 2, max)
45+
# res <- ks::kde(df, xmin = mins, xmax = maxs)
46+
# grid_points <- expand.grid(res$eval.points)
47+
# joint_densities <- as.vector(res$estimate)
48+
# density_data <- cbind(grid_points, joint_density = joint_densities)
49+
# return(density_data)
50+
# }
51+
52+
# kde4d_with_smr <- function(df, prob = 0.95) {
53+
# df <- df[, 1:4]
54+
# res <- ks::kde(df)
55+
# level <- paste0(prob * 100, "%")
56+
# density_threshold <- res$cont[level]
57+
# grid_points <- expand.grid(res$eval.points)
58+
# densities <- as.vector(res$estimate)
59+
# significant_points <- grid_points[densities >= density_threshold, ]
60+
# mode_index <- which.max(densities)
61+
# mode <- grid_points[mode_index, ]
62+
# mode <- ifelse(mode < 0, 0, mode) |> as.numeric()
63+
# CIs <- apply(significant_points, 2, range)
64+
# lc <- CIs[1, ]
65+
# lc <- ifelse(lc < 0, 0, lc)
66+
# uc <- CIs[2, ]
67+
# uc <- ifelse(uc < 0, 0, uc)
68+
# res <- kde4d_intern(df)
69+
# df <- lapply(1:4, function(x) {
70+
# i <- parent.frame()$i[]
71+
# data.frame(x = res[, i], y = res[, 5])
72+
# })
73+
# return(list(
74+
# mode = mode,
75+
# lower_ci = lc,
76+
# upper_ci = uc,
77+
# df = df
78+
# ))
79+
# }
80+
81+
jkd <- function(df) {
82+
res <- kde4d_with_smr(df)
83+
res$mode
84+
res$lower_ci
85+
res$upper_ci
86+
res <- lapply(1:4, function(idx) {
87+
mode <- res$mode[idx]
88+
l <- res$lower_ci[idx]
89+
u <- res$upper_ci[idx]
90+
df_temp <- data.frame(
91+
values = c(mode, l, u),
92+
type = c("mode", "lower", "upper")
93+
)
94+
names(df_temp)[1] <- names(df)[idx]
95+
return(df_temp)
96+
})
97+
res <- lapply(res, function(x) {
98+
x[, 1]
99+
})
100+
res <- Reduce(rbind, res) |> as.data.frame()
101+
res <- cbind(names(df)[1:4], res)
102+
names(res) <- c("Parameter", "mode", "lower", "upper")
103+
row.names(res) <- NULL
104+
return(res)
105+
}
106+
107+
make_joint_sampler_kde <- function(df, lb, ub) {
108+
eps <- 1e-6
109+
p <- ncol(df)
110+
to_unit <- function(X) {
111+
U <- sweep(X, 2, lb, "-")
112+
U <- sweep(U, 2, (ub - lb), "/")
113+
pmin(pmax(U, eps), 1 - eps)
114+
}
115+
to_param <- function(U) {
116+
U <- sweep(U, 2, (ub - lb), "*")
117+
sweep(U, 2, lb, "+")
118+
}
119+
U <- to_unit(as.matrix(df))
120+
Z <- qlogis(U)
121+
kde_obj <- ks::kde(Z)
122+
function(n) {
123+
Znew <- ks::rkde(n = n, fhat = kde_obj)
124+
Unew <- plogis(Znew)
125+
Xnew <- to_param(Unew)
126+
Xdf <- as.data.frame(Xnew)
127+
colnames(Xdf) <- colnames(df)
128+
Xdf
129+
}
130+
}
131+
sobolVariance_dep <- function(parameter_df, lossFct, env, lb, ub, parameterNames, runAsShiny) {
132+
n <- 1000
133+
nboot <- 100
134+
joint_sampler <- make_joint_sampler_kde(parameter_df, lb, ub)
135+
X <- joint_sampler(n)
136+
names(X) <- parameterNames
137+
sobolFun <- function(X) {
138+
p <- NULL
139+
if (is.data.frame(X) || is.matrix(X)) {
140+
return(sapply(1:nrow(X), function(x) {
141+
lossFct(as.numeric(X[x, ]), env, FALSE)
142+
}))
143+
} else {
144+
p <- as.numeric(X)
145+
}
146+
lossFct(p, env, FALSE)
147+
}
148+
sh <- shapleysobol_knn(model = sobolFun, X = X, nboot = nboot)
149+
ggplot(sh) +
150+
theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1),
151+
axis.text.y = element_text(size = 8)) +
152+
ylab("Explained fraction of variance (Shapley effects)")
153+
}
154+
1155
# Monte Carlo Estimation of Sobol’ Indices
2156
sobolVariance <- function(lossFct, env, lb, ub, parameterNames, runAsShiny) {
3157
n <- 1000
@@ -38,8 +192,6 @@ sobolVariance <- function(lossFct, env, lb, ub, parameterNames, runAsShiny) {
38192
ylab("Explained fraction of variance")
39193
}
40194

41-
42-
43195
#' Optimize algebraic systems which describe thermodynamic binding systems
44196
#'
45197
#' @export

0 commit comments

Comments
 (0)