-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbgrm_logit.R
More file actions
118 lines (108 loc) · 3.22 KB
/
bgrm_logit.R
File metadata and controls
118 lines (108 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly=TRUE)
options(show.error.locations = TRUE)
if (length(args)==0) {
SEED = 1
C = 5
n = 1000
m = 50
TYPE = "GP"
}
if (length(args)==5){
SEED = as.integer(args[1])
C = as.integer(args[2])
n = as.integer(args[3])
m = as.integer(args[4])
TYPE = args[5]
}
# install gpirt package
R_path="~/R/x86_64-redhat-linux-gnu-library/4.0"
.libPaths(R_path)
HYP = paste(TYPE, "_C_", C, '_n_', n, '_m_', m, '_SEED_', SEED, sep="")
load(file=paste("./data/", HYP, ".RData" , sep=""))
set.seed(SEED)
library(dplyr)
source("getprob_gpirt.R")
library(rstan)
rstan_options(auto_write = FALSE)
options(mc.cores = parallel::detectCores())
xs = seq(-3,3,0.01)
data_train[is.na(data_train)] = 0
stan_data <- list(N=n,
M=m,
C=C,
Nstar=length(xs),
xstar=xs,
y=data_train)
# train stan model
fit <- stan(file = "bgrm_logit.stan",
data = stan_data,
warmup = 500,
iter = 1000,
chains = 1,
cores = 1,
thin = 4,
control=list(adapt_delta=.98, max_treedepth = 15),
seed = SEED,
refresh= 1
)
# saveRDS(fit, paste("./results/bgrm_", HYP, ".rds" , sep=""))
samples <- as.data.frame(fit)
SAMPLE_ITERS = length(samples[["theta[1]"]])
pred_theta = rep(0, n)
pred_lls = c()
pred_acc = c()
train_lls = c()
train_acc = c()
for (i in 1:nrow(data)) {
pred_theta[i] = mean(samples[[paste("theta[",as.character(i), "]", sep="")]])
i_star = as.integer(pred_theta[i]*100+300)
for (j in 1:ncol(data)) {
if(!is.na(data[[i,j]])){
lls = matrix(0,nrow=SAMPLE_ITERS, ncol = C)
ps = matrix(0, nrow=C, ncol=SAMPLE_ITERS)
for(c in 1:C){
ps[c,] = samples[[paste("p[",as.character(i_star), ",", as.character(j), ",", as.character(c) ,"]", sep="")]]
}
ll = log(rowMeans(ps))
y_pred = which.max(ll)
if(train_idx[i,j]==0){
pred_acc = c(pred_acc, y_pred==(data[[i,j]]))
pred_lls = c(pred_lls, ll[data[[i,j]]])
}else{
train_acc = c(train_acc, y_pred==(data[[i,j]]))
train_lls = c(train_lls, ll[data[[i,j]]])
}
}
}
}
print(cor(theta,pred_theta))
print(mean(train_lls))
print(mean(train_acc))
print(mean(pred_lls))
print(mean(pred_acc))
# TODO: cor of icc
idx = (as.integer(min(theta)*100+300)):(as.integer(max(theta)*100+300))
bgrm_iccs = matrix(0, nrow=length(idx), ncol=m)
true_iccs = matrix(0, nrow=nrow(bgrm_iccs), ncol=m)
cor_icc = rep(0, m)
rmse_icc = rep(0, m)
for (j in 1:m) {
source("true_irf.R")
probs = getprobs_gpirt(sign(cor(theta,pred_theta))*xs[idx], irfs, matrix(thresholds,nrow=1))
tmp = probs %>%
group_by(xs) %>%
summarize(icc=sum(order*p))
true_iccs[,j] = tmp$icc
tmp = c()
for (i in 1:length(xs[idx])) {
tmp = c(tmp, mean(samples[[paste("icc[", as.integer(xs[idx][i]*100+300),",",j,"]", sep="")]]))
}
bgrm_iccs[,j] = tmp
cor_icc[j] = cor(bgrm_iccs[,j], true_iccs[,j])
rmse_icc[j] = sqrt(mean((bgrm_iccs[,j]-true_iccs[,j])^2))
}
print(mean(cor_icc))
print(mean(rmse_icc))
save(bgrm_iccs,pred_theta,train_lls, train_acc, pred_lls, pred_acc,cor_icc, rmse_icc,
file=paste("./results/bgrm_", HYP, ".RData" , sep=""))