|
| 1 | +library(bgms) |
| 2 | + |
| 3 | +pseudolikelihood_numerator <- function(thresholds, interactions, suffstats, seen, threshold_counts_without_0, P) { |
| 4 | + result <- 0.0 |
| 5 | + |
| 6 | + for (i in seq_len(P)) { |
| 7 | + for (u in seq_len(seen[i] - 1)) { |
| 8 | + result <- result + threshold_counts_without_0[i, u] * thresholds[i, u] |
| 9 | + } |
| 10 | + } |
| 11 | + |
| 12 | + result <- result + sum(interactions * suffstats) |
| 13 | + |
| 14 | + return(result) |
| 15 | +} |
| 16 | + |
| 17 | + |
| 18 | +pseudo_logposterior_full_aij2 <- function(a, i, j, thresholds, interactions, suffstats, seen, threshold_counts_without_0, X, P, N, |
| 19 | + prior_cauchy_scale = 2.5) { |
| 20 | + |
| 21 | + interactions[i, j] <- interactions[j, i] <- a |
| 22 | + |
| 23 | + pseudolikelihood_numerator( |
| 24 | + thresholds = thresholds, |
| 25 | + interactions = interactions, |
| 26 | + suffstats = suffstats, |
| 27 | + seen = seen, |
| 28 | + threshold_counts_without_0 = threshold_counts_without_0, |
| 29 | + P = P |
| 30 | + ) + |
| 31 | + pseudolikelihood_denominator2( |
| 32 | + thresholds = thresholds, |
| 33 | + interactions = interactions, |
| 34 | + suffstats = suffstats, |
| 35 | + seen = seen, |
| 36 | + X = X, |
| 37 | + P = P, |
| 38 | + N = N, |
| 39 | + i0 = i, |
| 40 | + j0 = j |
| 41 | + ) + |
| 42 | + sum(dcauchy(interactions[lower.tri(interactions)], 0, prior_cauchy_scale, log = TRUE)) |
| 43 | +} |
| 44 | + |
| 45 | +pseudolikelihood_denominator2 <- function(thresholds, interactions, suffstats, seen, X, P, N, i0, j0) { |
| 46 | + result <- 0.0 |
| 47 | + |
| 48 | + for (v in seq_len(N)) { |
| 49 | + for (i in c(i0, j0)) { |
| 50 | + temp1 <- c(crossprod(interactions[i, ], X[v, ])) |
| 51 | + |
| 52 | + temp2 <- 1.0 |
| 53 | + for (u in seq_len(seen[i] - 1)) { |
| 54 | + temp2 <- temp2 + exp(thresholds[i, u] + u * temp1) |
| 55 | + } |
| 56 | + |
| 57 | + result <- result - log(temp2) |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + return(result) |
| 62 | +} |
| 63 | + |
| 64 | +log_pseudolikelihood_full2 <- function(a, i, j, Mu, Sigma, iter, x, suffstats, seen, threshold_counts_without_0) { |
| 65 | + |
| 66 | + |
| 67 | + n <- nrow(x) # Number of observations |
| 68 | + p <- ncol(x) |
| 69 | + |
| 70 | + MuIter <- Mu[iter, ] |
| 71 | + MuMat <- matrix(0, p, ncol(threshold_counts_without_0)) # Initialize matrix for thresholds |
| 72 | + idx <- 1 |
| 73 | + for (ii in 1:p) { |
| 74 | + for (jj in 1:ncol(MuMat)) { |
| 75 | + MuMat[ii, jj] <- MuIter[idx] |
| 76 | + idx <- idx + 1# Fill matrix with threshold values |
| 77 | + } |
| 78 | + } |
| 79 | + SigmaIter <- Sigma[iter, ] |
| 80 | + SigmaMat = matrix(0, p, p) # Initialize matrix for interactions |
| 81 | + SigmaMat[lower.tri(SigmaMat)] = SigmaIter # Fill lower triangle with Sigma values |
| 82 | + SigmaMat = SigmaMat + t(SigmaMat) # Make symmetric |
| 83 | + |
| 84 | + D = length(a) # Number of elements in a |
| 85 | + log_pl = numeric(length = D) # Initialize log pseudolikelihood vector |
| 86 | + |
| 87 | + # colMax <- unname(matrixStats::colMaxs(x)) |
| 88 | + # log_p <- numeric(length = max(colMax) + 1) # Initialize log probability vector |
| 89 | + |
| 90 | + for (d in 1:D) { |
| 91 | + log_pl[d] = pseudo_logposterior_full_aij2(a[d], i, j, thresholds = MuMat, interactions = SigmaMat, X = x, N = n, P = p, |
| 92 | + seen = seen, suffstats = suffstats, threshold_counts_without_0 = threshold_counts_without_0) |
| 93 | + } |
| 94 | + |
| 95 | + return(log_pl) # Return log pseudolikelihood |
| 96 | +} |
| 97 | + |
| 98 | + |
| 99 | +x0 = Wenchuan[1:50, 1:5] # Select the first 5 columns of Wenchuan dataset |
| 100 | +p = ncol(x0) # Get the number of variables (columns) |
| 101 | + |
| 102 | +samples = bgm(x0, save = TRUE) # Run the bgm function and save samples |
| 103 | +Mu = samples$main_effect_samples # Extract threshold estimates |
| 104 | +Sigma = samples$pairwise_effect_samples # Extract interaction estimates |
| 105 | + |
| 106 | +data = bgms:::reformat_data(x = x0, |
| 107 | + na_action = "listwise", |
| 108 | + variable_bool = rep(TRUE, p), |
| 109 | + reference_category = rep(1, p)) |
| 110 | + |
| 111 | +x = data$x # Extract reformatted data |
| 112 | +no_categories = data$no_categories # Get number of categories per variable |
| 113 | +no_categories = cumsum(no_categories) # Cumulative sum for indexing |
| 114 | +start = 1 + c(0, no_categories[-length(no_categories)]) # Start indices |
| 115 | +stop = no_categories # Stop indices |
| 116 | + |
| 117 | +K <- max(x) |
| 118 | +threshold_counts_wench <- apply(x, 2, \(y) c(table(c(y, 0:K)) - 1)) |
| 119 | +threshold_counts_without_0_wench <- apply(threshold_counts_wench, 2L, \(y) { |
| 120 | + c(y[y > 0], rep(0, sum(y == 0)))[-1L] |
| 121 | +}) |
| 122 | +threshold_counts_without_0_wench <- t(matrix(threshold_counts_without_0_wench, K, p)) |
| 123 | + |
| 124 | +seen_wench <- unname(apply(x, 2, \(y) length(unique(y)))) |
| 125 | +suffstats_wench <- unname(crossprod(x)) |
| 126 | + |
| 127 | + |
| 128 | +i <- 2; j <- 1 |
| 129 | +log_pseudolikelihood_full2(c(.2, .5), i, j, Mu, Sigma, iter = 10000, x = x, |
| 130 | + seen = seen_wench, suffstats = suffstats_wench, |
| 131 | + threshold_counts_without_0 = threshold_counts_without_0_wench) |
| 132 | + |
| 133 | +optim_res <- optim(Sigma[i, j], function(a) { |
| 134 | + returnVal <- log_pseudolikelihood_full2(a, i, j, Mu, Sigma, iter = 10000, x = x, |
| 135 | + seen = seen_wench, suffstats = suffstats_wench, |
| 136 | + threshold_counts_without_0 = threshold_counts_without_0_wench) |
| 137 | + if (any(!is.finite(returnVal))) { |
| 138 | + for (i in seq_along(returnVal)) { |
| 139 | + # if (!is.finite(returnVal[i])) { |
| 140 | + # print(sprintf("a: %f, returnVal: %f", a[i], returnVal[i])) |
| 141 | + # } |
| 142 | + if (!is.finite(returnVal[i]) && returnVal[i] < 0) { |
| 143 | + returnVal[i] <- -.Machine$double.xmax |
| 144 | + } |
| 145 | + } |
| 146 | + } |
| 147 | + return(returnVal) |
| 148 | +}, method = "Brent", lower = -100, upper = 100, control = list(fnscale = -1, trace = 5)) |
| 149 | + |
| 150 | + |
| 151 | +# setup arguments for C++ |
| 152 | +iter <- 10000 |
| 153 | +MuMat <- matrix(0, p, ncol(threshold_counts_without_0_wench)) # Initialize matrix for thresholds |
| 154 | +idx <- 1 |
| 155 | +for (ii in 1:p) { |
| 156 | + for (jj in 1:ncol(MuMat)) { |
| 157 | + MuMat[ii, jj] <- Mu[iter, idx] |
| 158 | + idx <- idx + 1# Fill matrix with threshold values |
| 159 | + } |
| 160 | +} |
| 161 | +SigmaMat = matrix(0, p, p) # Initialize matrix for interactions |
| 162 | +SigmaMat[lower.tri(SigmaMat)] = Sigma[iter, ] # Fill lower triangle with Sigma values |
| 163 | +SigmaMat = SigmaMat + t(SigmaMat) # Make symmetric |
| 164 | + |
| 165 | + |
| 166 | +pairwise_effects <- SigmaMat |
| 167 | +main_effects <- MuMat |
| 168 | + |
| 169 | +# const double |
| 170 | +initial_value <- Sigma[i, j] |
| 171 | +# const arma::mat& |
| 172 | +pairwise_effects <- SigmaMat |
| 173 | +# const arma::mat& |
| 174 | +main_effects <- MuMat |
| 175 | +# const arma::imat& |
| 176 | +observations <- x |
| 177 | +# const arma::ivec& |
| 178 | +num_categories <- seen_wench |
| 179 | +# const int |
| 180 | +num_persons <- nrow(x) |
| 181 | +# const int |
| 182 | +variable1 <- i |
| 183 | +# const int |
| 184 | +variable2 <- j |
| 185 | +# TODO: these two are unused? |
| 186 | +# const double |
| 187 | +proposed_state <- 0.0 |
| 188 | +# const double |
| 189 | +current_state <- 0.0 |
| 190 | +# const arma::mat& |
| 191 | +residual_matrix <- matrix(0, nrow(x), p) |
| 192 | +# const arma::uvec& |
| 193 | +is_ordinal_variable <- rep(1, p) |
| 194 | +# const arma::ivec& |
| 195 | +reference_category <- data$reference_category |
| 196 | +# const double |
| 197 | +interaction_scale <- 2.5 |
| 198 | + |
| 199 | +newton_raphson_x <- bgms:::optimize_log_pseudoposterior_interaction( |
| 200 | + initial_value = c(initial_value), |
| 201 | + pairwise_effects = pairwise_effects, |
| 202 | + main_effects = main_effects, |
| 203 | + observations = observations, |
| 204 | + num_categories = num_categories - 1, |
| 205 | + num_persons = num_persons, |
| 206 | + variable1 = variable1 - 1, |
| 207 | + variable2 = variable2 - 1, |
| 208 | + proposed_state = proposed_state, |
| 209 | + current_state = current_state, |
| 210 | + residual_matrix = residual_matrix, |
| 211 | + is_ordinal_variable = is_ordinal_variable, |
| 212 | + reference_category = reference_category, |
| 213 | + interaction_scale = interaction_scale |
| 214 | +) |
| 215 | +newton_raphson_fx <- log_pseudolikelihood_full2(newton_raphson, i, j, Mu, Sigma, iter = 10000, x = x, |
| 216 | + seen = seen_wench, suffstats = suffstats_wench, |
| 217 | + threshold_counts_without_0 = threshold_counts_without_0_wench) |
| 218 | +matrix(c(newton_raphson_x, newton_raphson_fx, optim_res$par, optim_res$value), |
| 219 | + nrow = 2, dimnames = list(c("x", "f(x)"), c("Newton-Raphson", "Optim"))) |
0 commit comments