Init bc model update.

MaartenMarsman · MaartenMarsman · commit 6845f2425158 · 2025-10-19T12:02:40.000+02:00
* Does not use ExpBeGone trick.
* Metropolis works but NUTS not yet.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -25,8 +25,8 @@ sample_omrf_gibbs <- function(no_states, no_variables, no_categories, interactio
     .Call(`_bgms_sample_omrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, iter)
 }
 
-sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) {
-    .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)
+sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter) {
+    .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter)
 }
 
 compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) {
diff --git a/R/bgm.R b/R/bgm.R
@@ -524,8 +524,9 @@ bgm = function(
     # Ordinal (variable_bool == TRUE) or Blume-Capel (variable_bool == FALSE)
     bc_vars = which(!variable_bool)
     for(i in bc_vars) {
-      blume_capel_stats[1, i] = sum(x[, i])
+      blume_capel_stats[1, i] = sum(x[, i] - baseline_category[i])
       blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2)
+      x[, i] = x[, i] - baseline_category[i]
     }
   }
   pairwise_stats = t(x) %*% x
@@ -588,7 +589,6 @@ bgm = function(
     nThreads = cores, seed = seed, progress_type = progress_type
   )
 
-
   userInterrupt = any(vapply(out, FUN = `[[`, FUN.VALUE = logical(1L), "userInterrupt"))
   if (userInterrupt) {
     warning("Stopped sampling after user interrupt, results are likely uninterpretable.")
diff --git a/R/bgmCompare.R b/R/bgmCompare.R
@@ -402,6 +402,9 @@ bgmCompare = function(
   blume_capel_stats = compute_blume_capel_stats(
     x, baseline_category, ordinal_variable, group
   )
+  for (i in which(!ordinal_variable)) {
+    x[, i] = x[, i] - baseline_category[i]  # recentre each observation on the baseline category (no sum: a scalar would be recycled over the column)
+  }
 
   # Compute sufficient statistics for pairwise interactions
   pairwise_stats = compute_pairwise_stats(
diff --git a/R/data_utils.R b/R/data_utils.R
@@ -210,7 +210,7 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro
     sufficient_stats = matrix(0, nrow = 2, ncol = ncol(x))
     bc_vars = which(!ordinal_variable)
     for (i in bc_vars) {
-      sufficient_stats[1, i] = sum(x[, i])
+      sufficient_stats[1, i] = sum(x[, i] - baseline_category[i])
       sufficient_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2)
     }
     return(sufficient_stats)
@@ -220,7 +220,7 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro
       sufficient_stats_gr = matrix(0, nrow = 2, ncol = ncol(x))
       bc_vars = which(!ordinal_variable)
       for (i in bc_vars) {
-        sufficient_stats_gr[1, i] = sum(x[group == g, i])
+        sufficient_stats_gr[1, i] = sum(x[group == g, i] - baseline_category[i])
         sufficient_stats_gr[2, i] = sum((x[group == g, i] - baseline_category[i]) ^ 2)
       }
       sufficient_stats[[g]] = sufficient_stats_gr
diff --git a/R/sampleMRF.R b/R/sampleMRF.R
@@ -13,7 +13,7 @@
 #' in specifying their model.
 #'
 #' The Blume-Capel option is specifically designed for ordinal variables that
-#' have a special type of reference_category category, such as the neutral
+#' have a special type of baseline category, such as the neutral
 #' category in a Likert scale. The Blume-Capel model specifies the following
 #' quadratic model for the threshold parameters:
 #' \deqn{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}{{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}}
@@ -23,8 +23,8 @@
 #' \eqn{\alpha > 0}{\alpha > 0} and decreasing threshold values if
 #' \eqn{\alpha <0}{\alpha <0}), if \eqn{\beta < 0}{\beta < 0}, it offers an
 #' increasing penalty for responding in a category further away from the
-#' reference_category category r, while \eqn{\beta > 0}{\beta > 0} suggests a
-#' preference for responding in the reference_category category.
+#' baseline category r, while \eqn{\beta > 0}{\beta > 0} suggests a
+#' preference for responding in the baseline category.
 #'
 #' @param no_states The number of states of the ordinal MRF to be generated.
 #'
@@ -53,8 +53,8 @@
 #' ``blume-capel''. Binary variables are automatically treated as ``ordinal’’.
 #' Defaults to \code{variable_type = "ordinal"}.
 #'
-#' @param reference_category An integer vector of length \code{no_variables} specifying the
-#' reference_category category that is used for the Blume-Capel model (details below).
+#' @param baseline_category An integer vector of length \code{no_variables} specifying the
+#' baseline category that is used for the Blume-Capel model (details below).
 #' Can be any integer value between \code{0} and \code{no_categories} (or
 #' \code{no_categories[i]}).
 #'
@@ -103,7 +103,7 @@
 #'                interactions = Interactions,
 #'                thresholds = Thresholds,
 #'                variable_type = c("b","b","o","b","o"),
-#'                reference_category = 2)
+#'                baseline_category = 2)
 #'
 #' @export
 mrfSampler = function(no_states,
@@ -112,7 +112,7 @@ mrfSampler = function(no_states,
                       interactions,
                       thresholds,
                       variable_type = "ordinal",
-                      reference_category,
+                      baseline_category,
                       iter = 1e3) {
   # Check no_states, no_variables, iter --------------------------------------------
   if(no_states <= 0 ||
@@ -168,20 +168,20 @@ mrfSampler = function(no_states,
     }
   }
 
-  # Check the reference_category for Blume-Capel variables ---------------------
+  # Check the baseline_category for Blume-Capel variables ---------------------
   if(any(variable_type == "blume-capel")) {
-    if(length(reference_category) == 1) {
-      reference_category = rep(reference_category, no_variables)
+    if(length(baseline_category) == 1) {
+      baseline_category = rep(baseline_category, no_variables)
     }
-    if(any(reference_category < 0) || any(abs(reference_category - round(reference_category)) > .Machine$double.eps)) {
+    if(any(baseline_category < 0) || any(abs(baseline_category - round(baseline_category)) > .Machine$double.eps)) {
       stop(paste0("For variables ",
-                  which(reference_category < 0),
-                  " ``reference_category'' was either negative or not integer."))
+                  which(baseline_category < 0),
+                  " ``baseline_category'' was either negative or not integer."))
     }
-    if(any(reference_category - no_categories > 0)) {
+    if(any(baseline_category - no_categories > 0)) {
       stop(paste0("For variables ",
-                  which(reference_category - no_categories > 0),
-                  " the ``reference_category'' category was larger than the maximum category value."))
+                  which(baseline_category - no_categories > 0),
+                  " the ``baseline_category'' category was larger than the maximum category value."))
     }
   }
 
@@ -300,7 +300,7 @@ mrfSampler = function(no_states,
                              interactions = interactions,
                              thresholds = thresholds,
                              variable_type = variable_type,
-                             reference_category = reference_category,
+                             baseline_category = baseline_category,
                              iter = iter)
   }
 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -148,8 +148,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // sample_bcomrf_gibbs
-IntegerMatrix sample_bcomrf_gibbs(int no_states, int no_variables, IntegerVector no_categories, NumericMatrix interactions, NumericMatrix thresholds, StringVector variable_type, IntegerVector reference_category, int iter);
-RcppExport SEXP _bgms_sample_bcomrf_gibbs(SEXP no_statesSEXP, SEXP no_variablesSEXP, SEXP no_categoriesSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP variable_typeSEXP, SEXP reference_categorySEXP, SEXP iterSEXP) {
+IntegerMatrix sample_bcomrf_gibbs(int no_states, int no_variables, IntegerVector no_categories, NumericMatrix interactions, NumericMatrix thresholds, StringVector variable_type, IntegerVector baseline_category, int iter);
+RcppExport SEXP _bgms_sample_bcomrf_gibbs(SEXP no_statesSEXP, SEXP no_variablesSEXP, SEXP no_categoriesSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP variable_typeSEXP, SEXP baseline_categorySEXP, SEXP iterSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -159,9 +159,9 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< NumericMatrix >::type interactions(interactionsSEXP);
     Rcpp::traits::input_parameter< NumericMatrix >::type thresholds(thresholdsSEXP);
     Rcpp::traits::input_parameter< StringVector >::type variable_type(variable_typeSEXP);
-    Rcpp::traits::input_parameter< IntegerVector >::type reference_category(reference_categorySEXP);
+    Rcpp::traits::input_parameter< IntegerVector >::type baseline_category(baseline_categorySEXP);
     Rcpp::traits::input_parameter< int >::type iter(iterSEXP);
-    rcpp_result_gen = Rcpp::wrap(sample_bcomrf_gibbs(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter));
+    rcpp_result_gen = Rcpp::wrap(sample_bcomrf_gibbs(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter));
     return rcpp_result_gen;
 END_RCPP
 }
diff --git a/src/bgmCompare_logp_and_grad.cpp b/src/bgmCompare_logp_and_grad.cpp
@@ -160,10 +160,10 @@ double log_pseudoposterior(
         const double quad_effect = main_group(v, 1);
         const int ref = baseline_category(v);
         for (int c = 0; c <= num_cats; ++c) {
-          const int centered = c - ref;
-          const double quad = quad_effect * centered * centered;
-          const double lin  = lin_effect * c;
-          const arma::vec exponent = lin + quad + c * rest_score - bound;
+          const int score = c - ref;
+          const double lin  = lin_effect * score;
+          const double quad = quad_effect * score * score;
+          const arma::vec exponent = lin + quad + score * rest_score - bound;
           denom += ARMA_MY_EXP(exponent);
         }
       }
@@ -566,10 +566,10 @@ arma::vec gradient(
         const double lin_effect  = main_group(v, 0);
         const double quad_effect = main_group(v, 1);
         for (int s = 0; s <= K; ++s) {
-          const int centered = s - ref;
-          const double lin  = lin_effect * s;
-          const double quad = quad_effect * centered * centered;
-          exponents.col(s) = lin + quad + s * rest_score - bound;
+          const int score = s - ref;
+          const double lin  = lin_effect * score;
+          const double quad = quad_effect * score * score;
+          exponents.col(s) = lin + quad + score * rest_score - bound;
         }
       }
 
@@ -594,7 +594,7 @@ arma::vec gradient(
           }
         }
       } else {
-        arma::vec lin_score  = arma::regspace<arma::vec>(0, K);          // length K+1
-        arma::vec quad_score = arma::square(lin_score - ref);
+        arma::vec lin_score  = arma::regspace<arma::vec>(0 - ref, K - ref);          // length K+1
+        arma::vec quad_score = arma::square(lin_score);                  // lin_score is already centred on ref; subtracting ref again would give (s - 2*ref)^2
 
         double sum_lin  = arma::accu(probs * lin_score);
@@ -619,8 +619,15 @@ arma::vec gradient(
         if (v == v2) continue;
 
         arma::vec expected_value(num_group_obs, arma::fill::zeros);
-        for (int s = 1; s <= K; ++s) {
-          expected_value += s * probs.col(s) % obs.col(v2);
+        if (is_ordinal_variable(v)) {
+          for (int s = 1; s <= K; ++s) {
+            expected_value += s * probs.col(s) % obs.col(v2);
+          }
+        } else {
+          for (int s = 0; s <= K; ++s) {
+            int score = s - ref;
+            expected_value += score * probs.col(s) % obs.col(v2);
+          }
         }
         double sum_expectation = arma::accu(expected_value);
 
@@ -860,10 +867,10 @@ double log_pseudoposterior_main_component(
       const double quad_effect = main_group(variable, 1);
       const int ref = baseline_category(variable);
       for (int cat = 0; cat <= num_cats; cat++) {
-        const int centered = cat - ref;
-        const double quad = quad_effect * centered * centered;
-        const double lin  = lin_effect * cat;
-        const arma::vec exponent = lin + quad + cat * rest_score - bound;
+        const int score = cat - ref;
+        const double quad = quad_effect * score * score;
+        const double lin  = lin_effect * score;
+        const arma::vec exponent = lin + quad + score * rest_score - bound;
         denom += ARMA_MY_EXP(exponent);
       }
     }
@@ -1044,10 +1051,10 @@ double log_pseudoposterior_pair_component(
         const double quad_effect = main_group(v, 1);
         const int ref = baseline_category(v);
         for (int c = 0; c <= num_cats; ++c) {
-          const int centered = c - ref;
-          const double quad = quad_effect * centered * centered;
-          const double lin  = lin_effect * c;
-          const arma::vec exponent = lin + quad + c * rest_score - bound;
+          const int score = c - ref;
+          const double lin  = lin_effect * score;
+          const double quad = quad_effect * score * score;
+          const arma::vec exponent = lin + quad + score * rest_score - bound;
           denom += ARMA_MY_EXP(exponent);
         }
       }
@@ -1192,9 +1199,9 @@ double log_ratio_pseudolikelihood_constant_variable(
       arma::vec const_current(num_cats + 1, arma::fill::zeros);
       arma::vec const_proposed(num_cats + 1, arma::fill::zeros);
       for (int s = 0; s <= num_cats; ++s) {
-        const int centered = s - ref;
-        const_current(s) = main_current(0) * s + main_current(1) * centered * centered;
-        const_proposed(s) = main_proposed(0) * s + main_proposed(1) * centered * centered;
+        const int score = s - ref;
+        const_current(s) = main_current(0) * score + main_current(1) * score* score;
+        const_proposed(s) = main_proposed(0) * score + main_proposed(1) * score * score;
       }
 
       double lbound = std::max(const_current.max(), const_proposed.max());
@@ -1204,8 +1211,9 @@ double log_ratio_pseudolikelihood_constant_variable(
       bound_proposed = lbound + num_cats * arma::clamp(rest_proposed, 0.0, arma::datum::inf);
 
       for (int s = 0; s <= num_cats; ++s) {
-        denom_current += ARMA_MY_EXP(const_current(s) + s * rest_current - bound_current);
-        denom_proposed += ARMA_MY_EXP(const_proposed(s) + s * rest_proposed - bound_proposed);
+        const int score = s - ref;
+        denom_current += ARMA_MY_EXP(const_current(s) + score * rest_current - bound_current);
+        denom_proposed += ARMA_MY_EXP(const_proposed(s) + score * rest_proposed - bound_proposed);
       }
     }
 
diff --git a/src/bgmCompare_sampler.cpp b/src/bgmCompare_sampler.cpp
@@ -89,7 +89,7 @@ void impute_missing_bgmcompare(
 
   arma::vec category_response_probabilities(max_num_categories + 1);
   double exponent, cumsum, u;
-  int score, person, variable, new_observation, old_observation, group;
+  int score, person, variable, new_value, old_value, group;
 
   //Impute missing data
   for(int missing = 0; missing < num_missings; missing++) {
@@ -132,12 +132,12 @@ void impute_missing_bgmcompare(
     } else {
       // For Blume-Capel variables
       cumsum = 0.0;
+      const int ref = baseline_category[variable];
       for(int category = 0; category <= num_categories(variable); category++) {
-        exponent = group_main_effects[0] * category;
-        exponent += group_main_effects[1] *
-          (category - baseline_category[variable]) *
-          (category - baseline_category[variable]);
-        exponent += category * rest_score;
+        score = category - ref;
+        exponent = group_main_effects[0] * score;
+        exponent += group_main_effects[1] * score * score;
+        exponent += rest_score * score;
         cumsum += MY_EXP(exponent);
         category_response_probabilities[category] = cumsum;
       }
@@ -149,31 +149,30 @@ void impute_missing_bgmcompare(
     while (u > category_response_probabilities[score]) {
       score++;
     }
-    new_observation = score;
-    old_observation = observations(person, variable);
 
-    if(old_observation != new_observation) {
+    new_value = score;
+    if(!is_ordinal_variable[variable])
+      new_value -= baseline_category[variable];
+    old_value = observations(person, variable);
+
+    if(old_value != new_value) {
       // Update raw observations
-      observations(person, variable) = new_observation;
+      observations(person, variable) = new_value;
 
       // Update sufficient statistics for main effects
       if(is_ordinal_variable[variable] == true) {
         arma::imat counts_per_category_group = counts_per_category[group];
-        if(old_observation > 0)
-          counts_per_category_group(old_observation-1, variable)--;
-        if(new_observation > 0)
-          counts_per_category_group(new_observation-1, variable)++;
+        if(old_value > 0)
+          counts_per_category_group(old_value-1, variable)--;
+        if(new_value > 0)
+          counts_per_category_group(new_value-1, variable)++;
         counts_per_category[group] = counts_per_category_group;
       } else {
         arma::imat blume_capel_stats_group = blume_capel_stats[group];
-        blume_capel_stats_group(0, variable) -= old_observation;
-        blume_capel_stats_group(0, variable) += new_observation;
-        blume_capel_stats_group(1, variable) -=
-          (old_observation - baseline_category[variable]) *
-          (old_observation - baseline_category[variable]);
-        blume_capel_stats_group(1, variable) +=
-          (new_observation - baseline_category[variable]) *
-          (new_observation - baseline_category[variable]);
+        blume_capel_stats_group(0, variable) -= old_value;
+        blume_capel_stats_group(0, variable) += new_value;
+        blume_capel_stats_group(1, variable) -= old_value * old_value;
+        blume_capel_stats_group(1, variable) += new_value * new_value;
         blume_capel_stats[group] = blume_capel_stats_group;
       }
 
diff --git a/src/bgm_logp_and_grad.cpp b/src/bgm_logp_and_grad.cpp
diff --git a/src/bgm_sampler.cpp b/src/bgm_sampler.cpp
diff --git a/src/data_simulation.cpp b/src/data_simulation.cpp

Original file line number	Diff line number	Diff line change
`@@ -25,8 +25,8 @@ sample_omrf_gibbs <- function(no_states, no_variables, no_categories, interactio`
`25`	`25`	.Call(`_bgms_sample_omrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, iter)
`26`	`26`	`}`
`27`	`27`
`28`		`-sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) {`
`29`		- .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)
	`28`	`+sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter) {`
	`29`	+ .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter)
`30`	`30`	`}`
`31`	`31`
`32`	`32`	`compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) {`
Original file line number	Diff line number	Diff line change
`@@ -524,8 +524,9 @@ bgm = function(`
`524`	`524`	`# Ordinal (variable_bool == TRUE) or Blume-Capel (variable_bool == FALSE)`
`525`	`525`	`bc_vars = which(!variable_bool)`
`526`	`526`	`for(i in bc_vars) {`
`527`		`- blume_capel_stats[1, i] = sum(x[, i])`
	`527`	`+ blume_capel_stats[1, i] = sum(x[, i] - baseline_category[i])`
`528`	`528`	`blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2)`
	`529`	`+ x[, i] = x[, i] - baseline_category[i]`
`529`	`530`	`}`
`530`	`531`	`}`
`531`	`532`	`pairwise_stats = t(x) %*% x`
`@@ -588,7 +589,6 @@ bgm = function(`
`588`	`589`	`nThreads = cores, seed = seed, progress_type = progress_type`
`589`	`590`	`)`
`590`	`591`
`591`		`-`
`592`	`592`	userInterrupt = any(vapply(out, FUN = `[[`, FUN.VALUE = logical(1L), "userInterrupt"))
`593`	`593`	`if (userInterrupt) {`
`594`	`594`	`warning("Stopped sampling after user interrupt, results are likely uninterpretable.")`
Original file line number	Diff line number	Diff line change
`@@ -402,6 +402,9 @@ bgmCompare = function(`
`402`	`402`	`blume_capel_stats = compute_blume_capel_stats(`
`403`	`403`	`x, baseline_category, ordinal_variable, group`
`404`	`404`	`)`
	`405`	`+ for (i in which(!ordinal_variable)) {`
	`406`	`+ x[, i] = x[, i] - baseline_category[i]`
	`407`	`+ }`
`405`	`408`
`406`	`409`	`# Compute sufficient statistics for pairwise interactions`
`407`	`410`	`pairwise_stats = compute_pairwise_stats(`