35 changes: 28 additions & 7 deletions .Rbuildignore
@@ -1,14 +1,35 @@
^renv$
^renv\.lock$
# RStudio / IDE
^.*\.Rproj$
^\.Rproj\.user$
^\.vscode$

^Readme.Rmd$
^\.github$
^_pkgdown\.yml$
# renv
^renv$
^renv\.lock$

# pkgdown / docs
^docs$
^pkgdown$
^vignettes/introduction_cache
^_pkgdown\.yml$
^Readme\.Rmd$
^vignettes/introduction_cache$

# GitHub / CI
^\.github$

# R CMD build artifacts
^doc$
^Meta$
^\.vscode$

# Development helpers
^dev$

# ---- C/C++ build artifacts (REQUIRED) ----
^src/.*\.o$
^src/.*\.so$
^src/.*\.dll$

# ---- Generated build files ----
^src/Makevars$
^src/Makevars\.win$
^src/sources\.mk$
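A quick way to sanity-check the new artifact patterns: per Writing R Extensions, `.Rbuildignore` entries are Perl-compatible regexes matched case-insensitively against file paths relative to the package root. A minimal check with hypothetical file names (not part of the package):

```r
# Hedged sanity check: do the new artifact patterns exclude what we expect?
# The file names below are made up for illustration.
patterns <- c("^src/.*\\.o$", "^src/.*\\.so$", "^src/Makevars$")
files <- c("src/bgm.o", "src/nested/util.o", "src/Makevars", "R/bgm.R")
excluded <- vapply(
  files,
  function(f) any(vapply(patterns, grepl, logical(1), x = f, perl = TRUE)),
  logical(1)
)
excluded  # TRUE for each build artifact, FALSE for R/bgm.R
```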
4 changes: 4 additions & 0 deletions .gitignore
@@ -5,9 +5,13 @@
src/*.o
src/*.so
src/*.dll
src/**/*.o
src/**/*.so
src/**/*.dll
.DS_Store
/doc/
/Meta/
src/Makevars
src/Makevars.win
src/sources.mk
docs/*
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -2,7 +2,7 @@ Package: bgms
Type: Package
Title: Bayesian Analysis of Networks of Binary and/or Ordinal Variables
Version: 0.1.6.2
Date: 2025-12-02
Date: 2025-12-29
Authors@R: c(
person("Maarten", "Marsman", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5309-7502")),
13 changes: 8 additions & 5 deletions NEWS.md
@@ -6,11 +6,14 @@

## Other changes

* reparameterized the Blume-capel model to use (score-baseline) instead of score for mrfSampler() and bgm().
* reparameterized the Blume-Capel model to use (score - baseline) instead of score.
* implemented a new way to compute the denominators and probabilities, making their computation both faster and more stable (a sketch of the general idea follows this file's diff).
* refactored C++ code for better maintainability.
* removed the prepared_data field from bgm objects.

## Bug fixes

* Fixed numerical problems with Blume-Capel variables using HMC and NUTS for bgm().
* fixed numerical problems with Blume-Capel variables using HMC and NUTS.

# bgms 0.1.6.1

@@ -22,9 +25,9 @@

## Bug fixes

* Fixed a problem with warmup scheduling for adaptive-metropolis in bgmCompare()
* Fixed stability problems with parallel sampling for bgm()
* Fixed spurious output errors printing to console after user interrupt.
* fixed a problem with warmup scheduling for adaptive-metropolis in bgmCompare()
* fixed stability problems with parallel sampling for bgm()
* fixed spurious output errors printing to console after user interrupt.

# bgms 0.1.6.0

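The NEWS entry on faster, more stable denominators and probabilities does not spell out the technique; the standard approach for this kind of computation is log-sum-exp shifting, sketched below as an assumption about the general idea rather than the package's verbatim internals:

```r
# Hedged sketch (assumed technique, not bgms's actual internals): category
# probabilities p(c) proportional to exp(eta_c), computed stably by
# shifting all exponents by their maximum before exponentiating.
stable_category_probs <- function(eta) {
  m <- max(eta)        # largest exponent becomes zero after the shift
  w <- exp(eta - m)    # no overflow; only negligible terms underflow
  w / sum(w)           # denominator evaluated on the shifted scale
}

# Linear predictors this large would overflow exp() directly:
stable_category_probs(c(700, 702, 705))
```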
20 changes: 6 additions & 14 deletions R/RcppExports.R
@@ -9,24 +9,16 @@ run_bgm_parallel <- function(observations, num_categories, pairwise_scale, edge_
.Call(`_bgms_run_bgm_parallel`, observations, num_categories, pairwise_scale, edge_prior, inclusion_probability, beta_bernoulli_alpha, beta_bernoulli_beta, beta_bernoulli_alpha_between, beta_bernoulli_beta_between, dirichlet_alpha, lambda, interaction_index_matrix, iter, warmup, counts_per_category, blume_capel_stats, main_alpha, main_beta, na_impute, missing_index, is_ordinal_variable, baseline_category, edge_selection, update_method, pairwise_effect_indices, target_accept, pairwise_stats, hmc_num_leapfrogs, nuts_max_depth, learn_mass_matrix, num_chains, nThreads, seed, progress_type)
}

get_explog_switch <- function() {
.Call(`_bgms_get_explog_switch`)
}

rcpp_ieee754_exp <- function(x) {
.Call(`_bgms_rcpp_ieee754_exp`, x)
}

rcpp_ieee754_log <- function(x) {
.Call(`_bgms_rcpp_ieee754_log`, x)
}

sample_omrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, iter) {
.Call(`_bgms_sample_omrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, iter)
}

sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) {
.Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)
sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter) {
.Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter)
}

sample_ggm <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, seed, no_threads, progress_type) {
.Call(`_bgms_sample_ggm`, inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, seed, no_threads, progress_type)
}

compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) {
11 changes: 6 additions & 5 deletions R/bgm.R
@@ -33,7 +33,7 @@
#' Assume a baseline category (e.g., a “neutral” response) and score responses
#' by distance from this baseline. Category thresholds are modeled as:
#'
#' \deqn{\mu_{c} = \alpha \cdot c + \beta \cdot (c - b)^2}
#' \deqn{\mu_{c} = \alpha \cdot (c-b) + \beta \cdot (c - b)^2}
#'
#' where:
#' \itemize{
@@ -48,7 +48,8 @@
#' }
#' \item \eqn{b}: baseline category
#' }
#'
#' Accordingly, pairwise interactions between Blume-Capel variables are modeled
#' in terms of \eqn{c-b} scores.
#'
#' @section Edge Selection:
#' When \code{edge_selection = TRUE}, the function performs Bayesian variable
@@ -559,8 +560,9 @@ bgm = function(
# Ordinal (variable_bool == TRUE) or Blume-Capel (variable_bool == FALSE)
bc_vars = which(!variable_bool)
for(i in bc_vars) {
blume_capel_stats[1, i] = sum(x[, i])
blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i])^2)
blume_capel_stats[1, i] = sum(x[, i] - baseline_category[i])
blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2)
x[, i] = x[, i] - baseline_category[i]
}
}
pairwise_stats = t(x) %*% x
@@ -626,7 +628,6 @@
nThreads = cores, seed = seed, progress_type = progress_type
)


userInterrupt = any(vapply(out, FUN = `[[`, FUN.VALUE = logical(1L), "userInterrupt"))
if(userInterrupt) {
warning("Stopped sampling after user interrupt, results are likely uninterpretable.")
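To make the reparameterization documented in the bgm.R hunk above concrete, here is a small illustration of mu_c = alpha * (c - b) + beta * (c - b)^2; the helper name and inputs are hypothetical, not part of the bgms API:

```r
# Hypothetical helper (not a bgms function) illustrating the reparameterized
# Blume-Capel thresholds: mu_c = alpha * (c - b) + beta * (c - b)^2.
blume_capel_thresholds <- function(num_categories, alpha, beta, baseline) {
  scores <- 0:num_categories - baseline   # centered scores (c - b)
  alpha * scores + beta * scores^2        # linear plus quadratic distance
}

# Five response categories (0..4) with a neutral baseline at 2; a negative
# beta concentrates probability mass near the baseline category:
blume_capel_thresholds(4, alpha = 0.1, beta = -0.5, baseline = 2)
```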
7 changes: 4 additions & 3 deletions R/bgmCompare.R
@@ -321,7 +321,7 @@ bgmCompare = function(
} else if(update_method == "hamiltonian-mc") {
target_accept = 0.65
} else if(update_method == "nuts") {
target_accept = 0.80
target_accept = 0.65
}
}

@@ -414,13 +414,15 @@
blume_capel_stats = compute_blume_capel_stats(
x, baseline_category, ordinal_variable, group
)
for (i in which(!ordinal_variable)) {
x[, i] = x[, i] - baseline_category[i]
}

# Compute sufficient statistics for pairwise interactions
pairwise_stats = compute_pairwise_stats(
x, group
)


# Index vector used to sample interactions in a random order -----------------
Index = matrix(0, nrow = num_interactions, ncol = 3)
counter = 0
@@ -490,7 +492,6 @@

seed <- as.integer(seed)


# Call the Rcpp function
out = run_bgmCompare_parallel(
observations = observations,
20 changes: 10 additions & 10 deletions R/data_utils.R
@@ -243,7 +243,7 @@ compute_counts_per_category = function(x, num_categories, group = NULL) {
counts_per_category_gr[category, variable] = sum(x[group == g, variable] == category)
}
}
counts_per_category[[g]] = counts_per_category_gr
counts_per_category[[length(counts_per_category) + 1]] = counts_per_category_gr
}
return(counts_per_category)
}
@@ -253,34 +253,34 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro
if(is.null(group)) { # One-group design
sufficient_stats = matrix(0, nrow = 2, ncol = ncol(x))
bc_vars = which(!ordinal_variable)
for(i in bc_vars) {
sufficient_stats[1, i] = sum(x[, i])
sufficient_stats[2, i] = sum((x[, i] - baseline_category[i])^2)
for (i in bc_vars) {
sufficient_stats[1, i] = sum(x[, i] - baseline_category[i])
sufficient_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2)
}
return(sufficient_stats)
} else { # Multi-group design
sufficient_stats = list()
for(g in unique(group)) {
sufficient_stats_gr = matrix(0, nrow = 2, ncol = ncol(x))
bc_vars = which(!ordinal_variable)
for(i in bc_vars) {
sufficient_stats_gr[1, i] = sum(x[group == g, i])
sufficient_stats_gr[2, i] = sum((x[group == g, i] - baseline_category[i])^2)
for (i in bc_vars) {
sufficient_stats_gr[1, i] = sum(x[group == g, i] - baseline_category[i])
sufficient_stats_gr[2, i] = sum((x[group == g, i] - baseline_category[i]) ^ 2)
}
sufficient_stats[[g]] = sufficient_stats_gr
sufficient_stats[[length(sufficient_stats) + 1]] = sufficient_stats_gr
}
return(sufficient_stats)
}
}

# Helper function for computing sufficient statistics for pairwise interactions
compute_pairwise_stats <- function(x, group) {
result <- vector("list", length(unique(group)))
result <- list()

for(g in unique(group)) {
obs <- x[group == g, , drop = FALSE]
# cross-product: gives number of co-occurrences of categories
result[[g]] <- t(obs) %*% obs
result[[length(result) + 1]] <- t(obs) %*% obs
}

result
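The switch from `result[[g]]` to `result[[length(result) + 1]]` in the helpers above fixes indexing by group label; a minimal reproduction, assuming group labels need not be consecutive integers starting at 1:

```r
# Minimal reproduction of the indexing issue fixed above: indexing a list
# by group label leaves NULL holes when labels are not 1..K, whereas
# appending yields exactly one element per group, in order of appearance.
group <- c(2, 2, 5, 5)

holes <- list()
for (g in unique(group)) holes[[g]] <- paste("group", g)
length(holes)    # 5 -- slots 1, 3, and 4 are NULL

compact <- list()
for (g in unique(group)) compact[[length(compact) + 1]] <- paste("group", g)
length(compact)  # 2 -- one entry per group
```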
40 changes: 18 additions & 22 deletions R/nuts_diagnostics.R
@@ -42,18 +42,16 @@ summarize_nuts_diagnostics <- function(out, nuts_max_depth = 10, verbose = TRUE)
100 * divergence_rate,
total_divergences,
nrow(divergent_mat) * ncol(divergent_mat)
), "Consider increasing the target acceptance rate.")
} else if(divergence_rate > 0) {
message(
sprintf(
"Note: %.3f%% of transitions ended with a divergence (%d of %d).\n",
100 * divergence_rate,
total_divergences,
nrow(divergent_mat) * ncol(divergent_mat)
),
"Check R-hat and effective sample size (ESS) to ensure the chains are\n",
"mixing well."
)
), "Consider increasing the target acceptance rate or change to update_method = ``adaptive-metropolis''.")
} else if (divergence_rate > 0) {
message(sprintf(
"Note: %.3f%% of transitions ended with a divergence (%d of %d).\n",
100 * divergence_rate,
total_divergences,
nrow(divergent_mat) * ncol(divergent_mat)
),
"Check R-hat and effective sample size (ESS) to ensure the chains are\n",
"mixing well.")
}

depth_hit_rate <- max_tree_depth_hits / (nrow(treedepth_mat) * ncol(treedepth_mat))
@@ -84,16 +82,14 @@
low_ebfmi_chains <- which(ebfmi_per_chain < 0.3)
min_ebfmi <- min(ebfmi_per_chain)

if(length(low_ebfmi_chains) > 0) {
warning(
sprintf(
"E-BFMI below 0.3 detected in %d chain(s): %s.\n",
length(low_ebfmi_chains),
paste(low_ebfmi_chains, collapse = ", ")
),
"This suggests inefficient momentum resampling in those chains.\n",
"Sampling efficiency may be reduced. Consider longer chains or checking convergence diagnostics."
)
if (length(low_ebfmi_chains) > 0) {
warning(sprintf(
"E-BFMI below 0.3 detected in %d chain(s): %s.\n",
length(low_ebfmi_chains),
paste(low_ebfmi_chains, collapse = ", ")
),
"This suggests inefficient momentum resampling in those chains.\n",
"Sampling efficiency may be reduced. Consider longer chains and check convergence diagnostics.")
}
}

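For reference on the E-BFMI check above: the usual estimator (following Betancourt's 2016 diagnostic) compares successive energy differences to the marginal energy variance. A sketch, assuming `energy` holds one chain's Hamiltonian energy draws; this is an illustration, not the package's exact code:

```r
# Hedged sketch of the E-BFMI estimator behind the 0.3 threshold above:
# mean squared successive energy difference over marginal energy variance.
ebfmi <- function(energy) {
  mean(diff(energy)^2) / var(energy)
}
```

Values well below roughly 0.3 signal poor momentum resampling, matching the warning emitted above.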
2 changes: 0 additions & 2 deletions R/output_utils.R
@@ -8,7 +8,6 @@ prepare_output_bgm = function(
nuts_max_depth, learn_mass_matrix, num_chains
) {
arguments = list(
prepared_data = x,
num_variables = ncol(x),
num_cases = nrow(x),
na_impute = na_impute,
@@ -291,7 +290,6 @@ prepare_output_bgmCompare = function(
num_variables = ncol(observations)

arguments = list(
prepared_data = observations,
num_variables = num_variables,
num_cases = nrow(observations),
iter = iter,