Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions R/bgm.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,14 @@
#' model \code{edge_prior = "Stochastic-Block"} assumes that nodes can be
#' organized into blocks or clusters. In principle, the assignment of nodes to
#' such clusters is unknown, and the model as implemented here considers all
#' possible options \insertCite{@i.e., specifies a Dirichlet process on the node to block
#' allocation as described by @GengEtAl_2019}{bgms}. This model is advantageous
#' possible options \insertCite{@i.e., specifies a Dirichlet prior on the probability
#' of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous
#' when nodes are expected to fall into distinct clusters. The inclusion
#' probabilities for the edges are defined at the level of the clusters, with a
#' beta prior for the unknown inclusion probability with shape parameters
#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}. The default is
#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
#' prior on the cluster assignment probabilities with a common concentration
#' parameter \code{dirichlet_alpha}. The default is
#' \code{edge_prior = "Bernoulli"}.
#' @param inclusion_probability The prior edge inclusion probability for the
#' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows
Expand All @@ -127,7 +129,7 @@
#' positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and
#' \code{beta_bernoulli_beta = 1}.
#' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block
#' allocation parameters for the Stochastic Block model.
#' allocation probabilities for the Stochastic Block model.
#' @param na.action How do you want the function to handle missing data? If
#' \code{na.action = "listwise"}, listwise deletion is used. If
#' \code{na.action = "impute"}, missing data are imputed iteratively during the
Expand Down Expand Up @@ -156,8 +158,14 @@
#' ``blume-capel'' variables, the first entry is the parameter for the linear
#' effect and the second entry is the parameter for the quadratic effect, which
#' models the offset to the reference category.
#' \item In the case of
#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned:
#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an
#' table \code{clusters} with the estimated posterior probability of the number
#' of clusters in the network. The vector of node allocations is calculated using
#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by
#' \insertCite{GengEtAl_2019}{bgms}.
#' }
#'
#' If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
#' \itemize{
#' \item \code{indicator}: A matrix with \code{iter} rows and
Expand All @@ -169,8 +177,15 @@
#' \item \code{thresholds}: A matrix with \code{iter} rows and
#' \code{sum(m)} columns, containing parameter states from every iteration of
#' the Gibbs sampler for the category thresholds.
#' \item In the case of
#' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
#' cluster assignments of the nodes from each iteration is returned. This matrix
#' can be used to calculate the posterior probability of the number of clusters
#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
#' }
#' Column averages of these matrices provide the model-averaged posterior means.
#' Except for the \code{allocations} matrix, for which the \code{summary_SBM}
#' needs to be utilized.
#'
#' In addition to the analysis results, the bgm output lists some of the
#' arguments of its call. This is useful for post-processing the results.
Expand Down Expand Up @@ -504,10 +519,24 @@ bgm = function(x,
arguments$data_columnnames = data_columnnames

if(edge_selection == TRUE) {
output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
arguments = arguments)
if(edge_prior == "Stochastic-Block"){

summarySbm <- summary_SBM(cluster_allocations = out$allocations)

output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
allocations = summarySbm$allocations,
clusters = summarySbm$no_clusters,
arguments = arguments)
} else {

output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
arguments = arguments)
}

} else {
output = list(interactions = interactions,
thresholds = thresholds,
Expand Down Expand Up @@ -557,10 +586,19 @@ bgm = function(x,
arguments$data_columnnames = data_columnnames

if(edge_selection == TRUE) {
output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
arguments = arguments)
if(edge_prior == "Stochastic-Block"){
output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
allocations = out$allocations,
arguments = arguments)
} else {

output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
arguments = arguments)
}
} else {
output = list(interactions = interactions,
thresholds = thresholds,
Expand Down
56 changes: 56 additions & 0 deletions R/utility_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1007,3 +1007,59 @@ compare_reformat_data = function(x,
missing_index_gr2 = missing_index_gr2,
na_impute = na_impute))
}

# Dahl's method to summarize the samples of the cluster_allocations
# This function was adapted from the R code accompanying the paper:
# Geng, J., Bhattacharya, A., & Pati, D. (2019). Probabilistic Community
# Detection With Unknown Number of Communities, Journal of the American
# Statistical Association, 114:526, 893-905, DOI:10.1080/01621459.2018.1458618

getDahl <- function(cluster_allocations) {
# Dimensions of the input matrix
niters <- nrow(cluster_allocations) # Number of iterations
n <- ncol(cluster_allocations) # Number of nodes

# Compute membership matrices for each iteration
membershipMatrices <- apply(cluster_allocations, 1, function(clusterAssign) {
outer(clusterAssign, clusterAssign, FUN = "==")
})

# Reshape membershipMatrices into a list of matrices
membershipMatrices <- lapply(seq_len(niters), function(i) {
matrix(membershipMatrices[, i], n, n)
})

# Compute the average membership matrix
membershipAverage <- Reduce("+", membershipMatrices) / niters

# Compute squared error for each iteration
SqError <- sapply(membershipMatrices, function(x, av) sum((x - av)^2),
av = membershipAverage)

# Find the iteration with the minimum squared error
DahlIndex <- which.min(SqError)

# Extract the cluster assignment corresponding to the best iteration
DahlAns <- cluster_allocations[DahlIndex, , drop = TRUE]

return(DahlAns)
}


# A function that computes the cluster posterior probabilities and allocations

summary_SBM <- function(cluster_allocations) {
# Compute the number of unique clusters for each iteration
clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))

# Compute the posterior probabilities of the actual unique clusters
no_clusters <- table(clusters) / length(clusters)

# Compute the allocations of the nodes based on Dahl's method
allocations <- getDahl(cluster_allocations)

# Return the results
return(list(no_clusters = no_clusters,
allocations = allocations))
}

27 changes: 21 additions & 6 deletions inst/REFERENCES.bib
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
@article{Dahl2009,
author = {David B. Dahl},
title = {Modal clustering in a class of product partition models},
journal = {Bayesian Analysis},
volume = {4},
number = {2},
pages = {243--264},
year = {2009},
month = {June},
doi = {10.1214/09-BA409},
url = {https://doi.org/10.1214/09-BA409}
}

@article{GengEtAl_2019,
title={Probabilistic community detection with unknown number of communities},
author={Geng, J. and Bhattacharya, A. and Pati, D.},
Expand All @@ -8,12 +21,14 @@ @article{GengEtAl_2019
year={2019}}

@article{SekulovskiEtAl_2023,
title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods},
author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.},
journal={Multivariate Behavioral Research},
doi = {10.1080/00273171.2024.2345915},
year={in press}
}
author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.},
doi = {10.1080/00273171.2024.2345915},
journal = {Multivariate Behavioral Research},
pages = {913--933},
title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods},
volume = {59},
year = {2024},
bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}}

@article{Dienes_2014,
author = {Dienes, Z.},
Expand Down
25 changes: 20 additions & 5 deletions man/bgm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 29 additions & 6 deletions src/gibbs_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,8 @@ List gibbs_sampler(IntegerMatrix observations,
int no_interactions = Index.nrow();
int no_thresholds = sum(no_categories);
int max_no_categories = max(no_categories);
IntegerVector K_values; // To store sampled K values


IntegerVector v = seq(0, no_interactions - 1);
IntegerVector order(no_interactions);
Expand Down Expand Up @@ -864,7 +866,10 @@ List gibbs_sampler(IntegerMatrix observations,
NumericMatrix cluster_prob(1, 1);
NumericVector log_Vn(1);

if(edge_prior == "Stochastic-Block") {
// store the allocation indices for each iteration
NumericMatrix out_allocations(iter, no_variables);

if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations
cluster_allocations[0] = 0;
cluster_allocations[1] = 1;
for(int i = 2; i < no_variables; i++) {
Expand Down Expand Up @@ -1038,11 +1043,17 @@ List gibbs_sampler(IntegerMatrix observations,
//The post burn-in iterations ------------------------------------------------
for(int iteration = 0; iteration < iter; iteration++) {
if (Progress::check_abort()) {
if(edge_selection == true) {
if(edge_selection == true && edge_prior == "Stochastic-Block") {
return List::create(Named("indicator") = out_indicator,
Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds,
Named("allocations") = out_allocations);
} if(edge_selection == true && edge_prior != "Stochastic-Block") {
return List::create(Named("indicator") = out_indicator,
Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
} else {
}
else {
return List::create(Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
}
Expand Down Expand Up @@ -1150,6 +1161,7 @@ List gibbs_sampler(IntegerMatrix observations,
beta_bernoulli_alpha,
beta_bernoulli_beta);


for(int i = 0; i < no_variables - 1; i++) {
for(int j = i + 1; j < no_variables; j++) {
theta(i, j) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
Expand Down Expand Up @@ -1234,15 +1246,26 @@ List gibbs_sampler(IntegerMatrix observations,
out_thresholds(no_variables - 1, 1) /= iteration + 1;
}
}

for(int i = 0; i < no_variables; i++) {
out_allocations(iteration, i) = cluster_allocations[i] + 1;
}
}


if(edge_selection == true) {
if(edge_prior == "Stochastic-Block"){
return List::create(Named("indicator") = out_indicator,
Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
Named("thresholds") = out_thresholds,
Named("allocations") = out_allocations);
} else {
return List::create(Named("indicator") = out_indicator,
Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
}
} else {
return List::create(Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
return List::create(Named("interactions") = out_interactions,
Named("thresholds") = out_thresholds);
}
}
3 changes: 2 additions & 1 deletion src/gibbs_functions_edge_prior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
}
}
return cluster_assign;

}

// ----------------------------------------------------------------------------|
Expand Down Expand Up @@ -376,4 +377,4 @@ NumericMatrix block_probs_mfm_sbm(IntegerVector cluster_assign,
}

return block_probs;
}
}
15 changes: 9 additions & 6 deletions vignettes/refs.bib
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@

@article{SekulovskiEtAl_2023,
title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods},
author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.},
journal={Multivariate Behavioral Research},
doi = {10.1080/00273171.2024.2345915},
year={in press}
}
author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.},
doi = {10.1080/00273171.2024.2345915},
journal = {Multivariate Behavioral Research},
pages = {913--933},
title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods},
volume = {59},
year = {2024},
bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}}

@article{GengEtAl_2019,
title={Probabilistic community detection with unknown number of communities},
Expand Down
Loading