Skip to content

Commit e940427

Browse files
authored
Merge pull request #70 from FMKerckhof/master
Hi @FMKerckhof, and thanks for the pull request. You are right: we forgot to add the additional logical parameter `full_covariance_matrices` of the `GMM` function to the `Optimal_Clusters_GMM` function. Moreover, thanks for correcting the calculation of the number of free parameters depending on covariance type in the `GMM_arma_AIC_BIC` function. Let me proceed and merge, because the pull request errors are related to the GitHub Actions .yml files.
2 parents 7fad21b + 9261a50 commit e940427

File tree

11 files changed

+122
-24
lines changed

11 files changed

+122
-24
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ClusterR
22
Type: Package
33
Title: Gaussian Mixture Models, K-Means, Mini-Batch-Kmeans, K-Medoids and Affinity Propagation Clustering
4-
Version: 1.3.4
4+
Version: 1.3.5
55
Date: 2025-09-14
66
Authors@R: c( person(given = "Lampros", family = "Mouselimis", email = "mouselimislampros@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8024-1546")), person(given = "Conrad", family = "Sanderson", role = "cph", comment = "Author of the C++ Armadillo library"), person(given = "Ryan", family = "Curtin", role = "cph", comment = "Author of the C++ Armadillo library"), person(given = "Siddharth", family = "Agrawal", role = "cph", comment = "Author of the C code of the Mini-Batch-Kmeans algorithm (https://github.com/siddharth-agrawal/Mini-Batch-K-Means)"), person(given = "Brendan", family = "Frey", email = "frey@psi.toronto.edu", role = "cph", comment = "Author of the matlab code of the Affinity propagation algorithm (for commercial use please contact the author of the matlab code)"), person(given = "Delbert", family = "Dueck", role = "cph", comment = "Author of the matlab code of the Affinity propagation algorithm"), person(given = "Vitalie", family = "Spinu", email = "spinuvit@gmail.com", role = "ctb", comment = "Github Contributor"),person(given = "Frederiek - Maarten", family = "Kerckhof", email = "fm@kytos.be", role = "ctb", comment = "Github Contributor") )
77
BugReports: https://github.com/mlampros/ClusterR/issues

R/RcppExports.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ predict_MGausDPDF_full <- function(data, CENTROIDS, COVARIANCE, WEIGHTS, eps = 1
5353
.Call(`_ClusterR_predict_MGausDPDF_full`, data, CENTROIDS, COVARIANCE, WEIGHTS, eps)
5454
}
5555

56-
GMM_arma_AIC_BIC <- function(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor = 1e-10, criterion = "AIC", seed = 1L) {
57-
.Call(`_ClusterR_GMM_arma_AIC_BIC`, data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed)
56+
GMM_arma_AIC_BIC <- function(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor = 1e-10, criterion = "AIC", seed = 1L, full_covariance_matrices = FALSE) {
57+
.Call(`_ClusterR_GMM_arma_AIC_BIC`, data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices)
5858
}
5959

6060
dissim_mat <- function(data, method, minkowski_p = 1.0, upper = TRUE, diagonal = TRUE, threads = 1L, eps = 1.0e-6) {

R/clustering_functions.R

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,9 @@ print.GMMCluster <- function(x, ...) {
211211
#' tryCatch function to prevent armadillo errors in GMM_arma_AIC_BIC
212212
#'
213213
#' @keywords internal
214-
tryCatch_optimal_clust_GMM <- function(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed) {
214+
tryCatch_optimal_clust_GMM <- function(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices) {
215215

216-
Error = tryCatch(GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed),
216+
Error = tryCatch(GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices),
217217

218218
error = function(e) e)
219219

@@ -242,6 +242,7 @@ tryCatch_optimal_clust_GMM <- function(data, max_clusters, dist_mode, seed_mode,
242242
#' @param var_floor the variance floor (smallest allowed value) for the diagonal covariances
243243
#' @param plot_data either TRUE or FALSE indicating whether the results of the function should be plotted
244244
#' @param seed integer value for random number generator (RNG)
245+
#' @param full_covariance_matrices a boolean. If FALSE, "diagonal" covariance matrices are used (i.e. in each covariance matrix, all entries outside the main diagonal are assumed to be zero); otherwise "full" covariance matrices are used. Note: when using full covariance matrices, the AIC/BIC calculation accounts for the increased number of parameters.
245246
#' @return a vector with either the AIC or BIC for each iteration. In case of Error it returns the error message and the possible causes.
246247
#' @author Lampros Mouselimis
247248
#' @details
@@ -251,6 +252,8 @@ tryCatch_optimal_clust_GMM <- function(data, max_clusters, dist_mode, seed_mode,
251252
#'
252253
#' In case that the \emph{max_clusters} parameter is a contiguous or non-contiguous vector then plotting is disabled. Therefore, plotting is enabled only if the \emph{max_clusters} parameter is of length 1.
253254
#'
255+
#' When \emph{full_covariance_matrices} is TRUE, the AIC/BIC values will be different from when it is FALSE because full covariance matrices have more free parameters (k*d + k*d*(d+1)/2 + (k-1)) compared to diagonal covariance matrices (2*k*d + (k-1)), where k is the number of clusters and d is the number of dimensions. In both cases the count consists of the means, the covariance entries and the (k-1) free mixture weights.
256+
#'
254257
#' @importFrom grDevices dev.cur
255258
#' @importFrom grDevices dev.off
256259
#' @importFrom graphics plot
@@ -289,7 +292,8 @@ Optimal_Clusters_GMM = function(data,
289292
verbose = FALSE,
290293
var_floor = 1e-10,
291294
plot_data = TRUE,
292-
seed = 1) {
295+
seed = 1,
296+
full_covariance_matrices = FALSE) {
293297

294298
if ('data.frame' %in% class(data)) data = as.matrix(data)
295299
if (!inherits(data, 'matrix')) stop('data should be either a matrix or a data frame')
@@ -305,6 +309,7 @@ Optimal_Clusters_GMM = function(data,
305309
if (em_iter < 0 ) stop('the em_iter parameter can not be negative')
306310
if (!is.logical(verbose)) stop('the verbose parameter should be either TRUE or FALSE')
307311
if (var_floor < 0 ) stop('the var_floor parameter can not be negative')
312+
if (!is.logical(full_covariance_matrices)) stop('The full_covariance_matrices parameter must be a boolean!')
308313

309314
if (length(max_clusters) != 1) {
310315
plot_data = FALSE # set "plot_data" to FALSE if the "max_clusters" parameter is not of length 1
@@ -332,7 +337,7 @@ Optimal_Clusters_GMM = function(data,
332337
stop("The 'max_clusters' vector can not include a 0 value !", call. = F)
333338
}
334339

335-
gmm = tryCatch_optimal_clust_GMM(data, pass_vector, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed)
340+
gmm = tryCatch_optimal_clust_GMM(data, pass_vector, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices)
336341

337342
if ('Error' %in% names(gmm)) {
338343
return(gmm)

inst/include/ClusterRHeader.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1828,7 +1828,7 @@ namespace clustR {
18281828

18291829
arma::rowvec GMM_arma_AIC_BIC(arma::mat& data, arma::rowvec max_clusters, std::string dist_mode, std::string seed_mode,
18301830

1831-
int km_iter, int em_iter, bool verbose, double var_floor = 1e-10, std::string criterion = "AIC", int seed = 1) {
1831+
int km_iter, int em_iter, bool verbose, double var_floor = 1e-10, std::string criterion = "AIC", int seed = 1, bool full_covariance_matrices = false) {
18321832

18331833
int LEN_max_clust = max_clusters.n_elem;
18341834

@@ -1840,7 +1840,7 @@ namespace clustR {
18401840

18411841
if (verbose) { Rcpp::Rcout << "iteration: " << i + 1 << " num-clusters: " << max_clusters(i) << std::endl; }
18421842

1843-
Rcpp::List gmm = GMM_arma(data, max_clusters(i), dist_mode, seed_mode, km_iter, em_iter, false, var_floor = 1e-10);
1843+
Rcpp::List gmm = GMM_arma(data, max_clusters(i), dist_mode, seed_mode, km_iter, em_iter, false, var_floor = 1e-10, seed, full_covariance_matrices);
18441844

18451845
arma::mat loglik = Rcpp::as<arma::mat> (gmm[3]);
18461846

@@ -1865,14 +1865,27 @@ namespace clustR {
18651865

18661866
arma::mat centers = Rcpp::as<arma::mat> (gmm[0]);
18671867

1868+
// Calculate number of free parameters depending on covariance type
1869+
int num_free_params;
1870+
int k = centers.n_rows; // number of clusters
1871+
int d = centers.n_cols; // number of dimensions
1872+
1873+
if (full_covariance_matrices) {
1874+
// For full covariance: k * d (means) + k * d*(d+1)/2 (full covariances) + (k-1) (mixture weights)
1875+
num_free_params = k * d + k * (d * (d + 1)) / 2 + (k - 1);
1876+
} else {
1877+
// For diagonal covariance: k * d (means) + k * d (diagonal covariances) + (k-1) (mixture weights)
1878+
num_free_params = k * d + k * d + (k - 1);
1879+
}
1880+
18681881
if (criterion == "AIC") {
18691882

1870-
evaluate_comps(i) = -2.0 * arma::accu(log_sum_exp) + 2.0 * centers.n_rows * centers.n_cols;
1883+
evaluate_comps(i) = -2.0 * arma::accu(log_sum_exp) + 2.0 * num_free_params;
18711884
}
18721885

18731886
if (criterion == "BIC") {
18741887

1875-
evaluate_comps(i) = -2.0 * arma::accu(log_sum_exp) + std::log(data.n_rows) * centers.n_rows * centers.n_cols;
1888+
evaluate_comps(i) = -2.0 * arma::accu(log_sum_exp) + std::log(data.n_rows) * num_free_params;
18761889
}
18771890
}
18781891

man/Optimal_Clusters_GMM.Rd

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tryCatch_optimal_clust_GMM.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.o
2+
*.so
3+
*.dll

src/RcppExports.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ BEGIN_RCPP
215215
END_RCPP
216216
}
217217
// GMM_arma_AIC_BIC
218-
arma::rowvec GMM_arma_AIC_BIC(arma::mat& data, arma::rowvec max_clusters, std::string dist_mode, std::string seed_mode, int km_iter, int em_iter, bool verbose, double var_floor, std::string criterion, int seed);
219-
RcppExport SEXP _ClusterR_GMM_arma_AIC_BIC(SEXP dataSEXP, SEXP max_clustersSEXP, SEXP dist_modeSEXP, SEXP seed_modeSEXP, SEXP km_iterSEXP, SEXP em_iterSEXP, SEXP verboseSEXP, SEXP var_floorSEXP, SEXP criterionSEXP, SEXP seedSEXP) {
218+
arma::rowvec GMM_arma_AIC_BIC(arma::mat& data, arma::rowvec max_clusters, std::string dist_mode, std::string seed_mode, int km_iter, int em_iter, bool verbose, double var_floor, std::string criterion, int seed, bool full_covariance_matrices);
219+
RcppExport SEXP _ClusterR_GMM_arma_AIC_BIC(SEXP dataSEXP, SEXP max_clustersSEXP, SEXP dist_modeSEXP, SEXP seed_modeSEXP, SEXP km_iterSEXP, SEXP em_iterSEXP, SEXP verboseSEXP, SEXP var_floorSEXP, SEXP criterionSEXP, SEXP seedSEXP, SEXP full_covariance_matricesSEXP) {
220220
BEGIN_RCPP
221221
Rcpp::RObject rcpp_result_gen;
222222
Rcpp::RNGScope rcpp_rngScope_gen;
@@ -230,7 +230,8 @@ BEGIN_RCPP
230230
Rcpp::traits::input_parameter< double >::type var_floor(var_floorSEXP);
231231
Rcpp::traits::input_parameter< std::string >::type criterion(criterionSEXP);
232232
Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
233-
rcpp_result_gen = Rcpp::wrap(GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed));
233+
Rcpp::traits::input_parameter< bool >::type full_covariance_matrices(full_covariance_matricesSEXP);
234+
rcpp_result_gen = Rcpp::wrap(GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices));
234235
return rcpp_result_gen;
235236
END_RCPP
236237
}

src/export_inst_folder_headers.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,11 @@ Rcpp::List predict_MGausDPDF_full(arma::mat data, arma::mat CENTROIDS, arma::cub
315315
// [[Rcpp::export]]
316316
arma::rowvec GMM_arma_AIC_BIC(arma::mat& data, arma::rowvec max_clusters, std::string dist_mode, std::string seed_mode,
317317

318-
int km_iter, int em_iter, bool verbose, double var_floor = 1e-10, std::string criterion = "AIC", int seed = 1) {
318+
int km_iter, int em_iter, bool verbose, double var_floor = 1e-10, std::string criterion = "AIC", int seed = 1, bool full_covariance_matrices = false) {
319319

320320
ClustHeader CRH;
321321

322-
return CRH.GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed);
322+
return CRH.GMM_arma_AIC_BIC(data, max_clusters, dist_mode, seed_mode, km_iter, em_iter, verbose, var_floor, criterion, seed, full_covariance_matrices);
323323
}
324324

325325

src/init.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ extern SEXP _ClusterR_dissim_mat(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
1717
extern SEXP _ClusterR_dissim_MEDOIDS(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
1818
extern SEXP _ClusterR_evaluation_rcpp(SEXP, SEXP, SEXP);
1919
extern SEXP _ClusterR_GMM_arma(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
20-
extern SEXP _ClusterR_GMM_arma_AIC_BIC(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
20+
extern SEXP _ClusterR_GMM_arma_AIC_BIC(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
2121
extern SEXP _ClusterR_KMEANS_arma(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
2222
extern SEXP _ClusterR_KMEANS_rcpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
2323
extern SEXP _ClusterR_mini_batch_kmeans(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
@@ -33,8 +33,10 @@ extern SEXP _ClusterR_silhouette_clusters(SEXP, SEXP);
3333
extern SEXP _ClusterR_split_rcpp_lst(SEXP);
3434
extern SEXP _ClusterR_validate_centroids(SEXP, SEXP, SEXP, SEXP, SEXP);
3535

36+
37+
3638
static const R_CallMethodDef CallEntries[] = {
37-
{"_ClusterR_affinity_propagation", (DL_FUNC) &_ClusterR_affinity_propagation, 9},
39+
{"_ClusterR_affinity_propagation", (DL_FUNC) &_ClusterR_affinity_propagation, 9},
3840
{"_ClusterR_check_NaN_Inf", (DL_FUNC) &_ClusterR_check_NaN_Inf, 1},
3941
{"_ClusterR_ClaraMedoids", (DL_FUNC) &_ClusterR_ClaraMedoids, 11},
4042
{"_ClusterR_ClusterMedoids", (DL_FUNC) &_ClusterR_ClusterMedoids, 9},
@@ -43,7 +45,7 @@ static const R_CallMethodDef CallEntries[] = {
4345
{"_ClusterR_dissim_MEDOIDS", (DL_FUNC) &_ClusterR_dissim_MEDOIDS, 6},
4446
{"_ClusterR_evaluation_rcpp", (DL_FUNC) &_ClusterR_evaluation_rcpp, 3},
4547
{"_ClusterR_GMM_arma", (DL_FUNC) &_ClusterR_GMM_arma, 10},
46-
{"_ClusterR_GMM_arma_AIC_BIC", (DL_FUNC) &_ClusterR_GMM_arma_AIC_BIC, 10},
48+
{"_ClusterR_GMM_arma_AIC_BIC", (DL_FUNC) &_ClusterR_GMM_arma_AIC_BIC, 11},
4749
{"_ClusterR_KMEANS_arma", (DL_FUNC) &_ClusterR_KMEANS_arma, 7},
4850
{"_ClusterR_KMEANS_rcpp", (DL_FUNC) &_ClusterR_KMEANS_rcpp, 12},
4951
{"_ClusterR_mini_batch_kmeans", (DL_FUNC) &_ClusterR_mini_batch_kmeans, 13},
@@ -61,8 +63,7 @@ static const R_CallMethodDef CallEntries[] = {
6163
{NULL, NULL, 0}
6264
};
6365

64-
void R_init_ClusterR(DllInfo *dll)
65-
{
66+
void R_init_ClusterR(DllInfo *dll) {
6667
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
6768
R_useDynamicSymbols(dll, FALSE);
68-
}
69+
}

0 commit comments

Comments
 (0)