|
| 1 | +## ----include = FALSE---------------------------------------------------------- |
| 2 | +knitr::opts_chunk$set( |
| 3 | + collapse = TRUE, |
| 4 | + comment = "#>" |
| 5 | +) |
| 6 | + |
| 7 | +## ----check-packages, echo=FALSE, message=FALSE, warning=FALSE----------------- |
| 8 | +required_packages <- c("proxy", "ppclust", "blockcluster", "cluster", "factoextra", "ggplot2") |
| 9 | + |
| 10 | +missing_packages <- required_packages[!vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)] |
| 11 | + |
| 12 | +if (length(missing_packages) > 0) { |
| 13 | + message("❌ The following required packages are not installed:\n") |
| 14 | + message(paste0("- ", missing_packages, collapse = "\n"), "\n") |
| 15 | + message("📦 To install them, run the following in R:\n") |
| 16 | + message("```r") |
| 17 | + message(sprintf("install.packages(c(%s))", paste(sprintf('\"%s\"', missing_packages), collapse = ", "))) |
| 18 | + message("```") |
| 19 | + |
| 20 | + knitr::knit_exit() |
| 21 | +} |
| 22 | + |
| 23 | +## ----setup,echo=FALSE, include=FALSE------------------------------------------ |
| 24 | +library(Silhouette) |
| 25 | +library(proxy) |
| 26 | +library(ppclust) |
| 27 | +library(cluster) |
| 28 | +library(factoextra) |
| 29 | +library(blockcluster) |
| 30 | +library(ggplot2) |
| 31 | +library(drclust) |
| 32 | +set.seed(123) |
| 33 | + |
| 34 | +## ----kmeans------------------------------------------------------------------- |
| 35 | +data(iris) |
| 36 | +km <- kmeans(iris[, -5], centers = 3) |
| 37 | + |
| 38 | +## ----crisp-silhouette1, fig.width=7, fig.height=4, fig.alt = "fig1.1"--------- |
| 39 | +library(proxy) |
| 40 | +dist_matrix <- proxy::dist(iris[, -5], km$centers) |
| 41 | +sil <- Silhouette(dist_matrix) |
| 42 | +head(sil) |
| 43 | +summary(sil) |
| 44 | +plot(sil) |
| 45 | + |
| 46 | +## ----crisp-silhouette2, fig.width=7, fig.height=4, fig.alt = "fig1.2"--------- |
| 47 | +sil_pac <- Silhouette(dist_matrix, method = "pac", sort = TRUE) |
| 48 | +head(sil_pac) |
| 49 | +summary(sil_pac) |
| 50 | +plot(sil_pac) |
| 51 | + |
| 52 | +## ----crisp-silhouette3-------------------------------------------------------- |
| 53 | +s <- summary(sil_pac,print.summary = TRUE) |
| 54 | +# summary table |
| 55 | +s$sil.sum |
| 56 | +# cluster wise silhouette widths |
| 57 | +s$clus.avg.widths |
| 58 | +# Overall average silhouette width |
| 59 | +s$avg.width |
| 60 | + |
| 61 | +## ----fm----------------------------------------------------------------------- |
| 62 | +library(ppclust) |
| 63 | +data(iris) |
| 64 | +fm <- ppclust::fcm(x = iris[, -5], centers = 3) |
| 65 | + |
| 66 | +## ----crisp-silhouette4, fig.width=7, fig.height=4, fig.alt = "fig1.2"--------- |
| 67 | +sil_fm <- Silhouette(fm$d) |
| 68 | +plot(sil_fm) |
| 69 | + |
| 70 | +## ----crisp-silhouette5, fig.width=7, fig.height=4, fig.alt = "fig1.3"--------- |
| 71 | +sil_fcm <- Silhouette(prox_matrix = "d", clust_fun = fcm, x = iris[, -5], centers = 3) |
| 72 | +plot(sil_fcm) |
| 73 | + |
| 74 | +## ----fuzzy-silhouette4.1, fig.width=7, fig.height=4--------------------------- |
| 75 | +data(iris) |
| 76 | +fm1 <- ppclust::fcm(x = iris[, -5], centers = 3) |
| 77 | + |
| 78 | +## ----fuzzy-silhouette4, fig.width=7, fig.height=4, fig.alt = "fig1.6"--------- |
| 79 | +sil_fm1 <- Silhouette(prox_matrix = fm1$d, prob_matrix = fm1$u, average = "fuzzy") |
| 80 | +plot(sil_fm1) |
| 81 | + |
| 82 | +## ----fuzzy-silhouette5, fig.width=7, fig.height=4, fig.alt = "fig1.3"--------- |
| 83 | +library(ppclust) |
| 84 | +sil_fcm1 <- Silhouette(prox_matrix = "d", prob_matrix = "u", average = "fuzzy", clust_fun = fcm, x = iris[, -5], centers = 3) |
| 85 | +plot(sil_fcm1) |
| 86 | + |
| 87 | +## ----fcm---------------------------------------------------------------------- |
| 88 | +data(iris) |
| 89 | + |
| 90 | +# FCM clustering |
| 91 | +fcm_result <- ppclust::fcm(iris[, 1:4], 3) |
| 92 | + |
| 93 | +# FCM2 clustering |
| 94 | +fcm2_result <- ppclust::fcm2(iris[, 1:4], 3) |
| 95 | + |
| 96 | +## ----softSilhouette, fig.width=7, fig.height=4, fig.alt = "fig2.1"------------ |
| 97 | +# Soft silhouette for FCM |
| 98 | +sil_fcm <- softSilhouette(prob_matrix = fcm_result$u) |
| 99 | +plot(sil_fcm) |
| 100 | + |
| 101 | +# Soft silhouette for FCM2 |
| 102 | +sil_fcm2 <- softSilhouette(prob_matrix = fcm2_result$u) |
| 103 | +plot(sil_fcm2) |
| 104 | + |
| 105 | +## ----softSilhouette1, fig.width=7, fig.height=4, fig.alt = "fig2.2"----------- |
| 106 | +sfcm <- summary(sil_fcm, print.summary = FALSE) |
| 107 | +sfcm2 <- summary(sil_fcm2, print.summary = FALSE) |
| 108 | + |
| 109 | +cat("FCM average silhouette width:", sfcm$avg.width, "\n", |
| 110 | + "FCM2 average silhouette width:", sfcm2$avg.width) |
| 111 | + |
| 112 | + |
| 113 | +## ----cer-db-silhouette, fig.width=7, fig.height=4, fig.alt = "fig2.3"--------- |
| 114 | +# Certainty-based silhouette for FCM and FCM2 |
| 115 | + cer_fcm <- cerSilhouette(prob_matrix = fcm_result$u, print.summary = TRUE) |
| 116 | + plot(cer_fcm) |
| 117 | + |
| 118 | + cer_fcm2 <- cerSilhouette(prob_matrix = fcm2_result$u, print.summary = TRUE) |
| 119 | + plot(cer_fcm2) |
| 120 | + |
| 121 | +# Density-based silhouette for FCM and FCM2 |
| 122 | + db_fcm <- dbSilhouette(prob_matrix = fcm_result$u, print.summary = TRUE) |
| 123 | + plot(db_fcm) |
| 124 | + |
| 125 | + db_fcm2 <- dbSilhouette(prob_matrix = fcm2_result$u, print.summary = TRUE) |
| 126 | + plot(db_fcm2) |
| 127 | + |
| 128 | +## ----cer-db-silhouette-summary------------------------------------------------ |
| 129 | +# Compare average silhouette widths across all methods |
| 130 | + # Summary for FCM |
| 131 | + cer_sfcm <- summary(cer_fcm, print.summary = FALSE) |
| 132 | + db_sfcm <- summary(db_fcm, print.summary = FALSE) |
| 133 | + |
| 134 | + # Summary for FCM2 |
| 135 | + cer_sfcm2 <- summary(cer_fcm2, print.summary = FALSE) |
| 136 | + db_sfcm2 <- summary(db_fcm2, print.summary = FALSE) |
| 137 | + |
| 138 | + # Print comparison |
| 139 | + cat("FCM - Soft silhouette:", sfcm$avg.width, "\n", |
| 140 | + "FCM - Certainty silhouette:", cer_sfcm$avg.width, "\n", |
| 141 | + "FCM - Density-based silhouette:", db_sfcm$avg.width, |
| 142 | + "\n\n","FCM2 - Soft silhouette:", sfcm2$avg.width, |
| 143 | + "\n","FCM2 - Certainty silhouette:", cer_sfcm2$avg.width, |
| 144 | + "\n","FCM2 - Density-based silhouette:", db_sfcm2$avg.width, "\n") |
| 145 | + |
| 146 | +## ----screeplot1--------------------------------------------------------------- |
| 147 | +data(iris) |
| 148 | +avg_sil_width <- rep(NA,7) |
| 149 | +for (k in 2:7) { |
| 150 | + sil_out <- Silhouette( |
| 151 | + prox_matrix = "d", |
| 152 | + method = "pac", |
| 153 | + clust_fun = ppclust::fcm, |
| 154 | + x = iris[, 1:4], |
| 155 | + centers = k) |
| 156 | + avg_sil_width[k] <- summary(sil_out, print.summary = FALSE)$avg.width |
| 157 | +} |
| 158 | + |
| 159 | +## ----screeplot2, fig.width=7, fig.height=4, fig.alt = "fig3.1"---------------- |
| 160 | +plot(avg_sil_width, |
| 161 | + type = "o", |
| 162 | + ylab = "Overall Silhouette Width", |
| 163 | + xlab = "Number of Clusters", |
| 164 | + main = "Silhouette Scree Plot" |
| 165 | +) |
| 166 | + |
| 167 | +## ----plot0, fig.width=6, fig.height=4, fig.alt = "fig4.0"--------------------- |
| 168 | +data(iris) |
| 169 | + km_out <- kmeans(iris[, -5], 3) |
| 170 | + dist_mat <- proxy::dist(iris[, -5], km_out$centers) |
| 171 | + sil_obj <- Silhouette(dist_mat) |
| 172 | + plot(sil_obj) # S3 method auto-dispatch |
| 173 | + plotSilhouette(sil_obj) # explicit call (identical output) |
| 174 | + |
| 175 | +## ----plot1, fig.width=6, fig.height=4, fig.alt = "fig4.1"--------------------- |
| 176 | +library(cluster) |
| 177 | +pam_result <- pam(iris[, 1:4], k = 3) |
| 178 | +plotSilhouette(pam_result) # for cluster::pam object |
| 179 | + |
| 180 | +clara_result <- clara(iris[, 1:4], k = 3) |
| 181 | +plotSilhouette(clara_result) |
| 182 | + |
| 183 | +fanny_result <- fanny(iris[, 1:4], k = 3) |
| 184 | +plotSilhouette(fanny_result) |
| 185 | + |
| 186 | +## ----plot2, fig.width=6, fig.height=4, fig.alt = "fig4.2"--------------------- |
| 187 | +sil_base <- cluster::silhouette(pam_result) |
| 188 | +plotSilhouette(sil_base) |
| 189 | + |
| 190 | +## ----plot3, fig.width=6, fig.height=4, fig.alt = "fig4.3"--------------------- |
| 191 | +library(factoextra) |
| 192 | +eclust_result <- eclust(iris[, 1:4], "kmeans", k = 3, graph = FALSE) |
| 193 | +plotSilhouette(eclust_result) |
| 194 | + |
| 195 | +hcut_result <- hcut(iris[, 1:4], k = 3) |
| 196 | +plotSilhouette(hcut_result) |
| 197 | + |
| 198 | +## ----plot3.1, fig.width=7, fig.height=6, fig.alt = "fig4.3.1"----------------- |
| 199 | +library(drclust) |
| 200 | +# Loading the numeric in matrix |
| 201 | +iris_mat <- as.matrix(iris[,-5]) |
| 202 | +#applying a clustering algorithm |
| 203 | +drclust_out <- dpcakm(iris_mat, 20, 3) |
| 204 | +#silhouette based on the data and the output of the clustering algorithm |
| 205 | +d <- silhouette(iris_mat, drclust_out) |
| 206 | +plotSilhouette(d$cl.silhouette) |
| 207 | + |
| 208 | +## ----plot4, fig.width=6, fig.height=4, fig.alt = "fig4.4"--------------------- |
| 209 | +data(iris) |
| 210 | +fcm_out <- ppclust::fcm(iris[, 1:4], 3) |
| 211 | +sil_fuzzy <- Silhouette( |
| 212 | + prox_matrix = "d", prob_matrix = "u", clust_fun = fcm, |
| 213 | + x = iris[, 1:4], centers = 3, sort = TRUE |
| 214 | +) |
| 215 | +plot(sil_fuzzy, summary.legend = FALSE, grayscale = TRUE) |
| 216 | + |
| 217 | +## ----plot5, fig.width=6, fig.height=4, fig.alt = "fig4.5"--------------------- |
| 218 | +plotSilhouette(sil_fuzzy, grayscale = TRUE) # Use grayscale palette |
| 219 | +plotSilhouette(sil_fuzzy, summary.legend = TRUE) # Include size + avg silhouette in legend |
| 220 | +plotSilhouette(sil_fuzzy, label = TRUE) # Label bars with row index |
| 221 | + |
| 222 | +## ----custom1, fig.width=7, fig.height=4, fig.alt = "fig5.1"------------------- |
| 223 | +# Create a custom Silhouette object |
| 224 | +cluster_assignments <- c(1, 1, 2, 2, 3, 3) |
| 225 | +neighbor_clusters <- c(2, 2, 1, 1, 1, 1) |
| 226 | +silhouette_widths <- c(0.8, 0.7, 0.6, 0.9, 0.5, 0.4) |
| 227 | +weights <- c(0.9, 0.8, 0.7, 0.95, 0.6, 0.5) |
| 228 | + |
| 229 | +sil_custom <- getSilhouette( |
| 230 | + cluster = cluster_assignments, |
| 231 | + neighbor = neighbor_clusters, |
| 232 | + sil_width = silhouette_widths, |
| 233 | + weight = weights, |
| 234 | + proximity_type = "similarity", |
| 235 | + method = "pac", |
| 236 | + average = "fuzzy" |
| 237 | +) |
| 238 | +# Validate the object |
| 239 | +is.Silhouette(sil_custom) # Basic class check: TRUE |
| 240 | +is.Silhouette(sil_custom, strict = TRUE) # Strict structural validation: TRUE |
| 241 | +is.Silhouette(data.frame(a = 1:6)) # Non-Silhouette object: FALSE |
| 242 | +# Visualize the custom Silhouette object |
| 243 | +plotSilhouette(sil_custom, summary.legend = TRUE) |
| 244 | + |
| 245 | +## ----ext1--------------------------------------------------------------------- |
| 246 | +library(blockcluster) |
| 247 | +data(iris) |
| 248 | +result <- coclusterContinuous(as.matrix(iris[, -5]), nbcocluster = c(3, 2)) |
| 249 | + |
| 250 | +## ----ext2--------------------------------------------------------------------- |
| 251 | +sil_mode1 <- softSilhouette( |
| 252 | + prob_matrix = result@rowposteriorprob, |
| 253 | + method = "pac", |
| 254 | + print.summary = FALSE |
| 255 | +) |
| 256 | +sil_mode2 <- softSilhouette( |
| 257 | + prob_matrix = result@colposteriorprob, |
| 258 | + method = "pac", |
| 259 | + print.summary = FALSE |
| 260 | +) |
| 261 | + |
| 262 | +## ----ext3--------------------------------------------------------------------- |
| 263 | +ext_sil <- extSilhouette( |
| 264 | + sil_list = list(sil_mode1, sil_mode2), |
| 265 | + dim_names = c("Rows", "Columns"), |
| 266 | + print.summary = TRUE |
| 267 | +) |
| 268 | + |
0 commit comments