Skip to content

Commit d8f0671

Browse files
authored
Merge pull request #16 from snoweye/master
v0.2-0
2 parents e929298 + abc81d4 commit d8f0671

15 files changed

+336
-39
lines changed

ChangeLog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
2018-02-01: Ver. 0.2-0
2+
* Add ‘R_registerRoutines’, ‘R_useDynamicSymbols’.
3+
14
2016-12-17: Ver. 0.1-8
25
* Change web address.
36

DESCRIPTION

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: pmclust
2-
Version: 0.1-9
3-
Date: 2016-12-17
2+
Version: 0.2-0
3+
Date: 2018-02-01
44
Title: Parallel Model-Based Clustering using
55
Expectation-Gathering-Maximization Algorithm for Finite Mixture
66
Gaussian Model
@@ -21,9 +21,9 @@ Description: Aims to utilize model-based clustering (unsupervised)
2121
Gaussian models. The implementation is default in the single program
2222
multiple data programming model. The code can be executed
2323
through pbdMPI and independent to most MPI applications.
24-
See the High Performance
25-
Statistical Computing website for more information, documents
26-
and examples.
24+
See the High Performance Statistical Computing website
25+
<https://snoweye.github.io/hpsc/>
26+
for more information, documents and examples.
2727
License: GPL (>= 2)
2828
URL: http://r-pbd.org/
2929
BugReports: http://group.r-pbd.org/

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ importFrom(pbdMPI,get.jid)
2626
export(
2727
### General functions.
2828
"pmclust",
29+
"pmclust.reduceK",
2930
"pkmeans",
3031
"as.dmat",
3132
"as.spmd",

R/00_pmclust_reduceK.r

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
### For automatically reducing K methods.
2+
3+
### X should be in spmd, gbd, or dmat and set at .pmclustEnv or so, as used
4+
### in pmclust().
5+
pmclust.reduceK <- function(K = 2, algorithm = .PMC.CT$algorithm){
6+
if(any(algorithm[1] %in% c("kmeans", "kmeans.dmat"))){
7+
stop("kmeans/pkmeans is not supported in reduceK.")
8+
}
9+
10+
if(algorithm[1] %in% .PMC.CT$algorithm.gbd){
11+
ret <- pmclust.reduceK.spmd(K = K, algorithm = algorithm)
12+
} else if(algorithm[1] %in% .PMC.CT$algorithm.dmat){
13+
ret <- pmclust.reduceK.dmat(K = K, algorithm = algorithm)
14+
} else{
15+
comm.stop("The algorithm is not found.")
16+
}
17+
18+
ret
19+
} # End of pmclust.reduceK().
20+
21+
22+
pmclust.reduceK.spmd <- function(K = 2, algorithm = .PMC.CT$algorithm){
23+
# Get an initial start.
24+
PARAM.org <- set.global(K = K)
25+
PARAM.org <- try(initial.em(PARAM.org), silent = TRUE)
26+
27+
# Ensure the initial is good. Warning: This may take forever to run!
28+
repeat{
29+
if(class(PARAM.org) == "try-error"){
30+
PARAM.org <- set.global(K = K)
31+
PARAM.org <- try(initial.em(PARAM.org), silent = TRUE)
32+
} else{
33+
break
34+
}
35+
}
36+
37+
# Update steps.
38+
method.step <- switch(algorithm[1],
39+
"em" = em.step,
40+
"aecm" = aecm.step,
41+
"apecm" = apecm.step,
42+
"apecma" = apecma.step,
43+
NULL)
44+
if(comm.all(is.null(method.step))){
45+
comm.stop("Algorithm is not found.")
46+
}
47+
PARAM.new <- try(method.step(PARAM.org), silent = TRUE)
48+
em.update.class()
49+
N.CLASS <- get.N.CLASS(K)
50+
51+
52+
# Reduce K if error occurs.
53+
repeat{
54+
if((class(PARAM.new) == "try-error" ||
55+
.pmclustEnv$CHECK$convergence == 99) &&
56+
K > 1){
57+
# Drop specific i.k if available or
58+
# drop the smallest class or
59+
# drop the class with the smallest eta among all small classes or
60+
# drop all classes with 0 elements.
61+
if(.pmclustEnv$CONTROL$stop.at.fail && .pmclustEnv$FAIL.i.k > 0){
62+
i.k <- .pmclustEnv$FAIL.i.k
63+
} else{
64+
i.k <- which(N.CLASS == min(N.CLASS))
65+
}
66+
if(i.k > 1 && min(N.CLASS) > 0){
67+
i.k <- i.k[which.min(PARAM.new$ETA[i.k])]
68+
}
69+
K <- K - length(i.k)
70+
comm.cat("- Reduce: ", K, "\n")
71+
72+
# Initial global storage.
73+
PARAM.org <- set.global(K = K)
74+
75+
# Replacing PARAM.org by previous PARAM.new.
76+
PARAM.org$ETA <- PARAM.new$ETA[-i.k] / sum(PARAM.new$ETA[-i.k])
77+
PARAM.org$log.ETA <- log(PARAM.org$ETA)
78+
PARAM.org$MU <- matrix(PARAM.new$MU[, -i.k], ncol = K)
79+
PARAM.org$SIGMA <- PARAM.new$SIGMA[-i.k]
80+
81+
# Update steps.
82+
e.step.spmd(PARAM.org)
83+
PARAM.new <- try(method.step(PARAM.org), silent = TRUE)
84+
em.update.class()
85+
N.CLASS <- get.N.CLASS(K)
86+
} else{
87+
break
88+
}
89+
}
90+
91+
# For return.
92+
ret <- list(algorithm = algorithm[1],
93+
param = PARAM.new,
94+
class = .pmclustEnv$CLASS.spmd,
95+
n.class = N.CLASS,
96+
check = .pmclustEnv$CHECK)
97+
98+
ret
99+
} # End of pmclust.reduceK.spmd().
100+

R/00_pmclust_reduceK_dmat.r

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
### A dmat version for automatically reducing K methods.
2+
3+
pmclust.reduceK.dmat <- function(K = 2, algorithm = .PMC.CT$algorithm){
4+
# Get an initial start.
5+
PARAM.org <- set.global.dmat(K = K)
6+
PARAM.org <- try(initial.em.dmat(PARAM.org), silent = TRUE)
7+
8+
# Ensure the initial is good. Warning: This may take forever to run!
9+
repeat{
10+
if(class(PARAM.org) == "try-error"){
11+
PARAM.org <- set.global.dmat(K = K)
12+
PARAM.org <- try(initial.em.dmat(PARAM.org), silent = TRUE)
13+
} else{
14+
break
15+
}
16+
}
17+
18+
# Update steps.
19+
method.step <- switch(algorithm[1],
20+
"em.dmat" = em.step.dmat,
21+
# "aecm.dmat" = aecm.step.dmat,
22+
# "apecm.dmat" = apecm.step.dmat,
23+
# "apecma.dmat" = apecma.step.dmat,
24+
NULL)
25+
if(comm.all(is.null(method.step))){
26+
comm.stop("Algorithm is not found.")
27+
}
28+
PARAM.new <- try(method.step(PARAM.org), silent = TRUE)
29+
em.update.class.dmat()
30+
N.CLASS <- get.N.CLASS.dmat(K)
31+
32+
33+
# Reduce K if error occurs.
34+
repeat{
35+
if((class(PARAM.new) == "try-error" ||
36+
.pmclustEnv$CHECK$convergence == 99) &&
37+
K > 1){
38+
# Drop specific i.k if available or
39+
# drop the smallest class or
40+
# drop the class with the smallest eta among all small classes or
41+
# drop all classes with 0 elements.
42+
if(.pmclustEnv$CONTROL$stop.at.fail && .pmclustEnv$FAIL.i.k > 0){
43+
i.k <- .pmclustEnv$FAIL.i.k
44+
} else{
45+
i.k <- which(N.CLASS == min(N.CLASS))
46+
}
47+
if(i.k > 1 && min(N.CLASS) > 0){
48+
i.k <- i.k[which.min(PARAM.new$ETA[i.k])]
49+
}
50+
K <- K - length(i.k)
51+
comm.cat("- Reduce: ", K, "\n")
52+
53+
# Initial global storage.
54+
PARAM.org <- set.global.dmat(K = K)
55+
56+
# Replacing PARAM.org by previous PARAM.new.
57+
PARAM.org$ETA <- PARAM.new$ETA[-i.k] / sum(PARAM.new$ETA[-i.k])
58+
PARAM.org$log.ETA <- log(PARAM.org$ETA)
59+
PARAM.org$MU <- matrix(PARAM.new$MU[, -i.k], ncol = K)
60+
PARAM.org$SIGMA <- PARAM.new$SIGMA[-i.k]
61+
62+
# Update steps.
63+
e.step.dmat(PARAM.org)
64+
PARAM.new <- try(method.step(PARAM.org), silent = TRUE)
65+
em.update.class.dmat()
66+
N.CLASS <- get.N.CLASS.dmat(K)
67+
} else{
68+
break
69+
}
70+
}
71+
72+
# For return.
73+
ret <- list(algorithm = algorithm[1],
74+
param = PARAM.new,
75+
class = .pmclustEnv$CLASS.spmd,
76+
n.class = N.CLASS,
77+
check = .pmclustEnv$CHECK)
78+
79+
ret
80+
} # End of pmclust.reduceK.dmat().
81+

R/dmat_em_base.r

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -212,17 +212,30 @@ m.step.dmat <- function(PARAM){
212212
tmp.1 <- crossprod(B)
213213
tmp.2 <- as.matrix(tmp.1)
214214
tmp.SIGMA <- tmp.2
215-
dim(tmp.SIGMA) <- c(p, p)
216215

217-
tmp.U <- decompsigma(tmp.SIGMA)
218-
PARAM$U.check[[i.k]] <- tmp.U$check
219-
if(tmp.U$check){
220-
PARAM$U[[i.k]] <- tmp.U$value
221-
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
216+
if(!any(is.nan(tmp.SIGMA))){
217+
dim(tmp.SIGMA) <- c(p, p)
218+
219+
tmp.U <- decompsigma(tmp.SIGMA)
220+
PARAM$U.check[[i.k]] <- tmp.U$check
221+
if(tmp.U$check){
222+
PARAM$U[[i.k]] <- tmp.U$value
223+
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
224+
}
225+
} else{
226+
PARAM$U.check[[i.k]] <- FALSE
227+
if(.pmclustEnv$CONTROL$debug > 2){
228+
comm.cat(" SIGMA[[", i.k, "]] has NaN. Updating is skipped.\n", sep = "", quiet = TRUE)
229+
}
230+
231+
.pmclustEnv$FAIL.i.k <- i.k # i.k is failed to update.
232+
if(.pmclustEnv$CONTROL$stop.at.fail){
233+
stop(paste("NaN occurs at i.k=", i.k, sep = ""))
234+
}
222235
}
223236
} else{
224237
if(.pmclustEnv$CONTROL$debug > 2){
225-
comm.cat(" SIGMA[[", i.k, "]] is fixed.\n", sep = "", quiet = TRUE)
238+
comm.cat(" SIGMA[[", i.k, "]] is fixed. Updating is skipped.\n", sep = "", quiet = TRUE)
226239
}
227240
}
228241
}
@@ -262,11 +275,16 @@ em.step.dmat <- function(PARAM.org){
262275
time.start <- proc.time()
263276
}
264277

278+
### This is used to record which i.k may be failed to update.
279+
.pmclustEnv$FAIL.i.k <- 0
280+
281+
### Start EM next in DMAT format.
282+
265283
### WCC: original
266284
PARAM.new <- try(em.onestep.dmat(PARAM.org))
267285
### WCC: temp
268286
# PARAM.new <- em.onestep.dmat(PARAM.org)
269-
if(comm.any(class(PARAM.new) == "try-error")){
287+
if(class(PARAM.new) == "try-error" || is.nan(PARAM.new$logL)){
270288
comm.cat("Results of previous iterations are returned.\n", quiet = TRUE)
271289
.pmclustEnv$CHECK$convergence <- 99
272290
PARAM.new <- PARAM.org

R/pm_aecm_base.r

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,30 @@ cm.step.spmd.SIGMA <- function(PARAM){
4040
sqrt(.pmclustEnv$Z.spmd[, i.k] / .pmclustEnv$Z.colSums[i.k])
4141
tmp.SIGMA <- crossprod(B)
4242
tmp.SIGMA <- spmd.allreduce.double(tmp.SIGMA, double(p.2), op = "sum")
43-
dim(tmp.SIGMA) <- c(p, p)
4443

45-
tmp.U <- decompsigma(tmp.SIGMA)
46-
PARAM$U.check[[i.k]] <- tmp.U$check
47-
if(tmp.U$check){
48-
PARAM$U[[i.k]] <- tmp.U$value
49-
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
44+
if(!any(is.nan(tmp.SIGMA))){
45+
dim(tmp.SIGMA) <- c(p, p)
46+
47+
tmp.U <- decompsigma(tmp.SIGMA)
48+
PARAM$U.check[[i.k]] <- tmp.U$check
49+
if(tmp.U$check){
50+
PARAM$U[[i.k]] <- tmp.U$value
51+
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
52+
}
53+
} else{
54+
PARAM$U.check[[i.k]] <- FALSE
55+
if(.pmclustEnv$CONTROL$debug > 2){
56+
comm.cat(" SIGMA[[", i.k, "]] has NaN. Updating is skipped\n", sep = "", quiet = TRUE)
57+
}
58+
59+
.pmclustEnv$FAIL.i.k <- i.k # i.k is failed to update.
60+
if(.pmclustEnv$CONTROL$stop.at.fail){
61+
stop(paste("NaN occurs at i.k=", i.k, sep = ""))
62+
}
5063
}
5164
} else{
5265
if(.pmclustEnv$CONTROL$debug > 2){
53-
comm.cat(" SIGMA[[", i.k, "]] is fixed.\n", sep = "")
66+
comm.cat(" SIGMA[[", i.k, "]] is fixed. Updating is skipped\n", sep = "")
5467
}
5568
}
5669
}
@@ -84,8 +97,12 @@ aecm.step.spmd <- function(PARAM.org){
8497
time.start <- proc.time()
8598
}
8699

100+
### This is used to record which i.k may be failed to update.
101+
.pmclustEnv$FAIL.i.k <- 0
102+
103+
### Start AECM here.
87104
PARAM.new <- try(aecm.onestep.spmd(PARAM.org))
88-
if(comm.any(class(PARAM.new) == "try-error")){
105+
if(class(PARAM.new) == "try-error" || is.nan(PARAM.new$logL)){
89106
comm.cat("Results of previous iterations are returned.\n", quiet = TRUE)
90107
.pmclustEnv$CHECK$convergence <- 99
91108
PARAM.new <- PARAM.org

R/pm_apecm_base.r

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,12 @@ apecm.step.spmd <- function(PARAM.org){
133133
time.start <- proc.time()
134134
}
135135

136+
### This is used to record which i.k may be failed to update.
137+
.pmclustEnv$FAIL.i.k <- 0
138+
139+
### Start APECM here.
136140
PARAM.new <- try(apecm.onestep.spmd(PARAM.org))
137-
if(comm.any(class(PARAM.new) == "try-error")){
141+
if(class(PARAM.new) == "try-error" || is.nan(PARAM.new$logL)){
138142
comm.cat("Results of previous iterations are returned.\n", quiet = TRUE)
139143
.pmclustEnv$CHECK$convergence <- 99
140144
PARAM.new <- PARAM.org

R/pm_apecma_base.r

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,30 @@ cm.step.spmd.MU.SIGMA.k <- function(PARAM, i.k){
3737
sqrt(.pmclustEnv$Z.spmd[, i.k] / .pmclustEnv$Z.colSums[i.k])
3838
tmp.SIGMA <- crossprod(B)
3939
tmp.SIGMA <- spmd.allreduce.double(tmp.SIGMA, double(p.2), op = "sum")
40-
dim(tmp.SIGMA) <- c(p, p)
4140

42-
tmp.U <- decompsigma(tmp.SIGMA)
43-
PARAM$U.check[[i.k]] <- tmp.U$check
44-
if(tmp.U$check){
45-
PARAM$U[[i.k]] <- tmp.U$value
46-
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
41+
if(!any(is.nan(tmp.SIGMA))){
42+
dim(tmp.SIGMA) <- c(p, p)
43+
44+
tmp.U <- decompsigma(tmp.SIGMA)
45+
PARAM$U.check[[i.k]] <- tmp.U$check
46+
if(tmp.U$check){
47+
PARAM$U[[i.k]] <- tmp.U$value
48+
PARAM$SIGMA[[i.k]] <- tmp.SIGMA
49+
}
50+
} else{
51+
PARAM$U.check[[i.k]] <- FALSE
52+
if(.pmclustEnv$CONTROL$debug > 2){
53+
comm.cat(" SIGMA[[", i.k, "]] has NaN. Updating is skipped.\n", sep = "", quiet = TRUE)
54+
}
55+
56+
.pmclustEnv$FAIL.i.k <- i.k # i.k is failed to update.
57+
if(.pmclustEnv$CONTROL$stop.at.fail){
58+
stop(paste("NaN occurs at i.k=", i.k, sep = ""))
59+
}
4760
}
4861
} else{
4962
if(.pmclustEnv$CONTROL$debug > 2){
50-
comm.cat(" SIGMA[[", i.k, "]] is fixed.\n", sep = "", quiet = TRUE)
63+
comm.cat(" SIGMA[[", i.k, "]] is fixed. Updating is skipped.\n", sep = "", quiet = TRUE)
5164
}
5265
}
5366

@@ -79,8 +92,12 @@ apecma.step.spmd <- function(PARAM.org){
7992
time.start <- proc.time()
8093
}
8194

95+
### This is used to record which i.k may be failed to update.
96+
.pmclustEnv$FAIL.i.k <- 0
97+
98+
### Start APECMA here.
8299
PARAM.new <- try(apecma.onestep.spmd(PARAM.org))
83-
if(comm.any(class(PARAM.new) == "try-error")){
100+
if(class(PARAM.new) == "try-error" || is.nan(PARAM.new$logL)){
84101
comm.cat("Results of previous iterations are returned.\n", quiet =TRUE)
85102
.pmclustEnv$CHECK$convergence <- 99
86103
PARAM.new <- PARAM.org

0 commit comments

Comments
 (0)