Skip to content

Commit 0913ddb

Browse files
committed
scalafmt
1 parent 4afde36 commit 0913ddb

File tree

85 files changed

+3093
-2790
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+3093
-2790
lines changed

src/main/scala/com/massivedatascience/clusterer/AnnealedKMeans.scala

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ import org.apache.spark.rdd.RDD
2222

2323
/** Configuration for annealed (deterministic annealing) k-means clustering.
2424
*
25-
* Annealed k-means gradually transitions from soft to hard clustering by increasing the inverse temperature parameter
26-
* (beta) according to an annealing schedule.
25+
* Annealed k-means gradually transitions from soft to hard clustering by increasing the inverse
26+
* temperature parameter (beta) according to an annealing schedule.
2727
*
2828
* @param initialBeta
2929
* Starting inverse temperature (low = soft, high = hard)
3030
* @param finalBeta
3131
* Ending inverse temperature
3232
* @param annealingSchedule
33-
* Strategy for increasing beta: "exponential" - β_new = β_old * annealingRate "linear" - β_new = β_old +
34-
* annealingRate
33+
* Strategy for increasing beta: "exponential" - β_new = β_old * annealingRate; "linear" - β_new =
34+
* β_old + annealingRate
3535
* @param annealingRate
3636
* Rate at which beta increases
3737
* @param stepsPerTemperature
@@ -44,14 +44,14 @@ import org.apache.spark.rdd.RDD
4444
* Minimum membership probability (from BregmanSoftKMeansConfig)
4545
*/
4646
case class AnnealedKMeansConfig(
47-
initialBeta: Double = 0.1,
48-
finalBeta: Double = 100.0,
49-
annealingSchedule: String = "exponential",
50-
annealingRate: Double = 1.5,
51-
stepsPerTemperature: Int = 5,
52-
maxTemperatures: Int = 20,
53-
convergenceThreshold: Double = 1e-4,
54-
minMembership: Double = 1e-10
47+
initialBeta: Double = 0.1,
48+
finalBeta: Double = 100.0,
49+
annealingSchedule: String = "exponential",
50+
annealingRate: Double = 1.5,
51+
stepsPerTemperature: Int = 5,
52+
maxTemperatures: Int = 20,
53+
convergenceThreshold: Double = 1e-4,
54+
minMembership: Double = 1e-10
5555
) extends ConfigValidator {
5656

5757
requirePositive(initialBeta, "Initial beta")
@@ -65,8 +65,8 @@ case class AnnealedKMeansConfig(
6565

6666
/** Annealed (deterministic annealing) k-means clustering implementation.
6767
*
68-
* This algorithm gradually transitions from soft to hard clustering using a temperature parameter, providing several
69-
* benefits over standard k-means:
68+
* This algorithm gradually transitions from soft to hard clustering using a temperature parameter,
69+
* providing several benefits over standard k-means:
7070
*
7171
* Benefits:
7272
* - Better escape from local minima (starts globally, refines locally)
@@ -75,9 +75,10 @@ case class AnnealedKMeansConfig(
7575
* - Works with any Bregman divergence
7676
*
7777
* Algorithm:
78-
* 1. Start with low beta (high temperature) = very soft clustering 2. Run soft k-means (BregmanSoftKMeans) for a few
79-
* iterations 3. Increase beta (decrease temperature) = make clustering sharper 4. Repeat until beta is high (low
80-
* temperature) = hard clustering 5. Final result approaches standard k-means
78+
* 1. Start with low beta (high temperature) = very soft clustering; 2. Run soft k-means
79+
* (BregmanSoftKMeans) for a few iterations; 3. Increase beta (decrease temperature) = make
80+
* clustering sharper; 4. Repeat until beta is high (low temperature) = hard clustering; 5.
81+
* Final result approaches standard k-means
8182
*
8283
* The annealing schedule controls how quickly we transition from soft to hard:
8384
* - Exponential: β_t+1 = rate * β_t (faster)
@@ -91,13 +92,15 @@ case class AnnealedKMeansConfig(
9192
* @param config
9293
* Configuration parameters
9394
*/
94-
class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig()) extends MultiKMeansClusterer with Logging {
95+
class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig())
96+
extends MultiKMeansClusterer
97+
with Logging {
9598

9699
def cluster(
97-
maxIterations: Int,
98-
pointOps: BregmanPointOps,
99-
data: RDD[BregmanPoint],
100-
centers: Seq[IndexedSeq[BregmanCenter]]
100+
maxIterations: Int,
101+
pointOps: BregmanPointOps,
102+
data: RDD[BregmanPoint],
103+
centers: Seq[IndexedSeq[BregmanCenter]]
101104
): Seq[ClusteringWithDistortion] = {
102105

103106
logger.info(s"Starting annealed k-means with ${centers.size} initial center sets")
@@ -113,9 +116,9 @@ class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig()) exte
113116
/** Train annealed k-means on a single initial center set.
114117
*/
115118
private def trainAnnealed(
116-
pointOps: BregmanPointOps,
117-
data: RDD[BregmanPoint],
118-
initialCenters: IndexedSeq[BregmanCenter]
119+
pointOps: BregmanPointOps,
120+
data: RDD[BregmanPoint],
121+
initialCenters: IndexedSeq[BregmanCenter]
119122
): ClusteringWithDistortion = {
120123

121124
val k = initialCenters.length
@@ -171,7 +174,7 @@ class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig()) exte
171174
)
172175

173176
val finalSoftKMeans = new BregmanSoftKMeans(finalConfig)
174-
val finalResult =
177+
val finalResult =
175178
finalSoftKMeans.clusterSoft(config.stepsPerTemperature, pointOps, data, currentCenters)
176179

177180
totalIterations += finalResult.iterations

0 commit comments

Comments
 (0)