@@ -22,16 +22,16 @@ import org.apache.spark.rdd.RDD
2222
2323/** Configuration for annealed (deterministic annealing) k-means clustering.
2424 *
25- * Annealed k-means gradually transitions from soft to hard clustering by increasing the inverse temperature parameter
26- * (beta) according to an annealing schedule.
25+ * Annealed k-means gradually transitions from soft to hard clustering by increasing the inverse
26+ * temperature parameter (beta) according to an annealing schedule.
2727 *
2828 * @param initialBeta
2929 * Starting inverse temperature (low = soft, high = hard)
3030 * @param finalBeta
3131 * Ending inverse temperature
3232 * @param annealingSchedule
33- * Strategy for increasing beta: "exponential" - β_new = β_old * annealingRate "linear" - β_new = β_old +
34- * annealingRate
33+ * Strategy for increasing beta: "exponential" - β_new = β_old * annealingRate; "linear" - β_new =
34+ * β_old + annealingRate
3535 * @param annealingRate
3636 * Rate at which beta increases
3737 * @param stepsPerTemperature
@@ -44,14 +44,14 @@ import org.apache.spark.rdd.RDD
4444 * Minimum membership probability (from BregmanSoftKMeansConfig)
4545 */
4646case class AnnealedKMeansConfig (
47- initialBeta : Double = 0.1 ,
48- finalBeta : Double = 100.0 ,
49- annealingSchedule : String = " exponential" ,
50- annealingRate : Double = 1.5 ,
51- stepsPerTemperature : Int = 5 ,
52- maxTemperatures : Int = 20 ,
53- convergenceThreshold : Double = 1e-4 ,
54- minMembership : Double = 1e-10
47+ initialBeta : Double = 0.1 ,
48+ finalBeta : Double = 100.0 ,
49+ annealingSchedule : String = " exponential" ,
50+ annealingRate : Double = 1.5 ,
51+ stepsPerTemperature : Int = 5 ,
52+ maxTemperatures : Int = 20 ,
53+ convergenceThreshold : Double = 1e-4 ,
54+ minMembership : Double = 1e-10
5555) extends ConfigValidator {
5656
5757 requirePositive(initialBeta, " Initial beta" )
@@ -65,8 +65,8 @@ case class AnnealedKMeansConfig(
6565
6666/** Annealed (deterministic annealing) k-means clustering implementation.
6767 *
68- * This algorithm gradually transitions from soft to hard clustering using a temperature parameter, providing several
69- * benefits over standard k-means:
68+ * This algorithm gradually transitions from soft to hard clustering using a temperature parameter,
69+ * providing several benefits over standard k-means:
7070 *
7171 * Benefits:
7272 * - Better escape from local minima (starts globally, refines locally)
@@ -75,9 +75,10 @@ case class AnnealedKMeansConfig(
7575 * - Works with any Bregman divergence
7676 *
7777 * Algorithm:
78- * 1. Start with low beta (high temperature) = very soft clustering 2. Run soft k-means (BregmanSoftKMeans) for a few
79- * iterations 3. Increase beta (decrease temperature) = make clustering sharper 4. Repeat until beta is high (low
80- * temperature) = hard clustering 5. Final result approaches standard k-means
78+ * 1. Start with low beta (high temperature) = very soft clustering; 2. Run soft k-means
79+ * (BregmanSoftKMeans) for a few iterations; 3. Increase beta (decrease temperature) = make
80+ * clustering sharper; 4. Repeat until beta is high (low temperature) = hard clustering; 5.
81+ * Final result approaches standard k-means.
8182 *
8283 * The annealing schedule controls how quickly we transition from soft to hard:
8384 * - Exponential: β_t+1 = rate * β_t (faster)
@@ -91,13 +92,15 @@ case class AnnealedKMeansConfig(
9192 * @param config
9293 * Configuration parameters
9394 */
94- class AnnealedKMeans (config : AnnealedKMeansConfig = AnnealedKMeansConfig ()) extends MultiKMeansClusterer with Logging {
95+ class AnnealedKMeans (config : AnnealedKMeansConfig = AnnealedKMeansConfig ())
96+ extends MultiKMeansClusterer
97+ with Logging {
9598
9699 def cluster (
97- maxIterations : Int ,
98- pointOps : BregmanPointOps ,
99- data : RDD [BregmanPoint ],
100- centers : Seq [IndexedSeq [BregmanCenter ]]
100+ maxIterations : Int ,
101+ pointOps : BregmanPointOps ,
102+ data : RDD [BregmanPoint ],
103+ centers : Seq [IndexedSeq [BregmanCenter ]]
101104 ): Seq [ClusteringWithDistortion ] = {
102105
103106 logger.info(s " Starting annealed k-means with ${centers.size} initial center sets " )
@@ -113,9 +116,9 @@ class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig()) exte
113116 /** Train annealed k-means on a single initial center set.
114117 */
115118 private def trainAnnealed (
116- pointOps : BregmanPointOps ,
117- data : RDD [BregmanPoint ],
118- initialCenters : IndexedSeq [BregmanCenter ]
119+ pointOps : BregmanPointOps ,
120+ data : RDD [BregmanPoint ],
121+ initialCenters : IndexedSeq [BregmanCenter ]
119122 ): ClusteringWithDistortion = {
120123
121124 val k = initialCenters.length
@@ -171,7 +174,7 @@ class AnnealedKMeans(config: AnnealedKMeansConfig = AnnealedKMeansConfig()) exte
171174 )
172175
173176 val finalSoftKMeans = new BregmanSoftKMeans (finalConfig)
174- val finalResult =
177+ val finalResult =
175178 finalSoftKMeans.clusterSoft(config.stepsPerTemperature, pointOps, data, currentCenters)
176179
177180 totalIterations += finalResult.iterations
0 commit comments