@@ -176,8 +176,11 @@ class KMeansppCenterChooser : public CenterChooser<Distance>
         assert(index >= 0 && index < n);
         centers[0] = indices[index];
 
+        // Computing distance^2 further raises the probability of picking new centers
+        // far from the previous ones, as prescribed by the "k-means++: the advantages of careful seeding" paper.
         for (int i = 0; i < n; i++) {
             closestDistSq[i] = distance_(points_[indices[i]], points_[indices[index]], cols_);
+            closestDistSq[i] *= closestDistSq[i];
             currentPot += closestDistSq[i];
         }
 
@@ -203,7 +206,10 @@ class KMeansppCenterChooser : public CenterChooser<Distance>
 
                 // Compute the new potential
                 double newPot = 0;
-                for (int i = 0; i < n; i++) newPot += std::min( distance_(points_[indices[i]], points_[indices[index]], cols_), closestDistSq[i] );
+                for (int i = 0; i < n; i++) {
+                    DistanceType dist = distance_(points_[indices[i]], points_[indices[index]], cols_);
+                    newPot += std::min( dist*dist, closestDistSq[i] );
+                }
 
                 // Store the best result
                 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
@@ -215,7 +221,10 @@ class KMeansppCenterChooser : public CenterChooser<Distance>
             // Add the appropriate center
             centers[centerCount] = indices[bestNewIndex];
             currentPot = bestNewPot;
-            for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(points_[indices[i]], points_[indices[bestNewIndex]], cols_), closestDistSq[i] );
+            for (int i = 0; i < n; i++) {
+                DistanceType dist = distance_(points_[indices[i]], points_[indices[bestNewIndex]], cols_);
+                closestDistSq[i] = std::min( dist*dist, closestDistSq[i] );
+            }
         }
 
         centers_length = centerCount;
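
For readers who want the D^2 weighting shown in isolation, below is a minimal standalone sketch of k-means++ seeding in plain C++. It is not the FLANN code touched by this patch; names such as kmeansppSeed and sqDist, and the std::vector point layout, are illustrative assumptions only. The key idea matches the patch: each new center is sampled with probability proportional to the squared distance to the nearest center chosen so far.

// Minimal k-means++ seeding sketch (illustrative; not the FLANN implementation).
#include <cassert>
#include <random>
#include <vector>

// Squared Euclidean distance between two points of equal dimension.
static double sqDist(const std::vector<double>& a, const std::vector<double>& b)
{
    double s = 0.0;
    for (size_t d = 0; d < a.size(); ++d) {
        const double diff = a[d] - b[d];
        s += diff * diff;
    }
    return s;
}

// Pick k initial centers: the first uniformly at random, each subsequent one
// with probability proportional to its squared distance to the nearest chosen center (D^2 weighting).
std::vector<size_t> kmeansppSeed(const std::vector<std::vector<double>>& points,
                                 size_t k, std::mt19937& rng)
{
    assert(!points.empty() && k > 0 && k <= points.size());
    std::vector<size_t> centers;
    std::uniform_int_distribution<size_t> uniform(0, points.size() - 1);
    centers.push_back(uniform(rng));

    // closest[i] holds the squared distance from point i to its nearest chosen center.
    std::vector<double> closest(points.size());
    for (size_t i = 0; i < points.size(); ++i)
        closest[i] = sqDist(points[i], points[centers[0]]);

    while (centers.size() < k) {
        // Sample the next center index with weight proportional to closest[i].
        std::discrete_distribution<size_t> pick(closest.begin(), closest.end());
        const size_t next = pick(rng);
        centers.push_back(next);

        // Update each point's squared distance to its nearest center.
        for (size_t i = 0; i < points.size(); ++i)
            closest[i] = std::min(closest[i], sqDist(points[i], points[next]));
    }
    return centers;
}

Usage would look like: seed a std::mt19937 and call kmeansppSeed(data, 5, rng). The sketch does not handle the degenerate case where all remaining weights are zero (i.e., fewer than k distinct points), which a production implementation should guard against.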