Skip to content

Commit 0e80afe

Browse files
committed
kmedoids(): fix duplicate medoids handling
If several medoids have identical coordinates, make sure each medoid is assigned to itself; otherwise its cluster might end up empty. If a medoid's distance to itself is greater than its distance to another medoid, the distance matrix is invalid and an ArgumentError is raised. Fixes #231.
1 parent a063ec6 commit 0e80afe

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

src/kmedoids.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ function _kmed_update_assignments!(dist::AbstractMatrix{<:Real}, # in: (n, n)
206206
m = medoids[i]
207207
v = dist[m, j]
208208
# assign if current medoid is closer or if it is j itself
209-
if v < mv
209+
if (v < mv) || (m == j)
210+
(v <= mv) || throw(ArgumentError("sample #$j reassigned from medoid[$p]=#$(medoids[p]) (distance=$mv) to medoid[$i]=#$m (distance=$v); check the distance matrix correctness"))
210211
p = i
211212
mv = v
212213
end

test/kmedoids.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ include("test_helpers.jl")
99
Random.seed!(34568)
1010
@test_throws ArgumentError kmedoids(randn(2, 3), 1)
1111
@test_throws ArgumentError kmedoids(randn(2, 3), 4)
12-
dist = inv.(max.(pairwise(Euclidean(), randn(2, 3), dims=2), 0.1))
12+
dist = max.(pairwise(Euclidean(), randn(2, 3), dims=2), 0.1)
1313
@test @inferred(kmedoids(dist, 2)) isa KmedoidsResult
14+
# incorrect distance matrix
15+
invdist = inv.(max.(pairwise(Euclidean(), randn(2, 3), dims=2), 0.1))
16+
@test_throws ArgumentError kmedoids(invdist, 2)
17+
1418
@test_throws ArgumentError kmedoids(dist, 2, display=:mylog)
1519
for disp in keys(Clustering.DisplayLevels)
1620
@test @inferred(kmedoids(dist, 2, display=disp)) isa KmedoidsResult
@@ -48,6 +52,15 @@ R = @inferred(kmedoids(dist, k))
4852
end
4953
end
5054

55+
@testset "Duplicated points (#231)" begin
56+
pts = [0.0 0.0]
57+
dists = pairwise(SqEuclidean(), pts, dims=2)
58+
dupmed = kmedoids(dists, 2)
59+
@test nclusters(dupmed) == 2
60+
@test sort(dupmed.medoids) == [1, 2]
61+
@test sort(dupmed.assignments) == [1, 2]
62+
end
63+
5164
@testset "Toy example #1" begin
5265
pts = [1 2 3; .1 .2 .3; 4 5.6 7]
5366
# k=1 and k=n cases

0 commit comments

Comments
 (0)