Skip to content

Commit fce05b8

Browse files
authored
Merge pull request #3833 from PrimozGodec/fix-dbscan
[FIX] DBSCAN: Fix predicted labels
2 parents 3da6479 + 40396e3 commit fce05b8

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

Orange/clustering/dbscan.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import sklearn.cluster as skl_cluster
2-
from numpy import ndarray
2+
from numpy import ndarray, unique
33

44
from Orange.data import Table, DiscreteVariable, Domain, Instance
55
from Orange.projection import SklProjector, Projection
@@ -38,11 +38,11 @@ def __call__(self, data):
3838
if data.domain is not self.pre_domain:
3939
data = data.transform(self.pre_domain)
4040
y = self.proj.fit_predict(data.X)
41-
vals = [-1] + list(self.proj.core_sample_indices_)
41+
vals, indices = unique(y, return_inverse=True)
4242
c = DiscreteVariable(name='Core sample index',
4343
values=[str(v) for v in vals])
4444
domain = Domain([c])
45-
return Table(domain, y.reshape(len(y), 1))
45+
return Table(domain, indices.reshape(len(y), 1))
4646

4747
elif isinstance(data, Instance):
4848
if data.domain is not self.pre_domain:

Orange/tests/test_clustering_dbscan.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,19 @@ def test_predict_numpy(self):
2929
c = dbscan(self.iris)
3030
X = self.iris.X[::20]
3131
p = c(X)
32+
33+
def test_values(self):
34+
dbscan = DBSCAN(eps=1) # it clusters data in two classes
35+
c = dbscan(self.iris)
36+
table = self.iris
37+
p = c(table)
38+
39+
self.assertEqual(2, len(p.domain[0].values))
40+
self.assertSetEqual({"0", "1"}, set(p.domain[0].values))
41+
42+
table.X[0] = [100, 100, 100, 100] # we add a big outlier
43+
44+
p = c(table)
45+
46+
self.assertEqual(3, len(p.domain[0].values))
47+
self.assertSetEqual({"-1", "0", "1"}, set(p.domain[0].values))

0 commit comments

Comments
 (0)