diff --git a/Orange/clustering/dbscan.py b/Orange/clustering/dbscan.py
index d69bb019d19..7481f1c34ec 100644
--- a/Orange/clustering/dbscan.py
+++ b/Orange/clustering/dbscan.py
@@ -1,5 +1,5 @@
 import sklearn.cluster as skl_cluster
-from numpy import ndarray
+from numpy import ndarray, unique
 
 from Orange.data import Table, DiscreteVariable, Domain, Instance
 from Orange.projection import SklProjector, Projection
@@ -38,11 +38,11 @@ def __call__(self, data):
             if data.domain is not self.pre_domain:
                 data = data.transform(self.pre_domain)
             y = self.proj.fit_predict(data.X)
-            vals = [-1] + list(self.proj.core_sample_indices_)
+            vals, indices = unique(y, return_inverse=True)
             c = DiscreteVariable(name='Core sample index',
                                  values=[str(v) for v in vals])
             domain = Domain([c])
-            return Table(domain, y.reshape(len(y), 1))
+            return Table(domain, indices.reshape(len(y), 1))
 
         elif isinstance(data, Instance):
             if data.domain is not self.pre_domain:
diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index bf42775a7b6..5065fe2482c 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -29,3 +29,19 @@ def test_predict_numpy(self):
         c = dbscan(self.iris)
         X = self.iris.X[::20]
         p = c(X)
+
+    def test_values(self):
+        dbscan = DBSCAN(eps=1)  # it clusters data in two classes
+        c = dbscan(self.iris)
+        table = self.iris
+        p = c(table)
+
+        self.assertEqual(2, len(p.domain[0].values))
+        self.assertSetEqual({"0", "1"}, set(p.domain[0].values))
+
+        table.X[0] = [100, 100, 100, 100]  # we add a big outlier
+
+        p = c(table)
+
+        self.assertEqual(3, len(p.domain[0].values))
+        self.assertSetEqual({"-1", "0", "1"}, set(p.domain[0].values))