From 40396e392f184aa9e70348039f43cfb852d701f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 30 May 2019 13:10:25 +0200
Subject: [PATCH] DBSCAN: fix indices and tests
---
Orange/clustering/dbscan.py | 6 +++---
Orange/tests/test_clustering_dbscan.py | 16 ++++++++++++++++
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/Orange/clustering/dbscan.py b/Orange/clustering/dbscan.py
index d69bb019d19..7481f1c34ec 100644
--- a/Orange/clustering/dbscan.py
+++ b/Orange/clustering/dbscan.py
@@ -1,5 +1,5 @@
import sklearn.cluster as skl_cluster
-from numpy import ndarray
+from numpy import ndarray, unique
from Orange.data import Table, DiscreteVariable, Domain, Instance
from Orange.projection import SklProjector, Projection
@@ -38,11 +38,11 @@ def __call__(self, data):
if data.domain is not self.pre_domain:
data = data.transform(self.pre_domain)
y = self.proj.fit_predict(data.X)
- vals = [-1] + list(self.proj.core_sample_indices_)
+ vals, indices = unique(y, return_inverse=True)
c = DiscreteVariable(name='Core sample index',
values=[str(v) for v in vals])
domain = Domain([c])
- return Table(domain, y.reshape(len(y), 1))
+ return Table(domain, indices.reshape(len(y), 1))
elif isinstance(data, Instance):
if data.domain is not self.pre_domain:
diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index bf42775a7b6..5065fe2482c 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -29,3 +29,19 @@ def test_predict_numpy(self):
c = dbscan(self.iris)
X = self.iris.X[::20]
p = c(X)
+
+ def test_values(self):
+ dbscan = DBSCAN(eps=1) # with eps=1 every iris row lands in one of two clusters ("0"/"1"), no noise label
+ c = dbscan(self.iris)
+ table = self.iris
+ p = c(table)
+
+ self.assertEqual(2, len(p.domain[0].values))
+ self.assertSetEqual({"0", "1"}, set(p.domain[0].values))
+
+ table.X[0] = [100, 100, 100, 100] # overwrite row 0 with a far outlier so an extra "-1" value is expected
+
+ p = c(table)
+
+ self.assertEqual(3, len(p.domain[0].values))
+ self.assertSetEqual({"-1", "0", "1"}, set(p.domain[0].values))