Skip to content

Commit 6ad4ad4

Browse files
authored
Merge pull request #2915 from pavlin-policar/kmeans-fixes
[FIX] KMeans clear results on k change, do not recluster
2 parents 68346c2 + 9020874 commit 6ad4ad4

File tree

2 files changed

+58
-16
lines changed

2 files changed

+58
-16
lines changed

Orange/widgets/unsupervised/owkmeans.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ def migrate_settings(cls, settings, version):
136136
# type: (Dict, int) -> None
137137
if version < 2:
138138
if 'auto_apply' in settings:
139-
settings['auto_commit'] = settings['auto_apply']
140-
del settings['auto_apply']
139+
settings['auto_commit'] = settings.get('auto_apply', True)
140+
settings.pop('auto_apply', None)
141141

142142
def __init__(self):
143143
super().__init__()
@@ -151,11 +151,7 @@ def __init__(self):
151151
layout = QGridLayout()
152152
bg = gui.radioButtonsInBox(
153153
self.controlArea, self, "optimize_k", orientation=layout,
154-
box="Number of Clusters",
155-
# Because commit is only wrapped when creating the auto_commit
156-
# buttons, we can't pass it as the callback here, so we can add
157-
# this hacky lambda to call the wrapped commit when necessary
158-
callback=lambda: self.commit(),
154+
box="Number of Clusters", callback=self.update_method,
159155
)
160156

161157
layout.addWidget(
@@ -229,18 +225,25 @@ def adjustSize(self):
229225
s = self.sizeHint()
230226
self.resize(s)
231227

228+
def update_method(self):
229+
self.table_model.clear_scores()
230+
self.commit()
231+
232232
def update_k(self):
233233
self.optimize_k = False
234+
self.table_model.clear_scores()
234235
self.commit()
235236

236237
def update_from(self):
237238
self.k_to = max(self.k_from + 1, self.k_to)
238239
self.optimize_k = True
240+
self.table_model.clear_scores()
239241
self.commit()
240242

241243
def update_to(self):
242244
self.k_from = min(self.k_from, self.k_to - 1)
243245
self.optimize_k = True
246+
self.table_model.clear_scores()
244247
self.commit()
245248

246249
def enough_data_instances(self, k):
@@ -395,17 +398,14 @@ def commit(self):
395398

396399
QTimer.singleShot(100, self.adjustSize)
397400

398-
def invalidate(self, force=False):
401+
def invalidate(self):
399402
self.cancel()
400403
self.Error.clear()
401404
self.Warning.clear()
402405
self.clusterings = {}
403406
self.table_model.clear_scores()
404407

405-
if force:
406-
self.unconditional_commit()
407-
else:
408-
self.commit()
408+
self.commit()
409409

410410
def update_results(self):
411411
scores = [
@@ -437,7 +437,7 @@ def send_data(self):
437437
k = self.k
438438

439439
km = self.clusterings.get(k)
440-
if not self.data or km is None or isinstance(km, str):
440+
if self.data is None or km is None or isinstance(km, str):
441441
self.Outputs.annotated_data.send(None)
442442
self.Outputs.centroids.send(None)
443443
return
@@ -469,8 +469,14 @@ def send_data(self):
469469
@Inputs.data
470470
@check_sql_input
471471
def set_data(self, data):
472-
self.data = data
473-
self.invalidate()
472+
self.data, old_data = data, self.data
473+
474+
# Do not needlessly recluster the data if X hasn't changed
475+
if old_data and self.data and np.array_equal(self.data.X, old_data.X):
476+
if self.auto_commit:
477+
self.send_data()
478+
else:
479+
self.invalidate()
474480

475481
def send_report(self):
476482
# False positives (Setting is not recognized as int)

Orange/widgets/unsupervised/tests/test_owkmeans.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from AnyQt.QtWidgets import QRadioButton
99

1010
import Orange.clustering
11-
from Orange.data import Table
11+
from Orange.data import Table, Domain
1212
from Orange.widgets import gui
1313
from Orange.widgets.tests.base import WidgetTest
1414
from Orange.widgets.unsupervised.owkmeans import OWKMeans, ClusterTableModel
@@ -377,6 +377,42 @@ def test_invalidate_clusterings_cancels_jobs(self):
377377

378378
self.assertEqual(widget.clusterings, {})
379379

380+
def test_do_not_recluster_on_same_data(self):
381+
"""Do not recluster data points when targets or metas change."""
382+
383+
# Prepare some dummy data
384+
x = np.eye(5)
385+
y1, y2 = np.ones((5, 1)), np.ones((5, 2))
386+
meta1, meta2 = np.ones((5, 1)), np.ones((5, 2))
387+
388+
table1 = Table.from_numpy(
389+
domain=Domain.from_numpy(X=x, Y=y1, metas=meta1),
390+
X=x, Y=y1, metas=meta1,
391+
)
392+
# X is same, should not cause update
393+
table2 = Table.from_numpy(
394+
domain=Domain.from_numpy(X=x, Y=y2, metas=meta2),
395+
X=x, Y=y2, metas=meta2,
396+
)
397+
# X is different, should cause update
398+
table3 = table1.copy()
399+
table3.X[:, 0] = 1
400+
401+
with patch.object(self.widget, 'commit') as commit:
402+
self.send_signal(self.widget.Inputs.data, table1)
403+
self.commit_and_wait()
404+
call_count = commit.call_count
405+
406+
# Sending data with same X should not recompute the clustering
407+
self.send_signal(self.widget.Inputs.data, table2)
408+
self.commit_and_wait()
409+
self.assertEqual(call_count, commit.call_count)
410+
411+
# Sending data with different X should recompute the clustering
412+
self.send_signal(self.widget.Inputs.data, table3)
413+
self.commit_and_wait()
414+
self.assertEqual(call_count + 1, commit.call_count)
415+
380416

381417
if __name__ == "__main__":
382418
unittest.main()

0 commit comments

Comments
 (0)