Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions Orange/widgets/unsupervised/owdistances.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import bottleneck as bn
import numpy
from AnyQt.QtCore import Qt
from scipy.sparse import issparse

Expand Down Expand Up @@ -48,6 +47,8 @@ class Error(OWWidget.Error):
dense_metric_sparse_data = Msg("Selected metric does not support sparse data")
empty_data = Msg("Empty data set")
mahalanobis_error = Msg("{}")
distances_memory_error = Msg("Not enough memory.")
distances_value_error = Msg("Error occurred while calculating distances\n{}")

class Warning(OWWidget.Warning):
ignoring_discrete = Msg("Ignoring discrete features")
Expand All @@ -60,12 +61,12 @@ def __init__(self):

gui.radioButtons(self.controlArea, self, "axis", ["Rows", "Columns"],
box="Distances between", callback=self._invalidate
)
)
self.metrics_combo = gui.comboBox(self.controlArea, self, "metric_idx",
box="Distance Metric",
items=[m.name for m in METRICS],
callback=self._invalidate
)
)
box = gui.auto_commit(self.buttonsArea, self, "autocommit", "Apply",
box=False, checkbox_label="Apply automatically")
box.layout().insertWidget(0, self.report_button)
Expand Down Expand Up @@ -105,34 +106,41 @@ def commit(self):
self.send("Distances", self.compute_distances(metric, self.data))

def compute_distances(self, metric, data):
self.clear_messages()
def checks(metric, data):
if data is None:
return

if data is None:
return
if issparse(data.X) and not metric.supports_sparse:
self.Error.dense_metric_sparse_data()
return

if issparse(data.X) and not metric.supports_sparse:
self.Error.dense_metric_sparse_data()
return
if not any(a.is_continuous for a in data.domain.attributes):
self.Error.no_continuous_features()
return

if not any(a.is_continuous for a in data.domain.attributes):
self.Error.no_continuous_features()
return
needs_preprocessing = False
if any(a.is_discrete for a in self.data.domain.attributes):
self.Warning.ignoring_discrete()
needs_preprocessing = True

needs_preprocessing = False
if any(a.is_discrete for a in self.data.domain.attributes):
self.Warning.ignoring_discrete()
needs_preprocessing = True
if not issparse(data.X) and bn.anynan(data.X):
self.Warning.imputing_data()
needs_preprocessing = True

if not issparse(data.X) and bn.anynan(data.X):
self.Warning.imputing_data()
needs_preprocessing = True
if needs_preprocessing:
# removes discrete features and imputes data
data = distance._preprocess(data)

if needs_preprocessing:
# removes discrete features and imputes data
data = distance._preprocess(data)
if not data.X.size:
self.Error.empty_data()
return

return data

self.clear_messages()

if not data.X.size:
self.Error.empty_data()
data = checks(metric, data)
if data is None:
return

if isinstance(metric, distance.MahalanobisDistance):
Expand All @@ -144,7 +152,15 @@ def compute_distances(self, metric, data):
self.Error.mahalanobis_error(e)
return

return metric(data, data, 1 - self.axis, impute=True)
try:
met = metric(data, data, 1 - self.axis, impute=True)
except ValueError as e:
self.Error.distances_value_error(e)
return
except MemoryError:
self.Error.distances_memory_error()
return
return met

def _invalidate(self):
self._checksparse()
Expand Down
20 changes: 20 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owdistances.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
from unittest.mock import Mock

import numpy as np

from Orange.data import Table
Expand Down Expand Up @@ -62,3 +64,21 @@ def test_mahalanobis_error(self):
self.send_signal("Data", data2)
self.assertEqual(self.widget.Error.mahalanobis_error.is_shown(), bad2)
self.assertEqual(self.get_output("Distances") is not None, out2)

def test_too_big_array(self):
    """
    The widget reports an error instead of crashing when the distance
    computation fails, e.g. because the resulting array is too large.

    A metric raising ValueError must show ``distances_value_error``; a
    metric raising MemoryError must show ``distances_memory_error`` as
    the only active error.
    GH-2315
    """
    widget = self.widget

    # No errors may be active before any data is sent.
    self.assertEqual(len(widget.Error.active), 0)
    self.send_signal("Data", self.iris)

    # A metric that fails with ValueError.
    failing_metric = Mock(side_effect=ValueError)
    widget.compute_distances(failing_metric, self.iris)
    self.assertTrue(widget.Error.distances_value_error.is_shown())

    # A metric that fails with MemoryError; exactly one error should be
    # active afterwards.
    failing_metric = Mock(side_effect=MemoryError)
    widget.compute_distances(failing_metric, self.iris)
    self.assertEqual(len(widget.Error.active), 1)
    self.assertTrue(widget.Error.distances_memory_error.is_shown())