diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index 81999739f15..9890cfcdd4c 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -5,10 +5,12 @@ It also patches bottleneck to contain these functions. """ from warnings import warn +from distutils.version import StrictVersion -import bottleneck as bn import numpy as np +import bottleneck as bn from scipy import sparse as sp +import scipy.stats.stats def _count_nans_per_row_sparse(X, weights, dtype=None): @@ -462,6 +464,16 @@ def nanmedian_sparse(x): return _apply_func(x, np.nanmedian, nanmedian_sparse, axis=axis) +def nanmode(x, axis=0): + """ A temporary replacement for a buggy scipy.stats.stats.mode from scipy < 1.2.0""" + if StrictVersion(scipy.__version__) >= StrictVersion("1.2.0"): + warn("Use scipy.stats.mode in scipy >= 1.2.0", DeprecationWarning) + nans = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis] + res = scipy.stats.stats.mode(x, axis) + return scipy.stats.stats.ModeResult(np.where(nans, np.nan, res.mode), + np.where(nans, np.nan, res.count)) + + def unique(x, return_counts=False): """ Equivalent of np.unique that supports sparse or dense matrices. """ if not sp.issparse(x): diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index 99570f0a19e..d5b813622b4 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -7,7 +7,7 @@ from Orange.statistics.util import bincount, countnans, contingency, digitize, \ mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \ - unique, var, nanstd, nanvar + unique, var, nanstd, nanvar, nanmode def dense_sparse(test_case): @@ -164,6 +164,16 @@ def test_nanmean(self): nanmean(X_sparse), np.nanmean(X)) + def test_nanmode(self): + X = np.array([[np.nan, np.nan, 1, 1], + [2, np.nan, 1, 1]]) + mode, count = nanmode(X, 0) + np.testing.assert_array_equal(mode, [[2, np.nan, 1, 1]]) + np.testing.assert_array_equal(count, [[1, np.nan, 2, 2]]) + mode, count = nanmode(X, 1) + np.testing.assert_array_equal(mode, [[1], [1]]) + np.testing.assert_array_equal(count, [[2], [2]]) + @dense_sparse def test_nanmedian(self, array): for X in self.data: diff --git a/Orange/widgets/data/owfeaturestatistics.py b/Orange/widgets/data/owfeaturestatistics.py index f9a60ea1e6b..46c965d08ae 100644 --- a/Orange/widgets/data/owfeaturestatistics.py +++ b/Orange/widgets/data/owfeaturestatistics.py @@ -245,7 +245,8 @@ def __compute_statistics(self): def __mode(x, *args, **kwargs): if sp.issparse(x): x = x.todense(order="C") - return ss.mode(x, *args, **kwargs)[0] + # return ss.mode(x, *args, **kwargs)[0] + return ut.nanmode(x, *args, **kwargs)[0] # Temporary replacement for scipy < 1.2.0 self._center = self.__compute_stat( matrices, @@ -787,6 +788,7 @@ def set_data(self, data): if data is not None: self.color_var_model.set_domain(data.domain) + self.color_var = None if self.data.domain.class_vars: self.color_var = self.data.domain.class_vars[0] else: diff --git a/Orange/widgets/data/tests/test_owfeaturestatistics.py b/Orange/widgets/data/tests/test_owfeaturestatistics.py index 6166d211b69..a8296d74e4d 100644 --- a/Orange/widgets/data/tests/test_owfeaturestatistics.py +++ b/Orange/widgets/data/tests/test_owfeaturestatistics.py @@ -10,7 +10,7 @@ from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \ DiscreteVariable, TimeVariable -from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.tests.base import WidgetTest, datasets from Orange.widgets.tests.utils import simulate from Orange.widgets.data.owfeaturestatistics import \ OWFeatureStatistics @@ -187,7 +187,7 @@ def _wrapper(self): return _wrapper -class TestVariableTypes(WidgetTest): +class TestVariousDataSets(WidgetTest): def setUp(self): self.widget = self.create_widget( OWFeatureStatistics, stored_settings={'auto_commit': False} @@ -207,50 +207,82 @@ def run_through_variables(self): @table_dense_sparse def test_runs_on_iris(self, prepare_table): - self.send_signal('Data', prepare_table(Table('iris'))) + self.send_signal(self.widget.Inputs.data, prepare_table(Table('iris'))) def test_does_not_crash_on_data_removal(self): - self.send_signal('Data', make_table(discrete)) - self.send_signal('Data', None) + self.send_signal(self.widget.Inputs.data, make_table(discrete)) + self.send_signal(self.widget.Inputs.data, None) # No missing values @table_dense_sparse def test_on_data_with_no_missing_values(self, prepare_table): data = make_table([continuous_full, rgb_full, ints_full, time_full]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() @table_dense_sparse def test_on_data_with_no_missing_values_full_domain(self, prepare_table): data = make_table([continuous_full, time_full], [ints_full], [rgb_full]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() # With missing values @table_dense_sparse def test_on_data_with_missing_continuous_values(self, prepare_table): data = make_table([continuous_full, continuous_missing, rgb_full, ints_full, time_full]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() @table_dense_sparse def test_on_data_with_missing_discrete_values(self, prepare_table): data = make_table([continuous_full, rgb_full, rgb_missing, ints_full, time_full]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() @table_dense_sparse def test_on_data_with_discrete_values_all_the_same(self, prepare_table): data = make_table([continuous_full], [ints_same, rgb_same]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() @table_dense_sparse def test_on_data_with_continuous_values_all_the_same(self, prepare_table): data = make_table([ints_full, ints_same], [continuous_same, continuous_full]) - self.send_signal('Data', prepare_table(data)) + self.send_signal(self.widget.Inputs.data, prepare_table(data)) self.run_through_variables() + def test_switching_to_dataset_with_no_target_var(self): + """Switching from data set with target variable to a data set with + no target variable should not result in crash.""" + data1 = make_table([continuous_full, ints_full], [ints_same, rgb_same]) + data2 = make_table([rgb_full, ints_full]) + + self.send_signal(self.widget.Inputs.data, data1) + self.force_render_table() + + self.send_signal(self.widget.Inputs.data, data2) + self.force_render_table() + + def test_switching_to_dataset_with_target_var(self): + """Switching from data set with no target variable to a data set with + a target variable should not result in crash.""" + data1 = make_table([rgb_full, ints_full]) + data2 = make_table([continuous_full, ints_full], [ints_same, rgb_same]) + + self.send_signal(self.widget.Inputs.data, data1) + self.force_render_table() + + self.send_signal(self.widget.Inputs.data, data2) + self.force_render_table() + + def test_on_edge_case_datasets(self): + for data in datasets.datasets(): + try: + self.send_signal(self.widget.Inputs.data, data) + self.force_render_table() + except Exception as e: + raise AssertionError(f"Failed on `{data.name}`") from e + def select_rows(rows: List[int], widget: OWFeatureStatistics): """Since the widget sorts the rows, selecting rows isn't trivial."""