Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
It also patches bottleneck to contain these functions.
"""
from warnings import warn
from distutils.version import StrictVersion

import bottleneck as bn
import numpy as np
import bottleneck as bn
from scipy import sparse as sp
import scipy.stats.stats


def _count_nans_per_row_sparse(X, weights, dtype=None):
Expand Down Expand Up @@ -462,6 +464,16 @@ def nanmedian_sparse(x):
return _apply_func(x, np.nanmedian, nanmedian_sparse, axis=axis)


def nanmode(x, axis=0):
    """Return the mode along `axis`, with NaN for slices that are all NaN.

    A temporary replacement for a buggy scipy.stats.stats.mode from
    scipy < 1.2.0.

    Parameters
    ----------
    x : array_like
        Numeric data; it is converted once with ``np.asarray``, so lists
        and ``np.matrix`` instances are accepted as well as arrays.
    axis : int, optional
        Axis along which the mode is computed. Defaults to 0.

    Returns
    -------
    ModeResult
        The same named-tuple type ``scipy.stats.mode`` returns, with fields
        ``mode`` and ``count``. Both keep the reduced axis with length one.
        Where a slice consists only of NaNs, both ``mode`` and ``count``
        are NaN (so ``count`` is a float array).

    Warns
    -----
    DeprecationWarning
        If the installed scipy is 1.2.0 or newer.
    """
    import re  # local import: only the version check below needs it

    # Compare just the leading "major.minor". A strict version parser
    # rejects pre-release/development strings such as "1.2.0rc1" or
    # "1.3.0.dev0+abc" with ValueError, which would make this function
    # unusable on such builds.
    release = re.match(r"(\d+)\.(\d+)", scipy.__version__)
    if release is not None and tuple(map(int, release.groups())) >= (1, 2):
        warn("Use scipy.stats.mode in scipy >= 1.2.0", DeprecationWarning,
             stacklevel=2)

    # Convert once so the mask and the mode are computed on the same object.
    x = np.asarray(x)
    all_nan = np.isnan(x).all(axis=axis, keepdims=True)

    res = scipy.stats.mode(x, axis=axis)
    # Newer scipy drops the reduced axis by default while older versions
    # keep it; reshaping to the mask's shape gives one consistent layout
    # and prevents np.where from broadcasting to a wrong shape.
    mode = np.reshape(res.mode, all_nan.shape)
    count = np.reshape(res.count, all_nan.shape)

    # Rebuild the result with scipy's own result type instead of reaching
    # into the private scipy.stats.stats namespace.
    return type(res)(np.where(all_nan, np.nan, mode),
                     np.where(all_nan, np.nan, count))


def unique(x, return_counts=False):
""" Equivalent of np.unique that supports sparse or dense matrices. """
if not sp.issparse(x):
Expand Down
12 changes: 11 additions & 1 deletion Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from Orange.statistics.util import bincount, countnans, contingency, digitize, \
mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \
unique, var, nanstd, nanvar
unique, var, nanstd, nanvar, nanmode


def dense_sparse(test_case):
Expand Down Expand Up @@ -164,6 +164,16 @@ def test_nanmean(self):
nanmean(X_sparse),
np.nanmean(X))

def test_nanmode(self):
    """Check nanmode along both axes of a small array containing NaNs."""
    data = np.array([[np.nan, np.nan, 1, 1],
                     [2, np.nan, 1, 1]])

    # Along axis 0 the second column holds only NaNs, so both the mode
    # and the count are expected to be NaN there.
    column_modes, column_counts = nanmode(data, 0)
    np.testing.assert_array_equal(column_modes, [[2, np.nan, 1, 1]])
    np.testing.assert_array_equal(column_counts, [[1, np.nan, 2, 2]])

    # Along axis 1 every row contains the value 1 twice.
    row_modes, row_counts = nanmode(data, 1)
    np.testing.assert_array_equal(row_modes, [[1], [1]])
    np.testing.assert_array_equal(row_counts, [[2], [2]])

@dense_sparse
def test_nanmedian(self, array):
for X in self.data:
Expand Down
4 changes: 3 additions & 1 deletion Orange/widgets/data/owfeaturestatistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ def __compute_statistics(self):
def __mode(x, *args, **kwargs):
if sp.issparse(x):
x = x.todense(order="C")
return ss.mode(x, *args, **kwargs)[0]
# return ss.mode(x, *args, **kwargs)[0]
return ut.nanmode(x, *args, **kwargs)[0] # Temporary replacement for scipy < 1.2.0

self._center = self.__compute_stat(
matrices,
Expand Down Expand Up @@ -787,6 +788,7 @@ def set_data(self, data):

if data is not None:
self.color_var_model.set_domain(data.domain)
self.color_var = None
if self.data.domain.class_vars:
self.color_var = self.data.domain.class_vars[0]
else:
Expand Down
54 changes: 43 additions & 11 deletions Orange/widgets/data/tests/test_owfeaturestatistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \
DiscreteVariable, TimeVariable
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.base import WidgetTest, datasets
from Orange.widgets.tests.utils import simulate
from Orange.widgets.data.owfeaturestatistics import \
OWFeatureStatistics
Expand Down Expand Up @@ -187,7 +187,7 @@ def _wrapper(self):
return _wrapper


class TestVariableTypes(WidgetTest):
class TestVariousDataSets(WidgetTest):
def setUp(self):
self.widget = self.create_widget(
OWFeatureStatistics, stored_settings={'auto_commit': False}
Expand All @@ -207,50 +207,82 @@ def run_through_variables(self):

@table_dense_sparse
def test_runs_on_iris(self, prepare_table):
self.send_signal('Data', prepare_table(Table('iris')))
self.send_signal(self.widget.Inputs.data, prepare_table(Table('iris')))

def test_does_not_crash_on_data_removal(self):
self.send_signal('Data', make_table(discrete))
self.send_signal('Data', None)
self.send_signal(self.widget.Inputs.data, make_table(discrete))
self.send_signal(self.widget.Inputs.data, None)

# No missing values
@table_dense_sparse
def test_on_data_with_no_missing_values(self, prepare_table):
data = make_table([continuous_full, rgb_full, ints_full, time_full])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

@table_dense_sparse
def test_on_data_with_no_missing_values_full_domain(self, prepare_table):
data = make_table([continuous_full, time_full], [ints_full], [rgb_full])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

# With missing values
@table_dense_sparse
def test_on_data_with_missing_continuous_values(self, prepare_table):
data = make_table([continuous_full, continuous_missing, rgb_full, ints_full, time_full])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

@table_dense_sparse
def test_on_data_with_missing_discrete_values(self, prepare_table):
data = make_table([continuous_full, rgb_full, rgb_missing, ints_full, time_full])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

@table_dense_sparse
def test_on_data_with_discrete_values_all_the_same(self, prepare_table):
data = make_table([continuous_full], [ints_same, rgb_same])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

@table_dense_sparse
def test_on_data_with_continuous_values_all_the_same(self, prepare_table):
data = make_table([ints_full, ints_same], [continuous_same, continuous_full])
self.send_signal('Data', prepare_table(data))
self.send_signal(self.widget.Inputs.data, prepare_table(data))
self.run_through_variables()

def test_switching_to_dataset_with_no_target_var(self):
    """Going from a data set that has a target variable to one without
    a target variable must not crash the widget."""
    with_target = make_table(
        [continuous_full, ints_full], [ints_same, rgb_same]
    )
    without_target = make_table([rgb_full, ints_full])

    # Send the tables one after the other, rendering after each signal.
    for table in (with_target, without_target):
        self.send_signal(self.widget.Inputs.data, table)
        self.force_render_table()

def test_switching_to_dataset_with_target_var(self):
    """Going from a data set without a target variable to one that has
    a target variable must not crash the widget."""
    without_target = make_table([rgb_full, ints_full])
    with_target = make_table(
        [continuous_full, ints_full], [ints_same, rgb_same]
    )

    # Send the tables one after the other, rendering after each signal.
    for table in (without_target, with_target):
        self.send_signal(self.widget.Inputs.data, table)
        self.force_render_table()

def test_on_edge_case_datasets(self):
    """Feed every data set yielded by ``datasets.datasets()`` to the widget
    and render the table; any exception is re-raised as an AssertionError
    that names the offending data set."""
    data_input = self.widget.Inputs.data
    for data in datasets.datasets():
        try:
            self.send_signal(data_input, data)
            self.force_render_table()
        except Exception as err:
            # Chain the original error so its traceback stays visible.
            raise AssertionError(f"Failed on `{data.name}`") from err


def select_rows(rows: List[int], widget: OWFeatureStatistics):
"""Since the widget sorts the rows, selecting rows isn't trivial."""
Expand Down