Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,22 +437,31 @@ def unique(x, return_counts=False):
r = np.unique(x.data, return_counts=return_counts)
if not implicit_zeros:
return r

if return_counts:
zero_index = np.searchsorted(r[0], 0)
if explicit_zeros:
r[1][r[0] == 0.] += implicit_zeros
return r
return np.insert(r[0], 0, 0), np.insert(r[1], 0, implicit_zeros)
return np.insert(r[0], zero_index, 0), np.insert(r[1], zero_index, implicit_zeros)
else:
if explicit_zeros:
return r
return np.insert(r, 0, 0)
zero_index = np.searchsorted(r, 0)
return np.insert(r, zero_index, 0)


def nanunique(*args, **kwargs):
    """ Return unique values while disregarding missing (np.nan) values.
    Supports sparse or dense matrices.

    All positional and keyword arguments are forwarded unchanged to
    `unique`, so `return_counts=True` is supported as well.

    Returns
    -------
    Either the array of unique non-NaN values or, when `unique` returns a
    `(values, counts)` tuple, that tuple with the NaN entries removed from
    both arrays.
    """
    result = unique(*args, **kwargs)

    # `unique` returns a tuple only when counts were requested; values and
    # counts are aligned, so the same mask must be applied to both.
    if isinstance(result, tuple):
        result, counts = result
        non_nan_mask = ~np.isnan(result)
        return result[non_nan_mask], counts[non_nan_mask]

    return result[~np.isnan(result)]


def digitize(x, bins, right=False):
Expand Down
100 changes: 58 additions & 42 deletions Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import unittest
import warnings
from functools import wraps, partial
from itertools import chain
from functools import partial, wraps

import numpy as np
import scipy as sp
from scipy.sparse import csr_matrix, issparse, csc_matrix
from scipy.sparse import csr_matrix, issparse, lil_matrix, csc_matrix

from Orange.statistics.util import bincount, countnans, contingency, stats, \
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var
Expand Down Expand Up @@ -128,44 +126,6 @@ def test_nanmin_nanmax(self):
nanmax(X_sparse, axis=axis),
np.nanmax(X, axis=axis))

def test_unique(self):
    """Sparse `unique` must agree with `np.unique` for every matrix in
    `self.data`, both with and without counts."""
    for dense in self.data:
        sparse = csr_matrix(dense)

        # Values only.
        np.testing.assert_array_equal(
            unique(sparse, return_counts=False),
            np.unique(dense, return_counts=False),
        )

        # Values and counts: compare the returned pairs item by item.
        actual = unique(sparse, return_counts=True)
        desired = np.unique(dense, return_counts=True)
        for got, want in zip(actual, desired):
            np.testing.assert_array_equal(got, want)

def test_unique_explicit_zeros(self):
    """`unique` must give the same result whether or not zeros were
    assigned into the sparse matrix after construction."""
    untouched = csr_matrix(np.eye(3))
    with_zeros = csr_matrix(np.eye(3))

    # Set some off-diagonal cells to zero (intended to create explicitly
    # stored zeros); assigning into a CSR matrix triggers a
    # SparseEfficiencyWarning, which is irrelevant here.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", category=sp.sparse.SparseEfficiencyWarning)
        for cell in ((0, 1), (1, 0)):
            with_zeros[cell] = 0

    for with_counts in (False, True):
        np.testing.assert_array_equal(
            unique(untouched, return_counts=with_counts),
            unique(with_zeros, return_counts=with_counts),
        )

def test_nanunique(self):
    """`nanunique` on a sparse row containing a NaN returns only 0 and 1."""
    sparse_row = csr_matrix(np.array([0, 1, 1, np.nan]))
    without_nan = np.array([0, 1])

    np.testing.assert_array_equal(nanunique(sparse_row), without_nan)

def test_mean(self):
for X in self.data:
X_sparse = csr_matrix(X)
Expand Down Expand Up @@ -420,3 +380,59 @@ def test_weights_with_transposed_x(self, array):

expected = [3, 0, 2, 1]
np.testing.assert_equal(bincount(x, w)[0], expected)


class TestUnique(unittest.TestCase):
    """Tests for `unique` and `nanunique`.

    Methods decorated with `dense_sparse` receive an `array` constructor
    that is used to build the input from the shared fixture rows.
    """

    @staticmethod
    def _rows():
        """Return a fresh copy of the 3x6 fixture (contains two NaNs)."""
        # pylint: disable=bad-whitespace
        return [[-1., 1., 0., 2., 3., np.nan],
                [ 0., 0., 0., 3., 5., np.nan],
                [-1., 0., 0., 1., 7., 6.]]

    @dense_sparse
    def test_returns_unique_values(self, array):
        matrix = array(self._rows())
        # NOTE(review): two trailing NaNs assume the underlying np.unique
        # does not collapse NaNs; newer NumPy releases do -- confirm against
        # the supported NumPy version.
        expected = [-1, 0, 1, 2, 3, 5, 6, 7, np.nan, np.nan]

        values = unique(matrix, return_counts=False)
        np.testing.assert_equal(values, expected)

    @dense_sparse
    def test_returns_counts(self, array):
        matrix = array(self._rows())
        # One count per expected unique value, each NaN counted separately.
        expected = [2, 6, 2, 1, 2, 1, 1, 1, 1, 1]

        counts = unique(matrix, return_counts=True)[1]
        np.testing.assert_equal(counts, expected)

    def test_sparse_explicit_zeros(self):
        # Build through `lil_matrix` to avoid the sparse warning that
        # item assignment on a CSR matrix would raise.
        with_zeros = lil_matrix(np.eye(3))
        for cell in ((0, 1), (1, 0)):
            with_zeros[cell] = 0
        with_zeros = with_zeros.tocsr()

        # Reference: the plain identity matrix.
        identity = csr_matrix(np.eye(3))

        np.testing.assert_array_equal(
            unique(identity, return_counts=True),
            unique(with_zeros, return_counts=True),
        )

    @dense_sparse
    def test_nanunique_ignores_nans_in_values(self, array):
        matrix = array(self._rows())
        expected = [-1, 0, 1, 2, 3, 5, 6, 7]

        values = nanunique(matrix, return_counts=False)
        np.testing.assert_equal(values, expected)

    @dense_sparse
    def test_nanunique_ignores_nans_in_counts(self, array):
        matrix = array(self._rows())
        expected = [2, 6, 2, 1, 2, 1, 1, 1]

        counts = nanunique(matrix, return_counts=True)[1]
        np.testing.assert_equal(counts, expected)