Skip to content

Commit 00b5143

Browse files
committed
Orange.statistics.util: Add nanunique method
1 parent 95fe731 commit 00b5143

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

Orange/statistics/util.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,7 @@ def mean(x):
280280
n_values = np.prod(x.shape)
281281
return np.sum(x.data) / n_values
282282

283+
283284
def nanmean(x, axis=None):
284285
""" Equivalent of np.nanmean that supports sparse or dense matrices. """
285286
def nanmean_sparse(x):
@@ -297,6 +298,7 @@ def nanmean_sparse(x):
297298
else:
298299
raise NotImplementedError
299300

301+
300302
def unique(x, return_counts=False):
301303
""" Equivalent of np.unique that supports sparse or dense matrices. """
302304
if not sp.issparse(x):
@@ -316,3 +318,10 @@ def unique(x, return_counts=False):
316318
if explicit_zeros:
317319
return r
318320
return np.insert(r, 0, 0)
321+
322+
323+
def nanunique(x):
    """Return the unique values of `x`, leaving out missing (np.nan) entries.

    The distinct values are obtained through `unique`, so both sparse and
    dense matrices are supported.
    """
    values = unique(x)
    # Build a mask of the defined (non-NaN) entries and keep only those.
    is_defined = np.logical_not(np.isnan(values))
    return values[is_defined]

Orange/tests/test_statistics.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from scipy.sparse import csr_matrix
66

77
from Orange.statistics.util import bincount, countnans, contingency, stats, \
8-
nanmin, nanmax, unique, mean, nanmean
8+
nanmin, nanmax, unique, nanunique, mean, nanmean
99

1010

1111
class TestUtil(unittest.TestCase):
@@ -145,6 +145,13 @@ def test_unique_explicit_zeros(self):
145145
unique(x2, return_counts=True),
146146
)
147147

148+
def test_nanunique(self):
    """nanunique on a sparse row returns its unique values without NaN."""
    sparse_row = csr_matrix(np.array([0, 1, 1, np.nan]))
    expected = np.array([0, 1])
    np.testing.assert_array_equal(nanunique(sparse_row), expected)
154+
148155
def test_mean(self):
149156
for X in self.data:
150157
X_sparse = csr_matrix(X)

0 commit comments

Comments
 (0)