Skip to content

Commit 0359a98

Browse files
Statistics.countnans: Fix sparse implementation and add axis support
1 parent 1f97b66 commit 0359a98

File tree

2 files changed

+56 -13 lines changed

2 files changed

+56 -13 lines changed

Orange/statistics/util.py

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,12 @@
1212

1313
def _count_nans_per_row_sparse(X, weights):
1414
""" Count the number of nans (undefined) values per row. """
15-
items_per_row = 1 if X.ndim == 1 else X.shape[1]
16-
counts = np.ones(X.shape[0]) * items_per_row
17-
nnz_per_row = np.bincount(X.indices, minlength=len(counts))
18-
counts -= nnz_per_row
15+
counts = np.fromiter((np.isnan(row.data).sum() for row in X), dtype=np.float)
16+
1917
if weights is not None:
2018
counts *= weights
21-
return np.sum(counts)
19+
20+
return counts
2221

2322

2423
def bincount(X, max_val=None, weights=None, minlength=None):
@@ -54,7 +53,7 @@ def bincount(X, max_val=None, weights=None, minlength=None):
5453
return bc, nans
5554

5655

57-
def countnans(X, weights=None, axis=None, dtype=None, keepdims=False):
56+
def countnans(X, weights=None, axis=None):
5857
"""
5958
Count the undefined elements in arr along given axis.
6059
@@ -64,10 +63,12 @@ def countnans(X, weights=None, axis=None, dtype=None, keepdims=False):
6463
weights : array_like
6564
Weights to weight the nans with, before or after counting (depending
6665
on the weights shape).
66+
axis : Optional[int]
6767
6868
Returns
6969
-------
7070
counts
71+
7172
"""
7273
if not sp.issparse(X):
7374
X = np.asanyarray(X)
@@ -78,12 +79,18 @@ def countnans(X, weights=None, axis=None, dtype=None, keepdims=False):
7879
if weights is not None and weights.shape != X.shape:
7980
counts = counts * weights
8081
else:
81-
if any(attr is not None for attr in [axis, dtype]) or \
82-
keepdims is not False:
83-
raise ValueError('Arguments axis, dtype and keepdims'
84-
'are not yet supported on sparse data!')
82+
assert axis in [None, 0, 1], 'Only axis 0 and 1 are currently supported'
83+
arr = X if axis == 1 else X.T
84+
85+
if weights is not None:
86+
weights = weights if axis == 1 else weights.T
87+
88+
arr = arr.tocsr()
89+
counts = _count_nans_per_row_sparse(arr, weights)
90+
91+
if axis is None:
92+
counts = counts.sum()
8593

86-
counts = _count_nans_per_row_sparse(X, weights)
8794
return counts
8895

8996

Orange/tests/test_statistics.py

Lines changed: 38 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,44 @@ def test_bincount(self):
3232
np.testing.assert_equal(hist, [1, 1, 0, 1])
3333

3434
def test_countnans(self):
35-
np.testing.assert_equal(countnans([[1, np.nan],
36-
[2, np.nan]], axis=0), [0, 2])
35+
x = [[1, np.nan],
36+
[2, np.nan]]
37+
np.testing.assert_equal(
38+
countnans(x), 2, 'Countnans fails on dense data')
39+
np.testing.assert_equal(
40+
countnans(csr_matrix(x)), 2, 'Countnans fails on sparse data.')
41+
42+
def test_countnans_columns(self):
43+
x = [[1, np.nan],
44+
[2, np.nan]]
45+
np.testing.assert_equal(
46+
countnans(x, axis=0), [0, 2],
47+
'Countnans fails on dense data with `axis=0`')
48+
np.testing.assert_equal(
49+
countnans(csr_matrix(x), axis=0), [0, 2],
50+
'Countnans fails on sparse data with `axis=0`')
51+
52+
def test_countnans_rows(self):
53+
x = [[1, np.nan],
54+
[2, np.nan]]
55+
np.testing.assert_equal(
56+
countnans(x, axis=1), [1, 1],
57+
'Countnans fails on dense data with `axis=1`')
58+
np.testing.assert_equal(
59+
countnans(csr_matrix(x), axis=1), [1, 1],
60+
'Countnans fails on sparse data with `axis=1`')
61+
62+
def test_countnans_weights(self):
63+
x = [[1, np.nan],
64+
[2, np.nan]]
65+
w = np.array([[1, 1],
66+
[2, 2]])
67+
np.testing.assert_equal(countnans(x, weights=w, axis=0), [0, 3])
68+
np.testing.assert_equal(countnans(x, weights=w, axis=1), [1, 2])
69+
70+
w = np.array([1, 2])
71+
np.testing.assert_equal(countnans(x, weights=w, axis=0), [0, 4])
72+
np.testing.assert_equal(countnans(x, weights=w, axis=1), [1, 2])
3773

3874
def test_contingency(self):
3975
x = np.array([0, 1, 0, 2, np.nan])

0 commit comments

Comments
 (0)