Skip to content

Commit 39cdce6

Browse files
committed
stats.utils: Don't count zeros as nans
1 parent a25a9d2 commit 39cdce6

File tree

2 files changed

+58
-31
lines changed

2 files changed

+58
-31
lines changed

Orange/statistics/util.py

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -344,31 +344,22 @@ def stats(X, weights=None, compute_variance=False):
344344

345345
if X.size and is_numeric:
346346
if is_sparse:
347+
nans = countnans(X, axis=0)
347348
X = X.tocsc()
349+
else:
350+
nans = np.isnan(X).sum(axis=0)
348351
if compute_variance:
349352
means, vars = nan_mean_var(X, axis=0, weights=weights)
350353
else:
351354
means = nanmean(X, axis=0, weights=weights)
352355
vars = np.zeros(X.shape[1] if X.ndim == 2 else 1)
353-
354-
if X.size and is_numeric and not is_sparse:
355-
nans = np.isnan(X).sum(axis=0)
356-
return np.column_stack((
357-
np.nanmin(X, axis=0),
358-
np.nanmax(X, axis=0),
359-
means,
360-
vars,
361-
nans,
362-
X.shape[0] - nans))
363-
elif is_sparse and X.size:
364-
non_zero = np.bincount(X.nonzero()[1], minlength=X.shape[1])
365356
return np.column_stack((
366357
nanmin(X, axis=0),
367358
nanmax(X, axis=0),
368359
means,
369360
vars,
370-
X.shape[0] - non_zero,
371-
non_zero))
361+
nans,
362+
X.shape[0] - nans))
372363
else:
373364
if X.ndim == 1:
374365
X = X[:, None]

Orange/tests/test_statistics.py

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,26 +107,26 @@ def test_stats(self):
107107

108108
def test_stats_sparse(self):
109109
X = csr_matrix(np.identity(5))
110-
np.testing.assert_equal(stats(X), [[0, 1, .2, 0, 4, 1],
111-
[0, 1, .2, 0, 4, 1],
112-
[0, 1, .2, 0, 4, 1],
113-
[0, 1, .2, 0, 4, 1],
114-
[0, 1, .2, 0, 4, 1]])
110+
np.testing.assert_equal(stats(X), [[0, 1, .2, 0, 0, 5],
111+
[0, 1, .2, 0, 0, 5],
112+
[0, 1, .2, 0, 0, 5],
113+
[0, 1, .2, 0, 0, 5],
114+
[0, 1, .2, 0, 0, 5]])
115115

116116
# assure last two columns have just zero elements
117117
X = X[:3]
118-
np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 2, 1],
119-
[0, 1, 1/3, 0, 2, 1],
120-
[0, 1, 1/3, 0, 2, 1],
121-
[0, 0, 0, 0, 3, 0],
122-
[0, 0, 0, 0, 3, 0]])
118+
np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 0, 3],
119+
[0, 1, 1/3, 0, 0, 3],
120+
[0, 1, 1/3, 0, 0, 3],
121+
[0, 0, 0, 0, 0, 3],
122+
[0, 0, 0, 0, 0, 3]])
123123

124124
r = stats(X, compute_variance=True)
125-
np.testing.assert_almost_equal(r, [[0, 1, 1/3, 2/9, 2, 1],
126-
[0, 1, 1/3, 2/9, 2, 1],
127-
[0, 1, 1/3, 2/9, 2, 1],
128-
[0, 0, 0, 0, 3, 0],
129-
[0, 0, 0, 0, 3, 0]])
125+
np.testing.assert_almost_equal(r, [[0, 1, 1/3, 2/9, 0, 3],
126+
[0, 1, 1/3, 2/9, 0, 3],
127+
[0, 1, 1/3, 2/9, 0, 3],
128+
[0, 0, 0, 0, 0, 3],
129+
[0, 0, 0, 0, 0, 3]])
130130

131131
def test_stats_weights(self):
132132
X = np.arange(4).reshape(2, 2).astype(float)
@@ -152,11 +152,11 @@ def test_stats_weights_sparse(self):
152152
X = np.arange(4).reshape(2, 2).astype(float)
153153
X = csr_matrix(X)
154154
weights = np.array([1, 3])
155-
np.testing.assert_equal(stats(X, weights), [[0, 2, 1.5, 0, 1, 1],
155+
np.testing.assert_equal(stats(X, weights), [[0, 2, 1.5, 0, 0, 2],
156156
[1, 3, 2.5, 0, 0, 2]])
157157

158158
np.testing.assert_equal(stats(X, weights, compute_variance=True),
159-
[[0, 2, 1.5, 0.75, 1, 1],
159+
[[0, 2, 1.5, 0.75, 0, 2],
160160
[1, 3, 2.5, 0.75, 0, 2]])
161161

162162
def test_stats_non_numeric(self):
@@ -170,6 +170,42 @@ def test_stats_non_numeric(self):
170170
[np.inf, -np.inf, 0, 0, 2, 1],
171171
[np.inf, -np.inf, 0, 0, 0, 3]])
172172

173+
def test_stats_nancounts(self):
174+
arr = np.array([[1, 4, 9],
175+
[-2, 10, 0],
176+
[0, np.nan, np.nan],
177+
[0, np.nan, 0]])
178+
179+
expected = [[-2, 1, -0.25, (1.25 ** 2 + 1.75 ** 2 + .25 ** 2 + .25 ** 2) / 4, 0, 4],
180+
[4, 10, 7, 3 ** 2, 2, 2],
181+
[0, 9, 3, (6 ** 2 + 3 ** 2 + 3 ** 2) / 3, 1, 3]]
182+
np.testing.assert_almost_equal(stats(arr, compute_variance=True), expected)
183+
184+
sparr = csc_matrix(arr)
185+
np.testing.assert_almost_equal(stats(sparr, compute_variance=True), expected)
186+
187+
sparr = sparr.tocsr()
188+
np.testing.assert_almost_equal(stats(sparr, compute_variance=True), expected)
189+
190+
weights = np.array([1, 2, 0, 3])
191+
e0 = (1 * 1 - 2 * 2 + 0 * 0 + 3 * 0) / (1 + 2 + 0 + 3)
192+
e1 = (1 * 4 + 2 * 10) / 3
193+
e2 = (1 * 9 + 2 * 0 + 3 * 0) / 6
194+
expected = [[-2, 1, e0, ((e0 - 1) ** 2 + 2 * (e0 + 2) ** 2 + 3 * e0 ** 2) / 6, 0, 4],
195+
[4, 10, e1, ((e1 - 4) ** 2 + 2 * (e1 - 10) ** 2) / 3, 2, 2],
196+
[0, 9, e2, ((e2 - 9) ** 2 + 2 * e2 ** 2 + 3 * e2 ** 2) / 6, 1, 3]]
197+
198+
np.testing.assert_almost_equal(
199+
stats(arr, weights=weights, compute_variance=True), expected)
200+
201+
sparr = csc_matrix(arr)
202+
np.testing.assert_almost_equal(
203+
stats(sparr, weights=weights, compute_variance=True), expected)
204+
205+
sparr = sparr.tocsr()
206+
np.testing.assert_almost_equal(
207+
stats(sparr, weights=weights, compute_variance=True), expected)
208+
173209
def test_stats_empty(self):
174210
X = np.array([])
175211
np.testing.assert_equal(stats(X), [[np.inf, -np.inf, 0, 0, 0, 0]])

0 commit comments

Comments
 (0)