Skip to content

Commit d36c6aa

Browse files
authored
Merge pull request #4081 from PrimozGodec/fix-predictor
[FIX] Model's data_to_model_domain supports sparse matrix
2 parents 9ed951b + 7d57895 commit d36c6aa

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

Orange/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import itertools
33
from collections import Iterable
44
import re
5-
import threading
65

76
import numpy as np
87
import scipy
@@ -13,6 +12,7 @@
1312
from Orange.data.util import one_hot
1413
from Orange.misc.wrapper_meta import WrapperMeta
1514
from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize
15+
from Orange.statistics.util import all_nan
1616
from Orange.util import Reprable
1717

1818
__all__ = ["Learner", "Model", "SklLearner", "SklModel",
@@ -320,9 +320,9 @@ def data_to_model_domain():
320320

321321
if self.original_domain.attributes != data.domain.attributes \
322322
and data.X.size \
323-
and not np.isnan(data.X).all():
323+
and not all_nan(data.X):
324324
new_data = data.transform(self.original_domain)
325-
if np.isnan(new_data.X).all():
325+
if all_nan(new_data.X):
326326
raise DomainTransformationError(
327327
"domain transformation produced no defined values")
328328
return new_data.transform(self.domain)

Orange/statistics/util.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,32 @@ def any_nan(x, axis=None):
659659
return result
660660

661661

662+
def all_nan(x, axis=None):
    """
    Check whether all values in a matrix are nan. Works for sparse matrices
    too.

    Parameters
    ----------
    x : np.ndarray or scipy.sparse matrix
        The data to check.
    axis : int, optional
        Axis along which the check is performed. With ``None`` (default) the
        whole matrix is checked and a single boolean is returned. For sparse
        input, ``0`` checks every column and ``1`` checks every row; negative
        values count from the last axis, as in numpy.

    Returns
    -------
    bool or np.ndarray
        A single boolean when `axis` is None, otherwise a boolean array with
        one entry per column (``axis=0``) or row (``axis=1``).

    Raises
    ------
    IndexError
        If `x` is sparse and `axis` is out of bounds for it.
    """
    if not sp.issparse(x):
        return np.isnan(x).all(axis=axis)

    if axis is None:
        # Entries that are not stored are implicit zeros, which are not nan,
        # so every cell must be stored explicitly for x to be all nan.
        return np.prod(x.shape) == x.nnz and np.isnan(x.data).all()

    if not -x.ndim <= axis < x.ndim:
        raise IndexError(
            "axis {} is out of bounds for a matrix of dimension {}".format(
                axis, x.ndim))
    # Map negative axes to their non-negative equivalents (-1 -> 1, -2 -> 0).
    axis %= x.ndim

    # Reducing along axis 0 needs column-wise storage (CSC), along axis 1
    # row-wise storage (CSR); indptr then delimits each column / row.
    compressed = x.tocsc() if axis == 0 else x.tocsr()
    axis_len = compressed.shape[axis]
    indptr = compressed.indptr

    # Number of explicitly stored values in each column / row.
    stored = np.diff(indptr)
    # Number of stored nans in each column / row, obtained from a running
    # total so that empty columns / rows are handled without a Python loop.
    nan_running = np.concatenate(([0], np.cumsum(np.isnan(compressed.data))))
    nan_stored = nan_running[indptr[1:]] - nan_running[indptr[:-1]]

    # A column / row is all nan only when it is fully stored and every
    # stored value is nan.
    return (stored == axis_len) & (nan_stored == axis_len)
686+
687+
662688
def FDR(p_values: Iterable, dependent=False, m=None, ordered=False) -> Iterable:
663689
""" `False Discovery Rate <http://en.wikipedia.org/wiki/False_discovery_rate>`_
664690
correction on a list of p-values.

Orange/tests/test_statistics.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
from Orange.data.util import assure_array_dense
1212
from Orange.statistics.util import bincount, countnans, contingency, digitize, \
1313
mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \
14-
unique, var, nanstd, nanvar, nanmode, nan_to_num, FDR, isnan, any_nan
14+
unique, var, nanstd, nanvar, nanmode, nan_to_num, FDR, isnan, any_nan, \
15+
all_nan
1516
from sklearn.utils import check_random_state
1617

1718

@@ -730,6 +731,51 @@ def test_axis_1_with_nans(self, array):
730731
np.testing.assert_equal(result, expected)
731732

732733

734+
class TestAllNans(unittest.TestCase):
    """Tests for `all_nan` on dense and sparse inputs, for every axis."""

    def setUp(self) -> None:
        # pylint: disable=bad-whitespace
        nan = np.nan
        # Row 1 and column 3 consist solely of nans.
        self.x_with_nans = np.array([
            [0.,  1.,  0.,  nan, 3.,  5.],
            [nan, nan, nan, nan, nan, nan],
            [0.,  0.,  0.,  nan, 7.,  6.],
            [0.,  0.,  0.,  nan, 7.,  nan],
        ])
        self.x_no_nans = np.arange(12).reshape((3, 4))
        self.x_only_nans = (np.ones(12) * np.nan).reshape((3, 4))

    def _assert_mask(self, matrix, axis, flags):
        """Compare `all_nan(matrix, axis)` with the expected 0/1 flags."""
        wanted = np.array(flags, dtype=bool)
        actual = all_nan(matrix, axis=axis)
        np.testing.assert_equal(actual, wanted)

    @dense_sparse
    def test_axis_none_without_nans(self, array):
        matrix = array(self.x_no_nans)
        self.assertFalse(all_nan(matrix))

    @dense_sparse
    def test_axis_none_with_nans(self, array):
        matrix = array(self.x_only_nans)
        self.assertTrue(all_nan(matrix))

    @dense_sparse
    def test_axis_0_without_nans(self, array):
        self._assert_mask(array(self.x_no_nans), 0, [0, 0, 0, 0])

    @dense_sparse
    def test_axis_0_with_nans(self, array):
        self._assert_mask(array(self.x_with_nans), 0, [0, 0, 0, 1, 0, 0])

    @dense_sparse
    def test_axis_1_without_nans(self, array):
        self._assert_mask(array(self.x_no_nans), 1, [0, 0, 0])

    @dense_sparse
    def test_axis_1_with_nans(self, array):
        self._assert_mask(array(self.x_with_nans), 1, [0, 1, 0, 0])
777+
778+
733779
class TestNanModeFixedInScipy(unittest.TestCase):
734780

735781
@unittest.expectedFailure

0 commit comments

Comments
 (0)