From e5c1dc34afe3bf597828f4865f9e8fc4a1c71dcd Mon Sep 17 00:00:00 2001 From: Ales Erjavec Date: Fri, 27 Jan 2017 15:28:46 +0100 Subject: [PATCH] table: Ensure correct dtype in `_compute_distributions` Fix a 'ValueError: cannot convert float NaN to integer' in bincount when the column data comes from an object array and contains NaN values. --- Orange/data/table.py | 2 ++ Orange/tests/test_distribution.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Orange/data/table.py b/Orange/data/table.py index 0afb2a29b51..250f01ccde2 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -1309,6 +1309,8 @@ def _get_matrix(M, cachedM, col): m, W, Xcsc = _get_matrix(self.X, Xcsc, col) elif col < 0: m, W, Xcsc = _get_matrix(self.metas, Xcsc, col * (-1) - 1) + if np.issubdtype(m.dtype, np.dtype(object)): + m = m.astype(float) else: m, W, Ycsc = _get_matrix(self._Y, Ycsc, col - self.X.shape[1]) if var.is_discrete: diff --git a/Orange/tests/test_distribution.py b/Orange/tests/test_distribution.py index aba727f16c5..ff2735ffca0 100644 --- a/Orange/tests/test_distribution.py +++ b/Orange/tests/test_distribution.py @@ -1,5 +1,6 @@ # Test methods with long descriptive names can omit docstrings -# pylint: disable=missing-docstring +# Test internal methods +# pylint: disable=missing-docstring, protected-access import unittest from unittest.mock import Mock @@ -430,7 +431,13 @@ def test_compute_distributions_metas(self): variable = d.domain[-2] dist, _ = d._compute_distributions([variable])[0] np.testing.assert_almost_equal(dist, [3, 3, 2]) - + # repeat with nan values + assert d.metas.dtype.kind == "O" + assert d.metas[0, 1] == 0 + d.metas[0, 1] = np.nan + dist, nanc = d._compute_distributions([variable])[0] + np.testing.assert_almost_equal(dist, [2, 3, 2]) + self.assertEqual(nanc, 1) if __name__ == "__main__": unittest.main()