Merge pull request #3503 from janezd/fix-test-warning-scripting

lanzagar · web-flow · commit 2124dba55fbb · 2019-01-29T14:54:14.000+01:00
[MNT] Fix test warnings in scripting part
diff --git a/Orange/__init__.py b/Orange/__init__.py
@@ -1,5 +1,5 @@
 # This module is a mixture of imports and code, so we allow import anywhere
-# pylint: disable=wrong-import-position
+# pylint: disable=wrong-import-position,wrong-import-order
 
 from Orange import data
 
@@ -46,3 +46,10 @@
     pass
 finally:
     del ctypes
+
+
+# scipy.sparse uses np.matrix, which raises a PendingDeprecationWarning;
+# we can't do anything about it, so we silence it until scipy is fixed
+import warnings
+warnings.filterwarnings(
+    "ignore", ".*the matrix subclass.*", PendingDeprecationWarning)
diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py
@@ -1,3 +1,6 @@
+import warnings
+from unittest.mock import patch
+
 import numpy as np
 from scipy import stats
 import sklearn.metrics as skl_metrics
@@ -56,7 +59,8 @@ def compute_distances(self, x1, x2=None):
             distances *= -2
             distances += xx
             distances += yy
-            np.maximum(distances, 0, out=distances)
+            with np.errstate(invalid="ignore"):  # Nans are fixed below
+                np.maximum(distances, 0, out=distances)
             if x2 is None:
                 distances.flat[::distances.shape[0] + 1] = 0.0
             fixer = _distance.fix_euclidean_rows_normalized if self.normalize \
@@ -111,7 +115,8 @@ def compute_distances(self, x1, x2=None):
         distances *= -2
         distances += xx
         distances += xx.T
-        np.maximum(distances, 0, out=distances)
+        with np.errstate(invalid="ignore"):  # Nans are fixed below
+            np.maximum(distances, 0, out=distances)
         distances.flat[::distances.shape[0] + 1] = 0.0
 
         fixer = _distance.fix_euclidean_cols_normalized if self.normalize \
@@ -153,11 +158,24 @@ def fit_cols(self, attributes, x, n_vals):
         Return `EuclideanColumnsModel` with stored means and variances
         for normalization and imputation.
         """
+        def nowarn(msg, cat, *args, **kwargs):
+            if cat is RuntimeWarning and (
+                    msg == "Mean of empty slice"
+                    or msg == "Degrees of freedom <= 0 for slice"):
+                if self.normalize:
+                    raise ValueError("some columns have no defined values")
+            else:
+                orig_warn(msg, cat, *args, **kwargs)
+
         self.check_no_discrete(n_vals)
-        means = np.nanmean(x, axis=0)
-        vars = np.nanvar(x, axis=0)
-        if self.normalize and (np.isnan(vars).any() or not vars.all()):
-            raise ValueError("some columns are constant or have no values")
+        # catch_warnings resets the "once" warning registry, and avoiding these
+        # warnings would be annoying and slow, so we patch warnings.warn
+        orig_warn = warnings.warn
+        with patch("warnings.warn", new=nowarn):
+            means = np.nanmean(x, axis=0)
+            vars = np.nanvar(x, axis=0)
+        if self.normalize and not vars.all():
+            raise ValueError("some columns are constant")
         return EuclideanColumnsModel(
             attributes, self.impute, self.normalize, means, vars)
 
@@ -277,8 +295,12 @@ def fit_cols(self, attributes, x, n_vals):
         for normalization and imputation.
         """
         self.check_no_discrete(n_vals)
-        medians = np.nanmedian(x, axis=0)
-        mads = np.nanmedian(np.abs(x - medians), axis=0)
+        if x.size == 0:
+            medians = np.zeros(len(x))
+            mads = np.zeros(len(x))
+        else:
+            medians = np.nanmedian(x, axis=0)
+            mads = np.nanmedian(np.abs(x - medians), axis=0)
         if self.normalize and (np.isnan(mads).any() or not mads.all()):
             raise ValueError(
                 "some columns have zero absolute distance from median, "
diff --git a/Orange/distance/tests/test_distance.py b/Orange/distance/tests/test_distance.py
@@ -1,4 +1,5 @@
 import unittest
+from unittest.mock import patch
 from math import sqrt
 
 import numpy as np
@@ -672,6 +673,12 @@ def test_manhattan_mixed_cols(self):
 class CosineDistanceTest(FittedDistanceTest, CommonFittedTests):
     Distance = distance.Cosine
 
+    def test_no_data(self):
+        with patch("warnings.warn") as warn:
+            super().test_no_data()
+            self.assertEqual(warn.call_args[0],
+                             ("Mean of empty slice", RuntimeWarning))
+
     def test_cosine_disc(self):
         assert_almost_equal = np.testing.assert_almost_equal
         data = self.disc_data
diff --git a/Orange/evaluation/clustering.py b/Orange/evaluation/clustering.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from sklearn.metrics import silhouette_score, adjusted_mutual_info_score, silhouette_samples
 
@@ -35,17 +37,22 @@ class ClusteringScore(Score):
 
     def from_predicted(self, results, score_function):
         # Clustering scores from labels
-        if self.considers_actual:
-            return np.fromiter(
-                (score_function(results.actual.flatten(), predicted.flatten())
-                 for predicted in results.predicted),
-                dtype=np.float64, count=len(results.predicted))
-        # Clustering scores from data only
-        else:
-            return np.fromiter(
-                (score_function(results.data.X, predicted.flatten())
-                 for predicted in results.predicted),
-                dtype=np.float64, count=len(results.predicted))
+        # This warning filter can be removed in scikit-learn 0.22
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", "The behavior of AMI will change in version 0\.22.*")
+            if self.considers_actual:
+                return np.fromiter(
+                    (score_function(results.actual.flatten(),
+                                    predicted.flatten())
+                     for predicted in results.predicted),
+                    dtype=np.float64, count=len(results.predicted))
+            # Clustering scores from data only
+            else:
+                return np.fromiter(
+                    (score_function(results.data.X, predicted.flatten())
+                     for predicted in results.predicted),
+                    dtype=np.float64, count=len(results.predicted))
 
 
 class Silhouette(ClusteringScore):
diff --git a/Orange/projection/freeviz.py b/Orange/projection/freeviz.py
@@ -344,7 +344,11 @@ def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1,
                                      is_class_discrete=is_class_discrete)
 
             # Scale the changes (the largest anchor move is alpha * radius)
-            step = np.min(np.linalg.norm(A, axis=1) / np.linalg.norm(G, axis=1))
+            with np.errstate(divide="ignore"):  # inf's will be ignored by min
+                step = np.min(np.linalg.norm(A, axis=1)
+                              / np.linalg.norm(G, axis=1))
+                if not np.isfinite(step):
+                    break
             step = alpha * step
             Anew = A - step * G
 
diff --git a/Orange/regression/linear.py b/Orange/regression/linear.py
@@ -73,7 +73,7 @@ class ElasticNetCVLearner(LinearRegressionLearner):
 
     def __init__(self, l1_ratio=0.5, eps=0.001, n_alphas=100, alphas=None,
                  fit_intercept=True, normalize=False, precompute='auto',
-                 max_iter=1000, tol=0.0001, cv=None, copy_X=True,
+                 max_iter=1000, tol=0.0001, cv=5, copy_X=True,
                  verbose=0, n_jobs=1, positive=False, preprocessors=None):
         super().__init__(preprocessors=preprocessors)
         self.params = vars()
diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py
@@ -5,8 +5,6 @@
 It also patches bottleneck to contain these functions.
 """
 import warnings
-from warnings import warn
-from distutils.version import StrictVersion
 
 import numpy as np
 import bottleneck as bn
@@ -394,8 +392,8 @@ def mean(x):
          if sp.issparse(x) else
          np.mean(x))
     if np.isnan(m):
-        warn('mean() resulted in nan. If input can contain nan values, perhaps '
-             'you meant nanmean?', stacklevel=2)
+        warnings.warn('mean() resulted in nan. If input can contain nan values,'
+                      ' perhaps you meant nanmean?', stacklevel=2)
     return m
 
 
@@ -470,8 +468,6 @@ def nanmedian_sparse(x):
 
 def nanmode(x, axis=0):
     """ A temporary replacement for a buggy scipy.stats.stats.mode from scipy < 1.2.0"""
-    if StrictVersion(scipy.__version__) >= StrictVersion("1.2.0"):
-        warn("Use scipy.stats.mode in scipy >= 1.2.0", DeprecationWarning)
     nans = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis]
     res = scipy.stats.stats.mode(x, axis)
     return scipy.stats.stats.ModeResult(np.where(nans, np.nan, res.mode),
diff --git a/Orange/tests/test_distances.py b/Orange/tests/test_distances.py
@@ -9,6 +9,7 @@
 import scipy.spatial
 import scipy.stats
 from scipy.sparse import csr_matrix
+from sklearn.exceptions import DataConversionWarning
 
 from Orange.data import (Table, Domain, ContinuousVariable,
                          DiscreteVariable, StringVariable, Instance)
@@ -58,11 +59,11 @@ def test_deprecated(self):
 
     def test_from_file(self):
         with named_file(
-            """3 axis=0 asymmetric col_labels row_labels
-                ann	bert	chad
-                danny	0.12	3.45	6.78
-                eve	9.01	2.34	5.67
-                frank	8.90	1.23	4.56""") as name:
+                """3 axis=0 asymmetric col_labels row_labels
+                    ann	bert	chad
+                    danny	0.12	3.45	6.78
+                    eve	9.01	2.34	5.67
+                    frank	8.90	1.23	4.56""") as name:
             m = DistMatrix.from_file(name)
             np.testing.assert_almost_equal(m, np.array([[0.12, 3.45, 6.78],
                                                         [9.01, 2.34, 5.67],
@@ -76,10 +77,10 @@ def test_from_file(self):
             self.assertEqual(m.axis, 0)
 
         with named_file(
-            """3 axis=1 row_labels
-                danny	0.12	3.45	6.78
-                eve 	9.01	2.34	5.67
-                frank	8.90""") as name:
+                """3 axis=1 row_labels
+                    danny	0.12	3.45	6.78
+                    eve 	9.01	2.34	5.67
+                    frank	8.90""") as name:
             m = DistMatrix.from_file(name)
             np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                         [9.01, 2.34, 0],
@@ -499,26 +500,27 @@ def test_jaccard_distance_many_examples(self):
                       [0., 0., 0.5]]))
 
     def test_jaccard_distance_numpy(self):
-        np.testing.assert_almost_equal(
-            self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
-            np.array([[0.5]]))
-        np.testing.assert_almost_equal(
-            self.dist(self.titanic.X),
-            np.array([[0., 0., 0.5, 0.5],
-                      [0., 0., 0.5, 0.5],
-                      [0.5, 0.5, 0., 0.],
-                      [0.5, 0.5, 0., 0.]]))
-        np.testing.assert_almost_equal(
-            self.dist(self.titanic[2].x, self.titanic[:3].X),
-            np.array([[0.5, 0.5, 0.]]))
-        np.testing.assert_almost_equal(
-            self.dist(self.titanic[:2].X, self.titanic[3].x),
-            np.array([[0.5],
-                      [0.5]]))
-        np.testing.assert_almost_equal(
-            self.dist(self.titanic[:2].X, self.titanic[:3].X),
-            np.array([[0., 0., 0.5],
-                      [0., 0., 0.5]]))
+        with self.assertWarns(DataConversionWarning):
+            np.testing.assert_almost_equal(
+                self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
+                np.array([[0.5]]))
+            np.testing.assert_almost_equal(
+                self.dist(self.titanic.X),
+                np.array([[0., 0., 0.5, 0.5],
+                          [0., 0., 0.5, 0.5],
+                          [0.5, 0.5, 0., 0.],
+                          [0.5, 0.5, 0., 0.]]))
+            np.testing.assert_almost_equal(
+                self.dist(self.titanic[2].x, self.titanic[:3].X),
+                np.array([[0.5, 0.5, 0.]]))
+            np.testing.assert_almost_equal(
+                self.dist(self.titanic[:2].X, self.titanic[3].x),
+                np.array([[0.5],
+                          [0.5]]))
+            np.testing.assert_almost_equal(
+                self.dist(self.titanic[:2].X, self.titanic[:3].X),
+                np.array([[0., 0., 0.5],
+                          [0., 0., 0.5]]))
 
 
 # noinspection PyTypeChecker
diff --git a/Orange/tests/test_distribution.py b/Orange/tests/test_distribution.py
@@ -4,6 +4,7 @@
 
 import unittest
 from unittest.mock import Mock
+import warnings
 
 import numpy as np
 import scipy.sparse as sp
@@ -430,6 +431,7 @@ def assert_dist_and_unknowns(computed, goal_dist):
              [0, 0, 0, 0,      0,      0,      0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0,      0, 0, 0],
              [0, 0, 2, 0,      0,      0,      1, 0, 0, 0, 0, 0, 0, 1.1, 0, 0, 0,      0, 0, 0]]
         )
+        warnings.filterwarnings("ignore", ".*", sp.SparseEfficiencyWarning)
         X[0, 0] = 0
 
         d = data.Table.from_numpy(domain, X)
diff --git a/Orange/tests/test_filter.py b/Orange/tests/test_filter.py
@@ -457,6 +457,5 @@ def test_reprs(self):
 
         for f in filters:
             repr_str = repr(f)
-            print(repr_str)
             new_f = eval(repr_str)
             self.assertEqual(repr(new_f), repr_str)
diff --git a/Orange/tests/test_logistic_regression.py b/Orange/tests/test_logistic_regression.py
@@ -2,6 +2,7 @@
 # pylint: disable=missing-docstring
 
 import unittest
+
 import numpy as np
 import sklearn
 
@@ -120,7 +121,8 @@ def test_single_class(self):
         t = self.iris[60:90]
         self.assertEqual(len(np.unique(t.Y)), 1)
         learn = LogisticRegressionLearner()
-        model = learn(t)
+        with self.assertWarns(UserWarning):
+            model = learn(t)
         self.assertEqual(model(t[0]), 1)
         self.assertTrue(np.all(model(t[0], ret=Model.Probs) == [0, 1, 0]))
         self.assertTrue(np.all(model(t) == 1))
diff --git a/Orange/tests/test_orange.py b/Orange/tests/test_orange.py
@@ -1,4 +1,8 @@
 import unittest
+import warnings
+import os
+
+import scipy.sparse as sp
 
 
 class TestOrange(unittest.TestCase):
@@ -10,3 +14,16 @@ def test_orange_has_modules(self):
         for _, name, __ in pkgutil.iter_modules(Orange.__path__):
             if name not in unimported:
                 self.assertIn(name, Orange.__dict__)
+
+    @unittest.skipUnless(
+        os.environ.get("TRAVIS"),
+        "Travis has latest versions; Appveyor doesn't, and users don't care")
+    def test_remove_matrix_deprecation_filter(self):
+        # Override filter in Orange.__init__
+        warnings.filterwarnings(
+            "once", ".*the matrix subclass.*", PendingDeprecationWarning)
+        with self.assertWarns(
+                PendingDeprecationWarning,
+                msg="Remove filter for PendingDeprecationWarning of np.matrix "
+                    "from Orange.__init__"):
+            sp.lil_matrix([1, 2, 3])
diff --git a/Orange/tests/test_score_feature.py b/Orange/tests/test_score_feature.py
@@ -2,6 +2,7 @@
 # pylint: disable=missing-docstring
 
 import unittest
+import warnings
 
 import numpy as np
 
@@ -10,7 +11,7 @@
 from Orange.modelling import RandomForestLearner
 from Orange.preprocess.score import InfoGain, GainRatio, Gini, Chi2, ANOVA,\
     UnivariateLinearRegression, ReliefF, FCBF, RReliefF
-
+from Orange.projection import PCA
 
 
 class FeatureScoringTest(unittest.TestCase):
@@ -150,12 +151,16 @@ def test_fcbf(self):
                             DiscreteVariable('target')),
                      np.full((2, 2), np.nan),
                      np.r_[0., 1])
-        weights = scorer(data, None)
-        np.testing.assert_equal(weights, np.nan)
+        with warnings.catch_warnings():
+            # these warnings are expected
+            warnings.filterwarnings("ignore", "invalid value.*double_scalars")
+            warnings.filterwarnings("ignore", "invalid value.*true_divide")
+
+            weights = scorer(data, None)
+            np.testing.assert_equal(weights, np.nan)
 
     def test_learner_with_transformation(self):
         learner = RandomForestLearner(random_state=0)
-        from Orange.projection import PCA
         iris = Table("iris")
         data = PCA(n_components=2)(iris)(iris)
         scores = learner.score_data(data)
diff --git a/Orange/tests/test_sparse_table.py b/Orange/tests/test_sparse_table.py
diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py
diff --git a/Orange/tree.py b/Orange/tree.py