Skip to content

Commit 2124dba

Browse files
authored
Merge pull request #3503 from janezd/fix-test-warning-scripting
[MNT] Fix test warnings in scripting part
2 parents b944c1c + bc99d0c commit 2124dba

File tree

16 files changed

+166
-67
lines changed

16 files changed

+166
-67
lines changed

Orange/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This module is a mixture of imports and code, so we allow import anywhere
2-
# pylint: disable=wrong-import-position
2+
# pylint: disable=wrong-import-position,wrong-import-order
33

44
from Orange import data
55

@@ -46,3 +46,10 @@
4646
pass
4747
finally:
4848
del ctypes
49+
50+
51+
# scipy.sparse uses numpy's matrix subclass, which triggers a PendingDeprecationWarning;
52+
# we can't do anything about it, so we silence the warning until scipy is fixed
53+
import warnings
54+
warnings.filterwarnings(
55+
"ignore", ".*the matrix subclass.*", PendingDeprecationWarning)

Orange/distance/distance.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import warnings
2+
from unittest.mock import patch
3+
14
import numpy as np
25
from scipy import stats
36
import sklearn.metrics as skl_metrics
@@ -56,7 +59,8 @@ def compute_distances(self, x1, x2=None):
5659
distances *= -2
5760
distances += xx
5861
distances += yy
59-
np.maximum(distances, 0, out=distances)
62+
with np.errstate(invalid="ignore"): # invalid-value warnings are silenced; the NaNs are fixed below
63+
np.maximum(distances, 0, out=distances)
6064
if x2 is None:
6165
distances.flat[::distances.shape[0] + 1] = 0.0
6266
fixer = _distance.fix_euclidean_rows_normalized if self.normalize \
@@ -111,7 +115,8 @@ def compute_distances(self, x1, x2=None):
111115
distances *= -2
112116
distances += xx
113117
distances += xx.T
114-
np.maximum(distances, 0, out=distances)
118+
with np.errstate(invalid="ignore"): # invalid-value warnings are silenced; the NaNs are fixed below
119+
np.maximum(distances, 0, out=distances)
115120
distances.flat[::distances.shape[0] + 1] = 0.0
116121

117122
fixer = _distance.fix_euclidean_cols_normalized if self.normalize \
@@ -153,11 +158,24 @@ def fit_cols(self, attributes, x, n_vals):
153158
Return `EuclideanColumnsModel` with stored means and variances
154159
for normalization and imputation.
155160
"""
161+
def nowarn(msg, cat, *args, **kwargs):
162+
if cat is RuntimeWarning and (
163+
msg == "Mean of empty slice"
164+
or msg == "Degrees of freedom <= 0 for slice"):
165+
if self.normalize:
166+
raise ValueError("some columns have no defined values")
167+
else:
168+
orig_warn(msg, cat, *args, **kwargs)
169+
156170
self.check_no_discrete(n_vals)
157-
means = np.nanmean(x, axis=0)
158-
vars = np.nanvar(x, axis=0)
159-
if self.normalize and (np.isnan(vars).any() or not vars.all()):
160-
raise ValueError("some columns are constant or have no values")
171+
# catch_warnings resets the registry of warnings shown "once", and avoiding
172+
# this warning up front would be annoying and slow, hence we patch warnings.warn
173+
orig_warn = warnings.warn
174+
with patch("warnings.warn", new=nowarn):
175+
means = np.nanmean(x, axis=0)
176+
vars = np.nanvar(x, axis=0)
177+
if self.normalize and not vars.all():
178+
raise ValueError("some columns are constant")
161179
return EuclideanColumnsModel(
162180
attributes, self.impute, self.normalize, means, vars)
163181

@@ -277,8 +295,12 @@ def fit_cols(self, attributes, x, n_vals):
277295
for normalization and imputation.
278296
"""
279297
self.check_no_discrete(n_vals)
280-
medians = np.nanmedian(x, axis=0)
281-
mads = np.nanmedian(np.abs(x - medians), axis=0)
298+
if x.size == 0:
299+
medians = np.zeros(len(x))
300+
mads = np.zeros(len(x))
301+
else:
302+
medians = np.nanmedian(x, axis=0)
303+
mads = np.nanmedian(np.abs(x - medians), axis=0)
282304
if self.normalize and (np.isnan(mads).any() or not mads.all()):
283305
raise ValueError(
284306
"some columns have zero absolute distance from median, "

Orange/distance/tests/test_distance.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import unittest
2+
from unittest.mock import patch
23
from math import sqrt
34

45
import numpy as np
@@ -672,6 +673,12 @@ def test_manhattan_mixed_cols(self):
672673
class CosineDistanceTest(FittedDistanceTest, CommonFittedTests):
673674
Distance = distance.Cosine
674675

676+
def test_no_data(self):
677+
with patch("warnings.warn") as warn:
678+
super().test_no_data()
679+
self.assertEqual(warn.call_args[0],
680+
("Mean of empty slice", RuntimeWarning))
681+
675682
def test_cosine_disc(self):
676683
assert_almost_equal = np.testing.assert_almost_equal
677684
data = self.disc_data

Orange/evaluation/clustering.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import warnings
2+
13
import numpy as np
24
from sklearn.metrics import silhouette_score, adjusted_mutual_info_score, silhouette_samples
35

@@ -35,17 +37,22 @@ class ClusteringScore(Score):
3537

3638
def from_predicted(self, results, score_function):
3739
# Clustering scores from labels
38-
if self.considers_actual:
39-
return np.fromiter(
40-
(score_function(results.actual.flatten(), predicted.flatten())
41-
for predicted in results.predicted),
42-
dtype=np.float64, count=len(results.predicted))
43-
# Clustering scores from data only
44-
else:
45-
return np.fromiter(
46-
(score_function(results.data.X, predicted.flatten())
47-
for predicted in results.predicted),
48-
dtype=np.float64, count=len(results.predicted))
40+
# This warning filter can be removed with scikit-learn 0.22
41+
with warnings.catch_warnings():
42+
warnings.filterwarnings(
43+
"ignore", "The behavior of AMI will change in version 0\.22.*")
44+
if self.considers_actual:
45+
return np.fromiter(
46+
(score_function(results.actual.flatten(),
47+
predicted.flatten())
48+
for predicted in results.predicted),
49+
dtype=np.float64, count=len(results.predicted))
50+
# Clustering scores from data only
51+
else:
52+
return np.fromiter(
53+
(score_function(results.data.X, predicted.flatten())
54+
for predicted in results.predicted),
55+
dtype=np.float64, count=len(results.predicted))
4956

5057

5158
class Silhouette(ClusteringScore):

Orange/projection/freeviz.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,11 @@ def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1,
344344
is_class_discrete=is_class_discrete)
345345

346346
# Scale the changes (the largest anchor move is alpha * radius)
347-
step = np.min(np.linalg.norm(A, axis=1) / np.linalg.norm(G, axis=1))
347+
with np.errstate(divide="ignore"): # inf's will be ignored by min
348+
step = np.min(np.linalg.norm(A, axis=1)
349+
/ np.linalg.norm(G, axis=1))
350+
if not np.isfinite(step):
351+
break
348352
step = alpha * step
349353
Anew = A - step * G
350354

Orange/regression/linear.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class ElasticNetCVLearner(LinearRegressionLearner):
7373

7474
def __init__(self, l1_ratio=0.5, eps=0.001, n_alphas=100, alphas=None,
7575
fit_intercept=True, normalize=False, precompute='auto',
76-
max_iter=1000, tol=0.0001, cv=None, copy_X=True,
76+
max_iter=1000, tol=0.0001, cv=5, copy_X=True,
7777
verbose=0, n_jobs=1, positive=False, preprocessors=None):
7878
super().__init__(preprocessors=preprocessors)
7979
self.params = vars()

Orange/statistics/util.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
It also patches bottleneck to contain these functions.
66
"""
77
import warnings
8-
from warnings import warn
9-
from distutils.version import StrictVersion
108

119
import numpy as np
1210
import bottleneck as bn
@@ -394,8 +392,8 @@ def mean(x):
394392
if sp.issparse(x) else
395393
np.mean(x))
396394
if np.isnan(m):
397-
warn('mean() resulted in nan. If input can contain nan values, perhaps '
398-
'you meant nanmean?', stacklevel=2)
395+
warnings.warn('mean() resulted in nan. If input can contain nan values,'
396+
' perhaps you meant nanmean?', stacklevel=2)
399397
return m
400398

401399

@@ -470,8 +468,6 @@ def nanmedian_sparse(x):
470468

471469
def nanmode(x, axis=0):
472470
""" A temporary replacement for a buggy scipy.stats.stats.mode from scipy < 1.2.0"""
473-
if StrictVersion(scipy.__version__) >= StrictVersion("1.2.0"):
474-
warn("Use scipy.stats.mode in scipy >= 1.2.0", DeprecationWarning)
475471
nans = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis]
476472
res = scipy.stats.stats.mode(x, axis)
477473
return scipy.stats.stats.ModeResult(np.where(nans, np.nan, res.mode),

Orange/tests/test_distances.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import scipy.spatial
1010
import scipy.stats
1111
from scipy.sparse import csr_matrix
12+
from sklearn.exceptions import DataConversionWarning
1213

1314
from Orange.data import (Table, Domain, ContinuousVariable,
1415
DiscreteVariable, StringVariable, Instance)
@@ -58,11 +59,11 @@ def test_deprecated(self):
5859

5960
def test_from_file(self):
6061
with named_file(
61-
"""3 axis=0 asymmetric col_labels row_labels
62-
ann bert chad
63-
danny 0.12 3.45 6.78
64-
eve 9.01 2.34 5.67
65-
frank 8.90 1.23 4.56""") as name:
62+
"""3 axis=0 asymmetric col_labels row_labels
63+
ann bert chad
64+
danny 0.12 3.45 6.78
65+
eve 9.01 2.34 5.67
66+
frank 8.90 1.23 4.56""") as name:
6667
m = DistMatrix.from_file(name)
6768
np.testing.assert_almost_equal(m, np.array([[0.12, 3.45, 6.78],
6869
[9.01, 2.34, 5.67],
@@ -76,10 +77,10 @@ def test_from_file(self):
7677
self.assertEqual(m.axis, 0)
7778

7879
with named_file(
79-
"""3 axis=1 row_labels
80-
danny 0.12 3.45 6.78
81-
eve 9.01 2.34 5.67
82-
frank 8.90""") as name:
80+
"""3 axis=1 row_labels
81+
danny 0.12 3.45 6.78
82+
eve 9.01 2.34 5.67
83+
frank 8.90""") as name:
8384
m = DistMatrix.from_file(name)
8485
np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
8586
[9.01, 2.34, 0],
@@ -499,26 +500,27 @@ def test_jaccard_distance_many_examples(self):
499500
[0., 0., 0.5]]))
500501

501502
def test_jaccard_distance_numpy(self):
502-
np.testing.assert_almost_equal(
503-
self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
504-
np.array([[0.5]]))
505-
np.testing.assert_almost_equal(
506-
self.dist(self.titanic.X),
507-
np.array([[0., 0., 0.5, 0.5],
508-
[0., 0., 0.5, 0.5],
509-
[0.5, 0.5, 0., 0.],
510-
[0.5, 0.5, 0., 0.]]))
511-
np.testing.assert_almost_equal(
512-
self.dist(self.titanic[2].x, self.titanic[:3].X),
513-
np.array([[0.5, 0.5, 0.]]))
514-
np.testing.assert_almost_equal(
515-
self.dist(self.titanic[:2].X, self.titanic[3].x),
516-
np.array([[0.5],
517-
[0.5]]))
518-
np.testing.assert_almost_equal(
519-
self.dist(self.titanic[:2].X, self.titanic[:3].X),
520-
np.array([[0., 0., 0.5],
521-
[0., 0., 0.5]]))
503+
with self.assertWarns(DataConversionWarning):
504+
np.testing.assert_almost_equal(
505+
self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
506+
np.array([[0.5]]))
507+
np.testing.assert_almost_equal(
508+
self.dist(self.titanic.X),
509+
np.array([[0., 0., 0.5, 0.5],
510+
[0., 0., 0.5, 0.5],
511+
[0.5, 0.5, 0., 0.],
512+
[0.5, 0.5, 0., 0.]]))
513+
np.testing.assert_almost_equal(
514+
self.dist(self.titanic[2].x, self.titanic[:3].X),
515+
np.array([[0.5, 0.5, 0.]]))
516+
np.testing.assert_almost_equal(
517+
self.dist(self.titanic[:2].X, self.titanic[3].x),
518+
np.array([[0.5],
519+
[0.5]]))
520+
np.testing.assert_almost_equal(
521+
self.dist(self.titanic[:2].X, self.titanic[:3].X),
522+
np.array([[0., 0., 0.5],
523+
[0., 0., 0.5]]))
522524

523525

524526
# noinspection PyTypeChecker

Orange/tests/test_distribution.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import unittest
66
from unittest.mock import Mock
7+
import warnings
78

89
import numpy as np
910
import scipy.sparse as sp
@@ -430,6 +431,7 @@ def assert_dist_and_unknowns(computed, goal_dist):
430431
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
431432
[0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1.1, 0, 0, 0, 0, 0, 0]]
432433
)
434+
warnings.filterwarnings("ignore", ".*", sp.SparseEfficiencyWarning)
433435
X[0, 0] = 0
434436

435437
d = data.Table.from_numpy(domain, X)

Orange/tests/test_filter.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,5 @@ def test_reprs(self):
457457

458458
for f in filters:
459459
repr_str = repr(f)
460-
print(repr_str)
461460
new_f = eval(repr_str)
462461
self.assertEqual(repr(new_f), repr_str)

0 commit comments

Comments
 (0)