Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Orange/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This module is a mixture of imports and code, so we allow import anywhere
# pylint: disable=wrong-import-position
# pylint: disable=wrong-import-position,wrong-import-order

from Orange import data

Expand Down Expand Up @@ -46,3 +46,10 @@
pass
finally:
del ctypes


# scipy.sparse still creates np.matrix instances, which numpy has
# pending-deprecated; we cannot avoid that from our side, so silence the
# warning until scipy itself is fixed.
import warnings
warnings.filterwarnings(
    action="ignore", message=".*the matrix subclass.*",
    category=PendingDeprecationWarning)
38 changes: 30 additions & 8 deletions Orange/distance/distance.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import warnings
from unittest.mock import patch

import numpy as np
from scipy import stats
import sklearn.metrics as skl_metrics
Expand Down Expand Up @@ -56,7 +59,8 @@ def compute_distances(self, x1, x2=None):
distances *= -2
distances += xx
distances += yy
np.maximum(distances, 0, out=distances)
with np.errstate(invalid="ignore"): # Nans are fixed below
np.maximum(distances, 0, out=distances)
if x2 is None:
distances.flat[::distances.shape[0] + 1] = 0.0
fixer = _distance.fix_euclidean_rows_normalized if self.normalize \
Expand Down Expand Up @@ -111,7 +115,8 @@ def compute_distances(self, x1, x2=None):
distances *= -2
distances += xx
distances += xx.T
np.maximum(distances, 0, out=distances)
with np.errstate(invalid="ignore"): # Nans are fixed below
np.maximum(distances, 0, out=distances)
distances.flat[::distances.shape[0] + 1] = 0.0

fixer = _distance.fix_euclidean_cols_normalized if self.normalize \
Expand Down Expand Up @@ -153,11 +158,24 @@ def fit_cols(self, attributes, x, n_vals):
Return `EuclideanColumnsModel` with stored means and variances
for normalization and imputation.
"""
def nowarn(msg, cat, *args, **kwargs):
if cat is RuntimeWarning and (
msg == "Mean of empty slice"
or msg == "Degrees of freedom <= 0 for slice"):
if self.normalize:
raise ValueError("some columns have no defined values")
else:
orig_warn(msg, cat, *args, **kwargs)

self.check_no_discrete(n_vals)
means = np.nanmean(x, axis=0)
vars = np.nanvar(x, axis=0)
if self.normalize and (np.isnan(vars).any() or not vars.all()):
raise ValueError("some columns are constant or have no values")
# catch_warnings resets the registry for "once", while avoiding this
# warning would be annoying and slow, hence patching
orig_warn = warnings.warn
with patch("warnings.warn", new=nowarn):
means = np.nanmean(x, axis=0)
vars = np.nanvar(x, axis=0)
if self.normalize and not vars.all():
raise ValueError("some columns are constant")
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

x[~mask] unnecessarily copies the array, and I wouldn't like to do it column by column. This patching is a non-idiomatic substitute for catch_warnings.

return EuclideanColumnsModel(
attributes, self.impute, self.normalize, means, vars)

Expand Down Expand Up @@ -277,8 +295,12 @@ def fit_cols(self, attributes, x, n_vals):
for normalization and imputation.
"""
self.check_no_discrete(n_vals)
medians = np.nanmedian(x, axis=0)
mads = np.nanmedian(np.abs(x - medians), axis=0)
if x.size == 0:
medians = np.zeros(len(x))
mads = np.zeros(len(x))
else:
medians = np.nanmedian(x, axis=0)
mads = np.nanmedian(np.abs(x - medians), axis=0)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning here was "mean of empty slice", which appeared because median fell back to mean if size equalled 0 (achieving what?!)

if self.normalize and (np.isnan(mads).any() or not mads.all()):
raise ValueError(
"some columns have zero absolute distance from median, "
Expand Down
7 changes: 7 additions & 0 deletions Orange/distance/tests/test_distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from unittest.mock import patch
from math import sqrt

import numpy as np
Expand Down Expand Up @@ -672,6 +673,12 @@ def test_manhattan_mixed_cols(self):
class CosineDistanceTest(FittedDistanceTest, CommonFittedTests):
Distance = distance.Cosine

def test_no_data(self):
    """Run the shared no-data test and check the warning it emits.

    NOTE(review): assumes the base-class test fits this distance on an
    empty table, which makes numpy warn via ``warnings.warn`` — the
    mock below captures that call so its arguments can be inspected.
    """
    with patch("warnings.warn") as mocked_warn:
        super().test_no_data()
    positional_args = mocked_warn.call_args[0]
    self.assertEqual(positional_args,
                     ("Mean of empty slice", RuntimeWarning))

def test_cosine_disc(self):
assert_almost_equal = np.testing.assert_almost_equal
data = self.disc_data
Expand Down
29 changes: 18 additions & 11 deletions Orange/evaluation/clustering.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
from sklearn.metrics import silhouette_score, adjusted_mutual_info_score, silhouette_samples

Expand Down Expand Up @@ -35,17 +37,22 @@ class ClusteringScore(Score):

def from_predicted(self, results, score_function):
# Clustering scores from labels
if self.considers_actual:
return np.fromiter(
(score_function(results.actual.flatten(), predicted.flatten())
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))
# Clustering scores from data only
else:
return np.fromiter(
(score_function(results.data.X, predicted.flatten())
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))
# This warning filter can be removed in scikit 0.22
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", "The behavior of AMI will change in version 0\.22.*")
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kept this catch_warnings although it's not in tests. It's supposed to be temporary.

if self.considers_actual:
return np.fromiter(
(score_function(results.actual.flatten(),
predicted.flatten())
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))
# Clustering scores from data only
else:
return np.fromiter(
(score_function(results.data.X, predicted.flatten())
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))


class Silhouette(ClusteringScore):
Expand Down
6 changes: 5 additions & 1 deletion Orange/projection/freeviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,11 @@ def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1,
is_class_discrete=is_class_discrete)

# Scale the changes (the largest anchor move is alpha * radius)
step = np.min(np.linalg.norm(A, axis=1) / np.linalg.norm(G, axis=1))
with np.errstate(divide="ignore"): # inf's will be ignored by min
step = np.min(np.linalg.norm(A, axis=1)
/ np.linalg.norm(G, axis=1))
if not np.isfinite(step):
break
step = alpha * step
Anew = A - step * G

Expand Down
2 changes: 1 addition & 1 deletion Orange/regression/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class ElasticNetCVLearner(LinearRegressionLearner):

def __init__(self, l1_ratio=0.5, eps=0.001, n_alphas=100, alphas=None,
fit_intercept=True, normalize=False, precompute='auto',
max_iter=1000, tol=0.0001, cv=None, copy_X=True,
max_iter=1000, tol=0.0001, cv=5, copy_X=True,
verbose=0, n_jobs=1, positive=False, preprocessors=None):
super().__init__(preprocessors=preprocessors)
self.params = vars()
Expand Down
8 changes: 2 additions & 6 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
It also patches bottleneck to contain these functions.
"""
import warnings
from warnings import warn
from distutils.version import StrictVersion

import numpy as np
import bottleneck as bn
Expand Down Expand Up @@ -394,8 +392,8 @@ def mean(x):
if sp.issparse(x) else
np.mean(x))
if np.isnan(m):
warn('mean() resulted in nan. If input can contain nan values, perhaps '
'you meant nanmean?', stacklevel=2)
warnings.warn('mean() resulted in nan. If input can contain nan values,'
' perhaps you meant nanmean?', stacklevel=2)
return m


Expand Down Expand Up @@ -470,8 +468,6 @@ def nanmedian_sparse(x):

def nanmode(x, axis=0):
""" A temporary replacement for a buggy scipy.stats.stats.mode from scipy < 1.2.0"""
if StrictVersion(scipy.__version__) >= StrictVersion("1.2.0"):
warn("Use scipy.stats.mode in scipy >= 1.2.0", DeprecationWarning)
nans = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis]
res = scipy.stats.stats.mode(x, axis)
return scipy.stats.stats.ModeResult(np.where(nans, np.nan, res.mode),
Expand Down
60 changes: 31 additions & 29 deletions Orange/tests/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import scipy.spatial
import scipy.stats
from scipy.sparse import csr_matrix
from sklearn.exceptions import DataConversionWarning

from Orange.data import (Table, Domain, ContinuousVariable,
DiscreteVariable, StringVariable, Instance)
Expand Down Expand Up @@ -58,11 +59,11 @@ def test_deprecated(self):

def test_from_file(self):
with named_file(
"""3 axis=0 asymmetric col_labels row_labels
ann bert chad
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90 1.23 4.56""") as name:
"""3 axis=0 asymmetric col_labels row_labels
ann bert chad
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90 1.23 4.56""") as name:
m = DistMatrix.from_file(name)
np.testing.assert_almost_equal(m, np.array([[0.12, 3.45, 6.78],
[9.01, 2.34, 5.67],
Expand All @@ -76,10 +77,10 @@ def test_from_file(self):
self.assertEqual(m.axis, 0)

with named_file(
"""3 axis=1 row_labels
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90""") as name:
"""3 axis=1 row_labels
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90""") as name:
m = DistMatrix.from_file(name)
np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
[9.01, 2.34, 0],
Expand Down Expand Up @@ -499,26 +500,27 @@ def test_jaccard_distance_many_examples(self):
[0., 0., 0.5]]))

def test_jaccard_distance_numpy(self):
np.testing.assert_almost_equal(
self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
np.array([[0.5]]))
np.testing.assert_almost_equal(
self.dist(self.titanic.X),
np.array([[0., 0., 0.5, 0.5],
[0., 0., 0.5, 0.5],
[0.5, 0.5, 0., 0.],
[0.5, 0.5, 0., 0.]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[2].x, self.titanic[:3].X),
np.array([[0.5, 0.5, 0.]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[:2].X, self.titanic[3].x),
np.array([[0.5],
[0.5]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[:2].X, self.titanic[:3].X),
np.array([[0., 0., 0.5],
[0., 0., 0.5]]))
with self.assertWarns(DataConversionWarning):
np.testing.assert_almost_equal(
self.dist(self.titanic[0].x, self.titanic[2].x, axis=1),
np.array([[0.5]]))
np.testing.assert_almost_equal(
self.dist(self.titanic.X),
np.array([[0., 0., 0.5, 0.5],
[0., 0., 0.5, 0.5],
[0.5, 0.5, 0., 0.],
[0.5, 0.5, 0., 0.]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[2].x, self.titanic[:3].X),
np.array([[0.5, 0.5, 0.]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[:2].X, self.titanic[3].x),
np.array([[0.5],
[0.5]]))
np.testing.assert_almost_equal(
self.dist(self.titanic[:2].X, self.titanic[:3].X),
np.array([[0., 0., 0.5],
[0., 0., 0.5]]))


# noinspection PyTypeChecker
Expand Down
2 changes: 2 additions & 0 deletions Orange/tests/test_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import unittest
from unittest.mock import Mock
import warnings

import numpy as np
import scipy.sparse as sp
Expand Down Expand Up @@ -430,6 +431,7 @@ def assert_dist_and_unknowns(computed, goal_dist):
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1.1, 0, 0, 0, 0, 0, 0]]
)
warnings.filterwarnings("ignore", ".*", sp.SparseEfficiencyWarning)
X[0, 0] = 0

d = data.Table.from_numpy(domain, X)
Expand Down
1 change: 0 additions & 1 deletion Orange/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,5 @@ def test_reprs(self):

for f in filters:
repr_str = repr(f)
print(repr_str)
new_f = eval(repr_str)
self.assertEqual(repr(new_f), repr_str)
4 changes: 3 additions & 1 deletion Orange/tests/test_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=missing-docstring

import unittest

import numpy as np
import sklearn

Expand Down Expand Up @@ -120,7 +121,8 @@ def test_single_class(self):
t = self.iris[60:90]
self.assertEqual(len(np.unique(t.Y)), 1)
learn = LogisticRegressionLearner()
model = learn(t)
with self.assertWarns(UserWarning):
model = learn(t)
self.assertEqual(model(t[0]), 1)
self.assertTrue(np.all(model(t[0], ret=Model.Probs) == [0, 1, 0]))
self.assertTrue(np.all(model(t) == 1))
Expand Down
17 changes: 17 additions & 0 deletions Orange/tests/test_orange.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import unittest
import warnings
import os

import scipy.sparse as sp


class TestOrange(unittest.TestCase):
Expand All @@ -10,3 +14,16 @@ def test_orange_has_modules(self):
for _, name, __ in pkgutil.iter_modules(Orange.__path__):
if name not in unimported:
self.assertIn(name, Orange.__dict__)

@unittest.skipUnless(
    os.environ.get("TRAVIS"),
    "Travis has latest versions; Appveyor doesn't, and users don't care")
def test_remove_matrix_deprecation_filter(self):
    """Remind us to drop the np.matrix warning filter when obsolete.

    Orange.__init__ installs an "ignore" filter for the np.matrix
    PendingDeprecationWarning triggered through scipy.sparse.  This test
    re-enables the warning and checks that scipy still triggers it; once
    scipy stops doing so, the test fails and the filter can be removed.
    """
    # Override the "ignore" filter from Orange.__init__ so the warning
    # becomes observable in this process.
    warnings.filterwarnings(
        "once", ".*the matrix subclass.*", PendingDeprecationWarning)
    reminder = ("Remove filter for PendingDeprecationWarning of np.matrix "
                "from Orange.__init__")
    with self.assertWarns(PendingDeprecationWarning, msg=reminder):
        sp.lil_matrix([1, 2, 3])
13 changes: 9 additions & 4 deletions Orange/tests/test_score_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=missing-docstring

import unittest
import warnings

import numpy as np

Expand All @@ -10,7 +11,7 @@
from Orange.modelling import RandomForestLearner
from Orange.preprocess.score import InfoGain, GainRatio, Gini, Chi2, ANOVA,\
UnivariateLinearRegression, ReliefF, FCBF, RReliefF

from Orange.projection import PCA


class FeatureScoringTest(unittest.TestCase):
Expand Down Expand Up @@ -150,12 +151,16 @@ def test_fcbf(self):
DiscreteVariable('target')),
np.full((2, 2), np.nan),
np.r_[0., 1])
weights = scorer(data, None)
np.testing.assert_equal(weights, np.nan)
with warnings.catch_warnings():
# these warnings are expected
warnings.filterwarnings("ignore", "invalid value.*double_scalars")
warnings.filterwarnings("ignore", "invalid value.*true_divide")

weights = scorer(data, None)
np.testing.assert_equal(weights, np.nan)

def test_learner_with_transformation(self):
learner = RandomForestLearner(random_state=0)
from Orange.projection import PCA
iris = Table("iris")
data = PCA(n_components=2)(iris)(iris)
scores = learner.score_data(data)
Expand Down
Loading