Skip to content

Commit 2125acd

Browse files
authored
Merge pull request #3674 from janezd/pylint-distances
Pylint distances
2 parents 180a041 + c12ec4a commit 2125acd

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed

Orange/distance/base.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# This module defines abstract base classes; derived classes are abstract, too
2+
# pylint: disable=abstract-method
3+
14
import numpy as np
25
import sklearn.metrics as skl_metrics
36

@@ -13,10 +16,11 @@
1316
# TODO this *private* function is called from several widgets to prepare
1417
# data for calling the below classes. After we (mostly) stopped relying
1518
# on sklearn.metrics, this is (mostly) unnecessary
16-
19+
# Afterwards, also remove the following line:
20+
# pylint: disable=redefined-outer-name
1721
def _preprocess(table, impute=True):
1822
"""Remove categorical attributes and impute missing values."""
19-
if not len(table):
23+
if not len(table): # this can be an array, pylint: disable=len-as-condition
2024
return table
2125
new_domain = Domain(
2226
[a for a in table.domain.attributes if a.is_continuous],
@@ -120,6 +124,9 @@ class Distance:
120124
impute (bool):
121125
if `True` (default is `False`), nans in the computed distances
122126
are replaced with zeros, and infs with very large numbers.
127+
normalize (bool):
128+
if `True`, columns are normalized before computation. This attribute
129+
applies only if the distance supports normalization.
123130
124131
The capabilities of the metrics are described with class attributes.
125132
@@ -150,6 +157,11 @@ class Distance:
150157
supports_normalization = False
151158
supports_missing = True
152159

160+
# Predefined here to silence pylint, which doesn't look into __new__
161+
normalize = False
162+
axis = 1
163+
impute = False
164+
153165
def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
154166
self = super().__new__(cls)
155167
self.axis = axis
@@ -168,16 +180,25 @@ def __new__(cls, e1=None, e2=None, axis=1, impute=False, **kwargs):
168180
or hasattr(e1, "is_sparse") and e1.is_sparse()):
169181
fallback = getattr(self, "fallback", None)
170182
if fallback is not None:
171-
# pylint disable=not-callable
183+
# pylint: disable=not-callable
172184
return fallback(e1, e2, axis, impute)
173185

174186
# Magic constructor
175187
model = self.fit(e1)
176188
return model(e1, e2)
177189

178-
def fit(self, e1):
179-
"""Abstract method returning :obj:`DistanceModel` fit to the data"""
180-
pass
190+
def fit(self, data):
191+
"""
192+
Abstract method returning :obj:`DistanceModel` fit to the data
193+
194+
Args:
195+
data (Orange.data.Table, Orange.data.Instance, np.ndarray):
196+
data for fitting the distance model
197+
198+
Returns:
199+
model (DistanceModel)
200+
"""
201+
raise NotImplementedError
181202

182203
@staticmethod
183204
def check_no_discrete(n_vals):
@@ -256,7 +277,7 @@ def compute_distances(self, x1, x2):
256277
"""
257278
Abstract method for computation of distances between rows or columns of
258279
`x1`, or between rows of `x1` and `x2`. Do not call directly."""
259-
pass
280+
raise NotImplementedError
260281

261282

262283
class FittedDistanceModel(DistanceModel):
@@ -268,10 +289,15 @@ class FittedDistanceModel(DistanceModel):
268289
attributes (list of `Variable`): attributes on which the model was fit
269290
discrete (np.ndarray): bool array indicating discrete attributes
270291
continuous (np.ndarray): bool array indicating continuous attributes
292+
normalize (bool):
293+
if `True` (default is `False`) continuous columns are normalized
271294
"""
272295
def __init__(self, attributes, axis=1, impute=False):
273296
super().__init__(axis, impute)
274297
self.attributes = attributes
298+
self.discrete = None
299+
self.continuous = None
300+
self.normalize = False
275301

276302
def __call__(self, e1, e2=None):
277303
if self.attributes is not None and (
@@ -373,7 +399,7 @@ def fit_cols(self, attributes, x, n_vals):
373399
x (np.ndarray): data
374400
n_vals (np.ndarray): number of attribute values, 0 for continuous
375401
"""
376-
pass
402+
raise NotImplementedError
377403

378404
def fit_rows(self, attributes, x, n_vals):
379405
"""
@@ -440,7 +466,8 @@ def fit_rows(self, attributes, x, n_vals):
440466
dist_missing2_cont[:curr_cont],
441467
dist_missing_disc, dist_missing2_disc)
442468

443-
def get_discrete_stats(self, column, n_bins):
469+
@staticmethod
470+
def get_discrete_stats(column, n_bins):
444471
"""
445472
Return tables used for computing the distance between missing discrete values.
446473
@@ -474,7 +501,7 @@ def get_continuous_stats(self, column):
474501
dist_missing2_cont (float): the value used for distance between two
475502
missing values in column
476503
"""
477-
pass
504+
raise NotImplementedError
478505

479506

480507
# Fallbacks for distances in sparse data

Orange/distance/distance.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .base import (Distance, DistanceModel, FittedDistance, FittedDistanceModel,
1515
SklDistance, _orange_to_numpy)
1616

17+
1718
class EuclideanRowsModel(FittedDistanceModel):
1819
"""
1920
Model for computation of Euclidean distances between rows.
@@ -23,14 +24,14 @@ class EuclideanRowsModel(FittedDistanceModel):
2324
"""
2425
def __init__(self, attributes, impute, normalize,
2526
continuous, discrete,
26-
means, vars, dist_missing2_cont,
27+
means, stdvars, dist_missing2_cont,
2728
dist_missing_disc, dist_missing2_disc):
2829
super().__init__(attributes, 1, impute)
2930
self.normalize = normalize
3031
self.continuous = continuous
3132
self.discrete = discrete
3233
self.means = means
33-
self.vars = vars
34+
self.vars = stdvars
3435
self.dist_missing2_cont = dist_missing2_cont
3536
self.dist_missing_disc = dist_missing_disc
3637
self.dist_missing2_disc = dist_missing2_disc
@@ -91,11 +92,11 @@ class EuclideanColumnsModel(FittedDistanceModel):
9192
Means are used as offsets for normalization, and two deviations are
9293
used for scaling.
9394
"""
94-
def __init__(self, attributes, impute, normalize, means, vars):
95+
def __init__(self, attributes, impute, normalize, means, stdvars):
9596
super().__init__(attributes, 0, impute)
9697
self.normalize = normalize
9798
self.means = means
98-
self.vars = vars
99+
self.vars = stdvars
99100

100101
def compute_distances(self, x1, x2=None):
101102
"""
@@ -134,6 +135,7 @@ class Euclidean(FittedDistance):
134135
rows_model_type = EuclideanRowsModel
135136

136137
def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False):
138+
# pylint: disable=arguments-differ
137139
return super().__new__(cls, e1, e2, axis, impute, normalize=normalize)
138140

139141
def get_continuous_stats(self, column):
@@ -160,9 +162,8 @@ def fit_cols(self, attributes, x, n_vals):
160162
for normalization and imputation.
161163
"""
162164
def nowarn(msg, cat, *args, **kwargs):
163-
if cat is RuntimeWarning and (
164-
msg == "Mean of empty slice"
165-
or msg == "Degrees of freedom <= 0 for slice"):
165+
if cat is RuntimeWarning and msg in (
166+
"Mean of empty slice", "Degrees of freedom <= 0 for slice"):
166167
if self.normalize:
167168
raise ValueError("some columns have no defined values")
168169
else:
@@ -174,11 +175,11 @@ def nowarn(msg, cat, *args, **kwargs):
174175
orig_warn = warnings.warn
175176
with patch("warnings.warn", new=nowarn):
176177
means = np.nanmean(x, axis=0)
177-
vars = np.nanvar(x, axis=0)
178-
if self.normalize and not vars.all():
178+
stdvars = np.nanvar(x, axis=0)
179+
if self.normalize and not stdvars.all():
179180
raise ValueError("some columns are constant")
180181
return EuclideanColumnsModel(
181-
attributes, self.impute, self.normalize, means, vars)
182+
attributes, self.impute, self.normalize, means, stdvars)
182183

183184

184185
class ManhattanRowsModel(FittedDistanceModel):
@@ -270,6 +271,7 @@ class Manhattan(FittedDistance):
270271
rows_model_type = ManhattanRowsModel
271272

272273
def __new__(cls, e1=None, e2=None, axis=1, impute=False, normalize=False):
274+
# pylint: disable=arguments-differ
273275
return super().__new__(cls, e1, e2, axis, impute, normalize=normalize)
274276

275277
def get_continuous_stats(self, column):
@@ -337,6 +339,10 @@ def fit_rows(self, attributes, x, n_vals):
337339

338340
fit_cols = fit_rows
339341

342+
def get_continuous_stats(self, column):
343+
# Implement an unneeded abstract method to silence pylint
344+
return None
345+
340346
class CosineModel(FittedDistanceModel):
341347
"""Model for computation of cosine distances across rows and columns.
342348
All non-zero discrete values are treated as 1."""
@@ -402,6 +408,7 @@ def _compute_dense(self, x1, x2):
402408
compute distances between rows without missing values, and a slower
403409
loop for those with missing values.
404410
"""
411+
# view is false positive, pylint: disable=no-member
405412
nonzeros1 = np.not_equal(x1, 0).view(np.int8)
406413
if self.axis == 1:
407414
nans1 = _distance.any_nan_row(x1)
@@ -421,7 +428,8 @@ def _compute_dense(self, x1, x2):
421428
return _distance.jaccard_cols(
422429
nonzeros1, x1, nans1, self.ps)
423430

424-
def _compute_sparse(self, x1, x2=None):
431+
@staticmethod
432+
def _compute_sparse(x1, x2=None):
425433
symmetric = x2 is None
426434
if symmetric:
427435
x2 = x1
@@ -462,6 +470,10 @@ def fit_rows(self, attributes, x, n_vals):
462470

463471
fit_cols = fit_rows
464472

473+
def get_continuous_stats(self, column):
474+
# Implement an unneeded abstract method to silence pylint
475+
return None
476+
465477

466478
class CorrelationDistanceModel(DistanceModel):
467479
"""Helper class for normal and absolute Pearson and Spearman correlation"""
@@ -561,7 +573,7 @@ def _corrcoef2(a, b, axis=0):
561573
numpy.corrcoef
562574
"""
563575
a, b = np.atleast_2d(a, b)
564-
if not (axis == 0 or axis == 1):
576+
if axis not in (0, 1):
565577
raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis))
566578

567579
mean_a = np.mean(a, axis=axis, keepdims=True)
@@ -597,6 +609,7 @@ def _corrcoef2(a, b, axis=0):
597609

598610

599611
class CorrelationDistance(Distance):
612+
# pylint: disable=abstract-method
600613
supports_missing = False
601614

602615

0 commit comments

Comments
 (0)