Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Orange/preprocess/discretize.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ def fmt(val):
dvar.to_sql = to_sql
return dvar

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
equal `points`.
"""
# NOTE(review): `==` gives a plain bool for lists/tuples of points; if
# `points` can be a numpy array the comparison is elementwise -- confirm.
return super().__eq__(other) and self.points == other.points

def __hash__(self):
"""
Hash the concrete type, the variable and the points.
"""
# `points` is converted to a tuple so that an unhashable sequence
# (e.g. a list) can take part in the hash.
return hash((type(self), self.variable, tuple(self.points)))


class BinSql:
def __init__(self, var, points):
Expand Down
12 changes: 12 additions & 0 deletions Orange/preprocess/impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def transform(self, c):
else:
return np.where(np.isnan(c), self.value, c)

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
an equal replacement `value`.
"""
return super().__eq__(other) and self.value == other.value

def __hash__(self):
"""
Hash the concrete type, the variable and the replacement value.
"""
# The value is coerced to `float` before hashing.
# NOTE(review): presumably so that numerically equal values of different
# numeric types hash alike -- confirm.
return hash((type(self), self.variable, float(self.value)))


class BaseImputeMethod(Reprable):
name = ""
Expand Down Expand Up @@ -316,6 +322,12 @@ def transform(self, c):
c[nanindices] = sample
return c

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
an equal `distribution`.
"""
# NOTE(review): relies on the distribution object's own `__eq__`
# returning a single truth value -- confirm for the distribution type.
return super().__eq__(other) and self.distribution == other.distribution

def __hash__(self):
"""
Hash the concrete type, the variable and the distribution.
"""
# NOTE(review): requires the distribution object to be hashable -- confirm.
return hash((type(self), self.variable, self.distribution))


class Random(BaseImputeMethod):
name = "Random values"
Expand Down
39 changes: 32 additions & 7 deletions Orange/preprocess/tests/test_discretize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from time import struct_time, mktime

import numpy as np

from Orange.data import ContinuousVariable
from Orange.preprocess.discretize import \
_time_binnings, time_binnings, BinDefinition
_time_binnings, time_binnings, BinDefinition, Discretizer


# pylint: disable=redefined-builtin
Expand All @@ -17,12 +19,12 @@ def create(year=1970, month=1, day=1, hour=0, min=0, sec=0):
class TestTimeBinning(unittest.TestCase):
def setUp(self):
self.dates = [mktime(x) for x in
[(1975, 6, 9, 10, 0, 0, 0, 161, 0),
(1975, 6, 9, 10, 50, 0, 0, 161, 0),
(1975, 6, 9, 11, 40, 0, 0, 161, 0),
(1975, 6, 9, 12, 30, 0, 0, 161, 0),
(1975, 6, 9, 13, 20, 0, 0, 161, 0),
(1975, 6, 9, 14, 10, 0, 0, 161, 0)]]
[(1975, 6, 9, 10, 0, 0, 0, 161, 0),
(1975, 6, 9, 10, 50, 0, 0, 161, 0),
(1975, 6, 9, 11, 40, 0, 0, 161, 0),
(1975, 6, 9, 12, 30, 0, 0, 161, 0),
(1975, 6, 9, 13, 20, 0, 0, 161, 0),
(1975, 6, 9, 14, 10, 0, 0, 161, 0)]]

def test_binning(self):
def tr1(s):
Expand Down Expand Up @@ -752,5 +754,28 @@ def test_thresholds(self):
self.assertEqual(bindef.nbins, 2)


class TestDiscretizer(unittest.TestCase):
"""Equality and hashing of `Discretizer`."""
def test_equality(self):
"""Discretizers compare and hash by variable and by points."""
v1 = ContinuousVariable("x")
v2 = ContinuousVariable("x", number_of_decimals=42)
v3 = ContinuousVariable("y")
# Fixture sanity check: both "x" variables must compare equal even
# though they were created with different `number_of_decimals`.
assert v1 == v2

# Same points: equal variables give equal discretizers, a different
# variable gives a different one.
t1 = Discretizer(v1, [0, 2, 1])
t1a = Discretizer(v2, [0, 2, 1])
t2 = Discretizer(v3, [0, 2, 1])
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Equal variables, same points in a different order: not equal.
t1 = Discretizer(v1, [0, 2, 1])
t1a = Discretizer(v2, [1, 2, 0])
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))


if __name__ == '__main__':
unittest.main()
56 changes: 56 additions & 0 deletions Orange/preprocess/tests/test_impute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest

from Orange.data import DiscreteVariable, ContinuousVariable
from Orange.preprocess.impute import ReplaceUnknownsRandom, ReplaceUnknowns
from Orange.statistics.distribution import Discrete


class TestReplaceUnknowns(unittest.TestCase):
"""Equality and hashing of `ReplaceUnknowns`."""
def test_equality(self):
"""Instances compare and hash by variable and by replacement value."""
v1 = ContinuousVariable("x")
v2 = ContinuousVariable("x")
v3 = ContinuousVariable("y")

# Same value: only the variable decides equality.
t1 = ReplaceUnknowns(v1, 0)
t1a = ReplaceUnknowns(v2, 0)
t2 = ReplaceUnknowns(v3, 0)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Same variable, different value: not equal.
t1 = ReplaceUnknowns(v1, 0)
t1a = ReplaceUnknowns(v1, 1)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))


class TestReplaceUnknownsRandom(unittest.TestCase):
"""Equality and hashing of `ReplaceUnknownsRandom`."""
def test_equality(self):
"""Instances compare and hash by variable and by distribution."""
v1 = DiscreteVariable("x", tuple("abc"))
v2 = DiscreteVariable("x", tuple("abc"))
v3 = DiscreteVariable("y", tuple("abc"))

# One distribution per variable, all built from the same counts.
d1 = Discrete([1, 2, 3], v1)
d2 = Discrete([1, 2, 3], v2)
d3 = Discrete([1, 2, 3], v3)

t1 = ReplaceUnknownsRandom(v1, d1)
t1a = ReplaceUnknownsRandom(v2, d2)
t2 = ReplaceUnknownsRandom(v3, d3)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Changing `d1` in place is expected to make `t1` differ from `t1a`
# in both equality and hash.
d1[1] += 1
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))


if __name__ == "__main__":
unittest.main()
88 changes: 88 additions & 0 deletions Orange/preprocess/tests/test_transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import unittest

import numpy as np

from Orange.data import DiscreteVariable
from Orange.preprocess.transformation import \
Transformation, _Indicator, Normalizer, Lookup


class TestTransformEquality(unittest.TestCase):
"""Equality and hashing of the transformation classes."""
def setUp(self):
"""Create two equal discrete variables and one that differs by name."""
self.disc1 = DiscreteVariable("d1", values=tuple("abc"))
self.disc1a = DiscreteVariable("d1", values=tuple("abc"))
self.disc2 = DiscreteVariable("d2", values=tuple("abc"))
# Fixture sanity check: the tests below rely on these being equal.
assert self.disc1 == self.disc1a

def test_transformation(self):
"""`Transformation` compares and hashes by variable."""
t1 = Transformation(self.disc1)
t1a = Transformation(self.disc1a)
t2 = Transformation(self.disc2)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

def test_indicator(self):
"""`_Indicator` compares and hashes by variable and by value."""
t1 = _Indicator(self.disc1, 0)
t1a = _Indicator(self.disc1a, 0)
t2 = _Indicator(self.disc2, 0)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Equal variables, different value.
t1 = _Indicator(self.disc1, 0)
t1a = _Indicator(self.disc1a, 1)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))

def test_normalizer(self):
"""`Normalizer` compares and hashes by variable and both numeric arguments."""
t1 = Normalizer(self.disc1, 0, 1)
t1a = Normalizer(self.disc1a, 0, 1)
t2 = Normalizer(self.disc2, 0, 1)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Only the second constructor argument differs.
t1 = Normalizer(self.disc1, 0, 1)
t1a = Normalizer(self.disc1a, 1, 1)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))

# Only the third constructor argument differs.
t1 = Normalizer(self.disc1, 0, 1)
t1a = Normalizer(self.disc1a, 0, 2)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))

def test_lookup(self):
"""`Lookup` compares and hashes by variable, lookup table and unknown."""
t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
t1a = Lookup(self.disc1a, np.array([0, 2, 1]), 1)
t2 = Lookup(self.disc2, np.array([0, 2, 1]), 1)
self.assertEqual(t1, t1)
self.assertEqual(t1, t1a)
self.assertNotEqual(t1, t2)

self.assertEqual(hash(t1), hash(t1a))
self.assertNotEqual(hash(t1), hash(t2))

# Same table entries in a different order.
t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
t1a = Lookup(self.disc1a, np.array([1, 2, 0]), 1)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))

# Same table, different `unknown` argument.
t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
t1a = Lookup(self.disc1a, np.array([0, 2, 1]), 2)
self.assertNotEqual(t1, t1a)
self.assertNotEqual(hash(t1), hash(t1a))


if __name__ == '__main__':
unittest.main()
62 changes: 38 additions & 24 deletions Orange/preprocess/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,21 @@ def transform(self, c):
raise NotImplementedError(
"ColumnTransformations must implement method 'transform'.")


class Identity(Transformation):
"""Return an untransformed value of `c`.
"""
def transform(self, c):
return c

def __eq__(self, other):
"""
Return whether `other` is of exactly the same class and refers to an
equal variable.
"""
# `type(...) is type(...)` is an exact-type check: an instance of a
# subclass never compares equal to an instance of its base class.
return type(other) is type(self) and self.variable == other.variable
Comment on lines 51 to 52
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about the default __eq__ for Transformation along these lines:

return type(other) is type(self) and vars(self) == vars(other)

It seems that this is usually what we want, and it would make a lot of trivial overloads of __eq__ unnecessary?

__hash__ is a bit more tricky since class attributes can often be unhashable objects. We could leave it as it is or try hashing a sorted tuple of vars(self). The latter should work for simple cases like Indicator and at least fails more noticeably if it should have been overloaded in a subclass but was not!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm hesitant here. It looks like a good idea, but making the parent class too smart might shoot a derived class in the foot. Somebody could add an attribute without realizing it's used in comparisons. Maybe it's better to be explicit.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You convinced me at first, but then again, you can make an equivalent claim for the other side:
Somebody could add an attribute without realizing it's NOT used in comparisons.

Maybe I preferred comparing everything (vs the minimum) because it behaves closer to how it was before - if some class doesn't overload eq it might erroneously return False (like up to now), but a True should be correct.

In the end it does not really matter, if something is not working like it should it's a bug to be fixed in either case.
I don't mind merging it with either default.


def __hash__(self):
"""
Hash the concrete type together with the variable.
"""
return hash((type(self), self.variable))


class Indicator(Transformation):
"""
Return an indicator value that equals 1 if the variable has the specified
value and 0 otherwise.
class Identity(Transformation):
"""Return an untransformed value of `c`.
"""
def transform(self, c):
# The argument is handed back as is: the same object, not a copy.
return c


class _Indicator(Transformation):
def __init__(self, variable, value):
"""
:param variable: The variable whose transformed value is returned.
Expand All @@ -78,26 +74,27 @@ def __init__(self, variable, value):
super().__init__(variable)
self.value = value

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and
refers to an equal `value`.
"""
return super().__eq__(other) and self.value == other.value

def __hash__(self):
"""
Hash the concrete type, the variable and the indicated value.
"""
return hash((type(self), self.variable, self.value))


class Indicator(_Indicator):
"""
Return an indicator value that equals 1 if the variable has the specified
value and 0 otherwise.
"""
def transform(self, c):
# The result is whatever `==` yields for `c`.
# NOTE(review): presumably a boolean array when `c` is a numpy column,
# which reads as 1/0 -- confirm against callers.
return c == self.value


class Indicator1(Transformation):
class Indicator1(_Indicator):
"""
Return an indicator value that equals 1 if the variable has the specified
value and -1 otherwise.
"""
def __init__(self, variable, value):
"""
:param variable: The variable whose transformed value is returned.
:type variable: int or str or :obj:`~Orange.data.Variable`

:param value: The value to which the indicator refers
:type value: int or float
"""
super().__init__(variable)
self.value = value

def transform(self, c):
return (c == self.value) * 2 - 1

Expand Down Expand Up @@ -129,6 +126,13 @@ def transform(self, c):
else:
return (c - self.offset) * self.factor

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
equal `offset` and `factor`.
"""
return super().__eq__(other) \
and self.offset == other.offset and self.factor == other.factor

def __hash__(self):
"""
Hash the concrete type, the variable, the offset and the factor.
"""
return hash((type(self), self.variable, self.offset, self.factor))


class Lookup(Transformation):
"""
Expand All @@ -139,7 +143,7 @@ def __init__(self, variable, lookup_table, unknown=np.nan):
:param variable: The variable whose transformed value is returned.
:type variable: int or str or :obj:`~Orange.data.DiscreteVariable`
:param lookup_table: transformations for each value of `self.variable`
:type lookup_table: np.array or list or tuple
:type lookup_table: np.array
:param unknown: The value to be used as unknown value.
:type unknown: float or int
"""
Expand All @@ -156,3 +160,13 @@ def transform(self, column):
column[mask] = 0
values = self.lookup_table[column]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noticed this in passing - the docs say self.lookup_table can also be a list or tuple, but this looks like it will not work for native python types (indexing with a np.ndarray)

return np.where(mask, self.unknown, values)

def __eq__(self, other):
    """
    Return whether `other` is an equivalent lookup transformation.

    `other` must pass the superclass equality check, its lookup table
    must have the same shape and numerically close entries, and its
    `unknown` value must be numerically close. NaN is treated as equal
    to NaN in both comparisons.
    """
    # `np.allclose` broadcasts its arguments. Without the shape check,
    # tables of different lengths would raise ValueError, and a
    # one-element table could compare equal to a longer one.
    return super().__eq__(other) \
        and np.shape(self.lookup_table) == np.shape(other.lookup_table) \
        and np.allclose(self.lookup_table, other.lookup_table,
                        equal_nan=True) \
        and np.allclose(self.unknown, other.unknown, equal_nan=True)

def __hash__(self):
    """
    Hash the concrete type, the variable, the lookup table entries and
    the `unknown` value.

    NaN entries are replaced by a fixed placeholder before hashing.
    `__eq__` treats NaN as equal to NaN, but since Python 3.10 the hash
    of a NaN depends on object identity, and iterating a numpy array
    creates new scalar objects each time. Without the replacement, two
    equal lookups containing NaN would hash differently.
    """
    def stable(x):
        # Only floating-point values can be NaN; `x != x` holds for NaN only.
        if isinstance(x, (float, np.floating)) and x != x:
            return None
        return x

    # NOTE(review): `__eq__` compares with a tolerance (`np.allclose`),
    # so tables that are merely close may compare equal and still hash
    # differently.
    return hash((type(self), self.variable,
                 tuple(stable(x) for x in self.lookup_table),
                 stable(self.unknown)))
6 changes: 6 additions & 0 deletions Orange/widgets/data/owcontinuize.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,12 @@ def transform(self, c):
t *= self.weight
return t

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
an equal `weight`.
"""
return super().__eq__(other) and self.weight == other.weight

def __hash__(self):
"""
Hash the concrete type, the variable, the value and the weight.
"""
# NOTE(review): `value` takes part in the hash but is not compared
# directly in `__eq__`; presumably the superclass compares it -- confirm.
return hash((type(self), self.variable, self.value, self.weight))


def make_indicator_var(source, value_ind, weight=None):
if weight is None:
Expand Down
11 changes: 11 additions & 0 deletions Orange/widgets/data/owcreateclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ def transform(self, c):
res[nans] = np.nan
return res

def __eq__(self, other):
"""
Return whether `other` passes the superclass equality check and has
equal `patterns`, `case_sensitive` and `match_beginning`.
"""
return super().__eq__(other) \
and self.patterns == other.patterns \
and self.case_sensitive == other.case_sensitive \
and self.match_beginning == other.match_beginning

def __hash__(self):
"""
Hash the concrete type, the variable, the patterns and both matching
flags.
"""
# `patterns` is converted to a tuple so that an unhashable sequence
# (e.g. a list) can take part in the hash.
return hash((type(self), self.variable,
tuple(self.patterns),
self.case_sensitive, self.match_beginning))


class ValueFromDiscreteSubstring(Lookup):
"""
Expand Down
Loading