biolab · lanzagar · Aug 7, 2020 · Jul 24, 2020 · Jul 31, 2020 · lanzagar
diff --git a/Orange/preprocess/discretize.py b/Orange/preprocess/discretize.py
@@ -83,6 +83,12 @@ def fmt(val):
         dvar.to_sql = to_sql
         return dvar
 
+    def __eq__(self, other):  # equal if the base class agrees and points match
+        return super().__eq__(other) and self.points == other.points
+
+    def __hash__(self):  # hash of (type, variable, points as a tuple)
+        return hash((type(self), self.variable, tuple(self.points)))
+
 
 class BinSql:
     def __init__(self, var, points):

diff --git a/Orange/preprocess/impute.py b/Orange/preprocess/impute.py
@@ -32,6 +32,12 @@ def transform(self, c):
         else:
             return np.where(np.isnan(c), self.value, c)
 
+    def __eq__(self, other):  # equal if the base class agrees and values match
+        return super().__eq__(other) and self.value == other.value
+
+    def __hash__(self):  # hash of (type, variable, value converted to float)
+        return hash((type(self), self.variable, float(self.value)))
+
 
 class BaseImputeMethod(Reprable):
     name = ""
@@ -316,6 +322,12 @@ def transform(self, c):
         c[nanindices] = sample
         return c
 
+    def __eq__(self, other):  # NOTE(review): assumes distributions compare to a single bool
+        return super().__eq__(other) and self.distribution == other.distribution
+
+    def __hash__(self):  # NOTE(review): requires self.distribution to be hashable
+        return hash((type(self), self.variable, self.distribution))
+
 
 class Random(BaseImputeMethod):
     name = "Random values"

diff --git a/Orange/preprocess/tests/test_discretize.py b/Orange/preprocess/tests/test_discretize.py
@@ -5,8 +5,10 @@
 from time import struct_time, mktime
 
 import numpy as np
+
+from Orange.data import ContinuousVariable
 from Orange.preprocess.discretize import \
-    _time_binnings, time_binnings, BinDefinition
+ _time_binnings, time_binnings, BinDefinition, Discretizer
 
 
 # pylint: disable=redefined-builtin
@@ -17,12 +19,12 @@ def create(year=1970, month=1, day=1, hour=0, min=0, sec=0):
 class TestTimeBinning(unittest.TestCase):
     def setUp(self):
         self.dates = [mktime(x) for x in
-            [(1975, 6, 9, 10, 0, 0, 0, 161, 0),
-             (1975, 6, 9, 10, 50, 0, 0, 161, 0),
-             (1975, 6, 9, 11, 40, 0, 0, 161, 0),
-             (1975, 6, 9, 12, 30, 0, 0, 161, 0),
-             (1975, 6, 9, 13, 20, 0, 0, 161, 0),
-             (1975, 6, 9, 14, 10, 0, 0, 161, 0)]]
+                      [(1975, 6, 9, 10, 0, 0, 0, 161, 0),  # 1975-06-09, 10:00 to 14:10
+                       (1975, 6, 9, 10, 50, 0, 0, 161, 0),  # in 50-minute steps
+                       (1975, 6, 9, 11, 40, 0, 0, 161, 0),
+                       (1975, 6, 9, 12, 30, 0, 0, 161, 0),
+                       (1975, 6, 9, 13, 20, 0, 0, 161, 0),
+                       (1975, 6, 9, 14, 10, 0, 0, 161, 0)]]
 
     def test_binning(self):
         def tr1(s):
@@ -752,5 +754,28 @@ def test_thresholds(self):
         self.assertEqual(bindef.nbins, 2)
 
 
+class TestDiscretizer(unittest.TestCase):  # equality and hashing of Discretizer
+    def test_equality(self):
+        v1 = ContinuousVariable("x")
+        v2 = ContinuousVariable("x", number_of_decimals=42)
+        v3 = ContinuousVariable("y")
+        assert v1 == v2  # precondition: number_of_decimals must not affect equality
+
+        t1 = Discretizer(v1, [0, 2, 1])
+        t1a = Discretizer(v2, [0, 2, 1])
+        t2 = Discretizer(v3, [0, 2, 1])
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)  # equal variables, same points
+        self.assertNotEqual(t1, t2)  # different variable
+
+        self.assertEqual(hash(t1), hash(t1a))  # equal objects must hash equally
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        t1 = Discretizer(v1, [0, 2, 1])
+        t1a = Discretizer(v2, [1, 2, 0])  # same numbers in a different order
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Orange/preprocess/tests/test_impute.py b/Orange/preprocess/tests/test_impute.py
@@ -0,0 +1,56 @@
+import unittest
+
+from Orange.data import DiscreteVariable, ContinuousVariable
+from Orange.preprocess.impute import ReplaceUnknownsRandom, ReplaceUnknowns
+from Orange.statistics.distribution import Discrete
+
+
+class TestReplaceUnknowns(unittest.TestCase):  # equality and hashing of ReplaceUnknowns
+    def test_equality(self):
+        v1 = ContinuousVariable("x")
+        v2 = ContinuousVariable("x")  # distinct object with the same name as v1
+        v3 = ContinuousVariable("y")
+
+        t1 = ReplaceUnknowns(v1, 0)
+        t1a = ReplaceUnknowns(v2, 0)
+        t2 = ReplaceUnknowns(v3, 0)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)  # same-named variables, same value
+        self.assertNotEqual(t1, t2)  # different variable
+
+        self.assertEqual(hash(t1), hash(t1a))  # equal objects must hash equally
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        t1 = ReplaceUnknowns(v1, 0)
+        t1a = ReplaceUnknowns(v1, 1)  # same variable, different replacement value
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+
+class TestReplaceUnknownsRandom(unittest.TestCase):  # equality and hashing of ReplaceUnknownsRandom
+    def test_equality(self):
+        v1 = DiscreteVariable("x", tuple("abc"))
+        v2 = DiscreteVariable("x", tuple("abc"))  # distinct object, same name and values as v1
+        v3 = DiscreteVariable("y", tuple("abc"))
+
+        d1 = Discrete([1, 2, 3], v1)
+        d2 = Discrete([1, 2, 3], v2)
+        d3 = Discrete([1, 2, 3], v3)
+
+        t1 = ReplaceUnknownsRandom(v1, d1)
+        t1a = ReplaceUnknownsRandom(v2, d2)
+        t2 = ReplaceUnknownsRandom(v3, d3)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)  # same-named variables, same counts
+        self.assertNotEqual(t1, t2)  # different variable
+
+        self.assertEqual(hash(t1), hash(t1a))  # equal objects must hash equally
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        d1[1] += 1  # in-place change of d1; the assertions below expect t1 to reflect it
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Orange/preprocess/tests/test_transformation.py b/Orange/preprocess/tests/test_transformation.py
@@ -0,0 +1,88 @@
+import unittest
+
+import numpy as np
+
+from Orange.data import DiscreteVariable
+from Orange.preprocess.transformation import \
+    Transformation, _Indicator, Normalizer, Lookup
+
+
+class TestTransformEquality(unittest.TestCase):  # __eq__/__hash__ of the transformation classes
+    def setUp(self):
+        self.disc1 = DiscreteVariable("d1", values=tuple("abc"))
+        self.disc1a = DiscreteVariable("d1", values=tuple("abc"))  # separate object equal to disc1
+        self.disc2 = DiscreteVariable("d2", values=tuple("abc"))
+        assert self.disc1 == self.disc1a  # precondition for the "equal variable" cases below
+
+    def test_transformation(self):
+        t1 = Transformation(self.disc1)
+        t1a = Transformation(self.disc1a)
+        t2 = Transformation(self.disc2)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)  # equal variables
+        self.assertNotEqual(t1, t2)  # different variable
+
+        self.assertEqual(hash(t1), hash(t1a))  # equal objects must hash equally
+        self.assertNotEqual(hash(t1), hash(t2))
+
+    def test_indicator(self):
+        t1 = _Indicator(self.disc1, 0)
+        t1a = _Indicator(self.disc1a, 0)
+        t2 = _Indicator(self.disc2, 0)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)
+        self.assertNotEqual(t1, t2)
+
+        self.assertEqual(hash(t1), hash(t1a))
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        t1 = _Indicator(self.disc1, 0)
+        t1a = _Indicator(self.disc1a, 1)  # equal variable, different value
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+    def test_normalizer(self):
+        t1 = Normalizer(self.disc1, 0, 1)
+        t1a = Normalizer(self.disc1a, 0, 1)
+        t2 = Normalizer(self.disc2, 0, 1)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)
+        self.assertNotEqual(t1, t2)
+
+        self.assertEqual(hash(t1), hash(t1a))
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        t1 = Normalizer(self.disc1, 0, 1)
+        t1a = Normalizer(self.disc1a, 1, 1)  # second argument differs
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+        t1 = Normalizer(self.disc1, 0, 1)
+        t1a = Normalizer(self.disc1a, 0, 2)  # third argument differs
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+    def test_lookup(self):
+        t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
+        t1a = Lookup(self.disc1a, np.array([0, 2, 1]), 1)
+        t2 = Lookup(self.disc2, np.array([0, 2, 1]), 1)
+        self.assertEqual(t1, t1)
+        self.assertEqual(t1, t1a)
+        self.assertNotEqual(t1, t2)
+
+        self.assertEqual(hash(t1), hash(t1a))
+        self.assertNotEqual(hash(t1), hash(t2))
+
+        t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
+        t1a = Lookup(self.disc1a, np.array([1, 2, 0]), 1)  # different lookup table
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+        t1 = Lookup(self.disc1, np.array([0, 2, 1]), 1)
+        t1a = Lookup(self.disc1a, np.array([0, 2, 1]), 2)  # different unknown value
+        self.assertNotEqual(t1, t1a)
+        self.assertNotEqual(hash(t1), hash(t1a))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Orange/preprocess/transformation.py b/Orange/preprocess/transformation.py
@@ -48,25 +48,21 @@ def transform(self, c):
         raise NotImplementedError(
             "ColumnTransformations must implement method 'transform'.")
 
-
-class Identity(Transformation):
-    """Return an untransformed value of `c`.
-    """
-    def transform(self, c):
-        return c
-
     def __eq__(self, other):
         return type(other) is type(self) and self.variable == other.variable
 
     def __hash__(self):
         return hash((type(self), self.variable))
 
 
-class Indicator(Transformation):
-    """
-    Return an indicator value that equals 1 if the variable has the specified
-    value and 0 otherwise.
+class Identity(Transformation):  # __eq__ and __hash__ are inherited from the base class
+    """Return an untransformed value of `c`.
     """
+    def transform(self, c):
+        return c
+
+
+class _Indicator(Transformation):
     def __init__(self, variable, value):
         """
         :param variable: The variable whose transformed value is returned.
@@ -78,26 +74,27 @@ def __init__(self, variable, value):
         super().__init__(variable)
         self.value = value
 
+    def __eq__(self, other):  # equal if the base class agrees and values match
+        return super().__eq__(other) and self.value == other.value
+
+    def __hash__(self):  # hash of (type, variable, value)
+        return hash((type(self), self.variable, self.value))
+
+
+class Indicator(_Indicator):  # __init__, __eq__ and __hash__ come from _Indicator
+    """
+    Return an indicator value that equals 1 if the variable has the specified
+    value and 0 otherwise.
+    """
     def transform(self, c):
         return c == self.value
 
 
-class Indicator1(Transformation):
+class Indicator1(_Indicator):  # __init__, __eq__ and __hash__ come from _Indicator
     """
     Return an indicator value that equals 1 if the variable has the specified
     value and -1 otherwise.
     """
-    def __init__(self, variable, value):
-        """
-        :param variable: The variable whose transformed value is returned.
-        :type variable: int or str or :obj:`~Orange.data.Variable`
-
-        :param value: The value to which the indicator refers
-        :type value: int or float
-        """
-        super().__init__(variable)
-        self.value = value
-
     def transform(self, c):
         return (c == self.value) * 2 - 1
 
@@ -129,6 +126,13 @@ def transform(self, c):
         else:
             return (c - self.offset) * self.factor
 
+    def __eq__(self, other):  # base-class equality plus identical offset and factor
+        return super().__eq__(other) \
+               and self.offset == other.offset and self.factor == other.factor
+
+    def __hash__(self):  # hash of (type, variable, offset, factor)
+        return hash((type(self), self.variable, self.offset, self.factor))
+
 
 class Lookup(Transformation):
     """
@@ -139,7 +143,7 @@ def __init__(self, variable, lookup_table, unknown=np.nan):
         :param variable: The variable whose transformed value is returned.
         :type variable: int or str or :obj:`~Orange.data.DiscreteVariable`
         :param lookup_table: transformations for each value of `self.variable`
-        :type lookup_table: np.array or list or tuple
+        :type lookup_table: np.array
         :param unknown: The value to be used as unknown value.
         :type unknown: float or int
         """
@@ -156,3 +160,31 @@ def transform(self, column):
         column[mask] = 0
         values = self.lookup_table[column]
         return np.where(mask, self.unknown, values)
+
+    def __eq__(self, other):
+        """
+        Two lookups are equal if they have the same type and variable (checked
+        by the base class), lookup tables of the same shape with numerically
+        close values, and close `unknown` values. NaNs are treated as equal.
+        """
+        if not super().__eq__(other):
+            return False
+        # np.allclose broadcasts its arguments, so tables of different shapes
+        # could compare as equal (or raise); reject them explicitly.
+        if np.shape(self.lookup_table) != np.shape(other.lookup_table):
+            return False
+        return np.allclose(self.lookup_table, other.lookup_table,
+                           equal_nan=True) \
+            and np.allclose(self.unknown, other.unknown, equal_nan=True)
+
+    def __hash__(self):
+        """
+        Hash of the type, variable, lookup table values and `unknown`.
+        """
+        # Since Python 3.10 the hash of a NaN depends on object identity, and
+        # iterating over an array creates new scalar objects every time. NaNs
+        # are therefore replaced by None so that the hash is stable and equal
+        # lookups with NaNs (the default `unknown`) get equal hashes.
+        table = tuple(None if np.isnan(x) else x for x in self.lookup_table)
+        unknown = None if np.isnan(self.unknown) else self.unknown
+        return hash((type(self), self.variable, table, unknown))
diff --git a/Orange/widgets/data/owcontinuize.py b/Orange/widgets/data/owcontinuize.py
@@ -189,6 +189,12 @@ def transform(self, c):
             t *= self.weight
         return t
 
+    def __eq__(self, other):  # weight is compared here; value presumably by the base class - confirm
+        return super().__eq__(other) and self.weight == other.weight
+
+    def __hash__(self):  # hash of (type, variable, value, weight)
+        return hash((type(self), self.variable, self.value, self.weight))
+
 
 def make_indicator_var(source, value_ind, weight=None):
     if weight is None:

diff --git a/Orange/widgets/data/owcreateclass.py b/Orange/widgets/data/owcreateclass.py
@@ -86,6 +86,17 @@ def transform(self, c):
         res[nans] = np.nan
         return res
 
+    def __eq__(self, other):  # base-class equality plus identical matching settings
+        return super().__eq__(other) \
+               and self.patterns == other.patterns \
+               and self.case_sensitive == other.case_sensitive \
+               and self.match_beginning == other.match_beginning
+
+    def __hash__(self):  # hash of type, variable and the attributes compared in __eq__
+        return hash((type(self), self.variable,
+                     tuple(self.patterns),
+                     self.case_sensitive, self.match_beginning))
+
 
 class ValueFromDiscreteSubstring(Lookup):
     """