Merge pull request #41 from lensacom/arit

tdna · tdna · commit 9274ba13823d · 2015-06-19T09:44:58.000+02:00
Arit: add element-wise arithmetic operators and NumPy ufunc wrappers to ArrayRDD
diff --git a/splearn/rdd.py b/splearn/rdd.py
@@ -393,25 +393,32 @@ def __init__(self, rdd, bsize=-1, dtype=np.ndarray, noblock=False):
             raise ValueError("Only supported type for ArrayRDD is np.ndarray!")
         super(ArrayRDD, self).__init__(rdd, bsize, dtype, noblock)
 
-    def _on_axis(self, func, axis=None):
-        rdd = self._rdd.map(lambda x: getattr(x, func)(axis=axis))
+    def __mul__(self, other):
+        return self.multiply(other)
 
-        if axis is None:
-            return getattr(np.array(rdd.collect()), func)()
-        elif axis == 0:
-            return rdd.reduce(
-                lambda a, b: getattr(np.array((a, b)), func)(axis=0))
-        else:
-            return rdd.reduce(lambda a, b: np.concatenate((a, b)))
+    def __add__(self, other):
+        return self.add(other)
+
+    def __sub__(self, other):
+        return self.subtract(other)
+
+    def __div__(self, other):
+        return self.divide(other)
+
+    __truediv__ = __div__  # NOTE(review): goes via np.divide, not true_divide
+
+    def __pow__(self, other):
+        return self.power(other)
+
+    def __floordiv__(self, other):
+        return self.floor_divide(other)
+
+    def __mod__(self, other):
+        return self.mod(other)
 
     def tosparse(self):
         return SparseRDD(self._rdd.map(lambda x: sp.csr_matrix(x)))
 
-    def dot(self, other):
-        # TODO naive dot implementation with another ArrayRDD
-        rdd = self._rdd.map(lambda x: x.dot(other))
-        return ArrayRDD(rdd, bsize=self.bsize, noblock=True)
-
     def flatten(self):
         return self.map(lambda x: x.flatten())
 
@@ -424,14 +431,42 @@ def max(self, axis=None):
     def prod(self, axis=None):
         return self._on_axis('prod', axis)
 
+    def dot(self, other):
+        return self._on_other('dot', other)
 
-class SparseRDD(BlockRDD, ArrayLikeRDDMixin):
+    def add(self, other):
+        return self._on_other('add', other)
 
-    def __init__(self, rdd, bsize=-1, dtype=sp.spmatrix, noblock=False):
-        if dtype is not sp.spmatrix:
-            raise ValueError("Only supported type for SparseRDD is"
-                             " sp.spmatrix!")
-        super(SparseRDD, self).__init__(rdd, bsize, dtype, noblock)
+    def subtract(self, other):
+        return self._on_other('subtract', other)
+
+    def multiply(self, other):
+        return self._on_other('multiply', other)
+
+    def divide(self, other):
+        return self._on_other('divide', other)
+
+    def power(self, other):
+        return self._on_other('power', other)
+
+    def floor_divide(self, other):
+        return self._on_other('floor_divide', other)
+
+    def true_divide(self, other):
+        return self._on_other('true_divide', other)
+
+    def mod(self, other):
+        return self._on_other('mod', other)
+
+    def fmod(self, other):
+        return self._on_other('fmod', other)
+
+    def remainder(self, other):
+        return self._on_other('remainder', other)
+
+    def _on_other(self, func, other):  # np.<func>(block, other) per block
+        rdd = self._rdd.map(lambda x: getattr(np, func)(x, other))
+        return ArrayRDD(rdd, bsize=self.bsize, noblock=True)  # keep bsize
 
     def _on_axis(self, func, axis=None):
         rdd = self._rdd.map(lambda x: getattr(x, func)(axis=axis))
@@ -440,9 +475,18 @@ def _on_axis(self, func, axis=None):
             return getattr(np.array(rdd.collect()), func)()
         elif axis == 0:
             return rdd.reduce(
-                lambda a, b: getattr(sp.vstack((a, b)), func)(axis=0))
+                lambda a, b: getattr(np.array((a, b)), func)(axis=0))
         else:
-            return rdd.reduce(lambda a, b: sp.vstack((a, b)))
+            return rdd.reduce(lambda a, b: np.concatenate((a, b)))
+
+
+class SparseRDD(BlockRDD, ArrayLikeRDDMixin):
+
+    def __init__(self, rdd, bsize=-1, dtype=sp.spmatrix, noblock=False):
+        if dtype is not sp.spmatrix:
+            raise ValueError("Only supported type for SparseRDD is"
+                             " sp.spmatrix!")
+        super(SparseRDD, self).__init__(rdd, bsize, dtype, noblock)
 
     def toarray(self):
         """Returns the data as numpy.array from each partition."""
@@ -466,6 +510,17 @@ def min(self, axis=None):
     def max(self, axis=None):
         return self._on_axis('max', axis)
 
+    def _on_axis(self, func, axis=None):
+        rdd = self._rdd.map(lambda x: getattr(x, func)(axis=axis))
+
+        if axis is None:
+            return getattr(np.array(rdd.collect()), func)()
+        elif axis == 0:
+            return rdd.reduce(
+                lambda a, b: getattr(sp.vstack((a, b)), func)(axis=0))
+        else:
+            return rdd.reduce(lambda a, b: sp.vstack((a, b)))
+
 
 class DictRDD(BlockRDD):
 
diff --git a/splearn/tests/test_rdd.py b/splearn/tests/test_rdd.py
@@ -468,8 +468,109 @@ def test_mean(self):
     def test_dot(self):
         A, A_rdd = self.make_dense_rdd((20, 10))
         B, B_rdd = self.make_dense_rdd((10, 20))
-        assert_array_almost_equal(A_rdd.dot(B).toarray(), A.dot(B))
-        assert_array_almost_equal(B_rdd.dot(A).toarray(), B.dot(A))
+        assert_array_equal(A_rdd.dot(B).toarray(), A.dot(B))
+        assert_array_equal(B_rdd.dot(A).toarray(), B.dot(A))
+
+    def test_add(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A + B
+        assert_array_equal(
+            A_rdd.add(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd + B).toarray(), np_res)
+        A_rdd += B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_subtract(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A - B
+        assert_array_equal(
+            A_rdd.subtract(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd - B).toarray(), np_res)
+        A_rdd -= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_multiply(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A * B
+        assert_array_equal(
+            A_rdd.multiply(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd * B).toarray(), np_res)
+        A_rdd *= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_divide(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A / B
+        assert_array_equal(
+            A_rdd.divide(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd / B).toarray(), np_res)
+        A_rdd /= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_power(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A ** B
+        assert_array_equal(
+            A_rdd.power(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd ** B).toarray(), np_res)
+        A_rdd **= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_floor_divide(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A // B
+        assert_array_equal(
+            A_rdd.floor_divide(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd // B).toarray(), np_res)
+        A_rdd //= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_true_divide(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = np.true_divide(A, B)  # "/" is classic division on Python 2
+        assert_array_equal(
+            A_rdd.true_divide(B).toarray(), np_res
+        )
+
+    def test_mod(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = A % B
+        assert_array_equal(
+            A_rdd.mod(B).toarray(), np_res
+        )
+        assert_array_equal((A_rdd % B).toarray(), np_res)
+        A_rdd %= B
+        assert_array_equal(A_rdd.toarray(), np_res)
+
+    def test_fmod(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = np.fmod(A, B)
+        assert_array_equal(
+            A_rdd.fmod(B).toarray(), np_res
+        )
+
+    def test_remainder(self):
+        A, A_rdd = self.make_dense_rdd((8, 3))
+        B, B_rdd = self.make_dense_rdd((1, 3))
+        np_res = np.remainder(A, B)
+        assert_array_equal(
+            A_rdd.remainder(B).toarray(), np_res
+        )
 
     def test_flatten(self):
         X, X_rdd = self.make_dense_rdd((100, 3, 2))