Use the storage-format heuristic (`_determine_format`) in `add`.

hameerabbasi · hameerabbasi · commit bd2c1b23c51d · 2024-10-24T15:08:19.000+02:00
diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py
@@ -5,10 +5,13 @@
 from mlir import ir
 from mlir.dialects import arith, complex, func, linalg, sparse_tensor, tensor
 
+import numpy as np
+
 from ._array import Array
 from ._common import fn_cache
 from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx, pm
 from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType
+from .levels import _determine_format
 
 
 @fn_cache
@@ -17,7 +20,6 @@ def get_add_module(
     b_tensor_type: ir.RankedTensorType,
     out_tensor_type: ir.RankedTensorType,
     dtype: DType,
-    rank: int,
 ) -> ir.Module:
     with ir.Location.unknown(ctx):
         module = ir.Module.create()
@@ -31,7 +33,7 @@ def get_add_module(
             raise RuntimeError(f"Can not add {dtype=}.")
 
         dtype = dtype._get_mlir_type()
-        ordering = ir.AffineMap.get_permutation(range(rank))
+        max_rank = out_tensor_type.rank
 
         with ir.InsertionPoint(module.body):
 
@@ -42,8 +44,13 @@ def add(a, b):
                     [out_tensor_type],
                     [a, b],
                     [out],
-                    ir.ArrayAttr.get([ir.AffineMapAttr.get(p) for p in (ordering,) * 3]),
-                    ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type<parallel>")] * rank),
+                    ir.ArrayAttr.get(
+                        [
+                            ir.AffineMapAttr.get(ir.AffineMap.get_minor_identity(max_rank, t.rank))
+                            for t in (a_tensor_type, b_tensor_type, out_tensor_type)
+                        ]
+                    ),
+                    ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type<parallel>")] * out_tensor_type.rank),
                 )
                 block = generic_op.regions[0].blocks.append(dtype, dtype, dtype)
                 with ir.InsertionPoint(block):
@@ -127,17 +134,16 @@ def broadcast_to(in_tensor):
 
 
 def add(x1: Array, x2: Array) -> Array:
-    ret_storage_format = x1.format
+    ret_storage_format = _determine_format(x1.format, x2.format, dtype=x1.dtype, union=True)
     ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)()
-    out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape)
+    out_tensor_type = ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape))
 
     # TODO: Decide what will be the output tensor_type
     add_module = get_add_module(
         x1._get_mlir_type(),
         x2._get_mlir_type(),
         out_tensor_type=out_tensor_type,
         dtype=x1.dtype,
-        rank=x1.ndim,
     )
     add_module.invoke(
         "add",
diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py
@@ -207,7 +207,7 @@ def _count_sparse_levels(format: StorageFormat) -> int:
     return sum(_is_sparse_level(lvl) for lvl in format.levels)
 
 
-def _determine_levels(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat:
+def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat:
     if len(formats) == 0:
         if out_ndim is None:
             out_ndim = 0
@@ -222,13 +222,13 @@ def _determine_levels(*formats: StorageFormat, dtype: DType, union: bool, out_nd
     if out_ndim is None:
         out_ndim = max(fmt.rank for fmt in formats)
 
-    n_sparse = 0
     pos_width = 0
     crd_width = 0
-    op = max if union else min
+    op = min if union else max
+    n_sparse = None
     order = ()
     for fmt in formats:
-        n_sparse = op(n_sparse, _count_sparse_levels(fmt))
+        n_sparse = _count_sparse_levels(fmt) if n_sparse is None else op(n_sparse, _count_sparse_levels(fmt))
         pos_width = max(pos_width, fmt.pos_width)
         crd_width = max(crd_width, fmt.crd_width)
         if order != "C":
diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py
@@ -164,12 +164,12 @@ def test_add(rng, dtype):
     assert_csx_equal(expected, actual)
 
     actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor))
-    expected = csc + csr
+    expected = (csc + csr).asformat("csr")
     assert_csx_equal(expected, actual)
 
-    actual = sparse.to_scipy(sparse.add(csr_tensor, dense_tensor))
-    expected = sps.csr_matrix(csr + dense)
-    assert_csx_equal(expected, actual)
+    actual = sparse.to_numpy(sparse.add(csr_tensor, dense_tensor))
+    expected = csr + dense
+    np.testing.assert_array_equal(actual, expected)
 
     actual = sparse.to_numpy(sparse.add(dense_tensor, csr_tensor))
     expected = csr + dense