From 32abcf09f327fe130efc0dd6bc3464230224b5d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= <mat646@gmail.com>
Date: Thu, 3 Oct 2024 10:01:56 +0000
Subject: [PATCH 1/2] ENH: Update MLIR backend to LLVM 20.dev

---
 sparse/mlir_backend/_constructors.py     | 39 ++++++++++++++-------
 sparse/mlir_backend/_ops.py              |  8 +++--
 sparse/mlir_backend/tests/test_simple.py | 44 +++++++++++++-----------
 3 files changed, 55 insertions(+), 36 deletions(-)

diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py
index b382c4f2..e2afbda2 100644
--- a/sparse/mlir_backend/_constructors.py
+++ b/sparse/mlir_backend/_constructors.py
@@ -108,11 +108,11 @@ def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType:
 
 
 @fn_cache
-def get_coo_class(values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]:
+def get_coo_class(values_dtype: type[DType], index_dtype: type[DType], *, rank: int = 2) -> type[ctypes.Structure]:
     class Coo(ctypes.Structure):
         _fields_ = [
             ("pos", get_nd_memref_descr(1, index_dtype)),
-            ("coords", get_nd_memref_descr(2, index_dtype)),
+            *[(f"coords_{i}", get_nd_memref_descr(1, index_dtype)) for i in range(rank)],
             ("data", get_nd_memref_descr(1, values_dtype)),
         ]
         dtype = values_dtype
@@ -124,7 +124,7 @@ def from_sps(cls, arr: sps.coo_array | Iterable[np.ndarray]) -> "Coo":
                 if not arr.has_canonical_format:
                     raise Exception("COO must have canonical format")
                 np_pos = np.array([0, arr.size], dtype=index_dtype.np_dtype)
-                np_coords = np.stack(arr.coords, axis=1, dtype=index_dtype.np_dtype)
+                np_coords = [np.array(coord, dtype=index_dtype.np_dtype) for coord in arr.coords]
                 np_data = arr.data
             else:
                 if len(arr) != 3:
@@ -132,21 +132,22 @@ def from_sps(cls, arr: sps.coo_array | Iterable[np.ndarray]) -> "Coo":
                 np_pos, np_coords, np_data = arr
 
             pos = numpy_to_ranked_memref(np_pos)
-            coords = numpy_to_ranked_memref(np_coords)
+            coords = [numpy_to_ranked_memref(coord) for coord in np_coords]
             data = numpy_to_ranked_memref(np_data)
-            coo_instance = cls(pos=pos, coords=coords, data=data)
+            coo_instance = cls(pos, *(coords + [data]))
             _take_owneship(coo_instance, np_pos)
-            _take_owneship(coo_instance, np_coords)
+            for coord in np_coords:
+                _take_owneship(coo_instance, coord)
             _take_owneship(coo_instance, np_data)
 
             return coo_instance
 
         def to_sps(self, shape: tuple[int, ...]) -> sps.coo_array | list[np.ndarray]:
             pos = ranked_memref_to_numpy(self.pos)
-            coords = ranked_memref_to_numpy(self.coords)[pos[0] : pos[1]]
+            coords = [ranked_memref_to_numpy(coord) for coord in self.get_coord_list()]
             data = ranked_memref_to_numpy(self.data)
             return (
-                sps.coo_array((data, coords.T), shape=shape)
+                sps.coo_array((data, np.stack(coords, axis=0, dtype=index_dtype.np_dtype)), shape=shape)
                 if len(shape) == 2
                 else PackedArgumentTuple((pos, coords, data))
             )
@@ -154,12 +155,15 @@ def to_sps(self, shape: tuple[int, ...]) -> sps.coo_array | list[np.ndarray]:
         def to_module_arg(self) -> list:
             return [
                 ctypes.pointer(ctypes.pointer(self.pos)),
-                ctypes.pointer(ctypes.pointer(self.coords)),
+                *[ctypes.pointer(ctypes.pointer(coord)) for coord in self.get_coord_list()],
                 ctypes.pointer(ctypes.pointer(self.data)),
             ]
 
         def get__fields_(self) -> list:
-            return [self.pos, self.coords, self.data]
+            return [self.pos, *self.get_coord_list(), self.data]
+
+        def get_coord_list(self) -> list:
+            return [getattr(self, f"coords_{i}") for i in range(rank)]
 
         @classmethod
         @fn_cache
@@ -173,10 +177,14 @@ def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType:
                 )
                 mid_singleton_lvls = [
                     sparse_tensor.EncodingAttr.build_level_type(
-                        sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.non_unique]
+                        sparse_tensor.LevelFormat.singleton,
+                        [sparse_tensor.LevelProperty.non_unique, sparse_tensor.LevelProperty.soa],
                     )
                 ] * (len(shape) - 2)
-                levels = (compressed_lvl, *mid_singleton_lvls, sparse_tensor.LevelFormat.singleton)
+                last_singleton_lvl = sparse_tensor.EncodingAttr.build_level_type(
+                    sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.soa]
+                )
+                levels = (compressed_lvl, *mid_singleton_lvls, last_singleton_lvl)
                 ordering = ir.AffineMap.get_permutation([*range(len(shape))])
                 encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width)
                 return ir.RankedTensorType.get(list(shape), values_dtype, encoding)
@@ -320,6 +328,7 @@ def __init__(
         self._values_dtype = dtype if dtype is not None else asdtype(obj.dtype)
 
         if _is_scipy_sparse_obj(obj):
+            self.format = obj.format
             self._owns_memory = False
 
             if obj.format in ("csr", "csc"):
@@ -335,22 +344,26 @@ def __init__(
                 raise Exception(f"{obj.format} SciPy format not supported.")
 
         elif _is_numpy_obj(obj):
+            self.format = "dense"
             self._owns_memory = False
             self._index_dtype = asdtype(np.intp)
             self._format_class = get_dense_class(self._values_dtype, self._index_dtype)
             self._obj = self._format_class.from_sps(obj)
 
         elif _is_mlir_obj(obj):
+            self.format = "custom"
             self._owns_memory = True
             self._format_class = type(obj)
             self._obj = obj
 
         elif format is not None:
+            self.format = format
             if format in ["csf", "coo"]:
                 fn_format_class = get_csf_class if format == "csf" else get_coo_class
+                kwargs = {} if format == "csf" else {"rank": len(self.shape)}
                 self._owns_memory = False
                 self._index_dtype = asdtype(np.intp)
-                self._format_class = fn_format_class(self._values_dtype, self._index_dtype)
+                self._format_class = fn_format_class(self._values_dtype, self._index_dtype, **kwargs)
                 self._obj = self._format_class.from_sps(obj)
 
             else:
diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py
index 963bbd1c..6a999c94 100644
--- a/sparse/mlir_backend/_ops.py
+++ b/sparse/mlir_backend/_ops.py
@@ -32,7 +32,7 @@ def get_add_module(
 
             @func.FuncOp.from_py_func(a_tensor_type, b_tensor_type)
             def add(a, b):
-                out = tensor.empty(out_tensor_type, [])
+                out = tensor.empty(out_tensor_type.shape, dtype, encoding=out_tensor_type.encoding)
                 generic_op = linalg.GenericOp(
                     [out_tensor_type],
                     [a, b],
@@ -108,7 +108,9 @@ def get_broadcast_to_module(
 
             @func.FuncOp.from_py_func(in_tensor_type)
             def broadcast_to(in_tensor):
-                out = tensor.empty(out_tensor_type, [])
+                out = tensor.empty(
+                    out_tensor_type.shape, out_tensor_type.element_type, encoding=out_tensor_type.encoding
+                )
                 return linalg.broadcast(in_tensor, outs=[out], dimensions=dimensions)
 
             broadcast_to.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
@@ -156,7 +158,7 @@ def _infer_format_class(rank: int, values_dtype: type[DType], index_dtype: type[
 
 def reshape(x: Tensor, /, shape: tuple[int, ...]) -> Tensor:
     x_tensor_type = x._obj.get_tensor_definition(x.shape)
-    if len(x.shape) == len(shape):
+    if len(x.shape) == len(shape) or x.format == "dense":
         out_tensor_type = x._obj.get_tensor_definition(shape)
         ret_obj = x._format_class()
     else:
diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py
index 98ac90f2..1a174f81 100644
--- a/sparse/mlir_backend/tests/test_simple.py
+++ b/sparse/mlir_backend/tests/test_simple.py
@@ -166,20 +166,18 @@ def test_add(rng, dtype):
     assert isinstance(actual, np.ndarray)
     np.testing.assert_array_equal(actual, expected)
 
-    # NOTE: Fixed in https://github.com/llvm/llvm-project/pull/108615
-    # actual = sparse.add(c_tensor, c_tensor).to_scipy_sparse()
-    # expected = c + c
-    # assert isinstance(actual, np.ndarray)
-    # np.testing.assert_array_equal(actual, expected)
+    actual = sparse.add(dense_tensor, dense_tensor).to_scipy_sparse()
+    expected = dense + dense
+    assert isinstance(actual, np.ndarray)
+    np.testing.assert_array_equal(actual, expected)
 
     actual = sparse.add(csr_2_tensor, coo_tensor).to_scipy_sparse()
     expected = csr_2 + coo
     np.testing.assert_array_equal(actual.todense(), expected.todense())
 
-    # NOTE: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247
-    # actual = sparse.add(d_tensor, d_tensor).to_scipy_sparse()
-    # expected = d + d
-    # np.testing.assert_array_equal(actual.todense(), expected.todense())
+    actual = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
+    expected = coo + coo
+    np.testing.assert_array_equal(actual.todense(), expected.todense())
 
 
 @parametrize_dtypes
@@ -203,7 +201,7 @@ def test_csf_format(dtype):
 def test_coo_3d_format(dtype):
     SHAPE = (2, 2, 4)
     pos = np.array([0, 7])
-    crd = np.array([[0, 1, 0, 0, 1, 1, 0], [1, 3, 1, 0, 0, 1, 0], [3, 1, 1, 0, 1, 1, 1]])
+    crd = [np.array([0, 1, 0, 0, 1, 1, 0]), np.array([1, 3, 1, 0, 0, 1, 0]), np.array([3, 1, 1, 0, 1, 1, 1])]
     data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype)
     coo = [pos, crd, data]
 
@@ -212,11 +210,10 @@ def test_coo_3d_format(dtype):
     for actual, expected in zip(result, coo, strict=False):
         np.testing.assert_array_equal(actual, expected)
 
-    # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
-    # res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
-    # coo_2 = [pos, crd, data * 2]
-    # for actual, expected in zip(res_tensor, coo_2, strict=False):
-    #     np.testing.assert_array_equal(actual, expected)
+    res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
+    coo_2 = [pos, crd, data * 2]
+    for actual, expected in zip(res_tensor, coo_2, strict=False):
+        np.testing.assert_array_equal(actual, expected)
 
 
 @parametrize_dtypes
@@ -232,9 +229,6 @@ def test_reshape(rng, dtype):
         ((80, 1), (80,)),
     ]:
         for format in ["csr", "csc", "coo"]:
-            if format == "coo":
-                # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
-                continue
             if format == "csc":
                 # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641
                 continue
@@ -289,8 +283,18 @@ def test_reshape(rng, dtype):
             np.testing.assert_array_equal(actual, expected)
 
     # DENSE
-    # NOTE: dense reshape is probably broken in MLIR in 19.x branch
-    # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)
+    for shape, new_shape in [
+        ((100, 50), (25, 200)),
+        ((100, 50), (10, 500, 1)),
+        ((80, 1), (8, 10)),
+        ((80, 1), (80,)),
+    ]:
+        dense = np.arange(math.prod(shape), dtype=dtype).reshape(shape)
+        dense_tensor = sparse.asarray(dense)
+        actual = sparse.reshape(dense_tensor, shape=new_shape).to_scipy_sparse()
+        expected = dense.reshape(new_shape)
+
+        np.testing.assert_array_equal(actual, expected)
 
 
 @parametrize_dtypes

From 954996fa24bddae0703c89a5e5b91c7c5e57a16d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= <mat646@gmail.com>
Date: Tue, 22 Oct 2024 15:43:09 +0000
Subject: [PATCH 2/2] Use wheel from GitHub release

---
 .github/workflows/ci.yml             | 32 ++++++++++++++--------------
 ci/environment.yml                   |  2 +-
 sparse/mlir_backend/__init__.py      |  2 +-
 sparse/mlir_backend/_common.py       |  2 +-
 sparse/mlir_backend/_constructors.py |  6 +++---
 sparse/mlir_backend/_core.py         | 14 ++++++++++--
 sparse/mlir_backend/_dtypes.py       |  2 +-
 sparse/mlir_backend/_ops.py          | 14 ++++++------
 8 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7497d219..b8c928a7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,30 +2,30 @@ defaults:
   run:
     shell: bash -leo pipefail {0}
 
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
+# concurrency:
+#   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+#   cancel-in-progress: true
 
 jobs:
   test:
     strategy:
       matrix:
         os: ['ubuntu-latest']
-        python: ['3.10', '3.11', '3.12']
+        python: ['3.10'] # , '3.11', '3.12'
         pip_opts: ['']
         numba_boundscheck: [0]
-        include:
-          - os: macos-latest
-            python: '3.10'
-          - os: windows-latest
-            python: '3.10'
-          - os: ubuntu-latest
-            python: '3.10'
-            numba_boundscheck: 1
-          - os: ubuntu-latest
-            python: '3.10'
-            pip_opts: 'numpy<2'
-      fail-fast: false
+        # include:
+        #   - os: macos-latest
+        #     python: '3.10'
+        #   - os: windows-latest
+        #     python: '3.10'
+        #   - os: ubuntu-latest
+        #     python: '3.10'
+        #     numba_boundscheck: 1
+        #   - os: ubuntu-latest
+        #     python: '3.10'
+        #     pip_opts: 'numpy<2'
+      #fail-fast: false
     runs-on: ${{ matrix.os }}
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/ci/environment.yml b/ci/environment.yml
index cb49a1e5..2827fa37 100644
--- a/ci/environment.yml
+++ b/ci/environment.yml
@@ -12,7 +12,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
-  - mlir-python-bindings==19.*
   - pip:
     - finch-tensor >=0.1.31
     - pytest-codspeed
+    - https://github.com/nullplay/Finch-mlir/releases/download/latest/mlir_finch-0.0.1-cp310-cp310-linux_x86_64.whl
diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py
index 86b42965..757a3664 100644
--- a/sparse/mlir_backend/__init__.py
+++ b/sparse/mlir_backend/__init__.py
@@ -1,5 +1,5 @@
 try:
-    import mlir  # noqa: F401
+    import mlir_finch  # noqa: F401
 except ModuleNotFoundError as e:
     raise ImportError(
         "MLIR Python bindings not installed. Run "
diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py
index f78ac991..a00d91ed 100644
--- a/sparse/mlir_backend/_common.py
+++ b/sparse/mlir_backend/_common.py
@@ -4,7 +4,7 @@
 import weakref
 from dataclasses import dataclass
 
-from mlir import ir
+from mlir_finch import ir
 
 
 class MlirType(abc.ABC):
diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py
index e2afbda2..cdf0a8d2 100644
--- a/sparse/mlir_backend/_constructors.py
+++ b/sparse/mlir_backend/_constructors.py
@@ -2,9 +2,9 @@
 from collections.abc import Iterable
 from typing import Any
 
-import mlir.runtime as rt
-from mlir import ir
-from mlir.dialects import sparse_tensor
+import mlir_finch.runtime as rt
+from mlir_finch import ir
+from mlir_finch.dialects import sparse_tensor
 
 import numpy as np
 import scipy.sparse as sps
diff --git a/sparse/mlir_backend/_core.py b/sparse/mlir_backend/_core.py
index 16e6720b..b73760d0 100644
--- a/sparse/mlir_backend/_core.py
+++ b/sparse/mlir_backend/_core.py
@@ -2,14 +2,24 @@
 import ctypes.util
 import os
 import pathlib
+import sys
 
-from mlir.ir import Context
-from mlir.passmanager import PassManager
+from mlir_finch.ir import Context
+from mlir_finch.passmanager import PassManager
 
 DEBUG = bool(int(os.environ.get("DEBUG", "0")))
 CWD = pathlib.Path(".")
 
+# Library directory under the running interpreter's site-packages; prepended to LD_LIBRARY_PATH below.
+LD_ENV_PATH = f"{sys.prefix}/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/lib"
+if "LD_LIBRARY_PATH" in os.environ:
+    os.environ["LD_LIBRARY_PATH"] = f"{LD_ENV_PATH}:{os.environ['LD_LIBRARY_PATH']}"
+else:
+    os.environ["LD_LIBRARY_PATH"] = LD_ENV_PATH
+
 MLIR_C_RUNNER_UTILS = ctypes.util.find_library("mlir_c_runner_utils")
+if os.name == "posix":
+    MLIR_C_RUNNER_UTILS = f"{LD_ENV_PATH}/{MLIR_C_RUNNER_UTILS}"
 libc = ctypes.CDLL(ctypes.util.find_library("c")) if os.name != "nt" else ctypes.cdll.msvcrt
 libc.free.argtypes = [ctypes.c_void_p]
 libc.free.restype = None
diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py
index 2ab41401..487025b6 100644
--- a/sparse/mlir_backend/_dtypes.py
+++ b/sparse/mlir_backend/_dtypes.py
@@ -3,7 +3,7 @@
 import sys
 import typing
 
-from mlir import ir
+from mlir_finch import ir
 
 import numpy as np
 
diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py
index 6a999c94..cc1c7f80 100644
--- a/sparse/mlir_backend/_ops.py
+++ b/sparse/mlir_backend/_ops.py
@@ -1,9 +1,9 @@
 import ctypes
 
-import mlir.execution_engine
-import mlir.passmanager
-from mlir import ir
-from mlir.dialects import arith, func, linalg, sparse_tensor, tensor
+import mlir_finch.execution_engine
+import mlir_finch.passmanager
+from mlir_finch import ir
+from mlir_finch.dialects import arith, func, linalg, sparse_tensor, tensor
 
 import numpy as np
 
@@ -67,7 +67,7 @@ def add(a, b):
         if DEBUG:
             (CWD / "add_module_opt.mlir").write_text(str(module))
 
-    return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
+    return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
 
 
 @fn_cache
@@ -92,7 +92,7 @@ def reshape(a, shape):
             if DEBUG:
                 (CWD / "reshape_module_opt.mlir").write_text(str(module))
 
-    return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
+    return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
 
 
 @fn_cache
@@ -120,7 +120,7 @@ def broadcast_to(in_tensor):
             if DEBUG:
                 (CWD / "broadcast_to_module_opt.mlir").write_text(str(module))
 
-    return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
+    return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
 
 
 def add(x1: Tensor, x2: Tensor) -> Tensor: