Try adding `reshape` to the MLIR backend.

hameerabbasi · hameerabbasi · commit 74ddaef72d5e · 2024-11-04T10:42:34.000+01:00
diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py
@@ -27,7 +27,7 @@
     uint32,
     uint64,
 )
-from ._ops import add
+from ._ops import add, reshape
 
 __all__ = [
     "add",
@@ -36,6 +36,7 @@
     "to_numpy",
     "to_scipy",
     "levels",
+    "reshape",
     "from_constituent_arrays",
     "int8",
     "int16",
diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py
@@ -1,6 +1,7 @@
 import ctypes
 import functools
 import weakref
+from collections.abc import Iterable
 
 import mlir.runtime as rt
 
@@ -52,3 +53,13 @@ def finalizer(ptr):
         ctypes.pythonapi.Py_DecRef(ptr)
 
     weakref.finalize(owner, finalizer, ptr)
+
+
+def as_shape(x) -> tuple[int]:
+    if not isinstance(x, Iterable):
+        x = (x,)
+
+    if not all(isinstance(xi, int) for xi in x):
+        raise TypeError("Shape must be an `int` or tuple of `int`s.")
+
+    return tuple(int(xi) for xi in x)
diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from ._array import Array
-from ._common import fn_cache
+from ._common import as_shape, fn_cache
 from ._core import CWD, DEBUG, OPT_LEVEL, SHARED_LIBS, ctx, pm
 from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType
 from .levels import _determine_format
@@ -138,7 +138,7 @@ def add(x1: Array, x2: Array) -> Array:
     ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)()
     out_tensor_type = ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape))
 
-    # TODO: Decide what will be the output tensor_type
+    # TODO: Determine output format via autoscheduler
     add_module = get_add_module(
         x1._get_mlir_type(),
         x2._get_mlir_type(),
@@ -152,3 +152,24 @@ def add(x1: Array, x2: Array) -> Array:
         *x2._to_module_arg(),
     )
     return Array(storage=ret_storage, shape=tuple(out_tensor_type.shape))
+
+
+def reshape(x: Array, /, shape: tuple[int, ...]):
+    from ._conversions import _from_numpy
+
+    shape = as_shape(shape)
+    ret_storage_format = _determine_format(x.format, dtype=x.dtype, union=len(shape) >= x.ndim, out_ndim=len(shape))
+    shape_array = _from_numpy(np.asarray(shape, dtype=np.uint64))
+    out_tensor_type = ret_storage_format._get_mlir_type(shape=shape)
+    ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)()
+
+    reshape_module = get_reshape_module(x._get_mlir_type(), shape_array._get_mlir_type(), out_tensor_type)
+
+    reshape_module.invoke(
+        "reshape",
+        ctypes.pointer(ctypes.pointer(ret_storage)),
+        *x._to_module_arg(),
+        *shape_array._to_module_arg(),
+    )
+
+    return Array(storage=ret_storage, shape=shape)
diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py
@@ -207,6 +207,10 @@ def _count_sparse_levels(format: StorageFormat) -> int:
     return sum(_is_sparse_level(lvl) for lvl in format.levels)
 
 
+def _count_dense_levels(format: StorageFormat) -> int:
+    return sum(not _is_sparse_level(lvl) for lvl in format.levels)
+
+
 def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat:
     if len(formats) == 0:
         if out_ndim is None:
@@ -225,10 +229,11 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd
     pos_width = 0
     crd_width = 0
     op = min if union else max
-    n_sparse = None
+    counter = _count_sparse_levels if not union else _count_dense_levels
+    n_counted = None
     order = ()
     for fmt in formats:
-        n_sparse = _count_sparse_levels(fmt) if n_sparse is None else op(n_sparse, _count_sparse_levels(fmt))
+        n_counted = counter(fmt) if n_counted is None else op(n_counted, counter(fmt))
         pos_width = max(pos_width, fmt.pos_width)
         crd_width = max(crd_width, fmt.crd_width)
         if order != "C":
@@ -237,8 +242,12 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd
             elif order[: len(fmt.order)] != fmt.order:
                 order = "C"
 
-    if out_ndim < n_sparse:
-        n_sparse = out_ndim
+    order = order + tuple(range(len(order), out_ndim))
+
+    if out_ndim < n_counted:
+        n_counted = out_ndim
+
+    n_sparse = n_counted if union else out_ndim - n_counted
 
     levels = (Level(LevelFormat.Dense),) * (out_ndim - n_sparse) + (Level(LevelFormat.Compressed),) * n_sparse
     return get_storage_format(
diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py
@@ -85,7 +85,7 @@ def sampler_complex_floating(size: tuple[int, ...]):
     raise NotImplementedError(f"{dtype=} not yet supported.")
 
 
-def get_exampe_csf_arrays(dtype: np.dtype) -> tuple:
+def get_example_csf_arrays(dtype: np.dtype) -> tuple:
     pos_1 = np.array([0, 1, 3], dtype=np.int64)
     crd_1 = np.array([1, 0, 1], dtype=np.int64)
     pos_2 = np.array([0, 3, 5, 7], dtype=np.int64)
@@ -207,7 +207,7 @@ def test_csf_format(dtype):
     )
 
     SHAPE = (2, 2, 4)
-    pos_1, crd_1, pos_2, crd_2, data = get_exampe_csf_arrays(dtype)
+    pos_1, crd_1, pos_2, crd_2, data = get_example_csf_arrays(dtype)
     constituent_arrays = (pos_1, crd_1, pos_2, crd_2, data)
 
     csf_array = sparse.from_constituent_arrays(format=format, arrays=constituent_arrays, shape=SHAPE)
@@ -297,3 +297,85 @@ def test_copy():
     np.testing.assert_array_equal(sparse.to_numpy(arr_sp1), arr_np_orig)
     np.testing.assert_array_equal(sparse.to_numpy(arr_sp2), arr_np_orig)
     np.testing.assert_array_equal(sparse.to_numpy(arr_sp3), arr_np_copy)
+
+
+@parametrize_dtypes
+def test_reshape(rng, dtype):
+    DENSITY = 0.5
+    sampler = generate_sampler(dtype, rng)
+
+    # CSR, CSC, COO
+    for shape, new_shape in [
+        ((100, 50), (25, 200)),
+        # ((100, 50), (10, 500, 1)),
+        ((80, 1), (8, 10)),
+        # ((80, 1), (80,)),
+    ]:
+        for format in ["csr", "csc", "coo"]:
+            if format == "coo":
+                # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
+                continue
+            if format == "csc":
+                # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641
+                continue
+
+            arr = sps.random_array(
+                shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler
+            )
+            arr.eliminate_zeros()
+            arr.sum_duplicates()
+            tensor = sparse.asarray(arr)
+
+            actual = sparse.to_scipy(sparse.reshape(tensor, shape=new_shape))
+            expected = arr.todense().reshape(new_shape)
+
+            np.testing.assert_array_equal(actual.todense(), expected)
+
+    # CSF
+    csf_shape = (2, 2, 4)
+    csf_format = sparse.levels.get_storage_format(
+        levels=(
+            sparse.levels.Level(sparse.levels.LevelFormat.Dense),
+            sparse.levels.Level(sparse.levels.LevelFormat.Compressed),
+            sparse.levels.Level(sparse.levels.LevelFormat.Compressed),
+        ),
+        order="C",
+        pos_width=64,
+        crd_width=64,
+        dtype=sparse.asdtype(dtype),
+    )
+    for shape, new_shape, expected_arrs in [
+        (
+            csf_shape,
+            (4, 4, 1),
+            [
+                np.array([0, 0, 3, 5, 7]),
+                np.array([0, 1, 3, 0, 3, 0, 1]),
+                np.array([0, 1, 2, 3, 4, 5, 6, 7]),
+                np.array([0, 0, 0, 0, 0, 0, 0]),
+                np.array([1, 2, 3, 4, 5, 6, 7]),
+            ],
+        ),
+        (
+            csf_shape,
+            (2, 1, 8),
+            [
+                np.array([0, 1, 2]),
+                np.array([0, 0]),
+                np.array([0, 3, 7]),
+                np.array([4, 5, 7, 0, 3, 4, 5]),
+                np.array([1, 2, 3, 4, 5, 6, 7]),
+            ],
+        ),
+    ]:
+        arrs = get_example_csf_arrays(dtype)
+        csf_tensor = sparse.from_constituent_arrays(format=csf_format, arrays=arrs, shape=shape)
+
+        result = sparse.reshape(csf_tensor, shape=new_shape)
+
+        for actual, expected in zip(result.get_constituent_arrays(), expected_arrs, strict=True):
+            np.testing.assert_array_equal(actual, expected)
+
+    # DENSE
+    # NOTE: dense reshape is probably broken in the MLIR 19.x branch
+    # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)