From 01fd2040b421a09cdcd507805a25c2c5a0996c38 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 24 Oct 2024 09:42:47 +0200 Subject: [PATCH 01/14] Fix signed/unsigned integer naming mix-up. --- sparse/mlir_backend/_dtypes.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py index 31d8c5f8..7dad1438 100644 --- a/sparse/mlir_backend/_dtypes.py +++ b/sparse/mlir_backend/_dtypes.py @@ -76,10 +76,10 @@ def np_dtype(self) -> np.dtype: return np.dtype(getattr(np, f"uint{self.bit_width}")) -int8 = UnsignedIntegerDType(bit_width=8) -int16 = UnsignedIntegerDType(bit_width=16) -int32 = UnsignedIntegerDType(bit_width=32) -int64 = UnsignedIntegerDType(bit_width=64) +uint8 = UnsignedIntegerDType(bit_width=8) +uint16 = UnsignedIntegerDType(bit_width=16) +uint32 = UnsignedIntegerDType(bit_width=32) +uint64 = UnsignedIntegerDType(bit_width=64) @dataclasses.dataclass(eq=True, frozen=True, kw_only=True) @@ -89,10 +89,10 @@ def np_dtype(self) -> np.dtype: return np.dtype(getattr(np, f"int{self.bit_width}")) -uint8 = SignedIntegerDType(bit_width=8) -uint16 = SignedIntegerDType(bit_width=16) -uint32 = SignedIntegerDType(bit_width=32) -uint64 = SignedIntegerDType(bit_width=64) +int8 = SignedIntegerDType(bit_width=8) +int16 = SignedIntegerDType(bit_width=16) +int32 = SignedIntegerDType(bit_width=32) +int64 = SignedIntegerDType(bit_width=64) intp: SignedIntegerDType = locals()[f"int{_PTR_WIDTH}"] From 400fc0c484f63f2790d349ea0b55fadfb49dc940 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:31:05 +0200 Subject: [PATCH 02/14] Add mechanism for detecting output format. 
--- sparse/mlir_backend/levels.py | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index 6559ec4c..fa0fa737 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -209,3 +209,56 @@ def _get_storage_format( crd_width=crd_width, dtype=dtype, ) + + +def _is_sparse_level(lvl: Level | LevelFormat, /) -> bool: + if isinstance(lvl, Level): + lvl = lvl.format + return LevelFormat.Dense != lvl + + +def _count_sparse_levels(format: StorageFormat) -> int: + return sum(_is_sparse_level(lvl) for lvl in format.levels) + + +def _determine_levels(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: + if len(formats) == 0: + if out_ndim is None: + out_ndim = 0 + return get_storage_format( + levels=(Level(LevelFormat.Dense),) * out_ndim, + order="C", + pos_width=64, + crd_width=64, + dtype=dtype, + ) + + if out_ndim is None: + out_ndim = max(fmt.rank for fmt in formats) + + n_sparse = 0 + pos_width = 0 + crd_width = 0 + op = max if union else min + order = () + for fmt in formats: + n_sparse = op(n_sparse, _count_sparse_levels(fmt)) + pos_width = max(pos_width, fmt.pos_width) + crd_width = max(crd_width, fmt.crd_width) + if order != "C": + if fmt.order[: len(order)] == order: + order = fmt.order + elif order[: len(fmt.order)] != fmt.order: + order = "C" + + if out_ndim < n_sparse: + n_sparse = out_ndim + + levels = (Level(LevelFormat.Dense),) * (out_ndim - n_sparse) + (Level(LevelFormat.Compressed),) * n_sparse + return get_storage_format( + levels=levels, + order=order, + pos_width=pos_width, + crd_width=crd_width, + dtype=dtype, + ) From 574d3d3dae8f93eb9100b4f032086060eed238e4 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:08:19 +0200 Subject: [PATCH 03/14] Use heuristic in `add`. 
--- sparse/mlir_backend/_ops.py | 20 +++++++++++++------- sparse/mlir_backend/levels.py | 8 ++++---- sparse/mlir_backend/tests/test_simple.py | 8 ++++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 20eee897..52c211ce 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -5,10 +5,13 @@ from mlir_finch import ir from mlir_finch.dialects import arith, complex, func, linalg, sparse_tensor, tensor +import numpy as np + from ._array import Array from ._common import fn_cache from ._core import CWD, DEBUG, SHARED_LIBS, ctx, pm from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType +from .levels import _determine_format @fn_cache @@ -17,7 +20,6 @@ def get_add_module( b_tensor_type: ir.RankedTensorType, out_tensor_type: ir.RankedTensorType, dtype: DType, - rank: int, ) -> ir.Module: with ir.Location.unknown(ctx): module = ir.Module.create() @@ -31,7 +33,7 @@ def get_add_module( raise RuntimeError(f"Can not add {dtype=}.") dtype = dtype._get_mlir_type() - ordering = ir.AffineMap.get_permutation(range(rank)) + max_rank = out_tensor_type.rank with ir.InsertionPoint(module.body): @@ -42,8 +44,13 @@ def add(a, b): [out_tensor_type], [a, b], [out], - ir.ArrayAttr.get([ir.AffineMapAttr.get(p) for p in (ordering,) * 3]), - ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type")] * rank), + ir.ArrayAttr.get( + [ + ir.AffineMapAttr.get(ir.AffineMap.get_minor_identity(max_rank, t.rank)) + for t in (a_tensor_type, b_tensor_type, out_tensor_type) + ] + ), + ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type")] * out_tensor_type.rank), ) block = generic_op.regions[0].blocks.append(dtype, dtype, dtype) with ir.InsertionPoint(block): @@ -129,9 +136,9 @@ def broadcast_to(in_tensor): def add(x1: Array, x2: Array) -> Array: - ret_storage_format = x1.format + ret_storage_format = _determine_format(x1.format, x2.format, dtype=x1.dtype, 
union=True) ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() - out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape) + out_tensor_type = ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape)) # TODO: Decide what will be the output tensor_type add_module = get_add_module( @@ -139,7 +146,6 @@ def add(x1: Array, x2: Array) -> Array: x2._get_mlir_type(), out_tensor_type=out_tensor_type, dtype=x1.dtype, - rank=x1.ndim, ) add_module.invoke( "add", diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index fa0fa737..da33a8ab 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -221,7 +221,7 @@ def _count_sparse_levels(format: StorageFormat) -> int: return sum(_is_sparse_level(lvl) for lvl in format.levels) -def _determine_levels(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: +def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: if len(formats) == 0: if out_ndim is None: out_ndim = 0 @@ -236,13 +236,13 @@ def _determine_levels(*formats: StorageFormat, dtype: DType, union: bool, out_nd if out_ndim is None: out_ndim = max(fmt.rank for fmt in formats) - n_sparse = 0 pos_width = 0 crd_width = 0 - op = max if union else min + op = min if union else max + n_sparse = None order = () for fmt in formats: - n_sparse = op(n_sparse, _count_sparse_levels(fmt)) + n_sparse = _count_sparse_levels(fmt) if n_sparse is None else op(n_sparse, _count_sparse_levels(fmt)) pos_width = max(pos_width, fmt.pos_width) crd_width = max(crd_width, fmt.crd_width) if order != "C": diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index f8ae1f31..a376e291 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -164,12 +164,12 @@ def test_add(rng, dtype): assert_csx_equal(expected, 
actual) actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor)) - expected = csc + csr + expected = (csc + csr).asformat("csr") assert_csx_equal(expected, actual) - actual = sparse.to_scipy(sparse.add(csr_tensor, dense_tensor)) - expected = sps.csr_matrix(csr + dense) - assert_csx_equal(expected, actual) + actual = sparse.to_numpy(sparse.add(csr_tensor, dense_tensor)) + expected = csr + dense + np.testing.assert_array_equal(actual, expected) actual = sparse.to_numpy(sparse.add(dense_tensor, csr_tensor)) expected = csr + dense From 0bad554c51c5ec2e3c4d361af7bd440cec41d68e Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:46:46 +0100 Subject: [PATCH 04/14] `pixi.toml` fixes. --- pixi.toml | 7 ++++--- sparse/mlir_backend/_ops.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pixi.toml b/pixi.toml index 01360600..d69b874c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -27,7 +27,7 @@ mkdocs-jupyter = "*" [feature.tests.tasks] test = "pytest --pyargs sparse -n auto" -test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -v" } +test-mlir = { cmd = "pytest --pyargs sparse.mlir_backend -v" } test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -v", depends-on = ["precompile"] } [feature.tests.dependencies] @@ -55,6 +55,7 @@ finch-tensor = ">=0.1.31" SPARSE_BACKEND = "Finch" [feature.finch.target.osx-arm64.activation.env] +SPARSE_BACKEND = "Finch" PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" [feature.mlir.dependencies] @@ -67,5 +68,5 @@ SPARSE_BACKEND = "MLIR" [environments] tests = ["tests", "extras"] docs = ["docs", "extras"] -mlir-dev = ["tests", "mlir"] -finch-dev = ["tests", "finch"] +mlir-dev = {features = ["tests", "mlir"], no-default-feature = true} +finch-dev = {features = ["tests", "finch"], no-default-feature = true} diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 52c211ce..d04323a7 100644 --- a/sparse/mlir_backend/_ops.py +++ 
b/sparse/mlir_backend/_ops.py @@ -153,4 +153,4 @@ def add(x1: Array, x2: Array) -> Array: *x1._to_module_arg(), *x2._to_module_arg(), ) - return Array(storage=ret_storage, shape=out_tensor_type.shape) + return Array(storage=ret_storage, shape=tuple(out_tensor_type.shape)) From bdb233f3d2a47d24d1b312b2fc8e22a128286275 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Wed, 30 Oct 2024 13:03:38 +0100 Subject: [PATCH 05/14] Don't link to unnecessary MLIR runtime. --- sparse/mlir_backend/_core.py | 6 ++++++ sparse/mlir_backend/_ops.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sparse/mlir_backend/_core.py b/sparse/mlir_backend/_core.py index 9488ea46..ac7065cb 100644 --- a/sparse/mlir_backend/_core.py +++ b/sparse/mlir_backend/_core.py @@ -28,6 +28,12 @@ libc.free.argtypes = [ctypes.c_void_p] libc.free.restype = None +SHARED_LIBS = [] +if DEBUG: + SHARED_LIBS.append(MLIR_C_RUNNER_UTILS) + +OPT_LEVEL = 0 if DEBUG else 2 + # TODO: remove global state ctx = Context() diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index d04323a7..f9bb5c2d 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -9,7 +9,7 @@ from ._array import Array from ._common import fn_cache -from ._core import CWD, DEBUG, SHARED_LIBS, ctx, pm +from ._core import CWD, DEBUG, OPT_LEVEL, SHARED_LIBS, ctx, pm from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType from .levels import _determine_format @@ -79,7 +79,7 @@ def add(a, b): if DEBUG: (CWD / "add_module_opt.mlir").write_text(str(module)) - return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS) + return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) @fn_cache @@ -104,7 +104,7 @@ def reshape(a, shape): if DEBUG: (CWD / "reshape_module_opt.mlir").write_text(str(module)) - return 
mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS) + return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) @fn_cache @@ -132,7 +132,7 @@ def broadcast_to(in_tensor): if DEBUG: (CWD / "broadcast_to_module_opt.mlir").write_text(str(module)) - return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS) + return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) def add(x1: Array, x2: Array) -> Array: From a8b8b7f3197e9b76a3533ac049e702ed4ea733cc Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 4 Nov 2024 08:14:39 +0100 Subject: [PATCH 06/14] Try reshape. --- sparse/mlir_backend/__init__.py | 3 +- sparse/mlir_backend/_common.py | 11 +++ sparse/mlir_backend/_ops.py | 25 ++++++- sparse/mlir_backend/levels.py | 17 +++-- sparse/mlir_backend/tests/test_simple.py | 86 +++++++++++++++++++++++- 5 files changed, 133 insertions(+), 9 deletions(-) diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index 20a02beb..f60410ac 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -27,7 +27,7 @@ uint32, uint64, ) -from ._ops import add +from ._ops import add, reshape __all__ = [ "add", @@ -36,6 +36,7 @@ "to_numpy", "to_scipy", "levels", + "reshape", "from_constituent_arrays", "int8", "int16", diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index b382a822..66867e60 100644 --- a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -1,6 +1,7 @@ import ctypes import functools import weakref +from collections.abc import Iterable import mlir_finch.runtime as rt @@ -52,3 +53,13 @@ def finalizer(ptr): ctypes.pythonapi.Py_DecRef(ptr) weakref.finalize(owner, finalizer, ptr) + + +def as_shape(x) -> tuple[int]: + if not isinstance(x, Iterable): + x = (x,) + + if not 
all(isinstance(xi, int) for xi in x): + raise TypeError("Shape must be an `int` or tuple of `int`s.") + + return tuple(int(xi) for xi in x) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index f9bb5c2d..e4ffa0ad 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -8,7 +8,7 @@ import numpy as np from ._array import Array -from ._common import fn_cache +from ._common import as_shape, fn_cache from ._core import CWD, DEBUG, OPT_LEVEL, SHARED_LIBS, ctx, pm from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType from .levels import _determine_format @@ -140,7 +140,7 @@ def add(x1: Array, x2: Array) -> Array: ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() out_tensor_type = ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape)) - # TODO: Decide what will be the output tensor_type + # TODO: Determine output format via autoscheduler add_module = get_add_module( x1._get_mlir_type(), x2._get_mlir_type(), @@ -154,3 +154,24 @@ def add(x1: Array, x2: Array) -> Array: *x2._to_module_arg(), ) return Array(storage=ret_storage, shape=tuple(out_tensor_type.shape)) + + +def reshape(x: Array, /, shape: tuple[int, ...]): + from ._conversions import _from_numpy + + shape = as_shape(shape) + ret_storage_format = _determine_format(x.format, dtype=x.dtype, union=len(shape) >= x.ndim, out_ndim=len(shape)) + shape_array = _from_numpy(np.asarray(shape, dtype=np.uint64)) + out_tensor_type = ret_storage_format._get_mlir_type(shape=shape) + ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() + + reshape_module = get_reshape_module(x._get_mlir_type(), shape_array._get_mlir_type(), out_tensor_type) + + reshape_module.invoke( + "reshape", + ctypes.pointer(ctypes.pointer(ret_storage)), + *x._to_module_arg(), + *shape_array._to_module_arg(), + ) + + return Array(storage=ret_storage, shape=shape) diff --git a/sparse/mlir_backend/levels.py 
b/sparse/mlir_backend/levels.py index da33a8ab..21295ee5 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -221,6 +221,10 @@ def _count_sparse_levels(format: StorageFormat) -> int: return sum(_is_sparse_level(lvl) for lvl in format.levels) +def _count_dense_levels(format: StorageFormat) -> int: + return sum(not _is_sparse_level(lvl) for lvl in format.levels) + + def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: if len(formats) == 0: if out_ndim is None: @@ -239,10 +243,11 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd pos_width = 0 crd_width = 0 op = min if union else max - n_sparse = None + counter = _count_sparse_levels if not union else _count_dense_levels + n_counted = None order = () for fmt in formats: - n_sparse = _count_sparse_levels(fmt) if n_sparse is None else op(n_sparse, _count_sparse_levels(fmt)) + n_counted = counter(fmt) if n_counted is None else op(n_counted, counter(fmt)) pos_width = max(pos_width, fmt.pos_width) crd_width = max(crd_width, fmt.crd_width) if order != "C": @@ -251,8 +256,12 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd elif order[: len(fmt.order)] != fmt.order: order = "C" - if out_ndim < n_sparse: - n_sparse = out_ndim + order = order + tuple(range(len(order), out_ndim)) + + if out_ndim < n_counted: + n_counted = out_ndim + + n_sparse = n_counted if union else out_ndim - n_counted levels = (Level(LevelFormat.Dense),) * (out_ndim - n_sparse) + (Level(LevelFormat.Compressed),) * n_sparse return get_storage_format( diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index a376e291..d1319c1e 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -85,7 +85,7 @@ def sampler_complex_floating(size: tuple[int, ...]): raise NotImplementedError(f"{dtype=} not yet supported.") 
-def get_exampe_csf_arrays(dtype: np.dtype) -> tuple: +def get_example_csf_arrays(dtype: np.dtype) -> tuple: pos_1 = np.array([0, 1, 3], dtype=np.int64) crd_1 = np.array([1, 0, 1], dtype=np.int64) pos_2 = np.array([0, 3, 5, 7], dtype=np.int64) @@ -205,7 +205,7 @@ def test_csf_format(dtype): ) SHAPE = (2, 2, 4) - pos_1, crd_1, pos_2, crd_2, data = get_exampe_csf_arrays(dtype) + pos_1, crd_1, pos_2, crd_2, data = get_example_csf_arrays(dtype) constituent_arrays = (pos_1, crd_1, pos_2, crd_2, data) csf_array = sparse.from_constituent_arrays(format=format, arrays=constituent_arrays, shape=SHAPE) @@ -297,3 +297,85 @@ def test_copy(): np.testing.assert_array_equal(sparse.to_numpy(arr_sp1), arr_np_orig) np.testing.assert_array_equal(sparse.to_numpy(arr_sp2), arr_np_orig) np.testing.assert_array_equal(sparse.to_numpy(arr_sp3), arr_np_copy) + + +@parametrize_dtypes +def test_reshape(rng, dtype): + DENSITY = 0.5 + sampler = generate_sampler(dtype, rng) + + # CSR, CSC, COO + for shape, new_shape in [ + ((100, 50), (25, 200)), + # ((100, 50), (10, 500, 1)), + ((80, 1), (8, 10)), + # ((80, 1), (80,)), + ]: + for format in ["csr", "csc", "coo"]: + if format == "coo": + # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 + continue + if format == "csc": + # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641 + continue + + arr = sps.random_array( + shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler + ) + arr.eliminate_zeros() + arr.sum_duplicates() + tensor = sparse.asarray(arr) + + actual = sparse.to_scipy(sparse.reshape(tensor, shape=new_shape)) + expected = arr.todense().reshape(new_shape) + + np.testing.assert_array_equal(actual.todense(), expected) + + # CSF + csf_shape = (2, 2, 4) + csf_format = sparse.levels.get_storage_format( + levels=( + sparse.levels.Level(sparse.levels.LevelFormat.Dense), + sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + 
sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + ), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + ) + for shape, new_shape, expected_arrs in [ + ( + csf_shape, + (4, 4, 1), + [ + np.array([0, 0, 3, 5, 7]), + np.array([0, 1, 3, 0, 3, 0, 1]), + np.array([0, 1, 2, 3, 4, 5, 6, 7]), + np.array([0, 0, 0, 0, 0, 0, 0]), + np.array([1, 2, 3, 4, 5, 6, 7]), + ], + ), + ( + csf_shape, + (2, 1, 8), + [ + np.array([0, 1, 2]), + np.array([0, 0]), + np.array([0, 3, 7]), + np.array([4, 5, 7, 0, 3, 4, 5]), + np.array([1, 2, 3, 4, 5, 6, 7]), + ], + ), + ]: + arrs = get_example_csf_arrays(dtype) + csf_tensor = sparse.from_constituent_arrays(format=csf_format, arrays=arrs, shape=shape) + + result = sparse.reshape(csf_tensor, shape=new_shape) + + for actual, expected in zip(result.get_constituent_arrays(), expected_arrs, strict=True): + np.testing.assert_array_equal(actual, expected) + + # DENSE + # NOTE: dense reshape is probably broken in MLIR in 19.x branch + # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE) From da91d63826425a1afc8fe147b2ad71a1d5914e8d Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:09:14 +0100 Subject: [PATCH 07/14] Better tests and fixes. 
--- sparse/mlir_backend/_ops.py | 4 +- sparse/mlir_backend/levels.py | 9 ++-- sparse/mlir_backend/tests/test_simple.py | 68 ++++++++++++++---------- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index e4ffa0ad..f4d509b8 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -156,11 +156,11 @@ def add(x1: Array, x2: Array) -> Array: return Array(storage=ret_storage, shape=tuple(out_tensor_type.shape)) -def reshape(x: Array, /, shape: tuple[int, ...]): +def reshape(x: Array, /, shape: tuple[int, ...]) -> Array: from ._conversions import _from_numpy shape = as_shape(shape) - ret_storage_format = _determine_format(x.format, dtype=x.dtype, union=len(shape) >= x.ndim, out_ndim=len(shape)) + ret_storage_format = _determine_format(x.format, dtype=x.dtype, union=len(shape) > x.ndim, out_ndim=len(shape)) shape_array = _from_numpy(np.asarray(shape, dtype=np.uint64)) out_tensor_type = ret_storage_format._get_mlir_type(shape=shape) ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index 21295ee5..e7fd21b0 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -242,12 +242,11 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd pos_width = 0 crd_width = 0 - op = min if union else max counter = _count_sparse_levels if not union else _count_dense_levels n_counted = None order = () for fmt in formats: - n_counted = counter(fmt) if n_counted is None else op(n_counted, counter(fmt)) + n_counted = counter(fmt) if n_counted is None else max(n_counted, counter(fmt)) pos_width = max(pos_width, fmt.pos_width) crd_width = max(crd_width, fmt.crd_width) if order != "C": @@ -256,12 +255,14 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd elif order[: len(fmt.order)] != fmt.order: order = "C" - order = order + 
tuple(range(len(order), out_ndim)) + if not isinstance(order, str): + order = order + tuple(range(len(order), out_ndim)) + order = order[:out_ndim] if out_ndim < n_counted: n_counted = out_ndim - n_sparse = n_counted if union else out_ndim - n_counted + n_sparse = n_counted if not union else out_ndim - n_counted levels = (Level(LevelFormat.Dense),) * (out_ndim - n_sparse) + (Level(LevelFormat.Compressed),) * n_sparse return get_storage_format( diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index d1319c1e..1b9a4c02 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -300,37 +300,50 @@ def test_copy(): @parametrize_dtypes -def test_reshape(rng, dtype): +@pytest.mark.parametrize( + "format", + [ + "csr", + pytest.param("csc", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109135")), + pytest.param("coo", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109641")), + ], +) +@pytest.mark.parametrize( + ("shape", "new_shape"), + [ + ((100, 50), (25, 200)), + ((100, 50), (10, 500, 1)), + ((80, 1), (8, 10)), + ((80, 1), (80,)), + ], +) +def test_reshape(rng, dtype, format, shape, new_shape): DENSITY = 0.5 sampler = generate_sampler(dtype, rng) - # CSR, CSC, COO - for shape, new_shape in [ - ((100, 50), (25, 200)), - # ((100, 50), (10, 500, 1)), - ((80, 1), (8, 10)), - # ((80, 1), (80,)), - ]: - for format in ["csr", "csc", "coo"]: - if format == "coo": - # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 - continue - if format == "csc": - # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641 - continue - - arr = sps.random_array( - shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler - ) - arr.eliminate_zeros() - arr.sum_duplicates() - tensor = sparse.asarray(arr) - - actual = sparse.to_scipy(sparse.reshape(tensor, shape=new_shape)) - expected = 
arr.todense().reshape(new_shape) - - np.testing.assert_array_equal(actual.todense(), expected) + arr_sps = sps.random_array( + shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler + ) + arr_sps.eliminate_zeros() + arr_sps.sum_duplicates() + arr = sparse.asarray(arr_sps) + + actual = sparse.reshape(arr, shape=new_shape) + assert actual.shape == new_shape + + try: + scipy_format = sparse.to_scipy(actual).format + except RuntimeError: + pytest.xfail("No library to compare to.") + + expected = sparse.asarray(arr_sps.reshape(new_shape).asformat(scipy_format)) if scipy_format is not None else arr + for x, y in zip(expected.get_constituent_arrays(), actual.get_constituent_arrays(), strict=True): + np.testing.assert_array_equal(x, y) + + +@parametrize_dtypes +def test_reshape_csf(dtype): # CSF csf_shape = (2, 2, 4) csf_format = sparse.levels.get_storage_format( @@ -372,7 +385,6 @@ def test_reshape(rng, dtype): csf_tensor = sparse.from_constituent_arrays(format=csf_format, arrays=arrs, shape=shape) result = sparse.reshape(csf_tensor, shape=new_shape) - for actual, expected in zip(result.get_constituent_arrays(), expected_arrs, strict=True): np.testing.assert_array_equal(actual, expected) From fe2b628ee719b4ed798a98e34624a6cd4c913b0f Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:40:42 +0100 Subject: [PATCH 08/14] Minor tweaks. 
--- sparse/mlir_backend/tests/test_simple.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 1b9a4c02..1672266b 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -304,8 +304,8 @@ def test_copy(): "format", [ "csr", - pytest.param("csc", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109135")), - pytest.param("coo", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109641")), + pytest.param("csc", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109641")), + pytest.param("coo", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109135")), ], ) @pytest.mark.parametrize( @@ -336,7 +336,7 @@ def test_reshape(rng, dtype, format, shape, new_shape): except RuntimeError: pytest.xfail("No library to compare to.") - expected = sparse.asarray(arr_sps.reshape(new_shape).asformat(scipy_format)) if scipy_format is not None else arr + expected = sparse.asarray(arr_sps.reshape(new_shape).asformat(scipy_format)) for x, y in zip(expected.get_constituent_arrays(), actual.get_constituent_arrays(), strict=True): np.testing.assert_array_equal(x, y) From 93ecd40d840e9342756773fa79f9f1b01bc54bad Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 11 Nov 2024 08:25:35 +0500 Subject: [PATCH 09/14] Tests passing after rebase. 
--- pixi.toml | 4 ++- sparse/mlir_backend/_array.py | 5 +++ sparse/mlir_backend/_ops.py | 46 ++++++++++++++++++++++-- sparse/mlir_backend/tests/test_simple.py | 6 ++-- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/pixi.toml b/pixi.toml index d69b874c..b25a1269 100644 --- a/pixi.toml +++ b/pixi.toml @@ -60,7 +60,9 @@ PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" [feature.mlir.dependencies] scipy = ">=0.19" -mlir-python-bindings = "19.*" + +[feature.mlir.target.osx-arm64.pypi-dependencies] +finch-mlir = ">=0.0.2" [feature.mlir.activation.env] SPARSE_BACKEND = "MLIR" diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index 50b863b0..ed88efc1 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -41,5 +41,10 @@ def copy(self) -> "Array": arrs = tuple(arr.copy() for arr in self.get_constituent_arrays()) return from_constituent_arrays(format=self.format, arrays=arrs, shape=self.shape) + def asformat(self, format: StorageFormat) -> "Array": + from ._ops import asformat + + return asformat(self, format=format) + def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: return self._storage.get_constituent_arrays() diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index f4d509b8..66a89a7c 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -11,7 +11,7 @@ from ._common import as_shape, fn_cache from ._core import CWD, DEBUG, OPT_LEVEL, SHARED_LIBS, ctx, pm from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType -from .levels import _determine_format +from .levels import StorageFormat, _determine_format @fn_cache @@ -135,7 +135,31 @@ def broadcast_to(in_tensor): return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) -def add(x1: Array, x2: Array) -> Array: +@fn_cache +def get_convert_module( + in_tensor_type: ir.RankedTensorType, + out_tensor_type: ir.RankedTensorType, +): + 
with ir.Location.unknown(ctx): + module = ir.Module.create() + + with ir.InsertionPoint(module.body): + + @func.FuncOp.from_py_func(in_tensor_type) + def convert(in_tensor): + return sparse_tensor.convert(out_tensor_type, in_tensor) + + convert.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() + if DEBUG: + (CWD / "broadcast_to_module.mlir").write_text(str(module)) + pm.run(module.operation) + if DEBUG: + (CWD / "broadcast_to_module_opt.mlir").write_text(str(module)) + + return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) + + +def add(x1: Array, x2: Array, /) -> Array: ret_storage_format = _determine_format(x1.format, x2.format, dtype=x1.dtype, union=True) ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() out_tensor_type = ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape)) @@ -156,6 +180,24 @@ def add(x1: Array, x2: Array) -> Array: return Array(storage=ret_storage, shape=tuple(out_tensor_type.shape)) +def asformat(x: Array, /, format: StorageFormat) -> Array: + out_tensor_type = format._get_mlir_type(shape=x.shape) + ret_storage = format._get_ctypes_type(owns_memory=True)() + + convert_module = get_convert_module( + x._get_mlir_type(), + out_tensor_type, + ) + + convert_module.invoke( + "convert", + ctypes.pointer(ctypes.pointer(ret_storage)), + *x._to_module_arg(), + ) + + return Array(storage=ret_storage, shape=x.shape) + + def reshape(x: Array, /, shape: tuple[int, ...]) -> Array: from ._conversions import _from_numpy diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 1672266b..59690ee1 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -185,7 +185,9 @@ def test_add(rng, dtype): expected = csr_2 + coo assert_csx_equal(expected, actual) - actual = sparse.to_scipy(sparse.add(coo_tensor, coo_tensor)) + # This ends up being DCSR, not COO + 
actual_tensor = sparse.add(coo_tensor, coo_tensor) + actual = sparse.to_scipy(actual_tensor.asformat(coo_tensor.format)) expected = coo + coo np.testing.assert_array_equal(actual.todense(), expected.todense()) @@ -247,7 +249,7 @@ def test_coo_3d_format(dtype): for actual, expected in zip(result, carrs, strict=True): np.testing.assert_array_equal(actual, expected) - result_arrays = sparse.add(coo_array, coo_array).get_constituent_arrays() + result_arrays = sparse.add(coo_array, coo_array).asformat(coo_array.format).get_constituent_arrays() constituent_arrays = (pos, *crd, data * 2) for actual, expected in zip(result_arrays, constituent_arrays, strict=False): np.testing.assert_array_equal(actual, expected) From 4e0fe38716fb31b7b2b50dd14313baa8428f19b9 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 11 Nov 2024 08:34:06 +0500 Subject: [PATCH 10/14] Address review comments by @mtsokol. --- sparse/mlir_backend/_ops.py | 4 ++-- sparse/mlir_backend/levels.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 66a89a7c..a6146a38 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -50,7 +50,7 @@ def add(a, b): for t in (a_tensor_type, b_tensor_type, out_tensor_type) ] ), - ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type")] * out_tensor_type.rank), + ir.ArrayAttr.get([ir.Attribute.parse("#linalg.iterator_type")] * max_rank), ) block = generic_op.regions[0].blocks.append(dtype, dtype, dtype) with ir.InsertionPoint(block): @@ -160,11 +160,11 @@ def convert(in_tensor): def add(x1: Array, x2: Array, /) -> Array: + # TODO: Determine output format via autoscheduler ret_storage_format = _determine_format(x1.format, x2.format, dtype=x1.dtype, union=True) ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() out_tensor_type = 
ret_storage_format._get_mlir_type(shape=np.broadcast_shapes(x1.shape, x2.shape)) - # TODO: Determine output format via autoscheduler add_module = get_add_module( x1._get_mlir_type(), x2._get_mlir_type(), diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index e7fd21b0..299a73b3 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -212,6 +212,7 @@ def _get_storage_format( def _is_sparse_level(lvl: Level | LevelFormat, /) -> bool: + assert isinstance(lvl, Level | LevelFormat) if isinstance(lvl, Level): lvl = lvl.format return LevelFormat.Dense != lvl @@ -226,6 +227,18 @@ def _count_dense_levels(format: StorageFormat) -> int: def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: + """Determines the output format from a group of input formats. + + 1. Counts the sparse levels for `union=True`, and dense ones for `union=False`. + 2. Gets the max number of counted levels for each format. + 3. Constructs a format with the same number of counted levels. + Sparse levels are replaced with `LevelFormat.Compressed`. + + Returns + ------- + StorageFormat + Output storage format. + """ if len(formats) == 0: if out_ndim is None: out_ndim = 0 From 0577a73a5d8a3ea57cfff6f4c8c2b8bc3525b1ce Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:42:26 +0500 Subject: [PATCH 11/14] Address review comments by @mtsokol. 
--- sparse/mlir_backend/_ops.py | 4 +- sparse/mlir_backend/tests/test_simple.py | 74 +++++++++++++++++++----- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index a6146a38..08863ef8 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -151,10 +151,10 @@ def convert(in_tensor): convert.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() if DEBUG: - (CWD / "broadcast_to_module.mlir").write_text(str(module)) + (CWD / "convert_module.mlir").write_text(str(module)) pm.run(module.operation) if DEBUG: - (CWD / "broadcast_to_module_opt.mlir").write_text(str(module)) + (CWD / "convert_module_opt.mlir").write_text(str(module)) return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=OPT_LEVEL, shared_libs=SHARED_LIBS) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 59690ee1..2ee86b3b 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -31,9 +31,9 @@ ) -def assert_csx_equal( - expected: sps.csr_array | sps.csc_array, - actual: sps.csr_array | sps.csc_array, +def assert_sps_equal( + expected: sps.csr_array | sps.csc_array | sps.coo_array, + actual: sps.csr_array | sps.csc_array | sps.coo_array, ) -> None: assert expected.format == actual.format expected.eliminate_zeros() @@ -42,8 +42,13 @@ def assert_csx_equal( actual.eliminate_zeros() actual.sum_duplicates() - np.testing.assert_array_equal(expected.indptr, actual.indptr) - np.testing.assert_array_equal(expected.indices, actual.indices) + if expected.format != "coo": + np.testing.assert_array_equal(expected.indptr, actual.indptr) + np.testing.assert_array_equal(expected.indices, actual.indices) + else: + np.testing.assert_array_equal(expected.row, actual.col) + np.testing.assert_array_equal(expected.row, actual.col) + np.testing.assert_array_equal(expected.data, actual.data) @@ -121,10 +126,10 @@ def
test_2d_constructors(rng, dtype): dense_2_tensor = sparse.asarray(np.arange(100, dtype=dtype).reshape((25, 4)) + 10) csr_retured = sparse.to_scipy(csr_tensor) - assert_csx_equal(csr_retured, csr) + assert_sps_equal(csr_retured, csr) csc_retured = sparse.to_scipy(csc_tensor) - assert_csx_equal(csc_retured, csc) + assert_sps_equal(csc_retured, csc) dense_returned = sparse.to_numpy(dense_tensor) np.testing.assert_equal(dense_returned, dense) @@ -157,15 +162,15 @@ def test_add(rng, dtype): actual = sparse.to_scipy(sparse.add(csr_tensor, csr_2_tensor)) expected = csr + csr_2 - assert_csx_equal(expected, actual) + assert_sps_equal(expected, actual) actual = sparse.to_scipy(sparse.add(csc_tensor, csc_tensor)) expected = csc + csc - assert_csx_equal(expected, actual) + assert_sps_equal(expected, actual) actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor)) expected = (csc + csr).asformat("csr") - assert_csx_equal(expected, actual) + assert_sps_equal(expected, actual) actual = sparse.to_numpy(sparse.add(csr_tensor, dense_tensor)) expected = csr + dense @@ -183,7 +188,7 @@ def test_add(rng, dtype): actual = sparse.to_scipy(sparse.add(csr_2_tensor, coo_tensor)) expected = csr_2 + coo - assert_csx_equal(expected, actual) + assert_sps_equal(expected, actual) # This ends up being DCSR, not COO actual_tensor = sparse.add(coo_tensor, coo_tensor) @@ -307,7 +312,7 @@ def test_copy(): [ "csr", pytest.param("csc", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109641")), - pytest.param("coo", marks=pytest.mark.xfail(reason="https://github.com/llvm/llvm-project/pull/109135")), + "coo", ], ) @pytest.mark.parametrize( @@ -390,6 +395,45 @@ def test_reshape_csf(dtype): for actual, expected in zip(result.get_constituent_arrays(), expected_arrs, strict=True): np.testing.assert_array_equal(actual, expected) - # DENSE - # NOTE: dense reshape is probably broken in MLIR in 19.x branch - # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE) + 
+@parametrize_dtypes +def test_reshape_dense(dtype): + SHAPE = (2, 2, 4) + + np_arr = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE) + sp_arr = sparse.asarray(np_arr) + + for new_shape in [ + (4, 4, 1), + (2, 1, 8), + ]: + expected = np_arr.reshape(new_shape) + actual = sparse.reshape(sp_arr, new_shape) + + actual_np = sparse.to_numpy(actual) + + assert actual_np.dtype == expected.dtype + np.testing.assert_equal(actual_np, expected) + + +@pytest.mark.skip(reason="Segfault") +@pytest.mark.parametrize("src_fmt", ["csr", "csc"]) +@pytest.mark.parametrize("dst_fmt", ["csr", "csc"]) +def test_asformat(rng, src_fmt, dst_fmt): + SHAPE = (100, 50) + DENSITY = 0.5 + sampler = generate_sampler(np.float64, rng) + + sps_arr = sps.random_array( + SHAPE, density=DENSITY, format=src_fmt, dtype=np.float64, random_state=rng, data_sampler=sampler + ) + sp_arr = sparse.asarray(sps_arr) + + expected = sps_arr.asformat(dst_fmt) + + actual_fmt = sparse.asarray(expected, copy=False).format + actual = sp_arr.asformat(actual_fmt) + actual_sps = sparse.to_scipy(actual) + + assert actual_sps.format == dst_fmt + assert_sps_equal(expected, actual_sps) From 540bc4ada5b02d99cc734c79c5362c09d250d407 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:46:09 +0500 Subject: [PATCH 12/14] Address review comments by @mtsokol. 
--- sparse/mlir_backend/_ops.py | 3 +++ sparse/mlir_backend/levels.py | 5 +++-- sparse/mlir_backend/tests/test_simple.py | 9 ++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 08863ef8..9f452d42 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -181,6 +181,9 @@ def add(x1: Array, x2: Array, /) -> Array: def asformat(x: Array, /, format: StorageFormat) -> Array: + if x.format == format: + return x + out_tensor_type = format._get_mlir_type(shape=x.shape) ret_storage = format._get_ctypes_type(owns_memory=True)() diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index 299a73b3..6c45923d 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -231,7 +231,8 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd 1. Counts the sparse levels for `union=True`, and dense ones for `union=False`. 2. Gets the max number of counted levels for each format. - 3. Constructs a format with the same number of counted levels. + 3. Constructs a format with rank of `out_ndim` (max rank of inputs is taken if it's `None`). + If `union=False`, the counted levels are the sparse levels; otherwise they are the dense levels. Sparse levels are replaced with `LevelFormat.Compressed`.
Returns @@ -243,7 +244,7 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd if out_ndim is None: out_ndim = 0 return get_storage_format( - levels=(Level(LevelFormat.Dense),) * out_ndim, + levels=(Level(LevelFormat.Dense if union else LevelFormat.Compressed),) * out_ndim, order="C", pos_width=64, crd_width=64, diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 2ee86b3b..a6d506f2 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -416,9 +416,12 @@ def test_reshape_dense(dtype): np.testing.assert_equal(actual_np, expected) -@pytest.mark.skip(reason="Segfault") -@pytest.mark.parametrize("src_fmt", ["csr", "csc"]) -@pytest.mark.parametrize("dst_fmt", ["csr", "csc"]) +@pytest.mark.parametrize( + "src_fmt", ["csr", "csc", pytest.param("coo", marks=pytest.mark.skip(reason="TODO: Report MLIR issue"))] +) +@pytest.mark.parametrize( + "dst_fmt", ["csr", "csc", pytest.param("coo", marks=pytest.mark.skip(reason="TODO: Report MLIR issue"))] +) def test_asformat(rng, src_fmt, dst_fmt): SHAPE = (100, 50) DENSITY = 0.5 From cf0e4636b399b9696902bb4ea6c47219af67dc68 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:15:09 +0500 Subject: [PATCH 13/14] Test 3D reshape via `asformat`. 
--- sparse/mlir_backend/_ops.py | 4 ++++ sparse/mlir_backend/levels.py | 22 +++++++++++++++++++- sparse/mlir_backend/tests/test_simple.py | 26 +++++++++++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 9f452d42..029df872 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -1,4 +1,5 @@ import ctypes +import math import mlir_finch.execution_engine import mlir_finch.passmanager @@ -205,6 +206,9 @@ def reshape(x: Array, /, shape: tuple[int, ...]) -> Array: from ._conversions import _from_numpy shape = as_shape(shape) + if math.prod(x.shape) != math.prod(shape): + raise ValueError(f"`math.prod(x.shape) != math.prod(shape)`, {x.shape=}, {shape=}") + ret_storage_format = _determine_format(x.format, dtype=x.dtype, union=len(shape) > x.ndim, out_ndim=len(shape)) shape_array = _from_numpy(np.asarray(shape, dtype=np.uint64)) out_tensor_type = ret_storage_format._get_mlir_type(shape=shape) diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index 6c45923d..c7021523 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -226,6 +226,26 @@ def _count_dense_levels(format: StorageFormat) -> int: return sum(not _is_sparse_level(lvl) for lvl in format.levels) +def _get_sparse_dense_levels( + *, n_sparse: int | None = None, n_dense: int | None = None, ndim: int | None = None +) -> tuple[Level, ...]: + if (n_sparse is not None) + (n_dense is not None) + (ndim is not None) != 2: + assert n_sparse is not None and n_dense is not None and ndim is not None # + assert n_sparse + n_dense == ndim + if n_sparse is None: + n_sparse = ndim - n_dense + if n_dense is None: + n_dense = ndim - n_sparse + if ndim is None: + ndim = n_dense + n_sparse + + assert ndim >= 0 + assert n_dense >= 0 + assert n_sparse >= 0 + + return (Level(LevelFormat.Dense),) * n_dense + (Level(LevelFormat.Compressed),) * n_sparse + + def 
_determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_ndim: int | None = None) -> StorageFormat: """Determines the output format from a group of input formats. @@ -278,7 +298,7 @@ def _determine_format(*formats: StorageFormat, dtype: DType, union: bool, out_nd n_sparse = n_counted if not union else out_ndim - n_counted - levels = (Level(LevelFormat.Dense),) * (out_ndim - n_sparse) + (Level(LevelFormat.Compressed),) * n_sparse + levels = _get_sparse_dense_levels(n_sparse=n_sparse, ndim=out_ndim) return get_storage_format( levels=levels, order=order, diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index a6d506f2..a7ae92bb 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -341,7 +341,31 @@ def test_reshape(rng, dtype, format, shape, new_shape): try: scipy_format = sparse.to_scipy(actual).format except RuntimeError: - pytest.xfail("No library to compare to.") + tmp_levels = (sparse.levels.Level(sparse.levels.LevelFormat.Dense),) * len(shape) + tmp_fmt = sparse.levels.get_storage_format( + levels=tmp_levels, + order="C", + pos_width=64, + crd_width=64, + dtype=dtype, + ) + arr_dense = arr.asformat(tmp_fmt) + arr_np = sparse.to_numpy(arr_dense) + expected_np = arr_np.reshape(new_shape) + + out_levels = (sparse.levels.Level(sparse.levels.LevelFormat.Dense),) * len(new_shape) + out_fmt = sparse.levels.get_storage_format( + levels=out_levels, + order="C", + pos_width=64, + crd_width=64, + dtype=dtype, + ) + actual_dense = actual.asformat(out_fmt) + actual_np = sparse.to_numpy(actual_dense) + + np.testing.assert_array_equal(expected_np, actual_np) + return expected = sparse.asarray(arr_sps.reshape(new_shape).asformat(scipy_format)) From e5615874337d194fe953e091ad34fdb05aa1e133 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:24:42 +0500 Subject: [PATCH 14/14] Fix `format="coo"` 
equivalence test and add reason for conversion failure. --- sparse/mlir_backend/tests/test_simple.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index a7ae92bb..2f4fe12c 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -46,8 +46,8 @@ def assert_sps_equal( np.testing.assert_array_equal(expected.indptr, actual.indptr) np.testing.assert_array_equal(expected.indices, actual.indices) else: - np.testing.assert_array_equal(expected.row, actual.col) - np.testing.assert_array_equal(expected.row, actual.col) + np.testing.assert_array_equal(expected.row, actual.row) + np.testing.assert_array_equal(expected.col, actual.col) np.testing.assert_array_equal(expected.data, actual.data) @@ -441,10 +441,20 @@ def test_reshape_dense(dtype): @pytest.mark.parametrize( - "src_fmt", ["csr", "csc", pytest.param("coo", marks=pytest.mark.skip(reason="TODO: Report MLIR issue"))] + "src_fmt", + [ + "csr", + "csc", + pytest.param("coo", marks=pytest.mark.skip(reason="https://github.com/llvm/llvm-project/issues/116012")), + ], ) @pytest.mark.parametrize( - "dst_fmt", ["csr", "csc", pytest.param("coo", marks=pytest.mark.skip(reason="TODO: Report MLIR issue"))] + "dst_fmt", + [ + "csr", + "csc", + pytest.param("coo", marks=pytest.mark.skip(reason="https://github.com/llvm/llvm-project/issues/116012")), + ], ) def test_asformat(rng, src_fmt, dst_fmt): SHAPE = (100, 50)