Skip to content

Commit 32abcf0

Browse files
committed
ENH: Update MLIR backend to LLVM 20.dev
1 parent db60537 commit 32abcf0

File tree

3 files changed

+55
-36
lines changed

3 files changed

+55
-36
lines changed

sparse/mlir_backend/_constructors.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,11 @@ def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType:
108108

109109

110110
@fn_cache
111-
def get_coo_class(values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]:
111+
def get_coo_class(values_dtype: type[DType], index_dtype: type[DType], *, rank: int = 2) -> type[ctypes.Structure]:
112112
class Coo(ctypes.Structure):
113113
_fields_ = [
114114
("pos", get_nd_memref_descr(1, index_dtype)),
115-
("coords", get_nd_memref_descr(2, index_dtype)),
115+
*[(f"coords_{i}", get_nd_memref_descr(1, index_dtype)) for i in range(rank)],
116116
("data", get_nd_memref_descr(1, values_dtype)),
117117
]
118118
dtype = values_dtype
@@ -124,42 +124,46 @@ def from_sps(cls, arr: sps.coo_array | Iterable[np.ndarray]) -> "Coo":
124124
if not arr.has_canonical_format:
125125
raise Exception("COO must have canonical format")
126126
np_pos = np.array([0, arr.size], dtype=index_dtype.np_dtype)
127-
np_coords = np.stack(arr.coords, axis=1, dtype=index_dtype.np_dtype)
127+
np_coords = [np.array(coord, dtype=index_dtype.np_dtype) for coord in arr.coords]
128128
np_data = arr.data
129129
else:
130130
if len(arr) != 3:
131131
raise Exception("COO must be comprised of three arrays")
132132
np_pos, np_coords, np_data = arr
133133

134134
pos = numpy_to_ranked_memref(np_pos)
135-
coords = numpy_to_ranked_memref(np_coords)
135+
coords = [numpy_to_ranked_memref(coord) for coord in np_coords]
136136
data = numpy_to_ranked_memref(np_data)
137-
coo_instance = cls(pos=pos, coords=coords, data=data)
137+
coo_instance = cls(pos, *(coords + [data]))
138138
_take_owneship(coo_instance, np_pos)
139-
_take_owneship(coo_instance, np_coords)
139+
for coord in np_coords:
140+
_take_owneship(coo_instance, coord)
140141
_take_owneship(coo_instance, np_data)
141142

142143
return coo_instance
143144

144145
def to_sps(self, shape: tuple[int, ...]) -> sps.coo_array | list[np.ndarray]:
145146
pos = ranked_memref_to_numpy(self.pos)
146-
coords = ranked_memref_to_numpy(self.coords)[pos[0] : pos[1]]
147+
coords = [ranked_memref_to_numpy(coord) for coord in self.get_coord_list()]
147148
data = ranked_memref_to_numpy(self.data)
148149
return (
149-
sps.coo_array((data, coords.T), shape=shape)
150+
sps.coo_array((data, np.stack(coords, axis=0, dtype=index_dtype.np_dtype)), shape=shape)
150151
if len(shape) == 2
151152
else PackedArgumentTuple((pos, coords, data))
152153
)
153154

154155
def to_module_arg(self) -> list:
155156
return [
156157
ctypes.pointer(ctypes.pointer(self.pos)),
157-
ctypes.pointer(ctypes.pointer(self.coords)),
158+
*[ctypes.pointer(ctypes.pointer(coord)) for coord in self.get_coord_list()],
158159
ctypes.pointer(ctypes.pointer(self.data)),
159160
]
160161

161162
def get__fields_(self) -> list:
162-
return [self.pos, self.coords, self.data]
163+
return [self.pos, *self.get_coord_list(), self.data]
164+
165+
def get_coord_list(self) -> list:
166+
return [getattr(self, f"coords_{i}") for i in range(rank)]
163167

164168
@classmethod
165169
@fn_cache
@@ -173,10 +177,14 @@ def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType:
173177
)
174178
mid_singleton_lvls = [
175179
sparse_tensor.EncodingAttr.build_level_type(
176-
sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.non_unique]
180+
sparse_tensor.LevelFormat.singleton,
181+
[sparse_tensor.LevelProperty.non_unique, sparse_tensor.LevelProperty.soa],
177182
)
178183
] * (len(shape) - 2)
179-
levels = (compressed_lvl, *mid_singleton_lvls, sparse_tensor.LevelFormat.singleton)
184+
last_singleton_lvl = sparse_tensor.EncodingAttr.build_level_type(
185+
sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.soa]
186+
)
187+
levels = (compressed_lvl, *mid_singleton_lvls, last_singleton_lvl)
180188
ordering = ir.AffineMap.get_permutation([*range(len(shape))])
181189
encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width)
182190
return ir.RankedTensorType.get(list(shape), values_dtype, encoding)
@@ -320,6 +328,7 @@ def __init__(
320328
self._values_dtype = dtype if dtype is not None else asdtype(obj.dtype)
321329

322330
if _is_scipy_sparse_obj(obj):
331+
self.format = obj.format
323332
self._owns_memory = False
324333

325334
if obj.format in ("csr", "csc"):
@@ -335,22 +344,26 @@ def __init__(
335344
raise Exception(f"{obj.format} SciPy format not supported.")
336345

337346
elif _is_numpy_obj(obj):
347+
self.format = "dense"
338348
self._owns_memory = False
339349
self._index_dtype = asdtype(np.intp)
340350
self._format_class = get_dense_class(self._values_dtype, self._index_dtype)
341351
self._obj = self._format_class.from_sps(obj)
342352

343353
elif _is_mlir_obj(obj):
354+
self.format = "custom"
344355
self._owns_memory = True
345356
self._format_class = type(obj)
346357
self._obj = obj
347358

348359
elif format is not None:
360+
self.format = format
349361
if format in ["csf", "coo"]:
350362
fn_format_class = get_csf_class if format == "csf" else get_coo_class
363+
kwargs = {} if format == "csf" else {"rank": len(self.shape)}
351364
self._owns_memory = False
352365
self._index_dtype = asdtype(np.intp)
353-
self._format_class = fn_format_class(self._values_dtype, self._index_dtype)
366+
self._format_class = fn_format_class(self._values_dtype, self._index_dtype, **kwargs)
354367
self._obj = self._format_class.from_sps(obj)
355368

356369
else:

sparse/mlir_backend/_ops.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def get_add_module(
3232

3333
@func.FuncOp.from_py_func(a_tensor_type, b_tensor_type)
3434
def add(a, b):
35-
out = tensor.empty(out_tensor_type, [])
35+
out = tensor.empty(out_tensor_type.shape, dtype, encoding=out_tensor_type.encoding)
3636
generic_op = linalg.GenericOp(
3737
[out_tensor_type],
3838
[a, b],
@@ -108,7 +108,9 @@ def get_broadcast_to_module(
108108

109109
@func.FuncOp.from_py_func(in_tensor_type)
110110
def broadcast_to(in_tensor):
111-
out = tensor.empty(out_tensor_type, [])
111+
out = tensor.empty(
112+
out_tensor_type.shape, out_tensor_type.element_type, encoding=out_tensor_type.encoding
113+
)
112114
return linalg.broadcast(in_tensor, outs=[out], dimensions=dimensions)
113115

114116
broadcast_to.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
@@ -156,7 +158,7 @@ def _infer_format_class(rank: int, values_dtype: type[DType], index_dtype: type[
156158

157159
def reshape(x: Tensor, /, shape: tuple[int, ...]) -> Tensor:
158160
x_tensor_type = x._obj.get_tensor_definition(x.shape)
159-
if len(x.shape) == len(shape):
161+
if len(x.shape) == len(shape) or x.format == "dense":
160162
out_tensor_type = x._obj.get_tensor_definition(shape)
161163
ret_obj = x._format_class()
162164
else:

sparse/mlir_backend/tests/test_simple.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -166,20 +166,18 @@ def test_add(rng, dtype):
166166
assert isinstance(actual, np.ndarray)
167167
np.testing.assert_array_equal(actual, expected)
168168

169-
# NOTE: Fixed in https://github.com/llvm/llvm-project/pull/108615
170-
# actual = sparse.add(c_tensor, c_tensor).to_scipy_sparse()
171-
# expected = c + c
172-
# assert isinstance(actual, np.ndarray)
173-
# np.testing.assert_array_equal(actual, expected)
169+
actual = sparse.add(dense_tensor, dense_tensor).to_scipy_sparse()
170+
expected = dense + dense
171+
assert isinstance(actual, np.ndarray)
172+
np.testing.assert_array_equal(actual, expected)
174173

175174
actual = sparse.add(csr_2_tensor, coo_tensor).to_scipy_sparse()
176175
expected = csr_2 + coo
177176
np.testing.assert_array_equal(actual.todense(), expected.todense())
178177

179-
# NOTE: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247
180-
# actual = sparse.add(d_tensor, d_tensor).to_scipy_sparse()
181-
# expected = d + d
182-
# np.testing.assert_array_equal(actual.todense(), expected.todense())
178+
actual = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
179+
expected = coo + coo
180+
np.testing.assert_array_equal(actual.todense(), expected.todense())
183181

184182

185183
@parametrize_dtypes
@@ -203,7 +201,7 @@ def test_csf_format(dtype):
203201
def test_coo_3d_format(dtype):
204202
SHAPE = (2, 2, 4)
205203
pos = np.array([0, 7])
206-
crd = np.array([[0, 1, 0, 0, 1, 1, 0], [1, 3, 1, 0, 0, 1, 0], [3, 1, 1, 0, 1, 1, 1]])
204+
crd = [np.array([0, 1, 0, 0, 1, 1, 0]), np.array([1, 3, 1, 0, 0, 1, 0]), np.array([3, 1, 1, 0, 1, 1, 1])]
207205
data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype)
208206
coo = [pos, crd, data]
209207

@@ -212,11 +210,10 @@ def test_coo_3d_format(dtype):
212210
for actual, expected in zip(result, coo, strict=False):
213211
np.testing.assert_array_equal(actual, expected)
214212

215-
# NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
216-
# res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
217-
# coo_2 = [pos, crd, data * 2]
218-
# for actual, expected in zip(res_tensor, coo_2, strict=False):
219-
# np.testing.assert_array_equal(actual, expected)
213+
res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse()
214+
coo_2 = [pos, crd, data * 2]
215+
for actual, expected in zip(res_tensor, coo_2, strict=False):
216+
np.testing.assert_array_equal(actual, expected)
220217

221218

222219
@parametrize_dtypes
@@ -232,9 +229,6 @@ def test_reshape(rng, dtype):
232229
((80, 1), (80,)),
233230
]:
234231
for format in ["csr", "csc", "coo"]:
235-
if format == "coo":
236-
# NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
237-
continue
238232
if format == "csc":
239233
# NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641
240234
continue
@@ -289,8 +283,18 @@ def test_reshape(rng, dtype):
289283
np.testing.assert_array_equal(actual, expected)
290284

291285
# DENSE
292-
# NOTE: dense reshape is probably broken in MLIR in 19.x branch
293-
# dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)
286+
for shape, new_shape in [
287+
((100, 50), (25, 200)),
288+
((100, 50), (10, 500, 1)),
289+
((80, 1), (8, 10)),
290+
((80, 1), (80,)),
291+
]:
292+
dense = np.arange(math.prod(shape), dtype=dtype).reshape(shape)
293+
dense_tensor = sparse.asarray(dense)
294+
actual = sparse.reshape(dense_tensor, shape=new_shape).to_scipy_sparse()
295+
expected = dense.reshape(new_shape)
296+
297+
np.testing.assert_array_equal(actual, expected)
294298

295299

296300
@parametrize_dtypes

0 commit comments

Comments (0)