Commit ccba545
As discussed in #216, now that the upstream fix
llvm/llvm-project#114045 makes `linalg` ops implement the
`RecursiveMemoryEffects` trait, we can convert `tts.scatter` to a
`linalg.generic` whose body performs a `memref.store` for each
scalar index and value element.
For instance,
`triton_shared/test/Conversion/UnstructuredToMemref/gather_scatter_all_mask.mlir`:
``` mlir
// RUN: triton-shared-opt --triton-to-unstructured --canonicalize --unstructured-to-memref --canonicalize %s
#map = affine_map<(d0) -> (d0)>
module {
tt.func public @masked_gather_scatter(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>) attributes {noinline = false} {
%cst = arith.constant dense<3> : tensor<4xi32>
%cst_0 = arith.constant dense<64> : tensor<4xi32>
%cst_1 = arith.constant dense<4> : tensor<4xi32>
%c2_i32 = arith.constant 2 : i32
%c1_i32 = arith.constant 1 : i32
%c0_i32 = arith.constant 0 : i32
%cst_2 = arith.constant 9.900000e+01 : f32
%0 = builtin.unrealized_conversion_cast %arg1 : !tt.ptr<f32> to memref<*xf32>
%1 = builtin.unrealized_conversion_cast %arg0 : !tt.ptr<f32> to memref<*xf32>
%2 = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32>
%3:2 = scf.for %arg2 = %c0_i32 to %c2_i32 step %c1_i32 iter_args(%arg3 = %2, %arg4 = %2) -> (tensor<4xi32>, tensor<4xi32>) : i32 {
%4 = arith.divsi %arg3, %cst : tensor<4xi32>
%5 = tt.splat %arg2 : i32 -> tensor<4xi32>
%6 = arith.addi %4, %5 : tensor<4xi32>
%7 = arith.cmpi slt, %6, %cst_0 : tensor<4xi32>
%cast = memref.cast %1 : memref<*xf32> to memref<?xf32>
%8 = bufferization.to_tensor %cast restrict : memref<?xf32> to tensor<?xf32>
%9 = tensor.empty() : tensor<4xf32>
%10 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xi32>, tensor<4xi1>) outs(%9 : tensor<4xf32>) {
^bb0(%in: i32, %in_4: i1, %out: f32):
%13 = scf.if %in_4 -> (f32) {
%14 = arith.index_cast %in : i32 to index
%extracted = tensor.extract %8[%14] : tensor<?xf32>
scf.yield %extracted : f32
} else {
scf.yield %cst_2 : f32
}
linalg.yield %13 : f32
} -> tensor<4xf32>
%cast_3 = memref.cast %0 : memref<*xf32> to memref<?xf32>
// tts.scatter lowers to:
linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%6, %10, %7 : tensor<4xi32>, tensor<4xf32>, tensor<4xi1>) {
^bb0(%in: i32, %in_4: f32, %in_5: i1):
scf.if %in_5 {
%13 = arith.index_cast %in : i32 to index
memref.store %in_4, %cast_3[%13] : memref<?xf32>
}
linalg.yield
}
%11 = arith.addi %6, %cst_1 : tensor<4xi32>
%12 = arith.addi %arg4, %cst_1 : tensor<4xi32>
scf.yield %11, %12 : tensor<4xi32>, tensor<4xi32>
}
tt.return
}
}
```
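Because the scatter is now a `linalg.generic`, we can also run `linalg-fuse-elementwise-ops`, which fuses the gather and the scatter above into a single `linalg.generic`: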
```mlir
// RUN: triton-shared-opt --linalg-fuse-elementwise-ops --canonicalize %s
#map = affine_map<(d0) -> (d0)>
module {
tt.func public @masked_gather_scatter(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>) attributes {noinline = false} {
%cst = arith.constant dense<3> : tensor<4xi32>
%cst_0 = arith.constant dense<64> : tensor<4xi32>
%cst_1 = arith.constant dense<4> : tensor<4xi32>
%c2_i32 = arith.constant 2 : i32
%c1_i32 = arith.constant 1 : i32
%c0_i32 = arith.constant 0 : i32
%0 = builtin.unrealized_conversion_cast %arg1 : !tt.ptr<f32> to memref<*xf32>
%1 = builtin.unrealized_conversion_cast %arg0 : !tt.ptr<f32> to memref<*xf32>
%2 = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32>
%3:2 = scf.for %arg2 = %c0_i32 to %c2_i32 step %c1_i32 iter_args(%arg3 = %2, %arg4 = %2) -> (tensor<4xi32>, tensor<4xi32>) : i32 {
%4 = arith.divsi %arg3, %cst : tensor<4xi32>
%5 = tt.splat %arg2 : i32 -> tensor<4xi32>
%6 = arith.addi %4, %5 : tensor<4xi32>
%7 = arith.cmpi slt, %6, %cst_0 : tensor<4xi32>
%cast = memref.cast %1 : memref<*xf32> to memref<?xf32>
%8 = bufferization.to_tensor %cast restrict : memref<?xf32> to tensor<?xf32>
%cast_2 = memref.cast %0 : memref<*xf32> to memref<?xf32>
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xi32>, tensor<4xi1>) {
^bb0(%in: i32, %in_3: i1):
scf.if %in_3 {
%11 = arith.index_cast %in : i32 to index
%extracted = tensor.extract %8[%11] : tensor<?xf32>
%12 = arith.index_cast %in : i32 to index
memref.store %extracted, %cast_2[%12] : memref<?xf32>
}
linalg.yield
}
%9 = arith.addi %6, %cst_1 : tensor<4xi32>
%10 = arith.addi %arg4, %cst_1 : tensor<4xi32>
scf.yield %9, %10 : tensor<4xi32>, tensor<4xi32>
}
tt.return
}
}
```
Co-authored-by: Xiaoran Weng <[email protected]>
1 parent 6f718b7 commit ccba545
File tree
14 files changed
+351
-290
lines changed- lib/Conversion/UnstructuredToMemref
- test/Conversion
- StructuredToMemref
- UnstructuredToMemref
14 files changed
+351
-290
lines changedLines changed: 106 additions & 92 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
52 | 52 | | |
53 | 53 | | |
54 | 54 | | |
55 | | - | |
56 | | - | |
| 55 | + | |
57 | 56 | | |
58 | 57 | | |
59 | 58 | | |
| |||
158 | 157 | | |
159 | 158 | | |
160 | 159 | | |
161 | | - | |
| 160 | + | |
162 | 161 | | |
163 | 162 | | |
164 | 163 | | |
| |||
171 | 170 | | |
172 | 171 | | |
173 | 172 | | |
174 | | - | |
175 | 173 | | |
176 | 174 | | |
177 | 175 | | |
178 | 176 | | |
179 | 177 | | |
180 | 178 | | |
181 | | - | |
| 179 | + | |
182 | 180 | | |
183 | 181 | | |
184 | 182 | | |
185 | 183 | | |
186 | | - | |
| 184 | + | |
187 | 185 | | |
188 | 186 | | |
189 | 187 | | |
190 | 188 | | |
191 | | - | |
192 | | - | |
193 | | - | |
194 | | - | |
195 | | - | |
| 189 | + | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
| 195 | + | |
196 | 196 | | |
197 | 197 | | |
198 | 198 | | |
199 | 199 | | |
200 | 200 | | |
201 | 201 | | |
202 | 202 | | |
203 | | - | |
| 203 | + | |
204 | 204 | | |
205 | 205 | | |
206 | 206 | | |
207 | 207 | | |
208 | | - | |
209 | | - | |
| 208 | + | |
| 209 | + | |
210 | 210 | | |
211 | 211 | | |
212 | 212 | | |
213 | 213 | | |
214 | 214 | | |
215 | 215 | | |
216 | | - | |
217 | | - | |
218 | | - | |
219 | | - | |
220 | | - | |
| 216 | + | |
| 217 | + | |
| 218 | + | |
| 219 | + | |
221 | 220 | | |
222 | | - | |
223 | | - | |
224 | | - | |
225 | | - | |
226 | | - | |
| 221 | + | |
227 | 222 | | |
228 | | - | |
229 | | - | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
230 | 229 | | |
231 | 230 | | |
232 | | - | |
233 | | - | |
234 | | - | |
235 | | - | |
236 | | - | |
237 | | - | |
238 | | - | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
239 | 235 | | |
240 | | - | |
| 236 | + | |
241 | 237 | | |
242 | 238 | | |
243 | 239 | | |
244 | 240 | | |
245 | 241 | | |
| 242 | + | |
| 243 | + | |
246 | 244 | | |
247 | 245 | | |
248 | 246 | | |
249 | | - | |
250 | | - | |
| 247 | + | |
| 248 | + | |
251 | 249 | | |
252 | 250 | | |
253 | 251 | | |
254 | 252 | | |
255 | 253 | | |
256 | 254 | | |
257 | | - | |
| 255 | + | |
258 | 256 | | |
259 | 257 | | |
260 | | - | |
261 | | - | |
262 | | - | |
| 258 | + | |
| 259 | + | |
| 260 | + | |
263 | 261 | | |
264 | 262 | | |
265 | | - | |
| 263 | + | |
266 | 264 | | |
267 | 265 | | |
268 | 266 | | |
269 | | - | |
270 | | - | |
271 | | - | |
272 | | - | |
273 | | - | |
274 | | - | |
275 | | - | |
276 | | - | |
277 | | - | |
278 | | - | |
279 | | - | |
280 | | - | |
| 267 | + | |
| 268 | + | |
| 269 | + | |
| 270 | + | |
281 | 271 | | |
282 | 272 | | |
283 | 273 | | |
284 | 274 | | |
285 | | - | |
| 275 | + | |
286 | 276 | | |
287 | 277 | | |
288 | 278 | | |
| |||
292 | 282 | | |
293 | 283 | | |
294 | 284 | | |
295 | | - | |
| 285 | + | |
296 | 286 | | |
297 | 287 | | |
298 | 288 | | |
| |||
309 | 299 | | |
310 | 300 | | |
311 | 301 | | |
| 302 | + | |
312 | 303 | | |
313 | 304 | | |
314 | | - | |
| 305 | + | |
315 | 306 | | |
316 | 307 | | |
317 | 308 | | |
318 | 309 | | |
319 | | - | |
320 | | - | |
| 310 | + | |
321 | 311 | | |
322 | | - | |
323 | | - | |
324 | | - | |
325 | | - | |
326 | | - | |
327 | | - | |
328 | | - | |
329 | | - | |
330 | | - | |
331 | | - | |
332 | | - | |
333 | | - | |
334 | | - | |
335 | | - | |
336 | | - | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| 321 | + | |
| 322 | + | |
| 323 | + | |
| 324 | + | |
| 325 | + | |
| 326 | + | |
337 | 327 | | |
338 | 328 | | |
339 | | - | |
340 | | - | |
341 | | - | |
| 329 | + | |
| 330 | + | |
342 | 331 | | |
343 | | - | |
344 | | - | |
| 332 | + | |
| 333 | + | |
| 334 | + | |
345 | 335 | | |
346 | | - | |
347 | | - | |
348 | | - | |
| 336 | + | |
| 337 | + | |
| 338 | + | |
349 | 339 | | |
350 | | - | |
351 | | - | |
352 | | - | |
353 | | - | |
354 | | - | |
355 | | - | |
356 | | - | |
357 | | - | |
358 | | - | |
| 340 | + | |
| 341 | + | |
| 342 | + | |
| 343 | + | |
| 344 | + | |
| 345 | + | |
| 346 | + | |
| 347 | + | |
| 348 | + | |
| 349 | + | |
| 350 | + | |
| 351 | + | |
| 352 | + | |
| 353 | + | |
| 354 | + | |
| 355 | + | |
| 356 | + | |
| 357 | + | |
| 358 | + | |
| 359 | + | |
| 360 | + | |
| 361 | + | |
| 362 | + | |
| 363 | + | |
| 364 | + | |
| 365 | + | |
| 366 | + | |
| 367 | + | |
| 368 | + | |
| 369 | + | |
| 370 | + | |
| 371 | + | |
| 372 | + | |
| 373 | + | |
359 | 374 | | |
360 | | - | |
361 | 375 | | |
362 | | - | |
| 376 | + | |
363 | 377 | | |
364 | 378 | | |
365 | 379 | | |
| |||
Lines changed: 8 additions & 4 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
32 | 32 | | |
33 | 33 | | |
34 | 34 | | |
35 | | - | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
36 | 38 | | |
37 | 39 | | |
38 | 40 | | |
| |||
73 | 75 | | |
74 | 76 | | |
75 | 77 | | |
76 | | - | |
77 | | - | |
78 | | - | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
79 | 83 | | |
80 | 84 | | |
81 | 85 | | |
Lines changed: 8 additions & 4 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
31 | 31 | | |
32 | 32 | | |
33 | 33 | | |
34 | | - | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
35 | 37 | | |
36 | 38 | | |
37 | 39 | | |
| |||
50 | 52 | | |
51 | 53 | | |
52 | 54 | | |
53 | | - | |
54 | | - | |
55 | | - | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
56 | 60 | | |
57 | 61 | | |
58 | 62 | | |
0 commit comments