Skip to content

Commit ca23030

Browse files
add AffineScope Trait to gpu.launch.
1 parent c84f5a9 commit ca23030

File tree

3 files changed

+70
-41
lines changed

3 files changed

+70
-41
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
796796
def GPU_LaunchOp : GPU_Op<"launch", [
797797
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
798798
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
799-
RecursiveMemoryEffects]>,
799+
RecursiveMemoryEffects, AffineScope]>,
800800
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
801801
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
802802
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,

mlir/test/Dialect/Affine/ops.mlir

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,32 @@ module attributes {gpu.container_module} {
324324
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
325325
// CHECK: }
326326
// CHECK: gpu.return
327+
328+
// -----
329+
330+
// CHECK-LABEL: @gpu_launch_affine
331+
332+
module {
333+
func.func @gpu_launch_affine() {
334+
%c1 = arith.constant 1 : index
335+
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
336+
threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
337+
%thread_id_x = gpu.thread_id x
338+
%c128 = arith.constant 128 : index
339+
affine.for %arg12 = %thread_id_x to %c128 step 8 {
340+
}
341+
gpu.terminator
342+
}
343+
return
344+
}
345+
}
346+
347+
// CHECK-NEXT: %[[VAL_0:.*]] = arith.constant 1 : index
348+
// CHECK-NEXT: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
349+
// CHECK-NEXT: %[[VAL_13:.*]] = gpu.thread_id x
350+
// CHECK-NEXT: %[[VAL_14:.*]] = arith.constant 128 : index
351+
// CHECK-NEXT: affine.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step 8 {
352+
// CHECK-NEXT: }
353+
// CHECK-NEXT: gpu.terminator
354+
// CHECK-NEXT: }
355+
// CHECK-NEXT: return

mlir/test/Dialect/GPU/transform-gpu.mlir

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
4343
!type = memref<2 x 32 x f32>
4444
!type1d = memref<32 x f32>
4545

46-
// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
46+
// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>
4747

4848
// CHECK-LABEL: func.func @warpgroup_3d(
4949
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
6161
// CHECK: gpu.launch
6262
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
6363
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
64-
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
64+
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
6565
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
6666
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
6767
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
9595
!type = memref<2 x 32 x f32>
9696
!type1d = memref<32 x f32>
9797

98-
// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
98+
// CHECK-DAG: #map = affine_map<()[s0] -> (s0 floordiv 16)>
9999

100100
// CHECK-LABEL: func.func @warp_3d(
101101
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
114114
// CHECK: gpu.launch
115115
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
116116
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
117-
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
117+
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
118118
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
119119
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
120120
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
354354
!type = memref<2 x 32 x f32>
355355
!type1d = memref<32 x f32>
356356

357-
// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
358-
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
359-
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
357+
// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
358+
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
359+
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>
360360

361361
// CHECK-LABEL: func.func @warpgroup_linear(
362362
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
376376
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
377377
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
378378
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
379-
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
380-
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
381-
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
379+
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
380+
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]]
381+
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
382382
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
383383
// CHECK: scf.if %[[CMPLIN]]
384384
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
410410
!type = memref<2 x 32 x f32>
411411
!type1d = memref<32 x f32>
412412

413-
// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
414-
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
415-
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
413+
// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
414+
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
415+
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>
416416

417417
// CHECK-LABEL: func.func @warp_linear(
418418
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
432432
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
433433
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
434434
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
435-
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
436-
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
437-
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
435+
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
436+
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
437+
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
438438
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
439439
// CHECK: scf.if %[[CMPLIN]]
440440
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
466466
!type = memref<2 x 32 x f32>
467467
!type1d = memref<32 x f32>
468468

469-
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
470-
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
469+
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
470+
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>
471471

472-
// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
473-
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
474-
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
472+
// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
473+
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
474+
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>
475475

476476
// CHECK-LABEL: func.func @map_multi_level_linear(
477477
func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
504504
memref.store %6, %y[%i, %j] : !type
505505
} { mapping = [#gpu.thread<y>, #gpu.thread<x>]}
506506

507-
// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
508-
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
509-
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
507+
// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]]
508+
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]]
509+
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]]
510510
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
511511
// CHECK: scf.if %[[CMPLIN]]
512512
scf.forall (%i, %j, %k) in (%c3, %c2, %c1) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
515515
memref.store %8, %y[%i, %j] : !type
516516
} {mapping = [#gpu.warp<linear_dim_0>, #gpu.warp<linear_dim_1>, #gpu.warp<linear_dim_2>] }
517517

518-
// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
519-
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
518+
// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]]
519+
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]]
520520
// CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
521521
// CHECK: scf.if %[[COND]]
522522
// CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} {
545545
!type = memref<2 x 32 x f32>
546546
!type1d = memref<32 x f32>
547547

548-
// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)>
549-
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)>
550-
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)>
548+
// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)>
549+
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)>
550+
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)>
551551

552552
// CHECK-LABEL: func.func @block_linear_existing_launch(
553553
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -566,9 +566,9 @@ func.func @block_linear_existing_launch(
566566
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
567567
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
568568
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
569-
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
570-
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
571-
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
569+
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
570+
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
571+
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
572572
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index
573573
// CHECK: scf.if %[[CMPLIN]]
574574
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
@@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} {
600600
!type = memref<2 x 32 x f32>
601601
!type1d = memref<32 x f32>
602602

603-
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)>
604-
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)>
603+
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)>
604+
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)>
605605

606606
// CHECK-LABEL: func.func @block_linear_generate_launch(
607607
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -620,8 +620,8 @@ func.func @block_linear_generate_launch(
620620
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
621621
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
622622
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
623-
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]])
624-
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
623+
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]]
624+
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
625625
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
626626
// CHECK: memref.load %[[ARGY]][%[[BLX]], %[[BLY]]]
627627
scf.forall (%i, %j) in (%c7, %c9) {
@@ -647,8 +647,8 @@ module attributes {transform.with_named_sequence} {
647647
#map = affine_map<(d0) -> (d0 * 128)>
648648
#map1 = affine_map<(d0) -> (d0 * 32)>
649649

650-
// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
651-
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
650+
// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)>
651+
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
652652

653653
// CHECK-LABEL: func.func @simple_fill(
654654
func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
@@ -660,14 +660,14 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
660660
// CHECK: gpu.launch
661661
scf.forall (%arg1) in (1) {
662662
// CHECK: %[[BIDX:.*]] = gpu.block_id x
663-
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]])
663+
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]]
664664
%0 = affine.apply #map(%arg1)
665665
%subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, strided<[1], offset: ?>>
666666
scf.forall (%arg2) in (4) {
667667
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
668668
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
669669
// CHECK: %[[TIDZ:.*]] = gpu.thread_id z
670-
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
670+
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
671671
// CHECK-NOT: scf.if
672672
// CHECK: memref.subview %{{.*}}[%[[THX]]]
673673
%1 = affine.apply #map1(%arg2)

0 commit comments

Comments
 (0)