@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
4343!type = memref <2 x 32 x f32 >
4444!type1d = memref <32 x f32 >
4545
46- // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
46+ // CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>
4747
4848// CHECK-LABEL: func.func @warpgroup_3d(
4949// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
6161// CHECK: gpu.launch
6262// CHECK: %[[TIDX:.*]] = gpu.thread_id x
6363// CHECK: %[[TIDY:.*]] = gpu.thread_id y
64- // CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
64+ // CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[ %[[TIDX]]]
6565// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
6666// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
6767// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
9595!type = memref <2 x 32 x f32 >
9696!type1d = memref <32 x f32 >
9797
98- // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
98+ // CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 16)>
9999
100100// CHECK-LABEL: func.func @warp_3d(
101101// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
114114// CHECK: gpu.launch
115115// CHECK: %[[TIDX:.*]] = gpu.thread_id x
116116// CHECK: %[[TIDY:.*]] = gpu.thread_id y
117- // CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
117+ // CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[ %[[TIDX]]]
118118// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
119119// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
120120// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
354354!type = memref <2 x 32 x f32 >
355355!type1d = memref <32 x f32 >
356356
357- // CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
358- // CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
359- // CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
357+ // CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
358+ // CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
359+ // CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>
360360
361361// CHECK-LABEL: func.func @warpgroup_linear(
362362// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
376376// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
377377// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
378378// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
379- // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
380- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
381- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
379+ // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
380+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[ %[[TIDX]], %[[TIDY]]]
381+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
382382// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
383383// CHECK: scf.if %[[CMPLIN]]
384384// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
410410!type = memref <2 x 32 x f32 >
411411!type1d = memref <32 x f32 >
412412
413- // CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
414- // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
415- // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
413+ // CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
414+ // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
415+ // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>
416416
417417// CHECK-LABEL: func.func @warp_linear(
418418// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
432432// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
433433// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
434434// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
435- // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
436- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
437- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
435+ // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
436+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
437+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
438438// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
439439// CHECK: scf.if %[[CMPLIN]]
440440// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
466466!type = memref <2 x 32 x f32 >
467467!type1d = memref <32 x f32 >
468468
469- // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
470- // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
469+ // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
470+ // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>
471471
472- // CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
473- // CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
474- // CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
472+ // CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
473+ // CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
474+ // CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>
475475
476476// CHECK-LABEL: func.func @map_multi_level_linear(
477477func.func @map_multi_level_linear (%x: !type , %y: !type , %t: !type1d , %alpha : f32 , %stream : !gpu.async.token ) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
504504 memref.store %6 , %y [%i , %j ] : !type
505505 } { mapping = [#gpu.thread <y >, #gpu.thread <x >]}
506506
507- // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
508- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
509- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
507+ // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[ %[[TIDX]], %[[TIDY]]]
508+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[ %[[TIDX]], %[[TIDY]]]
509+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[ %[[TIDX]], %[[TIDY]]]
510510 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
511511 // CHECK: scf.if %[[CMPLIN]]
512512 scf.forall (%i , %j , %k ) in (%c3 , %c2 , %c1 ) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
515515 memref.store %8 , %y [%i , %j ] : !type
516516 } {mapping = [#gpu.warp <linear_dim_0 >, #gpu.warp <linear_dim_1 >, #gpu.warp <linear_dim_2 >] }
517517
518- // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
519- // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
518+ // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[ %[[TIDX]], %[[TIDY]]]
519+ // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[ %[[TIDX]], %[[TIDY]]]
520520 // CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
521521 // CHECK: scf.if %[[COND]]
522522 // CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -648,7 +648,7 @@ module attributes {transform.with_named_sequence} {
648648#map1 = affine_map <(d0 ) -> (d0 * 32 )>
649649
650650// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
651- // CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
651+ // CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
652652
653653// CHECK-LABEL: func.func @simple_fill(
654654func.func @simple_fill (%arg0: memref <128 xf32 >) -> memref <128 xf32 > {
@@ -667,7 +667,7 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
667667// CHECK: %[[TIDX:.*]] = gpu.thread_id x
668668// CHECK: %[[TIDY:.*]] = gpu.thread_id y
669669// CHECK: %[[TIDZ:.*]] = gpu.thread_id z
670- // CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
670+ // CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
671671// CHECK-NOT: scf.if
672672// CHECK: memref.subview %{{.*}}[%[[THX]]]
673673 %1 = affine.apply #map1 (%arg2 )
0 commit comments