1919// CHECK-SCF-IF-DAG: #[[$TIMES2:.*]] = affine_map<()[s0] -> (s0 * 2)>
2020// CHECK-SCF-IF-DAG: #[[$TIMES4:.*]] = affine_map<()[s0] -> (s0 * 4)>
2121// CHECK-SCF-IF-DAG: #[[$TIMES8:.*]] = affine_map<()[s0] -> (s0 * 8)>
22- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_32xf32 : memref<32xf32, 3 >
23- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_64xf32 : memref<64xf32, 3 >
24- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_128xf32 : memref<128xf32, 3 >
25- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_256xf32 : memref<256xf32, 3 >
22+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_32xf32 : memref<32xf32, #gpu.address_space<workgroup> >
23+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_64xf32 : memref<64xf32, #gpu.address_space<workgroup> >
24+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_128xf32 : memref<128xf32, #gpu.address_space<workgroup> >
25+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_256xf32 : memref<256xf32, #gpu.address_space<workgroup> >
2626
2727// CHECK-SCF-IF-LABEL: func @rewrite_warp_op_to_scf_if(
2828// CHECK-SCF-IF-SAME: %[[laneid:.*]]: index,
@@ -47,8 +47,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index,
4747 %r:2 = gpu.warp_execute_on_lane_0 (%laneid )[32 ]
4848 args (%v0 , %v1 : vector <4 xf32 >, vector <8 xf32 >) -> (vector <1 xf32 >, vector <2 xf32 >) {
4949 ^bb0 (%arg0: vector <128 xf32 >, %arg1: vector <256 xf32 >):
50- // CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<256xf32, 3 >, vector<256xf32>
51- // CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<128xf32, 3 >, vector<128xf32>
50+ // CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<256xf32, #gpu.address_space<workgroup> >, vector<256xf32>
51+ // CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<128xf32, #gpu.address_space<workgroup> >, vector<128xf32>
5252// CHECK-SCF-IF: %[[def_0:.*]] = "some_def"(%[[arg0]]) : (vector<128xf32>) -> vector<32xf32>
5353// CHECK-SCF-IF: %[[def_1:.*]] = "some_def"(%[[arg1]]) : (vector<256xf32>) -> vector<64xf32>
5454 %2 = " some_def" (%arg0 ) : (vector <128 xf32 >) -> vector <32 xf32 >
@@ -60,8 +60,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index,
6060// CHECK-SCF-IF: }
6161// CHECK-SCF-IF: gpu.barrier
6262// CHECK-SCF-IF: %[[o1:.*]] = affine.apply #[[$TIMES2]]()[%[[laneid]]]
63- // CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = [true]} : memref<64xf32, 3 >, vector<2xf32>
64- // CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = [true]} : memref<32xf32, 3 >, vector<1xf32>
63+ // CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = [true]} : memref<64xf32, #gpu.address_space<workgroup> >, vector<2xf32>
64+ // CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = [true]} : memref<32xf32, #gpu.address_space<workgroup> >, vector<1xf32>
6565// CHECK-SCF-IF: "some_use"(%[[r0]]) : (vector<1xf32>) -> ()
6666// CHECK-SCF-IF: "some_use"(%[[r1]]) : (vector<2xf32>) -> ()
6767 " some_use" (%r#0 ) : (vector <1 xf32 >) -> ()
@@ -1065,18 +1065,18 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v
10651065 args (%s0 , %v0 , %v1 , %v2 : f32 , vector <f32 >, vector <1 xf32 >, vector <1 x1 xf32 >) -> (f32 , vector <f32 >, vector <1 xf32 >, vector <1 x1 xf32 >) {
10661066 ^bb0 (%bs0: f32 , %bv0: vector <f32 >, %bv1: vector <1 xf32 >, %bv2: vector <1 x1 xf32 >):
10671067
1068- // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3 >, vector<1x1xf32>
1069- // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3 >, vector<1xf32>
1070- // CHECK-SCF-IF: vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3 >, vector<f32>
1071- // CHECK-SCF-IF: memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3 >
1068+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, #gpu.address_space<workgroup> >, vector<1x1xf32>
1069+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, #gpu.address_space<workgroup> >, vector<1xf32>
1070+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[]{{.*}} : memref<f32, #gpu.address_space<workgroup> >, vector<f32>
1071+ // CHECK-SCF-IF: memref.load {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup> >
10721072 // CHECK-SCF-IF: "some_def_0"(%{{.*}}) : (f32) -> f32
10731073 // CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<f32>) -> vector<f32>
10741074 // CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<1xf32>) -> vector<1xf32>
10751075 // CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<1x1xf32>) -> vector<1x1xf32>
1076- // CHECK-SCF-IF: memref.store {{.*}}[%[[C0]]] : memref<1xf32, 3 >
1077- // CHECK-SCF-IF: vector.transfer_write {{.*}}[] : vector<f32>, memref<f32, 3 >
1078- // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = [true]} : vector<1xf32>, memref<1xf32, 3 >
1079- // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x1xf32>, memref<1x1xf32, 3 >
1076+ // CHECK-SCF-IF: memref.store {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup> >
1077+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[] : vector<f32>, memref<f32, #gpu.address_space<workgroup> >
1078+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = [true]} : vector<1xf32>, memref<1xf32, #gpu.address_space<workgroup> >
1079+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x1xf32>, memref<1x1xf32, #gpu.address_space<workgroup> >
10801080
10811081 %rs0 = " some_def_0" (%bs0 ) : (f32 ) -> f32
10821082 %rv0 = " some_def_1" (%bv0 ) : (vector <f32 >) -> vector <f32 >
@@ -1088,10 +1088,10 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v
10881088 }
10891089
10901090 // CHECK-SCF-IF: gpu.barrier
1091- // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3 >, vector<1x1xf32>
1092- // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3 >, vector<1xf32>
1093- // CHECK-SCF-IF: %[[RV0:.*]] = vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3 >, vector<f32>
1094- // CHECK-SCF-IF: %[[RS0:.*]] = memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3 >
1091+ // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, #gpu.address_space<workgroup> >, vector<1x1xf32>
1092+ // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, #gpu.address_space<workgroup> >, vector<1xf32>
1093+ // CHECK-SCF-IF: %[[RV0:.*]] = vector.transfer_read {{.*}}[]{{.*}} : memref<f32, #gpu.address_space<workgroup> >, vector<f32>
1094+ // CHECK-SCF-IF: %[[RS0:.*]] = memref.load {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup> >
10951095 // CHECK-SCF-IF: return %[[RS0]], %[[RV0]], %[[RV1]], %[[RV2]] : f32, vector<f32>, vector<1xf32>, vector<1x1xf32>
10961096 return %r#0 , %r#1 , %r#2 , %r#3 : f32 , vector <f32 >, vector <1 xf32 >, vector <1 x1 xf32 >
10971097}
@@ -1106,22 +1106,22 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %
11061106 -> (vector <1 x64 x1 xf32 >, vector <1 x2 x128 xf32 >) {
11071107 // CHECK-SCF-IF-DAG: %[[C0:.*]] = arith.constant 0 : index
11081108
1109- // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x1xf32>, memref<32x64x1xf32, 3 >
1109+ // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x1xf32>, memref<32x64x1xf32, #gpu.address_space<workgroup> >
11101110 // CHECK-SCF-IF: %[[RID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]]
1111- // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x128xf32>, memref<1x64x128xf32, 3 >
1111+ // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x128xf32>, memref<1x64x128xf32, #gpu.address_space<workgroup> >
11121112 // CHECK-SCF-IF: gpu.barrier
11131113
11141114 // CHECK-SCF-IF: scf.if{{.*}}{
11151115 %r:2 = gpu.warp_execute_on_lane_0 (%laneid )[32 ]
11161116 args (%v0 , %v1 : vector <1 x64 x1 xf32 >, vector <1 x2 x128 xf32 >) -> (vector <1 x64 x1 xf32 >, vector <1 x2 x128 xf32 >) {
11171117 ^bb0 (%arg0: vector <32 x64 x1 xf32 >, %arg1: vector <1 x64 x128 xf32 >):
11181118
1119- // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3 >, vector<32x64x1xf32>
1120- // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3 >, vector<1x64x128xf32>
1119+ // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<32x64x1xf32, #gpu.address_space<workgroup> >, vector<32x64x1xf32>
1120+ // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<1x64x128xf32, #gpu.address_space<workgroup> >, vector<1x64x128xf32>
11211121 // CHECK-SCF-IF: %[[W0:.*]] = "some_def_0"(%[[SR0]]) : (vector<32x64x1xf32>) -> vector<32x64x1xf32>
11221122 // CHECK-SCF-IF: %[[W1:.*]] = "some_def_1"(%[[SR1]]) : (vector<1x64x128xf32>) -> vector<1x64x128xf32>
1123- // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<32x64x1xf32>, memref<32x64x1xf32, 3 >
1124- // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x64x128xf32>, memref<1x64x128xf32, 3 >
1123+ // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<32x64x1xf32>, memref<32x64x1xf32, #gpu.address_space<workgroup> >
1124+ // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x64x128xf32>, memref<1x64x128xf32, #gpu.address_space<workgroup> >
11251125
11261126 %r0 = " some_def_0" (%arg0 ) : (vector <32 x64 x1 xf32 >) -> vector <32 x64 x1 xf32 >
11271127 %r1 = " some_def_1" (%arg1 ) : (vector <1 x64 x128 xf32 >) -> vector <1 x64 x128 xf32 >
@@ -1132,8 +1132,8 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %
11321132
11331133 // CHECK-SCF-IF: gpu.barrier
11341134 // CHECK-SCF-IF: %[[WID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]]
1135- // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3 >, vector<1x64x1xf32>
1136- // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3 >, vector<1x2x128xf32>
1135+ // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<32x64x1xf32, #gpu.address_space<workgroup> >, vector<1x64x1xf32>
1136+ // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<1x64x128xf32, #gpu.address_space<workgroup> >, vector<1x2x128xf32>
11371137 // CHECK-SCF-IF: return %[[R0]], %[[R1]] : vector<1x64x1xf32>, vector<1x2x128xf32>
11381138 return %r#0 , %r#1 : vector <1 x64 x1 xf32 >, vector <1 x2 x128 xf32 >
11391139}
0 commit comments