|
4 | 4 | //CHECK: #map1 = affine_map<()[s0] -> (s0 mod 4)> |
5 | 5 | gpu.module @test_1_1_assignment { |
6 | 6 | // CHECK-LABEL: create_nd_tdesc |
7 | | - // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
| 7 | + // CHECK-SAME: [[ARG_0:%.*]]: memref<24x32xf32> |
8 | 8 | gpu.func @create_nd_tdesc(%src: memref<24x32xf32>) { |
9 | | - // CHECK: %[[SGID:.*]] = gpu.subgroup_id |
10 | | - // CHECK: %[[C12:.*]] = arith.constant 12 : index |
11 | | - // CHECK: %[[C4:.*]] = arith.constant 4 : index |
12 | | - // CHECK: %[[C8:.*]] = arith.constant 8 : index |
13 | | - // CHECK: %[[DIV:.*]] = affine.apply #map()[%[[SGID]]] |
14 | | - // CHECK: %[[REM:.*]] = affine.apply #map1()[%[[SGID]]] |
15 | | - // CHECK: %[[MUL1:.*]] = index.mul %[[DIV]], %[[C12]] |
16 | | - // CHECK: %[[MUL2:.*]] = index.mul %[[REM]], %[[C8]] |
17 | | - // CHECK: %[[C24:.*]] = arith.constant 24 : index |
18 | | - // CHECK: %[[MOD:.*]] = index.remu %[[MUL1]], %[[C24]] |
19 | | - // CHECK: %[[C0:.*]] = arith.constant 0 : index |
20 | | - // CHECK: %[[ADD1:.*]] = index.add %[[MOD]], %[[C0]] |
21 | | - // CHECK: %[[C32:.*]] = arith.constant 32 : index |
22 | | - // CHECK: %[[MOD1:.*]] = index.remu %[[MUL2]], %[[C32]] |
23 | | - // CHECK: %[[C0_1:.*]] = arith.constant 0 : index |
24 | | - // CHECK: %[[ADD2:.*]] = index.add %[[MOD1]], %[[C0_1]] |
25 | | - // CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][%[[ADD1]], %[[ADD2]]] : memref<24x32xf32> |
26 | | - // CHECK-SAME: -> !xegpu.tensor_desc<12x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>> |
27 | | - // CHECK: gpu.return |
| 9 | + //CHECK: [[SGID:%.+]] = gpu.subgroup_id : index |
| 10 | + //CHECK: [[SGIDY:%.+]] = affine.apply #map()[[[SGID]]] |
| 11 | + //CHECK: [[SGIDX:%.+]] = affine.apply #map1()[[[SGID]]] |
| 12 | + //CHECK: [[C12:%.+]] = arith.constant 12 : index |
| 13 | + //CHECK: [[LY:%.+]] = index.mul [[SGIDY]], [[C12]] |
| 14 | + //CHECK: [[C8:%.+]] = arith.constant 8 : index |
| 15 | + //CHECK: [[LX:%.+]] = index.mul [[SGIDX]], [[C8]] |
| 16 | + //CHECK: [[C0:%.+]] = arith.constant 0 : index |
| 17 | + //CHECK: [[C0_1:%.+]] = arith.constant 0 : index |
| 18 | + //CHECK: [[UY:%.+]] = arith.addi [[LY]], [[C0]] : index |
| 19 | + //CHECK: [[UX:%.+]] = arith.addi [[LX]], [[C0_1]] : index |
| 20 | + //CHECK: [[C24:%.+]] = arith.constant 24 : index |
| 21 | + //CHECK: [[Y:%.+]] = index.remu [[UY]], [[C24]] |
| 22 | + //CHECK: [[C32:%.+]] = arith.constant 32 : index |
| 23 | + //CHECK: [[X:%.+]] = index.remu [[UX]], [[C32]] |
| 24 | + //CHECK: [[TDESC:%.+]] = xegpu.create_nd_tdesc [[ARG_0]][[[Y]], [[X]]] : memref<24x32xf32> -> !xegpu.tensor_desc<12x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>> |
| 25 | + |
28 | 26 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> |
29 | 27 | -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [2, 4], sg_data = [12, 8], lane_layout = [2, 8], lane_data = [1, 1]>> |
30 | 28 | gpu.return |
@@ -180,7 +178,7 @@ gpu.func @dpas_no_sg_data(%a: memref<24x32xf32>, %b: memref<32x24xf32>) { |
180 | 178 | -> vector<24x1xf32> |
181 | 179 | // CHECK: vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 1], lane_data = [1, 1]>} |
182 | 180 | // CHECK-SAME: : vector<12x1xf32> to vector<12x8xf32> |
183 | | - %broadcast = vector.broadcast %load |
| 181 | + %broadcast = vector.broadcast %load |
184 | 182 | {layout_result_0 = #xegpu.layout<sg_layout = [2, 1], sg_data = [12, 8], lane_layout = [2, 1], lane_data = [1, 1]>} |
185 | 183 | : vector<24x1xf32> to vector<24x8xf32> |
186 | 184 | gpu.return |
@@ -367,7 +365,7 @@ gpu.func @dpas_no_sg_data(%a: memref<24x32xf32>, %b: memref<32x24xf32>) { |
367 | 365 | // CHECK-LABEL: @subgroup_id_range_nested_if |
368 | 366 | gpu.func @subgroup_id_range_nested_if(%src: memref<256x128xf32>, %src1: memref<128x64xf32>) { |
369 | 367 | %sg_id = gpu.subgroup_id : index |
370 | | - %c1 = arith.constant 1 : i1 |
| 368 | + %c1 = arith.constant 1 : i1 |
371 | 369 | %c3 = arith.constant 3 : index |
372 | 370 | %c32 = arith.constant 32 : index |
373 | 371 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32> |
|
0 commit comments