22
33// CHECK-LABEL: gpu.func @test_store_nd_1d
44// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) {
5- // CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32>
6- // CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
5+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32>
6+ // CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
77// CHECK: xegpu.store_nd %[[CST]], %[[T0]] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
88// CHECK: gpu.return
99gpu.module @test {
@@ -19,8 +19,8 @@ gpu.func @test_store_nd_1d(%arg0: memref<16xf32>){
1919// -----
2020// CHECK-LABEL: gpu.func @test_store_nd_2d
2121// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>) {
22- // CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf16>
23- // CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
22+ // CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf16>
23+ // CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
2424// CHECK: xegpu.store_nd %[[CST]], %[[T0]] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
2525gpu.module @test {
2626gpu.func @test_store_nd_2d(%arg0: memref<16x16xf16>){
@@ -38,8 +38,8 @@ gpu.func @test_store_nd_2d(%arg0: memref<16x16xf16>){
3838// CHECK-LABEL: gpu.func @test_load_nd_1d
3939// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16xf32>) {
4040// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
41- // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16xf32> -> vector<1xf32>
42- // CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
41+ // CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16xf32> -> vector<1xf32>
42+ // CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
4343// CHECK: xegpu.store_nd %[[T1]], %[[T2]] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
4444gpu.module @test {
4545gpu.func @test_load_nd_1d(%arg0: memref<16xf32>, %arg1: memref<16xf32>){
@@ -56,8 +56,8 @@ gpu.func @test_load_nd_1d(%arg0: memref<16xf32>, %arg1: memref<16xf32>){
5656// CHECK-LABEL: gpu.func @test_load_nd_2d
5757// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>) {
5858// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
59- // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
60- // CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
59+ // CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
60+ // CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
6161// CHECK: xegpu.store_nd %[[T1]], %[[T2]] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
6262gpu.module @test {
6363gpu.func @test_load_nd_2d(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>){
@@ -77,8 +77,8 @@ gpu.func @test_load_nd_2d(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>){
7777// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
7878// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] : vector<32xf16> to vector<2x16x1xf16>
7979// CHECK: %[[T3:.*]] = vector.extract %[[T2]][0] : vector<16x1xf16> from vector<2x16x1xf16>
80- // CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
81- // CHECK: %[[T5:.*]] = vector.shape_cast %[[T3]] : vector<16x1xf16> to vector<16xf16>
80+ // CHECK-DAG: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
81+ // CHECK-DAG: %[[T5:.*]] = vector.shape_cast %[[T3]] : vector<16x1xf16> to vector<16xf16>
8282// CHECK: xegpu.store_nd %[[T5]], %[[T4]] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
8383gpu.module @test {
8484gpu.func @test_load_nd_array_length(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>){
@@ -100,9 +100,9 @@ gpu.func @test_load_nd_array_length(%arg0: memref<16x16xf16>, %arg1: memref<16x1
100100// CHECK: ^bb0(%[[ARG4:[0-9a-zA-Z]+]]: vector<8x16xf16>, %[[ARG5:[0-9a-zA-Z]+]]: vector<16x16xf16>, %[[ARG6:[0-9a-zA-Z]+]]: vector<8x16xf32>, %[[ARG7:[0-9a-zA-Z]+]]: memref<8x16xf32>):
101101// CHECK: gpu.yield %[[ARG4]], %[[ARG5]], %[[ARG6]] : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32>
102102// CHECK: }
103- // CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]]#0 : vector<8x1xf16> to vector<8xf16>
104- // CHECK: %[[T3:.*]] = vector.shape_cast %[[T1]]#1 : vector<16x1xf16> to vector<16xf16>
105- // CHECK: %[[T4:.*]] = vector.shape_cast %[[T1]]#2 : vector<8x1xf32> to vector<8xf32>
103+ // CHECK-DAG: %[[T2:.*]] = vector.shape_cast %[[T1]]#0 : vector<8x1xf16> to vector<8xf16>
104+ // CHECK-DAG: %[[T3:.*]] = vector.shape_cast %[[T1]]#1 : vector<16x1xf16> to vector<16xf16>
105+ // CHECK-DAG: %[[T4:.*]] = vector.shape_cast %[[T1]]#2 : vector<8x1xf32> to vector<8xf32>
106106// CHECK: %[[T5:.*]] = xegpu.dpas %[[T2]], %[[T3]], %[[T4]] : vector<8xf16>, vector<16xf16>, vector<8xf32> -> vector<8xf32>
107107// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG3]][%{{.*}}] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
108108// CHECK: xegpu.store_nd %[[T5]], %[[T6]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
@@ -120,11 +120,11 @@ gpu.func @test_dpas(%arg0: vector<8x16xf16>, %arg1: vector<16x16xf16>, %arg3: ve
120120// CHECK-LABEL: gpu.func @load_dpas_store
121121// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) {
122122// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]][%{{.*}}] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
123- // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
123+ // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
124124// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
125125// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
126- // CHECK: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
127- // CHECK: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]][%{{.*}}] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
126+ // CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
127+ // CHECK-DAG: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]][%{{.*}}] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
128128// CHECK: xegpu.store_nd %[[T4]], %[[T5]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
129129gpu.module @test {
130130gpu.func @load_dpas_store(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg3: memref<8x16xf32>){
0 commit comments