|
1 | 1 | // RUN: mlir-opt --xegpu-wg-to-sg -split-input-file %s | FileCheck %s |
2 | 2 |
|
3 | 3 | gpu.module @test_round_robin_assignment { |
4 | | - // CHECK: test_create_nd_tdesc |
5 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
| 4 | + // CHECK-LABEL: test_create_nd_tdesc |
| 5 | + // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
6 | 6 | gpu.func @test_create_nd_tdesc(%src: memref<24x32xf32>) { |
7 | | - // CHECK-COUNT-12: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<24x32xf32> -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 7 | + // CHECK-COUNT-12: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<24x32xf32> |
| 8 | + // CHECK-SAME: -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
8 | 9 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
9 | 10 | gpu.return |
10 | 11 | } |
11 | 12 |
|
12 | | - // CHECK: test_load_nd_tdesc |
13 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
| 13 | + // CHECK-LABEL: test_load_nd_tdesc |
| 14 | + // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
14 | 15 | gpu.func @test_load_nd_tdesc(%src: memref<24x32xf32>) { |
15 | 16 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
16 | | - // CHECK-COUNT-12: %[[LOAD:.*]] = xegpu.load_nd %{{.*}} : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<2x2xf32> |
| 17 | + // CHECK-COUNT-12: xegpu.load_nd %{{.*}} |
| 18 | + // CHECK-SAME-COUNT-12: : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 19 | + // CHECK-SAME-COUNT-12: -> vector<2x2xf32> |
17 | 20 | %load = xegpu.load_nd %tdesc: !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<24x32xf32> |
18 | 21 | gpu.return |
19 | 22 | } |
20 | 23 |
|
21 | | - // CHECK: test_store_nd |
22 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
| 24 | + // CHECK-LABEL: test_store_nd |
| 25 | + // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
23 | 26 | gpu.func @test_store_nd(%src: memref<24x32xf32>) { |
24 | 27 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
25 | | - // CHECK-COUNT-12: xegpu.store_nd %{{.*}}, %{{.*}} : vector<2x2xf32>, !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 28 | + // CHECK-COUNT-12: xegpu.store_nd %{{.*}}, %{{.*}} |
| 29 | + // CHECK-SAME-COUNT-12: : vector<2x2xf32>, !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
26 | 30 | %load = xegpu.load_nd %tdesc: !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<24x32xf32> |
27 | 31 | xegpu.store_nd %load, %tdesc: vector<24x32xf32>, !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
28 | 32 | gpu.return |
29 | 33 | } |
30 | 34 |
|
31 | | - // CHECK: test_update_nd |
32 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
| 35 | + // CHECK-LABEL: test_update_nd |
| 36 | + // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
33 | 37 | gpu.func @test_update_nd(%src: memref<24x32xf32>){ |
34 | 38 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
35 | | - // CHECK-COUNT-12: %[[UPDATE:.*]] = xegpu.update_nd_offset %{{.*}}, [0, 16] : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 39 | + // CHECK-COUNT-12: xegpu.update_nd_offset %{{.*}}, [0, 16] |
| 40 | + // CHECK-SAME-COUNT-12: : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
36 | 41 | %update = xegpu.update_nd_offset %tdesc, [0, 16] : !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
37 | 42 | gpu.return |
38 | 43 | } |
39 | 44 |
|
40 | | - // CHECK: test_dpas |
41 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
42 | | - // CHECK: %[[ARG_1:.*]]: memref<32x24xf32> |
43 | | - // CHECK: %[[ARG_2:.*]]: memref<24x24xf32> |
44 | | - gpu.func @test_dpas(%a: memref<24x32xf32>, %b: memref<32x24xf32>, %c: memref<24x24xf32>) { |
45 | | - // CHECK-COUNT-12: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<24x32xf32> -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
46 | | - // CHECK-COUNT-12: %[[TDESC1:.*]] = xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<32x24xf32> -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
47 | | - // CHECK-COUNT-9: %[[TDESC2:.*]] = xegpu.create_nd_tdesc %{{.*}}[%{{.*}}, %{{.*}}] : memref<24x24xf32> -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
48 | | - // CHECK-COUNT-144: %[[DPAS:.*]] = xegpu.dpas %{{.*}}, %{{.*}} {layout = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} : vector<2x2xf32>, vector<2x2xf32> -> vector<2x2xf32> |
49 | | - %tdesc_a = xegpu.create_nd_tdesc %a[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
50 | | - %load_a = xegpu.load_nd %tdesc_a: !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<24x32xf32> |
51 | | - %tdesc_b = xegpu.create_nd_tdesc %b[0, 0] : memref<32x24xf32> -> !xegpu.tensor_desc<32x24xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
52 | | - %load_b = xegpu.load_nd %tdesc_b: !xegpu.tensor_desc<32x24xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<32x24xf32> |
53 | | - %tdesc_c = xegpu.create_nd_tdesc %c[0, 0] : memref<24x24xf32> -> !xegpu.tensor_desc<24x24xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
54 | | - %dpas = xegpu.dpas %load_a, %load_b {layout = #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>} : vector<24x32xf32>, vector<32x24xf32> -> vector<24x24xf32> |
| 45 | + // CHECK-LABEL: test_dpas |
| 46 | + // CHECK-SAME: (%[[ARG_0:.*]]: memref<8x8xf32>, %[[ARG_1:.*]]: memref<8x8xf32>, %[[ARG_2:.*]]: memref<8x8xf32>) |
| 47 | + gpu.func @test_dpas(%a: memref<8x8xf32>, %b: memref<8x8xf32>, %c: memref<8x8xf32>) { |
| 48 | + // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<8x8xf32> |
| 49 | + // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 50 | + // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<8x8xf32> |
| 51 | + // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 52 | + // CHECK-COUNT-4: xegpu.create_nd_tdesc %{{.*}}[%{{.*}}, %{{.*}}] : memref<8x8xf32> |
| 53 | + // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
| 54 | + // CHECK-COUNT-16: xegpu.dpas %{{.*}}, %{{.*}} |
| 55 | + // CHECK-SAME-COUNT-16: {layout = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} |
| 56 | + // CHECK-SAME-COUNT-16: : vector<2x2xf32>, vector<2x2xf32> -> vector<2x2xf32> |
| 57 | + %tdesc_a = xegpu.create_nd_tdesc %a[0, 0] : memref<8x8xf32> -> !xegpu.tensor_desc<8x8xf32, #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
| 58 | + %load_a = xegpu.load_nd %tdesc_a: !xegpu.tensor_desc<8x8xf32, #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<8x8xf32> |
| 59 | + %tdesc_b = xegpu.create_nd_tdesc %b[0, 0] : memref<8x8xf32> -> !xegpu.tensor_desc<8x8xf32, #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
| 60 | + %load_b = xegpu.load_nd %tdesc_b: !xegpu.tensor_desc<8x8xf32, #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> -> vector<8x8xf32> |
| 61 | + %tdesc_c = xegpu.create_nd_tdesc %c[0, 0] : memref<8x8xf32> -> !xegpu.tensor_desc<8x8xf32, #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
| 62 | + %dpas = xegpu.dpas %load_a, %load_b {layout = #xegpu.layout<sg_layout = [2, 2], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>} : vector<8x8xf32>, vector<8x8xf32> -> vector<8x8xf32> |
55 | 63 | gpu.return |
56 | 64 | } |
57 | 65 |
|
58 | | - // CHECK: test_prefetch_nd_tdesc |
59 | | - // CHECK: %[[ARG_0:.*]]: memref<24x32xf32> |
| 66 | + // CHECK-LABEL: test_prefetch_nd_tdesc |
| 67 | + // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32> |
60 | 68 | gpu.func @test_prefetch_nd_tdesc(%src: memref<24x32xf32>) { |
61 | 69 | // CHECK-COUNT-12: xegpu.prefetch_nd %{{.*}} : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>> |
62 | 70 | %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>> |
|
0 commit comments