
Commit a86fbe2 (parent 13b97b4)

lane prefix to simt


mlir/test/Dialect/XeGPU/ops.mlir

Lines changed: 36 additions & 36 deletions
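ops.mlir is the XeGPU round-trip test: each gpu.func is parsed and re-printed by mlir-opt, and the printed output is matched against the // CHECK lines with FileCheck. A minimal sketch of the usual driver line (the actual RUN line sits at the top of the file and is not part of this diff, so treat it as an assumption):

// RUN: mlir-opt %s | FileCheck %s

Because of this, renaming a test only requires keeping each gpu.func symbol and its CHECK line in sync, which is all the hunks below do.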
@@ -75,8 +75,8 @@ gpu.func @subgroup_load_nd(%src: memref<8x16xf16>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd(%[[arg0:.*]]: memref<8x16xf16>) {
-gpu.func @lane_load_nd(%src: memref<8x16xf16>) {
+// CHECK: func @simt_load_nd(%[[arg0:.*]]: memref<8x16xf16>) {
+gpu.func @simt_load_nd(%src: memref<8x16xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
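For orientation, here is how the first renamed test reads after this commit. The hunk stops at the CHECK line for the load, so the xegpu.load_nd op itself and the trailing gpu.return are assumptions inferred from that CHECK pattern rather than lines copied from the file:

// CHECK: func @simt_load_nd(%[[arg0:.*]]: memref<8x16xf16>) {
gpu.func @simt_load_nd(%src: memref<8x16xf16>) {
  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
  // NOTE: the two lines below are inferred from the CHECK line above (assumption).
  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
  gpu.return
}

Note the SIMT flavor being tested: the 8x16 block descriptor yields a vector<8xf16> per work item, i.e. the 128-element tile divided across the 16 lanes of the subgroup.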
@@ -94,8 +94,8 @@ gpu.func @subgroup_load_nd_2(%src: memref<8x16xf16>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_2(%[[arg0:.*]]: memref<8x16xf16>) {
-gpu.func @lane_load_nd_2(%src: memref<8x16xf16>) {
+// CHECK: func @simt_load_nd_2(%[[arg0:.*]]: memref<8x16xf16>) {
+gpu.func @simt_load_nd_2(%src: memref<8x16xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<16xf16>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<16xf16>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16> -> vector<1xf16>
@@ -112,8 +112,8 @@ gpu.func @subgroup_load_nd_3(%src: memref<24x32xf32>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_3(%[[arg0:.*]]: memref<24x32xf32>) {
-gpu.func @lane_load_nd_3(%src: memref<24x32xf32>) {
+// CHECK: func @simt_load_nd_3(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @simt_load_nd_3(%src: memref<24x32xf32>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
@@ -130,8 +130,8 @@ gpu.func @subgroup_load_nd_4(%src: memref<24x32xf16>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_4(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @lane_load_nd_4(%src: memref<24x32xf16>) {
+// CHECK: func @simt_load_nd_4(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @simt_load_nd_4(%src: memref<24x32xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
@@ -148,8 +148,8 @@ gpu.func @subgroup_load_nd_5(%src: memref<24x32xf32>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_5(%[[arg0:.*]]: memref<24x32xf32>) {
-gpu.func @lane_load_nd_5(%src: memref<24x32xf32>) {
+// CHECK: func @simt_load_nd_5(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @simt_load_nd_5(%src: memref<24x32xf32>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<32xf32>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<32xf32>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<32xf32> -> vector<2xf32>
@@ -166,8 +166,8 @@ gpu.func @subgroup_load_nd_6(%src: memref<24x32xf16>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_6(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @lane_load_nd_6(%src: memref<24x32xf16>) {
+// CHECK: func @simt_load_nd_6(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @simt_load_nd_6(%src: memref<24x32xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2>>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
@@ -185,8 +185,8 @@ gpu.func @subgroup_load_nd_7(%src: memref<24x32xf16>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_7(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @lane_load_nd_7(%src: memref<24x32xf16>) {
+// CHECK: func @simt_load_nd_7(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @simt_load_nd_7(%src: memref<24x32xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2>>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
@@ -204,8 +204,8 @@ gpu.func @subgroup_load_nd_8(%src: memref<24x32xf32>) {
   gpu.return
 }

-// CHECK: func @lane_load_nd_8(%[[arg0:.*]]: memref<24x32xf32>) {
-gpu.func @lane_load_nd_8(%src: memref<24x32xf32>) {
+// CHECK: func @simt_load_nd_8(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @simt_load_nd_8(%src: memref<24x32xf32>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
   // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8xf32>
@@ -224,8 +224,8 @@ gpu.func @subgroup_store_nd(%dst: memref<24x32xf16>) {
   gpu.return
 }

-// CHECK: func @lane_store_nd(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @lane_store_nd(%src: memref<24x32xf16>) {
+// CHECK: func @simt_store_nd(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @simt_store_nd(%src: memref<24x32xf16>) {
   // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<48xf16>
   %1 = arith.constant dense<1.0>: vector<48xf16>
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
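The SIMT store test mirrors the loads: the vector<48xf16> constant is the per-lane fragment of the 24x32 tile (768 elements over 16 lanes). A minimal sketch of how the body likely continues past the hunk; the xegpu.store_nd line, its lack of cache hints, and the SSA names are assumptions, since the rest of the function is not shown in this diff:

  // Hypothetical continuation of @simt_store_nd (assumption): store the per-lane
  // fragment through the block descriptor created above.
  xegpu.store_nd %1, %2 : vector<48xf16>, !xegpu.tensor_desc<24x32xf16>
  gpu.return
}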
@@ -249,8 +249,8 @@ gpu.func @subgroup_store_nd_2(%dst: memref<24x32xf16>) {
 }


-// CHECK: func @lane_store_nd_2(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @lane_store_nd_2(%src: memref<24x32xf16>) {
+// CHECK: func @simt_store_nd_2(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @simt_store_nd_2(%src: memref<24x32xf16>) {
   // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<2xf16>
   %1 = arith.constant dense<1.0>: vector<2xf16>
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
@@ -323,8 +323,8 @@ gpu.func @subgroup_load(%src: ui64) {
   gpu.return
 }

-// CHECK: gpu.func @lane_load(%[[arg0:.*]]: ui64) {
-gpu.func @lane_load(%src: ui64) {
+// CHECK: gpu.func @simt_load(%[[arg0:.*]]: ui64) {
+gpu.func @simt_load(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
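The scattered tests build on these per-lane offsets and mask. As a rough sketch of the next step, modeled on the xegpu.create_tdesc pattern visible in the last hunk of this diff (the element count, element type, and SSA name here are assumptions, not lines from the file):

  // Hypothetical next line (assumption): build a scatter-style descriptor from the
  // base address and the four offsets above.
  %2 = xegpu.create_tdesc %src, %0 : ui64, vector<4xindex> -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>>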
@@ -349,8 +349,8 @@ gpu.func @subgroup_load_2(%src: ui64) {
   gpu.return
 }

-// CHECK: gpu.func @lane_load_2(%[[arg0:.*]]: ui64) {
-gpu.func @lane_load_2(%src: ui64) {
+// CHECK: gpu.func @simt_load_2(%[[arg0:.*]]: ui64) {
+gpu.func @simt_load_2(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -375,8 +375,8 @@ gpu.func @subgroup_load_3(%src: ui64) {
   gpu.return
 }

-// CHECK: gpu.func @lane_load_3(%[[arg0:.*]]: ui64) {
-gpu.func @lane_load_3(%src: ui64) {
+// CHECK: gpu.func @simt_load_3(%[[arg0:.*]]: ui64) {
+gpu.func @simt_load_3(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -405,8 +405,8 @@ gpu.func @subgroup_store(%src: ui64) {



-// CHECK: gpu.func @lane_store(%[[arg0:.*]]: ui64) {
-gpu.func @lane_store(%src: ui64) {
+// CHECK: gpu.func @simt_store(%[[arg0:.*]]: ui64) {
+gpu.func @simt_store(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -437,8 +437,8 @@ gpu.func @subgroup_store_2(%src: ui64) {



-// CHECK: gpu.func @lane_store_2(%[[arg0:.*]]: ui64) {
-gpu.func @lane_store_2(%src: ui64) {
+// CHECK: gpu.func @simt_store_2(%[[arg0:.*]]: ui64) {
+gpu.func @simt_store_2(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -468,8 +468,8 @@ gpu.func @subgroup_store_3(%src: ui64) {
 }


-// CHECK: gpu.func @lane_store_3(%[[arg0:.*]]: ui64) {
-gpu.func @lane_store_3(%src: ui64) {
+// CHECK: gpu.func @simt_store_3(%[[arg0:.*]]: ui64) {
+gpu.func @simt_store_3(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %0 = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -515,8 +515,8 @@ gpu.func @subgroup_dpas(%a : vector<8x16xf16>, %b: vector<16x16xf16>) {
   gpu.return
 }

-// CHECK: gpu.func @lane_dpas(%[[arg0:.*]]: vector<8xf16>, %[[arg1:.*]]: vector<16xf16>)
-gpu.func @lane_dpas(%a : vector<8xf16>, %b: vector<16xf16>) {
+// CHECK: gpu.func @simt_dpas(%[[arg0:.*]]: vector<8xf16>, %[[arg1:.*]]: vector<16xf16>)
+gpu.func @simt_dpas(%a : vector<8xf16>, %b: vector<16xf16>) {
   // CHECK: xegpu.dpas %[[arg0]], %[[arg1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
   %1 = xegpu.dpas %a, %b : vector<8xf16>, vector<16xf16> -> vector<8xf32>
   gpu.return
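The dpas rename is the clearest illustration of the lane-to-simt terminology: at SIMT scope the operands are per-lane fragments (vector<8xf16> and vector<16xf16>), whereas the subgroup-scope test named in the hunk header operates on whole tiles. A sketch of that subgroup form, assumed from the @subgroup_dpas signature shown above rather than copied from the file:

  // Subgroup-scope dpas on full 2-D tiles (assumption: body reconstructed from the
  // signature in the hunk header, not from the diff).
  %1 = xegpu.dpas %a, %b : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>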
@@ -529,8 +529,8 @@ gpu.func @subgroup_dpas_packed_b(%a : vector<8x16xf16>, %b: vector<8x16x2xf16>)
   gpu.return
 }

-// CHECK: gpu.func @atomic_rmw(%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>)
-gpu.func @atomic_rmw(%src: ui64, %value : vector<16xf32>, %mask : vector<16xi1>) {
+// CHECK: gpu.func @subgroup_atomic_rmw(%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>)
+gpu.func @subgroup_atomic_rmw(%src: ui64, %value : vector<16xf32>, %mask : vector<16xi1>) {
   //CHECK: %[[c:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xindex>
   %c = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xindex>
   //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[c]] : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
