@@ -75,8 +75,8 @@ gpu.func @subgroup_load_nd(%src: memref<8x16xf16>) {
7575 gpu.return
7676}
7777
78- // CHECK: func @lane_load_nd (%[[arg0:.*]]: memref<8x16xf16>) {
79- gpu.func @lane_load_nd (%src: memref <8 x16 xf16 >) {
78+ // CHECK: func @simt_load_nd (%[[arg0:.*]]: memref<8x16xf16>) {
79+ gpu.func @simt_load_nd (%src: memref <8 x16 xf16 >) {
8080 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
8181 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <8 x16 xf16 > -> !xegpu.tensor_desc <8 x16 xf16 >
8282 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
@@ -94,8 +94,8 @@ gpu.func @subgroup_load_nd_2(%src: memref<8x16xf16>) {
9494 gpu.return
9595}
9696
97- // CHECK: func @lane_load_nd_2 (%[[arg0:.*]]: memref<8x16xf16>) {
98- gpu.func @lane_load_nd_2 (%src: memref <8 x16 xf16 >) {
97+ // CHECK: func @simt_load_nd_2 (%[[arg0:.*]]: memref<8x16xf16>) {
98+ gpu.func @simt_load_nd_2 (%src: memref <8 x16 xf16 >) {
9999 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<16xf16>
100100 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <8 x16 xf16 > -> !xegpu.tensor_desc <16 xf16 >
101101 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16> -> vector<1xf16>
@@ -112,8 +112,8 @@ gpu.func @subgroup_load_nd_3(%src: memref<24x32xf32>) {
112112 gpu.return
113113}
114114
115- // CHECK: func @lane_load_nd_3 (%[[arg0:.*]]: memref<24x32xf32>) {
116- gpu.func @lane_load_nd_3 (%src: memref <24 x32 xf32 >) {
115+ // CHECK: func @simt_load_nd_3 (%[[arg0:.*]]: memref<24x32xf32>) {
116+ gpu.func @simt_load_nd_3 (%src: memref <24 x32 xf32 >) {
117117 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
118118 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <8 x16 xf32 >
119119 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
@@ -130,8 +130,8 @@ gpu.func @subgroup_load_nd_4(%src: memref<24x32xf16>) {
130130 gpu.return
131131}
132132
133- // CHECK: func @lane_load_nd_4 (%[[arg0:.*]]: memref<24x32xf16>) {
134- gpu.func @lane_load_nd_4 (%src: memref <24 x32 xf16 >) {
133+ // CHECK: func @simt_load_nd_4 (%[[arg0:.*]]: memref<24x32xf16>) {
134+ gpu.func @simt_load_nd_4 (%src: memref <24 x32 xf16 >) {
135135 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16>
136136 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf16 > -> !xegpu.tensor_desc <16 x16 xf16 >
137137 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
@@ -148,8 +148,8 @@ gpu.func @subgroup_load_nd_5(%src: memref<24x32xf32>) {
148148 gpu.return
149149}
150150
151- // CHECK: func @lane_load_nd_5 (%[[arg0:.*]]: memref<24x32xf32>) {
152- gpu.func @lane_load_nd_5 (%src: memref <24 x32 xf32 >) {
151+ // CHECK: func @simt_load_nd_5 (%[[arg0:.*]]: memref<24x32xf32>) {
152+ gpu.func @simt_load_nd_5 (%src: memref <24 x32 xf32 >) {
153153 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<32xf32>
154154 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <32 xf32 >
155155 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<32xf32> -> vector<2xf32>
@@ -166,8 +166,8 @@ gpu.func @subgroup_load_nd_6(%src: memref<24x32xf16>) {
166166 gpu.return
167167}
168168
169- // CHECK: func @lane_load_nd_6 (%[[arg0:.*]]: memref<24x32xf16>) {
170- gpu.func @lane_load_nd_6 (%src: memref <24 x32 xf16 >) {
169+ // CHECK: func @simt_load_nd_6 (%[[arg0:.*]]: memref<24x32xf16>) {
170+ gpu.func @simt_load_nd_6 (%src: memref <24 x32 xf16 >) {
171171 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
172172 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf16 > -> !xegpu.tensor_desc <16 x16 xf16 , #xegpu.block_tdesc_attr <array_length = 2 >>
173173 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
@@ -185,8 +185,8 @@ gpu.func @subgroup_load_nd_7(%src: memref<24x32xf16>) {
185185 gpu.return
186186}
187187
188- // CHECK: func @lane_load_nd_7 (%[[arg0:.*]]: memref<24x32xf16>) {
189- gpu.func @lane_load_nd_7 (%src: memref <24 x32 xf16 >) {
188+ // CHECK: func @simt_load_nd_7 (%[[arg0:.*]]: memref<24x32xf16>) {
189+ gpu.func @simt_load_nd_7 (%src: memref <24 x32 xf16 >) {
190190 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
191191 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf16 > -> !xegpu.tensor_desc <16 x16 xf16 , #xegpu.block_tdesc_attr <array_length = 2 >>
192192 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
@@ -204,8 +204,8 @@ gpu.func @subgroup_load_nd_8(%src: memref<24x32xf32>) {
204204 gpu.return
205205}
206206
207- // CHECK: func @lane_load_nd_8 (%[[arg0:.*]]: memref<24x32xf32>) {
208- gpu.func @lane_load_nd_8 (%src: memref <24 x32 xf32 >) {
207+ // CHECK: func @simt_load_nd_8 (%[[arg0:.*]]: memref<24x32xf32>) {
208+ gpu.func @simt_load_nd_8 (%src: memref <24 x32 xf32 >) {
209209 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
210210 %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 >
211211 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8xf32>
@@ -224,8 +224,8 @@ gpu.func @subgroup_store_nd(%dst: memref<24x32xf16>) {
224224 gpu.return
225225}
226226
227- // CHECK: func @lane_store_nd (%[[arg0:.*]]: memref<24x32xf16>) {
228- gpu.func @lane_store_nd (%src: memref <24 x32 xf16 >) {
227+ // CHECK: func @simt_store_nd (%[[arg0:.*]]: memref<24x32xf16>) {
228+ gpu.func @simt_store_nd (%src: memref <24 x32 xf16 >) {
229229 // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<48xf16>
230230 %1 = arith.constant dense <1.0 >: vector <48 xf16 >
231231 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
@@ -249,8 +249,8 @@ gpu.func @subgroup_store_nd_2(%dst: memref<24x32xf16>) {
249249}
250250
251251
252- // CHECK: func @lane_store_nd_2 (%[[arg0:.*]]: memref<24x32xf16>) {
253- gpu.func @lane_store_nd_2 (%src: memref <24 x32 xf16 >) {
252+ // CHECK: func @simt_store_nd_2 (%[[arg0:.*]]: memref<24x32xf16>) {
253+ gpu.func @simt_store_nd_2 (%src: memref <24 x32 xf16 >) {
254254 // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<2xf16>
255255 %1 = arith.constant dense <1.0 >: vector <2 xf16 >
256256 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
@@ -323,8 +323,8 @@ gpu.func @subgroup_load(%src: ui64) {
323323 gpu.return
324324}
325325
326- // CHECK: gpu.func @lane_load (%[[arg0:.*]]: ui64) {
327- gpu.func @lane_load (%src: ui64 ) {
326+ // CHECK: gpu.func @simt_load (%[[arg0:.*]]: ui64) {
327+ gpu.func @simt_load (%src: ui64 ) {
328328 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
329329 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
330330 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -349,8 +349,8 @@ gpu.func @subgroup_load_2(%src: ui64) {
349349 gpu.return
350350}
351351
352- // CHECK: gpu.func @lane_load_2 (%[[arg0:.*]]: ui64) {
353- gpu.func @lane_load_2 (%src: ui64 ) {
352+ // CHECK: gpu.func @simt_load_2 (%[[arg0:.*]]: ui64) {
353+ gpu.func @simt_load_2 (%src: ui64 ) {
354354 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
355355 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
356356 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -375,8 +375,8 @@ gpu.func @subgroup_load_3(%src: ui64) {
375375 gpu.return
376376}
377377
378- // CHECK: gpu.func @lane_load_3 (%[[arg0:.*]]: ui64) {
379- gpu.func @lane_load_3 (%src: ui64 ) {
378+ // CHECK: gpu.func @simt_load_3 (%[[arg0:.*]]: ui64) {
379+ gpu.func @simt_load_3 (%src: ui64 ) {
380380 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
381381 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
382382 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -405,8 +405,8 @@ gpu.func @subgroup_store(%src: ui64) {
405405
406406
407407
408- // CHECK: gpu.func @lane_store (%[[arg0:.*]]: ui64) {
409- gpu.func @lane_store (%src: ui64 ) {
408+ // CHECK: gpu.func @simt_store (%[[arg0:.*]]: ui64) {
409+ gpu.func @simt_store (%src: ui64 ) {
410410 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
411411 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
412412 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -437,8 +437,8 @@ gpu.func @subgroup_store_2(%src: ui64) {
437437
438438
439439
440- // CHECK: gpu.func @lane_store_2 (%[[arg0:.*]]: ui64) {
441- gpu.func @lane_store_2 (%src: ui64 ) {
440+ // CHECK: gpu.func @simt_store_2 (%[[arg0:.*]]: ui64) {
441+ gpu.func @simt_store_2 (%src: ui64 ) {
442442 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
443443 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
444444 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -468,8 +468,8 @@ gpu.func @subgroup_store_3(%src: ui64) {
468468}
469469
470470
471- // CHECK: gpu.func @lane_store_3 (%[[arg0:.*]]: ui64) {
472- gpu.func @lane_store_3 (%src: ui64 ) {
471+ // CHECK: gpu.func @simt_store_3 (%[[arg0:.*]]: ui64) {
472+ gpu.func @simt_store_3 (%src: ui64 ) {
473473 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
474474 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
475475 //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
@@ -515,8 +515,8 @@ gpu.func @subgroup_dpas(%a : vector<8x16xf16>, %b: vector<16x16xf16>) {
515515 gpu.return
516516}
517517
518- // CHECK: gpu.func @lane_dpas (%[[arg0:.*]]: vector<8xf16>, %[[arg1:.*]]: vector<16xf16>)
519- gpu.func @lane_dpas (%a : vector <8 xf16 >, %b: vector <16 xf16 >) {
518+ // CHECK: gpu.func @simt_dpas (%[[arg0:.*]]: vector<8xf16>, %[[arg1:.*]]: vector<16xf16>)
519+ gpu.func @simt_dpas (%a : vector <8 xf16 >, %b: vector <16 xf16 >) {
520520 // CHECK: xegpu.dpas %[[arg0]], %[[arg1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
521521 %1 = xegpu.dpas %a , %b : vector <8 xf16 >, vector <16 xf16 > -> vector <8 xf32 >
522522 gpu.return
@@ -529,8 +529,8 @@ gpu.func @subgroup_dpas_packed_b(%a : vector<8x16xf16>, %b: vector<8x16x2xf16>)
529529 gpu.return
530530}
531531
532- // CHECK: gpu.func @atomic_rmw (%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>)
533- gpu.func @atomic_rmw (%src: ui64 , %value : vector <16 xf32 >, %mask : vector <16 xi1 >) {
532+ // CHECK: gpu.func @subgroup_atomic_rmw (%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>)
533+ gpu.func @subgroup_atomic_rmw (%src: ui64 , %value : vector <16 xf32 >, %mask : vector <16 xi1 >) {
534534 //CHECK: %[[c:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xindex>
535535 %c = arith.constant dense <[0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ]> : vector <16 xindex >
536536 //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[c]] : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
0 commit comments