@@ -121,12 +121,12 @@ gpu.func @prefetch_nd_2(%src: memref<48x64xf16>) {
121121 gpu.return
122122}
123123
124- // CHECK: gpu.func @prefetch_nd_offset_1(%[[arg0:.*]]: memref<48x64xf16>) {
125- gpu.func @prefetch_nd_offset_1 (%src: memref <48 x64 xf16 >) {
124+ // CHECK: gpu.func @prefetch_nd_offset_1(%[[arg0:.*]]: memref<48x64xf16>, %[[arg1:.*]]: index, %[[arg2:.*]]: index) {
125+ gpu.func @prefetch_nd_offset_1 (%src: memref <48 x64 xf16 >, %x : index , %y : index ) { // exercises prefetch_nd with SSA index offsets (%x, %y)
126- // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<48x64xf16> -> !xegpu.tensor_desc<8x16xf16>
127- %1 = xegpu.create_nd_tdesc %src [ 0 , 0 ] : memref <48 x64 xf16 > -> !xegpu.tensor_desc <8 x16 xf16 >
128- // CHECK: xegpu.prefetch_nd %[[R0]][0, 0 ] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16>
129- xegpu.prefetch_nd %1 [0 , 0 ] <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}>: !xegpu.tensor_desc <8 x16 xf16 >
126+ // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]] : memref<48x64xf16> -> !xegpu.tensor_desc<8x16xf16>
127+ %1 = xegpu.create_nd_tdesc %src : memref <48 x64 xf16 > -> !xegpu.tensor_desc <8 x16 xf16 > // descriptor is created without offsets; they are given on the prefetch below
128+ // CHECK: xegpu.prefetch_nd %[[R0]][%[[arg1]], %[[arg2]]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16>
129+ xegpu.prefetch_nd %1 [%x , %y ] <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}>: !xegpu.tensor_desc <8 x16 xf16 >
130130 gpu.return
131131}
132132
@@ -269,12 +269,12 @@ gpu.func @subgroup_load_nd_8(%src: memref<24x32xf32>) {
269269 gpu.return
270270}
271271
272- // CHECK: func @subgroup_load_nd_offset_1(%[[arg0:.*]]: memref<24x32xf32>) {
273- gpu.func @subgroup_load_nd_offset_1 (%src: memref <24 x32 xf32 >) {
272+ // CHECK: func @subgroup_load_nd_offset_1(%[[arg0:.*]]: memref<24x32xf32>, %[[arg1:.*]]: index, %[[arg2:.*]]: index) {
273+ gpu.func @subgroup_load_nd_offset_1 (%src: memref <24 x32 xf32 >, %x : index , %y : index ) { // exercises load_nd with SSA index offsets (%x, %y) and a transpose attribute
274- // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
275- %1 = xegpu.create_nd_tdesc %src [ 0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 >
276- // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]][0, 0 ] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8x16xf32>
277- %2 = xegpu.load_nd %1 [0 , 0 ] <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >, transpose = array<i64 : 1 , 0 >}> : !xegpu.tensor_desc <16 x8 xf32 > -> vector <8 x16 xf32 >
274+ // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
275+ %1 = xegpu.create_nd_tdesc %src : memref <24 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 > // descriptor is created without offsets; they are given on the load below
276+ // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]][%[[arg1]], %[[arg2]]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8x16xf32>
277+ %2 = xegpu.load_nd %1 [%x , %y ] <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >, transpose = array<i64 : 1 , 0 >}> : !xegpu.tensor_desc <16 x8 xf32 > -> vector <8 x16 xf32 >
278278 gpu.return
279279}
280280
@@ -291,7 +291,7 @@ gpu.func @simt_load_nd_8(%src: memref<24x32xf32>) {
291291// CHECK: func @simt_load_nd_offset_1(%[[arg0:.*]]: memref<24x32xf32>) {
292292gpu.func @simt_load_nd_offset_1 (%src: memref <24 x32 xf32 >) {
293293 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
294- %1 = xegpu.create_nd_tdesc %src [ 0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 >
294+ %1 = xegpu.create_nd_tdesc %src : memref <24 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 >
295295 // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]][0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8xf32>
296296 %2 = xegpu.load_nd %1 [0 , 0 ] <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >, transpose = array<i64 : 1 , 0 >}> : !xegpu.tensor_desc <16 x8 xf32 > -> vector <8 xf32 >
297297 gpu.return
@@ -319,14 +319,14 @@ gpu.func @simt_store_nd(%src: memref<24x32xf16>) {
319319 gpu.return
320320}
321321
322- // CHECK: func @subgroup_store_nd_2(%[[arg0:.*]]: memref<24x32xf16>) {
323- gpu.func @subgroup_store_nd_2 (%dst: memref <24 x32 xf16 >) {
322+ // CHECK: func @subgroup_store_nd_2(%[[arg0:.*]]: memref<24x32xf16>, %[[arg1:.*]]: index) {
323+ gpu.func @subgroup_store_nd_2 (%dst: memref <24 x32 xf16 >, %x : index ) { // exercises store_nd with a single SSA index offset (%x)
324324 // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<32xf16>
325325 %1 = arith.constant dense <1.0 >: vector <32 xf16 >
326- // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
327- %2 = xegpu.create_nd_tdesc %dst [ 0 , 0 ] : memref <24 x32 xf16 > -> !xegpu.tensor_desc <32 xf16 >
328- // CHECK: xegpu.store_nd %[[C]], %[[R0]][0 ] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<32xf16>, !xegpu.tensor_desc<32xf16>
329- xegpu.store_nd %1 , %2 [0 ] <{l1_hint = #xegpu.cache_hint <write_back >, l2_hint = #xegpu.cache_hint <uncached >}>: vector <32 xf16 >, !xegpu.tensor_desc <32 xf16 >
326+ // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
327+ %2 = xegpu.create_nd_tdesc %dst : memref <24 x32 xf16 > -> !xegpu.tensor_desc <32 xf16 > // descriptor is created without offsets; the offset is given on the store below
328+ // CHECK: xegpu.store_nd %[[C]], %[[R0]][%[[arg1]]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<32xf16>, !xegpu.tensor_desc<32xf16>
329+ xegpu.store_nd %1 , %2 [%x ] <{l1_hint = #xegpu.cache_hint <write_back >, l2_hint = #xegpu.cache_hint <uncached >}>: vector <32 xf16 >, !xegpu.tensor_desc <32 xf16 >
330330 gpu.return
331331}
332332
@@ -357,7 +357,7 @@ gpu.func @simt_store_nd_offset_1(%src: memref<24x32xf16>) {
357357 // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<2xf16>
358358 %1 = arith.constant dense <1.0 >: vector <2 xf16 >
359359 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
360- %2 = xegpu.create_nd_tdesc %src [ 0 , 0 ] : memref <24 x32 xf16 > -> !xegpu.tensor_desc <32 xf16 >
360+ %2 = xegpu.create_nd_tdesc %src : memref <24 x32 xf16 > -> !xegpu.tensor_desc <32 xf16 >
361361 // CHECK: xegpu.store_nd %[[C]], %[[R0]][0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<2xf16>, !xegpu.tensor_desc<32xf16>
362362 xegpu.store_nd %1 , %2 [0 ] <{l1_hint = #xegpu.cache_hint <write_back >, l2_hint = #xegpu.cache_hint <uncached >}>: vector <2 xf16 >, !xegpu.tensor_desc <32 xf16 >
363363 gpu.return
0 commit comments