@@ -130,6 +130,15 @@ gpu.func @prefetch_nd_offset_1(%src: memref<48x64xf16>, %x : index, %y : index)
130130 gpu.return
131131}
132132
// Round-trip test for xegpu.prefetch_nd on a rank-5 tensor descriptor.
// The descriptor is created from a 5-D f16 memref with five static zero
// offsets; prefetch_nd is then given two static offsets together with
// L1 (cached) and L2 (uncached) cache hints.
//
// NOTE(review): the function immediately preceding this one (the variant
// taking memref<48x64xf16>, %x, %y) is also named @prefetch_nd_offset_1.
// If both are in the same module the symbol is defined twice -- confirm,
// and rename one of them (updating its function-header check line) if so.
// NOTE(review): prefetch_nd supplies 2 offsets while the descriptor has
// rank 5 -- confirm the op accepts fewer offsets than the descriptor rank.
// CHECK: gpu.func @prefetch_nd_offset_1(%[[arg0:.*]]: memref<8x24x32x48x64xf16>) {
gpu.func @prefetch_nd_offset_1(%src: memref<8x24x32x48x64xf16>) {
  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0, 0, 0, 0] : memref<8x24x32x48x64xf16> -> !xegpu.tensor_desc<1x2x4x8x16xf16>
  %1 = xegpu.create_nd_tdesc %src[0, 0, 0, 0, 0] : memref<8x24x32x48x64xf16> -> !xegpu.tensor_desc<1x2x4x8x16xf16>
  // CHECK: xegpu.prefetch_nd %[[R0]][0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<1x2x4x8x16xf16>
  xegpu.prefetch_nd %1[0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<1x2x4x8x16xf16>
  gpu.return
}
141+
133142// CHECK: func @subgroup_load_nd(%[[arg0:.*]]: memref<8x16xf16>) {
134143gpu.func @subgroup_load_nd (%src: memref <8 x16 xf16 >) {
135144 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -332,6 +341,17 @@ gpu.func @subgroup_store_nd_2(%dst: memref<24x32xf16>, %x : index) {
332341
// Round-trip test for xegpu.store_nd through a 1-D tensor descriptor.
// A splat constant vector<32xf16> is stored via a tensor_desc<32xf16>
// created from a 24x32 f16 memref at static offsets [0, 0]; store_nd is
// given one static offset plus L1 (write_back) and L2 (uncached) hints.
// CHECK: func @subgroup_store_nd_3(%[[arg0:.*]]: memref<24x32xf16>) {
gpu.func @subgroup_store_nd_3(%dst: memref<24x32xf16>) {
  // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<32xf16>
  %1 = arith.constant dense<1.0>: vector<32xf16>
  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
  %2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
  // CHECK: xegpu.store_nd %[[C]], %[[R0]][0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<32xf16>, !xegpu.tensor_desc<32xf16>
  xegpu.store_nd %1, %2[0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>: vector<32xf16>, !xegpu.tensor_desc<32xf16>
  gpu.return
}
352+
353+ // CHECK: func @subgroup_store_nd_offset_1(%[[arg0:.*]]: memref<24x32xf16>) {
354+ gpu.func @subgroup_store_nd_offset_1 (%dst: memref <24 x32 xf16 >) {
335355 // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<32xf16>
336356 %1 = arith.constant dense <1.0 >: vector <32 xf16 >
337357 // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
0 commit comments