Skip to content

Commit 3578c1b

Browse files
committed
add optional offsets to nd load/store/prefetch
1 parent 34447ef commit 3578c1b

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

mlir/test/Dialect/XeGPU/ops.mlir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,15 @@ gpu.func @prefetch_nd_offset_1(%src: memref<48x64xf16>, %x : index, %y : index)
130130
gpu.return
131131
}
132132

133+
// Round-trip test: prefetch_nd with constant offsets on a rank-5 tensor
// descriptor. The symbol name must be unique within the enclosing module
// (a function named @prefetch_nd_offset_1 is already defined above), and one
// offset is supplied per tensor-descriptor dimension.
// CHECK: gpu.func @prefetch_nd_offset_5d(%[[arg0:.*]]: memref<8x24x32x48x64xf16>) {
134+
gpu.func @prefetch_nd_offset_5d(%src: memref<8x24x32x48x64xf16>) {
135+
// CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0, 0, 0, 0] : memref<8x24x32x48x64xf16> -> !xegpu.tensor_desc<1x2x4x8x16xf16>
136+
%1 = xegpu.create_nd_tdesc %src[0, 0, 0, 0, 0] : memref<8x24x32x48x64xf16> -> !xegpu.tensor_desc<1x2x4x8x16xf16>
137+
// CHECK: xegpu.prefetch_nd %[[R0]][0, 0, 0, 0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<1x2x4x8x16xf16>
138+
xegpu.prefetch_nd %1[0, 0, 0, 0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<1x2x4x8x16xf16>
139+
gpu.return
140+
}
141+
133142
// CHECK: func @subgroup_load_nd(%[[arg0:.*]]: memref<8x16xf16>) {
134143
gpu.func @subgroup_load_nd(%src: memref<8x16xf16>) {
135144
// CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -330,6 +339,17 @@ gpu.func @subgroup_store_nd_2(%dst: memref<24x32xf16>, %x : index) {
330339
gpu.return
331340
}
332341

342+
// Round-trip test: store_nd with a constant offset on a rank-1 tensor
// descriptor. Uses a distinct symbol name because @subgroup_store_nd_offset_1
// is already defined in this module; duplicate symbols fail verification.
// CHECK: func @subgroup_store_nd_offset_1d(%[[arg0:.*]]: memref<24x32xf16>) {
343+
gpu.func @subgroup_store_nd_offset_1d(%dst: memref<24x32xf16>) {
344+
// CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<32xf16>
345+
%1 = arith.constant dense<1.0>: vector<32xf16>
346+
// CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
347+
%2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<32xf16>
348+
// CHECK: xegpu.store_nd %[[C]], %[[R0]][0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<32xf16>, !xegpu.tensor_desc<32xf16>
349+
xegpu.store_nd %1, %2[0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>: vector<32xf16>, !xegpu.tensor_desc<32xf16>
350+
gpu.return
351+
}
352+
333353
// CHECK: func @subgroup_store_nd_offset_1(%[[arg0:.*]]: memref<24x32xf16>) {
334354
gpu.func @subgroup_store_nd_offset_1(%dst: memref<24x32xf16>) {
335355
// CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<32xf16>

0 commit comments

Comments
 (0)