@@ -261,6 +261,21 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
261261 : !xegpu.tensor_desc<8x16xf16>
262262 ```
263263
264+ The operation may take optional offsets for the tensor descriptor.
265+ The number of offsets must be greater than or equal to the rank of the tensor descriptor
266+ and less than or equal to the rank of the source memref. The offsets are applied to the innermost
267+ dimensions of the source memref; any leading dimensions without an offset are indexed at 0.
268+
269+ Examples:
270+ ```mlir
271+ %tdesc = xegpu.create_nd_tdesc %0: memref<2x8x32x32xf32> -> !xegpu.tensor_desc<8x16xf32>
272+ // memref[0, 0, %off0, %off1]
273+ xegpu.prefetch_nd %tdesc[%off0, %off1] : !xegpu.tensor_desc<8x16xf32>
274+ // memref[0, %off0, %off1, %off2]
275+ xegpu.prefetch_nd %tdesc[%off0, %off1, %off2] : !xegpu.tensor_desc<8x16xf32>
276+ // memref[%off0, %off1, %off2, %off3]
277+ xegpu.prefetch_nd %tdesc[%off0, %off1, %off2, %off3] : !xegpu.tensor_desc<8x16xf32>
278+ ```
264279 }];
265280
266281 let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
@@ -350,6 +365,21 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
350365 : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
351366 ```
352367
368+ The operation may take optional offsets for the tensor descriptor.
369+ The number of offsets must be greater than or equal to the rank of the tensor descriptor
370+ and less than or equal to the rank of the source memref. The offsets are applied to the innermost
371+ dimensions of the source memref; any leading dimensions without an offset are indexed at 0.
372+
373+ Examples:
374+ ```mlir
375+ %1 = xegpu.create_nd_tdesc %0: memref<2x8x32x32xf32> -> !xegpu.tensor_desc<8x16xf32>
376+ // memref[0, 0, %off0, %off1]
377+ xegpu.load_nd %1[%off0, %off1] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
378+ // memref[0, %off0, %off1, %off2]
379+ xegpu.load_nd %1[%off0, %off1, %off2] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
380+ // memref[%off0, %off1, %off2, %off3]
381+ xegpu.load_nd %1[%off0, %off1, %off2, %off3] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
382+ ```
353383
354384 }];
355385
@@ -445,6 +475,21 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
445475 : vector<8xf16>, !xegpu.tensor_desc<8x16xf16>
446476 ```
447477
478+ The operation may take optional offsets for the tensor descriptor.
479+ The number of offsets must be greater than or equal to the rank of the tensor descriptor
480+ and less than or equal to the rank of the source memref. The offsets are applied to the innermost
481+ dimensions of the source memref; any leading dimensions without an offset are indexed at 0.
482+
483+ Examples:
484+ ```mlir
485+ %2 = xegpu.create_nd_tdesc %0: memref<2x8x32x32xf32> -> !xegpu.tensor_desc<8x16xf32>
486+ // memref[0, 0, %off0, %off1]
487+ xegpu.store_nd %3, %2[%off0, %off1] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
488+ // memref[0, %off0, %off1, %off2]
489+ xegpu.store_nd %3, %2[%off0, %off1, %off2] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
490+ // memref[%off0, %off1, %off2, %off3]
491+ xegpu.store_nd %3, %2[%off0, %off1, %off2, %off3] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
492+ ```
448493
449494 }];
450495
0 commit comments