@@ -80,7 +80,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     information e.g., memref<?x?xf16>, the strides information has to be explicitly
     passed via the "strides" and "const_strides" argument.
 
-    In SIMT mode, tensor descriptor is augmented with `SGMapAttr` which describes the
+    In SIMT mode, the tensor descriptor is augmented with `LayoutAttr` which describes the
     mapping of the tensor descriptor to the work items.
 
     Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
@@ -113,7 +113,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 8 : index
     %1 = xegpu.create_nd_tdesc %0[%c0, %c0] : memref<1024x1024xf32>
-         -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>>
+         -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     ```
   }];
 
@@ -306,7 +306,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
     fp32 or fp64. It implies that vnni and transpose cannot exist at the
     same time.
 
-    In SIMT mode, LoadNdOp expects the tensor descriptor to be augmented with `SGMapAttr`
+    In SIMT mode, LoadNdOp expects the tensor descriptor to be augmented with `LayoutAttr`
     which describes the mapping of the tensor to the work items. In this case, the result
     vector represents the data to be loaded by each work-item.
 
@@ -323,7 +323,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
     xegpu.load_nd %1 {l1_hint = #xegpu.cache_hint<cached>,
                       l2_hint = #xegpu.cache_hint<uncached>}
       : !xegpu.tensor_desc<8x16xf32,
-           #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>> -> vector<8x1xf32>
+           #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x1xf32>
     ```
 
 
@@ -364,7 +364,7 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
     of cache, L1, L2 and L3. If the hardware does not have a corresponding cache,
     the corresponding cache hint attribute will be masked.
 
-    In SIMT mode, StoreNdOp expects the tensor descriptor to be augmented with `SGMapAttr`
+    In SIMT mode, StoreNdOp expects the tensor descriptor to be augmented with `LayoutAttr`
     which describes the mapping of the tensor to the work items. In this case, the input
     vector represents the data to be stored by each work-item.
 
@@ -381,7 +381,7 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
                    l2_hint = #xegpu.cache_hint<write_back>,
                    l3_hint = #xegpu.cache_hint<write_through>}
       : vector<8x1xf16>, !xegpu.tensor_desc<8x16xf16,
-           #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>>
+           #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     ```
 
 
@@ -422,7 +422,7 @@ def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset",
     Example 2 (SIMT mode):
     ```
     %2 = xegpu.update_nd_offset %1, [0, 16]:
-      !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>>
+      !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     ```
   }];
 
@@ -482,7 +482,7 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
     the chunk_size if the chunk size is larger than 1.
 
     In SIMT mode, similar to `create_nd_tdesc`, the resulting tensor descriptor is augmented
-    with `SGMapAttr` which describes the mapping of the tensor descriptor to the work items.
+    with `LayoutAttr` which describes the mapping of the tensor descriptor to the work items.
     In this case, the first dimension of the tensor descriptor represents the work-items, and
     the second dimension represents the chunk size.
 
@@ -517,7 +517,7 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
     %off = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex>
     %1 = xegpu.create_tdesc %0, %off : memref<1024xf32>, vector<4xindex>
           -> TensorDesc<4x8xf32, #xegpu.scatter_tdesc_attr<chunk_size = 8>,
-             #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 1]>>
+             #xegpu.layout<lane_layout = [4, 1], lane_data = [1, 1]>>
     ```
   }];
 
@@ -623,7 +623,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
     The mask operand masks out memory access so that it is safe to pass out-of-boundary
     addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
 
-    In SIMT mode, LoadGatherOp expects the tensor descriptor to be augmented with `SGMapAttr`
+    In SIMT mode, LoadGatherOp expects the tensor descriptor to be augmented with `LayoutAttr`
     which describes the mapping of the tensor to the work items. In this case, the result vector
     represents the data to be loaded by each work-item. Each work-item receives a `chunk_size`
     number of elements.
@@ -653,7 +653,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
                  l2_hint = #xegpu.cache_hint<uncached>,
                  l3_hint = #xegpu.cache_hint<uncached>}
       : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>,
-           !xegpu.sg_map<wi_layout = [16, 1], wi_data = [1, 1]>>
+           #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>,
         vector<16xi1> -> vector<8x1xf32>
     ```
 
@@ -704,7 +704,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [
     has a transpose effect, which is similar to `load_gather`. Therefore, a transpose attribute is
     introduced on purpose, making sure users are aware of this implicit transformation.
 
-    In SIMT mode, StoreScatterOp expects the tensor descriptor to be augmented with `SGMapAttr`
+    In SIMT mode, StoreScatterOp expects the tensor descriptor to be augmented with `LayoutAttr`
     which describes the mapping of the tensor to the work items. In this case, the input vector
     represents the data to be stored by each work-item. Each work-item receives a `chunk_size`
     number of elements.
@@ -732,7 +732,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [
                  l2_hint = #xegpu.cache_hint<write_back>,
                  l3_hint = #xegpu.cache_hint<write_through>}
       : vector<8x1xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<chunk_size=8>,
-           !xegpu.sg_map<wi_layout = [16, 1], wi_data = [1, 1]>> vector<16xi1>
+           #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<16xi1>
     ```
 
   }];
@@ -790,7 +790,7 @@ def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset",
     %off = arith.constant dense<[32, 32, 32, 32]> : vector<4xindex>
     %2 = xegpu.update_offset %1, %off :
           !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size=2>,
-            #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 1]>>, vector<4xindex>
+            #xegpu.layout<lane_layout = [4, 1], lane_data = [1, 1]>>, vector<4xindex>
     ```
   }];
 
@@ -840,9 +840,9 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
     factor, which is computed as `32/bit_width_of_elem_type`. Thus, `B: vector<16x16xf16>`
     can be represented as `B: vector<8x16x2xf16>`.
 
-    In SIMT mode, DpasOp expects attributes `sg_map_a`, `sg_map_b`, and `sg_map_c`
-    which descibes the data fragment owned by each work-item w.r.t. the tensor
-    descriptor these data are loaded from.
+    In SIMT mode, DpasOp expects layout attributes `a`, `b`, and `c` (the latter only
+    if acc is used), which describe the data fragment owned by each work-item w.r.t.
+    the tensor descriptor these data are loaded from.
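+
+    For illustration, a sketch of a possible SIMT-mode invocation (the per-lane operand
+    shapes and lane layouts below are illustrative assumptions, not normative):
+    ```mlir
+    %d = xegpu.dpas %a, %b {a_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+                            b_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>}
+          : vector<8x1xf16>, vector<16x1xf16> -> vector<8x1xf32>
+    ```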
 
     Note: on PVC, the hardware can perform load with VNNI transformation when data
     element type is 16-bit or lower precision, taking 2 or 4 elements from
@@ -853,9 +853,9 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
     XeGPU_DpasOpType : $lhs,
     XeGPU_DpasOpType : $rhs,
     Optional<XeGPU_Vector2DType>: $acc,
-    OptionalAttr<XeGPU_SGMapAttr>:$sg_map_a,
-    OptionalAttr<XeGPU_SGMapAttr>:$sg_map_b,
-    OptionalAttr<XeGPU_SGMapAttr>:$sg_map_c);
+    OptionalAttr<XeGPU_LayoutAttr>:$a_layout,
+    OptionalAttr<XeGPU_LayoutAttr>:$b_layout,
+    OptionalAttr<XeGPU_LayoutAttr>:$c_layout);
   let results = (outs XeGPU_Vector2DType: $result);
 
   let extraClassDeclaration = [{
@@ -876,6 +876,10 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
     VectorType getResultType() {
       return getResult().getType();
     }
+
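+    /// Returns true when the optional accumulator operand is present.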
+    bool hasAcc() {
+      return getAcc() != nullptr;
+    }
   }];
 
   let assemblyFormat = [{
@@ -979,4 +983,24 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
   let extraClassDeclaration = extraBaseClassDeclaration;
 }
 
+def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>]> {
+  let summary = "Convert the layout of the input operand";
+  let description = [{
+    `convert_layout` adjusts the data distribution across subgroups and/or work-items by modifying
+    the `LayoutAttr`. Both `srcMap` and `resMap` must correspond to the same programming scope, such
+    as workgroup-level (wg) or subgroup-level (sg) code. This operation is not valid once the IR is
+    lowered to the WI level, because that is the end result of all distributions.
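+
+    A minimal usage sketch following the assembly format below (the vector shape and
+    layout fields shown are illustrative assumptions):
+    ```mlir
+    %1 = xegpu.convert_layout %0 {srcMap = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+                                  resMap = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+          : vector<16x16xf32>
+    ```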
+  }];
+  let arguments = (ins XeGPU_Vector2DType: $source,
+                       XeGPU_LayoutAttr: $srcMap,
+                       XeGPU_LayoutAttr: $resMap
+  );
+  let results = (outs XeGPU_Vector2DType: $result);
+  let assemblyFormat = [{
+    $source attr-dict `:` type($source)
+  }];
+
+  let hasVerifier = 1;
+}
+
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD