@@ -34,27 +34,24 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
3434 [ShapedTypeInterface], "::mlir::TensorType"> {
3535 let summary = "TensorDesc describing regions of interest in data.";
3636 let description = [{
37- TensorDesc is a type designed to describe regions of the interested data as well as some
38- features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
39- it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
40- to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
41- It encodes the following information:
37+ TensorDesc is a type designed to describe regions of interest in data, as well as some features
38+ unique to Intel hardware. Unlike the built-in tensor type in MLIR, it essentially contains only
39+ metadata and does not hold the data itself. It is primarily designed to support 2D block load/store
40+ and DPAS (matrix multiplication instruction) on Intel GPUs. It encodes the following information:
4241
4342 * shape: the sizes/shape of the data block of interest, e.g., 8x16 means 8 rows
4443 and each row contains 16 contiguous data elements. The rows could be
45- either contiguous or not, depends on whether the encoding attribute
46- is set or not.
47- * element_type: the data type of the data element, e.g., f16, f32.
44+ either contiguous or not, depending on the encoding attribute. If the
45+ encoding is a BlockTensorDescAttr, rows are contiguous. If the encoding
46+ is a ScatterTensorDescAttr, rows are not necessarily contiguous. If the
47+ encoding is not set, it is treated as a default BlockTensorDescAttr.
4848
49- Similar to the builtin tensor, it also provides an optinal attribute to encoding
50- the following information via the TensorDescAttr object:
51- * memory_space (xegpu::MemorySpace): [optional] where the data is located,
52- global memory or shared memory. It is default to Global.
53- * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
54- that will be loaded by block load at a time. It is default to 1.
55- * boundary_check (bool): [optional] indicates whether the operation detects the boundary
56- and pads with zero for out-of-boundary access. It is default to do boundary check.
49+ * element_type: the data type of the data element, e.g., f16, f32.
5750
51+ Similar to the built-in tensor, it also provides optional attributes for encoding
52+ additional information via either BlockTensorDescAttr or ScatterTensorDescAttr, or for
53+ supporting workgroup, subgroup, and workitem (or SIMT) level programming via the
54+ Layout attribute. Please check their definitions for details.
5855
5956 Syntax:
6057
@@ -63,7 +60,9 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
6360 element-type ::= float-type | integer-type | index-type
6461 dim-list := (static-dim-list `x`)?
6562 static-dim-list ::= decimal-literal `x` decimal-literal
66- attr-list = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? (, layout `<` wi_layout = value, wi_data = value `>`)?
63+ attr-list = (, encoding-attr)? (, layout-attr)?
64+ encoding-attr = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)?
65+ layout-attr = (, layout `<` (scope = value,)? (sg_layout = value, sg_data = value, order = value)? wi_layout = value, wi_data = value `>`)?
6766 ```
6867
6968 Examples:
@@ -78,8 +77,14 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
7877 // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space.
7978 xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_space = slm>>
8079
81- // A TensorDesc with a layout
82- xegpu.tensor_desc<8x16xf32, #xegpu.layout<wi_layout = [1, 16], wi_data = [1, 1]>>
80+ // A TensorDesc with a layout for workgroup level programming
81+ xegpu.tensor_desc<32x64xf32, #xegpu.layout<sg_layout = [2, 4], sg_data = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
82+
83+ // A TensorDesc with a layout for subgroup level programming
84+ xegpu.tensor_desc<8x16xf32, #xegpu.layout<scope = sg, wi_layout = [1, 16], wi_data = [1, 1]>>
85+
86+ // A TensorDesc with a layout for workitem level programming
87+ xegpu.tensor_desc<8x16xf32, #xegpu.layout<scope = wi, wi_layout = [1, 16], wi_data = [1, 1]>>
8388 ```
8489 }];
8590
0 commit comments