From cce8abaa92703dea562536c02fee3a8fd00ef9e6 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Fri, 8 Aug 2025 15:57:16 +0000 Subject: [PATCH 01/14] init --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 96 +++++++++++-------- .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 23 +++++ mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 56 +++++++++++ 3 files changed, 134 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 75b16a87e03c6..3b074a35e9cbd 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -29,7 +29,7 @@ class XeGPU_Op traits = []>: void printProperties(::mlir::MLIRContext *ctx, ::mlir::OpAsmPrinter &p, const Properties &prop, ::mlir::ArrayRef<::llvm::StringRef> elidedProps) { - + DictionaryAttr propAttr = dyn_cast_if_present(getPropertiesAsAttr(ctx, prop)); // filter out the elidedProps from propAttr, and get the resultAttr @@ -43,7 +43,7 @@ class XeGPU_Op traits = []>: } if (!filteredAttrs.empty()) { - p << "<" << DictionaryAttr::get(ctx, filteredAttrs) << ">"; + p << "<" << DictionaryAttr::get(ctx, filteredAttrs) << ">"; } } @@ -189,11 +189,11 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface ArrayRef getStaticOffsets(){ auto attr = getConstOffsetsAttr(); - if (attr) + if (attr) return attr; int64_t rank = getMixedSizes().size(); - + setConstOffsets(llvm::SmallVector(rank, 0)); attr = getConstOffsetsAttr(); @@ -233,7 +233,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface auto attr = getConstStridesAttr(); if (attr) return attr; - + if (llvm::isa(getSourceType())) return emptyStrides; @@ -314,15 +314,15 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> { }]; let assemblyFormat = [{ - $TensorDesc `` - custom($offsets, $const_offsets) + $TensorDesc `` + custom($offsets, $const_offsets) prop-dict attr-dict `:` qualified(type($TensorDesc)) 
}]; let builders = [ - OpBuilder<(ins "Value": $TensorDesc, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + OpBuilder<(ins "Value": $TensorDesc, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -370,7 +370,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [ let arguments = (ins XeGPU_TensorDesc: $TensorDesc, Variadic: $offsets, - OptionalAttr: $const_offsets, + OptionalAttr: $const_offsets, OptionalAttr: $packed, OptionalAttr: $transpose, OptionalAttr: $l1_hint, @@ -390,16 +390,16 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [ }]; let assemblyFormat = [{ - $TensorDesc `` - custom($offsets, $const_offsets) + $TensorDesc `` + custom($offsets, $const_offsets) prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value) }]; let builders = [ - OpBuilder<(ins "Type": $value, "Value": $TensorDesc, + OpBuilder<(ins "Type": $value, "Value": $TensorDesc, "UnitAttr": $packed, "DenseI64ArrayAttr": $transpose, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -442,7 +442,7 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [ let arguments = (ins XeGPU_ValueType: $value, XeGPU_TensorDesc: $TensorDesc, Variadic: $offsets, - OptionalAttr: $const_offsets, + OptionalAttr: $const_offsets, OptionalAttr: $l1_hint, OptionalAttr: $l2_hint, OptionalAttr: $l3_hint); @@ -458,16 +458,16 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [ }]; let assemblyFormat = [{ - $value `,` - $TensorDesc `` - custom($offsets, $const_offsets) + $value `,` + $TensorDesc `` + custom($offsets, $const_offsets) prop-dict attr-dict `:` type($value) `,` qualified(type($TensorDesc)) }]; let builders = [ - OpBuilder<(ins "Value": $value, "Value": $TensorDesc, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + OpBuilder<(ins "Value": $value, "Value": 
$TensorDesc, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -635,12 +635,12 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> { l3_hint = #xegpu.cache_hint} : !xegpu.tensor_desc<16xf16> ``` - + Example 2: A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc. It combines "create scattered TensorTdesc" and "prefetch with scattered TensorTdesc". The source operand could be a raw pointer (uint64_t). - Please refer to create_tdesc for the restriction of memref. + Please refer to create_tdesc for the restriction of memref. ```mlir %a = memref.alloc() : memref<1024xf32> %0 = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex> @@ -676,16 +676,16 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> { }]; let assemblyFormat = [{ - $source + $source (`[` $offsets^ `]`)? prop-dict - attr-dict `:` type(operands) + attr-dict `:` type(operands) }]; - + let builders = [ OpBuilder<(ins "Value": $source, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -723,7 +723,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> { : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr>, vector<16xi1> -> vector<16x8xf32> ``` - + Example 3 (SIMT mode): ```mlir %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, @@ -732,12 +732,12 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> { : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr> vector<16xi1> -> vector<8xf32> ``` - + Example 4: A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc. It combines "create scattered TensorTdesc" and "load with scattered TensorTdesc". The source operand could be a raw pointer (uint64_t). Please refer to create_tdesc - for the restriction of memref. 
+ for the restriction of memref. ```mlir %a = memref.alloc() : memref<1024xf32> %offsets = vector.step : vector<16xindex> @@ -794,14 +794,14 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> { let assemblyFormat = [{ $source (`[` $offsets^ `]`)? `,` - $mask prop-dict + $mask prop-dict attr-dict `:` type(operands) `->` type($value) }]; let builders = [ OpBuilder<(ins "Type": $value, "Value": $source, "Value": $mask, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -848,7 +848,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> { A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc. It combines "create scattered TensorTdesc" and "store with scattered TensorTdesc". The dest operand could be a raw pointer (uint64_t). - Please refer to create_tdesc for the restriction of memref. + Please refer to create_tdesc for the restriction of memref. ```mlir %a = memref.alloc() : memref<1024xf32> %val = arith.constant dense<0.0> : vector<16xf32> @@ -901,15 +901,15 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> { $value `,` $dest (`[` $offsets^ `]`)? `,` - $mask - prop-dict + $mask + prop-dict attr-dict `:` type(operands) }]; let builders = [ OpBuilder<(ins "Value": $value, "Value": $dest, "Value": $mask, - "xegpu::CachePolicyAttr": $l1_hint, - "xegpu::CachePolicyAttr": $l2_hint, + "xegpu::CachePolicyAttr": $l1_hint, + "xegpu::CachePolicyAttr": $l2_hint, "xegpu::CachePolicyAttr": $l3_hint)> ]; @@ -1146,4 +1146,18 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou let hasCanonicalizer = 1; } +def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc"> { + let summary = "Create a matrix descriptor."; + let description = [{ + Matrices are treated as 2D units. 
+ In case the ROI rank is >2, the two fastest changing dimensions + represent a 2D unit and other dimensions specify the multiple + of these units that are stacked vertically. + Results: + - `matrix_desc` : a descriptor for SLM allocation. + }]; + let results = (outs XeGPU_MatrixDesc:$matrix_desc); + let assemblyFormat = "attr-dict `:` type($matrix_desc)"; +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index b268cabb5d266..6ac126a84d39c 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -201,4 +201,27 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { }]; } +def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> { + let summary = "MatrixDesc describing the data in SLM"; + let description = [{ + MatrixDesc describes the data stored in SLM. Unleass specified via + the the optional layout attribute, the data is stored in a continuous + SLM region in row-major order by default. 
+ }]; + let parameters = (ins ArrayRefParameter<"int64_t">: $shape, + "mlir::Type": $elementType, + OptionalParameter<"mlir::Attribute">: $layout); + + let extraClassDeclaration = [{ + // using mlir::ShapedType::Trait::getElementTypeBitWidth; + // using mlir::ShapedType::Trait::getElementTypeBitWidth; + // using mlir::ShapedType::Trait::getRank; + // using mlir::ShapedType::Trait::getNumElements; + // using mlir::ShapedType::Trait::isDynamicDim; + // using mlir::ShapedType::Trait::hasStaticShape; + }]; + + let hasCustomAssemblyFormat = true; +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 3c0ca114a62d4..50eb90dbc1df9 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -394,6 +394,62 @@ LogicalResult TensorDescType::verify( return success(); } +//===----------------------------------------------------------------------===// +// XeGPU_MatrixDescType +//===----------------------------------------------------------------------===// +mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) { + llvm::SmallVector shape; + mlir::Type elementType; + mlir::FailureOr layout; + + // Parse literal '<' + if (parser.parseLess()) + return {}; + + auto shapeLoc = parser.getCurrentLocation(); + if (mlir::failed(parser.parseDimensionList(shape, false, true))) { + parser.emitError(shapeLoc, "failed to parse parameter 'shape'"); + return {}; + } + + auto elemTypeLoc = parser.getCurrentLocation(); + if (mlir::failed(parser.parseType(elementType))) { + parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'"); + return {}; + } + + // parse optional attributes + if (mlir::succeeded(parser.parseOptionalComma())) { + mlir::Attribute attr; + ParseResult res = parser.parseAttribute(attr); + if (mlir::failed(res)) + return {}; + layout = attr; + } + + // Parse literal '>' + if (parser.parseGreater()) + return {}; 
+ + MLIRContext *ctxt = parser.getContext(); + return MatrixDescType::getChecked( + [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape, + elementType, layout.value_or(mlir::Attribute())); +} + +void MatrixDescType::print(::mlir::AsmPrinter &printer) const { + printer << "<"; + + printer.printDimensionList(getShape()); + printer << 'x'; + printer << getElementType(); + + if (auto layout = getLayout()) + printer << ", " << layout; + + printer << ">"; +} + } // namespace xegpu } // namespace mlir From 76ccc39d6f3c599015d0d6d853cc20a4853fcb7f Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Mon, 11 Aug 2025 18:48:38 +0000 Subject: [PATCH 02/14] sync --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 87 ++++++++++++++++++- .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 16 ++-- 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 3b074a35e9cbd..59c1a432dce66 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1146,18 +1146,101 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou let hasCanonicalizer = 1; } -def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc"> { +def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure]> { let summary = "Create a matrix descriptor."; let description = [{ Matrices are treated as 2D units. In case the ROI rank is >2, the two fastest changing dimensions represent a 2D unit and other dimensions specify the multiple of these units that are stacked vertically. + Arguments: + - `source` : a base address of SLM allocation. Results: - `matrix_desc` : a descriptor for SLM allocation. 
}]; + let arguments = (ins XeGPU_BaseAddrType:$source); let results = (outs XeGPU_MatrixDesc:$matrix_desc); - let assemblyFormat = "attr-dict `:` type($matrix_desc)"; + let assemblyFormat = "$source prop-dict attr-dict `:` type($source) `->` type($matrix_desc)"; } +def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> { + let arguments = (ins XeGPU_MatrixDesc:$matrix_desc, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets, + OptionalAttr:$layout + ); + let results = (outs XeGPU_ValueType:$res); + let assemblyFormat = [{ + $matrix_desc `` custom($offsets, $const_offsets) + prop-dict attr-dict `:` functional-type(operands, results) + }]; + let summary = "Load matrix from SLM."; + let description = [{ + This operation loads a matrix from the SLM using the matrix descriptor. + There are additional parameters and attributes that support loading, but they must only + be specified for a work-item level operation. + + General rules: + 1. Non-WI-level code must not specify optional attributes. + 2. If the load uses `vector` semantics, all of the vector attributes must be specified. + 3. If the load uses `array` semantics, all of the array attributes must be specified. + + Arguments: + - `matrix_desc` : a matrix descriptor (SLM allocation + matrix type). + - `offsets` : Coordinates of the matrix to load. + Results: + - `res` : loaded matrix elements. 
+ }]; + + let builders = [ + // OpBuilder<(ins "Type":$res, "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, "LayoutAttr": $layout)>, + ]; + let extraClassDeclaration = [{ + SmallVector getMixedOffsets() { + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); + } + }]; + // let hasVerifier = 1; +} + +def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> { + let arguments = (ins + XeGPU_MatrixDesc:$matrix_desc, + XeGPU_ValueType:$data, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets, + OptionalAttr:$layout + ); + let assemblyFormat = [{ + $matrix_desc `` custom($offsets, $const_offsets) `,` $data + prop-dict attr-dict `:` type(operands) + }]; + let summary = "Store matrix from SLM."; + let description = [{ + This operation stores workitem's `data` fragment of the matrix to the SLM (`matrix_desc`). + There are additional parameters and attributes that support loading, but they must only + be specified for a work-item level operation. + + General rules: + 1. Non-WI-level code must not specify optional attributes. + 2. If the store uses `vector` semantics, all of the vector attributes must be specified. + + Arguments: + - `matrix_desc` : a matrix descriptor. + - `data` : data to be stored to the matrix. + - `offsets` : Coordinates of the matrix where the data will be stored. 
+ }]; + let builders = [ + // OpBuilder<(ins "TypedValue": $matrix_desc, "Value" : $data, "llvm::ArrayRef": $offsets, "LayoutAttr": $layout)>, + ]; + let extraClassDeclaration = [{ + SmallVector getMixedOffsets() { + Builder b(getContext()); + return getMixedValues(getConstOffsets(), getOffsets(), b); + } + }]; + // let hasVerifier = 1; +} + + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 6ac126a84d39c..f578fc8bc0735 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -204,21 +204,19 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> { let summary = "MatrixDesc describing the data in SLM"; let description = [{ - MatrixDesc describes the data stored in SLM. Unleass specified via - the the optional layout attribute, the data is stored in a continuous - SLM region in row-major order by default. + MatrixDesc describes a SLM region. Unleass specified via the optional layout attribute, + the data is stored contiguously in the region in row-major order by default. 
}]; let parameters = (ins ArrayRefParameter<"int64_t">: $shape, "mlir::Type": $elementType, OptionalParameter<"mlir::Attribute">: $layout); let extraClassDeclaration = [{ - // using mlir::ShapedType::Trait::getElementTypeBitWidth; - // using mlir::ShapedType::Trait::getElementTypeBitWidth; - // using mlir::ShapedType::Trait::getRank; - // using mlir::ShapedType::Trait::getNumElements; - // using mlir::ShapedType::Trait::isDynamicDim; - // using mlir::ShapedType::Trait::hasStaticShape; + bool hasRank() const { return true; } + + MatrixDescType cloneWith(std::optional> shape, Type elementType) const { + return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getLayout()); + } }]; let hasCustomAssemblyFormat = true; From cb0a195e340bac10e10b6d5cb9de0d925d39deeb Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Tue, 12 Aug 2025 18:10:33 +0000 Subject: [PATCH 03/14] add unit tests for create_matrix_desc --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 24 +++++++++++++------ mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 1 + mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 10 ++++++++ mlir/test/Dialect/XeGPU/invalid.mlir | 16 +++++++++++++ mlir/test/Dialect/XeGPU/ops.mlir | 18 ++++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 37e4c2c811155..e4ea0b27323ec 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1101,21 +1101,31 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou let hasCanonicalizer = 1; } -def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure]> { +def isSharedPred : CPred<"isSharedMemory(llvm::cast($_self))">; +class StaticShared1DMemRefOf allowedTypes> : + ConfinedType, [HasStaticShapePred, isSharedPred], + "statically shaped " # MemRefOf.summary # " for shared memory", + "mlir::MemRefType">; + +class 
SizeInBits : + StrFunc<"llvm::cast($" # name # ".getType()).getNumElements()" + "*llvm::cast($" # name # ".getType()).getElementTypeBitWidth()">; +class AllMemSizesMatch names> : + AllMatchSameOperatorTrait.result, + "size in bits">; + +def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure, + AllMemSizesMatch<["source", "matrix_desc"]>]> { let summary = "Create a matrix descriptor."; let description = [{ - Matrices are treated as 2D units. - In case the ROI rank is >2, the two fastest changing dimensions - represent a 2D unit and other dimensions specify the multiple - of these units that are stacked vertically. Arguments: - `source` : a base address of SLM allocation. Results: - `matrix_desc` : a descriptor for SLM allocation. }]; - let arguments = (ins XeGPU_BaseAddrType:$source); + let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source); let results = (outs XeGPU_MatrixDesc:$matrix_desc); - let assemblyFormat = "$source prop-dict attr-dict `:` type($source) `->` type($matrix_desc)"; + let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))"; } def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> { diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt index 7c6a4f37db9af..603fb5d237544 100644 --- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt @@ -17,6 +17,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect MLIRAffineUtils MLIRArithUtils MLIRDialectUtils + MLIRGPUDialect MLIRIR MLIRViewLikeInterface MLIRVectorDialect diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 2cd086feb5deb..ad4d8bd6e22cd 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/Utils/Utils.h" +#include 
"mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Dialect/XeGPU/IR/XeGPU.h" @@ -21,6 +22,15 @@ namespace mlir { namespace xegpu { +bool isSharedMemory(const MemRefType &memrefTy) { + Attribute attr = memrefTy.getMemorySpace(); + if (auto intAttr = llvm::dyn_cast(attr)) + return intAttr.getInt() == 3; + if (auto memrefSpace = llvm::dyn_cast(attr)) + return memrefSpace.getValue() == MemorySpace::SLM; + return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr); +} + template static std::string makeString(T array, bool breakline = false) { std::string buf; diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 44e15dd7cbb38..1cd817918a772 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -762,3 +762,19 @@ func.func @slice_attr_repeat_dim() { return } +// ----- +func.func @create_matrix_desc_non_slm() { + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 1> + // expected-error@+1 {{operand #0 must be statically shaped memref of 8-bit signless integer values for shared memory}} + %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 1> -> !xegpu.matrix_desc<16x64xf16> + return +} + +// ----- +func.func @create_matrix_desc_mismatch_sizes() { + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + // expected-error@+1 {{failed to verify that all of {source, matrix_desc} have same size in bits}} + %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x32xf16> + return +} + diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index 67c00f5a9cc2f..c224749031328 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -751,4 +751,22 @@ gpu.func @fence() { gpu.return } +// CHECK-LABEL: gpu.func @create_matrix_desc({{.*}}) { +gpu.func @create_matrix_desc() { + //CHECK: [[alloc:%.+]] = 
memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> + //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16> + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16> + gpu.return +} + +// CHECK-LABEL: gpu.func @create_matrix_desc_with_stride({{.*}}) { +gpu.func @create_matrix_desc_with_stride() { + //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> + //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> + gpu.return +} + } From 98871ccb013229593e8d169533ab3b03b136f687 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Tue, 12 Aug 2025 20:18:09 +0000 Subject: [PATCH 04/14] add unit test for load_matrix and store_matrix --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 31 ++++++----- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 53 +++++++++++++++++++ mlir/test/Dialect/XeGPU/invalid.mlir | 28 ++++++++++ mlir/test/Dialect/XeGPU/ops.mlir | 29 ++++++++++ 4 files changed, 129 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index e4ea0b27323ec..461df6efb8528 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1128,16 +1128,18 @@ def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure, let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))"; } -def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> { +def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, + 
AllElementTypesMatch<["matrix_desc", "res"]>, + AllRanksMatch<["matrix_desc", "res"]>]> { let arguments = (ins XeGPU_MatrixDesc:$matrix_desc, Variadic: $offsets, DenseI64ArrayAttr: $const_offsets, - OptionalAttr:$layout + OptionalAttr:$layout ); let results = (outs XeGPU_ValueType:$res); let assemblyFormat = [{ $matrix_desc `` custom($offsets, $const_offsets) - prop-dict attr-dict `:` functional-type(operands, results) + prop-dict attr-dict `` `:` type(operands) `->` type(results) }]; let summary = "Load matrix from SLM."; let description = [{ @@ -1158,23 +1160,27 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> { }]; let builders = [ - // OpBuilder<(ins "Type":$res, "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, "LayoutAttr": $layout)>, + OpBuilder<(ins "Type":$res, "TypedValue": $matrix_desc, + "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, ]; let extraClassDeclaration = [{ SmallVector getMixedOffsets() { return getMixedValues(getConstOffsets(), getOffsets(), getContext()); } }]; - // let hasVerifier = 1; + + let hasVerifier = 1; } -def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> { +def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, + AllElementTypesMatch<["matrix_desc", "data"]>, + AllRanksMatch<["matrix_desc", "data"]>]> { let arguments = (ins XeGPU_MatrixDesc:$matrix_desc, - XeGPU_ValueType:$data, Variadic: $offsets, DenseI64ArrayAttr: $const_offsets, - OptionalAttr:$layout + XeGPU_ValueType:$data, + OptionalAttr:$layout ); let assemblyFormat = [{ $matrix_desc `` custom($offsets, $const_offsets) `,` $data @@ -1196,15 +1202,16 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> { - `offsets` : Coordinates of the matrix where the data will be stored. 
}]; let builders = [ - // OpBuilder<(ins "TypedValue": $matrix_desc, "Value" : $data, "llvm::ArrayRef": $offsets, "LayoutAttr": $layout)>, + OpBuilder<(ins "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, + "Value" : $data, "LayoutTrait": $layout)>, ]; let extraClassDeclaration = [{ SmallVector getMixedOffsets() { - Builder b(getContext()); - return getMixedValues(getConstOffsets(), getOffsets(), b); + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); } }]; - // let hasVerifier = 1; + + let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index ad4d8bd6e22cd..2051d7030340e 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -935,6 +935,59 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add(context); } +//===----------------------------------------------------------------------===// +// XeGPU_LoadMatrixOp +//===----------------------------------------------------------------------===// +void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res, + TypedValue matrixDesc, + llvm::ArrayRef offsets, + LayoutTrait layout) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + + build(builder, state, res, matrixDesc, dynamicOffsets, staticOffsetsAttr, + layout); +} + +LogicalResult LoadMatrixOp::verify() { + ArrayRef valueShape = getRes().getType().getShape(); + ArrayRef mdescShape = getMatrixDesc().getType().getShape(); + if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("result shape must not exceed matrix desc shape."); + return success(); +} + +//===----------------------------------------------------------------------===// 
+// XeGPU_StoreMatrixOp +//===----------------------------------------------------------------------===// +void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, + TypedValue matrixDesc, + llvm::ArrayRef offsets, Value data, + LayoutTrait layout) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + + build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data, + layout); +} + +LogicalResult StoreMatrixOp::verify() { + ArrayRef dataShape = getData().getType().getShape(); + ArrayRef mdescShape = getMatrixDesc().getType().getShape(); + if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("data shape must not exceed matrix desc shape."); + + return success(); +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 1cd817918a772..2feb010d343a8 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -778,3 +778,31 @@ func.func @create_matrix_desc_mismatch_sizes() { return } +// ----- +func.func @load_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // expected-error@+1 {{failed to verify that all of {matrix_desc, res} have same element type}} + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf32> + return +} + +// ----- +func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // expected-error@+1 {{result shape must not exceed matrix desc shape}} + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<32x16xf16> + return +} + +// ----- +func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) { + // 
expected-error@+1 {{failed to verify that all of {matrix_desc, data} have same element type}} + xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf32> + return +} + +// ----- +func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) { + // expected-error@+1 {{data shape must not exceed matrix desc shape}} + xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<32x32xf16> + return +} + diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index c224749031328..cda8f0ac1bb40 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -769,4 +769,33 @@ gpu.func @create_matrix_desc_with_stride() { gpu.return } +// CHECK: gpu.func @load_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) +gpu.func @load_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16> + gpu.return +} + +// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) +gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16> + gpu.return +} + + +// CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> + xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> + 
gpu.return +} + +// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> + xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> + gpu.return +} + } From 06eec6e51b755cbb13b62cfaa3ba2320e8bc3cb6 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Tue, 12 Aug 2025 20:33:56 +0000 Subject: [PATCH 05/14] refine description --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 42 ++++++++----------- .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 5 ++- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 461df6efb8528..f536650e9d872 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1118,10 +1118,14 @@ def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure, AllMemSizesMatch<["source", "matrix_desc"]>]> { let summary = "Create a matrix descriptor."; let description = [{ + Creates a matrix descriptor from a shared local memory (SLM) buffer. + The resulting matrix descriptor has to have the same size as the underlying + shared local memory. + Arguments: - - `source` : a base address of SLM allocation. + - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer. Results: - - `matrix_desc` : a descriptor for SLM allocation. + - `matrix_desc` : the matrix descriptor. 
}]; let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source); let results = (outs XeGPU_MatrixDesc:$matrix_desc); @@ -1141,22 +1145,16 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, $matrix_desc `` custom($offsets, $const_offsets) prop-dict attr-dict `` `:` type(operands) `->` type(results) }]; - let summary = "Load matrix from SLM."; - let description = [{ - This operation loads a matrix from the SLM using the matrix descriptor. - There are additional parameters and attributes that support loading, but they must only - be specified for a work-item level operation. - General rules: - 1. Non-WI-level code must not specify optional attributes. - 2. If the load uses `vector` semantics, all of the vector attributes must be specified. - 3. If the load uses `array` semantics, all of the array attributes must be specified. + let description = [{ + This operation reads a block of data from shared local memory (SLM) + using the provided matrix descriptor. Arguments: - - `matrix_desc` : a matrix descriptor (SLM allocation + matrix type). - - `offsets` : Coordinates of the matrix to load. + - `matrix_desc`: the matrix descriptor identifying the SLM region. + - `offsets`: the coordinates within the matrix to read from. Results: - - `res` : loaded matrix elements. + - `res`: the matrix elements loaded from SLM. }]; let builders = [ @@ -1186,20 +1184,14 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, $matrix_desc `` custom($offsets, $const_offsets) `,` $data prop-dict attr-dict `:` type(operands) }]; - let summary = "Store matrix from SLM."; let description = [{ - This operation stores workitem's `data` fragment of the matrix to the SLM (`matrix_desc`). - There are additional parameters and attributes that support loading, but they must only - be specified for a work-item level operation. - - General rules: - 1. Non-WI-level code must not specify optional attributes. - 2. 
If the store uses `vector` semantics, all of the vector attributes must be specified. + This operation writes the `data` fragment into the shared local memory region + identified by `matrix_desc`. Arguments: - - `matrix_desc` : a matrix descriptor. - - `data` : data to be stored to the matrix. - - `offsets` : Coordinates of the matrix where the data will be stored. + - `matrix_desc`: the matrix descriptor specifying the SLM region. + - `offsets`: the coordinates within the matrix where the data will be written. + - `data`: the values to be stored in the matrix. }]; let builders = [ OpBuilder<(ins "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index f578fc8bc0735..02cabce82398b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -204,8 +204,9 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> { let summary = "MatrixDesc describing the data in SLM"; let description = [{ - MatrixDesc describes a SLM region. Unleass specified via the optional layout attribute, - the data is stored contiguously in the region in row-major order by default. + MatrixDesc represents a block of data stored in shared local memory. + By default, unless a layout attribute is provided, the data is stored + contiguously in row-major order within the region. 
}]; let parameters = (ins ArrayRefParameter<"int64_t">: $shape, "mlir::Type": $elementType, From 6df4291c7fcecccc233f0b9ffea67e5edaef5d9b Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 13 Aug 2025 00:02:35 +0000 Subject: [PATCH 06/14] add subview op --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 31 ++++++++++++++++++ mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 32 ++++++++++++++++--- mlir/test/Dialect/XeGPU/invalid.mlir | 20 ++++++++++++ mlir/test/Dialect/XeGPU/ops.mlir | 14 ++++++++ 4 files changed, 93 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index f536650e9d872..0c8980bb04b2e 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1206,5 +1206,36 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, let hasVerifier = 1; } +def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOpInterface, + AllElementTypesMatch<["src", "res"]>, + AllRanksMatch<["src", "res"]>]> { + let description = [{ + Create a subview of a matrix descriptor. + Results: + - `src` : a matrix descriptor. + - `offsets` : the coordinates within the matrix the subview will be created from. 
+ }]; + let arguments = (ins XeGPU_MatrixDesc:$src, + Variadic:$offsets, + DenseI64ArrayAttr:$const_offsets, + OptionalAttr: $layout); + let results = (outs XeGPU_MatrixDesc:$res); + let assemblyFormat = [{$src `` custom($offsets, $const_offsets) prop-dict + attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}]; + let builders = [ + OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)> + ]; + + let extraClassDeclaration = [{ + mlir::Value getViewSource() { return getSrc(); } + + SmallVector getMixedOffsets() { + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); + } + }]; + + let hasVerifier = 1; +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 2051d7030340e..a8ec058a12a93 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -944,10 +944,8 @@ void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res, LayoutTrait layout) { llvm::SmallVector dynamicOffsets; llvm::SmallVector staticOffsets; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); - build(builder, state, res, matrixDesc, dynamicOffsets, staticOffsetsAttr, layout); } @@ -970,10 +968,8 @@ void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, LayoutTrait layout) { llvm::SmallVector dynamicOffsets; llvm::SmallVector staticOffsets; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); - build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data, layout); } @@ -988,6 +984,34 @@ LogicalResult StoreMatrixOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// XeGPU_MatrixDescSubviewOp 
+//===----------------------------------------------------------------------===// + +void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state, + Type resTy, Value src, + llvm::ArrayRef offsets, + LayoutTrait layout) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr, layout); +} + +LogicalResult MatrixDescSubviewOp::verify() { + ArrayRef srcShape = getSrc().getType().getShape(); + ArrayRef resShape = getRes().getType().getShape(); + if (llvm::any_of(llvm::zip_equal(resShape, srcShape), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("result shape must not exceed source shape."); + + if (getSrc().getType().getLayout() != getRes().getType().getLayout()) + return emitOpError("result must inherit the source layout."); + + return success(); +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 2feb010d343a8..63945dab1ccc2 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -806,3 +806,23 @@ func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf return } +// ----- +func.func @matrix_desc_subview_size_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // expected-error@+1 {{result shape must not exceed source shape}} + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<32x16xf16> + return +} + +// ----- +func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { + // expected-error@+1 {{result must inherit the source layout}} + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> 
!xegpu.matrix_desc<8x16xf16> + return +} + +// ----- +func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // expected-error@+1 {{failed to verify that all of {src, res} have same element type}} + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32> + return +} diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index cda8f0ac1bb40..7bceda70dea9f 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -798,4 +798,18 @@ gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, str gpu.return } +// CHECK: gpu.func @matrix_desc_subview([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) +gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) { + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> + gpu.return +} + +// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) +gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>> + gpu.return +} + } From e11c88db66366d3c61b158959f5418230ce2abbb Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 13 Aug 2025 13:57:59 +0000 Subject: [PATCH 07/14] address comments --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 6 ++++++ mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 1 + mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 3 +++ 3 files changed, 10 insertions(+) diff --git 
a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 0c8980bb04b2e..6d06464e204a6 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1153,6 +1153,9 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, Arguments: - `matrix_desc`: the matrix descriptor identifying the SLM region. - `offsets`: the coordinates within the matrix to read from. + - `layout`: [optional] An attribute for guiding distributions among + subgroups and/or work-items. It currently can accept either + LayoutAttr or SliceAttr. Results: - `res`: the matrix elements loaded from SLM. }]; @@ -1192,6 +1195,9 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, - `matrix_desc`: the matrix descriptor specifying the SLM region. - `offsets`: the coordinates within the matrix where the data will be written. - `data`: the values to be stored in the matrix. + - `layout`: [optional] An attribute for guiding distributions among + subgroups and/or work-items. It currently can accept either + LayoutAttr or SliceAttr. 
}]; let builders = [ OpBuilder<(ins "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt index 603fb5d237544..7869a28dfed57 100644 --- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt @@ -18,6 +18,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect MLIRArithUtils MLIRDialectUtils MLIRGPUDialect + MLIRXeVMDialect MLIRIR MLIRViewLikeInterface MLIRVectorDialect diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index a8ec058a12a93..1157f21230485 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/XeVMDialect.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Dialect/XeGPU/IR/XeGPU.h" @@ -28,6 +29,8 @@ bool isSharedMemory(const MemRefType &memrefTy) { return intAttr.getInt() == 3; if (auto memrefSpace = llvm::dyn_cast(attr)) return memrefSpace.getValue() == MemorySpace::SLM; + if (auto xevmSpace = llvm::dyn_cast(attr)) + return xevmSpace.getValue() == xevm::AddrSpace::SHARED; return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr); } From 23380a923cd2c2073a66fd31b70c3650869dcf3b Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 13 Aug 2025 14:30:21 +0000 Subject: [PATCH 08/14] update doc --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 6d06464e204a6..112a18f0705ab 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1220,6 +1220,9 @@ def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOp Results: - `src` : a 
matrix descriptor. - `offsets` : the coordinates within the matrix the subview will be created from. + - `layout`: [optional] An attribute for guiding distributions among + subgroups and/or work-items. It currently can accept either + LayoutAttr or SliceAttr. }]; let arguments = (ins XeGPU_MatrixDesc:$src, Variadic:$offsets, From 9e3aa8d6631fe177fd17bfdb9fd48da2ef1d5072 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 13 Aug 2025 21:25:18 +0000 Subject: [PATCH 09/14] remove the layout attribute from the subview op --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 8 ++------ mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 5 ++--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 112a18f0705ab..9ae2eb0c2e178 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1220,19 +1220,15 @@ def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOp Results: - `src` : a matrix descriptor. - `offsets` : the coordinates within the matrix the subview will be created from. - - `layout`: [optional] An attribute for guiding distributions among - subgroups and/or work-items. It currently can accept either - LayoutAttr or SliceAttr. 
}]; let arguments = (ins XeGPU_MatrixDesc:$src, Variadic:$offsets, - DenseI64ArrayAttr:$const_offsets, - OptionalAttr: $layout); + DenseI64ArrayAttr:$const_offsets); let results = (outs XeGPU_MatrixDesc:$res); let assemblyFormat = [{$src `` custom($offsets, $const_offsets) prop-dict attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}]; let builders = [ - OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)> + OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef": $offsets)> ]; let extraClassDeclaration = [{ diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 1157f21230485..27fd6797fed39 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -993,13 +993,12 @@ LogicalResult StoreMatrixOp::verify() { void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state, Type resTy, Value src, - llvm::ArrayRef offsets, - LayoutTrait layout) { + llvm::ArrayRef offsets) { llvm::SmallVector dynamicOffsets; llvm::SmallVector staticOffsets; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); - build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr, layout); + build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr); } LogicalResult MatrixDescSubviewOp::verify() { From af2c25f457f4a94a0e304196040c0484718d54ca Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 14 Aug 2025 00:06:01 +0000 Subject: [PATCH 10/14] refine subview op --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 27 ++++++++++--------- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 23 ++++++++++------ mlir/test/Dialect/XeGPU/invalid.mlir | 14 +++++++--- mlir/test/Dialect/XeGPU/ops.mlir | 15 ++++++++--- 4 files changed, 52 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td 
b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 9ae2eb0c2e178..65f805d1efa93 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1177,16 +1177,14 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, AllElementTypesMatch<["matrix_desc", "data"]>, AllRanksMatch<["matrix_desc", "data"]>]> { let arguments = (ins + XeGPU_ValueType:$data, XeGPU_MatrixDesc:$matrix_desc, Variadic: $offsets, DenseI64ArrayAttr: $const_offsets, - XeGPU_ValueType:$data, OptionalAttr:$layout ); - let assemblyFormat = [{ - $matrix_desc `` custom($offsets, $const_offsets) `,` $data - prop-dict attr-dict `:` type(operands) - }]; + let assemblyFormat = [{ $data `,` $matrix_desc `` custom($offsets, $const_offsets) + prop-dict attr-dict `` `:` type(operands)}]; let description = [{ This operation writes the `data` fragment into the shared local memory region identified by `matrix_desc`. @@ -1200,8 +1198,8 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, LayoutAttr or SliceAttr. }]; let builders = [ - OpBuilder<(ins "TypedValue": $matrix_desc, "llvm::ArrayRef": $offsets, - "Value" : $data, "LayoutTrait": $layout)>, + OpBuilder<(ins "Value" : $data, "TypedValue": $matrix_desc, + "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, ]; let extraClassDeclaration = [{ SmallVector getMixedOffsets() { @@ -1212,14 +1210,19 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, let hasVerifier = 1; } -def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOpInterface, - AllElementTypesMatch<["src", "res"]>, - AllRanksMatch<["src", "res"]>]> { +def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", + [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { let description = [{ - Create a subview of a matrix descriptor. - Results: + Creates a subview of a matrix descriptor. 
The resulting matrix descriptor + may have a lower rank than the source, in which case the dimensions are left-aligned. + + Arguments: - `src` : a matrix descriptor. - `offsets` : the coordinates within the matrix the subview will be created from. + + Results: + - `res` : a matrix descriptor with smaller size. + }]; let arguments = (ins XeGPU_MatrixDesc:$src, Variadic:$offsets, diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 27fd6797fed39..27a652663190d 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -965,15 +965,15 @@ LogicalResult LoadMatrixOp::verify() { //===----------------------------------------------------------------------===// // XeGPU_StoreMatrixOp //===----------------------------------------------------------------------===// -void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, +void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data, TypedValue matrixDesc, - llvm::ArrayRef offsets, Value data, + llvm::ArrayRef offsets, LayoutTrait layout) { llvm::SmallVector dynamicOffsets; llvm::SmallVector staticOffsets; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); - build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data, + build(builder, state, data, matrixDesc, dynamicOffsets, staticOffsetsAttr, layout); } @@ -1002,13 +1002,20 @@ void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state, } LogicalResult MatrixDescSubviewOp::verify() { - ArrayRef srcShape = getSrc().getType().getShape(); - ArrayRef resShape = getRes().getType().getShape(); - if (llvm::any_of(llvm::zip_equal(resShape, srcShape), - [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + MatrixDescType srcTy = getSrc().getType(); + MatrixDescType resTy = getRes().getType(); + ArrayRef srcShape = srcTy.getShape(); + ArrayRef 
resShape = resTy.getShape(); + + if (srcTy.getRank() < resTy.getRank()) + return emitOpError("result rank must not exceed source rank."); + + if (llvm::any_of( + llvm::zip_equal(resShape, srcShape.take_back(resShape.size())), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) return emitOpError("result shape must not exceed source shape."); - if (getSrc().getType().getLayout() != getRes().getType().getLayout()) + if (srcTy.getLayout() != resTy.getLayout()) return emitOpError("result must inherit the source layout."); return success(); diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 63945dab1ccc2..f2df1a3920e23 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -795,14 +795,14 @@ func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64x // ----- func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) { // expected-error@+1 {{failed to verify that all of {matrix_desc, data} have same element type}} - xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf32> + xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.matrix_desc<16x64xf16> return } // ----- func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) { // expected-error@+1 {{data shape must not exceed matrix desc shape}} - xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<32x32xf16> + xegpu.store_matrix %arg1, %arg0[8, 8] : vector<32x32xf16>, !xegpu.matrix_desc<16x64xf16> return } @@ -821,8 +821,16 @@ func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf } // ----- -func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { +func.func @matrix_desc_subview_element_type_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { // expected-error@+1 {{failed to verify 
that all of {src, res} have same element type}} %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32> return } + +// ----- +func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { + // expected-error@+1 {{result rank must not exceed source rank}} + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<4x8x16xf16> + return +} + diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index 7bceda70dea9f..7a9657587070a 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -786,15 +786,15 @@ gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, stri // CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> - xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16> + xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16> gpu.return } // CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>) gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> - xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, 
strided<[1, 16]>> + xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> gpu.return } @@ -805,6 +805,13 @@ gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) { gpu.return } +// CHECK: gpu.func @matrix_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) +gpu.func @matrix_desc_subview_lower_rank(%arg0: !xegpu.matrix_desc<16x64xf16>) { + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16> + gpu.return +} + // CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>> From 0531abf5c0164f483d025dd3aa3c39223e4566a4 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 14 Aug 2025 21:00:24 +0000 Subject: [PATCH 11/14] add MemLayoutAttr --- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 31 +++++++ .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 18 ++++- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 81 ++++++++++++++++--- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 4 +- mlir/test/Dialect/XeGPU/invalid.mlir | 8 +- mlir/test/Dialect/XeGPU/ops.mlir | 36 ++++----- 6 files changed, 140 insertions(+), 38 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 1f420c13ebae0..59dcbafebc515 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -527,4 +527,35 @@ def XeGPU_RangeAttr : XeGPUAttr<"Range", "range"> { let genVerifyDecl = 1; } +def XeGPU_MemLayoutAttr : XeGPUAttr<"MemLayout", "mem_layout"> { + let 
summary = [{Specifies memory layouts with named attributes.}]; + + let description = [{ + This attribute stores a collection of named attributes that describe + memory layout properties such as stride, block, etc. + }]; + + let parameters = (ins "DictionaryAttr": $attrs); + let hasCustomAssemblyFormat = 1; + + + let extraClassDeclaration = [{ + /// Get a specific attribute by name + Attribute getAttr(StringRef name) const { + return getAttrs().get(name); + } + + /// Check if a specific attribute exists + bool hasAttr(StringRef name) const { + return getAttrs().contains(name); + } + + ArrayAttr getStrides() { + return getAttrs().getAs("stride"); + } + + }]; + +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 02cabce82398b..f027f3f82c9f4 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -210,13 +210,27 @@ def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInter }]; let parameters = (ins ArrayRefParameter<"int64_t">: $shape, "mlir::Type": $elementType, - OptionalParameter<"mlir::Attribute">: $layout); + OptionalParameter<"MemLayoutAttr">: $mem_layout); let extraClassDeclaration = [{ bool hasRank() const { return true; } MatrixDescType cloneWith(std::optional> shape, Type elementType) const { - return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getLayout()); + return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getMemLayout()); + } + + ArrayAttr getStrides() { + auto layout = getMemLayout(); + if (layout && layout.hasAttr("stride")) { + return layout.getStrides(); + } + + // derive and return default strides + SmallVector defaultStrides; + llvm::append_range(defaultStrides, getShape().drop_front()); + llvm::append_values(defaultStrides, 1); + Builder builder(getContext()); + return 
builder.getI64ArrayAttr(defaultStrides); } }]; diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index ac9e994d4872c..fe5640627114b 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -427,7 +427,7 @@ RangeAttr::verify(llvm::function_ref emitError, // XeGPU_TensorDescType //===----------------------------------------------------------------------===// -mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { +mlir::Type TensorDescType::parse(AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; mlir::FailureOr encoding; @@ -477,7 +477,7 @@ mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { layout.value_or(mlir::Attribute())); } -void TensorDescType::print(::mlir::AsmPrinter &printer) const { +void TensorDescType::print(AsmPrinter &printer) const { printer << "<"; auto shape = getShape(); @@ -522,10 +522,10 @@ TensorDescType TensorDescType::get(llvm::ArrayRef shape, return Base::get(context, shape, elementType, attr, layout); } -LogicalResult TensorDescType::verify( - llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, - llvm::ArrayRef shape, mlir::Type elementType, - mlir::Attribute encoding, mlir::Attribute layout) { +LogicalResult +TensorDescType::verify(llvm::function_ref emitError, + llvm::ArrayRef shape, mlir::Type elementType, + mlir::Attribute encoding, mlir::Attribute layout) { size_t rank = shape.size(); if (rank == 0) @@ -594,10 +594,10 @@ LogicalResult TensorDescType::verify( //===----------------------------------------------------------------------===// // XeGPU_MatrixDescType //===----------------------------------------------------------------------===// -mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) { +mlir::Type MatrixDescType::parse(AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; - mlir::FailureOr layout; + mlir::FailureOr layout; // Parse literal '<' if 
(parser.parseLess()) @@ -617,7 +617,7 @@ mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) { // parse optional attributes if (mlir::succeeded(parser.parseOptionalComma())) { - mlir::Attribute attr; + MemLayoutAttr attr; ParseResult res = parser.parseAttribute(attr); if (mlir::failed(res)) return {}; @@ -631,22 +631,79 @@ mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) { MLIRContext *ctxt = parser.getContext(); return MatrixDescType::getChecked( [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape, - elementType, layout.value_or(mlir::Attribute())); + elementType, layout.value_or(MemLayoutAttr())); } -void MatrixDescType::print(::mlir::AsmPrinter &printer) const { +void MatrixDescType::print(AsmPrinter &printer) const { printer << "<"; printer.printDimensionList(getShape()); printer << 'x'; printer << getElementType(); - if (auto layout = getLayout()) + if (auto layout = getMemLayout()) printer << ", " << layout; printer << ">"; } +//===----------------------------------------------------------------------===// +// XeGPU_MatrixDescType +//===----------------------------------------------------------------------===// + +Attribute MemLayoutAttr::parse(AsmParser &parser, Type type) { + + auto context = parser.getContext(); + llvm::SMLoc loc = parser.getCurrentLocation(); + + llvm::SmallDenseSet seenKeys; + SmallVector attributes; + + auto parseElt = [&]() -> ParseResult { + StringRef nameId; + if (failed(parser.parseKeyword(&nameId))) + return parser.emitError(loc, "expected valid attribute name"); + + if (!seenKeys.insert(nameId).second) + return parser.emitError(loc, "duplicate key '") + << nameId << "' in mem layout attribute"; + + if (failed(parser.parseEqual())) + return failure(); + + Attribute attr; + if (failed(parser.parseAttribute(attr))) + return failure(); + attributes.emplace_back(nameId, attr); + return success(); + }; + + // Parse literal '<' + if (parser.parseLess()) + return {}; + + if
(failed(parser.parseCommaSeparatedList(parseElt))) + return {}; + + // Parse literal '>' + if (parser.parseGreater()) + return {}; + + return parser.getChecked( + loc, context, DictionaryAttr::get(context, attributes)); +} + +void MemLayoutAttr::print(AsmPrinter &printer) const { + printer << "<"; + ArrayRef attrs = getAttrs().getValue(); + for (size_t i = 0; i < attrs.size(); i++) { + printer << attrs[i].getName().str() << " = " << attrs[i].getValue(); + if (i < attrs.size() - 1) + printer << ", "; + } + printer << ">"; +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 27a652663190d..4465ecb25d922 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -1015,8 +1015,8 @@ LogicalResult MatrixDescSubviewOp::verify() { [](auto p) { return std::get<0>(p) > std::get<1>(p); })) return emitOpError("result shape must not exceed source shape."); - if (srcTy.getLayout() != resTy.getLayout()) - return emitOpError("result must inherit the source layout."); + if (srcTy.getStrides() != resTy.getStrides()) + return emitOpError("result must inherit the source strides."); return success(); } diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index f2df1a3920e23..79495c34abff8 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -814,16 +814,16 @@ func.func @matrix_desc_subview_size_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16 } // ----- -func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { - // expected-error@+1 {{result must inherit the source layout}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16> +func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { + // expected-error@+1 
{{result must inherit the source strides}} + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.matrix_desc<8x16xf16> return } // ----- func.func @matrix_desc_subview_element_type_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { // expected-error@+1 {{failed to verify that all of {src, res} have same element type}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32, #xegpu.mem_layout> return } diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index 7a9657587070a..edd1fc844abeb 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -763,9 +763,9 @@ gpu.func @create_matrix_desc() { // CHECK-LABEL: gpu.func @create_matrix_desc_with_stride({{.*}}) { gpu.func @create_matrix_desc_with_stride() { //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> - //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> + //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> - %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> + %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> gpu.return } @@ -776,10 +776,10 @@ gpu.func @load_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>) { gpu.return } -// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) -gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { - // CHECK: xegpu.load_matrix 
[[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16> - %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16> +// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> gpu.return } @@ -791,31 +791,31 @@ gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector< gpu.return } -// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>) -gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> - xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> +// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> + xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> gpu.return } // CHECK: gpu.func @matrix_desc_subview([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) { - //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : 
!xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> gpu.return } // CHECK: gpu.func @matrix_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) gpu.func @matrix_desc_subview_lower_rank(%arg0: !xegpu.matrix_desc<16x64xf16>) { - //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16> + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16, #xegpu.mem_layout> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16, #xegpu.mem_layout> gpu.return } -// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) -gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) { - //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>> +// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { + //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.matrix_desc<8x16xf16, 
#xegpu.mem_layout> + %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> gpu.return } From 03850886e6f39c16f364a7d6c377060936023215 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 14 Aug 2025 21:31:14 +0000 Subject: [PATCH 12/14] refine --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 5 +++-- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 13 +++++++++++++ mlir/test/Dialect/XeGPU/ops.mlir | 4 ++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 65f805d1efa93..511dadd6c5b38 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1213,8 +1213,9 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { let description = [{ - Creates a subview of a matrix descriptor. The resulting matrix descriptor - may have a lower rank than the source, in which case the dimensions are left-aligned. + Creates a subview of a matrix descriptor. The resulting matrix descriptor can have + a lower rank than the source; in this case, the result dimensions correspond to the + higher-order dimensions of the source matrix descriptor. Arguments: - `src` : a matrix descriptor. diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index f027f3f82c9f4..bf5cd4c5b070e 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -207,6 +207,19 @@ def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInter MatrixDesc represents a block of data stored in shared local memory. 
By default, unless a layout attribute is provided, the data is stored contiguously in row-major order within the region. + + Examples: + ```mlir + // A matrix of data stored in column-major order. + !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + + // A matrix of data stored in a blocked layout. Elements within the same block + // are stored contiguously in memory. Blocks are stored in row-major order. + !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + + // A matrix of data stored in column-major order with blocked layout. + !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + ``` }]; let parameters = (ins ArrayRefParameter<"int64_t">: $shape, "mlir::Type": $elementType, diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index edd1fc844abeb..7106b2667f5d0 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -793,8 +793,8 @@ gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector< // CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, [[ARG1:%.+]]: vector<16x16xf16>) gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> - xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][0, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> + xegpu.store_matrix %arg1, %arg0[0, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> gpu.return } From f6862faaf9554480cccce37de85061fdff548ddc Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Fri, 15 Aug 2025 00:40:11 +0000 Subject: [PATCH 13/14] rename matrix_desc to mem_desc --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 42 +++++------ 
.../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 16 ++--- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 10 +-- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 22 +++--- .../Transforms/XeGPUWgToSgDistribute.cpp | 4 +- mlir/test/Dialect/XeGPU/invalid.mlir | 46 ++++++------ mlir/test/Dialect/XeGPU/ops.mlir | 72 +++++++++---------- 7 files changed, 106 insertions(+), 106 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 511dadd6c5b38..4e6c5a8b1a820 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1114,8 +1114,8 @@ class AllMemSizesMatch names> : AllMatchSameOperatorTrait.result, "size in bits">; -def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure, - AllMemSizesMatch<["source", "matrix_desc"]>]> { +def XeGPU_CreateMemDescOp: XeGPU_Op<"create_mem_desc", [Pure, + AllMemSizesMatch<["source", "mem_desc"]>]> { let summary = "Create a matrix descriptor."; let description = [{ Creates a matrix descriptor from a shared local memory (SLM) buffer. @@ -1125,24 +1125,24 @@ def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure, Arguments: - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer. Results: - - `matrix_desc` : the matrix descriptor. + - `mem_desc` : the matrix descriptor. 
}]; let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source); - let results = (outs XeGPU_MatrixDesc:$matrix_desc); - let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))"; + let results = (outs XeGPU_MemDesc:$mem_desc); + let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($mem_desc))"; } def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, - AllElementTypesMatch<["matrix_desc", "res"]>, - AllRanksMatch<["matrix_desc", "res"]>]> { - let arguments = (ins XeGPU_MatrixDesc:$matrix_desc, + AllElementTypesMatch<["mem_desc", "res"]>, + AllRanksMatch<["mem_desc", "res"]>]> { + let arguments = (ins XeGPU_MemDesc:$mem_desc, Variadic: $offsets, DenseI64ArrayAttr: $const_offsets, OptionalAttr:$layout ); let results = (outs XeGPU_ValueType:$res); let assemblyFormat = [{ - $matrix_desc `` custom($offsets, $const_offsets) + $mem_desc `` custom($offsets, $const_offsets) prop-dict attr-dict `` `:` type(operands) `->` type(results) }]; @@ -1151,7 +1151,7 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, using the provided matrix descriptor. Arguments: - - `matrix_desc`: the matrix descriptor identifying the SLM region. + - `mem_desc`: the matrix descriptor identifying the SLM region. - `offsets`: the coordinates within the matrix to read from. - `layout`: [optional] An attribute for guiding distributions among subgroups and/or work-items. 
It currently can accept either @@ -1161,7 +1161,7 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, }]; let builders = [ - OpBuilder<(ins "Type":$res, "TypedValue": $matrix_desc, + OpBuilder<(ins "Type":$res, "TypedValue": $mem_desc, "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, ]; let extraClassDeclaration = [{ @@ -1174,23 +1174,23 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, } def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, - AllElementTypesMatch<["matrix_desc", "data"]>, - AllRanksMatch<["matrix_desc", "data"]>]> { + AllElementTypesMatch<["mem_desc", "data"]>, + AllRanksMatch<["mem_desc", "data"]>]> { let arguments = (ins XeGPU_ValueType:$data, - XeGPU_MatrixDesc:$matrix_desc, + XeGPU_MemDesc:$mem_desc, Variadic: $offsets, DenseI64ArrayAttr: $const_offsets, OptionalAttr:$layout ); - let assemblyFormat = [{ $data `,` $matrix_desc `` custom($offsets, $const_offsets) + let assemblyFormat = [{ $data `,` $mem_desc `` custom($offsets, $const_offsets) prop-dict attr-dict `` `:` type(operands)}]; let description = [{ This operation writes the `data` fragment into the shared local memory region - identified by `matrix_desc`. + identified by `mem_desc`. Arguments: - - `matrix_desc`: the matrix descriptor specifying the SLM region. + - `mem_desc`: the matrix descriptor specifying the SLM region. - `offsets`: the coordinates within the matrix where the data will be written. - `data`: the values to be stored in the matrix. - `layout`: [optional] An attribute for guiding distributions among @@ -1198,7 +1198,7 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, LayoutAttr or SliceAttr. 
}]; let builders = [ - OpBuilder<(ins "Value" : $data, "TypedValue": $matrix_desc, + OpBuilder<(ins "Value" : $data, "TypedValue": $mem_desc, "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, ]; let extraClassDeclaration = [{ @@ -1210,7 +1210,7 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, let hasVerifier = 1; } -def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", +def XeGPU_MemDescSubviewOp: XeGPU_Op<"mem_desc_subview", [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { let description = [{ Creates a subview of a matrix descriptor. The resulting matrix descriptor can have @@ -1225,10 +1225,10 @@ def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", - `res` : a matrix descriptor with smaller size. }]; - let arguments = (ins XeGPU_MatrixDesc:$src, + let arguments = (ins XeGPU_MemDesc:$src, Variadic:$offsets, DenseI64ArrayAttr:$const_offsets); - let results = (outs XeGPU_MatrixDesc:$res); + let results = (outs XeGPU_MemDesc:$res); let assemblyFormat = [{$src `` custom($offsets, $const_offsets) prop-dict attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}]; let builders = [ diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index bf5cd4c5b070e..6602ff94d6ae3 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -201,24 +201,24 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { }]; } -def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> { - let summary = "MatrixDesc describing the data in SLM"; +def XeGPU_MemDesc: XeGPUTypeDef<"MemDesc", "mem_desc", [ShapedTypeInterface], "mlir::Type"> { + let summary = "MemDesc describing the data in SLM"; let description = [{ - MatrixDesc represents a block of data stored in shared local memory. 
+ MemDesc represents a block of data stored in shared local memory. By default, unless a layout attribute is provided, the data is stored contiguously in row-major order within the region. Examples: ```mlir // A matrix of data stored in column-major order. - !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> // A matrix of data stored in a blocked layout. Elements within the same block // are stored contiguously in memory. Blocks are stored in row-major order. - !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> // A matrix of data stored in column-major order with blocked layout. - !xegpu.matrix_desc<128x128xf16, #xegpu.mem_layout> + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> ``` }]; let parameters = (ins ArrayRefParameter<"int64_t">: $shape, @@ -228,8 +228,8 @@ def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInter let extraClassDeclaration = [{ bool hasRank() const { return true; } - MatrixDescType cloneWith(std::optional> shape, Type elementType) const { - return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getMemLayout()); + MemDescType cloneWith(std::optional> shape, Type elementType) const { + return MemDescType::get(getContext(), shape.value_or(getShape()), elementType, getMemLayout()); } ArrayAttr getStrides() { diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index fe5640627114b..1b26542ff65a3 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -592,9 +592,9 @@ TensorDescType::verify(llvm::function_ref emitError, } //===----------------------------------------------------------------------===// -// XeGPU_MatrixDescType +// XeGPU_MemDescType //===----------------------------------------------------------------------===// -mlir::Type MatrixDescType::parse(AsmParser &parser) { +mlir::Type 
MemDescType::parse(AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; mlir::FailureOr layout; @@ -629,12 +629,12 @@ mlir::Type MatrixDescType::parse(AsmParser &parser) { return {}; MLIRContext *ctxt = parser.getContext(); - return MatrixDescType::getChecked( + return MemDescType::getChecked( [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape, elementType, layout.value_or(MemLayoutAttr())); } -void MatrixDescType::print(AsmPrinter &printer) const { +void MemDescType::print(AsmPrinter &printer) const { printer << "<"; printer.printDimensionList(getShape()); @@ -648,7 +648,7 @@ void MatrixDescType::print(AsmPrinter &printer) const { } //===----------------------------------------------------------------------===// -// XeGPU_MatrixDescType +// XeGPU_MemDescType //===----------------------------------------------------------------------===// Attribute MemLayoutAttr::parse(AsmParser &parser, Type type) { diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 4465ecb25d922..1caa37d8353bc 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -942,7 +942,7 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns, // XeGPU_LoadMatrixOp //===----------------------------------------------------------------------===// void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res, - TypedValue matrixDesc, + TypedValue matrixDesc, llvm::ArrayRef offsets, LayoutTrait layout) { llvm::SmallVector dynamicOffsets; @@ -955,7 +955,7 @@ void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res, LogicalResult LoadMatrixOp::verify() { ArrayRef valueShape = getRes().getType().getShape(); - ArrayRef mdescShape = getMatrixDesc().getType().getShape(); + ArrayRef mdescShape = getMemDesc().getType().getShape(); if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape), [](auto p) { return std::get<0>(p) > 
std::get<1>(p); })) return emitOpError("result shape must not exceed matrix desc shape."); @@ -966,7 +966,7 @@ LogicalResult LoadMatrixOp::verify() { // XeGPU_StoreMatrixOp //===----------------------------------------------------------------------===// void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data, - TypedValue matrixDesc, + TypedValue matrixDesc, llvm::ArrayRef offsets, LayoutTrait layout) { llvm::SmallVector dynamicOffsets; @@ -979,7 +979,7 @@ void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data, LogicalResult StoreMatrixOp::verify() { ArrayRef dataShape = getData().getType().getShape(); - ArrayRef mdescShape = getMatrixDesc().getType().getShape(); + ArrayRef mdescShape = getMemDesc().getType().getShape(); if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape), [](auto p) { return std::get<0>(p) > std::get<1>(p); })) return emitOpError("data shape must not exceed matrix desc shape."); @@ -988,12 +988,12 @@ LogicalResult StoreMatrixOp::verify() { } //===----------------------------------------------------------------------===// -// XeGPU_MatrixDescSubviewOp +// XeGPU_MemDescSubviewOp //===----------------------------------------------------------------------===// -void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state, - Type resTy, Value src, - llvm::ArrayRef offsets) { +void MemDescSubviewOp::build(OpBuilder &builder, OperationState &state, + Type resTy, Value src, + llvm::ArrayRef offsets) { llvm::SmallVector dynamicOffsets; llvm::SmallVector staticOffsets; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); @@ -1001,9 +1001,9 @@ void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state, build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr); } -LogicalResult MatrixDescSubviewOp::verify() { - MatrixDescType srcTy = getSrc().getType(); - MatrixDescType resTy = getRes().getType(); +LogicalResult MemDescSubviewOp::verify() { + MemDescType 
srcTy = getSrc().getType(); + MemDescType resTy = getRes().getType(); ArrayRef srcShape = srcTy.getShape(); ArrayRef resShape = resTy.getShape(); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 4a5525c8abb30..5d5d698c88cba 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -475,8 +475,8 @@ struct WgToSgElementwiseOp : public ConversionPattern { // is lowered to: // #a = #xegpu.layout // #b = #xegpu.layout -// store_matrix %1, %slm <{layout_input_0 = #a}> : vector<32x16>, matrix_desc<32x64xf32> -// %d = load_matrix %slm <{layout_result_0 = #a}> : matrix_desc<32x64xf32> -> vector<16x32xf32> +// store_matrix %1, %slm <{layout_input_0 = #a}> : vector<32x16>, mem_desc<32x64xf32> +// %d = load_matrix %slm <{layout_result_0 = #a}> : mem_desc<32x64xf32> -> vector<16x32xf32> // xegpu.convert_layout %d <{input_layout = #a, target_layout = #b}> : vector<16x32xf32> // clang-format on struct WgToSgConvertLayoutOp diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 79495c34abff8..e8ef57ca192a9 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -763,74 +763,74 @@ func.func @slice_attr_repeat_dim() { } // ----- -func.func @create_matrix_desc_non_slm() { +func.func @create_mem_desc_non_slm() { %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 1> // expected-error@+1 {{operand #0 must be statically shaped memref of 8-bit signless integer values for shared memory}} - %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 1> -> !xegpu.matrix_desc<16x64xf16> + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 1> -> !xegpu.mem_desc<16x64xf16> return } // ----- -func.func @create_matrix_desc_mismatch_sizes() { +func.func @create_mem_desc_mismatch_sizes() { %m = memref.alloca() {alignment = 1024} : 
memref<2048xi8, 3> - // expected-error@+1 {{failed to verify that all of {source, matrix_desc} have same size in bits}} - %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x32xf16> + // expected-error@+1 {{failed to verify that all of {source, mem_desc} have same size in bits}} + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x32xf16> return } // ----- -func.func @load_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>) { - // expected-error@+1 {{failed to verify that all of {matrix_desc, res} have same element type}} - %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf32> +func.func @load_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{failed to verify that all of {mem_desc, res} have same element type}} + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<8x16xf32> return } // ----- -func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64xf16>) { +func.func @load_mem_desc_invalid_result_size(%arg0: !xegpu.mem_desc<16x64xf16>) { // expected-error@+1 {{result shape must not exceed matrix desc shape}} - %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<32x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<32x16xf16> return } // ----- -func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) { - // expected-error@+1 {{failed to verify that all of {matrix_desc, data} have same element type}} - xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.matrix_desc<16x64xf16> +func.func @store_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf32>) { + // expected-error@+1 {{failed to verify that all of {mem_desc, data} have same element type}} + xegpu.store_matrix %arg1, %arg0[8, 8] : 
vector<16x16xf32>, !xegpu.mem_desc<16x64xf16> return } // ----- -func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) { +func.func @store_mem_desc_invalid_data_size(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<32x32xf16>) { // expected-error@+1 {{data shape must not exceed matrix desc shape}} - xegpu.store_matrix %arg1, %arg0[8, 8] : vector<32x32xf16>, !xegpu.matrix_desc<16x64xf16> + xegpu.store_matrix %arg1, %arg0[8, 8] : vector<32x32xf16>, !xegpu.mem_desc<16x64xf16> return } // ----- -func.func @matrix_desc_subview_size_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { +func.func @mem_desc_subview_size_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { // expected-error@+1 {{result shape must not exceed source shape}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<32x16xf16> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<32x16xf16> return } // ----- -func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { +func.func @mem_desc_subview_layout_mismatch(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { // expected-error@+1 {{result must inherit the source strides}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.matrix_desc<8x16xf16> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16> return } // ----- -func.func @matrix_desc_subview_element_type_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { +func.func @mem_desc_subview_element_type_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { // expected-error@+1 {{failed to verify that all of {src, res} have same element type}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview 
%arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf32, #xegpu.mem_layout> return } // ----- -func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { +func.func @mem_desc_subview_rank_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { // expected-error@+1 {{result rank must not exceed source rank}} - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<4x8x16xf16> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<4x8x16xf16> return } diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index 7106b2667f5d0..35342eca1354c 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -751,71 +751,71 @@ gpu.func @fence() { gpu.return } -// CHECK-LABEL: gpu.func @create_matrix_desc({{.*}}) { -gpu.func @create_matrix_desc() { +// CHECK-LABEL: gpu.func @create_mem_desc({{.*}}) { +gpu.func @create_mem_desc() { //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> - //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16> + //CHECK: [[mdesc:%.+]] = xegpu.create_mem_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16> %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> - %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16> + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16> gpu.return } -// CHECK-LABEL: gpu.func @create_matrix_desc_with_stride({{.*}}) { -gpu.func @create_matrix_desc_with_stride() { +// CHECK-LABEL: gpu.func @create_mem_desc_with_stride({{.*}}) { +gpu.func @create_mem_desc_with_stride() { //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> - //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, 
#xegpu.mem_layout> + //CHECK: [[mdesc:%.+]] = xegpu.create_mem_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> - %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> gpu.return } -// CHECK: gpu.func @load_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) -gpu.func @load_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>) { - // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16> - %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16> +// CHECK: gpu.func @load_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @load_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16> gpu.return } -// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) -gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { - // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> - %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> +// CHECK: gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> gpu.return } -// CHECK: 
gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) -gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16> - xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16> +// CHECK: gpu.func @store_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> + xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> gpu.return } -// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, [[ARG1:%.+]]: vector<16x16xf16>) -gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>, %arg1: vector<16x16xf16>) { - // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][0, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> - xegpu.store_matrix %arg1, %arg0[0, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> +// CHECK: gpu.func @store_mem_desc_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][0, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> + xegpu.store_matrix %arg1, %arg0[0, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> gpu.return } -// CHECK: gpu.func @matrix_desc_subview([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) -gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) { - //CHECK: 
xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> +// CHECK: gpu.func @mem_desc_subview([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @mem_desc_subview(%arg0: !xegpu.mem_desc<16x64xf16>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> gpu.return } -// CHECK: gpu.func @matrix_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) -gpu.func @matrix_desc_subview_lower_rank(%arg0: !xegpu.matrix_desc<16x64xf16>) { - //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16, #xegpu.mem_layout> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16, #xegpu.mem_layout> +// CHECK: gpu.func @mem_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @mem_desc_subview_lower_rank(%arg0: !xegpu.mem_desc<16x64xf16>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout> gpu.return } -// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) -gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout>) { - //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> - %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, #xegpu.mem_layout> -> 
!xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout> +// CHECK: gpu.func @mem_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @mem_desc_subview_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> gpu.return } From 552c8716df510b0a27eddcc3fb924d1c7d20f474 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Fri, 15 Aug 2025 01:00:11 +0000 Subject: [PATCH 14/14] update docs --- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 24 +++++++++---------- .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 6 ++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 4e6c5a8b1a820..d5e2db0f7551d 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1116,16 +1116,16 @@ class AllMemSizesMatch names> : def XeGPU_CreateMemDescOp: XeGPU_Op<"create_mem_desc", [Pure, AllMemSizesMatch<["source", "mem_desc"]>]> { - let summary = "Create a matrix descriptor."; + let summary = "Create a memory descriptor."; let description = [{ - Creates a matrix descriptor from a shared local memory (SLM) buffer. - The resulting matrix descriptor has to have the same size as the underlying - shared local memory. + Creates a memory descriptor from a shared local memory (SLM) buffer, and xegpu + specific memory layout. The resulting memory descriptor has to have the same size + as the underlying shared local memory. Arguments: - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer. Results: - - `mem_desc` : the matrix descriptor. + - `mem_desc` : the memory descriptor. 
}]; let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source); let results = (outs XeGPU_MemDesc:$mem_desc); @@ -1148,10 +1148,10 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, let description = [{ This operation reads a block of data from shared local memory (SLM) - using the provided matrix descriptor. + using the provided memory descriptor. Arguments: - - `mem_desc`: the matrix descriptor identifying the SLM region. + - `mem_desc`: the memory descriptor identifying the SLM region. - `offsets`: the coordinates within the matrix to read from. - `layout`: [optional] An attribute for guiding distributions among subgroups and/or work-items. It currently can accept either @@ -1190,7 +1190,7 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, identified by `mem_desc`. Arguments: - - `mem_desc`: the matrix descriptor specifying the SLM region. + - `mem_desc`: the memory descriptor specifying the SLM region. - `offsets`: the coordinates within the matrix where the data will be written. - `data`: the values to be stored in the matrix. - `layout`: [optional] An attribute for guiding distributions among @@ -1213,16 +1213,16 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, def XeGPU_MemDescSubviewOp: XeGPU_Op<"mem_desc_subview", [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { let description = [{ - Creates a subview of a matrix descriptor. The resulting matrix descriptor can have + Creates a subview of a memory descriptor. The resulting memory descriptor can have a lower rank than the source; in this case, the result dimensions correspond to the - higher-order dimensions of the source matrix descriptor. + higher-order dimensions of the source memory descriptor. Arguments: - - `src` : a matrix descriptor. + - `src` : a memory descriptor. - `offsets` : the coordinates within the matrix the subview will be created from. 
Results: - - `res` : a matrix descriptor with smaller size. + - `res` : a memory descriptor with smaller size. }]; let arguments = (ins XeGPU_MemDesc:$src, diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 6602ff94d6ae3..a4411ec8620da 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -210,14 +210,14 @@ def XeGPU_MemDesc: XeGPUTypeDef<"MemDesc", "mem_desc", [ShapedTypeInterface], "m Examples: ```mlir - // A matrix of data stored in column-major order. + // A block of data stored in column-major order. !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> - // A matrix of data stored in a blocked layout. Elements within the same block + // A block of data stored in a blocked layout. Elements within the same block // are stored contiguously in memory. Blocks are stored in row-major order. !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> - // A matrix of data stored in column-major order with blocked layout. + // A block of data stored in column-major order with blocked layout. !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> ``` }];