From ad5d0a88a4f065dc3720d977c8e3d125c5b768b8 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 21 Aug 2025 17:58:25 +0000 Subject: [PATCH 1/7] rename getLayoutAttr util --- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 66 +++++++++++++++++++ .../mlir/Dialect/XeGPU/IR/XeGPUDialect.td | 2 +- .../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 27 ++++---- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 25 ++++--- .../XeGPU/Transforms/XeGPUBlocking.cpp | 16 ++--- .../Transforms/XeGPUSubgroupDistribute.cpp | 5 +- .../Transforms/XeGPUWgToSgDistribute.cpp | 26 ++++---- mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 30 ++++----- 8 files changed, 132 insertions(+), 65 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index b4d696444cc44..5b4b376157c00 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -185,6 +185,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { InterfaceMethod<"Check the availability of workgroup level layouts", "bool", "isForWorkgroup">, + InterfaceMethod<"Check the availability of subgroup level layouts", + "bool", + "isForSubgroup">, InterfaceMethod<"Get the rank of attribute", "int64_t", "getRank">, @@ -202,6 +205,15 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { InterfaceMethod<"Get the SgData field of the attribute as integer array", "std::optional>", "getSgDataAsInt">, + InterfaceMethod<"Get the InstData field of the attribute as integer array", + "std::optional>", + "getInstDataAsInt">, + InterfaceMethod<"Get the LaneLayout field of the attribute as integer array", + "std::optional>", + "getLaneLayoutAsInt">, + InterfaceMethod<"Get the LaneData field of the attribute as integer array", + "std::optional>", + "getLaneDataAsInt">, InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData", "xegpu::DistributeLayoutAttr", "dropSgLayoutAndData">, @@ -388,6 +400,24 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { return std::nullopt; } + std::optional> getInstDataAsInt() const { + if (DenseI32ArrayAttr inst = getInstData()) + return llvm::to_vector_of(inst.asArrayRef()); + return std::nullopt; + } + + std::optional> getLaneLayoutAsInt() const { + if (DenseI32ArrayAttr layout = getLaneLayout()) + return llvm::to_vector_of(layout.asArrayRef()); + return std::nullopt; + } + + std::optional> getLaneDataAsInt() const { + if (DenseI32ArrayAttr data = getLaneData()) + return llvm::to_vector_of(data.asArrayRef()); + return std::nullopt; + } + /// Delinearizes a linear subgroup ID into its multidimensional indices /// based on the effective subgroup layout. FailureOr> @@ -488,6 +518,42 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { return std::nullopt; } + /// Returns the InstData of the attribute, computed by applying + /// the slice dimensions to the underlying LayoutAttr. + std::optional> getInstDataAsInt() const { + SliceAttr attr = flatten(); + auto parent = dyn_cast(attr.getParent()); + if (auto inst = parent.getInstDataAsInt()) { + ArrayRef dims = attr.getDims().asArrayRef(); + return XeGPUDialect::slice(llvm::ArrayRef(*inst), dims); + } + return std::nullopt; + } + + /// Returns the LaneLayout of the attribute, computed by applying + /// the slice dimensions to the underlying LayoutAttr. + std::optional> getLaneLayoutAsInt() const { + SliceAttr attr = flatten(); + auto parent = dyn_cast(attr.getParent()); + if (auto layout = parent.getLaneLayoutAsInt()) { + ArrayRef dims = attr.getDims().asArrayRef(); + return XeGPUDialect::slice(llvm::ArrayRef(*layout), dims); + } + return std::nullopt; + } + + /// Returns the LaneData of the attribute, computed by applying + /// the slice dimensions to the underlying LayoutAttr. + std::optional> getLaneDataAsInt() const { + SliceAttr attr = flatten(); + auto parent = dyn_cast(attr.getParent()); + if (auto data = parent.getLaneDataAsInt()) { + ArrayRef dims = attr.getDims().asArrayRef(); + return XeGPUDialect::slice(llvm::ArrayRef(*data), dims); + } + return std::nullopt; + } + SliceAttr dropSgLayoutAndData() { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td index 76d58e5ea2424..c173b93face98 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td @@ -40,7 +40,7 @@ def XeGPU_Dialect : Dialect { let extraClassDeclaration = [{ /// Checks if the given shape can be evenly distributed based on the layout /// and data factors provided by the LayoutAttr. - static bool isEvenlyDistributable(llvm::ArrayRef shape, xegpu::LayoutAttr attr); + static bool isEvenlyDistributable(llvm::ArrayRef shape, xegpu::DistributeLayoutAttr attr); /// drops/slices the shape in the specified dims, and return the rest. e.g., /// for shape = [32, 64, 8], dims = [0, 2], it will return [64] diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index b2b2d3ab85231..010199083add9 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -21,6 +21,7 @@ class ValueRange; class TypeConverter; namespace xegpu { +class DistributeLayoutAttr; class LayoutAttr; class TensorDescType; } // namespace xegpu @@ -60,22 +61,22 @@ FailureOr getDistributedVectorType(xegpu::TensorDescType tdescTy); FailureOr getDistributedVectorType(VectorType originalType, LayoutAttr layout); -/// Return the attribute name for the OpOperand to attach LayoutAttr +/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr std::string getLayoutName(const OpOperand &operand); -/// Return the attribute name for the OpResult to attach LayoutAttr +/// Return the attribute name for the OpResult to attach DistributeLayoutAttr std::string getLayoutName(const OpResult result); -/// Retrieves the LayoutAttr associated with a given Value. For TensorDescType -/// values, the LayoutAttr is extracted from the TensorDescType itself. For +/// Retrieves the DistributeLayoutAttr associated with a given Value. For TensorDescType +/// values, the DistributeLayoutAttr is extracted from the TensorDescType itself. For /// other values, it is obtained from the attributes of the defining operation. -/// Returns nullptr if no LayoutAttr is found. -LayoutAttr getLayoutAttr(const Value value); +/// Returns nullptr if no DistributeLayoutAttr is found. +DistributeLayoutAttr getDistributeLayoutAttr(const Value value); -/// Retrieves the LayoutAttr associated with a given OpOperand. It will +/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will /// first check the operand_layout_{id} of the owner operation. If not found, /// it will check the operand itself and its defining op. -LayoutAttr getLayoutAttr(const OpOperand &opr); +DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr); /// Removes the LayoutAttr for a given OpOperand or OpResult if it exists. template >> void removeLayoutAttr(const T &operandOrResult); -/// Removes the LayoutAttr for each OpOperand and OpResult of the given +/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given /// operation if they exist. If the operation contains regions, it is also /// applied recursively to the contained operations void removeLayoutAttrs(Operation *op); -/// Sets the LayoutAttr for a given OpOperand or OpResult by attaching +/// Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching /// it to the owner's dictionary attributes template || std::is_same_v>> -void setLayoutAttr(const T &operandOrResult, const LayoutAttr layout); +void setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout); -/// Set the LayoutAttr for each OpOperand and OpResult of the given operation. +/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation. /// If the operation contains regions, it is also applied recursively to the /// contained operations void setLayoutAttrs(Operation *op, - function_ref getLayoutImpl); + function_ref getLayoutImpl); /// Extract a set of small vectors from a value with a given shape using /// vector.extract_stride_slice diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index a2d708be0e937..2079848c878a3 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -91,7 +91,7 @@ genOffsetsComputingInsts(OpBuilder &builder, Location loc, // Checks if the given shape can be evenly distributed based on the layout // and data factors provided by the LayoutAttr. bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, - xegpu::LayoutAttr attr) { + xegpu::DistributeLayoutAttr attr) { assert(attr && "Layout attribute is missing."); // Checks whether the given shape can be evenly distributed using the @@ -104,52 +104,51 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, // smaller than `layout[i] * data[i]`, allowing multiple compute units to // share the data. auto tryDistribute = [&](llvm::ArrayRef shape, - DenseI32ArrayAttr layout, DenseI32ArrayAttr data, + std::optional> layout, + std::optional> data, bool rr = true) -> optional> { llvm::SmallVector newShape(shape); if (layout) { - auto vec = llvm::to_vector_of(layout.asArrayRef()); - if (vec.size() != shape.size()) + if ((*layout).size() != shape.size()) return std::nullopt; - auto ratio = computeShapeRatio(shape, vec); + auto ratio = computeShapeRatio(shape, *layout); if (!ratio.has_value()) return std::nullopt; newShape = ratio.value(); } if (data) { - auto vec = llvm::to_vector_of(data.asArrayRef()); - if (vec.size() != shape.size()) + if ((*data).size() != shape.size()) return std::nullopt; - auto ratio = computeShapeRatio(newShape, vec); + auto ratio = computeShapeRatio(newShape, *data); if (!ratio.has_value() && rr) - ratio = computeShapeRatio(vec, newShape); + ratio = computeShapeRatio(*data, newShape); if (!ratio.has_value()) return std::nullopt; // if data is not null, we always return it for next phase. - newShape = vec; + newShape = *data; } return newShape; }; // check the sgLayout and sgData auto maybeSgShape = - tryDistribute(shape, attr.getSgLayout(), attr.getSgData()); + tryDistribute(shape, attr.getSgLayoutAsInt(), attr.getSgDataAsInt()); if (!maybeSgShape) return false; auto sgShape = maybeSgShape.value(); // check InstData, it neither have layout nor need round-robin auto maybeInstShape = - tryDistribute(sgShape, nullptr, attr.getInstData(), false); + tryDistribute(sgShape, std::nullopt, attr.getInstDataAsInt(), false); if (!maybeInstShape) return false; auto instShape = maybeInstShape.value(); // check LaneLayout and LaneData auto maybeLaneShape = - tryDistribute(instShape, attr.getLaneLayout(), attr.getLaneData(), false); + tryDistribute(instShape, attr.getLaneLayoutAsInt(), attr.getLaneDataAsInt(), false); return maybeLaneShape.has_value(); } diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index b3144e4c1e55d..c62597df1f895 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -140,10 +140,10 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const { else value = (Value)operandOrResult; - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(operandOrResult); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult); if (layout && layout.isForSubgroup()) { - if (auto inst_data = layout.getInstData()) - return llvm::to_vector_of(inst_data.asArrayRef()); + if (auto inst_data = layout.getInstDataAsInt()) + return inst_data.value(); if (auto type = dyn_cast(value.getType())) return llvm::to_vector(type.getShape()); @@ -204,12 +204,12 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { // skip the op if any of its operands or results has workgroup level layouts bool hasWgLayoutOperands = llvm::any_of(op->getOpOperands(), [](OpOperand &opr) { - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(opr); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr); return layout && layout.isForWorkgroup(); }); bool hasWgLayoutResults = llvm::any_of(op->getOpResults(), [](OpResult result) { - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(result); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result); return layout && layout.isForWorkgroup(); }); if (hasWgLayoutOperands || hasWgLayoutResults) { @@ -220,8 +220,8 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { auto isUnrollable = [](Value value, ArrayRef tileShape) { Type valTy = value.getType(); if (auto tdescTy = dyn_cast(valTy)) { - xegpu::LayoutAttr layout = tdescTy.getLayoutAttr(); - return layout && layout.getInstData(); + xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr(); + return layout && layout.getInstDataAsInt(); } auto shapedType = dyn_cast(valTy); return shapedType && !llvm::equal(tileShape, shapedType.getShape()); @@ -247,7 +247,7 @@ void XeGPUBlockingPass::runOnOperation() { // Preserve the LayoutAttr for each operand to the owner's DictionaryAttr. // This ensures that the LayoutAttr remains accessible even if the defining // operation is replaced. - xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getLayoutAttr(v); }); + xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); }); auto getTileShapeAndCount = [](llvm::ArrayRef shape, xegpu::LayoutAttr layout) { diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index 2088c3c7fc5ec..de9378bd7a6f6 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -841,7 +841,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() { if (!isa(operand.get().getType())) continue; - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(operand); + auto layout = dyn_cast(xegpu::getDistributeLayoutAttr(operand)); if (!layout) { op->emitError("Could not find layout attribute for operand ") << operand.getOperandNumber() << " of operation " << op->getName(); @@ -882,7 +882,8 @@ void XeGPUSubgroupDistributePass::runOnOperation() { if (vecRank == 0) return AffineMap::get(val.getContext()); // Get the layout of the vector type. - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(val); + // TODO: support more layout types + auto layout = dyn_cast(xegpu::getDistributeLayoutAttr(val)); // If no layout is specified, assume the inner most dimension is distributed // for now. if (!layout) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 93b4efcd125ec..c60f9e361bf8e 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -406,7 +406,7 @@ struct WgToSgDpasOp : public OpConversionPattern { if (resultTy.getRank() != 2) return failure(); - auto originalLayout = xegpu::getLayoutAttr(op.getResult()); + auto originalLayout = xegpu::getDistributeLayoutAttr(op.getResult()); if (!originalLayout) return failure(); @@ -470,8 +470,8 @@ struct WgToSgVectorBroadcastOp VectorType resultType = op.getResult().getType(); ArrayRef wgShape = resultType.getShape(); - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op.getResult()); - if (!layout || !layout.getSgLayout()) + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); + if (!layout || !layout.isForWorkgroup()) return failure(); // TODO: Currently only supports cases where the source and result ranks @@ -487,8 +487,8 @@ struct WgToSgVectorBroadcastOp // Check if the output layout is distributable SmallVector sgLayout; - if (auto sgLayoutAttr = layout.getSgLayout()) - sgLayout = llvm::to_vector_of(sgLayoutAttr.asArrayRef()); + if (auto maybeSgLayout = layout.getSgLayoutAsInt()) + sgLayout = *maybeSgLayout; else return failure(); @@ -535,8 +535,8 @@ struct WgToSgElementwiseOp : public ConversionPattern { ArrayRef wgShape = resultType.getShape(); - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op->getResult(0)); - if (!layout || !layout.getSgLayout()) + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); + if (!layout || !layout.isForWorkgroup()) return failure(); SmallVector sgShape = getSgShapeAndCount(wgShape, layout).first; @@ -737,8 +737,8 @@ struct WgToSgArithConstantOp : public OpConversionPattern { if (!vecAttr || !vecAttr.isSplat() || !vecType) return failure(); - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op.getResult()); - if (!layout || !layout.getSgLayout()) + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); + if (!layout || !layout.isForWorkgroup()) return failure(); ArrayRef wgShape = vecType.getShape(); @@ -928,7 +928,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() { }); target.addDynamicallyLegalOp([=](xegpu::DpasOp op) -> bool { - auto layout = xegpu::getLayoutAttr(op.getResult()); + auto layout = xegpu::getDistributeLayoutAttr(op.getResult()); return isLegal(layout); }); @@ -947,12 +947,12 @@ void XeGPUWgToSgDistributePass::runOnOperation() { auto vecType = dyn_cast(op.getType()); if (!vecType) return true; - return isLegal(xegpu::getLayoutAttr(op.getResult())); + return isLegal(xegpu::getDistributeLayoutAttr(op.getResult())); }); target.addDynamicallyLegalOp( [=](vector::BroadcastOp op) -> bool { - return isLegal(xegpu::getLayoutAttr(op.getResult())); + return isLegal(xegpu::getDistributeLayoutAttr(op.getResult())); }); target.addDynamicallyLegalOp( @@ -980,7 +980,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() { } } - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op->getResult(0)); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); return isLegal(layout); }); diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp index 6835f64ad8ef7..5ae025ef34739 100644 --- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp +++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp @@ -114,7 +114,7 @@ std::string xegpu::getLayoutName(const OpResult result) { return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str(); } -xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) { +xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) { if (!value) return nullptr; @@ -132,11 +132,11 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) { // for LoadNdOp, the layout is stored in the tensor descriptor if (auto loadNd = dyn_cast(defOp)) - return getLayoutAttr(loadNd.getTensorDesc()); + return getDistributeLayoutAttr(loadNd.getTensorDesc()); std::string layoutName = getLayoutName(result); if (defOp->hasAttr(layoutName)) - return defOp->getAttrOfType(layoutName); + return defOp->getAttrOfType(layoutName); } if (auto arg = dyn_cast(value)) { @@ -144,41 +144,41 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) { if (auto loop = dyn_cast(parentOp)) { OpOperand *tiedInit = loop.getTiedLoopInit(arg); if (tiedInit) - return getLayoutAttr(tiedInit->get()); + return getDistributeLayoutAttr(tiedInit->get()); } } return nullptr; } -xegpu::LayoutAttr xegpu::getLayoutAttr(const OpOperand &opr) { +xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr) { Operation *op = opr.getOwner(); std::string layoutName = xegpu::getLayoutName(opr); if (op->hasAttr(layoutName)) - return op->getAttrOfType(layoutName); - return getLayoutAttr(opr.get()); + return op->getAttrOfType(layoutName); + return getDistributeLayoutAttr(opr.get()); } template -void xegpu::setLayoutAttr(const T &operandOrResult, const LayoutAttr layout) { +void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout) { Operation *owner = operandOrResult.getOwner(); std::string name = xegpu::getLayoutName(operandOrResult); - if (layout && !owner->hasAttrOfType(name)) + if (layout && !owner->hasAttrOfType(name)) owner->setAttr(name, layout); } // Explicit instantiation for OpResult template void xegpu::setLayoutAttr(const mlir::OpResult &result, - const mlir::xegpu::LayoutAttr layout); + const mlir::xegpu::DistributeLayoutAttr layout); // Explicit instantiation for OpOperand template void xegpu::setLayoutAttr(const mlir::OpOperand &operand, - const mlir::xegpu::LayoutAttr layout); + const mlir::xegpu::DistributeLayoutAttr layout); void xegpu::setLayoutAttrs(Operation *op, - function_ref getLayoutImpl) { + function_ref getLayoutImpl) { op->walk([&](Operation *nestOp) { for (OpOperand &opr : nestOp->getOpOperands()) { auto layout = getLayoutImpl(opr.get()); @@ -195,7 +195,7 @@ template void xegpu::removeLayoutAttr(const T &operandOrResult) { Operation *owner = operandOrResult.getOwner(); std::string name = xegpu::getLayoutName(operandOrResult); - if (owner->hasAttrOfType(name)) + if (owner->hasAttrOfType(name)) owner->removeAttr(name); } @@ -306,7 +306,7 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType( if (!inputTy || !resultTy) return WalkResult::skip(); - xegpu::LayoutAttr layout = xegpu::getLayoutAttr(input); + xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(input); if (!layout) return WalkResult::skip(); @@ -344,7 +344,7 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType( } { // perform the conversion from RankedTensorType to VectorType based on the - // LayoutAttr + // DistributeLayoutAttr // Handle the UnrealizedConversionCastOp introduced by the first step. // For vector->RankedTensorType, it will simply forward the inputs. From 0e34f36690a34f071afd181649b8f86c90dde9b4 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 21 Aug 2025 18:10:49 +0000 Subject: [PATCH 2/7] refine --- .../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 17 +++++++++++--- .../XeGPU/Transforms/XeGPUBlocking.cpp | 5 ++-- .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 4 ++-- .../Transforms/XeGPUSubgroupDistribute.cpp | 7 +++--- .../Transforms/XeGPUWgToSgDistribute.cpp | 10 ++++---- mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 23 ++++++++++--------- 6 files changed, 40 insertions(+), 26 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index 010199083add9..7089559d0c51b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -73,11 +73,21 @@ std::string getLayoutName(const OpResult result); /// Returns nullptr if no DistributeLayoutAttr is found. DistributeLayoutAttr getDistributeLayoutAttr(const Value value); +template +AttrTy getDistributeLayoutAttrOfType(const Value value) { + return dyn_cast_if_present(getDistributeLayoutAttr(value)); +} + /// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will /// first check the operand_layout_{id} of the owner operation. If not found, /// it will check the operand itself and its defining op. DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr); +template +AttrTy getDistributeLayoutAttrOfType(const OpOperand &opr) { + return dyn_cast_if_present(getDistributeLayoutAttr(opr)); +} + /// Removes the LayoutAttr for a given OpOperand or OpResult if it exists. template || @@ -94,13 +104,14 @@ void removeLayoutAttrs(Operation *op); template || std::is_same_v>> -void setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout); +void setDistributeLayoutAttr(const T &operandOrResult, + const DistributeLayoutAttr layout); /// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation. /// If the operation contains regions, it is also applied recursively to the /// contained operations -void setLayoutAttrs(Operation *op, - function_ref getLayoutImpl); +void setDistributeLayoutAttrs( + Operation *op, function_ref getLayoutImpl); /// Extract a set of small vectors from a value with a given shape using /// vector.extract_stride_slice diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index c62597df1f895..2e3e40ed2d457 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -247,7 +247,8 @@ void XeGPUBlockingPass::runOnOperation() { // Preserve the LayoutAttr for each operand to the owner's DictionaryAttr. // This ensures that the LayoutAttr remains accessible even if the defining // operation is replaced. - xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); }); + xegpu::setDistributeLayoutAttrs( + op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); }); auto getTileShapeAndCount = [](llvm::ArrayRef shape, xegpu::LayoutAttr layout) { @@ -377,7 +378,7 @@ void XeGPUBlockingPass::runOnOperation() { if (auto layout = op->getAttrOfType(name)) { op->removeAttr(name); if (!isa(op)) - xegpu::setLayoutAttr(result, layout.dropInstData()); + xegpu::setDistributeLayoutAttr(result, layout.dropInstData()); } } diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index bef88042fc663..5cb47b2accd68 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -718,7 +718,7 @@ static LogicalResult updateOp(mlir::OpBuilder &builder, mlir::Operation *op, } // If the result is a vector type, add a temporary layout attribute to the // op. - xegpu::setLayoutAttr(result, layout); + xegpu::setDistributeLayoutAttr(result, layout); } return success(); } @@ -800,7 +800,7 @@ updateControlFlowOps(mlir::OpBuilder &builder, // If the type is a vector type and this region argument is an OpResult, // set the layout attribute on the OpResult. if (auto result = dyn_cast(successorInput)) - xegpu::setLayoutAttr(result, successorOperandLayout); + xegpu::setDistributeLayoutAttr(result, successorOperandLayout); } } return success(); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index de9378bd7a6f6..e48e2180197ec 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -841,14 +841,15 @@ void XeGPUSubgroupDistributePass::runOnOperation() { if (!isa(operand.get().getType())) continue; - auto layout = dyn_cast(xegpu::getDistributeLayoutAttr(operand)); + auto layout = + xegpu::getDistributeLayoutAttrOfType(operand); if (!layout) { op->emitError("Could not find layout attribute for operand ") << operand.getOperandNumber() << " of operation " << op->getName(); signalPassFailure(); return; } - xegpu::setLayoutAttr(operand, layout); + xegpu::setDistributeLayoutAttr(operand, layout); } }); // Step 2: Move all operations of a GPU function inside @@ -883,7 +884,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() { return AffineMap::get(val.getContext()); // Get the layout of the vector type. // TODO: support more layout types - auto layout = dyn_cast(xegpu::getDistributeLayoutAttr(val)); + auto layout = xegpu::getDistributeLayoutAttrOfType(val); // If no layout is specified, assume the inner most dimension is distributed // for now. if (!layout) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index c60f9e361bf8e..a8700ca73efc4 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -429,8 +429,8 @@ struct WgToSgDpasOp : public OpConversionPattern { VectorType resTy = VectorType::get({aVecShape[0], bVecShape[1]}, resultTy.getElementType()); tmpC = xegpu::DpasOp::create(rewriter, loc, resTy, operands); - xegpu::setLayoutAttr(cast(tmpC), - originalLayout.dropSgLayoutAndData()); + xegpu::setDistributeLayoutAttr(cast(tmpC), + originalLayout.dropSgLayoutAndData()); newDpasOps.push_back(tmpC); } @@ -508,8 +508,8 @@ struct WgToSgVectorBroadcastOp for (auto operand : adaptor.getOperands().front()) { auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(), newResultType, operand); - xegpu::setLayoutAttr(newBroadcast->getResult(0), - layout.dropSgLayoutAndData()); + xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0), + layout.dropSgLayoutAndData()); newBroadcastOps.push_back(newBroadcast.getResult()); } @@ -755,7 +755,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern { auto cstOp = arith::ConstantOp::create(rewriter, op.getLoc(), newType, sgAttr); if (auto newLayout = layout.dropSgLayoutAndData()) - xegpu::setLayoutAttr(cstOp->getResult(0), newLayout); + xegpu::setDistributeLayoutAttr(cstOp->getResult(0), newLayout); SmallVector newConsts(count, cstOp); rewriter.replaceOpWithMultiple(op, {newConsts}); diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp index 5ae025ef34739..1d4de68754c20 100644 --- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp +++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp @@ -160,7 +160,8 @@ xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr) } template -void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout) { +void xegpu::setDistributeLayoutAttr(const T &operandOrResult, + const DistributeLayoutAttr layout) { Operation *owner = operandOrResult.getOwner(); std::string name = xegpu::getLayoutName(operandOrResult); if (layout && !owner->hasAttrOfType(name)) @@ -168,25 +169,25 @@ void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr l } // Explicit instantiation for OpResult -template void -xegpu::setLayoutAttr(const mlir::OpResult &result, - const mlir::xegpu::DistributeLayoutAttr layout); +template void xegpu::setDistributeLayoutAttr( + const mlir::OpResult &result, + const mlir::xegpu::DistributeLayoutAttr layout); // Explicit instantiation for OpOperand -template void -xegpu::setLayoutAttr(const mlir::OpOperand &operand, - const mlir::xegpu::DistributeLayoutAttr layout); +template void xegpu::setDistributeLayoutAttr( + const mlir::OpOperand &operand, + const mlir::xegpu::DistributeLayoutAttr layout); -void xegpu::setLayoutAttrs(Operation *op, - function_ref getLayoutImpl) { +void xegpu::setDistributeLayoutAttrs( + Operation *op, function_ref getLayoutImpl) { op->walk([&](Operation *nestOp) { for (OpOperand &opr : nestOp->getOpOperands()) { auto layout = getLayoutImpl(opr.get()); - setLayoutAttr(opr, layout); + setDistributeLayoutAttr(opr, layout); } for (OpResult result : nestOp->getOpResults()) { auto layout = getLayoutImpl(result); - setLayoutAttr(result, layout); + setDistributeLayoutAttr(result, layout); } }); } From a84014ff42002dc5b036558c62e5387536e74019 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 21 Aug 2025 18:12:17 +0000 Subject: [PATCH 3/7] format --- .../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 25 ++++++++++--------- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 4 +-- .../XeGPU/Transforms/XeGPUBlocking.cpp | 9 ++++--- .../Transforms/XeGPUWgToSgDistribute.cpp | 12 ++++++--- mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 6 +++-- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index 7089559d0c51b..82fd70571c022 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -67,10 +67,11 @@ std::string getLayoutName(const OpOperand &operand); /// Return the attribute name for the OpResult to attach DistributeLayoutAttr std::string getLayoutName(const OpResult result); -/// Retrieves the DistributeLayoutAttr associated with a given Value. For TensorDescType -/// values, the DistributeLayoutAttr is extracted from the TensorDescType itself. For -/// other values, it is obtained from the attributes of the defining operation. -/// Returns nullptr if no DistributeLayoutAttr is found. +/// Retrieves the DistributeLayoutAttr associated with a given Value. For +/// TensorDescType values, the DistributeLayoutAttr is extracted from the +/// TensorDescType itself. For other values, it is obtained from the attributes +/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is +/// found. DistributeLayoutAttr getDistributeLayoutAttr(const Value value); template @@ -78,9 +79,9 @@ AttrTy getDistributeLayoutAttrOfType(const Value value) { return dyn_cast_if_present(getDistributeLayoutAttr(value)); } -/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will -/// first check the operand_layout_{id} of the owner operation. If not found, -/// it will check the operand itself and its defining op. +/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It +/// will first check the operand_layout_{id} of the owner operation. If not +/// found, it will check the operand itself and its defining op. DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr); template @@ -94,8 +95,8 @@ template >> void removeLayoutAttr(const T &operandOrResult); -/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given -/// operation if they exist. If the operation contains regions, it is also +/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the +/// given operation if they exist. If the operation contains regions, it is also /// applied recursively to the contained operations void removeLayoutAttrs(Operation *op); @@ -107,9 +108,9 @@ template getLayoutImpl); diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 2079848c878a3..6de6049facfc6 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -147,8 +147,8 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, auto instShape = maybeInstShape.value(); // check LaneLayout and LaneData - auto maybeLaneShape = - tryDistribute(instShape, attr.getLaneLayoutAsInt(), attr.getLaneDataAsInt(), false); + auto maybeLaneShape = tryDistribute(instShape, attr.getLaneLayoutAsInt(), + attr.getLaneDataAsInt(), false); return maybeLaneShape.has_value(); } diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 2e3e40ed2d457..45fed8e548a89 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -140,7 +140,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const { else value = (Value)operandOrResult; - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(operandOrResult); if (layout && layout.isForSubgroup()) { if (auto inst_data = layout.getInstDataAsInt()) return inst_data.value(); @@ -204,12 +205,14 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { // skip the op if any of its operands or results has workgroup level layouts bool hasWgLayoutOperands = llvm::any_of(op->getOpOperands(), [](OpOperand &opr) { - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(opr); return layout && layout.isForWorkgroup(); }); bool hasWgLayoutResults = llvm::any_of(op->getOpResults(), [](OpResult result) { - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(result); return layout && layout.isForWorkgroup(); }); if (hasWgLayoutOperands || hasWgLayoutResults) { diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index a8700ca73efc4..518c7817a516e 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -470,7 +470,8 @@ struct WgToSgVectorBroadcastOp VectorType resultType = op.getResult().getType(); ArrayRef wgShape = resultType.getShape(); - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op.getResult()); if (!layout || !layout.isForWorkgroup()) return failure(); @@ -535,7 +536,8 @@ struct WgToSgElementwiseOp : public ConversionPattern { ArrayRef wgShape = resultType.getShape(); - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op->getResult(0)); if (!layout || !layout.isForWorkgroup()) return failure(); @@ -737,7 +739,8 @@ struct WgToSgArithConstantOp : public OpConversionPattern { if (!vecAttr || !vecAttr.isSplat() || !vecType) return failure(); - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op.getResult()); if (!layout || !layout.isForWorkgroup()) return failure(); @@ -980,7 +983,8 @@ void XeGPUWgToSgDistributePass::runOnOperation() { } } - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(op->getResult(0)); return isLegal(layout); }); diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp index 1d4de68754c20..cac1ffe4d3bc3 100644 --- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp +++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp @@ -151,7 +151,8 @@ xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) { return nullptr; } -xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr) { +xegpu::DistributeLayoutAttr +xegpu::getDistributeLayoutAttr(const OpOperand &opr) { Operation *op = opr.getOwner(); std::string layoutName = xegpu::getLayoutName(opr); if (op->hasAttr(layoutName)) @@ -307,7 +308,8 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType( if (!inputTy || !resultTy) return WalkResult::skip(); - xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(input); + xegpu::DistributeLayoutAttr layout = + xegpu::getDistributeLayoutAttr(input); if (!layout) return WalkResult::skip(); From f3af2c307597bf13a04579b3235b45af7ea10392 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Thu, 21 Aug 2025 18:59:45 +0000 Subject: [PATCH 4/7] update convert_layout --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 3 +++ mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 4 ++-- mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp | 6 +++--- mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 5b4b376157c00..77e3c257f234e 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -217,6 +217,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData", "xegpu::DistributeLayoutAttr", "dropSgLayoutAndData">, + InterfaceMethod<"Derive a new layout by dropping InstData", + "xegpu::DistributeLayoutAttr", + "dropInstData">, InterfaceMethod<[{Delinearizes a linear subgroup ID into its multidimensional indices based on the effective subgroup layout.}], "FailureOr>", diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index ab471a1f33ef9..2f6671c5e37cc 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1162,8 +1162,8 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou the IR is lowered to WI level because that is the end result of all distributions. }]; let arguments = (ins XeGPU_VectorType: $source, - XeGPU_LayoutAttr: $input_layout, - XeGPU_LayoutAttr: $target_layout); + DistributeLayoutAttr: $input_layout, + DistributeLayoutAttr: $target_layout); let results = (outs XeGPU_VectorType: $result); let assemblyFormat = [{ $source prop-dict attr-dict `:` type($source) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 45fed8e548a89..80e9d4d25b06c 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -84,9 +84,9 @@ struct ConvertLayoutOpPattern using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op, PatternRewriter &rewriter) const override { - xegpu::LayoutAttr input_layout = op.getInputLayoutAttr(); - xegpu::LayoutAttr target_layout = op.getTargetLayoutAttr(); - if (!input_layout.getInstData() || !target_layout.getInstData()) + xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr(); + xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr(); + if (!input_layout.getInstDataAsInt() || !target_layout.getInstDataAsInt()) return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp."); input_layout = input_layout.dropInstData(); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 518c7817a516e..4fb962908793f 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -613,8 +613,9 @@ struct WgToSgConvertLayoutOp LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - xegpu::LayoutAttr input = op.getInputLayout(); - xegpu::LayoutAttr target = op.getTargetLayout(); + // TODO: currently, we only support LayoutAttr + auto input = dyn_cast(op.getInputLayout()); + auto target = dyn_cast(op.getTargetLayout()); if (!input || !target || !input.isForWorkgroup() || !target.isForWorkgroup()) From 35c64895111db5d7019a64078fbe719dce317b95 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Fri, 22 Aug 2025 14:45:35 +0000 Subject: [PATCH 5/7] fix compilation error in clang --- mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index 82fd70571c022..bad734dbfd9f0 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -9,6 +9,7 @@ #ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_ #define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_ +#include "mlir/Dialect/XeGPU/IR/XeGPU.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/OpDefinition.h" namespace mlir { From c49546af9d1aa1eca506224110f60bea9a5581c2 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 27 Aug 2025 16:50:33 +0000 Subject: [PATCH 6/7] address comments --- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 85 ++++++++++--------- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 62 +++++++------- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +- .../XeGPU/Transforms/XeGPUBlocking.cpp | 17 ++-- .../Transforms/XeGPUWgToSgDistribute.cpp | 29 +++---- .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 2 +- 6 files changed, 100 insertions(+), 97 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 77e3c257f234e..db34e35f27510 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -184,7 +184,7 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { let methods = [ InterfaceMethod<"Check the availability of workgroup level layouts", "bool", - "isForWorkgroup">, + "hasSgLayout">, InterfaceMethod<"Check the availability of subgroup level layouts", "bool", "isForSubgroup">, @@ -200,19 +200,19 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { return 0; }], [{}]>, InterfaceMethod<"Get the SgLayout field of the attribute as integer array", - "std::optional>", + "SmallVector", "getSgLayoutAsInt">, InterfaceMethod<"Get the SgData field of the attribute as integer array", - "std::optional>", + "SmallVector", "getSgDataAsInt">, InterfaceMethod<"Get the InstData field of the attribute as integer array", - "std::optional>", + "SmallVector", "getInstDataAsInt">, InterfaceMethod<"Get the LaneLayout field of the attribute as integer array", - "std::optional>", + "SmallVector", "getLaneLayoutAsInt">, InterfaceMethod<"Get the LaneData field of the attribute as integer array", - "std::optional>", + "SmallVector", "getLaneDataAsInt">, InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData", "xegpu::DistributeLayoutAttr", @@ -357,12 +357,12 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { ]; let extraClassDeclaration = [{ - bool isForWorkgroup() { + bool hasSgLayout() { return getSgLayout() != nullptr; } bool isForSubgroup() { - return !isForWorkgroup(); + return !hasSgLayout(); } int64_t getRank() { @@ -391,34 +391,34 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { getLaneLayout(), getLaneData(), getOrder()); } - std::optional> getSgLayoutAsInt() const { + SmallVector getSgLayoutAsInt() const { if (DenseI32ArrayAttr layout = getSgLayout()) return llvm::to_vector_of(layout.asArrayRef()); - return std::nullopt; + return {}; } - std::optional> getSgDataAsInt() const { + SmallVector getSgDataAsInt() const { if (DenseI32ArrayAttr data = getSgData()) return llvm::to_vector_of(data.asArrayRef()); - return std::nullopt; + return {}; } - std::optional> getInstDataAsInt() const { + SmallVector getInstDataAsInt() const { if (DenseI32ArrayAttr inst = getInstData()) return llvm::to_vector_of(inst.asArrayRef()); - return std::nullopt; + return {}; } - std::optional> getLaneLayoutAsInt() const { + SmallVector getLaneLayoutAsInt() const { if (DenseI32ArrayAttr layout = getLaneLayout()) return llvm::to_vector_of(layout.asArrayRef()); - return std::nullopt; + return {}; } - std::optional> getLaneDataAsInt() const { + SmallVector getLaneDataAsInt() const { if (DenseI32ArrayAttr data = getLaneData()) return llvm::to_vector_of(data.asArrayRef()); - return std::nullopt; + return {}; } /// Delinearizes a linear subgroup ID into its multidimensional indices @@ -485,10 +485,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { return parent.getOrder(); } - bool isForWorkgroup() const { + bool hasSgLayout() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - return parent.isForWorkgroup(); + return parent.hasSgLayout(); } bool isForSubgroup() const { @@ -499,62 +499,67 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { /// Returns the SgLayout of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - std::optional> getSgLayoutAsInt() const { + SmallVector getSgLayoutAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - if (auto layout = parent.getSgLayoutAsInt()) { + auto layout = parent.getSgLayoutAsInt(); + if (layout.size()) { ArrayRef dims = attr.getDims().asArrayRef(); - return XeGPUDialect::slice(llvm::ArrayRef(*layout), dims); + return XeGPUDialect::slice(ArrayRef(layout), dims); } - return std::nullopt; + return {}; } /// Returns the SgData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - std::optional> getSgDataAsInt() const { + SmallVector getSgDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - if (auto data = parent.getSgDataAsInt()) { + auto data = parent.getSgDataAsInt(); + if (data.size()) { ArrayRef dims = attr.getDims().asArrayRef(); - return XeGPUDialect::slice(llvm::ArrayRef(*data), dims); + return XeGPUDialect::slice(ArrayRef(data), dims); } - return std::nullopt; + return {}; } /// Returns the InstData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - std::optional> getInstDataAsInt() const { + SmallVector getInstDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - if (auto inst = parent.getInstDataAsInt()) { + auto inst = parent.getInstDataAsInt(); + if (inst.size()) { ArrayRef dims = attr.getDims().asArrayRef(); - return XeGPUDialect::slice(llvm::ArrayRef(*inst), dims); + return XeGPUDialect::slice(llvm::ArrayRef(inst), dims); } - return std::nullopt; + return {}; } /// Returns the LaneLayout of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - std::optional> getLaneLayoutAsInt() const { + SmallVector getLaneLayoutAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - if (auto layout = parent.getLaneLayoutAsInt()) { + auto layout = parent.getLaneLayoutAsInt(); + if (layout.size()) { ArrayRef dims = attr.getDims().asArrayRef(); - return XeGPUDialect::slice(llvm::ArrayRef(*layout), dims); + return XeGPUDialect::slice(llvm::ArrayRef(layout), dims); } - return std::nullopt; + return {}; } /// Returns the LaneData of the attribute, computed by applying /// the slice dimensions to the underlying LayoutAttr. - std::optional> getLaneDataAsInt() const { + SmallVector getLaneDataAsInt() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - if (auto data = parent.getLaneDataAsInt()) { + auto data = parent.getLaneDataAsInt(); + if (data.size()) { ArrayRef dims = attr.getDims().asArrayRef(); - return XeGPUDialect::slice(llvm::ArrayRef(*data), dims); + return XeGPUDialect::slice(llvm::ArrayRef(data), dims); } - return std::nullopt; + return {}; } SliceAttr dropSgLayoutAndData() { diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 6de6049facfc6..b460f6dfd2769 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -104,30 +104,30 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, // smaller than `layout[i] * data[i]`, allowing multiple compute units to // share the data. auto tryDistribute = [&](llvm::ArrayRef shape, - std::optional> layout, - std::optional> data, + SmallVector layout, + SmallVector data, bool rr = true) -> optional> { llvm::SmallVector newShape(shape); - if (layout) { - if ((*layout).size() != shape.size()) + if (layout.size()) { + if (layout.size() != shape.size()) return std::nullopt; - auto ratio = computeShapeRatio(shape, *layout); + auto ratio = computeShapeRatio(shape, layout); if (!ratio.has_value()) return std::nullopt; newShape = ratio.value(); } - if (data) { - if ((*data).size() != shape.size()) + if (data.size()) { + if (data.size() != shape.size()) return std::nullopt; - auto ratio = computeShapeRatio(newShape, *data); + auto ratio = computeShapeRatio(newShape, data); if (!ratio.has_value() && rr) - ratio = computeShapeRatio(*data, newShape); + ratio = computeShapeRatio(data, newShape); if (!ratio.has_value()) return std::nullopt; // if data is not null, we always return it for next phase. - newShape = *data; + newShape = data; } return newShape; }; @@ -141,7 +141,7 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, // check InstData, it neither have layout nor need round-robin auto maybeInstShape = - tryDistribute(sgShape, std::nullopt, attr.getInstDataAsInt(), false); + tryDistribute(sgShape, {}, attr.getInstDataAsInt(), false); if (!maybeInstShape) return false; auto instShape = maybeInstShape.value(); @@ -270,7 +270,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId) { // delinearizeSubgroupId is only available for // workgroup-level layout attribute - if (!isForWorkgroup()) + if (!hasSgLayout()) return failure(); // TODO: handle order attribute @@ -282,7 +282,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, if (!hasDefaultOrder()) return mlir::emitError(loc, "order attribute is currently not supported."); - auto dims = llvm::map_to_vector(*getSgLayoutAsInt(), [&](int64_t d) -> Value { + auto dims = llvm::map_to_vector(getSgLayoutAsInt(), [&](int64_t d) -> Value { return builder.createOrFold(loc, d); }); @@ -295,17 +295,17 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, FailureOr>> LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape) { - if (!isForWorkgroup()) + if (!hasSgLayout()) return failure(); - SmallVector sgLayout = getSgLayoutAsInt().value(); - SmallVector sgShape; - if (auto maybeSgShape = getSgDataAsInt()) - sgShape = maybeSgShape.value(); - else if (auto derivedShape = computeShapeRatio(shape, sgLayout)) - sgShape = derivedShape.value(); - else - return failure(); + SmallVector sgLayout = getSgLayoutAsInt(); + SmallVector sgShape = getSgDataAsInt(); + if (sgShape.empty()) { + if (auto derivedShape = computeShapeRatio(shape, sgLayout)) + sgShape = derivedShape.value(); + else + return failure(); + } // delinearize Ids auto maybeIds = delinearizeSubgroupId(builder, loc, linearId); @@ -382,17 +382,17 @@ FailureOr>> SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape) { assert(getRank() == static_cast(shape.size()) && "invalid shape."); - if (!isForWorkgroup()) + if (!hasSgLayout()) return failure(); - SmallVector sgLayout = getSgLayoutAsInt().value(); - SmallVector sgShape; - if (auto maybeSgShape = getSgDataAsInt()) - sgShape = maybeSgShape.value(); - else if (auto derivedShape = computeShapeRatio(shape, sgLayout)) - sgShape = derivedShape.value(); - else - return failure(); + SmallVector sgLayout = getSgLayoutAsInt(); + SmallVector sgShape = getSgDataAsInt(); + if (sgShape.empty()) { + if (auto derivedShape = computeShapeRatio(shape, sgLayout)) + sgShape = derivedShape.value(); + else + return failure(); + } // delinearize Ids auto maybeIds = delinearizeSubgroupId(builder, loc, linearId); diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index c8d180b973f05..f799205069a18 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -938,7 +938,7 @@ LogicalResult ConvertLayoutOp::verify() { // both input and target layouts should be WgLayout or SgLayout at the same // time. - if ((!srcLayout.isForWorkgroup() || !resLayout.isForWorkgroup()) && + if ((!srcLayout.hasSgLayout() || !resLayout.hasSgLayout()) && (!srcLayout.isForSubgroup() || !resLayout.isForSubgroup())) return emitOpError("expected input layout and target layout be WgLayout or " "SgLayout at the same time."); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 80e9d4d25b06c..c0be589708df0 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -86,7 +86,8 @@ struct ConvertLayoutOpPattern PatternRewriter &rewriter) const override { xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr(); xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr(); - if (!input_layout.getInstDataAsInt() || !target_layout.getInstDataAsInt()) + if (input_layout.getInstDataAsInt().empty() || + target_layout.getInstDataAsInt().empty()) return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp."); input_layout = input_layout.dropInstData(); @@ -143,8 +144,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult); if (layout && layout.isForSubgroup()) { - if (auto inst_data = layout.getInstDataAsInt()) - return inst_data.value(); + if (!layout.getInstDataAsInt().empty()) + return layout.getInstDataAsInt(); if (auto type = dyn_cast(value.getType())) return llvm::to_vector(type.getShape()); @@ -207,13 +208,13 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { llvm::any_of(op->getOpOperands(), [](OpOperand &opr) { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr); - return layout && layout.isForWorkgroup(); + return layout && layout.hasSgLayout(); }); bool hasWgLayoutResults = llvm::any_of(op->getOpResults(), [](OpResult result) { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result); - return layout && layout.isForWorkgroup(); + return layout && layout.hasSgLayout(); }); if (hasWgLayoutOperands || hasWgLayoutResults) { LDBG() << "skip unrolling for op with workgroup level layout: " << *op; @@ -224,7 +225,7 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { Type valTy = value.getType(); if (auto tdescTy = dyn_cast(valTy)) { xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr(); - return layout && layout.getInstDataAsInt(); + return layout && !layout.getInstDataAsInt().empty(); } auto shapedType = dyn_cast(valTy); return shapedType && !llvm::equal(tileShape, shapedType.getShape()); @@ -276,7 +277,7 @@ void XeGPUBlockingPass::runOnOperation() { auto layout = llvm::dyn_cast_if_present(type.getEncoding()); - if (layout && layout.isForWorkgroup()) + if (layout && layout.hasSgLayout()) return failure(); int count; @@ -293,7 +294,7 @@ void XeGPUBlockingPass::runOnOperation() { ArrayRef shape = type.getShape(); xegpu::LayoutAttr layout = type.getLayoutAttr(); - if (layout && layout.isForWorkgroup()) + if (layout && layout.hasSgLayout()) return failure(); int count; diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 4fb962908793f..a84a0b1415072 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -51,10 +51,10 @@ getSgShapeAndCount(ArrayRef shape, xegpu::DistributeLayoutAttr layout) { int count = 1; SmallVector sgShape(shape); - if (layout && layout.isForWorkgroup()) { - SmallVector sgLayout = layout.getSgLayoutAsInt().value(); - if (auto maybeSgData = layout.getSgDataAsInt()) - sgShape = *maybeSgData; + if (layout && layout.hasSgLayout()) { + SmallVector sgLayout = layout.getSgLayoutAsInt(); + if (!layout.getSgDataAsInt().empty()) + sgShape = layout.getSgDataAsInt(); else if (auto maybeDerivedSgData = computeShapeRatio(shape, sgLayout)) sgShape = *maybeDerivedSgData; SmallVector distUnit = computeElementwiseMul(sgLayout, sgShape); @@ -88,7 +88,7 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op, // not applicable to ops without workgroup layout attributes xegpu::DistributeLayoutAttr layout = op.getLayoutAttr(); - if (!layout || !layout.isForWorkgroup()) + if (!layout || !layout.hasSgLayout()) return failure(); Value sgId = rewriter.create(loc, /*upper_bound=*/nullptr); @@ -226,7 +226,7 @@ struct WgToSgCreateNdOpNoOffset MLIRContext *ctx = op.getContext(); xegpu::TensorDescType tdescTy = op.getType(); auto layout = dyn_cast(tdescTy.getLayout()); - if (!layout || !layout.isForWorkgroup()) + if (!layout || !layout.hasSgLayout()) return failure(); Type elemTy = tdescTy.getElementType(); @@ -472,7 +472,7 @@ struct WgToSgVectorBroadcastOp xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); - if (!layout || !layout.isForWorkgroup()) + if (!layout || !layout.hasSgLayout()) return failure(); // TODO: Currently only supports cases where the source and result ranks @@ -487,10 +487,8 @@ struct WgToSgVectorBroadcastOp VectorType::get(sgShape, resultType.getElementType()); // Check if the output layout is distributable - SmallVector sgLayout; - if (auto maybeSgLayout = layout.getSgLayoutAsInt()) - sgLayout = *maybeSgLayout; - else + SmallVector sgLayout = layout.getSgLayoutAsInt(); + if (sgLayout.empty()) return failure(); if (!xegpu::XeGPUDialect::isEvenlyDistributable(wgShape, layout)) @@ -538,7 +536,7 @@ struct WgToSgElementwiseOp : public ConversionPattern { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); - if (!layout || !layout.isForWorkgroup()) + if (!layout || !layout.hasSgLayout()) return failure(); SmallVector sgShape = getSgShapeAndCount(wgShape, layout).first; @@ -617,8 +615,7 @@ struct WgToSgConvertLayoutOp auto input = dyn_cast(op.getInputLayout()); auto target = dyn_cast(op.getTargetLayout()); - if (!input || !target || !input.isForWorkgroup() || - !target.isForWorkgroup()) + if (!input || !target || !input.hasSgLayout() || !target.hasSgLayout()) return rewriter.notifyMatchFailure( op, "Input and target layouts must have subgroup layout"); @@ -742,7 +739,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); - if (!layout || !layout.isForWorkgroup()) + if (!layout || !layout.hasSgLayout()) return failure(); ArrayRef wgShape = vecType.getShape(); @@ -920,7 +917,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() { }; auto isLegal = [&](xegpu::DistributeLayoutAttr layout) -> bool { - return !layout || !layout.isForWorkgroup(); + return !layout || !layout.hasSgLayout(); }; target.addDynamicallyLegalOp bool { - return !layout || !layout.isForWorkgroup(); + return !layout || !layout.hasSgLayout(); }; target.addDynamicallyLegalOp( From a723f2115973159e0ddd0333520c7bce87ca208d Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Wed, 27 Aug 2025 16:57:19 +0000 Subject: [PATCH 7/7] roll back isForWorkgroup --- .../include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 10 +++++----- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 6 +++--- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +- .../Dialect/XeGPU/Transforms/XeGPUBlocking.cpp | 8 ++++---- .../XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 17 +++++++++-------- .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 2 +- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index db34e35f27510..cfe3e800484ce 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -184,7 +184,7 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> { let methods = [ InterfaceMethod<"Check the availability of workgroup level layouts", "bool", - "hasSgLayout">, + "isForWorkgroup">, InterfaceMethod<"Check the availability of subgroup level layouts", "bool", "isForSubgroup">, @@ -357,12 +357,12 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> { ]; let extraClassDeclaration = [{ - bool hasSgLayout() { + bool isForWorkgroup() { return getSgLayout() != nullptr; } bool isForSubgroup() { - return !hasSgLayout(); + return !isForWorkgroup(); } int64_t getRank() { @@ -485,10 +485,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> { return parent.getOrder(); } - bool hasSgLayout() const { + bool isForWorkgroup() const { SliceAttr attr = flatten(); auto parent = dyn_cast(attr.getParent()); - return parent.hasSgLayout(); + return parent.isForWorkgroup(); } bool isForSubgroup() const { diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index b460f6dfd2769..7f3be7f91c56b 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -270,7 +270,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId) { // delinearizeSubgroupId is only available for // workgroup-level layout attribute - if (!hasSgLayout()) + if (!isForWorkgroup()) return failure(); // TODO: handle order attribute @@ -295,7 +295,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, FailureOr>> LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape) { - if (!hasSgLayout()) + if (!isForWorkgroup()) return failure(); SmallVector sgLayout = getSgLayoutAsInt(); @@ -382,7 +382,7 @@ FailureOr>> SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef shape) { assert(getRank() == static_cast(shape.size()) && "invalid shape."); - if (!hasSgLayout()) + if (!isForWorkgroup()) return failure(); SmallVector sgLayout = getSgLayoutAsInt(); diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index f799205069a18..c8d180b973f05 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -938,7 +938,7 @@ LogicalResult ConvertLayoutOp::verify() { // both input and target layouts should be WgLayout or SgLayout at the same // time. - if ((!srcLayout.hasSgLayout() || !resLayout.hasSgLayout()) && + if ((!srcLayout.isForWorkgroup() || !resLayout.isForWorkgroup()) && (!srcLayout.isForSubgroup() || !resLayout.isForSubgroup())) return emitOpError("expected input layout and target layout be WgLayout or " "SgLayout at the same time."); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index c0be589708df0..9ee002ede7838 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -208,13 +208,13 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const { llvm::any_of(op->getOpOperands(), [](OpOperand &opr) { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr); - return layout && layout.hasSgLayout(); + return layout && layout.isForWorkgroup(); }); bool hasWgLayoutResults = llvm::any_of(op->getOpResults(), [](OpResult result) { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result); - return layout && layout.hasSgLayout(); + return layout && layout.isForWorkgroup(); }); if (hasWgLayoutOperands || hasWgLayoutResults) { LDBG() << "skip unrolling for op with workgroup level layout: " << *op; @@ -277,7 +277,7 @@ void XeGPUBlockingPass::runOnOperation() { auto layout = llvm::dyn_cast_if_present(type.getEncoding()); - if (layout && layout.hasSgLayout()) + if (layout && layout.isForWorkgroup()) return failure(); int count; @@ -294,7 +294,7 @@ void XeGPUBlockingPass::runOnOperation() { ArrayRef shape = type.getShape(); xegpu::LayoutAttr layout = type.getLayoutAttr(); - if (layout && layout.hasSgLayout()) + if (layout && layout.isForWorkgroup()) return failure(); int count; diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index a84a0b1415072..0b7fe81facfce 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -51,7 +51,7 @@ getSgShapeAndCount(ArrayRef shape, xegpu::DistributeLayoutAttr layout) { int count = 1; SmallVector sgShape(shape); - if (layout && layout.hasSgLayout()) { + if (layout && layout.isForWorkgroup()) { SmallVector sgLayout = layout.getSgLayoutAsInt(); if (!layout.getSgDataAsInt().empty()) sgShape = layout.getSgDataAsInt(); @@ -88,7 +88,7 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op, // not applicable to ops without workgroup layout attributes xegpu::DistributeLayoutAttr layout = op.getLayoutAttr(); - if (!layout || !layout.hasSgLayout()) + if (!layout || !layout.isForWorkgroup()) return failure(); Value sgId = rewriter.create(loc, /*upper_bound=*/nullptr); @@ -226,7 +226,7 @@ struct WgToSgCreateNdOpNoOffset MLIRContext *ctx = op.getContext(); xegpu::TensorDescType tdescTy = op.getType(); auto layout = dyn_cast(tdescTy.getLayout()); - if (!layout || !layout.hasSgLayout()) + if (!layout || !layout.isForWorkgroup()) return failure(); Type elemTy = tdescTy.getElementType(); @@ -472,7 +472,7 @@ struct WgToSgVectorBroadcastOp xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); - if (!layout || !layout.hasSgLayout()) + if (!layout || !layout.isForWorkgroup()) return failure(); // TODO: Currently only supports cases where the source and result ranks @@ -536,7 +536,7 @@ struct WgToSgElementwiseOp : public ConversionPattern { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0)); - if (!layout || !layout.hasSgLayout()) + if (!layout || !layout.isForWorkgroup()) return failure(); SmallVector sgShape = getSgShapeAndCount(wgShape, layout).first; @@ -615,7 +615,8 @@ struct WgToSgConvertLayoutOp auto input = dyn_cast(op.getInputLayout()); auto target = dyn_cast(op.getTargetLayout()); - if (!input || !target || !input.hasSgLayout() || !target.hasSgLayout()) + if (!input || !target || !input.isForWorkgroup() || + !target.isForWorkgroup()) return rewriter.notifyMatchFailure( op, "Input and target layouts must have subgroup layout"); @@ -739,7 +740,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern { xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult()); - if (!layout || !layout.hasSgLayout()) + if (!layout || !layout.isForWorkgroup()) return failure(); ArrayRef wgShape = vecType.getShape(); @@ -917,7 +918,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() { }; auto isLegal = [&](xegpu::DistributeLayoutAttr layout) -> bool { - return !layout || !layout.hasSgLayout(); + return !layout || !layout.isForWorkgroup(); }; target.addDynamicallyLegalOp bool { - return !layout || !layout.hasSgLayout(); + return !layout || !layout.isForWorkgroup(); }; target.addDynamicallyLegalOp(