# [MLIR][XeGPU] refine verifier for TensorDescType #137226
Changes from all commits:

```diff
@@ -6,12 +6,15 @@
 //
 //===----------------------------------------------------------------------===//

+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include <numeric>

+using std::optional;

 namespace mlir {
 namespace xegpu {
```
```diff
@@ -30,6 +33,71 @@ void XeGPUDialect::initialize() {
       >();
 }

+// Checks if the given shape can be evenly distributed based on the layout
+// and data factors provided by the LayoutAttr.
+bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
```
**Review comment** (Contributor): let's have the comment here as it was earlier

**Author:** fixed
```diff
+                                         xegpu::LayoutAttr attr) {
+  assert(attr && "Layout attribute is missing.");
+
+  // Checks whether the given shape can be evenly distributed using the
+  // specified layout and data attributes. If successful, it returns the work
+  // size for each compute unit; otherwise, it returns `std::nullopt`. The work
+  // size per compute unit is calculated as follows:
+  //   - If `data` is null: newShape[i] = shape[i] / layout[i]
+  //   - If `data` is not null: newShape[i] = data[i]
+  // When round-robin distribution (`rr`) is enabled, `shape[i]` can be
+  // smaller than `layout[i] * data[i]`, allowing multiple compute units to
+  // share the data.
+  auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
+                           DenseI32ArrayAttr layout, DenseI32ArrayAttr data,
+                           bool rr = true) -> optional<SmallVector<int64_t>> {
+    llvm::SmallVector<int64_t> newShape(shape);
+    if (layout) {
+      auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(shape, vec);
```
**Review comment** (Contributor, on the `computeShapeRatio` call): maybe

**Author:** This is upstream util, cannot rename it.
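For reference, `computeShapeRatio` is the upstream utility declared in `mlir/Dialect/Utils/IndexingUtils.h` (included at the top of this diff). The sketch below illustrates what the callers here rely on, assuming equal-rank inputs as the size checks above guarantee; it is a standalone approximation with a hypothetical name, not the MLIR implementation.

```cpp
#include <cstdint>
#include <optional>
#include <vector>

// Sketch of computeShapeRatio semantics for equal-rank inputs: the
// elementwise ratio shape[i] / subShape[i], or nullopt if any
// dimension does not divide evenly.
std::optional<std::vector<int64_t>>
shapeRatioSketch(const std::vector<int64_t> &shape,
                 const std::vector<int64_t> &subShape) {
  if (shape.size() != subShape.size())
    return std::nullopt;
  std::vector<int64_t> ratio;
  ratio.reserve(shape.size());
  for (size_t i = 0; i < shape.size(); ++i) {
    if (subShape[i] == 0 || shape[i] % subShape[i] != 0)
      return std::nullopt; // not evenly divisible
    ratio.push_back(shape[i] / subShape[i]);
  }
  return ratio; // e.g. {32, 64} / {4, 16} == {8, 4}
}
```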
```diff
+      if (!ratio.has_value())
+        return std::nullopt;
+      newShape = ratio.value();
+    }
+
+    if (data) {
+      auto vec = llvm::to_vector_of<int64_t>(data.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(newShape, vec);
+      if (!ratio.has_value() && rr)
+        ratio = computeShapeRatio(vec, newShape);
+      if (!ratio.has_value())
+        return std::nullopt;
+
+      // If data is not null, we always return it for the next phase.
+      newShape = vec;
+    }
+    return newShape;
+  };
+
+  // Check the sgLayout and sgData.
+  auto maybeSgShape =
+      tryDistribute(shape, attr.getSgLayout(), attr.getSgData());
+  if (!maybeSgShape)
+    return false;
+  auto sgShape = maybeSgShape.value();
+
+  // Check InstData; it neither has a layout nor uses round-robin.
+  auto maybeInstShape =
+      tryDistribute(sgShape, nullptr, attr.getInstData(), false);
+  if (!maybeInstShape)
+    return false;
+  auto instShape = maybeInstShape.value();
+
+  // Check LaneLayout and LaneData.
+  auto maybeLaneShape =
+      tryDistribute(instShape, attr.getLaneLayout(), attr.getLaneData(), false);
+  return maybeLaneShape.has_value();
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU_BlockTensorDescAttr
 //===----------------------------------------------------------------------===//
```
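To make the three-phase check concrete: the shape is first divided across subgroups (`sg_layout`/`sg_data`, with round-robin allowed), the per-subgroup shape is then tiled by `inst_data`, and the per-instruction shape is finally divided across lanes (`lane_layout`/`lane_data`). Below is a self-contained sketch of that arithmetic using plain vectors instead of MLIR attributes; the shapes and layout factors are made up for illustration and are not taken from the PR.

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

using Shape = std::vector<int64_t>;

// Elementwise shape / tile; nullopt unless every dimension divides evenly.
static std::optional<Shape> ratioOf(const Shape &shape, const Shape &tile) {
  if (shape.size() != tile.size())
    return std::nullopt;
  Shape r;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (tile[i] == 0 || shape[i] % tile[i] != 0)
      return std::nullopt;
    r.push_back(shape[i] / tile[i]);
  }
  return r;
}

// Mirrors the lambda in the diff, with empty vectors standing in for null
// attributes: layout divides the shape, data replaces it for the next phase;
// with `rr`, divisibility may hold in either direction (round-robin sharing).
static std::optional<Shape> tryDistribute(Shape shape, const Shape &layout,
                                          const Shape &data, bool rr) {
  if (!layout.empty()) {
    auto r = ratioOf(shape, layout);
    if (!r)
      return std::nullopt;
    shape = *r;
  }
  if (!data.empty()) {
    auto r = ratioOf(shape, data);
    if (!r && rr)
      r = ratioOf(data, shape);
    if (!r)
      return std::nullopt;
    shape = data; // data, when present, becomes the next phase's shape
  }
  return shape;
}

int main() {
  Shape shape = {256, 128};
  // Phase 1: subgroup level, sg_layout = [8, 4], sg_data = [32, 32].
  auto sg = tryDistribute(shape, {8, 4}, {32, 32}, /*rr=*/true);
  // Phase 2: instruction level, inst_data = [8, 16], no layout, no rr.
  auto inst = sg ? tryDistribute(*sg, {}, {8, 16}, false) : sg;
  // Phase 3: lane level, lane_layout = [1, 16], lane_data = [8, 1], no rr.
  auto lane = inst ? tryDistribute(*inst, {1, 16}, {8, 1}, false) : inst;
  std::printf("evenly distributable: %s\n", lane ? "yes" : "no");
}
```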
```diff
@@ -241,7 +309,7 @@ LogicalResult TensorDescType::verify(
     llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
     mlir::Attribute encoding, mlir::Attribute layout) {
   size_t rank = shape.size();
-  // Low-pressure types are packed in 32-bit units.
+  // Low-precision types are packed in 32-bit units.
   int32_t packingFactor = 32 / elementType.getIntOrFloatBitWidth();
   if (rank != 1 && rank != 2)
     return emitError() << "expected 1D or 2D tensor";
```
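The packing factor is simply 32 divided by the element bit width: f16 packs 32 / 16 = 2 elements per 32-bit unit, i8 packs 32 / 8 = 4, and f32 packs 32 / 32 = 1 (no packing).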
```diff
@@ -268,23 +336,21 @@ LogicalResult TensorDescType::verify(
     }
   }

-  if (auto blockAttr =
-          mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding)) {
+  auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
+  if (blockAttr) {
     MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
     if (rank == 2 && memorySpaceAttr &&
         memorySpaceAttr.getValue() == MemorySpace::SLM)
       return emitError() << "SLM is not supported for 2D block tensor";
   }

-  if (auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout)) {
-
+  auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
+  if (layoutAttr) {
     if (rank != (size_t)layoutAttr.getRank())
       return emitError() << "expected layout rank to match tensor rank";

-    ArrayRef<int32_t> laneLayout = layoutAttr.getLaneLayout().asArrayRef();
-    ArrayRef<int32_t> laneData = layoutAttr.getLaneData().asArrayRef();
-
-    if (scatterAttr) {
+    auto laneData = layoutAttr.getLaneData();
+    if (scatterAttr && laneData) {
       // Validate subgroup mapping rules for scattered tensors.
       // A work-item's slice of the tensor with shape [sg_size] or
       // [sg_size, chunk_size] will be [1] or [1, 32/element_ty_bit_width]
@@ -294,20 +360,19 @@ LogicalResult TensorDescType::verify(
       if (rank > 1 && laneData[0] != 1)
         return emitError()
                << "cannot map over non-contiguous scattered row elements";
-      if (laneData.back() != packingFactor)
+      if (laneData[rank - 1] != packingFactor)
         return emitError() << "work item data mapping must match the number of "
                               "contiguous elements";
     }

-    for (size_t i = 0; i < shape.size(); ++i) {
-      uint32_t numElemPerWi = laneLayout[i] * laneData[i];
-      if (shape[i] < numElemPerWi || shape[i] % numElemPerWi != 0)
-        return emitError() << "cannot distribute " << shape[i] << " over "
-                           << laneLayout[i] << " work items with "
-                           << laneData[i] << " elements each";
+    if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
+      std::string shapeStr;
+      llvm::raw_string_ostream stream(shapeStr);
+      llvm::interleaveComma(shape, stream);
+      return emitError() << "cannot distribute [" << shapeStr << "] using "
+                         << layoutAttr;
     }
   }

   return success();
 }
```
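The new diagnostic prints the whole shape rather than a single offending dimension, assembling it with `llvm::raw_string_ostream` and `llvm::interleaveComma` (both existing LLVM Support/ADT utilities). The tiny program below is an illustrative sketch of that idiom; the shape and the layout text are made up.

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <string>
#include <vector>

int main() {
  std::vector<int64_t> shape = {24, 32}; // illustrative shape
  std::string shapeStr;
  llvm::raw_string_ostream stream(shapeStr);
  llvm::interleaveComma(shape, stream); // shapeStr becomes "24, 32"
  // The verifier's error then reads like:
  //   cannot distribute [24, 32] using #xegpu.layout<...>
  llvm::outs() << "cannot distribute [" << shapeStr << "] using <layout>\n";
  return 0;
}
```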
**Review comment:** This can be moved to XeGPU/Utils after #135271