-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[mlir][xegpu] Add definition of SliceAttr #150146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
2bc70b6
3959f9e
2027cfc
638c085
91048f0
7eaf0a6
ddc42c2
36e2c3a
6872e6d
ded53b4
223fab9
60e20a0
3630966
398d69b
08e4aa9
62aa1dd
de0a1bb
a483699
e7f2977
e3e4a61
4d72663
3f59105
129312a
0865612
b67f2b1
01e4efe
3077c6c
d1f7bac
27da02a
e49e1cf
59de450
1b16552
0511e1b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,7 +175,32 @@ def XeGPU_FenceScopeAttr: | |
let assemblyFormat = "$value"; | ||
} | ||
|
||
def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> { | ||
// Attribute interface listed as a trait by both XeGPU_LayoutAttr and | ||
// XeGPU_SliceAttr; it declares the queries that the two attributes implement. | ||
def LayoutTrait: AttrInterface<"LayoutTrait"> { | ||
let cppNamespace = "::mlir::xegpu"; | ||
let description = [{ | ||
Common trait for all XeGPU layouts. | ||
}]; | ||
|
||
let methods = [ | ||
// Subgroup layout widened to int64_t. The result is optional: the | ||
// LayoutAttr implementation returns std::nullopt when sg_layout is absent. | ||
InterfaceMethod<"Get the effective sg layout", | ||
"std::optional<SmallVector<int64_t>>", | ||
"getEffectiveSgLayout">, | ||
// Subgroup data shape widened to int64_t, optional for the same reason. | ||
InterfaceMethod<"Get the effective sg data", | ||
"std::optional<SmallVector<int64_t>>", | ||
"getEffectiveSgData">, | ||
// Turns a linear subgroup id into per-dimension ids; may fail. | ||
InterfaceMethod<"Delinearize the Subgroup Id", | ||
"FailureOr<SmallVector<Value>>", | ||
"delinearizeSubgroupId", | ||
(ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId)>, | ||
|
||
// Returns a vector of offset vectors: the implementations push one inner | ||
// vector per tile they visit, so a subgroup can receive several blocks. | ||
InterfaceMethod<"Get the local offset to be accessed by the given subgroup Id", | ||
"FailureOr<SmallVector<SmallVector<Value>>>", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the need for having a vector<vector<>> here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since each subgroup may be assigned with multiple blocks. |
||
"getOffsets", | ||
(ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)> | ||
]; | ||
} | ||
|
||
def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> { | ||
let summary = [{ | ||
Describes the data distribution to subgroups and work-items for a tensor | ||
specified by the tensor descriptor. | ||
|
@@ -330,12 +355,105 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> { | |
return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), nullptr, | ||
getLaneLayout(), getLaneData(), getOrder()); | ||
} | ||
|
||
// Returns sg_layout widened to int64_t, or std::nullopt when the attribute | ||
// carries no sg_layout. | ||
std::optional<SmallVector<int64_t>> getEffectiveSgLayout() const { | ||
DenseI32ArrayAttr sgLayoutAttr = getSgLayout(); | ||
if (!sgLayoutAttr) | ||
return std::nullopt; | ||
return llvm::to_vector_of<int64_t>(sgLayoutAttr.asArrayRef()); | ||
} | ||
|
||
// Returns sg_data widened to int64_t, or std::nullopt when the attribute | ||
// carries no sg_data. | ||
std::optional<SmallVector<int64_t>> getEffectiveSgData() const { | ||
DenseI32ArrayAttr sgDataAttr = getSgData(); | ||
if (!sgDataAttr) | ||
return std::nullopt; | ||
return llvm::to_vector_of<int64_t>(sgDataAttr.asArrayRef()); | ||
} | ||
|
||
FailureOr<SmallVector<Value>> | ||
delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId); | ||
|
||
FailureOr<SmallVector<SmallVector<Value>>> | ||
getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape); | ||
|
||
}]; | ||
|
||
let assemblyFormat = "`<` struct(params) `>`"; | ||
let genVerifyDecl = 1; | ||
} | ||
|
||
|
||
// Layout attribute obtained by dropping the dimensions listed in `dims` from | ||
// a parent LayoutAttr. Written as `<` parent `,` `dims` `=` dims `>`. | ||
def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is desirable to allow nested slice attribute to match the staged reduction use case, where reduction may follow another reduction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will add it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the support for nested SliceAttr has been enabled. @Jianhui-Li |
||
let summary = [{Describes the data distribution and sharing among subgroups or work-items.}]; | ||
|
||
let description = [{ | ||
Like LayoutAttr, SliceAttr describes data distribution among subgroups or work-items. | ||
However, whereas LayoutAttr requires the data to have the same rank as the attribute, | ||
SliceAttr permits the data to have a lower rank. In this case, compute units in the | ||
specified dimensions share the data, provided that the remaining ranks match the data | ||
chencha3 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
rank. SliceAttr is commonly used by operations such as vector.multi_reduction and | ||
vector.broadcast. | ||
|
||
Example: | ||
``` | ||
#l = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]> | ||
#r = #xegpu.slice<#l, dims = [0]> | ||
|
||
%exp = math.exp %input {layout_result_0 = #l}: vector<256x128xf32> | ||
%red = vector.multi_reduction<add>, %exp, %acc [0] {layout_result_0 = #r}: vector<256x128xf32> to vector<128xf32> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. better to add a comment here explaning the output layout of
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated it |
||
%bcast = vector.broadcast %red {layout_result_0 = #l} : vector<128xf32> to vector<256x128xf32> | ||
``` | ||
}]; | ||
|
||
let parameters = (ins | ||
"xegpu::LayoutAttr": $parent, | ||
"DenseI64ArrayAttr": $dims | ||
); | ||
|
||
let extraClassDeclaration = [{ | ||
|
||
// Rank left after slicing: the parent rank minus the number of sliced dims. | ||
int64_t getRank() const { | ||
return getParent().getRank() - getDims().size(); | ||
} | ||
|
||
// The order is taken unchanged from the parent layout. | ||
DenseI32ArrayAttr getOrder() const { | ||
return getParent().getOrder(); | ||
} | ||
|
||
// A slice is workgroup-level exactly when its parent is. | ||
bool isWgLayout() const { | ||
return getParent().isWgLayout(); | ||
} | ||
|
||
// A slice is subgroup-level exactly when its parent is. | ||
bool isSgLayout() const { | ||
return getParent().isSgLayout(); | ||
} | ||
|
||
// Effective sg_layout of the parent with the sliced dims removed, or | ||
// std::nullopt when the parent has none. | ||
std::optional<SmallVector<int64_t>> getEffectiveSgLayout() const { | ||
if (auto layout = getParent().getEffectiveSgLayout()) { | ||
ArrayRef<int64_t> dims = getDims().asArrayRef(); | ||
return XeGPUDialect::dropDims(llvm::ArrayRef<int64_t>(*layout), dims); | ||
} | ||
return std::nullopt; | ||
} | ||
|
||
// Effective sg_data of the parent with the sliced dims removed, or | ||
// std::nullopt when the parent has none. | ||
std::optional<SmallVector<int64_t>> getEffectiveSgData() const { | ||
if (auto data = getParent().getEffectiveSgData()) { | ||
ArrayRef<int64_t> dims = getDims().asArrayRef(); | ||
return XeGPUDialect::dropDims(llvm::ArrayRef<int64_t>(*data), dims); | ||
} | ||
return std::nullopt; | ||
} | ||
|
||
FailureOr<SmallVector<Value>> | ||
delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId); | ||
|
||
FailureOr<SmallVector<SmallVector<Value>>> | ||
getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape); | ||
|
||
}]; | ||
|
||
let assemblyFormat = "`<` $parent `,` `dims` `=` $dims `>`"; | ||
let genVerifyDecl = 1; | ||
} | ||
|
||
def XeGPU_RangeAttr : XeGPUAttr<"Range", "range"> { | ||
let summary = [{Specifies a half-open range}]; | ||
let description = [{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,18 @@ def XeGPU_Dialect : Dialect { | |
/// Checks if the given shape can be evenly distributed based on the layout | ||
/// and data factors provided by the LayoutAttr. | ||
static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr); | ||
|
||
/// Returns a copy of `data` without the elements whose positions are listed | ||
/// in `dropPositions`; the remaining elements keep their relative order. | ||
/// E.g., for data = [32, 64, 8] and dropPositions = [0, 2], it returns [64]. | ||
template<typename T, typename U> | ||
static llvm::SmallVector<T> dropDims(llvm::ArrayRef<T> data, llvm::ArrayRef<U> dropPositions) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit : dropDims -> sliceDims? data->dims, dropPosition -> dropDims There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed it to |
||
llvm::SmallVector<T> result; | ||
for (auto [i, v]: llvm::enumerate(data)) { | ||
// Keep element i unless its index appears in dropPositions. | ||
if (!llvm::is_contained(dropPositions, i)) | ||
result.push_back(v); | ||
} | ||
return result; | ||
} | ||
}]; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect | |
|
||
DEPENDS | ||
MLIRXeGPUIncGen | ||
MLIRXeGPUAttrInterfaceIncGen | ||
MLIRXeGPUAttrsIncGen | ||
MLIRXeGPUEnumsIncGen | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't you need to link MLIRIndexDialect and MLIRAffineUtils here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch. Yes, they are needed. |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,9 @@ | |
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "mlir/Dialect/Affine/Utils.h" | ||
#include "mlir/Dialect/Arith/Utils/Utils.h" | ||
#include "mlir/Dialect/Index/IR/IndexOps.h" | ||
#include "mlir/Dialect/Utils/IndexingUtils.h" | ||
#include "mlir/Dialect/XeGPU/IR/XeGPU.h" | ||
#include "mlir/Dialect/XeGPU/IR/XeGPUTargetInfo.h" | ||
|
@@ -211,6 +214,178 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError, | |
return success(); | ||
} | ||
|
||
// Converts the linear subgroup id into one index Value per sg_layout | ||
// dimension by delinearizing it against the effective sg_layout. Returns | ||
// failure for layouts that are not workgroup-level. | ||
FailureOr<SmallVector<Value>> | ||
LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, | ||
Value linearId) { | ||
// delinearizeSubgroupId is only available for | ||
// workgroup-level layout attribute | ||
if (!isWgLayout()) | ||
return failure(); | ||
|
||
// TODO: handle order attribute | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe assert unavailability of Order attribute? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||
// Materialize each sg_layout extent as an index constant. | ||
// NOTE(review): the optional is dereferenced without a check; presumably | ||
// isWgLayout() implies that sg_layout is present - confirm. | ||
auto dims = | ||
llvm::map_to_vector(*getEffectiveSgLayout(), [&](int64_t d) -> Value { | ||
return builder.createOrFold<arith::ConstantIndexOp>(loc, d); | ||
}); | ||
|
||
return affine::delinearizeIndex(builder, loc, linearId, dims); | ||
} | ||
|
||
// Computes the offsets, within a tensor of the given shape, of the blocks | ||
// assigned to the subgroup identified by linearId. The result holds one | ||
// offset vector (one index Value per dimension) for every tile produced by | ||
// StaticTileOffsetRange(shape, distUnit). Returns failure when the layout is | ||
// not workgroup-level, when the subgroup shape cannot be determined, or when | ||
// the subgroup id cannot be delinearized. | ||
FailureOr<SmallVector<SmallVector<Value>>> | ||
LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to add a comment on what the purpose of this function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
ArrayRef<int64_t> shape) { | ||
if (!isWgLayout()) | ||
return failure(); | ||
|
||
// Subgroup shape: use sg_data when it is present, otherwise fall back to | ||
// computeShapeRatio(shape, sgLayout) (presumably shape / sgLayout per | ||
// dimension - confirm); fail when neither is available. | ||
auto sgLayout = getEffectiveSgLayout().value(); | ||
SmallVector<int64_t> sgShape; | ||
if (auto maybeSgShape = getEffectiveSgData()) | ||
sgShape = maybeSgShape.value(); | ||
else if (auto ratio = computeShapeRatio(shape, sgLayout)) | ||
sgShape = ratio.value(); | ||
else | ||
return failure(); | ||
|
||
// distUnit[i] is the minimum value between shape[i] and | ||
// sgLayout[i] * sgShape[i] | ||
SmallVector<int64_t> distUnit = llvm::map_to_vector( | ||
llvm::zip_equal(shape, computeElementwiseMul(sgLayout, sgShape)), | ||
[](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); }); | ||
|
||
// delinearize Ids | ||
auto maybeIds = delinearizeSubgroupId(builder, loc, linearId); | ||
if (failed(maybeIds)) | ||
return failure(); | ||
SmallVector<Value> sgIds = *maybeIds; | ||
|
||
// nd local offset, localOffset[i] = sgId[i] * sgShape[i] | ||
SmallVector<Value> localOffsets = llvm::map_to_vector( | ||
llvm::zip(sgIds, sgShape), [&](const auto &t) -> Value { | ||
return builder.createOrFold<index::MulOp>( | ||
loc, std::get<0>(t), | ||
builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t))); | ||
}); | ||
|
||
// For every tile base offset: offset[i] = (base[i] + localOffset[i]) % shape[i] | ||
SmallVector<SmallVector<Value>> offsets; | ||
for (SmallVector<int64_t> unitOffs : StaticTileOffsetRange(shape, distUnit)) { | ||
SmallVector<Value> base = | ||
llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value { | ||
return builder.create<arith::ConstantIndexOp>(loc, d); | ||
}); | ||
|
||
SmallVector<Value> adds = llvm::map_to_vector( | ||
llvm::zip_equal(base, localOffsets), [&](const auto &t) -> Value { | ||
return builder.createOrFold<arith::AddIOp>(loc, std::get<0>(t), | ||
std::get<1>(t)); | ||
}); | ||
|
||
// The unsigned remainder by shape[i] wraps the offset back into the tensor. | ||
SmallVector<Value> mods = llvm::map_to_vector( | ||
llvm::zip_equal(adds, shape), [&](const auto &t) -> Value { | ||
return builder.createOrFold<index::RemUOp>( | ||
loc, std::get<0>(t), | ||
builder.create<arith::ConstantIndexOp>(loc, std::get<1>(t))); | ||
}); | ||
|
||
offsets.push_back(mods); | ||
} | ||
|
||
return offsets; | ||
} | ||
|
||
//===----------------------------------------------------------------------===// | ||
// XeGPU_SliceAttr | ||
//===----------------------------------------------------------------------===// | ||
// Verifies a SliceAttr: both parameters must be present, and every entry of | ||
// dims must be a distinct dimension index in the range [0, parent rank). | ||
LogicalResult | ||
SliceAttr::verify(llvm::function_ref<InFlightDiagnostic()> emitError, | ||
xegpu::LayoutAttr parent, DenseI64ArrayAttr dims) { | ||
if (!parent || !dims) | ||
return emitError() << "expected parent layout and dims attribute"; | ||
|
||
int64_t rank = parent.getRank(); | ||
// check every element in dims is unique, non-negative and smaller than rank | ||
llvm::SmallDenseSet<int64_t> seen; | ||
for (int64_t dim : dims.asArrayRef()) { | ||
// A negative index is as invalid as one that is past the parent rank. | ||
if (dim < 0 || dim >= rank) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: should we check if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch. Fixed. |
||
return emitError() << "invalid dim (" << dim << ") in slice attribute."; | ||
if (!seen.insert(dim).second) | ||
return emitError() << "repeated dim (" << dim << ") in slice attribute."; | ||
} | ||
return success(); | ||
} | ||
|
||
// A slice delinearizes subgroup ids exactly as its parent layout does, so | ||
// the result still has one id per parent dimension. | ||
FailureOr<SmallVector<Value>> | ||
SliceAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc, | ||
Value linearId) { | ||
xegpu::LayoutAttr parentLayout = getParent(); | ||
return parentLayout.delinearizeSubgroupId(builder, loc, linearId); | ||
} | ||
|
||
// Computes the offsets, within a tensor of the given (sliced-rank) shape, of | ||
// the blocks assigned to the subgroup identified by linearId. Subgroup ids | ||
// are delinearized through the parent layout and the ids of the sliced dims | ||
// are dropped before use. The result holds one offset vector for every tile | ||
// produced by StaticTileOffsetRange(shape, distUnit). Returns failure when | ||
// the layout is not workgroup-level, when the subgroup shape cannot be | ||
// determined, or when the subgroup id cannot be delinearized. | ||
FailureOr<SmallVector<SmallVector<Value>>> | ||
SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId, | ||
ArrayRef<int64_t> shape) { | ||
// The shape must have the rank of the slice, not the rank of the parent. | ||
assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape."); | ||
if (!isWgLayout()) | ||
return failure(); | ||
|
||
auto sgLayout = getEffectiveSgLayout().value(); | ||
|
||
// Subgroup shape: use the sliced sg_data when it is present, otherwise fall | ||
// back to computeShapeRatio(shape, sgLayout) (presumably shape / sgLayout | ||
// per dimension - confirm); fail when neither is available. | ||
SmallVector<int64_t> sgShape; | ||
if (auto maybeSgShape = getEffectiveSgData()) | ||
sgShape = maybeSgShape.value(); | ||
else if (auto ratio = computeShapeRatio(shape, sgLayout)) | ||
sgShape = ratio.value(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shape == ratio? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed the variable name to derivedShape for clarification per discussion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems you change the last place, but missed this one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed. |
||
else | ||
return failure(); | ||
|
||
// distUnit[i] is the minimum value between shape[i] and | ||
// sgLayout[i] * sgShape[i] | ||
SmallVector<int64_t> distUnit = llvm::map_to_vector( | ||
llvm::zip_equal(shape, computeElementwiseMul(sgLayout, sgShape)), | ||
[](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); }); | ||
|
||
// delinearize Ids | ||
auto maybeIds = delinearizeSubgroupId(builder, loc, linearId); | ||
if (failed(maybeIds)) | ||
return failure(); | ||
// The effective sgIds for offsets computing correspond | ||
// to the dims that are not sliced. | ||
ArrayRef<int64_t> dims = getDims().asArrayRef(); | ||
SmallVector<Value> sgIds = | ||
XeGPUDialect::dropDims(ArrayRef<Value>(*maybeIds), dims); | ||
|
||
// nd local offset, localOffset[i] = sgId[i] * sgShape[i] | ||
SmallVector<Value> localOffsets = llvm::map_to_vector( | ||
llvm::zip(sgIds, sgShape), [&](const auto &t) -> Value { | ||
return builder.createOrFold<index::MulOp>( | ||
loc, std::get<0>(t), | ||
builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t))); | ||
}); | ||
|
||
// For every tile base offset: offset[i] = (base[i] + localOffset[i]) % shape[i] | ||
SmallVector<SmallVector<Value>> offsets; | ||
for (SmallVector<int64_t> unitOffs : StaticTileOffsetRange(shape, distUnit)) { | ||
SmallVector<Value> base = | ||
llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value { | ||
return builder.create<arith::ConstantIndexOp>(loc, d); | ||
}); | ||
|
||
SmallVector<Value> adds = llvm::map_to_vector( | ||
llvm::zip_equal(base, localOffsets), [&](const auto &t) -> Value { | ||
return builder.createOrFold<arith::AddIOp>(loc, std::get<0>(t), | ||
std::get<1>(t)); | ||
}); | ||
|
||
// The unsigned remainder by shape[i] wraps the offset back into the tensor. | ||
SmallVector<Value> mods = llvm::map_to_vector( | ||
llvm::zip_equal(adds, shape), [&](const auto &t) -> Value { | ||
return builder.createOrFold<index::RemUOp>( | ||
loc, std::get<0>(t), | ||
builder.create<arith::ConstantIndexOp>(loc, std::get<1>(t))); | ||
}); | ||
|
||
offsets.push_back(mods); | ||
} | ||
|
||
return offsets; | ||
} | ||
|
||
//===----------------------------------------------------------------------===// | ||
// XeGPU_RangeAttr | ||
//===----------------------------------------------------------------------===// | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
newline
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.