Skip to content
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
7d332da
init
chencha3 Apr 17, 2025
d4549ad
Merge branch 'main' into xegpu_unroll_patterns
chencha3 Apr 17, 2025
cdd5059
Merge branch 'main' into xegpu_unroll_patterns
chencha3 Apr 18, 2025
47f9b3d
add patterns for createNdOp and StoreNdOp
chencha3 Apr 18, 2025
932747e
refine nativeShapeFn
chencha3 Apr 18, 2025
f843d98
refine verifier for TensorDescType
chencha3 Apr 23, 2025
c6bdd3c
add loadNd pattern
chencha3 Apr 23, 2025
1d4dc72
add test pass
chencha3 Apr 23, 2025
545f937
format code
chencha3 Apr 23, 2025
008dbc7
add unit test
chencha3 Apr 23, 2025
d077cb0
clean up
chencha3 Apr 24, 2025
0193a04
stage
chencha3 Apr 28, 2025
7f8b00a
Merge branch 'main' into xegpu_unroll_patterns
chencha3 Apr 29, 2025
456465e
add dpas pattern and unit test
chencha3 Apr 29, 2025
906d699
refactor
chencha3 Apr 29, 2025
c63a496
fix format
chencha3 Apr 29, 2025
e2ed1ac
fix format
chencha3 Apr 29, 2025
35b35f0
refine
chencha3 Apr 30, 2025
6fef430
refine
chencha3 Apr 30, 2025
9d24920
cleanup and add patterns for rest nd ops
chencha3 Apr 30, 2025
1a92661
fix format
chencha3 Apr 30, 2025
0126eb9
cleanup
chencha3 Apr 30, 2025
a7d0614
add UnrollOption
chencha3 May 6, 2025
01ca783
fix the format
chencha3 May 6, 2025
ec74833
add comments
chencha3 May 6, 2025
68f95f0
add brief description
chencha3 May 6, 2025
9e6cf29
address comments
chencha3 May 6, 2025
15b1b46
Merge branch 'main' into xegpu_unroll_patterns
chencha3 May 6, 2025
727390f
add comments
chencha3 May 6, 2025
76f8761
fix comments
chencha3 May 6, 2025
45a3d28
renaming
chencha3 May 6, 2025
372dbd7
generalize pack, unpack, createNdOp for supporting 1D cases
chencha3 May 6, 2025
06cf9b2
refine
chencha3 May 7, 2025
e0399ac
add 1D unit tests
chencha3 May 7, 2025
e873d59
switch to explicit types
chencha3 May 7, 2025
b55f43b
clean up
chencha3 May 7, 2025
383bd1d
move getUnrolledTypes out
chencha3 May 8, 2025
4fc35cf
addressed comments
chencha3 May 8, 2025
536a610
address comments
chencha3 May 8, 2025
39ca440
fix format
chencha3 May 8, 2025
09cec0b
Merge branch 'main' into xegpu_unroll_patterns
chencha3 May 8, 2025
1d3d12c
sync
chencha3 May 8, 2025
96cb62b
address comments
chencha3 May 8, 2025
163204a
Merge branch 'main' into xegpu_unroll_patterns
chencha3 May 9, 2025
1caac76
update cmake
chencha3 May 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,28 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
return $_get($_ctxt, sg_layout, sg_data, inst_data,
DenseI32ArrayAttr::get($_ctxt, lane_layout),
DenseI32ArrayAttr::get($_ctxt, lane_data), order);
}]>,
// Builder for a lane-level-only layout: takes raw integer arrays for
// lane_layout, lane_data, and order, and leaves the workgroup-level
// fields (sg_layout, sg_data, inst_data) unset (null attributes).
AttrBuilder<(ins "llvm::ArrayRef<int>": $lane_layout,
"llvm::ArrayRef<int>": $lane_data,
"llvm::ArrayRef<int>": $order),
[{
auto sg_layout = DenseI32ArrayAttr();
auto sg_data = DenseI32ArrayAttr();
auto inst_data = DenseI32ArrayAttr();
return $_get($_ctxt, sg_layout, sg_data, inst_data,
DenseI32ArrayAttr::get($_ctxt, lane_layout),
DenseI32ArrayAttr::get($_ctxt, lane_data),
DenseI32ArrayAttr::get($_ctxt, order));
}]>,
// Builder for a lane-level-only layout taking already-constructed
// DenseI32ArrayAttr values; the workgroup-level fields (sg_layout,
// sg_data, inst_data) are left unset (null attributes).
AttrBuilder<(ins "DenseI32ArrayAttr": $lane_layout,
"DenseI32ArrayAttr": $lane_data,
"DenseI32ArrayAttr": $order),
[{
auto sg_layout = DenseI32ArrayAttr();
auto sg_data = DenseI32ArrayAttr();
auto inst_data = DenseI32ArrayAttr();
return $_get($_ctxt, sg_layout, sg_data, inst_data,
lane_layout, lane_data, order);
}]>
];

Expand All @@ -262,7 +284,7 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
}

bool isSgLayout() {
return getSgLayout() == nullptr && getLaneLayout() != nullptr;
return !isWgLayout();
}

int64_t getRank() {
Expand All @@ -274,6 +296,16 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
return attr.size();
return 0;
}

/// Returns a copy of this layout with the subgroup-level fields
/// (sg_layout, sg_data) cleared, preserving inst_data and the
/// lane-level fields (lane_layout, lane_data) and order.
LayoutAttr dropSgLayoutAndData() {
return LayoutAttr::get(getContext(), nullptr, nullptr, getInstData(),
getLaneLayout(), getLaneData(), getOrder());
}

/// Returns a copy of this layout with inst_data cleared, preserving
/// the subgroup-level fields (sg_layout, sg_data) and the lane-level
/// fields (lane_layout, lane_data) and order.
LayoutAttr dropInstData() {
return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), nullptr,
getLaneLayout(), getLaneData(), getOrder());
}
}];

let assemblyFormat = "`<` struct(params) `>`";
Expand Down
7 changes: 1 addition & 6 deletions mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source,
"llvm::ArrayRef<OpFoldResult>": $offsets)>,

OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType> ": $source,
"llvm::ArrayRef<OpFoldResult>": $offsets,
"llvm::ArrayRef<OpFoldResult>": $shape,
"llvm::ArrayRef<OpFoldResult>": $strides)>,

OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source,
OpBuilder<(ins "Type": $tdesc, "Value": $source,
"llvm::ArrayRef<OpFoldResult>": $offsets,
"llvm::ArrayRef<OpFoldResult>": $shape,
"llvm::ArrayRef<OpFoldResult>": $strides)>
Expand Down
43 changes: 43 additions & 0 deletions mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,54 @@ class RewritePatternSet;

namespace xegpu {

/// Options to control the XeGPU unrolling. Its main purpose is to
/// provide a way to customize the native shape of the operation.
struct UnrollOptions {
using FilterConstraintFnType = std::function<LogicalResult(Operation *op)>;
/// Callback function that indicates whether vector unrolling should be

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: let's place this comment above "using" to have uniform look :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed it

/// attempted on the operation.
FilterConstraintFnType filterConstraint = nullptr;
UnrollOptions &setFilterConstraint(FilterConstraintFnType constraint) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an example/test that demonstrates usage of this option?

filterConstraint = std::move(constraint);
return *this;
}

using NativeShapeFnType =
std::function<std::optional<SmallVector<int64_t>>(Operation *op)>;
/// Function that returns the shape to unroll to for a given operation.
/// The unrolling is aborted if the function returns `std::nullopt`.
NativeShapeFnType nativeShape = nullptr;
UnrollOptions &setNativeShapeFn(NativeShapeFnType fn) {
nativeShape = std::move(fn);
return *this;
}
};

/// Appends patterns for folding aliasing ops into XeGPU ops into `patterns`.
void populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns);
/// Appends patterns for XeGPU SIMT distribution into `patterns`.
void populateXeGPUSubgroupDistributePatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to unroll xegpu operations into smaller shapes.
/// Users can control whether an operation should be unrolled, as well as its
/// target shape, via the `options` structure (by setting filterConstraint
/// and nativeShape respectively; both are function refs taking `op` as
/// input).
/// An `op` is unrolled to the `targetShape` as follows, for each of its
/// operands:
/// 1. the unrolled type `unrolledType` and number of unrolled instances
/// `numUnrolledInstances` are computed from the `targetShape`.
/// 2. pack each operand. ExtractStridedSlice ops are created to break up the
/// vector operands, and BuiltinUnrealizedCastOps are created to break up
/// the TensorDesc operands.
/// 3. the original op is cloned `numUnrolledInstances` times, once for each
/// result.
/// 4. unpack the results. InsertStridedSlice are inserted for VectorType
/// result, and BuiltinUnrealizedCastOp are inserted for TensorDescType result
/// to re-assemble the slices into the original shape.
void populateXeGPUUnrollPatterns(RewritePatternSet &patterns,
const UnrollOptions &options);

} // namespace xegpu
} // namespace mlir

Expand Down
44 changes: 17 additions & 27 deletions mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
Expand Down Expand Up @@ -141,46 +142,24 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
}

void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
Type tdesc, TypedValue<MemRefType> source,
Type tdesc, Value source,
llvm::ArrayRef<OpFoldResult> offsets,
llvm::ArrayRef<OpFoldResult> shape,
llvm::ArrayRef<OpFoldResult> strides) {
assert(shape.size() && offsets.size() && strides.size() &&
shape.size() == strides.size() && shape.size() == offsets.size());

llvm::SmallVector<int64_t> staticOffsets;
llvm::SmallVector<int64_t> staticShape;
llvm::SmallVector<int64_t> staticStrides;
auto intTy = dyn_cast<IntegerType>(source.getType());
auto memrefTy = dyn_cast<MemRefType>(source.getType());
assert(intTy || memrefTy && "Source has to be either int or memref.");

llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<Value> dynamicShape;
llvm::SmallVector<Value> dynamicStrides;

dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
dispatchIndexOpFoldResults(shape, dynamicShape, staticShape);
dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);

auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
auto staticShapeAttr = builder.getDenseI64ArrayAttr(staticShape);
auto staticStridesAttr = builder.getDenseI64ArrayAttr(staticStrides);

build(builder, state, tdesc, source, dynamicOffsets, dynamicShape,
dynamicStrides, staticOffsetsAttr, staticShapeAttr, staticStridesAttr);
}

void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
Type tdesc, TypedValue<IntegerType> source,
llvm::ArrayRef<OpFoldResult> offsets,
llvm::ArrayRef<OpFoldResult> shape,
llvm::ArrayRef<OpFoldResult> strides) {
assert(shape.size() && offsets.size() && strides.size() &&
shape.size() == strides.size() && shape.size() == offsets.size());

llvm::SmallVector<int64_t> staticOffsets;
llvm::SmallVector<int64_t> staticShape;
llvm::SmallVector<int64_t> staticStrides;
llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<Value> dynamicShape;
llvm::SmallVector<Value> dynamicStrides;

dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
dispatchIndexOpFoldResults(shape, dynamicShape, staticShape);
Expand All @@ -190,6 +169,17 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
auto staticShapeAttr = builder.getDenseI64ArrayAttr(staticShape);
auto staticStridesAttr = builder.getDenseI64ArrayAttr(staticStrides);

if (memrefTy) {
auto memrefShape = memrefTy.getShape();
auto [memrefStrides, offset] = memrefTy.getStridesAndOffset();

// If shape and strides come from the memref type, we don't need attributes for them.
if (staticShape == memrefShape && staticStrides == memrefStrides) {
staticShapeAttr = DenseI64ArrayAttr();
staticStridesAttr = DenseI64ArrayAttr();
}
}

build(builder, state, tdesc, source, dynamicOffsets, dynamicShape,
dynamicStrides, staticOffsetsAttr, staticShapeAttr, staticStridesAttr);
}
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_mlir_dialect_library(MLIRXeGPUTransforms
XeGPUFoldAliasOps.cpp
XeGPUSubgroupDistribute.cpp
XeGPUUnroll.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/XeGPU
Expand Down
Loading
Loading