Skip to content

Commit e19e02f

Browse files
committed
do not add barrier op for subgroup 2d block -> dpas conversion
1 parent 9641912 commit e19e02f

File tree

4 files changed

+20
-8
lines changed

4 files changed

+20
-8
lines changed

third_party/intel/include/Dialect/TritonIntelGPU/Transforms/Utility.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ Attribute inferSrcEncoding(Operation *op, Attribute encoding);
3333
// Returns true if the operation is an expensive load or store operation.
3434
bool isExpensiveLoadOrStore(Operation *op);
3535

36+
// Returns true if the conversion between tensor types should be a no-op. Will
37+
// be removed once layout conversion for BlockIO types is lifted from
38+
// LoadStoreOpToLLVM.cpp.
39+
bool isBlockIONoOpConversion(RankedTensorType srcType,
40+
RankedTensorType dstType);
41+
3642
// Returns true if the tensor type has a subgroup 2d block io encoding
3743
bool hasSubgroup2DBlockEncoding(RankedTensorType tensorType);
3844

third_party/intel/lib/Analysis/Allocation.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "intel/include/Analysis/Allocation.h"
22
#include "intel/include/Analysis/Utility.h"
3+
#include "intel/include/Dialect/TritonIntelGPU/Transforms/Utility.h" // isBlockIONoOpConversion
34
#include "triton/Dialect/Triton/IR/Utility.h"
45
#include "llvm/ADT/TypeSwitch.h"
56

@@ -11,6 +12,9 @@ constexpr unsigned invalidSize = -1;
1112
unsigned allocationAnalysisScratchSizeFn(gpu::ConvertLayoutOp convertLayout) {
1213
RankedTensorType srcTy = convertLayout.getSrc().getType();
1314
RankedTensorType dstTy = convertLayout.getResult().getType();
15+
16+
if (gpu::intel::isBlockIONoOpConversion(srcTy, dstTy))
17+
return 0;
1418
if (gpu::intel::cvtIsSubGroupShuffle(srcTy, dstTy))
1519
return 0;
1620
if (gpu::intel::cvtIsSubGroupTranspose(srcTy, dstTy)) {

third_party/intel/lib/TritonIntelGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,15 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
2525
ConversionPatternRewriter &rewriter) const override {
2626
MLIRContext *ctx = op.getContext();
2727

28-
auto srcTy = op.getSrc().getType();
28+
RankedTensorType srcTy = op.getSrc().getType();
2929
auto dstTy = op.getType();
3030

31-
if (auto srcTensorTy = cast<RankedTensorType>(srcTy)) {
32-
if (auto dstTensorTy = cast<RankedTensorType>(dstTy)) {
31+
if (auto dstTensorTy = cast<RankedTensorType>(dstTy)) {
32+
if (intel::isBlockIONoOpConversion(srcTy, dstTensorTy)) {
3333
// TODO: replace this with proper conversion once conversion is removed
3434
// from LoadStoreOpToLLVM.
35-
if (intel::hasSubgroup2DBlockEncoding(srcTensorTy) &&
36-
intel::hasDotDpasEncoding(dstTensorTy)) {
37-
rewriter.replaceOp(op, op.getSrc());
38-
return success();
39-
}
35+
rewriter.replaceOp(op, op.getSrc());
36+
return success();
4037
}
4138
}
4239

third_party/intel/lib/TritonIntelGPUTransforms/Utility.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ bool isExpensiveLoadOrStore(Operation *op) {
153153
return false;
154154
}
155155

156+
bool isBlockIONoOpConversion(RankedTensorType srcType,
157+
RankedTensorType dstType) {
158+
return hasSubgroup2DBlockEncoding(srcType) && hasDotDpasEncoding(dstType);
159+
}
160+
156161
bool hasSubgroup2DBlockEncoding(RankedTensorType tensorType) {
157162
if (!tensorType.getEncoding())
158163
return false;

0 commit comments

Comments
 (0)