Skip to content

Commit 8a721f3

Browse files
committed
Revert "[AMD] Enable General Swizzling ConvertLayoutOp (#7482)"
This reverts commit 318ff2c.
1 parent 6342021 commit 8a721f3

File tree

4 files changed

+11
-100
lines changed

4 files changed

+11
-100
lines changed

lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,9 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
278278
assert(cvtNeedsSharedMemory(op.getSrc().getType(), op.getType()));
279279

280280
// Try to use swizzling to implement the conversion
281-
if (succeeded(
282-
transferWithinBlockSwizzling(op, adaptor.getSrc(), rewriter))) {
281+
// HACK: Remove the isCuda() guard once AMD tests pass for the swizzling path
282+
if (targetInfo.isCuda() && succeeded(transferWithinBlockSwizzling(
283+
op, adaptor.getSrc(), rewriter))) {
283284
return success();
284285
}
285286

test/Conversion/amd/convert_layout.mlir

Lines changed: 0 additions & 52 deletions
This file was deleted.

third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
#include "Analysis/AMDGPUAllocation.h"
21
#include "PatternTritonGPUOpToLLVM.h"
32
#include "Utility.h"
43
#include "triton/Conversion/TritonGPUToLLVM/PatternTritonGPUOpToLLVM.h"
54
#include "triton/Conversion/TritonGPUToLLVM/Utility.h"
65
#include "triton/Dialect/TritonGPU/Transforms/Utility.h"
76

8-
using ::mlir::transferWithinBlockPadding;
97
using ::mlir::triton::gpu::AMDMfmaEncodingAttr;
108
using ::mlir::triton::gpu::AMDWmmaEncodingAttr;
11-
using ::mlir::triton::gpu::ConvertLayoutOp;
129
using ::mlir::triton::gpu::DotOperandEncodingAttr;
1310
using ::mlir::triton::gpu::MemDescType;
1411
using ::triton::gpu::LinearEncodingAttr;
@@ -290,36 +287,6 @@ struct ConvertLayoutOpMFMAToLinearConversion
290287
protected:
291288
const TargetInfoBase &targetInfo;
292289
};
293-
294-
struct ConvertLayoutForcedPadding
295-
: public ConvertOpToLLVMPattern<ConvertLayoutOp> {
296-
297-
explicit ConvertLayoutForcedPadding(LLVMTypeConverter &typeConverter,
298-
const TargetInfoBase &targetInfo,
299-
PatternBenefit benefit)
300-
: ConvertOpToLLVMPattern<ConvertLayoutOp>(typeConverter, benefit),
301-
targetInfo(targetInfo) {}
302-
303-
LogicalResult
304-
matchAndRewrite(ConvertLayoutOp op, OpAdaptor adaptor,
305-
ConversionPatternRewriter &rewriter) const override {
306-
if (!op->hasAttr(mlir::triton::AMD::AttrSharedMemPadded))
307-
return failure();
308-
auto srcType = op.getSrc().getType();
309-
auto dstType = op.getType();
310-
if (!cvtNeedsSharedMemory(srcType, dstType))
311-
return failure();
312-
313-
auto result = transferWithinBlockPadding(op, adaptor.getSrc(), targetInfo,
314-
getTypeConverter(), rewriter);
315-
rewriter.replaceOp(op, result);
316-
return success();
317-
}
318-
319-
protected:
320-
const TargetInfoBase &targetInfo;
321-
};
322-
323290
} // namespace
324291

325292
void mlir::triton::AMD::populateConvertLayoutOpToLLVMPatterns(
@@ -329,5 +296,4 @@ void mlir::triton::AMD::populateConvertLayoutOpToLLVMPatterns(
329296
benefit);
330297
patterns.add<ConvertLayoutOpMFMAToLinearConversion>(typeConverter, targetInfo,
331298
benefit);
332-
patterns.add<ConvertLayoutForcedPadding>(typeConverter, targetInfo, benefit);
333299
}

third_party/amd/lib/TritonAMDGPUToLLVM/OptimizeLDSUsage.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,15 @@ class OptimizeAMDLDSUsage
9494
LDBG("Trying fit " << cvtOp << " into " << targetLDSSize << " bytes");
9595
OpBuilder builder(cvtOp);
9696

97-
auto ctx = builder.getContext();
9897
auto srcType = cvtOp.getSrc().getType();
9998
auto dstType = cvtOp.getType();
10099

101-
if (!cvtOp->hasAttr(triton::AMD::AttrSharedMemPadded)) {
102-
auto emptyAttribute = UnitAttr::get(ctx);
103-
// Padded conversion seems more friendly with this optimization
104-
// use it instead of general swizzling.
105-
cvtOp->setAttr(triton::AMD::AttrSharedMemPadded, emptyAttribute);
106-
// If the padded layout by itself brings LDS usage within the target, we are done; return.
107-
if (triton::AMD::getConvertLayoutScratchInBytes(
108-
srcType, dstType, /*usePadding*/ true) <= targetLDSSize)
109-
return;
110-
}
111-
112100
auto srcEnc =
113101
cast<triton::gpu::DistributedEncodingTrait>(srcType.getEncoding());
114102
auto dstEnc =
115103
cast<triton::gpu::DistributedEncodingTrait>(dstType.getEncoding());
116104

105+
auto ctx = srcEnc.getContext();
117106
auto rank = srcType.getRank();
118107

119108
unsigned numWarps = triton::gpu::lookupNumWarps(cvtOp);
@@ -255,6 +244,13 @@ class OptimizeAMDLDSUsage
255244
LDSLimit = targetInfo.getSharedMemorySize();
256245
}
257246

247+
auto context = mod.getContext();
248+
auto emptyAttribute = UnitAttr::get(context);
249+
// TODO: Choose between padded and swizzled memory patterns per op instead of marking every ConvertLayoutOp as padded
250+
mod.walk([emptyAttribute](triton::gpu::ConvertLayoutOp op) -> void {
251+
op->setAttr(mlir::triton::AMD::AttrSharedMemPadded, emptyAttribute);
252+
});
253+
258254
ModuleAllocation allocAnalysis(
259255
mod, mlir::triton::AMD::AMDAllocationAnalysisScratchSizeFn);
260256
if (allocAnalysis.getSharedMemorySize() <= LDSLimit)

0 commit comments

Comments
 (0)