Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ebf5d9ef7de29b55fd9e9d504f83689b4013e0de
a992f29451b9e140424f35ac5e20177db4afbdc0
15 changes: 12 additions & 3 deletions lib/Conversion/TritonToTritonGPU/RelayoutTritonGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,20 @@ struct TMEMLoadOpPattern : public OpConversionPattern<ttng::TMEMLoadOp> {
LogicalResult
matchAndRewrite(ttng::TMEMLoadOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Type resultType = getTypeConverter()->convertType(op.getType());
RankedTensorType type = getTMEMTensorLayout(
typeConverter, op.getType(), op.getSrc().getType(), lookupNumWarps(op));
rewriter.modifyOpInPlace(op, [&] { op.getResult().setType(type); });
Type resultType = getTypeConverter()->convertType(op.getType());
if (type == resultType)
return success();

rewriter.setInsertionPointAfter(op);
auto cvt = ConvertLayoutOp::create(rewriter, op.getLoc(), resultType,
op.getResult());
rewriter.replaceAllUsesExcept(op.getResult(), cvt, cvt);
// Bypass the rewriter to avoid issues with the conversion framework's
// tracking of conditional replacements.
// See https://github.com/llvm/llvm-project/commit/504b50789602
op.getResult().replaceAllUsesExcept(cvt, cvt);
return success();
}
};
Expand Down Expand Up @@ -115,7 +121,10 @@ class RelayoutTritonGPU
// clang-format on
>(typeConverter, context);

if (failed(applyPartialConversion(mod, target, std::move(patterns))))
ConversionConfig config;
config.allowPatternRollback = false;
if (failed(
applyPartialConversion(mod, target, std::move(patterns), config)))
return signalPassFailure();
}
};
Expand Down
5 changes: 0 additions & 5 deletions third_party/amd/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,11 +451,6 @@ def make_amdgcn(src, metadata, options):
dump_file_id)
amdgcn = llvm.translate_to_asm(src, amd.TARGET_TRIPLE, options.arch, features, flags, options.enable_fp_fusion,
False)
# TODO: Remove the following workaround once LLVM is bumped to include: https://github.com/llvm/llvm-project/pull/169851
# Workaround for LLVM ERROR: cannot evaluate equated symbol 'amdgcn.device.init.num_named_barrier'
if knobs.compilation.enable_asan and 'gfx1250' not in options.arch:
amdgcn = amdgcn.replace('.amdgpu_metadata',
'\t.set\tamdgcn.device.init.num_named_barrier, 0\n.amdgpu_metadata')
if knobs.amd.dump_amdgcn:
print("// -----// AMDGCN Dump //----- //")
print(amdgcn)
Expand Down