-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[mlir] [amdgpu] Remove s_wait_loadcnt from amdgpu.lds_barrier on gfx12 #152778
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017
|
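@llvm/pr-subscribers-mlir @llvm/pr-subscribers-backend-amdgpu Author: Paul Trojahn (ptrojahn) Changes: Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017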
Full diff: https://github.com/llvm/llvm-project/pull/152778.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
LogicalResult
matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
- bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+ bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
if (requiresInlineAsm) {
auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
LLVM::AsmDialect::AD_ATT);
- const char *asmStr =
+ const char *asmStrPreGfx12 =
";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+ const char *asmStr =
+ ";;;WARNING: BREAKS DEBUG WATCHES\n"
+ "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
const char *constraints = "";
rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
op,
/*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
- /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+ /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+ constraints, /*has_side_effects=*/true,
/*is_align_stack=*/false, LLVM::TailCallKind::None,
/*asm_dialect=*/asmDialectAttr,
/*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
Location loc = op->getLoc();
ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+ return success();
} else {
- Location loc = op->getLoc();
- ROCDL::WaitDscntOp::create(rewriter, loc, 0);
- ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
- rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+ return op.emitOpError(
+ "don't know how to lower this for chipset major version")
+ << chipset.majorVersion;
}
-
- return success();
}
};
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
// GFX10-NEXT: rocdl.s.barrier
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
- // GFX12: rocdl.s.wait.dscnt 0
- // GFX12-NEXT: rocdl.s.barrier.signal -1
- // GFX12-NEXT: rocdl.s.barrier.wait -1
+ // GFX12: llvm.inline_asm has_side_effects asm_dialect = att
+ // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
amdgpu.lds_barrier
func.return
}
|
|
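@llvm/pr-subscribers-mlir-gpu Author: Paul Trojahn (ptrojahn) Changes: Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017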
Full diff: https://github.com/llvm/llvm-project/pull/152778.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
LogicalResult
matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
- bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+ bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
if (requiresInlineAsm) {
auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
LLVM::AsmDialect::AD_ATT);
- const char *asmStr =
+ const char *asmStrPreGfx12 =
";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+ const char *asmStr =
+ ";;;WARNING: BREAKS DEBUG WATCHES\n"
+ "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
const char *constraints = "";
rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
op,
/*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
- /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+ /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+ constraints, /*has_side_effects=*/true,
/*is_align_stack=*/false, LLVM::TailCallKind::None,
/*asm_dialect=*/asmDialectAttr,
/*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
Location loc = op->getLoc();
ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+ return success();
} else {
- Location loc = op->getLoc();
- ROCDL::WaitDscntOp::create(rewriter, loc, 0);
- ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
- rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+ return op.emitOpError(
+ "don't know how to lower this for chipset major version")
+ << chipset.majorVersion;
}
-
- return success();
}
};
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
// GFX10-NEXT: rocdl.s.barrier
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
- // GFX12: rocdl.s.wait.dscnt 0
- // GFX12-NEXT: rocdl.s.barrier.signal -1
- // GFX12-NEXT: rocdl.s.barrier.wait -1
+ // GFX12: llvm.inline_asm has_side_effects asm_dialect = att
+ // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
amdgpu.lds_barrier
func.return
}
|
|
With #155370 merged, this should no longer be necessary.
Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here:
llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Line 2017 in bd9117c