-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[DirectX] Remove lifetime intrinsics and run Dead Store Elimination #152636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-llvm-transforms Author: Farzon Lotfi (farzonl) Changes: fixes #151764. This fix has two parts. First, we track all lifetime intrinsics, and if they are users of an alloca of a target extension type like dx.RawBuffer, we eliminate those lifetime intrinsics when we visit the alloca. Second, with the lifetime intrinsics gone, we can run the Dead Store Elimination pass, which removes the alloca and simplifies the use of the target extension type back to using just the global. That keeps things in a form the DXILBitcodeWriter is expecting. To pull this off we needed to bring back the legacy pass manager plumbing for the DSE pass and hook it up into the DirectX backend. The net impact of this change is that the DML shader pass rate went from 89.72% (4268 successful compilations) to 90.98% (4328 successful compilations). Full diff: https://github.com/llvm/llvm-project/pull/152636.diff 8 Files Affected:
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 31801daa126ad..e55f94b9022ee 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeDAEPass(PassRegistry &);
LLVM_ABI void initializeDAHPass(PassRegistry &);
LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &);
+LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &);
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &);
LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index f82a43967e67a..ea56c83a3b709 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -77,6 +77,7 @@ struct ForcePassLinking {
(void)llvm::createDXILResourceTypeWrapperPassPass();
(void)llvm::createDeadArgEliminationPass();
(void)llvm::createDeadCodeEliminationPass();
+ (void)llvm::createDeadStoreEliminationPass();
(void)llvm::createDependenceAnalysisWrapperPass();
(void)llvm::createDomOnlyPrinterWrapperPassPass();
(void)llvm::createDomPrinterWrapperPassPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 1398f171b0f78..8e68b6a57e51f 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -33,6 +33,13 @@ class Pass;
//
LLVM_ABI FunctionPass *createDeadCodeEliminationPass();
+//===----------------------------------------------------------------------===//
+//
+// DeadStoreElimination - This pass deletes stores that are post-dominated by
+// must-aliased stores and are not loaded used between the stores.
+//
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass();
+
//===----------------------------------------------------------------------===//
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 306db6a558779..ee0a18d231904 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -9,10 +9,13 @@
#include "DXILForwardHandleAccesses.h"
#include "DXILShaderFlags.h"
#include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
@@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
DenseMap<GlobalVariable *, IntrinsicInst *> HandleMap;
SmallVector<LoadInst *> LoadsToProcess;
+ DenseMap<AllocaInst *, SmallVector<IntrinsicInst *>> LifeTimeIntrinsicMap;
for (BasicBlock &BB : F)
for (Instruction &Inst : BB)
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -78,6 +82,17 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
case Intrinsic::dx_resource_handlefromimplicitbinding:
processHandle(II, HandleMap);
break;
+ // Note: Lifetime intrinsics do not show up as users of an Alloca.
+ // As a result we walk the whole function to find the lifetimes and
+ // store them so that we may delete the alloca matches
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ if (II->arg_size() >= 2) {
+ Value *Ptr = II->getArgOperand(1);
+ if (auto *Alloca = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()))
+ LifeTimeIntrinsicMap[Alloca].push_back(II);
+ }
+ break;
default:
continue;
}
@@ -98,8 +113,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
GV = dyn_cast_or_null<GlobalVariable>(Loaded);
} else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
- for (auto &Use : NestedAlloca->uses()) {
- auto *Store = dyn_cast<StoreInst>(Use.getUser());
+
+ if (auto It = LifeTimeIntrinsicMap.find(NestedAlloca);
+ It != LifeTimeIntrinsicMap.end()) {
+ llvm::for_each(It->second,
+ [](IntrinsicInst *II) { II->eraseFromParent(); });
+ LifeTimeIntrinsicMap.erase(It);
+ }
+
+ for (auto *User : NestedAlloca->users()) {
+ auto *Store = dyn_cast<StoreInst>(User);
if (!Store)
continue;
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 84751d2db2266..4ca22479f3194 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <optional>
@@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
initializeDXILFinalizeLinkageLegacyPass(*PR);
initializeDXILPrettyPrinterLegacyPass(*PR);
initializeDXILForwardHandleAccessesLegacyPass(*PR);
+ initializeDSELegacyPassPass(*PR);
initializeDXILCBufferAccessLegacyPass(*PR);
}
@@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig {
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILFlattenArraysLegacyPass());
addPass(createDXILForwardHandleAccessesLegacyPass());
+ addPass(createDeadStoreEliminationPass());
addPass(createDXILLegalizeLegacyPass());
addPass(createDXILResourceImplicitBindingLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9b8718035df62..17725cc8c3b96 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,6 +38,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -69,6 +70,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
+
+namespace {
+
+/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
+class DSELegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ DSELegacyPass() : FunctionPass(ID) {
+ initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ PostDominatorTree &PDT =
+ getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+
+#ifdef LLVM_ENABLE_STATS
+ if (AreStatisticsEnabled())
+ for (auto &I : instructions(F))
+ NumRemainingStores += isa<StoreInst>(&I);
+#endif
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ }
+};
+
+} // end anonymous namespace
+
+char DSELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
+ false)
+
+namespace llvm {
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+ return new DSELegacyPass();
+}
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index c7e4a3e824700..032a3a7792824 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeMergeICmpsLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
+ initializeDSELegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
similarity index 72%
rename from llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
rename to llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
index 7c0813b0b4e36..c628aafbd8d39 100644
--- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
+++ b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
+; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s --check-prefixes=CHECK,FHCHECK
+; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK
+
+; Note: test to confirm fix for issues: 140819 & 151764
%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
@global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
@@ -11,11 +13,11 @@
define void @CSMain() local_unnamed_addr {
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
-; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
+; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
; CHECK-NEXT: ret void
@@ -25,9 +27,11 @@ entry:
%handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
%val = load i32, ptr @global, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %alloca)
store i32 %val , ptr %alloca, align 8
%indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
%buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
store i32 0, ptr %buff, align 4
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %alloca)
ret void
}
|
eeb8904
to
55c336d
Compare
fixes llvm#151764. This fix has two parts. First, we track all lifetime intrinsics, and if they are users of an alloca of a target extension type like dx.RawBuffer, we eliminate those lifetime intrinsics when we visit the alloca. Second, with the lifetime intrinsics gone, we can run the Dead Store Elimination pass, which removes the alloca and simplifies the use of the target extension type back to using just the global. That keeps things in a form the DXILBitcodeWriter is expecting. To pull this off we needed to bring back the legacy pass manager plumbing for the DSE pass and hook it up into the DirectX backend.
…1 to account for intrinsic changes.
c4ec501
to
79a4e9d
Compare
@@ -38,6 +38,7 @@ | |||
#include "llvm/ADT/Statistic.h" | |||
#include "llvm/ADT/StringRef.h" | |||
#include "llvm/Analysis/AliasAnalysis.h" | |||
#include "llvm/Analysis/AssumptionCache.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The legacy pass manager version of the DSE pass had this dependency at the time it was removed, so I brought the dependency back along with the pass.
fixes #151764
This fix has two parts. First, we track all lifetime intrinsics, and if they are users of an alloca of a target extension type like dx.RawBuffer, we eliminate those lifetime intrinsics when we visit the alloca.
We do step one so that we can use the Dead Store Elimination pass, which removes the alloca and simplifies the use of the target extension type back to using just the global. That keeps things in a form the DXILBitcodeWriter is expecting.
Obviously to pull this off we needed to bring back the legacy pass manager plumbing for the DSE pass and hook it up into the DirectX backend.
The net impact of this change is that DML shader pass rate went from 89.72% (4268 successful compilations) to 90.98% (4328 successful compilations).