Skip to content

Commit eeb8904

Browse files
committed
[DirectX] remove lifetime intrinsics and run Dead Store Elimination
Fixes #151764. This fix has two parts. First, we track all lifetime intrinsics, and if they are users of an alloca of a target extension type like dx.RawBuffer, we eliminate those lifetime intrinsics when we visit the alloca. Second, we run the Dead Store Elimination pass, which step one makes possible; this removes the alloca and simplifies the use of the target extension type back to using just the global. That keeps things in a form the DXILBitcodeWriter is expecting. To pull this off, we needed to bring back the legacy pass manager plumbing for the DSE pass and hook it up in the DirectX backend.
1 parent 04672e2 commit eeb8904

File tree

8 files changed

+125
-7
lines changed

8 files changed

+125
-7
lines changed

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &);
8585
LLVM_ABI void initializeDAEPass(PassRegistry &);
8686
LLVM_ABI void initializeDAHPass(PassRegistry &);
8787
LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &);
88+
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &);
8889
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &);
8990
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &);
9091
LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ struct ForcePassLinking {
7777
(void)llvm::createDXILResourceTypeWrapperPassPass();
7878
(void)llvm::createDeadArgEliminationPass();
7979
(void)llvm::createDeadCodeEliminationPass();
80+
(void)llvm::createDeadStoreEliminationPass();
8081
(void)llvm::createDependenceAnalysisWrapperPass();
8182
(void)llvm::createDomOnlyPrinterWrapperPassPass();
8283
(void)llvm::createDomPrinterWrapperPassPass();

llvm/include/llvm/Transforms/Scalar.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ class Pass;
3333
//
3434
LLVM_ABI FunctionPass *createDeadCodeEliminationPass();
3535

36+
//===----------------------------------------------------------------------===//
37+
//
38+
// DeadStoreElimination - This pass deletes stores that are post-dominated by
39+
// must-aliased stores and are not loaded between the stores.
40+
//
41+
LLVM_ABI FunctionPass *createDeadStoreEliminationPass();
42+
3643
//===----------------------------------------------------------------------===//
3744
//
3845
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.

llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99
#include "DXILForwardHandleAccesses.h"
1010
#include "DXILShaderFlags.h"
1111
#include "DirectX.h"
12+
#include "llvm/ADT/STLExtras.h"
1213
#include "llvm/Analysis/DXILResource.h"
1314
#include "llvm/Analysis/Loads.h"
1415
#include "llvm/IR/DiagnosticInfo.h"
1516
#include "llvm/IR/Dominators.h"
17+
#include "llvm/IR/InstrTypes.h"
18+
#include "llvm/IR/Instructions.h"
1619
#include "llvm/IR/IntrinsicInst.h"
1720
#include "llvm/IR/Intrinsics.h"
1821
#include "llvm/IR/IntrinsicsDirectX.h"
@@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
7073

7174
DenseMap<GlobalVariable *, IntrinsicInst *> HandleMap;
7275
SmallVector<LoadInst *> LoadsToProcess;
76+
DenseMap<AllocaInst *, SmallVector<IntrinsicInst *>> LifeTimeIntrinsicMap;
7377
for (BasicBlock &BB : F)
7478
for (Instruction &Inst : BB)
7579
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -78,6 +82,17 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
7882
case Intrinsic::dx_resource_handlefromimplicitbinding:
7983
processHandle(II, HandleMap);
8084
break;
85+
// Note: Lifetime intrinsics do not show up as users of an Alloca.
86+
// As a result we walk the whole function to find the lifetimes and
87+
// store them so that we can delete the ones whose alloca we later visit.
88+
case Intrinsic::lifetime_start:
89+
case Intrinsic::lifetime_end:
90+
if (II->arg_size() >= 2) {
91+
Value *Ptr = II->getArgOperand(1);
92+
if (auto *Alloca = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()))
93+
LifeTimeIntrinsicMap[Alloca].push_back(II);
94+
}
95+
break;
8196
default:
8297
continue;
8398
}
@@ -98,8 +113,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
98113
NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
99114
GV = dyn_cast_or_null<GlobalVariable>(Loaded);
100115
} else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
101-
for (auto &Use : NestedAlloca->uses()) {
102-
auto *Store = dyn_cast<StoreInst>(Use.getUser());
116+
117+
if (auto It = LifeTimeIntrinsicMap.find(NestedAlloca);
118+
It != LifeTimeIntrinsicMap.end()) {
119+
llvm::for_each(It->second,
120+
[](IntrinsicInst *II) { II->eraseFromParent(); });
121+
LifeTimeIntrinsicMap.erase(It);
122+
}
123+
124+
for (auto *User : NestedAlloca->users()) {
125+
auto *Store = dyn_cast<StoreInst>(User);
103126
if (!Store)
104127
continue;
105128

llvm/lib/Target/DirectX/DirectXTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/Support/Compiler.h"
4646
#include "llvm/Support/ErrorHandling.h"
4747
#include "llvm/Target/TargetLoweringObjectFile.h"
48+
#include "llvm/Transforms/Scalar.h"
4849
#include "llvm/Transforms/Scalar/Scalarizer.h"
4950
#include <optional>
5051

@@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
7273
initializeDXILFinalizeLinkageLegacyPass(*PR);
7374
initializeDXILPrettyPrinterLegacyPass(*PR);
7475
initializeDXILForwardHandleAccessesLegacyPass(*PR);
76+
initializeDSELegacyPassPass(*PR);
7577
initializeDXILCBufferAccessLegacyPass(*PR);
7678
}
7779

@@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig {
112114
addPass(createScalarizerPass(DxilScalarOptions));
113115
addPass(createDXILFlattenArraysLegacyPass());
114116
addPass(createDXILForwardHandleAccessesLegacyPass());
117+
addPass(createDeadStoreEliminationPass());
115118
addPass(createDXILLegalizeLegacyPass());
116119
addPass(createDXILResourceImplicitBindingLegacyPass());
117120
addPass(createDXILTranslateMetadataLegacyPass());

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "llvm/ADT/Statistic.h"
3939
#include "llvm/ADT/StringRef.h"
4040
#include "llvm/Analysis/AliasAnalysis.h"
41+
#include "llvm/Analysis/AssumptionCache.h"
4142
#include "llvm/Analysis/CaptureTracking.h"
4243
#include "llvm/Analysis/GlobalsModRef.h"
4344
#include "llvm/Analysis/LoopInfo.h"
@@ -69,6 +70,7 @@
6970
#include "llvm/IR/PassManager.h"
7071
#include "llvm/IR/PatternMatch.h"
7172
#include "llvm/IR/Value.h"
73+
#include "llvm/InitializePasses.h"
7274
#include "llvm/Support/Casting.h"
7375
#include "llvm/Support/CommandLine.h"
7476
#include "llvm/Support/Debug.h"
@@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
26662668
PA.preserve<LoopAnalysis>();
26672669
return PA;
26682670
}
2671+
2672+
namespace {
2673+
2674+
/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2675+
class DSELegacyPass : public FunctionPass {
2676+
public:
2677+
static char ID; // Pass identification, replacement for typeid
2678+
2679+
// Constructing the pass also registers it with the global PassRegistry.
DSELegacyPass() : FunctionPass(ID) {
2680+
initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2681+
}
2682+
2683+
// Runs dead store elimination on F. Returns the value reported by
// eliminateDeadStores, or false when skipFunction(F) says to skip F.
bool runOnFunction(Function &F) override {
2684+
if (skipFunction(F))
2685+
return false;
2686+
2687+
// Fetch every analysis that eliminateDeadStores takes as an argument.
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2688+
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2689+
const TargetLibraryInfo &TLI =
2690+
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2691+
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2692+
PostDominatorTree &PDT =
2693+
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2694+
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2695+
2696+
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2697+
2698+
// Statistics only: count the StoreInsts still present in F after the run.
#ifdef LLVM_ENABLE_STATS
2699+
if (AreStatisticsEnabled())
2700+
for (auto &I : instructions(F))
2701+
NumRemainingStores += isa<StoreInst>(&I);
2702+
#endif
2703+
2704+
return Changed;
2705+
}
2706+
2707+
// Declares which analyses this pass requires and which it preserves.
void getAnalysisUsage(AnalysisUsage &AU) const override {
2708+
AU.setPreservesCFG();
2709+
AU.addRequired<AAResultsWrapperPass>();
2710+
AU.addRequired<TargetLibraryInfoWrapperPass>();
2711+
AU.addPreserved<GlobalsAAWrapperPass>();
2712+
AU.addRequired<DominatorTreeWrapperPass>();
2713+
AU.addPreserved<DominatorTreeWrapperPass>();
2714+
AU.addRequired<PostDominatorTreeWrapperPass>();
2715+
AU.addRequired<MemorySSAWrapperPass>();
2716+
AU.addPreserved<PostDominatorTreeWrapperPass>();
2717+
AU.addPreserved<MemorySSAWrapperPass>();
2718+
AU.addRequired<LoopInfoWrapperPass>();
2719+
AU.addPreserved<LoopInfoWrapperPass>();
2720+
// NOTE(review): AssumptionCacheTracker is required here, but runOnFunction
// never calls getAnalysis<AssumptionCacheTracker>() -- confirm it is needed.
AU.addRequired<AssumptionCacheTracker>();
2721+
}
2722+
};
2723+
2724+
} // end anonymous namespace
2725+
2726+
char DSELegacyPass::ID = 0;
2727+
2728+
// Registers DSELegacyPass under the command-line name "dse" together with the
// passes it lists as dependencies.
// NOTE(review): MemoryDependenceWrapperPass and GlobalsAAWrapperPass are listed
// below, but DSELegacyPass::getAnalysisUsage does not addRequired either of
// them (GlobalsAAWrapperPass is only addPreserved) -- confirm they are needed.
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2729+
false)
2730+
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2731+
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2732+
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2733+
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2734+
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2735+
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2736+
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2737+
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2738+
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2739+
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2740+
false)
2741+
2742+
namespace llvm {
2743+
/// Returns a newly allocated DSELegacyPass as a FunctionPass pointer.
LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
2744+
return new DSELegacyPass();
2745+
}
2746+
} // namespace llvm

llvm/lib/Transforms/Scalar/Scalar.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
3737
initializeMergeICmpsLegacyPassPass(Registry);
3838
initializeNaryReassociateLegacyPassPass(Registry);
3939
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
40+
initializeDSELegacyPassPass(Registry);
4041
initializeReassociateLegacyPassPass(Registry);
4142
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
4243
initializeSROALegacyPassPass(Registry);

llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll renamed to llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
1+
; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s --check-prefixes=CHECK,FHCHECK
2+
; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK
3+
4+
; Note: test to confirm fix for issues: 140819 & 151764
35

46
%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
57
@global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
@@ -11,11 +13,11 @@
1113
define void @CSMain() local_unnamed_addr {
1214
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
1315
; CHECK-NEXT: [[ENTRY:.*:]]
14-
; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
16+
; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
1517
; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
1618
; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
17-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
18-
; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
19+
; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
20+
; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
1921
; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
2022
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
2123
; CHECK-NEXT: ret void
@@ -25,9 +27,11 @@ entry:
2527
%handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
2628
store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
2729
%val = load i32, ptr @global, align 4
30+
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %alloca)
2831
store i32 %val , ptr %alloca, align 8
2932
%indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
3033
%buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
3134
store i32 0, ptr %buff, align 4
35+
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %alloca)
3236
ret void
3337
}

0 commit comments

Comments
 (0)