diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 31801daa126ad..e55f94b9022ee 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &); LLVM_ABI void initializeDAEPass(PassRegistry &); LLVM_ABI void initializeDAHPass(PassRegistry &); LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &); +LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &); LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &); LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &); LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index f82a43967e67a..ea56c83a3b709 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -77,6 +77,7 @@ struct ForcePassLinking { (void)llvm::createDXILResourceTypeWrapperPassPass(); (void)llvm::createDeadArgEliminationPass(); (void)llvm::createDeadCodeEliminationPass(); + (void)llvm::createDeadStoreEliminationPass(); (void)llvm::createDependenceAnalysisWrapperPass(); (void)llvm::createDomOnlyPrinterWrapperPassPass(); (void)llvm::createDomPrinterWrapperPassPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 1398f171b0f78..8e68b6a57e51f 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -33,6 +33,13 @@ class Pass; // LLVM_ABI FunctionPass *createDeadCodeEliminationPass(); +//===----------------------------------------------------------------------===// +// +// DeadStoreElimination - This pass deletes stores that are post-dominated by +// must-aliased stores and are not loaded or used between the stores. 
+// +LLVM_ABI FunctionPass *createDeadStoreEliminationPass(); + //===----------------------------------------------------------------------===// // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp index 306db6a558779..695eacbb2fadc 100644 --- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp +++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp @@ -9,10 +9,13 @@ #include "DXILForwardHandleAccesses.h" #include "DXILShaderFlags.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/DXILResource.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" @@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) { DenseMap HandleMap; SmallVector LoadsToProcess; + DenseMap> LifeTimeIntrinsicMap; for (BasicBlock &BB : F) for (Instruction &Inst : BB) if (auto *II = dyn_cast(&Inst)) { @@ -78,6 +82,14 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) { case Intrinsic::dx_resource_handlefromimplicitbinding: processHandle(II, HandleMap); break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + if (II->arg_size() >= 1) { + Value *Ptr = II->getArgOperand(0); + if (auto *Alloca = dyn_cast(Ptr)) + LifeTimeIntrinsicMap[Alloca].push_back(II); + } + break; default: continue; } @@ -98,8 +110,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) { NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr); GV = dyn_cast_or_null(Loaded); } else if (auto *NestedAlloca = dyn_cast(V)) { - for (auto &Use : NestedAlloca->uses()) { - auto *Store = dyn_cast(Use.getUser()); + + if (auto It = 
LifeTimeIntrinsicMap.find(NestedAlloca); + It != LifeTimeIntrinsicMap.end()) { + llvm::for_each(It->second, + [](IntrinsicInst *II) { II->eraseFromParent(); }); + LifeTimeIntrinsicMap.erase(It); + } + + for (auto *User : NestedAlloca->users()) { + auto *Store = dyn_cast(User); if (!Store) continue; diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 84751d2db2266..4ca22479f3194 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include @@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { initializeDXILFinalizeLinkageLegacyPass(*PR); initializeDXILPrettyPrinterLegacyPass(*PR); initializeDXILForwardHandleAccessesLegacyPass(*PR); + initializeDSELegacyPassPass(*PR); initializeDXILCBufferAccessLegacyPass(*PR); } @@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig { addPass(createScalarizerPass(DxilScalarOptions)); addPass(createDXILFlattenArraysLegacyPass()); addPass(createDXILForwardHandleAccessesLegacyPass()); + addPass(createDeadStoreEliminationPass()); addPass(createDXILLegalizeLegacyPass()); addPass(createDXILResourceImplicitBindingLegacyPass()); addPass(createDXILTranslateMetadataLegacyPass()); diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index f46d54b463171..8093e44245d20 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -38,6 +38,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include 
"llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" @@ -69,6 +70,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve(); return PA; } + +namespace { + +/// A legacy pass for the legacy pass manager that wraps \c DSEPass. +class DSELegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + DSELegacyPass() : FunctionPass(ID) { + initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + AliasAnalysis &AA = getAnalysis().getAAResults(); + DominatorTree &DT = getAnalysis().getDomTree(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + MemorySSA &MSSA = getAnalysis().getMSSA(); + PostDominatorTree &PDT = + getAnalysis().getPostDomTree(); + LoopInfo &LI = getAnalysis().getLoopInfo(); + + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); + +#ifdef LLVM_ENABLE_STATS + if (AreStatisticsEnabled()) + for (auto &I : instructions(F)) + NumRemainingStores += isa(&I); +#endif + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + } +}; + +} // end anonymous namespace + +char DSELegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, + false) + +namespace llvm { +LLVM_ABI FunctionPass *createDeadStoreEliminationPass() { + return new DSELegacyPass(); +} +} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index c7e4a3e824700..032a3a7792824 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeMergeICmpsLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); + initializeDSELegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll similarity index 73% rename from llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll rename to llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll index 7c0813b0b4e36..ce5c2d7ca32bf 100644 --- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll +++ b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll @@ -1,5 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s +; RUN: opt -S -dxil-forward-handle-accesses %s 
| FileCheck %s --check-prefixes=CHECK,FHCHECK +; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK + +; Note: test to confirm fix for issues: 140819 & 151764 %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) } @global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4 @@ -11,11 +13,11 @@ define void @CSMain() local_unnamed_addr { ; CHECK-LABEL: define void @CSMain() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8 +; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8 ; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name) ; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4 -; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8 +; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4 +; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0) ; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -25,9 +27,11 @@ entry: %handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name) store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4 %val = load i32, ptr @global, align 4 + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) store i32 %val , ptr %alloca, align 8 %indirect = load target("dx.RawBuffer", 
i32, 1, 0), ptr %alloca, align 8 %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0) store i32 0, ptr %buff, align 4 + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) ret void } diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 151603a7161c5..360a6f6959e9f 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: DXIL Resource Type Analysis ; CHECK-NEXT: Target Transform Information - +; CHECK-NEXT: Assumption Cache Tracker ; CHECK-OBJ-NEXT: Machine Module Information ; CHECK-OBJ-NEXT: Machine Branch Probability Analysis ; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata @@ -26,6 +26,13 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: DXIL Forward Handle Accesses +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: DXIL Legalizer ; CHECK-NEXT: DXIL Resource Binding Analysis ; CHECK-NEXT: DXIL Resource Implicit Binding diff --git a/llvm/test/tools/dxil-dis/lifetimes.ll b/llvm/test/tools/dxil-dis/lifetimes.ll index 3c1666f7381f4..af7a19a4d4f66 100644 --- a/llvm/test/tools/dxil-dis/lifetimes.ll +++ b/llvm/test/tools/dxil-dis/lifetimes.ll @@ -4,10 +4,8 @@ target triple = "dxil-unknown-shadermodel6.7-library" define void @test_lifetimes() { ; CHECK-LABEL: test_lifetimes ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i32], [2 x i32]* [[ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* 
[[ALLOCA]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* nonnull [[BITCAST]]) -; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* nonnull [[BITCAST]]) ; CHECK-NEXT: ret void