Skip to content

[DirectX] Remove lifetime intrinsics and run Dead Store Elimination #152636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 12, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeDAEPass(PassRegistry &);
LLVM_ABI void initializeDAHPass(PassRegistry &);
LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &);
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &);
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &);
LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &);
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct ForcePassLinking {
(void)llvm::createDXILResourceTypeWrapperPassPass();
(void)llvm::createDeadArgEliminationPass();
(void)llvm::createDeadCodeEliminationPass();
(void)llvm::createDeadStoreEliminationPass();
(void)llvm::createDependenceAnalysisWrapperPass();
(void)llvm::createDomOnlyPrinterWrapperPassPass();
(void)llvm::createDomPrinterWrapperPassPass();
Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/Transforms/Scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ class Pass;
//
LLVM_ABI FunctionPass *createDeadCodeEliminationPass();

//===----------------------------------------------------------------------===//
//
// DeadStoreElimination - This pass deletes stores that are post-dominated by
// must-aliased stores and are not loaded used between the stores.
//
LLVM_ABI FunctionPass *createDeadStoreEliminationPass();

//===----------------------------------------------------------------------===//
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
Expand Down
27 changes: 25 additions & 2 deletions llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
#include "DXILForwardHandleAccesses.h"
#include "DXILShaderFlags.h"
#include "DirectX.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
Expand Down Expand Up @@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {

DenseMap<GlobalVariable *, IntrinsicInst *> HandleMap;
SmallVector<LoadInst *> LoadsToProcess;
DenseMap<AllocaInst *, SmallVector<IntrinsicInst *>> LifeTimeIntrinsicMap;
for (BasicBlock &BB : F)
for (Instruction &Inst : BB)
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
Expand All @@ -78,6 +82,17 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
case Intrinsic::dx_resource_handlefromimplicitbinding:
processHandle(II, HandleMap);
break;
// Note: Lifetime intrinsics do not show up as users of an Alloca.
// As a result we walk the whole function to find the lifetimes and
// store them so that we may delete the alloca matches
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
if (II->arg_size() >= 1) {
Value *Ptr = II->getArgOperand(0);
if (auto *Alloca = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()))
LifeTimeIntrinsicMap[Alloca].push_back(II);
}
break;
default:
continue;
}
Expand All @@ -98,8 +113,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
GV = dyn_cast_or_null<GlobalVariable>(Loaded);
} else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
for (auto &Use : NestedAlloca->uses()) {
auto *Store = dyn_cast<StoreInst>(Use.getUser());

if (auto It = LifeTimeIntrinsicMap.find(NestedAlloca);
It != LifeTimeIntrinsicMap.end()) {
llvm::for_each(It->second,
[](IntrinsicInst *II) { II->eraseFromParent(); });
LifeTimeIntrinsicMap.erase(It);
}

for (auto *User : NestedAlloca->users()) {
auto *Store = dyn_cast<StoreInst>(User);
if (!Store)
continue;

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <optional>

Expand Down Expand Up @@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
initializeDXILFinalizeLinkageLegacyPass(*PR);
initializeDXILPrettyPrinterLegacyPass(*PR);
initializeDXILForwardHandleAccessesLegacyPass(*PR);
initializeDSELegacyPassPass(*PR);
initializeDXILCBufferAccessLegacyPass(*PR);
}

Expand Down Expand Up @@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig {
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILFlattenArraysLegacyPass());
addPass(createDXILForwardHandleAccessesLegacyPass());
addPass(createDeadStoreEliminationPass());
addPass(createDXILLegalizeLegacyPass());
addPass(createDXILResourceImplicitBindingLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
Expand Down
78 changes: 78 additions & 0 deletions llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old pass manager had this dependency when it was removed so I brought it back.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
Expand Down Expand Up @@ -69,6 +70,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}

namespace {

/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid

DSELegacyPass() : FunctionPass(ID) {
initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
}

bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;

AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
for (auto &I : instructions(F))
NumRemainingStores += isa<StoreInst>(&I);
#endif

return Changed;
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
}
};

} // end anonymous namespace

char DSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)

namespace llvm {
LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
return new DSELegacyPass();
}
} // namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Scalar/Scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeMergeICmpsLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeDSELegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s --check-prefixes=CHECK,FHCHECK
; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK

; Note: test to confirm fix for issues: 140819 & 151764

%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
@global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
Expand All @@ -11,11 +13,11 @@
define void @CSMain() local_unnamed_addr {
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
; FHCHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
; FHCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
; FHCHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
; CHECK-NEXT: ret void
Expand All @@ -25,9 +27,11 @@ entry:
%handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
%val = load i32, ptr @global, align 4
call void @llvm.lifetime.start.p0(ptr nonnull %alloca)
store i32 %val , ptr %alloca, align 8
%indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
%buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
store i32 0, ptr %buff, align 4
call void @llvm.lifetime.end.p0(ptr nonnull %alloca)
ret void
}
9 changes: 8 additions & 1 deletion llvm/test/CodeGen/DirectX/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; CHECK-NEXT: Target Library Information
; CHECK-NEXT: DXIL Resource Type Analysis
; CHECK-NEXT: Target Transform Information

; CHECK-NEXT: Assumption Cache Tracker
; CHECK-OBJ-NEXT: Machine Module Information
; CHECK-OBJ-NEXT: Machine Branch Probability Analysis
; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
Expand All @@ -26,6 +26,13 @@
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: DXIL Forward Handle Accesses
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: DXIL Legalizer
; CHECK-NEXT: DXIL Resource Binding Analysis
; CHECK-NEXT: DXIL Resource Implicit Binding
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/tools/dxil-dis/lifetimes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ target triple = "dxil-unknown-shadermodel6.7-library"
define void @test_lifetimes() {
; CHECK-LABEL: test_lifetimes
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i32], [2 x i32]* [[ALLOCA]], i32 0, i32 0
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* nonnull [[BITCAST]])
; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* nonnull [[BITCAST]])
; CHECK-NEXT: ret void
Expand Down