Skip to content

Commit 52bed8b

Browse files
jaladreips and igcbot
authored and committed
Refactor optimization handling in new inline raytracing
Refactor optimization handling in new inline raytracing to defer modifying the function until we are done with all liveness objects. This way, we don't invalidate liveness objects and avoid costly recalculations.
1 parent f858b91 commit 52bed8b

File tree

3 files changed

+95
-55
lines changed

3 files changed

+95
-55
lines changed

IGC/AdaptorCommon/LivenessUtils/AllocationLivenessAnalyzer.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,9 @@ class AllocationLivenessAnalyzer : public llvm::FunctionPass {
3030
struct Edge {
3131
llvm::BasicBlock *from;
3232
llvm::BasicBlock *to;
33+
34+
bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
35+
bool operator!=(const Edge &other) const { return !(other == *this); }
3336
};
3437

3538
llvm::SmallVector<Edge> lifetimeEndEdges;

IGC/AdaptorCommon/RayTracing/NewTraceRayInlineLoweringPass.cpp

Lines changed: 91 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ SPDX-License-Identifier: MIT
1414

1515
#include "common/LLVMWarningsPush.hpp"
1616
#include <llvm/ADT/STLExtras.h>
17+
#include <llvm/ADT/DenseMapInfo.h>
18+
#include <llvm/ADT/Hashing.h>
1719
#include <llvm/Analysis/LoopInfo.h>
1820
#include <llvm/IR/Dominators.h>
1921
#include <llvm/IR/InstIterator.h>
@@ -25,6 +27,29 @@ SPDX-License-Identifier: MIT
2527
using namespace IGC;
2628
using namespace llvm;
2729

30+
namespace llvm {
31+
template<>
32+
struct DenseMapInfo<IGC::AllocationLivenessAnalyzer::LivenessData::Edge> {
33+
using Edge = IGC::AllocationLivenessAnalyzer::LivenessData::Edge;
34+
35+
static inline Edge getEmptyKey() {
36+
return Edge{ DenseMapInfo<BasicBlock *>::getEmptyKey(), DenseMapInfo<BasicBlock *>::getEmptyKey() };
37+
}
38+
39+
static inline Edge getTombstoneKey() {
40+
return Edge{ DenseMapInfo<BasicBlock *>::getTombstoneKey(), DenseMapInfo<BasicBlock *>::getTombstoneKey() };
41+
}
42+
43+
static unsigned getHashValue(const Edge &E) {
44+
return (unsigned)hash_combine(E.from, E.to);
45+
}
46+
47+
static bool isEqual(const Edge &LHS, const Edge &RHS) {
48+
return LHS == RHS;
49+
}
50+
};
51+
} // namespace llvm
52+
2853
// Declares the extra analysis this pass requires: CodeGenContextWrapper.
void InlineRaytracing::getAdditionalAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<CodeGenContextWrapper>();
}
2954

3055
bool InlineRaytracing::LowerAllocations(Function &F) {
@@ -771,8 +796,7 @@ void InlineRaytracing::StopAndStartRayquery(RTBuilder &IRB, Instruction *I, Valu
771796
}
772797
}
773798

774-
void InlineRaytracing::HandleOptimizationsAndSpills(llvm::Function &F, LivenessDataMap &livenessDataMap,
775-
DominatorTree &DT, LoopInfo &LI) {
799+
void InlineRaytracing::HandleOptimizationsAndSpills(llvm::Function &F, LivenessDataMap &livenessDataMap) {
776800
RTBuilder IRB(&*F.getEntryBlock().begin(), *m_pCGCtx);
777801

778802
SmallVector<Instruction *> continuationInstructions;
@@ -800,108 +824,122 @@ void InlineRaytracing::HandleOptimizationsAndSpills(llvm::Function &F, LivenessD
800824
m_pCGCtx->platform.enableRayQueryThrottling(m_pCGCtx->getModuleMetaData()->compOpt.EnableDynamicRQManagement) &&
801825
m_numSlotsUsed == 1;
802826

827+
MapVector<Instruction *, SmallVector<std::function<void(RTBuilder &)>>> instructionClosures;
828+
MapVector<LivenessData::Edge, SmallVector<std::function<void(RTBuilder &)>>> edgeClosures;
829+
803830
for (auto &entry : livenessDataMap) {
804-
bool cfgChanged = false;
805831

806832
auto *rqObject = entry.first;
807833
auto *LD = &entry.second;
808834

809835
// process the allocation acquire point
810836
// handle rayquery check
811-
if (doRQCheckRelease) {
812-
// check before the allocation is acquired
813-
IRB.SetInsertPoint(LD->lifetimeStart);
814-
IRB.CreateRayQueryCheckIntrinsic();
815-
}
837+
instructionClosures[LD->lifetimeStart].push_back([this, doRQCheckRelease](RTBuilder &IRB) {
838+
839+
if (doRQCheckRelease)
840+
IRB.CreateRayQueryCheckIntrinsic();
841+
});
816842

817843
// process the allocation release points
818844
for (auto *I : LD->lifetimeEndInstructions) {
819-
IRB.SetInsertPoint(isa<ReturnInst>(I) ? I : I->getNextNode());
820845

821-
auto *stackPtr = getStackPtr(IRB, IRB.Insert(rqObject->clone()));
846+
instructionClosures[isa<ReturnInst>(I) ? I : I->getNextNode()].push_back(
847+
[this, rqObject, doRQCheckRelease](RTBuilder &IRB) {
822848

823-
// handle cache control
824-
InsertCacheControl(IRB, stackPtr);
849+
auto *stackPtr = getStackPtr(IRB, IRB.Insert(rqObject->clone()));
825850

826-
// handle rayquery release
827-
if (doRQCheckRelease)
828-
IRB.CreateRayQueryReleaseIntrinsic();
851+
// handle cache control
852+
InsertCacheControl(IRB, stackPtr);
829853

830-
IGC_ASSERT(DT.dominates(LD->lifetimeStart, I));
854+
// handle rayquery release
855+
if (doRQCheckRelease)
856+
IRB.CreateRayQueryReleaseIntrinsic();
857+
});
831858
}
832859

833-
for (const auto &[from, to] : LD->lifetimeEndEdges) {
834-
auto *succ = to;
835-
// to avoid multiple executions of rayquery release instructions,
836-
// we need to ensure that the "to" block has a single predecessor
837-
if (!to->getSinglePredecessor()) {
838-
succ = SplitEdge(from, succ);
839-
840-
// we invalidated other the liveness data for other instructions
841-
cfgChanged = true;
842-
}
843-
844-
IRB.SetInsertPoint(succ->getFirstNonPHI());
860+
for (const auto &edge : LD->lifetimeEndEdges) {
845861

846-
auto *stackPtr = getStackPtr(IRB, IRB.Insert(rqObject->clone()));
862+
edgeClosures[edge].push_back([this, rqObject, doRQCheckRelease](RTBuilder &IRB) {
847863

848-
// handle cache control
849-
InsertCacheControl(IRB, stackPtr);
864+
auto *stackPtr = getStackPtr(IRB, IRB.Insert(rqObject->clone()));
850865

851-
// handle rayquery release
852-
if (doRQCheckRelease)
853-
IRB.CreateRayQueryReleaseIntrinsic();
866+
// handle cache control
867+
InsertCacheControl(IRB, stackPtr);
854868

855-
IGC_ASSERT(DT.dominates(LD->lifetimeStart, succ));
869+
// handle rayquery release
870+
if (doRQCheckRelease)
871+
IRB.CreateRayQueryReleaseIntrinsic();
872+
});
856873
}
857874

858875
// handle continuation instructions
859876
for (auto *I : continuationInstructions) {
877+
860878
if (!LD->ContainsInstruction(*I))
861879
continue;
862880

863-
IRB.SetInsertPoint(I);
864-
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
881+
instructionClosures[I].push_back([this, rqObject, doRQCheckRelease, I](RTBuilder &IRB) {
882+
883+
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
884+
});
865885
}
866886

867887
// handle indirect calls
868888
for (auto *I : indirectCallInstructions) {
889+
869890
if (!LD->ContainsInstruction(*I))
870891
continue;
871892

872-
IRB.SetInsertPoint(I);
873-
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
893+
instructionClosures[I].push_back([this, rqObject, doRQCheckRelease, I](RTBuilder &IRB) {
894+
895+
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
896+
});
874897
}
875898

876899
// handle hidden control flow instructions
877900
for (auto *I : hiddenCFInstructions) {
901+
878902
if (!LD->ContainsInstruction(*I))
879903
continue;
880904

881-
IRB.SetInsertPoint(I);
882-
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
905+
instructionClosures[I].push_back([this, rqObject, doRQCheckRelease, I](RTBuilder &IRB) {
906+
907+
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), true, doRQCheckRelease);
908+
});
883909
}
884910

885911
// handle barriers
886912
for (auto *I : barrierInstructions) {
913+
887914
if (!LD->ContainsInstruction(*I))
888915
continue;
889916

890-
IRB.SetInsertPoint(I);
891-
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), false, doRQCheckRelease);
917+
instructionClosures[I].push_back([this, rqObject, doRQCheckRelease, I](RTBuilder &IRB) {
918+
919+
StopAndStartRayquery(IRB, I, IRB.Insert(rqObject->clone()), false, doRQCheckRelease);
920+
});
892921
}
922+
}
923+
924+
for (const auto [I, closures] : instructionClosures) {
893925

894-
if (cfgChanged) {
895-
auto nextentry = livenessDataMap.find(rqObject);
926+
IRB.SetInsertPoint(I);
927+
for (const auto c : closures)
928+
c(IRB);
929+
}
896930

897-
// TODO: can we incrementally update LoopInfo and DomTree?
898-
DT.recalculate(F);
931+
for (const auto [edge, closures] : edgeClosures) {
899932

900-
getAnalysis<LoopInfoWrapperPass>().releaseMemory();
901-
getAnalysis<LoopInfoWrapperPass>().runOnFunction(F);
902-
while (++nextentry != livenessDataMap.end())
903-
nextentry->second = ProcessInstruction(nextentry->first, DT, getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
904-
}
933+
auto *succ = edge.to;
934+
// to avoid multiple executions of rayquery release instructions,
935+
// we need to ensure that the "to" block has a single predecessor
936+
if (!edge.to->getSinglePredecessor())
937+
succ = SplitEdge(edge.from, succ);
938+
939+
IRB.SetInsertPoint(succ->getFirstNonPHI());
940+
941+
for (const auto c : closures)
942+
c(IRB);
905943
}
906944
}
907945

@@ -971,7 +1009,7 @@ bool InlineRaytracing::runOnFunction(Function &F) {
9711009

9721010
auto livenessData = AnalyzeLiveness(F, DT, LI);
9731011
AssignSlots(F, livenessData);
974-
HandleOptimizationsAndSpills(F, livenessData, DT, LI);
1012+
HandleOptimizationsAndSpills(F, livenessData);
9751013
LowerSlotAssignments(F);
9761014
LowerStackPtrs(F);
9771015

IGC/AdaptorCommon/RayTracing/NewTraceRayInlineLoweringPass.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,7 @@ class InlineRaytracing : public AllocationLivenessAnalyzer {
4444
bool LowerAllocations(llvm::Function &F);
4545
LivenessDataMap AnalyzeLiveness(llvm::Function &F, llvm::DominatorTree &DT, llvm::LoopInfo &LI);
4646
void AssignSlots(llvm::Function &F, const LivenessDataMap &livenessDataMap);
47-
void HandleOptimizationsAndSpills(llvm::Function &F, LivenessDataMap &livenessDataMap, llvm::DominatorTree &DT,
48-
llvm::LoopInfo &LI);
47+
void HandleOptimizationsAndSpills(llvm::Function &F, LivenessDataMap &livenessDataMap);
4948
void LowerSlotAssignments(llvm::Function &F);
5049
void LowerStackPtrs(llvm::Function &F);
5150

0 commit comments

Comments
 (0)