Skip to content

Commit ef6c200

Browse files
lioujheyu authored and igcbot committed
Enable Loop unrolling promotion for Alloc for some
platforms Enable Loop unrolling promotion for Alloc for some platforms
1 parent 6adbbc8 commit ef6c200

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

IGC/Compiler/GenTTI.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,24 @@ bool GenIntrinsicsTTIImpl::isLoweredToCall(const Function *F) {
4242
// instructions. Set this to false unless IGC legalization can fix them.
4343
bool GenIntrinsicsTTIImpl::shouldBuildLookupTables() { return false; }
4444

45+
// Decides whether the alloca-related loop-unroll promotion is active.
//
// The value of the ForcePromoteLoopUnrollwithAlloca flag is read and
// interpreted as a TriboolFlag:
//   - Enabled  -> returns true unconditionally.
//   - Disabled -> returns false unconditionally.
//   - anything else -> falls back to the built-in policy: false for OpenCL
//     shaders, false when ctx->platform.isCoreChildOf(IGFX_XE2_HPG_CORE) is
//     false, and true otherwise.
bool GenIntrinsicsTTIImpl::enablePromoteLoopUnrollwithAlloca() {
46+
const IGC::TriboolFlag RK_PromoteLoopUnrollwithAlloca =
47+
static_cast<TriboolFlag>(IGC_GET_FLAG_VALUE(ForcePromoteLoopUnrollwithAlloca));
48+
switch (RK_PromoteLoopUnrollwithAlloca) {
49+
case TriboolFlag::Enabled:
50+
return true;
51+
case TriboolFlag::Disabled:
52+
return false;
53+
// No explicit override from the flag: apply the shader-type / platform policy.
default:
54+
// OpenCL shaders never get the promotion by default.
if (ctx->type == ShaderType::OPENCL_SHADER)
55+
return false;
56+
// NOTE(review): presumably isCoreChildOf(IGFX_XE2_HPG_CORE) selects XE2_HPG
// and the cores derived from it; confirm against the platform helper.
if (!ctx->platform.isCoreChildOf(IGFX_XE2_HPG_CORE))
57+
return false;
58+
59+
return true;
60+
}
61+
}
62+
4563
void *GenIntrinsicsTTIImpl::getAdjustedAnalysisPointer(const void *ID) {
4664
if (ID == &TargetTransformInfoWrapperPass::ID)
4765
return (TargetTransformInfo *)this;
@@ -318,7 +336,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
318336
// It can potentially do some global cost estimations.
319337
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
320338
bool AllocaFound = false;
321-
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca ) {
339+
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca) {
322340
unsigned int ThresholdBoost = 0;
323341
for (auto BB : L->blocks()) {
324342
for (auto &I : *BB) {
@@ -357,13 +375,12 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
357375
}
358376
}
359377
if (AllocaFound) {
360-
// LLVM default only to 10, boost to UnrollMaxCountForAlloca
361-
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
362378
UP.UpperBound = true;
363379
UP.Force = UnrollLoopForCodeSizeOnly ? false : true;
364380

365-
if (IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca) &&
366-
ctx->type != ShaderType::OPENCL_SHADER) {
381+
if (enablePromoteLoopUnrollwithAlloca()){
382+
// LLVM default only to 10, boost to UnrollMaxCountForAlloca
383+
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
367384
UP.Threshold += ThresholdBoost;
368385
LLVM_DEBUG(dbgs() << "Increasing L:" << L->getName() << " threshold to " << UP.Threshold
369386
<< " due to Alloca accessed by:");
@@ -648,7 +665,7 @@ llvm::InstructionCost GenIntrinsicsTTIImpl::internalCalculateCost(const User *U,
648665
}
649666
}
650667

651-
if (IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca)) {
668+
if (enablePromoteLoopUnrollwithAlloca()) {
652669
const GetElementPtrInst *GEP = nullptr;
653670
if (Operator::getOpcode(U) == Instruction::Load)
654671
GEP = dyn_cast<GetElementPtrInst>(cast<LoadInst>(U)->getPointerOperand());

IGC/Compiler/GenTTI.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class GenIntrinsicsTTIImpl : public IGCLLVM::TTIImplCRTPBase<GenIntrinsicsTTIImp
3232
DenseMap<Value *, bool> isGEPLoopInduction;
3333

3434
bool shouldBuildLookupTables();
35+
bool enablePromoteLoopUnrollwithAlloca();
3536

3637
bool isLoweredToCall(const Function *F);
3738

IGC/common/igc_flags.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ DECLARE_IGC_REGKEY(
437437
true)
438438
DECLARE_IGC_REGKEY(bool, UnrollLoopForCodeSizeOnly, false,
439439
"Only unroll the loop if it can reduce program size/register pressure. Ignore all other threshold "
440-
"setting but still enable EnablePromoteLoopUnrollwithAlloca due to high likelyhood to reduce size.",
440+
"setting but still enable PromoteLoopUnrollwithAlloca due to high likelyhood to reduce size.",
441441
true)
442442
DECLARE_IGC_REGKEY(DWORD, SetLoopUnrollThreshold, 0,
443443
"Set the loop unroll threshold. Value 0 will use the default threshold.", false)
@@ -448,12 +448,14 @@ DECLARE_IGC_REGKEY(DWORD, SetLoopUnrollMaxPercentThresholdBoostForHighRegPressur
448448
"Set the loop unroll max allowed threshold boost in percentage for shaders with high reg pressure. "
449449
"The LLVM internal value is 400.",
450450
false)
451-
DECLARE_IGC_REGKEY(
452-
bool, EnablePromoteLoopUnrollwithAlloca, false,
451+
DECLARE_IGC_REGKEY_ENUM(ForcePromoteLoopUnrollwithAlloca, -1,
453452
"Loop cost estimation assumes Load/Store who accesses Alloca with index deductible to loop count having 0 cost. "
454453
"Disable this flag makes them always cost something as well as disables dynamic threshold increase based on the "
455-
"size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled.",
456-
false)
454+
"size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled."
455+
"-1 - default behavior, decided by platforms"
456+
" 0 - force disabled"
457+
" 1 - force enabled",
458+
TRIBOOL_OPTIONS, false)
457459
DECLARE_IGC_REGKEY(DWORD, PromoteLoopUnrollwithAllocaCountThreshold, 256,
458460
"The loop trip count OR number of alloca elements cutoff to stop regkey "
459461
"EnablePromoteLoopUnrollwithAlloca (Check regkey description).",

0 commit comments

Comments
 (0)