Skip to content

Commit 8eb1fe4

Browse files
lioujheyuigcbot
authored and committed
Enable loop unroll but only for reducing code size during compilation retry
Enable loop unroll but only for reducing code size during compilation retry
1 parent d81684b commit 8eb1fe4

File tree

4 files changed

+38
-4
lines changed

4 files changed

+38
-4
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,8 @@ void OptimizeIR(CodeGenContext *const pContext) {
13531353
}
13541354

13551355

1356-
if (!pContext->m_retryManager.IsFirstTry()) {
1356+
// Can be completely replaced by UnrollLoopForCodeSizeOnly in GenTTI; consider removing this pass entirely.
1357+
if (!pContext->m_retryManager.IsFirstTry() && pContext->m_retryManager.IsLastTry()) {
13571358
mpm.add(new DisableLoopUnrollOnRetry());
13581359
}
13591360

IGC/Compiler/GenTTI.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ SPDX-License-Identifier: MIT
66
77
============================= end_copyright_notice ===========================*/
88

9+
#include <utility>
910
#include "Compiler/GenTTI.h"
1011
#include "GenISAIntrinsics/GenIntrinsics.h"
1112
#include "GenISAIntrinsics/GenIntrinsicInst.h"
@@ -18,6 +19,7 @@ SPDX-License-Identifier: MIT
1819
#include "llvm/Analysis/CodeMetrics.h"
1920
#include "llvm/Analysis/LoopInfo.h"
2021
#include "llvm/Analysis/ScalarEvolution.h"
22+
#include "llvm/Support/InstructionCost.h"
2123
#include "llvmWrapper/Transforms/Utils/LoopUtils.h"
2224
#include "common/LLVMWarningsPop.hpp"
2325

@@ -173,6 +175,8 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
173175
}
174176

175177
unsigned LoopUnrollThreshold = ctx->m_DriverInfo.GetLoopUnrollThreshold();
178+
bool UnrollLoopForCodeSizeOnly =
179+
IGC_IS_FLAG_ENABLED(UnrollLoopForCodeSizeOnly) || (!ctx->m_retryManager.IsFirstTry());
176180

177181
// override the LoopUnrollThreshold if the registry key is set
178182
if (IGC_GET_FLAG_VALUE(SetLoopUnrollThreshold) != 0) {
@@ -274,6 +278,12 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
274278
UP.Force = true;
275279
}
276280

281+
if (UnrollLoopForCodeSizeOnly) {
282+
UP.Threshold = getLoopSize(L, *this) + 1;
283+
UP.MaxPercentThresholdBoost = 100;
284+
UP.Partial = false;
285+
}
286+
277287
// For all the load/store who (having a GEP to),
278288
// 1. Accessing a fixed size Alloca
279289
// 2. Having a loop-iteration-inducted-only index
@@ -306,8 +316,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
306316
//
307317
// TODO: Having an analysis pass to link alloca with loops globally so that they are either unrolled together or not.
308318
// It can potentially do some global cost estimations.
309-
// TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
310-
// reduce register pressure.
311319
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
312320
bool AllocaFound = false;
313321
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
@@ -353,7 +361,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
353361
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
354362
UP.Threshold += ThresholdBoost;
355363
UP.UpperBound = true;
356-
UP.Force = true;
364+
UP.Force = UnrollLoopForCodeSizeOnly ? false : true;
357365

358366
LLVM_DEBUG(dbgs() << "Increasing L:" << L->getName() << " threshold to " << UP.Threshold
359367
<< " due to Alloca accessed by:");
@@ -363,6 +371,9 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
363371
}
364372
}
365373

374+
if (IGC_IS_FLAG_ENABLED(UnrollLoopForCodeSizeOnly))
375+
return;
376+
366377
unsigned sendMessage = 0;
367378
unsigned TripCount = 0;
368379
BasicBlock *ExitingBlock = L->getLoopLatch();
@@ -679,4 +690,20 @@ llvm::InstructionCost GenIntrinsicsTTIImpl::internalCalculateCost(const User *U,
679690

680691
return BaseT::getInstructionCost(U, Operands, CostKind);
681692
}
693+
694+
// Stripped-down version of LLVM::LoopUnrollPass::ApproximateLoopSize.
//
// Estimates the size of loop L as the instruction count that CodeMetrics
// accumulates over every basic block of the loop, clamped to a minimum of 3.
//
// L   - loop whose blocks are measured.
// TTI - target transform info forwarded to CodeMetrics::analyzeBasicBlock.
//
// Returns the clamped instruction count as an unsigned value.
695+
unsigned getLoopSize(const Loop *L, const TargetTransformInfo &TTI) {
696+
// Passed to the analysis below without being populated in this function,
// i.e. no ephemeral values are collected here.
SmallPtrSet<const Value *, 32> EphValues;
697+
698+
CodeMetrics Metrics;
699+
// Accumulate metrics across all blocks that belong to the loop.
for (BasicBlock *BB : L->blocks())
700+
Metrics.analyzeBasicBlock(BB, TTI, EphValues);
701+
702+
InstructionCost LoopSize;
703+
LoopSize = Metrics.NumInsts;
704+
705+
// Never report a size below 3 (annotated in place as BEInsns + 1).
LoopSize = (LoopSize > 3/*BEInsns + 1*/) ? LoopSize : 3;
706+
// NOTE(review): getValue() is dereferenced without a validity check;
// presumably safe because LoopSize was assigned from an instruction count
// just above - confirm against the LLVM version in use.
return *LoopSize.getValue();
707+
}
708+
682709
} // namespace llvm

IGC/Compiler/GenTTI.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,6 @@ class GenIntrinsicsTTIImpl : public IGCLLVM::TTIImplCRTPBase<GenIntrinsicsTTIImp
6161
TTI::TargetCostKind CostKind);
6262
};
6363

64+
unsigned getLoopSize(const Loop *L, const TargetTransformInfo &TTI);
65+
6466
} // namespace llvm

IGC/common/igc_flags.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,10 @@ DECLARE_IGC_REGKEY(
433433
"The mask is casted to IGC::SyncInstMask and informs which synchronization objects should not be coalesced. Note "
434434
"that synchronization objects classified in multiple types are not disabled if any bit describing them is off.",
435435
true)
436+
DECLARE_IGC_REGKEY(bool, UnrollLoopForCodeSizeOnly, false,
437+
"Only unroll the loop if it can reduce program size/register pressure. Ignore all other threshold "
438+
"setting but still enable EnablePromoteLoopUnrollwithAlloca due to high likelyhood to reduce size.",
439+
true)
436440
DECLARE_IGC_REGKEY(DWORD, SetLoopUnrollThreshold, 0,
437441
"Set the loop unroll threshold. Value 0 will use the default threshold.", false)
438442
DECLARE_IGC_REGKEY(

0 commit comments

Comments
 (0)