@@ -6,6 +6,7 @@ SPDX-License-Identifier: MIT
6
6
7
7
============================= end_copyright_notice ===========================*/
8
8
9
+ #include < utility>
9
10
#include " Compiler/GenTTI.h"
10
11
#include " GenISAIntrinsics/GenIntrinsics.h"
11
12
#include " GenISAIntrinsics/GenIntrinsicInst.h"
@@ -18,6 +19,7 @@ SPDX-License-Identifier: MIT
18
19
#include " llvm/Analysis/CodeMetrics.h"
19
20
#include " llvm/Analysis/LoopInfo.h"
20
21
#include " llvm/Analysis/ScalarEvolution.h"
22
+ #include " llvm/Support/InstructionCost.h"
21
23
#include " llvmWrapper/Transforms/Utils/LoopUtils.h"
22
24
#include " common/LLVMWarningsPop.hpp"
23
25
@@ -173,6 +175,8 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
173
175
}
174
176
175
177
unsigned LoopUnrollThreshold = ctx->m_DriverInfo .GetLoopUnrollThreshold ();
178
+ bool UnrollLoopForCodeSizeOnly =
179
+ IGC_IS_FLAG_ENABLED (UnrollLoopForCodeSizeOnly) || (!ctx->m_retryManager .IsFirstTry ());
176
180
177
181
// override the LoopUnrollThreshold if the registry key is set
178
182
if (IGC_GET_FLAG_VALUE (SetLoopUnrollThreshold) != 0 ) {
@@ -274,6 +278,12 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
274
278
UP.Force = true ;
275
279
}
276
280
281
+ if (UnrollLoopForCodeSizeOnly) {
282
+ UP.Threshold = getLoopSize (L, *this ) + 1 ;
283
+ UP.MaxPercentThresholdBoost = 100 ;
284
+ UP.Partial = false ;
285
+ }
286
+
277
287
// For all the load/store who (having a GEP to),
278
288
// 1. Accessing a fixed size Alloca
279
289
// 2. Having an loop-iteration-inducted-only index
@@ -306,8 +316,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
306
316
//
307
317
// TODO: Having an analysis pass to link alloca with loops globally so that they are either unrolled together or not.
308
318
// It can potentially do some global cost estimations.
309
- // TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
310
- // reduce register pressure.
311
319
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE (PromoteLoopUnrollwithAllocaCountThreshold);
312
320
bool AllocaFound = false ;
313
321
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
@@ -353,7 +361,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
353
361
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
354
362
UP.Threshold += ThresholdBoost;
355
363
UP.UpperBound = true ;
356
- UP.Force = true ;
364
+ UP.Force = UnrollLoopForCodeSizeOnly ? false : true ;
357
365
358
366
LLVM_DEBUG (dbgs () << " Increasing L:" << L->getName () << " threshold to " << UP.Threshold
359
367
<< " due to Alloca accessed by:" );
@@ -363,6 +371,9 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
363
371
}
364
372
}
365
373
374
+ if (IGC_IS_FLAG_ENABLED (UnrollLoopForCodeSizeOnly))
375
+ return ;
376
+
366
377
unsigned sendMessage = 0 ;
367
378
unsigned TripCount = 0 ;
368
379
BasicBlock *ExitingBlock = L->getLoopLatch ();
@@ -679,4 +690,20 @@ llvm::InstructionCost GenIntrinsicsTTIImpl::internalCalculateCost(const User *U,
679
690
680
691
return BaseT::getInstructionCost (U, Operands, CostKind);
681
692
}
693
+
694
+ // Strip from LLVM::LoopUnrollPass::ApproximateLoopSize
695
+ unsigned getLoopSize (const Loop *L, const TargetTransformInfo &TTI) {
696
+ SmallPtrSet<const Value *, 32 > EphValues;
697
+
698
+ CodeMetrics Metrics;
699
+ for (BasicBlock *BB : L->blocks ())
700
+ Metrics.analyzeBasicBlock (BB, TTI, EphValues);
701
+
702
+ InstructionCost LoopSize;
703
+ LoopSize = Metrics.NumInsts ;
704
+
705
+ LoopSize = (LoopSize > 3 /* BEInsns + 1*/ ) ? LoopSize : 3 ;
706
+ return *LoopSize.getValue ();
707
+ }
708
+
682
709
} // namespace llvm
0 commit comments