@@ -4054,17 +4054,24 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
40544054 }
40554055
40564056 // Try to find an unroll count that maximizes the use of the instruction
4057- // window.
4058- unsigned UC = std::max (16ll / Size, 2ll );
4059- unsigned BestUC = 0 ;
4060- while (UC <= 8 && UC * Size <= 48 ) {
4061- if ((UC * Size % 16 ) == 0 || (BestUC * Size % 16 ) < (UC * Size % 16 ) % 16 ) {
4057+ // window, i.e. trying to fetch as many instructions per cycle as possible.
4058+ unsigned MaxInstsPerLine = 16 ;
4059+ unsigned UC = 1 ;
4060+ unsigned BestUC = 1 ;
4061+ unsigned SizeWithBestUC = BestUC * Size;
4062+ while (UC <= 8 ) {
4063+ unsigned SizeWithUC = UC * Size;
4064+ if (SizeWithUC > 48 )
4065+ break ;
4066+ if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4067+ (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
40624068 BestUC = UC;
4069+ SizeWithBestUC = BestUC * Size;
40634070 }
40644071 UC++;
40654072 }
40664073
4067- if (BestUC == 0 || none_of (Stores, [&LoadedValues](StoreInst *SI) {
4074+ if (BestUC == 1 || none_of (Stores, [&LoadedValues](StoreInst *SI) {
40684075 return LoadedValues.contains (SI->getOperand (0 ));
40694076 }))
40704077 return ;
@@ -4090,15 +4097,21 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
40904097 // Disable partial & runtime unrolling on -Os.
40914098 UP.PartialOptSizeThreshold = 0 ;
40924099
4093- if (ST->getProcFamily () == AArch64Subtarget::Falkor &&
4094- EnableFalkorHWPFUnrollFix)
4095- getFalkorUnrollingPreferences (L, SE, UP);
4096-
4097- if (ST->getProcFamily () == AArch64Subtarget::AppleA14 ||
4098- ST->getProcFamily () == AArch64Subtarget::AppleA15 ||
4099- ST->getProcFamily () == AArch64Subtarget::AppleA16 ||
4100- ST->getProcFamily () == AArch64Subtarget::AppleM4)
4100+ // Apply subtarget-specific unrolling preferences.
4101+ switch (ST->getProcFamily ()) {
4102+ case AArch64Subtarget::AppleA14:
4103+ case AArch64Subtarget::AppleA15:
4104+ case AArch64Subtarget::AppleA16:
4105+ case AArch64Subtarget::AppleM4:
41014106 getAppleRuntimeUnrollPreferences (L, SE, UP, *this );
4107+ break ;
4108+ case AArch64Subtarget::Falkor:
4109+ if (EnableFalkorHWPFUnrollFix)
4110+ getFalkorUnrollingPreferences (L, SE, UP);
4111+ break ;
4112+ default :
4113+ break ;
4114+ }
41024115
41034116 // Scan the loop: don't unroll loops with calls as this could prevent
41044117 // inlining. Don't unroll vector loops either, as they don't benefit much from
0 commit comments