Skip to content

Commit e682785

Browse files
committed
[AArch64] Enable RT and partial unrolling for loops with reductions.
Update unrolling preferences for Apple Silicon CPUs to enable partial unrolling and runtime unrolling for small loops with reductions. This builds on top of unroller changes to introduce parallel reduction phis, if possible: #149470.
1 parent 0e73ebc commit e682785

File tree

2 files changed

+139
-23
lines changed

2 files changed

+139
-23
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Support/Debug.h"
2626
#include "llvm/TargetParser/AArch64TargetParser.h"
2727
#include "llvm/Transforms/InstCombine/InstCombiner.h"
28+
#include "llvm/Transforms/Utils/UnrollLoop.h"
2829
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
2930
#include <algorithm>
3031
#include <optional>
@@ -4969,6 +4970,22 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
49694970
if (!L->getExitBlock())
49704971
return;
49714972

4973+
// Check if the loop contains any reductions that could be parallelized when
4974+
// unrolling. If so, enable partial unrolling, if the trip count is know to be
4975+
// a multiple of 2.
4976+
bool HasParellelizableReductions =
4977+
L->getNumBlocks() == 1 &&
4978+
any_of(L->getHeader()->phis(),
4979+
[&SE, L](PHINode &Phi) {
4980+
return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
4981+
}) &&
4982+
isLoopSizeWithinBudget(L, TTI, 12, nullptr);
4983+
if (HasParellelizableReductions &&
4984+
SE.getSmallConstantTripMultiple(L, L->getExitingBlock()) % 2 == 0) {
4985+
UP.Partial = true;
4986+
UP.MaxCount = 4;
4987+
}
4988+
49724989
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
49734990
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
49744991
(SE.getSmallConstantMaxTripCount(L) > 0 &&
@@ -4984,6 +5001,11 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
49845001
// Limit to loops with trip counts that are cheap to expand.
49855002
UP.SCEVExpansionBudget = 1;
49865003

5004+
if (HasParellelizableReductions) {
5005+
UP.Runtime = true;
5006+
UP.DefaultUnrollRuntimeCount = 4;
5007+
}
5008+
49875009
// Try to unroll small loops, of few-blocks with low budget, if they have
49885010
// load/store dependencies, to expose more parallel memory access streams,
49895011
// or if they do little work inside a block (i.e. load -> X -> store pattern).

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 117 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -783,16 +783,34 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
783783
; APPLE-NEXT: [[ENTRY:.*]]:
784784
; APPLE-NEXT: br label %[[LOOP:.*]]
785785
; APPLE: [[LOOP]]:
786-
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
786+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
787+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
788+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
789+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
787790
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
788791
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
789792
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
790-
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
791-
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
792-
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
793-
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
793+
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
794+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
795+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
796+
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
797+
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
798+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
799+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
800+
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
801+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP2]]
802+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
803+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
804+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
805+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
806+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
807+
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
808+
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
794809
; APPLE: [[EXIT]]:
795-
; APPLE-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
810+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
811+
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
812+
; APPLE-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
813+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
796814
; APPLE-NEXT: ret i32 [[BIN_RDX2]]
797815
;
798816
; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial(
@@ -923,21 +941,42 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
923941
; APPLE-NEXT: [[ENTRY:.*]]:
924942
; APPLE-NEXT: br label %[[LOOP:.*]]
925943
; APPLE: [[LOOP]]:
926-
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
927-
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
928-
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ]
944+
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
945+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3:%.*]], %[[LOOP]] ]
946+
; APPLE-NEXT: [[RDX_21:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
947+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
948+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RES_2:%.*]], %[[LOOP]] ]
949+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ]
929950
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
930951
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
931-
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
932-
; APPLE-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]]
933-
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
934-
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
935-
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
952+
; APPLE-NEXT: [[RES_2]] = add i32 [[RDX]], [[TMP0]]
953+
; APPLE-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
954+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
955+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
956+
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
957+
; APPLE-NEXT: [[BIN_RDX3]] = add i32 [[RDX_1]], [[TMP1]]
958+
; APPLE-NEXT: [[RDX_2_NEXT_1:%.*]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
959+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
960+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
961+
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
962+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_21]], [[TMP2]]
963+
; APPLE-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_NEXT_1]], [[TMP2]]
964+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
965+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
966+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
967+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
968+
; APPLE-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[TMP3]]
969+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
970+
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
971+
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
936972
; APPLE: [[EXIT]]:
937-
; APPLE-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
938-
; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ]
973+
; APPLE-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
974+
; APPLE-NEXT: [[RES_3:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ]
939975
; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]]
940-
; APPLE-NEXT: ret i32 [[SUM]]
976+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[SUM]]
977+
; APPLE-NEXT: [[BIN_RDX4:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
978+
; APPLE-NEXT: [[SUM1:%.*]] = add i32 [[BIN_RDX4]], [[RES_3]]
979+
; APPLE-NEXT: ret i32 [[SUM1]]
941980
;
942981
; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
943982
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
@@ -992,18 +1031,72 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
9921031
; APPLE-LABEL: define i32 @test_add_reduction_runtime(
9931032
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
9941033
; APPLE-NEXT: [[ENTRY:.*]]:
1034+
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
1035+
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3
1036+
; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
1037+
; APPLE-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
1038+
; APPLE: [[ENTRY_NEW]]:
1039+
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
9951040
; APPLE-NEXT: br label %[[LOOP:.*]]
9961041
; APPLE: [[LOOP]]:
997-
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ]
998-
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP]] ]
1042+
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
1043+
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
1044+
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
1045+
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
1046+
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
1047+
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
9991048
; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
10001049
; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
1001-
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
1002-
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
1050+
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP6]]
1051+
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1
1052+
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
1053+
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
1054+
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]]
1055+
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2
1056+
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
1057+
; APPLE-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
1058+
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]]
1059+
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3
1060+
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
1061+
; APPLE-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
1062+
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]]
1063+
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4
1064+
; APPLE-NEXT: [[NITER_NEXT_3]] = add nuw i64 [[NITER]], 4
1065+
; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
1066+
; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
1067+
; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
1068+
; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
1069+
; APPLE-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
1070+
; APPLE-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
1071+
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
1072+
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
1073+
; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
1074+
; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
1075+
; APPLE: [[EXIT_UNR_LCSSA]]:
1076+
; APPLE-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1077+
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1078+
; APPLE-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1079+
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
1080+
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
1081+
; APPLE: [[LOOP_EPIL_PREHEADER]]:
1082+
; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
1083+
; APPLE: [[LOOP_EPIL]]:
1084+
; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
1085+
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
1086+
; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
1087+
; APPLE-NEXT: [[GEP_A_EPIL1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL1]]
1088+
; APPLE-NEXT: [[TMP7:%.*]] = load i32, ptr [[GEP_A_EPIL1]], align 2
1089+
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP7]]
1090+
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL1]], 1
10031091
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
1004-
; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]]
1092+
; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
1093+
; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
1094+
; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]]
1095+
; APPLE: [[EXIT_EPILOG_LCSSA]]:
1096+
; APPLE-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
1097+
; APPLE-NEXT: br label %[[EXIT]]
10051098
; APPLE: [[EXIT]]:
1006-
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP]] ]
1099+
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
10071100
; APPLE-NEXT: ret i32 [[RES]]
10081101
;
10091102
; OTHER-LABEL: define i32 @test_add_reduction_runtime(
@@ -1092,6 +1185,7 @@ exit:
10921185
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
10931186
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
10941187
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
1188+
; APPLE: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
10951189
;.
10961190
; OTHER: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
10971191
; OTHER: [[META1]] = !{!"llvm.loop.unroll.disable"}

0 commit comments

Comments
 (0)