Skip to content

Commit ca5ae56

Browse files
committed
[AArch64] Add isAppleMLike helper to check for M cores and aligned CPUs.
Add a new isAppleMLike helper that returns true if the core is part of the Apple M core family or is an Apple A14 or later. It is used to apply cost decisions consistently to those groups of cores, and it gives a single place to update when new cores are added. It also makes sure the unrolling decisions for newer Apple cores are applied to Apple A17 as well.
1 parent 095f8e0 commit ca5ae56

File tree

4 files changed

+22
-224
lines changed

4 files changed

+22
-224
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -222,17 +222,8 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
222222
PrefetchDistance = 280;
223223
MinPrefetchStride = 2048;
224224
MaxPrefetchIterationsAhead = 3;
225-
switch (ARMProcFamily) {
226-
case AppleA14:
227-
case AppleA15:
228-
case AppleA16:
229-
case AppleA17:
230-
case AppleM4:
225+
if (isAppleMLike())
231226
MaxInterleaveFactor = 4;
232-
break;
233-
default:
234-
break;
235-
}
236227
break;
237228
case ExynosM3:
238229
MaxInterleaveFactor = 4;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,21 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
169169
return ARMProcFamily;
170170
}
171171

172+
/// Returns true if the processor is an Apple M-series or aligned A-series
173+
/// (A14 or newer).
174+
bool isAppleMLike() const {
175+
switch (ARMProcFamily) {
176+
case AppleA14:
177+
case AppleA15:
178+
case AppleA16:
179+
case AppleA17:
180+
case AppleM4:
181+
return true;
182+
default:
183+
return false;
184+
}
185+
}
186+
172187
bool isXRaySupported() const override { return true; }
173188

174189
/// Returns true if the function has a streaming body.

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5309,20 +5309,11 @@ void AArch64TTIImpl::getUnrollingPreferences(
53095309
}
53105310

53115311
// Apply subtarget-specific unrolling preferences.
5312-
switch (ST->getProcFamily()) {
5313-
case AArch64Subtarget::AppleA14:
5314-
case AArch64Subtarget::AppleA15:
5315-
case AArch64Subtarget::AppleA16:
5316-
case AArch64Subtarget::AppleM4:
5312+
if (ST->isAppleMLike())
53175313
getAppleRuntimeUnrollPreferences(L, SE, UP, *this);
5318-
break;
5319-
case AArch64Subtarget::Falkor:
5320-
if (EnableFalkorHWPFUnrollFix)
5321-
getFalkorUnrollingPreferences(L, SE, UP);
5322-
break;
5323-
default:
5324-
break;
5325-
}
5314+
else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5315+
EnableFalkorHWPFUnrollFix)
5316+
getFalkorUnrollingPreferences(L, SE, UP);
53265317

53275318
// If this is a small, multi-exit loop similar to something like std::find,
53285319
// then there is typically a performance improvement achieved by unrolling.

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 2 additions & 201 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
44
; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
55
; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
6-
; RUN: opt -p loop-unroll -mcpu=apple-a17 -S %s | FileCheck --check-prefix=APPLE-A17 %s
6+
; RUN: opt -p loop-unroll -mcpu=apple-a17 -S %s | FileCheck --check-prefix=APPLE %s
77
; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
88

99
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
@@ -101,23 +101,6 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
101101
; APPLE: [[EXIT]]:
102102
; APPLE-NEXT: ret void
103103
;
104-
; APPLE-A17-LABEL: define void @small_load_store_loop(
105-
; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
106-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
107-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
108-
; APPLE-A17: [[LOOP]]:
109-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
110-
; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
111-
; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
112-
; APPLE-A17-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
113-
; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
114-
; APPLE-A17-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
115-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
116-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
117-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
118-
; APPLE-A17: [[EXIT]]:
119-
; APPLE-A17-NEXT: ret void
120-
;
121104
; OTHER-LABEL: define void @small_load_store_loop(
122105
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
123106
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -234,24 +217,6 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k
234217
; APPLE: [[EXIT]]:
235218
; APPLE-NEXT: ret void
236219
;
237-
; APPLE-A17-LABEL: define void @load_op_store_loop(
238-
; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
239-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
240-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
241-
; APPLE-A17: [[LOOP]]:
242-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
243-
; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
244-
; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
245-
; APPLE-A17-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
246-
; APPLE-A17-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
247-
; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
248-
; APPLE-A17-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
249-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
250-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
251-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
252-
; APPLE-A17: [[EXIT]]:
253-
; APPLE-A17-NEXT: ret void
254-
;
255220
; OTHER-LABEL: define void @load_op_store_loop(
256221
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
257222
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -348,33 +313,7 @@ define void @load_op_store_loop_multiblock(ptr %src, ptr %dst, i64 %N, i64 %scal
348313
; APPLE: [[EXIT]]:
349314
; APPLE-NEXT: ret void
350315
;
351-
; APPLE-A17-LABEL: define void @load_op_store_loop_multiblock(
352-
; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
353-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
354-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
355-
; APPLE-A17: [[LOOP]]:
356-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ]
357-
; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
358-
; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
359-
; APPLE-A17-NEXT: [[L1:%.*]] = load float, ptr [[GEP_SRC]], align 4
360-
; APPLE-A17-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
361-
; APPLE-A17-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1
362-
; APPLE-A17-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]]
363-
; APPLE-A17: [[LOOPCONT]]:
364-
; APPLE-A17-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], %[[LOOPODD]] ], [ [[L1]], %[[LOOP]] ]
365-
; APPLE-A17-NEXT: [[O:%.*]] = fadd float [[D]], [[K]]
366-
; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
367-
; APPLE-A17-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
368-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
369-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
370-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
371-
; APPLE-A17: [[LOOPODD]]:
372-
; APPLE-A17-NEXT: [[L2]] = fneg float [[L1]]
373-
; APPLE-A17-NEXT: br label %[[LOOPCONT]]
374-
; APPLE-A17: [[EXIT]]:
375-
; APPLE-A17-NEXT: ret void
376-
;
377-
; OTHER-LABEL: define void @load_op_store_loop_multiblock(
316+
; OTHER-LABEL: define void @load_op_store_loop_multiblock(
378317
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
379318
; OTHER-NEXT: [[ENTRY:.*]]:
380319
; OTHER-NEXT: br label %[[LOOP:.*]]
@@ -713,66 +652,6 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
713652
; APPLE: [[EXIT]]:
714653
; APPLE-NEXT: ret void
715654
;
716-
; APPLE-A17-LABEL: define void @early_continue_dep_on_load_large(
717-
; APPLE-A17-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
718-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
719-
; APPLE-A17-NEXT: br label %[[LOOP_HEADER:.*]]
720-
; APPLE-A17: [[LOOP_HEADER]]:
721-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
722-
; APPLE-A17-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]]
723-
; APPLE-A17-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4
724-
; APPLE-A17-NEXT: [[C_1:%.*]] = icmp sgt i32 [[L_1]], [[T_1]]
725-
; APPLE-A17-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
726-
; APPLE-A17: [[THEN]]:
727-
; APPLE-A17-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4
728-
; APPLE-A17-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4
729-
; APPLE-A17-NEXT: [[C_2:%.*]] = icmp ugt i8 [[L_2]], 7
730-
; APPLE-A17-NEXT: br i1 [[C_2]], label %[[MERGE:.*]], label %[[ELSE:.*]]
731-
; APPLE-A17: [[ELSE]]:
732-
; APPLE-A17-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64
733-
; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]]
734-
; APPLE-A17-NEXT: [[L_3:%.*]] = load i8, ptr [[GEP_A]], align 1
735-
; APPLE-A17-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[L_3]] to i64
736-
; APPLE-A17-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]]
737-
; APPLE-A17-NEXT: [[L_4:%.*]] = load i32, ptr [[GEP_B]], align 4
738-
; APPLE-A17-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]]
739-
; APPLE-A17-NEXT: [[L_5:%.*]] = load i32, ptr [[GEP_C]], align 4
740-
; APPLE-A17-NEXT: br label %[[MERGE]]
741-
; APPLE-A17: [[MERGE]]:
742-
; APPLE-A17-NEXT: [[MERGE_1:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_4]], %[[ELSE]] ]
743-
; APPLE-A17-NEXT: [[MERGE_2:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_5]], %[[ELSE]] ]
744-
; APPLE-A17-NEXT: [[ADD14:%.*]] = add nsw i32 [[MERGE_2]], [[X]]
745-
; APPLE-A17-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]]
746-
; APPLE-A17-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[IV]] to i32
747-
; APPLE-A17-NEXT: [[ADD16:%.*]] = add nsw i32 [[MERGE_1]], [[TMP0]]
748-
; APPLE-A17-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]]
749-
; APPLE-A17-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
750-
; APPLE-A17-NEXT: [[GEP_P_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]]
751-
; APPLE-A17-NEXT: [[L_6:%.*]] = load i32, ptr [[GEP_P_2]], align 4
752-
; APPLE-A17-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[MERGE_2]]
753-
; APPLE-A17-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]]
754-
; APPLE-A17-NEXT: [[SUB22:%.*]] = sub i32 [[TMP0]], [[MERGE_1]]
755-
; APPLE-A17-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]]
756-
; APPLE-A17-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64
757-
; APPLE-A17-NEXT: [[GEP_P2_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]]
758-
; APPLE-A17-NEXT: [[L_7:%.*]] = load i32, ptr [[GEP_P2_1]], align 4
759-
; APPLE-A17-NEXT: [[C_3:%.*]] = icmp sgt i32 [[L_1]], [[L_6]]
760-
; APPLE-A17-NEXT: [[C_4:%.*]] = icmp sgt i32 [[L_1]], [[L_7]]
761-
; APPLE-A17-NEXT: [[AND34:%.*]] = and i1 [[C_3]], [[C_4]]
762-
; APPLE-A17-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]]
763-
; APPLE-A17: [[STORE_RES]]:
764-
; APPLE-A17-NEXT: [[C_5:%.*]] = icmp sgt i32 [[L_1]], [[T_2]]
765-
; APPLE-A17-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5
766-
; APPLE-A17-NEXT: [[RES:%.*]] = select i1 [[C_5]], i8 1, i8 2
767-
; APPLE-A17-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1
768-
; APPLE-A17-NEXT: br label %[[LOOP_LATCH]]
769-
; APPLE-A17: [[LOOP_LATCH]]:
770-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
771-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
772-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
773-
; APPLE-A17: [[EXIT]]:
774-
; APPLE-A17-NEXT: ret void
775-
;
776655
; OTHER-LABEL: define void @early_continue_dep_on_load_large(
777656
; OTHER-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
778657
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -935,23 +814,6 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
935814
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
936815
; APPLE-NEXT: ret i32 [[BIN_RDX2]]
937816
;
938-
; APPLE-A17-LABEL: define i32 @test_add_reduction_unroll_partial(
939-
; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
940-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
941-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
942-
; APPLE-A17: [[LOOP]]:
943-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
944-
; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
945-
; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
946-
; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
947-
; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
948-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
949-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
950-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
951-
; APPLE-A17: [[EXIT]]:
952-
; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
953-
; APPLE-A17-NEXT: ret i32 [[RES]]
954-
;
955817
; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial(
956818
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
957819
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -1025,29 +887,6 @@ define i32 @test_add_reduction_multi_block(ptr %a, i64 noundef %n) {
1025887
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
1026888
; APPLE-NEXT: ret i32 [[RES]]
1027889
;
1028-
; APPLE-A17-LABEL: define i32 @test_add_reduction_multi_block(
1029-
; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
1030-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
1031-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
1032-
; APPLE-A17: [[LOOP]]:
1033-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1034-
; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ]
1035-
; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
1036-
; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
1037-
; APPLE-A17-NEXT: [[C:%.*]] = call i1 @cond()
1038-
; APPLE-A17-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
1039-
; APPLE-A17: [[THEN]]:
1040-
; APPLE-A17-NEXT: store i32 0, ptr [[GEP_A]], align 4
1041-
; APPLE-A17-NEXT: br label %[[LOOP_LATCH]]
1042-
; APPLE-A17: [[LOOP_LATCH]]:
1043-
; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
1044-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1045-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
1046-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
1047-
; APPLE-A17: [[EXIT]]:
1048-
; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
1049-
; APPLE-A17-NEXT: ret i32 [[RES]]
1050-
;
1051890
; OTHER-LABEL: define i32 @test_add_reduction_multi_block(
1052891
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
1053892
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -1140,27 +979,6 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
1140979
; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]]
1141980
; APPLE-NEXT: ret i32 [[SUM]]
1142981
;
1143-
; APPLE-A17-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
1144-
; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
1145-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
1146-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
1147-
; APPLE-A17: [[LOOP]]:
1148-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1149-
; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
1150-
; APPLE-A17-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ]
1151-
; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
1152-
; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
1153-
; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
1154-
; APPLE-A17-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]]
1155-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1156-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
1157-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
1158-
; APPLE-A17: [[EXIT]]:
1159-
; APPLE-A17-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
1160-
; APPLE-A17-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ]
1161-
; APPLE-A17-NEXT: [[SUM:%.*]] = add i32 [[RES_1]], [[RES_2]]
1162-
; APPLE-A17-NEXT: ret i32 [[SUM]]
1163-
;
1164982
; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
1165983
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
1166984
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -1281,23 +1099,6 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
12811099
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[BIN_RDX4]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
12821100
; APPLE-NEXT: ret i32 [[RES]]
12831101
;
1284-
; APPLE-A17-LABEL: define i32 @test_add_reduction_runtime(
1285-
; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
1286-
; APPLE-A17-NEXT: [[ENTRY:.*]]:
1287-
; APPLE-A17-NEXT: br label %[[LOOP:.*]]
1288-
; APPLE-A17: [[LOOP]]:
1289-
; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1290-
; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
1291-
; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
1292-
; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
1293-
; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
1294-
; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1295-
; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1296-
; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
1297-
; APPLE-A17: [[EXIT]]:
1298-
; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
1299-
; APPLE-A17-NEXT: ret i32 [[RES]]
1300-
;
13011102
; OTHER-LABEL: define i32 @test_add_reduction_runtime(
13021103
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
13031104
; OTHER-NEXT: [[ENTRY:.*]]:

0 commit comments

Comments
 (0)