Skip to content

Commit 93ce4ff

Browse files
committed
[VPlan] Extend CSE to eliminate GEPs
The motivation for this patch is to close the gap between the VPlan-based CSE and the legacy CSE, making it easier to remove the legacy CSE. Before this patch, stubbing out the legacy CSE led to 22 test failures; after this patch, only 12 failures remain, and all of them seem to share a single root cause: VPInterleaveGroup::execute() needs to be broken up. The small improvements from this patch are of course welcome, but they come at the cost of dropping some GEP flags.
1 parent dd29fbd commit 93ce4ff

File tree

10 files changed

+24
-63
lines changed

10 files changed

+24
-63
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,6 +1900,9 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
19001900

19011901
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
19021902

1903+
/// This recipe primarily generates a GEP instruction.
1904+
unsigned getOpcode() const { return Instruction::GetElementPtr; }
1905+
19031906
void execute(VPTransformState &State) override;
19041907

19051908
bool onlyFirstLaneUsed(const VPValue *Op) const override {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1978,7 +1978,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
19781978
return TypeSwitch<const VPSingleDefRecipe *,
19791979
std::optional<std::pair<bool, unsigned>>>(R)
19801980
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1981-
VPWidenSelectRecipe, VPReplicateRecipe>(
1981+
VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe,
1982+
VPVectorPointerRecipe>(
19821983
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
19831984
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
19841985
return std::make_pair(true, I->getVectorIntrinsicID());

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -518,11 +518,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
518518
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
519519
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
520520
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
521-
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
522-
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
523-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
524521
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
525-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
522+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
526523
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
527524
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
528525
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -584,11 +581,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
584581
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
585582
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
586583
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
587-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
588-
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
589-
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
590584
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
591-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
585+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
592586
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
593587
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
594588
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -656,11 +650,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
656650
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
657651
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
658652
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
659-
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
660-
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
661-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
662653
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
663-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
654+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
664655
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
665656
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
666657
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -719,11 +710,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
719710
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
720711
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
721712
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
722-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
723-
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
724-
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
725713
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
726-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
714+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
727715
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
728716
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
729717
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
7676
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
7777
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
7878
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
79-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
80-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
81-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
8279
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
83-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
80+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
8481
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
8582
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
8683
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -216,11 +213,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
216213
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
217214
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
218215
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
219-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
220-
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
221-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
222216
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
223-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
217+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
224218
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
225219
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
226220
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -296,11 +290,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
296290
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
297291
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
298292
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
299-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
300-
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
301-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
302293
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
303-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
294+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
304295
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
305296
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
306297
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -378,11 +369,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
378369
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
379370
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
380371
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
381-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
382-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
383-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
384372
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
385-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
373+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
386374
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
387375
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
388376
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -455,11 +443,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
455443
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
456444
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
457445
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
458-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
459-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
460-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
461446
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
462-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
447+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
463448
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
464449
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
465450
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -430,11 +430,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
430430
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
431431
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
432432
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
433-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
434-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
435-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
436433
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
437-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
434+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
438435
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
439436
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
440437
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -557,11 +554,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
557554
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
558555
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
559556
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
560-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
561-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
562-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
563557
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
564-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
558+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
565559
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
566560
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
567561
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1060,11 +1054,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
10601054
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
10611055
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
10621056
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
1063-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
1064-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
1065-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
10661057
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
1067-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
1058+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
10681059
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
10691060
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
10701061
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]

llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
3636
; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
3737
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
3838
; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
39-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
40-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
41-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
4239
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
43-
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
44-
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
45-
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
40+
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
41+
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
42+
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP6]], align 4
4643
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
4744
; CHECK: [[MIDDLE_BLOCK]]:
4845
;

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,8 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
6060
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
6161
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
6262
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
63-
; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 8
6463
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[A]], align 1
65-
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1
64+
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP2]], align 1
6665
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
6766
; VF8UF2: [[MIDDLE_BLOCK]]:
6867
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
6666
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
6767
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
6868
; VF8UF2-NEXT: [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
69-
; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8
7069
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[A]], align 1
71-
; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1
70+
; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP3]], align 1
7271
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
7372
; VF8UF2: [[MIDDLE_BLOCK]]:
7473
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
8282
; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
8383
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
8484
; CHECK-NEXT: WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10>
85-
; CHECK-NEXT: vp<[[VPTR4:%.+]]> = vector-pointer ir<%A>, ir<1>
8685
; CHECK-NEXT: WIDEN store ir<%A>, ir<%add>
87-
; CHECK-NEXT: WIDEN store vp<[[VPTR4]]>, ir<%add>.1
86+
; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>.1
8887
; CHECK-NEXT: Successor(s): middle.block
8988
; CHECK-EMPTY:
9089
; CHECK-NEXT: middle.block:

llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
168168
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
169169
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
170170
; CHECK-NEXT: [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
171-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
172171
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
173-
; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
172+
; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
174173
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
175174
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
176175
; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

0 commit comments

Comments
 (0)