Skip to content

Commit 46fcece

Browse files
artagnonfhahnlukel97
authored
[VPlan] Extend CSE to eliminate GEPs (#156699)
The motivation for this patch is to close the gap between the VPlan-based CSE and the legacy CSE, to make it easier to remove the legacy CSE. Before this patch, stubbing out the legacy CSE leads to 22 test failures, and after this patch, there are only 12 failures, and all of them seem to have a single root cause: VPlanTransforms::createInterleaveGroups() and VPInterleaveGroup::execute(). The improvements from this patch are of course welcome. While developing the patch, a miscompile was found when GEP source-element-types differ, and this has been fixed. Co-authored-by: Florian Hahn <[email protected]> Co-authored-by: Luke Lau <[email protected]>
1 parent 6ae9fcd commit 46fcece

11 files changed

+161
-67
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1902,6 +1902,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
19021902

19031903
void execute(VPTransformState &State) override;
19041904

1905+
Type *getSourceElementType() const { return IndexedTy; }
1906+
19051907
bool onlyFirstLaneUsed(const VPValue *Op) const override {
19061908
assert(is_contained(operands(), Op) &&
19071909
"Op must be an operand of the recipe");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,20 +1978,39 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
19781978
return TypeSwitch<const VPSingleDefRecipe *,
19791979
std::optional<std::pair<bool, unsigned>>>(R)
19801980
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1981-
VPWidenSelectRecipe, VPReplicateRecipe>(
1981+
VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
19821982
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
19831983
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
19841984
return std::make_pair(true, I->getVectorIntrinsicID());
19851985
})
19861986
.Default([](auto *) { return std::nullopt; });
19871987
}
19881988

1989+
/// If recipe \p R will lower to a GEP with a non-i8 source element type,
1990+
/// return that source element type.
1991+
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
1992+
// All VPInstructions that lower to GEPs must have the i8 source element
1993+
// type (as they are PtrAdds), so we omit it.
1994+
return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
1995+
.Case<VPReplicateRecipe, VPWidenGEPRecipe>([](auto *I) -> Type * {
1996+
if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
1997+
return GEP->getSourceElementType();
1998+
return nullptr;
1999+
})
2000+
.Case<VPVectorPointerRecipe>(
2001+
[](auto *I) { return I->getSourceElementType(); })
2002+
.Default([](auto *) { return nullptr; });
2003+
}
2004+
19892005
/// Returns true if recipe \p Def can be safely handed for CSE.
19902006
static bool canHandle(const VPSingleDefRecipe *Def) {
19912007
// We can extend the list of handled recipes in the future,
19922008
// provided we account for the data embedded in them while checking for
1993-
// equality or hashing.
1994-
auto C = getOpcodeOrIntrinsicID(Def);
2009+
// equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
2010+
// as it is essentially a GEP with different semantics.
2011+
auto C = isa<VPVectorPointerRecipe>(Def)
2012+
? std::make_pair(false, Instruction::GetElementPtr)
2013+
: getOpcodeOrIntrinsicID(Def);
19952014

19962015
// The issue with (Insert|Extract)Value is that the index of the
19972016
// insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2012,8 +2031,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
20122031
VPTypeAnalysis TypeInfo(*Plan);
20132032
hash_code Result = hash_combine(
20142033
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
2015-
TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
2016-
hash_combine_range(Def->operands()));
2034+
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2035+
vputils::isSingleScalar(Def), hash_combine_range(Def->operands()));
20172036
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(Def))
20182037
if (RFlags->hasPredicate())
20192038
return hash_combine(Result, RFlags->getPredicate());
@@ -2026,6 +2045,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
20262045
return L == R;
20272046
if (L->getVPDefID() != R->getVPDefID() ||
20282047
getOpcodeOrIntrinsicID(L) != getOpcodeOrIntrinsicID(R) ||
2048+
getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
20292049
vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
20302050
!equal(L->operands(), R->operands()))
20312051
return false;

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -518,11 +518,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
518518
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
519519
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
520520
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
521-
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
522-
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
523-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
524521
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
525-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
522+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
526523
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
527524
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
528525
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -584,11 +581,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
584581
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
585582
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
586583
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
587-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
588-
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
589-
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
590584
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
591-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
585+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
592586
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
593587
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
594588
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -656,11 +650,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
656650
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
657651
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
658652
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
659-
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
660-
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
661-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
662653
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
663-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
654+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
664655
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
665656
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
666657
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -719,11 +710,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
719710
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
720711
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
721712
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
722-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
723-
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
724-
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
725713
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
726-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
714+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
727715
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
728716
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
729717
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
7676
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
7777
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
7878
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
79-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
80-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
81-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
8279
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
83-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
80+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
8481
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
8582
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
8683
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -216,11 +213,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
216213
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
217214
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
218215
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
219-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
220-
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
221-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
222216
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
223-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
217+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
224218
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
225219
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
226220
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -296,11 +290,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
296290
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
297291
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
298292
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
299-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
300-
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
301-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
302293
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
303-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
294+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
304295
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
305296
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
306297
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -378,11 +369,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
378369
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
379370
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
380371
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
381-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
382-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
383-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
384372
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
385-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
373+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
386374
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
387375
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
388376
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -455,11 +443,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
455443
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
456444
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
457445
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
458-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
459-
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
460-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
461446
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
462-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
447+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
463448
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
464449
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
465450
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -430,11 +430,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
430430
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
431431
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
432432
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
433-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
434-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
435-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
436433
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
437-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
434+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
438435
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
439436
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
440437
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -557,11 +554,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
557554
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
558555
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
559556
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
560-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
561-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
562-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
563557
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
564-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
558+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
565559
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
566560
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
567561
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1060,11 +1054,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
10601054
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
10611055
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
10621056
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
1063-
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
1064-
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
1065-
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
10661057
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
1067-
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
1058+
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
10681059
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
10691060
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
10701061
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]

llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
3636
; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
3737
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
3838
; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
39-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
40-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
41-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
4239
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
43-
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
44-
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
45-
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
40+
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
41+
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
42+
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP6]], align 4
4643
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
4744
; CHECK: [[MIDDLE_BLOCK]]:
4845
;

0 commit comments

Comments
 (0)