[VPlan] Consolidate logic for narrow to single scalars #151506
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

The logic for narrowing to single scalar recipes is in two different places: narrowToSingleScalarRecipes and legalizeAndOptimizeInductions. Consolidate them, with minor test changes.

-- 8< --

Full diff: https://github.com/llvm/llvm-project/pull/151506.diff

7 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..3e7061584f2eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -631,30 +631,6 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
if (!PhiR)
continue;
- // Try to narrow wide and replicating recipes to uniform recipes, based on
- // VPlan analysis.
- // TODO: Apply to all recipes in the future, to replace legacy uniformity
- // analysis.
- auto Users = collectUsersRecursively(PhiR);
- for (VPUser *U : reverse(Users)) {
- auto *Def = dyn_cast<VPSingleDefRecipe>(U);
- auto *RepR = dyn_cast<VPReplicateRecipe>(U);
- // Skip recipes that shouldn't be narrowed.
- if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
- Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
- (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
- continue;
-
- // Skip recipes that may have other lanes than their first used.
- if (!vputils::isSingleScalar(Def) && !vputils::onlyFirstLaneUsed(Def))
- continue;
-
- auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
- Def->operands(), /*IsUniform*/ true);
- Clone->insertAfter(Def);
- Def->replaceAllUsesWith(Clone);
- }
-
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1238,21 +1214,19 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
continue;
- auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
- // Skip recipes that aren't single scalars or don't have only their
- // scalar results used. In the latter case, we would introduce extra
- // broadcasts.
- if (!vputils::isSingleScalar(RepOrWidenR) ||
- any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
- return !U->usesScalars(RepOrWidenR);
- }))
+ auto *Def = cast<VPSingleDefRecipe>(&R);
+ // Skip recipes that aren't single scalars or that don't have users or
+ // that don't have only their scalar results used (would introduce extra
+ // broadcasts in this case).
+ if (!vputils::isSingleScalar(Def) || Def->getNumUsers() == 0 ||
+ any_of(Def->users(),
+ [Def](VPUser *U) { return !U->usesScalars(Def); }))
continue;
- auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
- RepOrWidenR->operands(),
- true /*IsSingleScalar*/);
- Clone->insertBefore(RepOrWidenR);
- RepOrWidenR->replaceAllUsesWith(Clone);
+ auto *Clone = new VPReplicateRecipe(
+ Def->getUnderlyingInstr(), Def->operands(), true /*IsSingleScalar*/);
+ Clone->insertBefore(Def);
+ Def->replaceAllUsesWith(Clone);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..5f80214973158 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -40,6 +40,9 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
inline bool isSingleScalar(const VPValue *VPV) {
+ if (onlyFirstLaneUsed(VPV))
+ return true;
+
auto PreservesUniformity = [](unsigned Opcode) -> bool {
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
index 82f2fdd431238..cc6d70331e69d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
@@ -57,11 +57,11 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
index af2b238105925..0775ef9b8de75 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
@@ -15,6 +15,13 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -50,10 +57,14 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -97,6 +108,11 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -351,6 +367,11 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
index 61f511c16e88b..ab31abd970814 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
@@ -240,11 +240,11 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
+; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
index e412d130e115f..93e7581a8ba3d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
@@ -79,11 +79,11 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -159,11 +159,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -182,11 +182,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF4-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF4-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
+; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF4-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index ef6ce08da5230..1d140ffc14d14 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -115,11 +115,11 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
-; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP6]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Gentle ping.
Gentle ping.
Follow up on 132bacd ([VPlan] Also allow extracts as users when converting to single scalars) to also allow WidenStore recipes as users when converting to single scalars. While at it, refine the vputils::isSingleScalar check to also check vputils::onlyFirstLaneUsed, and forbid zero users. The motivation for this patch is to ease consolidation of the logic for narrowing to single scalars; the consolidation itself is tackled in a follow-up.
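For illustration, this is the kind of vector-body shape that allowing wide stores as users enables (a sketch only, with made-up value names; it matches the shape of the CHECK lines this PR's diff touches): a value that is uniform across the vector iteration is computed once as a scalar and only broadcast at the wide store that needs a vector operand.

; Sketch: a scalar computation whose only vector user is a wide store.
; The add is done once per vector iteration, not once per lane.
  %v     = load i64, ptr %gep, align 8
  %add   = add nsw i64 %v, 42
  %ins   = insertelement <2 x i64> poison, i64 %add, i64 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr %dst, align 8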
The logic for narrowing to single scalar recipes is in two different places: narrowToSingleScalarRecipes and legalizeAndOptimizeInductions. Consolidate them, with minor test changes.
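The "minor test changes" mostly take the following shape (simplified from the uniform_across_vf_induction1.ll hunk in the diff above; value names are illustrative): a scalar add followed by a broadcast of its result becomes a broadcast of the loaded value followed by a wide add.

; Before: add narrowed to a single scalar, then broadcast the result.
  %t3    = add nsw i64 %t2, 42
  %ins   = insertelement <2 x i64> poison, i64 %t3, i64 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr %t4, align 8

; After: broadcast the loaded value, keep the add wide.
  %ins   = insertelement <2 x i64> poison, i64 %t2, i64 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %t3    = add nsw <2 x i64> %splat, splat (i64 42)
  store <2 x i64> %t3, ptr %t4, align 8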
Will open a fresh patch as soon as #166559 lands.
Consolidate the logic for narrowing to single scalar recipes, which is currently in two different places: narrowToSingleScalarRecipes and legalizeAndOptimizeInductions.