Skip to content

Commit 132bacd

Browse files
committed
[VPlan] Also allow extracts as users when converting to single scalars.
Extracts technically use vectors rather than scalars, but if the operand is a single scalar, no vector is needed, so extract users should not block forming single scalars.
1 parent a205695 commit 132bacd

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1311,7 +1311,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
13111311
// scalar results used. In the latter case, we would introduce extra
13121312
// broadcasts.
13131313
if (!vputils::isSingleScalar(RepOrWidenR) ||
1314-
!vputils::onlyScalarValuesUsed(RepOrWidenR))
1314+
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1315+
return U->usesScalars(RepOrWidenR) ||
1316+
match(cast<VPRecipeBase>(U),
1317+
m_ExtractLastElement(m_VPValue()));
1318+
}))
13151319
continue;
13161320

13171321
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),

llvm/test/Transforms/LoopVectorize/pr66616.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,11 @@ define void @pr66616(ptr %ptr) {
1313
; CHECK: vector.body:
1414
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1515
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4
16-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
17-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
18-
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
16+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 1
1917
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2018
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2119
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2220
; CHECK: middle.block:
23-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
2421
; CHECK-NEXT: br label [[PREHEADER:%.*]]
2522
; CHECK: scalar.ph:
2623
; CHECK-NEXT: br label [[LOOP_1:%.*]]

0 commit comments

Comments
 (0)