Commit aecacea

[VPlan] Support scalable VFs in narrowInterleaveGroups.
Update narrowInterleaveGroups to support scalable VFs. After the transform, the vector loop will process a single iteration of the original vector loop for fixed-width vectors and vscale iterations for scalable vectors.
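
For context, a minimal before/after sketch of the effect on a factor-2 interleave group with a scalable VF, distilled from the AArch64 test updated below (value names are illustrative):

; Before: wide load, deinterleave, re-interleave, wide store;
; the induction increment is the full VF (2 x vscale here).
%wide.vec = load <vscale x 4 x i64>, ptr %gep, align 8
%strided.vec = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec)
; ...
; After: a single narrow load/store pair; the increment becomes vscale * UF.
%wide.load = load <vscale x 2 x i64>, ptr %gep, align 8
store <vscale x 2 x i64> %wide.load, ptr %gep, align 8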
1 parent: ba5d487

2 files changed: +26 −22 lines

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 7 deletions
@@ -3533,12 +3533,12 @@ static bool isAlreadyNarrow(VPValue *VPV) {
 void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                                              unsigned VectorRegWidth) {
   VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
-  if (VF.isScalable() || !VectorLoop)
+  if (!VectorLoop)
     return;
 
   VPTypeAnalysis TypeInfo(Plan);
 
-  unsigned FixedVF = VF.getFixedValue();
+  unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
     if (isa<VPCanonicalIVPHIRecipe>(&R) ||
@@ -3574,7 +3574,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
       continue;
 
     // Bail out on non-consecutive interleave groups.
-    if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
+    if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
                                       VectorRegWidth))
       return;
 
@@ -3693,10 +3693,21 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // original iteration.
   auto *CanIV = Plan.getCanonicalIV();
   auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
-  Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
-                         CanIV->getScalarType(), 1 * Plan.getUF())));
-  Plan.getVF().replaceAllUsesWith(
-      Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  VPBuilder PHBuilder(Plan.getVectorPreheader());
+
+  VPValue *UF = Plan.getOrAddLiveIn(
+      ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+  if (VF.isScalable()) {
+    VPValue *VScale = PHBuilder.createElementCount(
+        CanIV->getScalarType(), ElementCount::getScalable(1));
+    VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
+    Inc->setOperand(1, VScaleUF);
+    Plan.getVF().replaceAllUsesWith(VScale);
+  } else {
+    Inc->setOperand(1, UF);
+    Plan.getVF().replaceAllUsesWith(
+        Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  }
   removeDeadRecipes(Plan);
 }
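
The key change is how the canonical IV increment is rebuilt: for scalable VFs the step can no longer be the constant 1 * UF, so it is materialized in the vector preheader as vscale * UF via createElementCount, and uses of the plan's VF are rewritten to vscale instead of the constant 1. A sketch of the IR this is expected to lower to, assuming UF = 2 for illustration (with UF = 1, as in the tests below, the multiply folds away and the step is just vscale):

%vscale = call i64 @llvm.vscale.i64()    ; materialized once in the preheader
%step = mul i64 %vscale, 2               ; vscale * UF
; ...
%index.next = add nuw i64 %index, %step  ; per-iteration increment in the loop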

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll

Lines changed: 8 additions & 15 deletions
@@ -16,18 +16,15 @@ define void @load_store_interleave_group(ptr noalias %data) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
-; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -69,20 +66,16 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x double>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = fneg <vscale x 2 x double> [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load <vscale x 2 x double>, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = fneg <vscale x 2 x double> [[TMP7]]
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]])
-; CHECK-NEXT:    store <vscale x 4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    store <vscale x 2 x double> [[TMP9]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
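
Note that both tests run with UF = 1, so the rebuilt increment is simply [[TMP6]] = vscale and no multiply is emitted; the trip-count rounding ([[N_MOD_VF]] and [[N_VEC]], computed from [[TMP3]]) is left untouched by the transform, as the unchanged context lines above show.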
