Commit 465b17c

[VPlan] Support scalable VFs in narrowInterleaveGroups. (#154842)
Update narrowInterleaveGroups to support scalable VFs. After the transform, the vector loop will process a single iteration of the original vector loop for fixed-width vectors and vscale iterations for scalable vectors.
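
In other words, the canonical induction variable of the narrowed loop now steps by UF original iterations per vector-loop iteration for fixed-width VFs, and by vscale * UF for scalable VFs. A minimal standalone sketch of that accounting (plain C++; the compile-time VScale constant is a made-up stand-in for the runtime value that @llvm.vscale yields in the emitted IR):

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the runtime vscale value.
constexpr uint64_t VScale = 4;

// IV step of the narrowed loop, in units of original vector-loop
// iterations: UF for fixed-width VFs, vscale * UF for scalable VFs.
uint64_t narrowedIVStep(bool ScalableVF, uint64_t UF) {
  return ScalableVF ? VScale * UF : UF;
}

int main() {
  std::cout << narrowedIVStep(false, 2) << '\n'; // fixed-width: 2
  std::cout << narrowedIVStep(true, 2) << '\n';  // scalable: 4 * 2 = 8
}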
1 parent: 6332e2b

2 files changed: +26 -22

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 7 deletions
@@ -3615,12 +3615,12 @@ static bool isAlreadyNarrow(VPValue *VPV) {
 void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                                              unsigned VectorRegWidth) {
   VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
-  if (VF.isScalable() || !VectorLoop)
+  if (!VectorLoop)
     return;
 
   VPTypeAnalysis TypeInfo(Plan);
 
-  unsigned FixedVF = VF.getFixedValue();
+  unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
     if (isa<VPCanonicalIVPHIRecipe>(&R) ||
@@ -3656,7 +3656,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
       continue;
 
     // Bail out on non-consecutive interleave groups.
-    if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
+    if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
                                       VectorRegWidth))
       return;
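
A note on the query swap above: getKnownMinValue() equals the element count itself for a fixed VF and the multiplier of vscale for a scalable VF, whereas getFixedValue() asserts on scalable counts, which is why the old code had to bail out on them. A standalone sketch that mirrors (but does not use) llvm::ElementCount:

#include <cassert>
#include <cstdio>

// Illustration-only mirror of llvm::ElementCount's two accessors.
struct EC {
  unsigned MinVal; // element count, or multiplier of vscale
  bool Scalable;   // true models <vscale x MinVal x Ty>

  unsigned getKnownMinValue() const { return MinVal; }
  unsigned getFixedValue() const {
    assert(!Scalable && "getFixedValue() on a scalable count");
    return MinVal;
  }
};

int main() {
  EC Fixed{4, false}; // VF = 4
  EC Scal{2, true};   // VF = vscale x 2
  std::printf("%u %u\n", Fixed.getKnownMinValue(), Scal.getKnownMinValue()); // 4 2
  // Scal.getFixedValue() would trip the assertion.
}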

@@ -3775,10 +3775,21 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // original iteration.
   auto *CanIV = Plan.getCanonicalIV();
   auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
-  Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
-                         CanIV->getScalarType(), 1 * Plan.getUF())));
-  Plan.getVF().replaceAllUsesWith(
-      Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  VPBuilder PHBuilder(Plan.getVectorPreheader());
+
+  VPValue *UF = Plan.getOrAddLiveIn(
+      ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+  if (VF.isScalable()) {
+    VPValue *VScale = PHBuilder.createElementCount(
+        CanIV->getScalarType(), ElementCount::getScalable(1));
+    VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
+    Inc->setOperand(1, VScaleUF);
+    Plan.getVF().replaceAllUsesWith(VScale);
+  } else {
+    Inc->setOperand(1, UF);
+    Plan.getVF().replaceAllUsesWith(
+        Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  }
   removeDeadRecipes(Plan);
 }

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll

Lines changed: 8 additions & 15 deletions
@@ -16,18 +16,15 @@ define void @load_store_interleave_group(ptr noalias %data) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
-; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -69,20 +66,16 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP0]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x double>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[STRIDED_VEC]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = fneg <vscale x 2 x double> [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load <vscale x 2 x double>, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = fneg <vscale x 2 x double> [[TMP7]]
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]])
-; CHECK-NEXT:    store <vscale x 4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    store <vscale x 2 x double> [[TMP9]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
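
The updated CHECK lines show the effect of the transform: the wide <vscale x 4 x ...> load, the deinterleave2/interleave2 pair, and the extractvalues collapse into a single <vscale x 2 x ...> load and store, and the induction variable now steps by vscale instead of vscale x 2. In scalar terms the first test boils down to a loop like the following (a reconstruction from the CHECK lines, not the actual test source):

// Assumed scalar analogue of @load_store_interleave_group: both members
// of each pair are stored back unchanged, so the interleave group is
// equivalent to one contiguous load/store per pair.
void load_store_interleave_group(long long *data) {
  for (long long i = 0; i < 100; ++i) {
    long long a = data[2 * i];     // interleave-group member 0
    long long b = data[2 * i + 1]; // interleave-group member 1
    data[2 * i] = a;
    data[2 * i + 1] = b;
  }
}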
