Commit 7b78c86

[VPlan] Plumb scalable register size through narrowInterleaveGroups
1 parent 5b57455 commit 7b78c86

4 files changed, +20 -26 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 1 deletion
@@ -7253,7 +7253,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
 
   VPlanTransforms::narrowInterleaveGroups(
       BestVPlan, BestVF,
-      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+      TTI.getRegisterBitWidth(BestVF.isScalable()
+                                  ? TargetTransformInfo::RGK_ScalableVector
+                                  : TargetTransformInfo::RGK_FixedWidthVector));
   VPlanTransforms::removeDeadRecipes(BestVPlan);
 
   VPlanTransforms::convertToConcreteRecipes(BestVPlan);
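
Context note (an illustration, not part of the commit): getRegisterBitWidth returns a TypeSize, so querying RGK_ScalableVector yields a width whose scalable flag is set, which is what the narrowed interleave-group check in VPlanTransforms.cpp below compares against. A minimal sketch of the selection, with the 128-bit widths purely as assumed example values:

// Sketch only: choose the register-width query based on whether the chosen
// VF is scalable. The concrete widths below are illustrative assumptions,
// not what any particular target reports.
#include "llvm/Support/TypeSize.h"
using namespace llvm;

TypeSize pickRegisterWidth(bool VFIsScalable) {
  return VFIsScalable ? TypeSize::getScalable(128)  // e.g. vscale x 128 bits
                      : TypeSize::getFixed(128);    // e.g. a 128-bit SIMD register
}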

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 8 deletions
@@ -4145,8 +4145,9 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
 /// members both equal to \p VF. The interleave group must also access the full
 /// vector width \p VectorRegWidth.
 static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
-                                         unsigned VF, VPTypeAnalysis &TypeInfo,
-                                         unsigned VectorRegWidth) {
+                                         ElementCount VF,
+                                         VPTypeAnalysis &TypeInfo,
+                                         TypeSize VectorRegWidth) {
   if (!InterleaveR || InterleaveR->getMask())
     return false;
 
@@ -4168,9 +4169,11 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
       return false;
   }
 
-  unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF;
-  auto IG = InterleaveR->getInterleaveGroup();
-  return IG->getFactor() == VF && IG->getNumMembers() == VF &&
+  unsigned VFMin = VF.getKnownMinValue();
+  TypeSize GroupSize = TypeSize::get(
+      GroupElementTy->getScalarSizeInBits() * VFMin, VF.isScalable());
+  const auto *IG = InterleaveR->getInterleaveGroup();
+  return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
          GroupSize == VectorRegWidth;
 }
 
@@ -4236,14 +4239,13 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
 }
 
 void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
-                                             unsigned VectorRegWidth) {
+                                             TypeSize VectorRegWidth) {
   VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
   if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0)
     return;
 
   VPTypeAnalysis TypeInfo(Plan);
 
-  unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
     if (isa<VPCanonicalIVPHIRecipe>(&R))
@@ -4278,7 +4280,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
       continue;
 
     // Bail out on non-consecutive interleave groups.
-    if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
+    if (!isConsecutiveInterleaveGroup(InterleaveR, VF, TypeInfo,
                                       VectorRegWidth))
       return;
 
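
Aside (not part of the patch): because GroupSize now carries the VF's scalable flag, the GroupSize == VectorRegWidth check in isConsecutiveInterleaveGroup only passes when both the known-minimum size and the scalable flag agree, so a scalable group size can no longer match a fixed-width register size by accident. A small self-contained sketch of that comparison, with the concrete sizes as assumed examples:

#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

int main() {
  // An i64 interleave group with known-minimum VF of 2 on a scalable target:
  // 64 bits * 2 = vscale x 128 bits.
  TypeSize GroupSize = TypeSize::get(/*Quantity=*/64 * 2, /*Scalable=*/true);
  TypeSize ScalableReg = TypeSize::getScalable(128); // assumed register width
  TypeSize FixedReg = TypeSize::getFixed(128);       // assumed register width
  assert(GroupSize == ScalableReg); // min sizes and scalable flags both match
  assert(GroupSize != FixedReg);    // scalable vs. fixed never compare equal
  return 0;
}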

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 1 deletion
@@ -348,7 +348,7 @@ struct VPlanTransforms {
   /// form of loop-aware SLP, where we use interleave groups to identify
   /// candidates.
   static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
-                                     unsigned VectorRegWidth);
+                                     TypeSize VectorRegWidth);
 
   /// Predicate and linearize the control-flow in the only loop region of
   /// \p Plan. If \p FoldTail is true, create a mask guarding the loop

llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll

Lines changed: 6 additions & 16 deletions
@@ -50,13 +50,9 @@ define void @load_store_interleave_group(ptr noalias %data) {
 ; EPILOGUE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; EPILOGUE-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 1
 ; EPILOGUE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP4]]
-; EPILOGUE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP5]], align 8
-; EPILOGUE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
-; EPILOGUE-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
-; EPILOGUE-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
-; EPILOGUE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
-; EPILOGUE-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
-; EPILOGUE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; EPILOGUE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP5]], align 8
+; EPILOGUE-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 8
+; EPILOGUE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
 ; EPILOGUE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; EPILOGUE-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; EPILOGUE: [[MIDDLE_BLOCK]]:
@@ -153,15 +149,9 @@ define void @load_store_interleave_group_i32(ptr noalias %data) {
 ; EPILOGUE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; EPILOGUE-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 2
 ; EPILOGUE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP4]]
-; EPILOGUE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 16 x i32>, ptr [[TMP5]], align 8
-; EPILOGUE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC]])
-; EPILOGUE-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; EPILOGUE-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; EPILOGUE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 2
-; EPILOGUE-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 3
-; EPILOGUE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv16i32(<vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP9]])
-; EPILOGUE-NEXT:    store <vscale x 16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
-; EPILOGUE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; EPILOGUE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 8
+; EPILOGUE-NEXT:    store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP5]], align 8
+; EPILOGUE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
 ; EPILOGUE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; EPILOGUE-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; EPILOGUE: [[MIDDLE_BLOCK]]:
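
Reading the first EPILOGUE hunk above (explanatory aside, not part of the commit): the group has two i64 members and a known-minimum VF of 2, so it spans 64 bits * 2 = vscale x 128 bits; assuming the target reports that same scalable register width for this configuration, isConsecutiveInterleaveGroup now accepts the group and the load/deinterleave2/interleave2/store sequence collapses into a single <vscale x 2 x i64> load and store of the unmodified data.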
