Skip to content

Commit 6bc97e3

Browse files
committed
Reland [VPlan] Handle WidenGEP in narrowToSingleScalars
Changes: Fix a missed update to VPWidenGEPRecipe::usesFirstLaneOnly, and include a reduced-case test that was previously crashing. This allows us to strip a special case in VPWidenGEPRecipe::execute.
1 parent cba7c30 commit 6bc97e3

File tree

3 files changed

+48
-10
lines changed

3 files changed

+48
-10
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,14 +1845,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
18451845
}
18461846

18471847
/// Returns true if the recipe only uses the first lane of operand \p Op.
1848-
bool usesFirstLaneOnly(const VPValue *Op) const override {
1849-
assert(is_contained(operands(), Op) &&
1850-
"Op must be an operand of the recipe");
1851-
if (Op == getOperand(0))
1852-
return isPointerLoopInvariant();
1853-
else
1854-
return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1855-
}
1848+
bool usesFirstLaneOnly(const VPValue *Op) const override;
18561849

18571850
protected:
18581851
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,6 +2527,11 @@ void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
25272527
}
25282528
#endif
25292529

2530+
bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const {
2531+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
2532+
return vputils::isSingleScalar(Op);
2533+
}
2534+
25302535
void VPWidenGEPRecipe::execute(VPTransformState &State) {
25312536
assert(State.VF.isVector() && "not widening");
25322537
// Construct a vector GEP by widening the operands of the scalar GEP as

llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,48 @@
1-
; RUN: not --crash opt -p loop-vectorize -force-vector-width=2 \
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
2+
; RUN: opt -p loop-vectorize -force-vector-width=2 \
23
; RUN: -force-target-supports-scalable-vectors=true \
3-
; RUN: -scalable-vectorization=preferred -S %s
4+
; RUN: -scalable-vectorization=preferred -S %s | FileCheck %s
45

56
define void @widengep_narrow(ptr %in, ptr noalias %p) {
7+
; CHECK-LABEL: define void @widengep_narrow(
8+
; CHECK-SAME: ptr [[IN:%.*]], ptr noalias [[P:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
11+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
12+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
13+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14+
; CHECK: [[VECTOR_PH]]:
15+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
16+
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
17+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
18+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
19+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP4]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
22+
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
23+
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
24+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
25+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
26+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
27+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
28+
; CHECK: [[VECTOR_BODY]]:
29+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
30+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
31+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> [[VEC_IND]]
32+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
33+
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2
34+
; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
35+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 [[TMP10]]
36+
; CHECK-NEXT: store ptr [[TMP11]], ptr [[P]], align 8
37+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
38+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
39+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
40+
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
41+
; CHECK: [[MIDDLE_BLOCK]]:
42+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
43+
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
44+
; CHECK: [[SCALAR_PH]]:
45+
;
646
entry:
747
br label %loop
848

0 commit comments

Comments
 (0)