Skip to content

Commit c0f7d51

Browse files
authored
[VPlan] Simplify ExplicitVectorLength(%AVL) -> %AVL when AVL <= VF (llvm#167647)
[`llvm.experimental.get.vector.length`](https://llvm.org/docs/LangRef.html#id2399) has the property that if the AVL (%cnt) is less than or equal to VF (%max_lanes), then the return value is just the AVL. This patch uses SCEV to apply that simplification in optimizeForVFAndUF, and adds `ExplicitVectorLength` to the opcodes that `VPInstruction::opcodeMayReadOrWriteFromMemory` treats as not accessing memory, so that the instruction gets removed once it is dead.
1 parent 78554d9 commit c0f7d51

File tree

4 files changed

+44
-18
lines changed

4 files changed

+44
-18
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,6 +1299,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
12991299
case VPInstruction::ExtractLastLanePerPart:
13001300
case VPInstruction::ExtractPenultimateElement:
13011301
case VPInstruction::ActiveLaneMask:
1302+
case VPInstruction::ExplicitVectorLength:
13021303
case VPInstruction::FirstActiveLane:
13031304
case VPInstruction::LastActiveLane:
13041305
case VPInstruction::FirstOrderRecurrenceSplice:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1843,6 +1843,35 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
18431843
return true;
18441844
}
18451845

1846+
/// From the definition of llvm.experimental.get.vector.length,
1847+
/// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF.
///
/// Walks the VPBasicBlocks reachable from the entry of \p Plan looking for a
/// recipe matched by m_EVL. When SCEV can prove that the recipe's AVL operand
/// is unsigned-less-or-equal to \p VF, every use of the recipe's result is
/// replaced by the AVL itself, zero-extended or truncated to i32.
///
/// Only the first recipe that can be simplified is rewritten; the function
/// returns immediately afterwards.
///
/// \param Plan the VPlan to scan and modify.
/// \param VF the vectorization factor the AVL is compared against.
/// \param PSE supplies the ScalarEvolution instance used for the comparison.
/// \returns true if a recipe was simplified, false otherwise.
1848+
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
1849+
PredicatedScalarEvolution &PSE) {
1850+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1851+
vp_depth_first_deep(Plan.getEntry()))) {
1852+
for (VPRecipeBase &R : *VPBB) {
1853+
VPValue *AVL;
1854+
// Skip anything that is not an EVL recipe; on a match AVL is bound to its
// operand.
if (!match(&R, m_EVL(m_VPValue(AVL))))
1855+
continue;
1856+
1857+
ScalarEvolution &SE = *PSE.getSE();
1858+
// Express the AVL as a SCEV; without one nothing can be proven about it.
const SCEV *AVLSCEV = vputils::getSCEVExprForVPValue(AVL, SE);
1859+
if (isa<SCEVCouldNotCompute>(AVLSCEV))
1860+
continue;
1861+
// Build VF as a SCEV in the AVL's type so the two can be compared directly.
const SCEV *VFSCEV = SE.getElementCount(AVLSCEV->getType(), VF);
1862+
// Only simplify when AVL <= VF (unsigned) is provably true.
if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
1863+
continue;
1864+
1865+
// Convert the AVL from its own type to i32 with a builder anchored at R,
// then redirect all users of the EVL result to the converted value.
VPValue *Trunc = VPBuilder(&R).createScalarZExtOrTrunc(
1866+
AVL, Type::getInt32Ty(Plan.getContext()), AVLSCEV->getType(),
1867+
R.getDebugLoc());
1868+
R.getVPSingleValue()->replaceAllUsesWith(Trunc);
1869+
// Stop after the first successful rewrite.
return true;
1870+
}
1871+
}
1872+
// No EVL recipe could be simplified.
return false;
1873+
}
1874+
18461875
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
18471876
unsigned BestUF,
18481877
PredicatedScalarEvolution &PSE) {
@@ -1852,6 +1881,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
18521881
bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF);
18531882
MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
18541883
MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
1884+
MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE);
18551885

18561886
if (MadeChange) {
18571887
Plan.setVF(BestVF);

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,11 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
125125
; CHECK: vector.ph:
126126
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
127127
; CHECK: vector.body:
128-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
129-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
128+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
130129
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i8> [[VP_OP_LOAD]], splat (i8 1)
131-
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
130+
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[DST:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
132131
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i8> [[TMP6]], [[VP_OP_LOAD1]]
133-
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
132+
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 4 x i1> splat (i1 true), i32 8)
134133
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
135134
; CHECK: middle.block:
136135
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -164,12 +163,11 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
164163
; CHECK: vector.ph:
165164
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
166165
; CHECK: vector.body:
167-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 16, i32 8, i1 true)
168-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
166+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
169167
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 8 x i8> [[VP_OP_LOAD]], splat (i8 1)
170-
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
168+
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[DST:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
171169
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i8> [[TMP6]], [[VP_OP_LOAD1]]
172-
; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
170+
; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 8 x i1> splat (i1 true), i32 16)
173171
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
174172
; CHECK: middle.block:
175173
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -204,12 +202,11 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
204202
; CHECK: vector.ph:
205203
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
206204
; CHECK: vector.body:
207-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 32, i32 16, i1 true)
208-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
205+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
209206
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
210-
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
207+
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
211208
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
212-
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
209+
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 32)
213210
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
214211
; CHECK: middle.block:
215212
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -243,12 +240,11 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
243240
; CHECK: vector.ph:
244241
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
245242
; CHECK: vector.body:
246-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 24, i32 16, i1 true)
247-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
243+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
248244
; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
249-
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
245+
; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
250246
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
251-
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
247+
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 24)
252248
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
253249
; CHECK: middle.block:
254250
; CHECK-NEXT: br label [[FOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ define void @foo(ptr %arg) #0 {
1010
; CHECK: [[VECTOR_PH]]:
1111
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1212
; CHECK: [[VECTOR_BODY]]:
13-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 3, i32 2, i1 true)
14-
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
13+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 3)
1514
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
1615
; CHECK: [[MIDDLE_BLOCK]]:
1716
; CHECK-NEXT: br label %[[EXIT:.*]]

0 commit comments

Comments
 (0)