Skip to content

Commit c93df39

Browse files
committed
[VPlan] Extend licm with speculative-exec-check
Technically speaking, in the absence of additional information, hoisting a recipe during LICM is unsafe only if the recipe reads or writes memory. Beyond that, hoisting is safe if the recipe is guaranteed to execute, or if it is safe to speculatively execute. This patch aligns the VPlan LICM more closely with the IR LICM.
1 parent 0e6557d commit c93df39

File tree

4 files changed

+60
-13
lines changed

4 files changed

+60
-13
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15461546
/// True if the intrinsic may have side-effects.
15471547
bool MayHaveSideEffects;
15481548

1549+
/// True if the intrinsic is safe to speculatively execute.
1550+
bool IsSafeToSpeculativelyExecute;
1551+
15491552
public:
15501553
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
15511554
ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1569,6 +1572,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15691572
MayHaveSideEffects = MayWriteToMemory ||
15701573
!Attrs.hasAttribute(Attribute::NoUnwind) ||
15711574
!Attrs.hasAttribute(Attribute::WillReturn);
1575+
IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
15721576
}
15731577

15741578
~VPWidenIntrinsicRecipe() override = default;
@@ -1608,6 +1612,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
16081612
/// Returns true if the intrinsic may have side-effects.
16091613
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
16101614

1615+
/// Returns true if the intrinsic is safe to speculatively execute.
1616+
bool isSafeToSpeculativelyExecute() const {
1617+
return IsSafeToSpeculativelyExecute;
1618+
}
1619+
16111620
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16121621
/// Print the recipe.
16131622
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Analysis/InstSimplifyFolder.h"
3131
#include "llvm/Analysis/LoopInfo.h"
3232
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
33+
#include "llvm/Analysis/ValueTracking.h"
3334
#include "llvm/Analysis/VectorUtils.h"
3435
#include "llvm/IR/Intrinsics.h"
3536
#include "llvm/IR/MDBuilder.h"
@@ -2102,6 +2103,16 @@ void VPlanTransforms::cse(VPlan &Plan) {
21022103
}
21032104
}
21042105

2106+
static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
2107+
if (auto *WC = dyn_cast<VPWidenCallRecipe>(R))
2108+
return WC->getCalledScalarFunction()->isSpeculatable();
2109+
if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(R))
2110+
return WI->isSafeToSpeculativelyExecute();
2111+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(R))
2112+
return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr());
2113+
return !R->mayHaveSideEffects();
2114+
}
2115+
21052116
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
21062117
static void licm(VPlan &Plan) {
21072118
VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -2110,6 +2121,11 @@ static void licm(VPlan &Plan) {
21102121
// out of a loop region. Does not address legality concerns such as aliasing
21112122
// or speculation safety.
21122123
auto CannotHoistRecipe = [](VPRecipeBase &R) {
2124+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2125+
// their memory location is not modified in the vector loop.
2126+
if (R.mayReadOrWriteMemory() || R.isPhi())
2127+
return true;
2128+
21132129
// Allocas cannot be hoisted.
21142130
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
21152131
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2119,14 +2135,13 @@ static void licm(VPlan &Plan) {
21192135
// preheader. Perform a shallow traversal of the vector loop region, to
21202136
// exclude recipes in replicate regions.
21212137
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2138+
bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
21222139
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
21232140
vp_depth_first_shallow(LoopRegion->getEntry()))) {
21242141
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
21252142
if (CannotHoistRecipe(R))
21262143
continue;
2127-
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2128-
// their memory location is not modified in the vector loop.
2129-
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
2144+
if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
21302145
any_of(R.operands(), [](VPValue *Op) {
21312146
return !Op->isDefinedOutsideLoopRegions();
21322147
}))

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
4545
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
4646
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4747
; FORCED: [[MIDDLE_BLOCK]]:
48-
; FORCED-NEXT: br [[EXIT:label %.*]]
49-
; FORCED: [[SCALAR_PH:.*:]]
48+
; FORCED-NEXT: br label %[[EXIT:.*]]
49+
; FORCED: [[EXIT]]:
50+
; FORCED-NEXT: ret void
5051
;
5152
entry:
5253
br label %loop.body
@@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
9192
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
9293
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
9394
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
95+
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
9496
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
9597
; FORCED: [[VECTOR_BODY]]:
9698
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
9799
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
98-
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
99100
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
100101
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
101102
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
102-
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
103+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
103104
; FORCED: [[MIDDLE_BLOCK]]:
104-
; FORCED-NEXT: br [[EXIT:label %.*]]
105-
; FORCED: [[SCALAR_PH:.*:]]
105+
; FORCED-NEXT: br label %[[EXIT:.*]]
106+
; FORCED: [[EXIT]]:
107+
; FORCED-NEXT: ret void
106108
;
107109
entry:
108110
br label %loop.body

llvm/test/Transforms/LoopVectorize/X86/funclet.ll

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
12
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
23
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
34
target triple = "i686-pc-windows-msvc18.0.0"
45

56
define void @test1() #0 personality ptr @__CxxFrameHandler3 {
7+
; CHECK-LABEL: define void @test1(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null)
11+
; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
12+
; CHECK: [[CATCH_DISPATCH]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
14+
; CHECK: [[CATCH:.*]]:
15+
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
16+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
17+
; CHECK: [[FOR_COND_CLEANUP:.*]]:
18+
; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
19+
; CHECK: [[FOR_BODY]]:
20+
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
21+
; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
22+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
23+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
24+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
25+
; CHECK: [[TRY_CONT]]:
26+
; CHECK-NEXT: ret void
27+
; CHECK: [[UNREACHABLE]]:
28+
; CHECK-NEXT: unreachable
29+
;
630
entry:
731
invoke void @_CxxThrowException(ptr null, ptr null)
8-
to label %unreachable unwind label %catch.dispatch
32+
to label %unreachable unwind label %catch.dispatch
933

1034
catch.dispatch: ; preds = %entry
1135
%0 = catchswitch within none [label %catch] unwind to caller
@@ -31,9 +55,6 @@ unreachable: ; preds = %entry
3155
unreachable
3256
}
3357

34-
; CHECK-LABEL: define void @test1(
35-
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
36-
; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
3758

3859
declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
3960

0 commit comments

Comments
 (0)