Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
/// True if the intrinsic may have side-effects.
bool MayHaveSideEffects;

/// True if the intrinsic is safe to speculatively execute.
bool IsSafeToSpeculativelyExecute;

public:
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
Expand All @@ -1577,6 +1580,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
MayHaveSideEffects = MayWriteToMemory ||
!Attrs.hasAttribute(Attribute::NoUnwind) ||
!Attrs.hasAttribute(Attribute::WillReturn);
IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
}

~VPWidenIntrinsicRecipe() override = default;
Expand Down Expand Up @@ -1616,6 +1620,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
/// Returns true if the intrinsic may have side-effects.
bool mayHaveSideEffects() const { return MayHaveSideEffects; }

/// Returns true if the intrinsic is safe to speculatively execute.
bool isSafeToSpeculativelyExecute() const {
return IsSafeToSpeculativelyExecute;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
25 changes: 22 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
Expand Down Expand Up @@ -2087,6 +2088,16 @@ void VPlanTransforms::cse(VPlan &Plan) {
}
}

static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
if (auto *WC = dyn_cast<VPWidenCallRecipe>(R))
return WC->getCalledScalarFunction()->isSpeculatable();
if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(R))
return WI->isSafeToSpeculativelyExecute();
if (auto *RepR = dyn_cast<VPReplicateRecipe>(R))
return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr());
return !R->mayHaveSideEffects();
}

/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
Expand All @@ -2095,6 +2106,15 @@ static void licm(VPlan &Plan) {
// out of a loop region. Does not address legality concerns such as aliasing
// or speculation safety.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
// Assumes don't alias anything or throw.
if (match(&R, m_Intrinsic<Intrinsic::assume>()))
return false;

// TODO: Relax checks in the future, e.g. we could also hoist reads, if
// their memory location is not modified in the vector loop.
if (R.mayReadOrWriteMemory() || R.isPhi())
return true;

// Allocas cannot be hoisted.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
return RepR && RepR->getOpcode() == Instruction::Alloca;
Expand All @@ -2104,14 +2124,13 @@ static void licm(VPlan &Plan) {
// preheader. Preform a shallow traversal of the vector loop region, to
// exclude recipes in replicate regions.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (CannotHoistRecipe(R))
continue;
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
// their memory location is not modified in the vector loop.
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: br [[EXIT:label %.*]]
; FORCED: [[SCALAR_PH:.*:]]
; FORCED-NEXT: br label %[[EXIT:.*]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.body
Expand Down Expand Up @@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: br [[EXIT:label %.*]]
; FORCED: [[SCALAR_PH:.*:]]
; FORCED-NEXT: br label %[[EXIT:.*]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.body
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,8 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[N:%.*]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -551,14 +553,6 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1)
; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1)
; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1)
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
Expand Down
29 changes: 25 additions & 4 deletions llvm/test/Transforms/LoopVectorize/X86/funclet.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"

define void @test1() #0 personality ptr @__CxxFrameHandler3 {
; CHECK-LABEL: define void @test1(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null)
; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
; CHECK: [[CATCH_DISPATCH]]:
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
; CHECK: [[CATCH:.*]]:
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP:.*]]:
; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
; CHECK: [[TRY_CONT]]:
; CHECK-NEXT: ret void
; CHECK: [[UNREACHABLE]]:
; CHECK-NEXT: unreachable
;
entry:
invoke void @_CxxThrowException(ptr null, ptr null)
to label %unreachable unwind label %catch.dispatch
to label %unreachable unwind label %catch.dispatch

catch.dispatch: ; preds = %entry
%0 = catchswitch within none [label %catch] unwind to caller
Expand All @@ -31,9 +55,6 @@ unreachable: ; preds = %entry
unreachable
}

; CHECK-LABEL: define void @test1(
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]

declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/LoopVectorize/assume.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[FOR_END:label %.*]]
; CHECK: [[SCALAR_PH:.*:]]
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
Expand Down Expand Up @@ -73,29 +74,28 @@ define void @test2(ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[MASKEDPTR3]], 0
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2
; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[FOR_END:label %.*]]
; CHECK: [[SCALAR_PH:.*:]]
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
%ptrint = ptrtoint ptr %a to i64
Expand Down Expand Up @@ -163,7 +163,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/LoopVectorize/scalable-assume.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1
Expand All @@ -101,8 +101,6 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x float>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD3]], splat (float 1.000000e+00)
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
Expand Down