diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 8052e3199ec88..73babcc6a6006 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1054,12 +1054,17 @@ void VPlan::execute(VPTransformState *State) { InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { // For now only return the cost of the vector loop region, ignoring any other - // blocks, like the preheader or middle blocks. + // blocks, like the preheader or middle blocks, expect for checking them for + // recipes with invalid costs. InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx); - // If any instructions in the middle block are invalid return invalid. - // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created. - if (!getMiddleBlock()->cost(VF, Ctx).isValid()) + // If the cost of the loop region is invalid or any recipe in the skeleton + // outside loop regions are invalid return an invalid cost. + if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly( + vp_depth_first_shallow(getEntry())), + [&VF, &Ctx](VPBasicBlock *VPBB) { + return !VPBB->cost(VF, Ctx).isValid(); + })) return InstructionCost::getInvalid(); return Cost; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 8495deea31e8b..b4df63dd08fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -1,47 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 -; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \ -; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S %s | FileCheck %s -; FIXME: Hoisted vector code should be costed with scalable cost. -; In this example, ` @llvm.minimumnum` has an invalid cost, -; and hence should not be produced by LoopVectorize. - -; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00) define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-LABEL: define void @cost_hoisted_vector_code( ; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[ARG]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( [[BROADCAST_SPLAT]], zeroinitializer) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[ARG]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 4 -; CHECK-NEXT: store [[TMP7]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4 +; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; entry: