From e3f00ea0f19076a460172458d63a22433f7d46bd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 4 Aug 2025 11:49:19 +0100 Subject: [PATCH 1/2] [VPlan] Return invalid cost if any skeleton block has invalid costs. We need to reject plans that contain recipes with invalid costs. LICM can move recipes with invalid costs out of the loop region, which then get missed by the main cost computation. Extend the logic to check recipes for invalid cost currently only covering the middle block to include all skeleton blocks. Fixes https://github.com/llvm/llvm-project/issues/144358 Fixes https://github.com/llvm/llvm-project/issues/151664 --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 10 +++-- .../pr151664-cost-hoisted-vector-scalable.ll | 41 +++++-------------- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 2138b4154d2c2..9cc62247ee553 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1072,9 +1072,13 @@ InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { // blocks, like the preheader or middle blocks. InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx); - // If any instructions in the middle block are invalid return invalid. - // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created. - if (!getMiddleBlock()->cost(VF, Ctx).isValid()) + // If any instructions in the skeleton outside loop regions are invalid return + // invalid. + if (any_of(VPBlockUtils::blocksOnly( + vp_depth_first_shallow(getEntry())), + [&VF, &Ctx](VPBasicBlock *VPBB) { + return !VPBB->cost(VF, Ctx).isValid(); + })) return InstructionCost::getInvalid(); return Cost; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 8495deea31e8b..b4df63dd08fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -1,47 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 -; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \ -; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S %s | FileCheck %s -; FIXME: Hoisted vector code should be costed with scalable cost. -; In this example, ` @llvm.minimumnum` has an invalid cost, -; and hence should not be produced by LoopVectorize. - -; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00) define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-LABEL: define void @cost_hoisted_vector_code( ; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[ARG]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( [[BROADCAST_SPLAT]], zeroinitializer) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[ARG]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 4 -; CHECK-NEXT: store [[TMP7]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4 +; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; entry: From 2760573e1dc7a239d20304eca2c5a383c63cb510 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 7 Aug 2025 09:47:30 +0100 Subject: [PATCH 2/2] !fixup clarify comments, add early exit for invalid cost --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0f6980e979929..73babcc6a6006 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1054,16 +1054,17 @@ void VPlan::execute(VPTransformState *State) { InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { // For now only return the cost of the vector loop region, ignoring any other - // blocks, like the preheader or middle blocks. + // blocks, like the preheader or middle blocks, expect for checking them for + // recipes with invalid costs. InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx); - // If any instructions in the skeleton outside loop regions are invalid return - // invalid. - if (any_of(VPBlockUtils::blocksOnly( - vp_depth_first_shallow(getEntry())), - [&VF, &Ctx](VPBasicBlock *VPBB) { - return !VPBB->cost(VF, Ctx).isValid(); - })) + // If the cost of the loop region is invalid or any recipe in the skeleton + // outside loop regions are invalid return an invalid cost. + if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly( + vp_depth_first_shallow(getEntry())), + [&VF, &Ctx](VPBasicBlock *VPBB) { + return !VPBB->cost(VF, Ctx).isValid(); + })) return InstructionCost::getInvalid(); return Cost;