Skip to content

Commit 95c32bf

Browse files
authored
[VPlan] Return invalid cost if any skeleton block has invalid costs. (#151940)
We need to reject plans that contain recipes with invalid costs. LICM can move recipes with invalid costs out of the loop region, where they are then missed by the main cost computation. Extend the invalid-cost check, which previously covered only the middle block, to cover all skeleton blocks. Fixes #144358. Fixes #151664. PR: #151940
1 parent 04196ba commit 95c32bf

File tree

2 files changed

+20
-34
lines changed

2 files changed

+20
-34
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,12 +1054,17 @@ void VPlan::execute(VPTransformState *State) {
10541054

10551055
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
10561056
// For now only return the cost of the vector loop region, ignoring any other
1057-
// blocks, like the preheader or middle blocks.
1057+
// blocks, like the preheader or middle blocks, except for checking them for
1058+
// recipes with invalid costs.
10581059
InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
10591060

1060-
// If any instructions in the middle block are invalid return invalid.
1061-
// TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
1062-
if (!getMiddleBlock()->cost(VF, Ctx).isValid())
1061+
// If the cost of the loop region is invalid or any recipe in the skeleton
1062+
// outside loop regions has an invalid cost, return an invalid cost.
1063+
if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(
1064+
vp_depth_first_shallow(getEntry())),
1065+
[&VF, &Ctx](VPBasicBlock *VPBB) {
1066+
return !VPBB->cost(VF, Ctx).isValid();
1067+
}))
10631068
return InstructionCost::getInvalid();
10641069

10651070
return Cost;

llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,28 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
2-
; REQUIRES: asserts
3-
; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \
4-
; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s
2+
; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S %s | FileCheck %s
53

6-
; FIXME: Hoisted vector code should be costed with scalable cost.
7-
; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost,
8-
; and hence should not be produced by LoopVectorize.
9-
10-
; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00)
114
define void @cost_hoisted_vector_code(ptr %p, float %arg) {
125
; CHECK-LABEL: define void @cost_hoisted_vector_code(
136
; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
147
; CHECK-NEXT: [[ENTRY:.*:]]
15-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
16-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
178
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
189
; CHECK: [[VECTOR_PH]]:
19-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
20-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
21-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]]
22-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]]
23-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
25-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[ARG]], i64 0
26-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
27-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]]
28-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[BROADCAST_SPLAT]], <vscale x 4 x float> zeroinitializer)
10+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[ARG]], i64 0
11+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
12+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer)
2913
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
3014
; CHECK: [[VECTOR_BODY]]:
3115
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3216
; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]]
3317
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
34-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
35-
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
36-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]]
37-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4
38-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4
39-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]]
40-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
41-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
18+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4
19+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP8]], align 4
20+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4
21+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
22+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8
23+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4224
; CHECK: [[MIDDLE_BLOCK]]:
43-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]]
44-
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
25+
; CHECK-NEXT: br label %[[SCALAR_PH]]
4526
; CHECK: [[SCALAR_PH]]:
4627
;
4728
entry:

0 commit comments

Comments
 (0)