Skip to content

Commit 6fddb71

Browse files
committed
[VPlan] Return invalid cost if any skeleton block has invalid costs. (llvm#151940)
We need to reject plans that contain recipes with invalid costs. LICM can move recipes with invalid costs out of the loop region, where they are then missed by the main cost computation. Extend the check for recipes with invalid costs, which currently covers only the middle block, to include all skeleton blocks. Fixes llvm#144358. Fixes llvm#151664. PR: llvm#151940 (cherry picked from commit 95c32bf)
1 parent 246b46c commit 6fddb71

File tree

2 files changed

+54
-4
lines changed

2 files changed

+54
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,12 +1074,17 @@ void VPlan::execute(VPTransformState *State) {
10741074

10751075
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
10761076
// For now only return the cost of the vector loop region, ignoring any other
1077-
// blocks, like the preheader or middle blocks.
1077+
// blocks, like the preheader or middle blocks, except for checking them for
1078+
// recipes with invalid costs.
10781079
InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
10791080

1080-
// If any instructions in the middle block are invalid return invalid.
1081-
// TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
1082-
if (!getMiddleBlock()->cost(VF, Ctx).isValid())
1081+
// If the cost of the loop region is invalid or any recipe in the skeleton
1082+
// outside loop regions is invalid, return an invalid cost.
1083+
if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(
1084+
vp_depth_first_shallow(getEntry())),
1085+
[&VF, &Ctx](VPBasicBlock *VPBB) {
1086+
return !VPBB->cost(VF, Ctx).isValid();
1087+
}))
10831088
return InstructionCost::getInvalid();
10841089

10851090
return Cost;
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
2+
; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S %s | FileCheck %s
3+
4+
define void @cost_hoisted_vector_code(ptr %p, float %arg) {
5+
; CHECK-LABEL: define void @cost_hoisted_vector_code(
6+
; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
9+
; CHECK: [[VECTOR_PH]]:
10+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[ARG]], i64 0
11+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
12+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer)
13+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
14+
; CHECK: [[VECTOR_BODY]]:
15+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
16+
; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]]
17+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
18+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP8]], i32 0
19+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4
20+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP4]], align 4
21+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4
22+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
23+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8
24+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: [[MIDDLE_BLOCK]]:
26+
; CHECK-NEXT: br i1 false, [[EXIT:label %.*]], label %[[SCALAR_PH]]
27+
; CHECK: [[SCALAR_PH]]:
28+
;
29+
entry:
30+
br label %loop
31+
32+
loop: ; preds = %loop, %entry
33+
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
34+
%res = tail call float @llvm.minimumnum.f32(float %arg, float 0.0)
35+
%gep.p.red = getelementptr float, ptr %p, i64 %iv
36+
store float %res, ptr %gep.p.red, align 4
37+
%iv.next = add i64 %iv, 1
38+
%exit.cond = icmp eq i64 %iv.next, 0
39+
br i1 %exit.cond, label %exit, label %loop
40+
41+
exit: ; preds = %loop
42+
ret void
43+
}
44+
45+
declare float @llvm.minimumnum.f32(float, float)

0 commit comments

Comments
 (0)