Skip to content

Commit 7f5bfe9

Browse files
author
git apple-llvm automerger
committed
Merge commit '95c32bf2d46d' from llvm.org/main into next
2 parents c9ce864 + 95c32bf commit 7f5bfe9

File tree

2 files changed

+20
-34
lines changed

2 files changed

+20
-34
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,12 +1054,17 @@ void VPlan::execute(VPTransformState *State) {
10541054

10551055
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
10561056
// For now only return the cost of the vector loop region, ignoring any other
1057-
// blocks, like the preheader or middle blocks.
1057+
// blocks, like the preheader or middle blocks, except for checking them for
1058+
// recipes with invalid costs.
10581059
InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
10591060

1060-
// If any instructions in the middle block are invalid return invalid.
1061-
// TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
1062-
if (!getMiddleBlock()->cost(VF, Ctx).isValid())
1061+
// If the cost of the loop region is invalid or any recipe in the skeleton
1062+
// outside loop regions is invalid, return an invalid cost.
1063+
if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(
1064+
vp_depth_first_shallow(getEntry())),
1065+
[&VF, &Ctx](VPBasicBlock *VPBB) {
1066+
return !VPBB->cost(VF, Ctx).isValid();
1067+
}))
10631068
return InstructionCost::getInvalid();
10641069

10651070
return Cost;

llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,28 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
2-
; REQUIRES: asserts
3-
; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \
4-
; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s
2+
; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S %s | FileCheck %s
53

6-
; FIXME: Hoisted vector code should be costed with scalable cost.
7-
; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost,
8-
; and hence should not be produced by LoopVectorize.
9-
10-
; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00)
114
define void @cost_hoisted_vector_code(ptr %p, float %arg) {
125
; CHECK-LABEL: define void @cost_hoisted_vector_code(
136
; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
147
; CHECK-NEXT: [[ENTRY:.*:]]
15-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
16-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
178
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
189
; CHECK: [[VECTOR_PH]]:
19-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
20-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
21-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]]
22-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]]
23-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
25-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[ARG]], i64 0
26-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
27-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]]
28-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[BROADCAST_SPLAT]], <vscale x 4 x float> zeroinitializer)
10+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[ARG]], i64 0
11+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
12+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer)
2913
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
3014
; CHECK: [[VECTOR_BODY]]:
3115
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3216
; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]]
3317
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
34-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
35-
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
36-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]]
37-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4
38-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4
39-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]]
40-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
41-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
18+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4
19+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP8]], align 4
20+
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4
21+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
22+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8
23+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4224
; CHECK: [[MIDDLE_BLOCK]]:
43-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]]
44-
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
25+
; CHECK-NEXT: br label %[[SCALAR_PH]]
4526
; CHECK: [[SCALAR_PH]]:
4627
;
4728
entry:

0 commit comments

Comments
 (0)