Skip to content

Commit f1e98a6

Browse files
[LV] Return Invalid from getLegacyCost when instruction cost forced.
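LoopVectorizationCostModel::expectedCost only overrides the cost returned by getInstructionCost with the forced target instruction cost when the returned cost is valid. This patch makes VPCostContext::getLegacyCost behave the same way: an Invalid cost is now returned unchanged instead of being replaced by the forced cost, which avoids the "VPlan cost model and legacy cost model disagreed" assertion in the included test.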
1 parent c34cba0 commit f1e98a6

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6760,9 +6760,10 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
67606760

67616761
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
67626762
ElementCount VF) const {
6763-
if (ForceTargetInstructionCost.getNumOccurrences())
6763+
InstructionCost Cost = CM.getInstructionCost(UI, VF);
6764+
if (Cost.isValid() && ForceTargetInstructionCost.getNumOccurrences())
67646765
return InstructionCost(ForceTargetInstructionCost.getNumOccurrences());
6765-
return CM.getInstructionCost(UI, VF);
6766+
return Cost;
67666767
}
67676768

67686769
bool VPCostContext::isLegacyUniformAfterVectorization(Instruction *I,
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -passes=loop-vectorize -force-target-instruction-cost=1 -debug-only=loop-vectorize -S -disable-output 2>&1 | FileCheck %s
3+
target triple = "aarch64-linux-gnu"
4+
5+
define i32 @invalid_legacy_cost(i64 %N) #0 {
6+
; CHECK: LV: Checking a loop in 'invalid_legacy_cost
7+
; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %0 = alloca i8, i64 0, align 16
8+
entry:
9+
br label %for.body
10+
11+
for.body:
12+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
13+
%0 = alloca i8, i64 0, align 16
14+
%arrayidx = getelementptr ptr, ptr null, i64 %iv
15+
store ptr %0, ptr %arrayidx, align 8
16+
%iv.next = add i64 %iv, 1
17+
%exitcond.not = icmp eq i64 %iv, %N
18+
br i1 %exitcond.not, label %for.end, label %for.body
19+
20+
for.end:
21+
ret i32 0
22+
}
23+
24+
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }

0 commit comments

Comments
 (0)