Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6760,9 +6760,10 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {

InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
ElementCount VF) const {
if (ForceTargetInstructionCost.getNumOccurrences())
return InstructionCost(ForceTargetInstructionCost.getNumOccurrences());
return CM.getInstructionCost(UI, VF);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a fault with your patch, but it looks like it was already broken due to this:

  return InstructionCost(ForceTargetInstructionCost.getNumOccurrences());

I'm pretty sure it should be:

  return InstructionCost(ForceTargetInstructionCost);

similar to LoopVectorizationCostModel::expectedCost

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right, it should be ForceTargetInstructionCost. I've updated this in the latest commit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, to test this we need a test that passes a large value, e.g. -force-target-instruction-cost=100 in llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll would do it

InstructionCost Cost = CM.getInstructionCost(UI, VF);
if (Cost.isValid() && ForceTargetInstructionCost.getNumOccurrences())
return InstructionCost(ForceTargetInstructionCost);
return Cost;
}

bool VPCostContext::isLegacyUniformAfterVectorization(Instruction *I,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -force-target-instruction-cost=1 -S %s | FileCheck %s
; RUN: opt -p loop-vectorize -force-target-instruction-cost=10 -S %s | FileCheck %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx14.0.0"
Expand Down Expand Up @@ -65,19 +65,17 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[START]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 32
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 32
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 16
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP1]], align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
Expand Down Expand Up @@ -366,6 +364,63 @@ exit:
ret void
}

define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
; CHECK-LABEL: define void @invalid_legacy_cost(
; CHECK-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = alloca i8, i64 0, align 16
; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 0, align 16
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[X]], i64 [[IV]]
; CHECK-NEXT: store ptr [[TMP12]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%0 = alloca i8, i64 0, align 16
%arrayidx = getelementptr ptr, ptr %x, i64 %iv
store ptr %0, ptr %arrayidx, align 8
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv, %N
br i1 %exitcond.not, label %for.end, label %for.body

for.end:
ret void
}

attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }

declare void @llvm.assume(i1 noundef)
declare i64 @llvm.umin.i64(i64, i64)
;.
Expand All @@ -392,4 +447,6 @@ declare i64 @llvm.umin.i64(i64, i64)
; CHECK: [[META20]] = !{[[META13]]}
; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]}
; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
;.
Loading