Skip to content

Commit 31711c9

Browse files
authored
[VPlan] Only apply forced cost to recipes with underlying values. (#168372)
Only apply forced instruction costs to recipes with underlying values, to match the legacy cost model. A VPlan may contain a number of additional VPInstructions without underlying values that the legacy cost model does not consider, and assigning forced costs to them would incorrectly inflate the VPlan's cost. When a forced instruction cost is set, recipes without an underlying value are now assigned a cost of 0 instead. This fixes a cost divergence between the legacy and VPlan-based cost models when forced instruction costs are used. PR: #168372
1 parent 3005886 commit 31711c9

File tree

2 files changed

+61
-3
lines changed

2 files changed

+61
-3
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,14 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
277277
RecipeCost = 0;
278278
} else {
279279
RecipeCost = computeCost(VF, Ctx);
280-
if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
281-
RecipeCost.isValid())
282-
RecipeCost = InstructionCost(ForceTargetInstructionCost);
280+
RecipeCost = computeCost(VF, Ctx);
281+
if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
282+
RecipeCost.isValid()) {
283+
if (UI)
284+
RecipeCost = InstructionCost(ForceTargetInstructionCost);
285+
else
286+
RecipeCost = InstructionCost(0);
287+
}
283288
}
284289

285290
LLVM_DEBUG({

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,59 @@ for.end:
380380
ret void
381381
}
382382

383+
define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) #1 {
384+
; COMMON-LABEL: define void @loop_with_freeze_and_conditional_srem(
385+
; COMMON-SAME: ptr [[DST:%.*]], ptr [[KEYINFO:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[DIVISOR:%.*]]) {
386+
; COMMON-NEXT: [[ENTRY:.*]]:
387+
; COMMON-NEXT: br label %[[LOOP:.*]]
388+
; COMMON: [[LOOP]]:
389+
; COMMON-NEXT: [[INDEX_NEXT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
390+
; COMMON-NEXT: [[LOADED:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4
391+
; COMMON-NEXT: [[FROZEN:%.*]] = freeze i32 [[LOADED]]
392+
; COMMON-NEXT: [[CMP:%.*]] = icmp eq i32 [[FROZEN]], 0
393+
; COMMON-NEXT: br i1 [[CMP]], label %[[IF_ZERO:.*]], label %[[IF_NONZERO:.*]]
394+
; COMMON: [[IF_ZERO]]:
395+
; COMMON-NEXT: store i32 0, ptr [[KEYINFO]], align 4
396+
; COMMON-NEXT: br label %[[LOOP_LATCH]]
397+
; COMMON: [[IF_NONZERO]]:
398+
; COMMON-NEXT: [[TMP11:%.*]] = srem i32 1, [[DIVISOR]]
399+
; COMMON-NEXT: store i32 [[TMP11]], ptr [[DST]], align 4
400+
; COMMON-NEXT: br label %[[LOOP_LATCH]]
401+
; COMMON: [[LOOP_LATCH]]:
402+
; COMMON-NEXT: [[IV_NEXT]] = add i64 [[INDEX_NEXT]], 1
403+
; COMMON-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
404+
; COMMON-NEXT: br i1 [[TMP16]], label %[[EXIT:.*]], label %[[LOOP]]
405+
; COMMON: [[EXIT]]:
406+
; COMMON-NEXT: ret void
407+
;
408+
entry:
409+
br label %loop
410+
411+
loop: ; preds = %loop.latch, %entry
412+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
413+
%loaded = load i32, ptr %invariant.ptr, align 4
414+
%frozen = freeze i32 %loaded
415+
%cmp = icmp eq i32 %frozen, 0
416+
br i1 %cmp, label %if.zero, label %if.nonzero
417+
418+
if.zero: ; preds = %loop
419+
store i32 0, ptr %keyinfo, align 4
420+
br label %loop.latch
421+
422+
if.nonzero: ; preds = %loop
423+
%rem = srem i32 1, %divisor
424+
store i32 %rem, ptr %dst, align 4
425+
br label %loop.latch
426+
427+
loop.latch: ; preds = %if.nonzero, %if.zero
428+
%iv.next = add i64 %iv, 1
429+
%exitcond = icmp eq i64 %iv, 32
430+
br i1 %exitcond, label %exit, label %loop
431+
432+
exit: ; preds = %loop.latch
433+
ret void
434+
}
435+
383436
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
384437

385438
declare void @llvm.assume(i1 noundef)

0 commit comments

Comments
 (0)