Skip to content

Commit a2a7524

Browse files
committed
[VPlan] Skip applying InstsToScalarize with forced instr costs.
ForceTargetInstructionCost in the legacy cost model overrides any costs from InstsToScalarize. Match the behavior in the VPlan-based cost model. This fixes a crash with -force-target-instr-cost for the added test case.
1 parent a464e38 commit a2a7524

File tree

2 files changed

+89
-10
lines changed

2 files changed

+89
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6923,16 +6923,17 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
69236923
});
69246924
Cost += ForcedCost;
69256925
}
6926-
for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
6927-
if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
6928-
continue;
6929-
CostCtx.SkipCostComputation.insert(Scalarized);
6930-
LLVM_DEBUG({
6931-
dbgs() << "Cost of " << ScalarCost << " for VF " << VF
6932-
<< ": profitable to scalarize " << *Scalarized << "\n";
6933-
});
6934-
Cost += ScalarCost;
6935-
}
6926+
if (!ForceTargetInstructionCost.getNumOccurrences())
6927+
for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
6928+
if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
6929+
continue;
6930+
CostCtx.SkipCostComputation.insert(Scalarized);
6931+
LLVM_DEBUG({
6932+
dbgs() << "Cost of " << ScalarCost << " for VF " << VF
6933+
<< ": profitable to scalarize " << *Scalarized << "\n";
6934+
});
6935+
Cost += ScalarCost;
6936+
}
69366937

69376938
return Cost;
69386939
}

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,84 @@ for.end:
380380
ret void
381381
}
382382

383+
define void @forced_scalar_instr(ptr %gep.dst) {
384+
; COMMON-LABEL: define void @forced_scalar_instr(
385+
; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
386+
; COMMON-NEXT: [[ENTRY:.*:]]
387+
; COMMON-NEXT: br label %[[VECTOR_PH:.*]]
388+
; COMMON: [[VECTOR_PH]]:
389+
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
390+
; COMMON: [[VECTOR_BODY]]:
391+
; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
392+
; COMMON-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
393+
; COMMON-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
394+
; COMMON-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
395+
; COMMON-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
396+
; COMMON-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
397+
; COMMON: [[PRED_STORE_IF]]:
398+
; COMMON-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
399+
; COMMON-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 0
400+
; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
401+
; COMMON-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], 1
402+
; COMMON-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4
403+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]]
404+
; COMMON: [[PRED_STORE_CONTINUE]]:
405+
; COMMON-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
406+
; COMMON-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
407+
; COMMON: [[PRED_STORE_IF1]]:
408+
; COMMON-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
409+
; COMMON-NEXT: [[TMP9:%.*]] = add i32 [[TMP0]], 1
410+
; COMMON-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
411+
; COMMON-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], 1
412+
; COMMON-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4
413+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]]
414+
; COMMON: [[PRED_STORE_CONTINUE2]]:
415+
; COMMON-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
416+
; COMMON-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
417+
; COMMON: [[PRED_STORE_IF3]]:
418+
; COMMON-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2
419+
; COMMON-NEXT: [[TMP14:%.*]] = add i32 [[TMP0]], 2
420+
; COMMON-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
421+
; COMMON-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], 1
422+
; COMMON-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
423+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]]
424+
; COMMON: [[PRED_STORE_CONTINUE4]]:
425+
; COMMON-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
426+
; COMMON-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
427+
; COMMON: [[PRED_STORE_IF5]]:
428+
; COMMON-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
429+
; COMMON-NEXT: [[TMP19:%.*]] = add i32 [[TMP0]], 3
430+
; COMMON-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
431+
; COMMON-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], 1
432+
; COMMON-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4
433+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]]
434+
; COMMON: [[PRED_STORE_CONTINUE6]]:
435+
; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
436+
; COMMON-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
437+
; COMMON-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
438+
; COMMON-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
439+
; COMMON: [[MIDDLE_BLOCK]]:
440+
; COMMON-NEXT: br label %[[EXIT:.*]]
441+
; COMMON: [[EXIT]]:
442+
; COMMON-NEXT: ret void
443+
;
444+
entry:
445+
br label %loop
446+
447+
loop:
448+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
449+
%gep = getelementptr i32, ptr %gep.dst, i64 %iv
450+
%t = trunc i64 %iv to i32
451+
%o = or i32 %t, 1
452+
store i32 %o, ptr %gep, align 4
453+
%iv.next = add i64 %iv, 1
454+
%ec = icmp eq i64 %iv, 4
455+
br i1 %ec, label %exit, label %loop
456+
457+
exit:
458+
ret void
459+
}
460+
383461
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
384462

385463
declare void @llvm.assume(i1 noundef)

0 commit comments

Comments
 (0)