Skip to content

Commit 94c5214

Browse files
david-arm authored and fhahn committed
[LV] Add scalar load/stores to VPReplicateRecipe::computeCost (llvm#153218)
Avoid calling getLegacyCost for single scalar loads and stores where the cost is trivial to calculate. (cherry picked from commit ba4ce60)
1 parent 5267d0f commit 94c5214

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3044,6 +3044,24 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30443044
return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
30453045
Ctx) *
30463046
(isSingleScalar() ? 1 : VF.getFixedValue());
3047+
case Instruction::Load:
3048+
case Instruction::Store: {
3049+
if (isSingleScalar()) {
3050+
bool IsLoad = UI->getOpcode() == Instruction::Load;
3051+
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
3052+
Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
3053+
const Align Alignment = getLoadStoreAlignment(UI);
3054+
unsigned AS = getLoadStoreAddressSpace(UI);
3055+
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
3056+
InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
3057+
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
3058+
return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
3059+
ScalarPtrTy, nullptr, nullptr);
3060+
}
3061+
// TODO: See getMemInstScalarizationCost for how to handle replicating and
3062+
// predicated cases.
3063+
break;
3064+
}
30473065
}
30483066
}
30493067

llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -608,8 +608,10 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
608608
; CHECK: vector.body:
609609
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SDIV_CONTINUE14:%.*]] ]
610610
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
611-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
612-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i32> [[WIDE_LOAD]], zeroinitializer
611+
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
612+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP44]], align 4
613+
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], zeroinitializer
614+
; CHECK-NEXT: [[TMP1:%.*]] = xor <8 x i1> [[TMP46]], splat (i1 true)
613615
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
614616
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
615617
; CHECK: pred.sdiv.if:
@@ -682,14 +684,15 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
682684
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE14]]
683685
; CHECK: pred.sdiv.continue14:
684686
; CHECK-NEXT: [[TMP41:%.*]] = phi <8 x i32> [ [[TMP36]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP40]], [[PRED_SDIV_IF13]] ]
685-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP41]], <8 x i32> zeroinitializer
687+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP46]], <8 x i32> zeroinitializer, <8 x i32> [[TMP41]]
686688
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
687-
; CHECK-NEXT: store <8 x i32> [[PREDPHI]], ptr [[TMP42]], align 4
689+
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i32 0
690+
; CHECK-NEXT: store <8 x i32> [[PREDPHI]], ptr [[TMP45]], align 4
688691
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
689692
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
690-
; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
693+
; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
691694
; CHECK: middle.block:
692-
; CHECK-NEXT: br label [[SCALAR_PH]]
695+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
693696
; CHECK: scalar.ph:
694697
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ]
695698
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
@@ -708,7 +711,7 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
708711
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_DST]], align 4
709712
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
710713
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 16
711-
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]]
714+
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP8:![0-9]+]]
712715
; CHECK: exit:
713716
; CHECK-NEXT: ret void
714717
;

0 commit comments

Comments
 (0)