Skip to content

Commit 2a6b09e

Browse files
committed
[LV] Use type from InsertPos for cost computation of interleave groups.
Previously the legacy cost model would pick the type for the cost computation depending on the order of the members in the input IR. This is incompatible with the VPlan-based cost model (independent of original IR order) and also doesn't match code-gen, which uses the type of the insert position. Update the legacy cost model to use the type (and address space) from the Group's insert position. This brings the legacy cost model in line with the VPlan-based cost model and fixes a divergence between both models. Note that the X86 cost model seems to assign different costs to groups with i64 and double types. Added a TODO to check. Fixes #112922.
1 parent b5fa4fe commit 2a6b09e

File tree

3 files changed

+205
-12
lines changed

3 files changed

+205
-12
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5738,14 +5738,15 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
57385738
InstructionCost
57395739
LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
57405740
ElementCount VF) {
5741-
Type *ValTy = getLoadStoreType(I);
5742-
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
5743-
unsigned AS = getLoadStoreAddressSpace(I);
5744-
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5745-
57465741
const auto *Group = getInterleavedAccessGroup(I);
57475742
assert(Group && "Fail to get an interleaved access group.");
57485743

5744+
Instruction *InsertPos = Group->getInsertPos();
5745+
Type *ValTy = getLoadStoreType(InsertPos);
5746+
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
5747+
unsigned AS = getLoadStoreAddressSpace(InsertPos);
5748+
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5749+
57495750
unsigned InterleaveFactor = Group->getFactor();
57505751
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
57515752

@@ -5760,8 +5761,9 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
57605761
(Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed()) ||
57615762
(isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
57625763
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
5763-
I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
5764-
AS, CostKind, Legal->isMaskRequired(I), UseMaskForGaps);
5764+
InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
5765+
Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
5766+
UseMaskForGaps);
57655767

57665768
if (Group->isReverse()) {
57675769
// TODO: Add support for reversed masked interleaved access.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2958,11 +2958,20 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
29582958

29592959
InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
29602960
VPCostContext &Ctx) const {
2961-
Instruction *I = getInsertPos();
2961+
Instruction *InsertPos = getInsertPos();
2962+
// Find the VPValue index of the interleave group. We need to skip gaps.
2963+
unsigned InsertPosIdx = 0;
2964+
for (unsigned Idx = 0; IG->getFactor(); ++Idx)
2965+
if (auto *Member = IG->getMember(Idx)) {
2966+
if (Member == InsertPos)
2967+
break;
2968+
InsertPosIdx++;
2969+
}
29622970
Type *ValTy = Ctx.Types.inferScalarType(
2963-
getNumDefinedValues() > 0 ? getVPValue(0) : getStoredValues()[0]);
2971+
getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
2972+
: getStoredValues()[InsertPosIdx]);
29642973
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
2965-
unsigned AS = getLoadStoreAddressSpace(I);
2974+
unsigned AS = getLoadStoreAddressSpace(InsertPos);
29662975
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
29672976

29682977
unsigned InterleaveFactor = IG->getFactor();
@@ -2976,8 +2985,8 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
29762985

29772986
// Calculate the cost of the whole interleaved group.
29782987
InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
2979-
I->getOpcode(), WideVecTy, IG->getFactor(), Indices, IG->getAlign(), AS,
2980-
CostKind, getMask(), NeedsMaskForGaps);
2988+
InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
2989+
IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps);
29812990

29822991
if (!IG->isReverse())
29832992
return Cost;

llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,184 @@ exit:
586586
ret void
587587
}
588588

589+
; Test case for https://github.com/llvm/llvm-project/issues/112922.
590+
define void @interleave_store_double_i64(ptr %dst) {
591+
; CHECK-LABEL: define void @interleave_store_double_i64(
592+
; CHECK-SAME: ptr [[DST:%.*]]) {
593+
; CHECK-NEXT: [[ENTRY:.*]]:
594+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
595+
; CHECK: [[VECTOR_PH]]:
596+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
597+
; CHECK: [[VECTOR_BODY]]:
598+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
599+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
600+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
601+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[TMP0]]
602+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
603+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
604+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
605+
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
606+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
607+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
608+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
609+
; CHECK: [[MIDDLE_BLOCK]]:
610+
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
611+
; CHECK: [[SCALAR_PH]]:
612+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
613+
; CHECK-NEXT: br label %[[LOOP:.*]]
614+
; CHECK: [[LOOP]]:
615+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
616+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
617+
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8
618+
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
619+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8
620+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
621+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
622+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
623+
; CHECK: [[EXIT]]:
624+
; CHECK-NEXT: ret void
625+
;
626+
entry:
627+
br label %loop
628+
629+
loop:
630+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
631+
%gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
632+
store i64 %iv, ptr %gep.1, align 8
633+
%gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
634+
store double 0.000000e+00, ptr %gep.0, align 8
635+
%iv.next = add i64 %iv, 1
636+
%ec = icmp eq i64 %iv, 1
637+
br i1 %ec, label %exit, label %loop
638+
639+
exit:
640+
ret void
641+
}
642+
643+
define void @interleave_store_i64_double(ptr %dst) {
644+
; CHECK-LABEL: define void @interleave_store_i64_double(
645+
; CHECK-SAME: ptr [[DST:%.*]]) {
646+
; CHECK-NEXT: [[ENTRY:.*]]:
647+
; CHECK-NEXT: br label %[[LOOP:.*]]
648+
; CHECK: [[LOOP]]:
649+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
650+
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
651+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8
652+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
653+
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8
654+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
655+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
656+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
657+
; CHECK: [[EXIT]]:
658+
; CHECK-NEXT: ret void
659+
;
660+
entry:
661+
br label %loop
662+
663+
loop:
664+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
665+
%gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
666+
store double 0.000000e+00, ptr %gep.0, align 8
667+
%gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
668+
store i64 %iv, ptr %gep.1, align 8
669+
%iv.next = add i64 %iv, 1
670+
%ec = icmp eq i64 %iv, 1
671+
br i1 %ec, label %exit, label %loop
672+
673+
exit:
674+
ret void
675+
}
676+
677+
; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64.
678+
define void @interleave_store_double_i64_2(ptr %dst) {
679+
; CHECK-LABEL: define void @interleave_store_double_i64_2(
680+
; CHECK-SAME: ptr [[DST:%.*]]) {
681+
; CHECK-NEXT: [[ENTRY:.*]]:
682+
; CHECK-NEXT: br label %[[LOOP:.*]]
683+
; CHECK: [[LOOP]]:
684+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
685+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
686+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
687+
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
688+
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8
689+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
690+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
691+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
692+
; CHECK: [[EXIT]]:
693+
; CHECK-NEXT: ret void
694+
;
695+
entry:
696+
br label %loop
697+
698+
loop:
699+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
700+
%gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
701+
store double 0.000000e+00, ptr %gep.1, align 8
702+
%gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
703+
store i64 %iv, ptr %gep.0, align 8
704+
%iv.next = add i64 %iv, 1
705+
%ec = icmp eq i64 %iv, 1
706+
br i1 %ec, label %exit, label %loop
707+
708+
exit:
709+
ret void
710+
}
711+
712+
define void @interleave_store_i64_double_2(ptr %dst) {
713+
; CHECK-LABEL: define void @interleave_store_i64_double_2(
714+
; CHECK-SAME: ptr [[DST:%.*]]) {
715+
; CHECK-NEXT: [[ENTRY:.*]]:
716+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
717+
; CHECK: [[VECTOR_PH]]:
718+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
719+
; CHECK: [[VECTOR_BODY]]:
720+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
721+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
722+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
723+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[TMP0]]
724+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
725+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
726+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
727+
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
728+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
729+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
730+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
731+
; CHECK: [[MIDDLE_BLOCK]]:
732+
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
733+
; CHECK: [[SCALAR_PH]]:
734+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
735+
; CHECK-NEXT: br label %[[LOOP:.*]]
736+
; CHECK: [[LOOP]]:
737+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
738+
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
739+
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8
740+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
741+
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
742+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
743+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
744+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
745+
; CHECK: [[EXIT]]:
746+
; CHECK-NEXT: ret void
747+
;
748+
entry:
749+
br label %loop
750+
751+
loop:
752+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
753+
%gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
754+
store i64 %iv, ptr %gep.0, align 8
755+
%gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
756+
store double 0.000000e+00, ptr %gep.1, align 8
757+
%iv.next = add i64 %iv, 1
758+
%ec = icmp eq i64 %iv, 1
759+
br i1 %ec, label %exit, label %loop
760+
761+
exit:
762+
ret void
763+
}
764+
765+
766+
589767
attributes #0 = { "target-features"="+sse4.2" }
590768
attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
591769

@@ -601,4 +779,8 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
601779
; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
602780
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
603781
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
782+
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
783+
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
784+
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
785+
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
604786
;.

0 commit comments

Comments
 (0)