Skip to content

Commit 81ca03e

Browse files
committed
[LV] Don't consider second op as invariant in getDivRemSpeculationCost.
When a safe divisor is used, the second operand is always a select inside the loop, so it is never loop-invariant; don't treat it as such. This fixes a divergence between the legacy and VPlan-based cost models. Fixes llvm#156066. (cherry picked from commit e0f00bd)
1 parent 054cc9f commit 81ca03e

File tree

2 files changed

+144
-11
lines changed

2 files changed

+144
-11
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,19 +3061,12 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
30613061
toVectorTy(Type::getInt1Ty(I->getContext()), VF),
30623062
CmpInst::BAD_ICMP_PREDICATE, CostKind);
30633063

3064-
// Certain instructions can be cheaper to vectorize if they have a constant
3065-
// second vector operand. One example of this are shifts on x86.
3066-
Value *Op2 = I->getOperand(1);
3067-
auto Op2Info = TTI.getOperandInfo(Op2);
3068-
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
3069-
Legal->isInvariant(Op2))
3070-
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
3071-
30723064
SmallVector<const Value *, 4> Operands(I->operand_values());
30733065
SafeDivisorCost += TTI.getArithmeticInstrCost(
3074-
I->getOpcode(), VecTy, CostKind,
3075-
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
3076-
Op2Info, Operands, I);
3066+
I->getOpcode(), VecTy, CostKind,
3067+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
3068+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
3069+
Operands, I);
30773070
return {ScalarizationCost, SafeDivisorCost};
30783071
}
30793072

llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,146 @@ exit:
598598
ret i32 %p.2
599599
}
600600

601+
; Test case for https://github.com/llvm/llvm-project/issues/156066.
602+
define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 {
603+
; CHECK-LABEL: @sdiv_by_zero(
604+
; CHECK-NEXT: bb:
605+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
606+
; CHECK: vector.ph:
607+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
608+
; CHECK: vector.body:
609+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SDIV_CONTINUE14:%.*]] ]
610+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
611+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
612+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i32> [[WIDE_LOAD]], zeroinitializer
613+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
614+
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
615+
; CHECK: pred.sdiv.if:
616+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 0
617+
; CHECK-NEXT: [[TMP4:%.*]] = sdiv i32 [[TMP3]], 0
618+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0
619+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE]]
620+
; CHECK: pred.sdiv.continue:
621+
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_SDIV_IF]] ]
622+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
623+
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_SDIV_IF1:%.*]], label [[PRED_SDIV_CONTINUE2:%.*]]
624+
; CHECK: pred.sdiv.if1:
625+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 1
626+
; CHECK-NEXT: [[TMP9:%.*]] = sdiv i32 [[TMP8]], 0
627+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP9]], i32 1
628+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE2]]
629+
; CHECK: pred.sdiv.continue2:
630+
; CHECK-NEXT: [[TMP11:%.*]] = phi <8 x i32> [ [[TMP6]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_SDIV_IF1]] ]
631+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
632+
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4:%.*]]
633+
; CHECK: pred.sdiv.if3:
634+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 2
635+
; CHECK-NEXT: [[TMP14:%.*]] = sdiv i32 [[TMP13]], 0
636+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP14]], i32 2
637+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]]
638+
; CHECK: pred.sdiv.continue4:
639+
; CHECK-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP11]], [[PRED_SDIV_CONTINUE2]] ], [ [[TMP15]], [[PRED_SDIV_IF3]] ]
640+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
641+
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_SDIV_IF5:%.*]], label [[PRED_SDIV_CONTINUE6:%.*]]
642+
; CHECK: pred.sdiv.if5:
643+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 3
644+
; CHECK-NEXT: [[TMP19:%.*]] = sdiv i32 [[TMP18]], 0
645+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP19]], i32 3
646+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE6]]
647+
; CHECK: pred.sdiv.continue6:
648+
; CHECK-NEXT: [[TMP21:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_SDIV_CONTINUE4]] ], [ [[TMP20]], [[PRED_SDIV_IF5]] ]
649+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
650+
; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_SDIV_IF7:%.*]], label [[PRED_SDIV_CONTINUE8:%.*]]
651+
; CHECK: pred.sdiv.if7:
652+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 4
653+
; CHECK-NEXT: [[TMP24:%.*]] = sdiv i32 [[TMP23]], 0
654+
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP24]], i32 4
655+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE8]]
656+
; CHECK: pred.sdiv.continue8:
657+
; CHECK-NEXT: [[TMP26:%.*]] = phi <8 x i32> [ [[TMP21]], [[PRED_SDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_SDIV_IF7]] ]
658+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
659+
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_SDIV_IF9:%.*]], label [[PRED_SDIV_CONTINUE10:%.*]]
660+
; CHECK: pred.sdiv.if9:
661+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 5
662+
; CHECK-NEXT: [[TMP29:%.*]] = sdiv i32 [[TMP28]], 0
663+
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP29]], i32 5
664+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE10]]
665+
; CHECK: pred.sdiv.continue10:
666+
; CHECK-NEXT: [[TMP31:%.*]] = phi <8 x i32> [ [[TMP26]], [[PRED_SDIV_CONTINUE8]] ], [ [[TMP30]], [[PRED_SDIV_IF9]] ]
667+
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
668+
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_SDIV_IF11:%.*]], label [[PRED_SDIV_CONTINUE12:%.*]]
669+
; CHECK: pred.sdiv.if11:
670+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 6
671+
; CHECK-NEXT: [[TMP34:%.*]] = sdiv i32 [[TMP33]], 0
672+
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP34]], i32 6
673+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE12]]
674+
; CHECK: pred.sdiv.continue12:
675+
; CHECK-NEXT: [[TMP36:%.*]] = phi <8 x i32> [ [[TMP31]], [[PRED_SDIV_CONTINUE10]] ], [ [[TMP35]], [[PRED_SDIV_IF11]] ]
676+
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
677+
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_SDIV_IF13:%.*]], label [[PRED_SDIV_CONTINUE14]]
678+
; CHECK: pred.sdiv.if13:
679+
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i32> [[WIDE_LOAD]], i32 7
680+
; CHECK-NEXT: [[TMP39:%.*]] = sdiv i32 [[TMP38]], 0
681+
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <8 x i32> [[TMP36]], i32 [[TMP39]], i32 7
682+
; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE14]]
683+
; CHECK: pred.sdiv.continue14:
684+
; CHECK-NEXT: [[TMP41:%.*]] = phi <8 x i32> [ [[TMP36]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP40]], [[PRED_SDIV_IF13]] ]
685+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP41]], <8 x i32> zeroinitializer
686+
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
687+
; CHECK-NEXT: store <8 x i32> [[PREDPHI]], ptr [[TMP42]], align 4
688+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
689+
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
690+
; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
691+
; CHECK: middle.block:
692+
; CHECK-NEXT: br label [[SCALAR_PH]]
693+
; CHECK: scalar.ph:
694+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ]
695+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
696+
; CHECK: loop.header:
697+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
698+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
699+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
700+
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[L]], 0
701+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP_LATCH]], label [[THEN:%.*]]
702+
; CHECK: then:
703+
; CHECK-NEXT: [[SDIV:%.*]] = sdiv i32 [[L]], 0
704+
; CHECK-NEXT: br label [[LOOP_LATCH]]
705+
; CHECK: loop.latch:
706+
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[SDIV]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ]
707+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
708+
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_DST]], align 4
709+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
710+
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 16
711+
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]]
712+
; CHECK: exit:
713+
; CHECK-NEXT: ret void
714+
;
715+
bb:
716+
br label %loop.header
717+
718+
loop.header:
719+
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %bb ]
720+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
721+
%l = load i32, ptr %gep.src, align 4
722+
%icmp = icmp eq i32 %l, 0
723+
br i1 %icmp, label %loop.latch, label %then
724+
725+
then:
726+
%sdiv = sdiv i32 %l, 0
727+
br label %loop.latch
728+
729+
loop.latch:
730+
%merge = phi i32 [ %sdiv, %then ], [ 0, %loop.header ]
731+
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
732+
store i32 %merge, ptr %gep.dst, align 4
733+
%iv.next = add i64 %iv, 1
734+
%ec = icmp ult i64 %iv, 16
735+
br i1 %ec, label %loop.header, label %exit
736+
737+
exit:
738+
ret void
739+
}
740+
601741
attributes #0 = { "target-cpu"="znver4" }
602742
attributes #1 = { "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
603743
attributes #2 = { "target-cpu"="znver3" }

0 commit comments

Comments
 (0)