Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 11 additions & 17 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class VectorCombine {
const DataLayout *DL, TTI::TargetCostKind CostKind,
bool TryEarlyFoldsOnly)
: F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL),
TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

bool run();
Expand All @@ -88,6 +88,7 @@ class VectorCombine {
AssumptionCache ∾
const DataLayout *DL;
TTI::TargetCostKind CostKind;
const SimplifyQuery SQ;

/// If true, only perform beneficial early IR transforms. Do not introduce new
/// vector operations.
Expand Down Expand Up @@ -1185,24 +1186,26 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
// Fold the vector constants in the original vectors into a new base vector to
// get more accurate cost modelling.
Value *NewVecC = nullptr;
TargetFolder Folder(*DL);
if (CI)
NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
else if (UO)
NewVecC =
Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
else if (BO)
NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
else if (II->arg_size() == 2)
NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
VecCs[1], II->getType(), &I);
NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
else if (II)
NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);

if (!NewVecC)
return false;

// Get cost estimate for the insert element. This cost will factor into
// both sequences.
InstructionCost OldCost = VectorOpCost;
InstructionCost NewCost =
ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
CostKind, *Index, NewVecC);

for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
II->getIntrinsicID(), Idx, &TTI)))
Expand Down Expand Up @@ -1247,15 +1250,6 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
ScalarInst->copyIRFlags(&I);

// Create a new base vector if the constant folding failed.
if (!NewVecC) {
if (CI)
NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
else if (UO || BO)
NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
else
NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
}
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
replaceValue(I, *Insert);
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ define <2 x float> @maxnum(float %x, float %y) {
; AVX2-LABEL: define <2 x float> @maxnum(
; AVX2-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; AVX2-NEXT: [[V_SCALAR:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
; AVX2-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> poison, <2 x float> poison)
; AVX2-NEXT: [[V:%.*]] = insertelement <2 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; AVX2-NEXT: [[V:%.*]] = insertelement <2 x float> poison, float [[V_SCALAR]], i64 0
; AVX2-NEXT: ret <2 x float> [[V]]
;
%x.insert = insertelement <2 x float> poison, float %x, i32 0
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Transforms/VectorCombine/binop-scalarize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,22 @@ define <4 x i8> @udiv_ub(i8 %x, i8 %y) {
%v = udiv <4 x i8> %x.insert, %y.insert
ret <4 x i8> %v
}


; Unfoldable constant expression may cause infinite loop between
; scalarizing insertelement and folding binop(insert(x,a,idx),insert(y,b,idx))
@val = external hidden global ptr, align 8

define <2 x i64> @pr153012(i64 %idx) #0 {
; CHECK-LABEL: define <2 x i64> @pr153012(
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 [[IDX]], i32 0
; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i64> splat (i64 2), [[A]]
; CHECK-NEXT: ret <2 x i64> [[B]]
;
entry:
%a = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 %idx, i32 0
%b = or disjoint <2 x i64> splat (i64 2), %a
ret <2 x i64> %b
}
33 changes: 11 additions & 22 deletions llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
; CHECK-LABEL: define <4 x i32> @umax_fixed(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
Expand All @@ -19,8 +18,7 @@ define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
Expand All @@ -33,8 +31,7 @@ define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]])
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
Expand All @@ -46,8 +43,7 @@ define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
Expand All @@ -59,8 +55,7 @@ define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
Expand All @@ -72,8 +67,7 @@ define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42)
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42))
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
Expand All @@ -100,8 +94,7 @@ define <4 x float> @fabs_fixed(float %x) {
; CHECK-LABEL: define <4 x float> @fabs_fixed(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
Expand All @@ -113,8 +106,7 @@ define <vscale x 4 x float> @fabs_scalable(float %x) {
; CHECK-LABEL: define <vscale x 4 x float> @fabs_scalable(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
Expand All @@ -126,8 +118,7 @@ define <4 x float> @fma_fixed(float %x, float %y, float %z) {
; CHECK-LABEL: define <4 x float> @fma_fixed(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
Expand All @@ -141,8 +132,7 @@ define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
; CHECK-LABEL: define <vscale x 4 x float> @fma_scalable(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
Expand All @@ -156,8 +146,7 @@ define <4 x float> @scalar_argument(float %x) {
; CHECK-LABEL: define <4 x float> @scalar_argument(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 42)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 42)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
Expand Down
Loading