release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958
Conversation
@lukel97 What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)

Changes

Backport 3a4a60d

Requested by: @lukel97

Full diff: https://github.com/llvm/llvm-project/pull/153958.diff

4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..639f8686a271e 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -74,7 +74,7 @@ class VectorCombine {
const DataLayout *DL, TTI::TargetCostKind CostKind,
bool TryEarlyFoldsOnly)
: F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
- DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
+ DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL),
TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
bool run();
@@ -88,6 +88,7 @@ class VectorCombine {
AssumptionCache &AC;
const DataLayout *DL;
TTI::TargetCostKind CostKind;
+ const SimplifyQuery SQ;
/// If true, only perform beneficial early IR transforms. Do not introduce new
/// vector operations.
@@ -1185,17 +1186,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
// Fold the vector constants in the original vectors into a new base vector to
// get more accurate cost modelling.
Value *NewVecC = nullptr;
- TargetFolder Folder(*DL);
if (CI)
- NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
+ NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
else if (UO)
NewVecC =
- Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
+ simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
else if (BO)
- NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
- else if (II->arg_size() == 2)
- NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
- VecCs[1], II->getType(), &I);
+ NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
+ else if (II)
+ NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);
+
+ if (!NewVecC)
+ return false;
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
@@ -1203,6 +1205,7 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
InstructionCost NewCost =
ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
CostKind, *Index, NewVecC);
+
for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
II->getIntrinsicID(), Idx, &TTI)))
@@ -1247,15 +1250,6 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
ScalarInst->copyIRFlags(&I);
- // Create a new base vector if the constant folding failed.
- if (!NewVecC) {
- if (CI)
- NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
- else if (UO || BO)
- NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
- else
- NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
- }
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
replaceValue(I, *Insert);
return true;
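
Assembled from the hunks above, the new folding step in `scalarizeOpOrCmp` reads roughly as follows (a sketch, not the verbatim upstream file; `CI`/`UO`/`BO`/`II`, `VecCs`, and `SQ` are as declared in the surrounding code):

```cpp
// Try to fold the constants from the original vectors into one base vector.
// Unlike the old TargetFolder calls, InstSimplify also handles cases such as
// umax(poison, poison) -> poison, and it never materializes instructions.
Value *NewVecC = nullptr;
if (CI)
  NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
else if (UO)
  NewVecC = simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
else if (BO)
  NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
else if (II)
  NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);

// Bail out when folding fails. The old code instead fell through to
// IRBuilder calls (the fallback deleted in the hunk above) that built a
// fresh vector op, which foldInsExtBinop could rewrite back into the
// original form -- the source of the infinite loop.
if (!NewVecC)
  return false;
```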
diff --git a/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
index 5f3229398792a..ad5f5a7107c2b 100644
--- a/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
@@ -13,8 +13,7 @@ define <2 x float> @maxnum(float %x, float %y) {
; AVX2-LABEL: define <2 x float> @maxnum(
; AVX2-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; AVX2-NEXT: [[V_SCALAR:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
-; AVX2-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> poison, <2 x float> poison)
-; AVX2-NEXT: [[V:%.*]] = insertelement <2 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; AVX2-NEXT: [[V:%.*]] = insertelement <2 x float> poison, float [[V_SCALAR]], i64 0
; AVX2-NEXT: ret <2 x float> [[V]]
;
%x.insert = insertelement <2 x float> poison, float %x, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/binop-scalarize.ll b/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
index 52a706a0b59a7..bc07f8b086496 100644
--- a/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
@@ -20,3 +20,22 @@ define <4 x i8> @udiv_ub(i8 %x, i8 %y) {
%v = udiv <4 x i8> %x.insert, %y.insert
ret <4 x i8> %v
}
+
+
+; Unfoldable constant expression may cause infinite loop between
+; scalarizing insertelement and folding binop(insert(x,a,idx),insert(y,b,idx))
+@val = external hidden global ptr, align 8
+
+define <2 x i64> @pr153012(i64 %idx) #0 {
+; CHECK-LABEL: define <2 x i64> @pr153012(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[A:%.*]] = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 [[IDX]], i32 0
+; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i64> splat (i64 2), [[A]]
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+entry:
+ %a = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 %idx, i32 0
+ %b = or disjoint <2 x i64> splat (i64 2), %a
+ ret <2 x i64> %b
+}
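
As an aside on reproducing this, the new test can be exercised standalone with `opt -passes=vector-combine -S llvm/test/Transforms/VectorCombine/binop-scalarize.ll` (standard LLVM workflow, not part of this patch). Without the fix, VectorCombine keeps re-scalarizing the `or` and folding it back, the infinite loop reported in llvm#153012; with the fix, the IR is left untouched, as the CHECK lines above show.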
diff --git a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
index 9e43a28bf1e59..abd98a4dc64b8 100644
--- a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
@@ -5,8 +5,7 @@ define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
; CHECK-LABEL: define <4 x i32> @umax_fixed(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -19,8 +18,7 @@ define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -33,8 +31,7 @@ define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -46,8 +43,7 @@ define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1)
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -59,8 +55,7 @@ define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -72,8 +67,7 @@ define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42)
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42))
-; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -100,8 +94,7 @@ define <4 x float> @fabs_fixed(float %x) {
; CHECK-LABEL: define <4 x float> @fabs_fixed(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -113,8 +106,7 @@ define <vscale x 4 x float> @fabs_scalable(float %x) {
; CHECK-LABEL: define <vscale x 4 x float> @fabs_scalable(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -126,8 +118,7 @@ define <4 x float> @fma_fixed(float %x, float %y, float %z) {
; CHECK-LABEL: define <4 x float> @fma_fixed(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -141,8 +132,7 @@ define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
; CHECK-LABEL: define <vscale x 4 x float> @fma_scalable(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
-; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -156,8 +146,7 @@ define <4 x float> @scalar_argument(float %x) {
; CHECK-LABEL: define <4 x float> @scalar_argument(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 42)
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 42)
-; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
lukel97 left a comment:
LGTM
@lukel97 (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
[VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (llvm#153069)

Fixes llvm#153012

Because `scalarizeOpOrCmp` tolerates unfoldable constant expressions, it may fold

```llvm
define void @bug(ptr %ptr1, ptr %ptr2, i64 %idx) #0 {
entry:
  %158 = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @Val to i64)>, i64 %idx, i32 0
  %159 = or disjoint <2 x i64> splat (i64 2), %158
  store <2 x i64> %159, ptr %ptr2
  ret void
}
```

to

```llvm
define void @bug(ptr %ptr1, ptr %ptr2, i64 %idx) {
entry:
  %.scalar = or disjoint i64 2, %idx
  %0 = or <2 x i64> splat (i64 2), <i64 5, i64 ptrtoint (ptr @Val to i64)>
  %1 = insertelement <2 x i64> %0, i64 %.scalar, i64 0
  store <2 x i64> %1, ptr %ptr2, align 16
  ret void
}
```

which would then be folded back by `foldInsExtBinop`, resulting in an infinite loop. This patch only performs the scalarization if InstSimplify can fold the constant expression.

(cherry picked from commit 3a4a60d)
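
To make the cycle concrete, here is a sketch of the two forms chasing each other (illustrative IR based on the example above, not actual pass output):

```llvm
; Form A: binop of inserts -- the input scalarizeOpOrCmp matches.
%a = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @Val to i64)>, i64 %idx, i32 0
%b = or disjoint <2 x i64> splat (i64 2), %a

; Form B: scalarizeOpOrCmp's output. Because `ptrtoint (ptr @Val to i64)`
; is a constant expression that does not fold, the base vector remains a
; real vector `or`, which foldInsExtBinop can turn back into Form A.
%b.scalar = or disjoint i64 2, %idx
%base = or <2 x i64> splat (i64 2), <i64 5, i64 ptrtoint (ptr @Val to i64)>
%b2 = insertelement <2 x i64> %base, i64 %b.scalar, i64 0
```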