release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958

llvmbot · 2025-08-16T14:59:48Z

Backport 3a4a60d

Requested by: @lukel97

llvmbot · 2025-08-16T14:59:55Z

@lukel97 What do you think about merging this PR to the release branch?

llvmbot · 2025-08-16T15:00:20Z

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)

Changes

Backport 3a4a60d

Requested by: @lukel97

Full diff: https://github.com/llvm/llvm-project/pull/153958.diff

4 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+11-17)
(modified) llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll (+1-2)
(modified) llvm/test/Transforms/VectorCombine/binop-scalarize.ll (+19)
(modified) llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll (+11-22)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..639f8686a271e 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -74,7 +74,7 @@ class VectorCombine {
                 const DataLayout *DL, TTI::TargetCostKind CostKind,
                 bool TryEarlyFoldsOnly)
       : F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
-        DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
+        DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL),
         TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
 
   bool run();
@@ -88,6 +88,7 @@ class VectorCombine {
   AssumptionCache &AC;
   const DataLayout *DL;
   TTI::TargetCostKind CostKind;
+  const SimplifyQuery SQ;
 
   /// If true, only perform beneficial early IR transforms. Do not introduce new
   /// vector operations.
@@ -1185,17 +1186,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   // Fold the vector constants in the original vectors into a new base vector to
   // get more accurate cost modelling.
   Value *NewVecC = nullptr;
-  TargetFolder Folder(*DL);
   if (CI)
-    NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
+    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
   else if (UO)
     NewVecC =
-        Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
+        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
   else if (BO)
-    NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
-  else if (II->arg_size() == 2)
-    NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
-                                         VecCs[1], II->getType(), &I);
+    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
+  else if (II)
+    NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);
+
+  if (!NewVecC)
+    return false;
 
   // Get cost estimate for the insert element. This cost will factor into
   // both sequences.
@@ -1203,6 +1205,7 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   InstructionCost NewCost =
       ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                             CostKind, *Index, NewVecC);
+
   for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
     if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
                               II->getIntrinsicID(), Idx, &TTI)))
@@ -1247,15 +1250,6 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
     ScalarInst->copyIRFlags(&I);
 
-  // Create a new base vector if the constant folding failed.
-  if (!NewVecC) {
-    if (CI)
-      NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
-    else if (UO || BO)
-      NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
-    else
-      NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
-  }
   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
   replaceValue(I, *Insert);
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
index 5f3229398792a..ad5f5a7107c2b 100644
--- a/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/intrinsic-scalarize.ll
@@ -13,8 +13,7 @@ define <2 x float> @maxnum(float %x, float %y) {
 ; AVX2-LABEL: define <2 x float> @maxnum(
 ; AVX2-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
 ; AVX2-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
-; AVX2-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> poison, <2 x float> poison)
-; AVX2-NEXT:    [[V:%.*]] = insertelement <2 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; AVX2-NEXT:    [[V:%.*]] = insertelement <2 x float> poison, float [[V_SCALAR]], i64 0
 ; AVX2-NEXT:    ret <2 x float> [[V]]
 ;
   %x.insert = insertelement <2 x float> poison, float %x, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/binop-scalarize.ll b/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
index 52a706a0b59a7..bc07f8b086496 100644
--- a/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/binop-scalarize.ll
@@ -20,3 +20,22 @@ define <4 x i8> @udiv_ub(i8 %x, i8 %y) {
   %v = udiv <4 x i8> %x.insert, %y.insert
   ret <4 x i8> %v
 }
+
+
+; Unfoldable constant expression may cause infinite loop between 
+; scalarizing insertelement and folding binop(insert(x,a,idx),insert(y,b,idx))
+@val = external hidden global ptr, align 8
+
+define <2 x i64> @pr153012(i64 %idx) #0 {
+; CHECK-LABEL: define <2 x i64> @pr153012(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 [[IDX]], i32 0
+; CHECK-NEXT:    [[B:%.*]] = or disjoint <2 x i64> splat (i64 2), [[A]]
+; CHECK-NEXT:    ret <2 x i64> [[B]]
+;
+entry:
+  %a = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @val to i64)>, i64 %idx, i32 0
+  %b = or disjoint <2 x i64> splat (i64 2), %a
+  ret <2 x i64> %b
+}
diff --git a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
index 9e43a28bf1e59..abd98a4dc64b8 100644
--- a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
@@ -5,8 +5,7 @@ define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -19,8 +18,7 @@ define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -33,8 +31,7 @@ define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -46,8 +43,7 @@ define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -59,8 +55,7 @@ define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -72,8 +67,7 @@ define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42))
-; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -100,8 +94,7 @@ define <4 x float> @fabs_fixed(float %x) {
 ; CHECK-LABEL: define <4 x float> @fabs_fixed(
 ; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[V]]
 ;
   %x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -113,8 +106,7 @@ define <vscale x 4 x float> @fabs_scalable(float %x) {
 ; CHECK-LABEL: define <vscale x 4 x float> @fabs_scalable(
 ; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -126,8 +118,7 @@ define <4 x float> @fma_fixed(float %x, float %y, float %z) {
 ; CHECK-LABEL: define <4 x float> @fma_fixed(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[V]]
 ;
   %x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -141,8 +132,7 @@ define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
 ; CHECK-LABEL: define <vscale x 4 x float> @fma_scalable(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -156,8 +146,7 @@ define <4 x float> @scalar_argument(float %x) {
 ; CHECK-LABEL: define <4 x float> @scalar_argument(
 ; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[V_SCALAR:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 42)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 42)
-; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[V]]
 ;
   %x.insert = insertelement <4 x float> poison, float %x, i32 0

lukel97

LGTM

github-actions · 2025-08-18T05:56:09Z

@lukel97 (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.

@bug

…ite loop (llvm#153069) Fixes llvm#153012 As we tolerate unfoldable constant expressions in `scalarizeOpOrCmp`, we may fold ```llvm define void @bug(ptr %ptr1, ptr %ptr2, i64 %idx) #0 { entry: %158 = insertelement <2 x i64> <i64 5, i64 ptrtoint (ptr @Val to i64)>, i64 %idx, i32 0 %159 = or disjoint <2 x i64> splat (i64 2), %158 store <2 x i64> %159, ptr %ptr2 ret void } ``` to ```llvm define void @bug(ptr %ptr1, ptr %ptr2, i64 %idx) { entry: %.scalar = or disjoint i64 2, %idx %0 = or <2 x i64> splat (i64 2), <i64 5, i64 ptrtoint (ptr @Val to i64)> %1 = insertelement <2 x i64> %0, i64 %.scalar, i64 0 store <2 x i64> %1, ptr %ptr2, align 16 ret void } ``` And it would be folded back in `foldInsExtBinop`, resulting in an infinite loop. This patch forces scalarization iff InstSimplify can fold the constant expression. (cherry picked from commit 3a4a60d)

llvmbot added this to the LLVM 21.x Release milestone Aug 16, 2025

github-project-automation bot added this to LLVM Release Status Aug 16, 2025

github-project-automation bot moved this to Needs Triage in LLVM Release Status Aug 16, 2025

llvmbot requested a review from lukel97 August 16, 2025 14:59

llvmbot added vectorizers llvm:transforms llvm:vectorcombine labels Aug 16, 2025

llvmbot mentioned this pull request Aug 16, 2025

[VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop #153069

Merged

lukel97 approved these changes Aug 16, 2025

View reviewed changes

github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Aug 16, 2025

tru force-pushed the issue153069 branch from 533337d to 5476696 Compare August 18, 2025 05:55

tru merged commit 5476696 into llvm:release/21.x Aug 18, 2025

github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Aug 18, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958

release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958

Uh oh!

llvmbot commented Aug 16, 2025

Uh oh!

llvmbot commented Aug 16, 2025

Uh oh!

llvmbot commented Aug 16, 2025 •

edited

Loading

Uh oh!

lukel97 left a comment

Uh oh!

github-actions bot commented Aug 18, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958

release/21.x: [VectorCombine] Apply InstSimplify in scalarizeOpOrCmp to avoid infinite loop (#153069) #153958

Uh oh!

Conversation

llvmbot commented Aug 16, 2025

Uh oh!

llvmbot commented Aug 16, 2025

Uh oh!

llvmbot commented Aug 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

lukel97 left a comment

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Aug 18, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Aug 16, 2025 •

edited

Loading