From cde5645299b33c1b1478d2f3b74315ca315249f4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Fri, 23 May 2025 22:46:34 +0100
Subject: [PATCH 1/5] Precommit tests

---
 llvm/test/Transforms/InstCombine/fma.ll       | 63 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/fsh.ll       | 63 +++++++++++++++++++
 .../InstCombine/minmax-intrinsics.ll          | 33 ++++++++++
 3 files changed, 159 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index ae0067d41426c..1a8ce85e9f506 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -802,6 +802,69 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
   ret <2 x float> %r
 }
 
+define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[B]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> %a, <2 x float> %b)
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[A]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
+  ret <2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x float> [[Y:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[A]])
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call fast <3 x float> @llvm.fma.v3f32(<3 x float> [[A]], <3 x float> splat (float 4.200000e+01), <3 x float> [[B]])
+; CHECK-NEXT:    ret <3 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_vec3(<3 x float> %a)
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call fast <3 x float> @llvm.fma(<3 x float> splat (float 42.0), <3 x float> %a, <3 x float> %b)
+  ret <3 x float> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 862853f992968..d8f0f439b37df 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -930,6 +930,69 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
   ret <2 x i31> %r
 }
 
+define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> [[A]], <2 x i32> [[B]])
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> [[A]])
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
+  ret <2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]], <vscale x 2 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]])
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
+  ret <vscale x 2 x i32> %r
+}
+
+define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> splat (i32 42), <3 x i32> [[A]], <3 x i32> [[B]])
+; CHECK-NEXT:    ret <3 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_v3(<3 x i32> %a)
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call <3 x i32> @llvm.fshr(<3 x i32> splat (i32 42), <3 x i32> %a, <3 x i32> %b)
+  ret <3 x i32> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 9a8608da9fd5b..4e38370cf4e58 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2416,6 +2416,39 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
   ret <3 x i8> %r
 }
 
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
+; CHECK-NEXT:    ret <3 x i8> [[R]]
+;
+  %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  ret <3 x i8> %r
+}
+
+define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <vscale x 3 x i8> [[X:%.*]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT:    ret <vscale x 3 x i8> [[R]]
+;
+  %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> splat (i8 42), <vscale x 3 x i8> %sx)
+  ret <vscale x 3 x i8> %r
+}
+
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
+; CHECK-NEXT:    ret <3 x i8> [[R]]
+;
+  %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  ret <3 x i8> %r
+}
+
 ; negative test - must have 2 shuffles
 
 define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {

From c0da5806cac2bf323d9c3b347448003f54160ae4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Fri, 23 May 2025 23:22:31 +0100
Subject: [PATCH 2/5] [InstCombine] Fold shuffled intrinsic operands with
 constants

---
 .../InstCombine/InstCombineCalls.cpp          | 29 +++++++++++------
 .../InstCombine/InstCombineInternal.h         |  3 ++
 .../InstCombine/InstructionCombining.cpp      |  4 +--
 llvm/test/Transforms/InstCombine/fma.ll       | 32 +++++++++----------
 llvm/test/Transforms/InstCombine/fsh.ll       | 32 +++++++++----------
 .../InstCombine/minmax-intrinsics.ll          | 18 +++++------
 6 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 12e08c09ea67d..f1ff8180cde23 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,9 +1399,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
 
 /// If all arguments of the intrinsic are unary shuffles with the same mask,
 /// try to shuffle after the intrinsic.
-static Instruction *
-foldShuffledIntrinsicOperands(IntrinsicInst *II,
-                              InstCombiner::BuilderTy &Builder) {
+Instruction *
+InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
   //       etc. Use llvm::isTriviallyVectorizable() and related to determine
   //       which intrinsics are safe to shuffle?
@@ -1419,9 +1418,11 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   }
 
   Value *X;
+  Constant *C;
   ArrayRef<int> Mask;
-  if (!match(II->getArgOperand(0),
-             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+  auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+  if (!NonConstArg ||
+      !match(NonConstArg, m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
     return nullptr;
 
   // At least 1 operand must have 1 use because we are creating 2 instructions.
@@ -1433,11 +1434,19 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   NewArgs[0] = X;
   Type *SrcTy = X->getType();
   for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
-    if (!match(II->getArgOperand(i),
-               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
-        X->getType() != SrcTy)
+    if (match(II->getArgOperand(i),
+              m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+        X->getType() == SrcTy)
+      NewArgs[i] = X;
+    else if (match(II->getArgOperand(i), m_ImmConstant(C))) {
+      // If it's a constant, try find the constant that would be shuffled to C.
+      if (Constant *ShuffledC =
+              unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
+        NewArgs[i] = ShuffledC;
+      else
+        return nullptr;
+    } else
       return nullptr;
-    NewArgs[i] = X;
   }
 
   // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
@@ -3849,7 +3858,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         if (Instruction *R = FoldOpIntoSelect(*II, Sel))
           return R;
 
-  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8b657b3f8555c..5e0cd17fb1924 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -147,6 +147,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
+  Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);
@@ -604,6 +605,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldVectorBinop(BinaryOperator &Inst);
   Instruction *foldVectorSelect(SelectInst &Sel);
   Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
+  Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                              VectorType *NewCTy);
 
   /// Given a binary operator, cast instruction, or select which has a PHI node
   /// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9ddcef0396e39..3dc89772676df 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2100,8 +2100,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
 ///
 /// A 1-to-1 mapping is not required. Example:
 /// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
-static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
-                                   VectorType *NewCTy) {
+Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                                              VectorType *NewCTy) {
   if (isa<ScalableVectorType>(NewCTy)) {
     Constant *Splat = C->getSplatValue();
     if (!Splat)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 1a8ce85e9f506..86a67c996b4d6 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -804,10 +804,9 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
 
 define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[B]])
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[B]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
   %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -817,9 +816,9 @@ define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y
 
 define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[A]])
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X1:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[A]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
   %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
@@ -828,10 +827,9 @@ define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
 
 define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x float> [[Y:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R1]]
 ;
   %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -841,9 +839,9 @@ define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2
 
 define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[A]])
-; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[X1:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[A]]
 ;
   %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
   %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
@@ -854,9 +852,9 @@ define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x
 ; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call fast <3 x float> @llvm.fma.v3f32(<3 x float> [[A]], <3 x float> splat (float 4.200000e+01), <3 x float> [[B]])
-; CHECK-NEXT:    ret <3 x float> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call fast <2 x float> @llvm.fma.v2f32(<2 x float> [[X]], <2 x float> splat (float 4.200000e+01), <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x float> [[B]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   call void @use_vec3(<3 x float> %a)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index d8f0f439b37df..398117b5c1c5c 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -932,10 +932,9 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
 
 define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> [[A]], <2 x i32> [[B]])
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
   %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
@@ -945,9 +944,9 @@ define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> [[A]])
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X1:%.*]], <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
   %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
@@ -956,10 +955,9 @@ define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]], <vscale x 2 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[X]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[B]]
 ;
   %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -969,9 +967,9 @@ define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]])
-; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X1:%.*]], <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[A]]
 ;
   %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
@@ -982,9 +980,9 @@ define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32>
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> splat (i32 42), <3 x i32> [[A]], <3 x i32> [[B]])
-; CHECK-NEXT:    ret <3 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x i32> [[B]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   call void @use_v3(<3 x i32> %a)
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 4e38370cf4e58..85f2a1ccb3a3d 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2418,9 +2418,9 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
 
 define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
-; CHECK-NEXT:    ret <3 x i8> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X1:%.*]], <3 x i8> <i8 1, i8 0, i8 2>)
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
   %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
@@ -2429,9 +2429,9 @@ define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
 
 define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <vscale x 3 x i8> [[X:%.*]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX]], <vscale x 3 x i8> splat (i8 42))
-; CHECK-NEXT:    ret <vscale x 3 x i8> [[R]]
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX:%.*]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 3 x i8> [[R]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 3 x i8> [[R1]]
 ;
   %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
   %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> splat (i8 42), <vscale x 3 x i8> %sx)
@@ -2440,9 +2440,9 @@ define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3
 
 define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
-; CHECK-NEXT:    ret <3 x i8> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X1:%.*]], <2 x i8> <i8 1, i8 0>)
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)

From 575d406e2d38d3d2e4541b38f9ce4a69cd1e455d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 27 May 2025 12:13:24 +0100
Subject: [PATCH 3/5] Fix not checking the first argument

Because we now take the mask from the first non-constant argument, that argument may no longer be the first argument overall.

So check all arguments, starting from the first one, in case we have e.g.

fma(const, shuffle, shuffle)

so that the leading constant is unshuffled instead of being replaced by the source operand of the matched shuffle.
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp |  3 +--
 llvm/test/Transforms/InstCombine/fma.ll              |  4 ++--
 llvm/test/Transforms/InstCombine/fsh.ll              | 10 +++++-----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f1ff8180cde23..8bda5645b475d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1431,9 +1431,8 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
 
   // See if all arguments are shuffled with the same mask.
   SmallVector<Value *, 4> NewArgs(II->arg_size());
-  NewArgs[0] = X;
   Type *SrcTy = X->getType();
-  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+  for (unsigned i = 0, e = II->arg_size(); i != e; ++i) {
     if (match(II->getArgOperand(i),
               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
         X->getType() == SrcTy)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 86a67c996b4d6..29268f32b4cfc 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -816,7 +816,7 @@ define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y
 
 define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X1:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1]])
+; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1:%.*]])
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    ret <2 x float> [[A]]
 ;
@@ -839,7 +839,7 @@ define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2
 
 define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[X1:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1]])
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1:%.*]])
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <vscale x 2 x float> [[A]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 398117b5c1c5c..0325c60997dfd 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -932,7 +932,7 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
 
 define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
-; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 2, i32 1>, <2 x i32> [[X:%.*]], <2 x i32> [[Y1:%.*]])
 ; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
@@ -944,7 +944,7 @@ define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X1:%.*]], <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1]])
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 2, i32 1>, <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1:%.*]])
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
@@ -955,7 +955,7 @@ define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
-; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[X]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[Y1:%.*]])
 ; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <vscale x 2 x i32> [[B]]
 ;
@@ -967,7 +967,7 @@ define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X1:%.*]], <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1]])
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1:%.*]])
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <vscale x 2 x i32> [[A]]
 ;
@@ -980,7 +980,7 @@ define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32>
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
-; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> splat (i32 42), <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
 ; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    ret <3 x i32> [[B]]
 ;

From 171e648bea2cabb57a0bcf9a0a56eff9a433c6c8 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 27 May 2025 14:08:51 +0100
Subject: [PATCH 4/5] Use args() + push_back()

---
 .../lib/Transforms/InstCombine/InstCombineCalls.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8bda5645b475d..e01dafd36d30d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1430,18 +1430,17 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
     return nullptr;
 
   // See if all arguments are shuffled with the same mask.
-  SmallVector<Value *, 4> NewArgs(II->arg_size());
+  SmallVector<Value *, 4> NewArgs;
   Type *SrcTy = X->getType();
-  for (unsigned i = 0, e = II->arg_size(); i != e; ++i) {
-    if (match(II->getArgOperand(i),
-              m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+  for (Value *Arg : II->args()) {
+    if (match(Arg, m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
         X->getType() == SrcTy)
-      NewArgs[i] = X;
-    else if (match(II->getArgOperand(i), m_ImmConstant(C))) {
+      NewArgs.push_back(X);
+    else if (match(Arg, m_ImmConstant(C))) {
       // If it's a constant, try find the constant that would be shuffled to C.
       if (Constant *ShuffledC =
               unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
-        NewArgs[i] = ShuffledC;
+        NewArgs.push_back(ShuffledC);
       else
         return nullptr;
     } else

From 89d4bcaf68f1cc30409cab62653ffc697eee42fb Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 27 May 2025 14:12:09 +0100
Subject: [PATCH 5/5] Canonicalize arg order in tests

---
 llvm/test/Transforms/InstCombine/fma.ll               | 6 +++---
 llvm/test/Transforms/InstCombine/minmax-intrinsics.ll | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 29268f32b4cfc..f0d4f776a5d90 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -810,7 +810,7 @@ define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
   %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
-  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> %a, <2 x float> %b)
+  %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> <float 1.0, float 2.0>, <2 x float> %b)
   ret <2 x float> %r
 }
 
@@ -833,7 +833,7 @@ define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2
 ;
   %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> %a, <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %r
 }
 
@@ -859,7 +859,7 @@ define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x
   %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   call void @use_vec3(<3 x float> %a)
   %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-  %r = call fast <3 x float> @llvm.fma(<3 x float> splat (float 42.0), <3 x float> %a, <3 x float> %b)
+  %r = call fast <3 x float> @llvm.fma(<3 x float> %a, <3 x float> splat (float 42.0), <3 x float> %b)
   ret <3 x float> %r
 }
 
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 85f2a1ccb3a3d..38930956eda2f 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2423,7 +2423,7 @@ define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
 ; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> <i8 0, i8 1, i8 2>)
   ret <3 x i8> %r
 }
 
@@ -2434,7 +2434,7 @@ define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3
 ; CHECK-NEXT:    ret <vscale x 3 x i8> [[R1]]
 ;
   %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
-  %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> splat (i8 42), <vscale x 3 x i8> %sx)
+  %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> %sx, <vscale x 3 x i8> splat (i8 42))
   ret <vscale x 3 x i8> %r
 }
 
@@ -2445,7 +2445,7 @@ define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
 ; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> <i8 0, i8 1, i8 2>)
   ret <3 x i8> %r
 }