llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
Lines changed: 32 additions & 23 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Lines changed: 24 additions & 32 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Lines changed: 50 additions & 19 deletions
@@ -2661,12 +2661,6 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
 /// and needs to be lowered to concrete recipes before codegen. The operands are
 /// {ChainOp, VecOp1, VecOp2, [Condition]}.
 class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
-  /// Opcode of the extend for VecOp1 and VecOp2.
-  Instruction::CastOps ExtOp;
-
-  /// Non-neg flag of the extend recipe.
-  bool IsNonNeg = false;
-
   /// The scalar type after extending.
   Type *ResultTy = nullptr;
 
@@ -2679,8 +2673,8 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
             MulAcc->getVFScaleFactor(),
             WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()),
             MulAcc->getDebugLoc()),
-        ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()),
-        ResultTy(MulAcc->getResultType()) {
+        ResultTy(MulAcc->getResultType()),
+        VecOpInfo{MulAcc->getVecOp0Info(), MulAcc->getVecOp1Info()} {
     transferFlags(*MulAcc);
     setUnderlyingValue(MulAcc->getUnderlyingValue());
   }
@@ -2695,18 +2689,22 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
             R->getCondOp(), R->isOrdered(), R->getVFScaleFactor(),
             WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
             R->getDebugLoc()),
-        ExtOp(Ext0->getOpcode()), ResultTy(ResultTy) {
+        ResultTy(ResultTy),
+        VecOpInfo{
+            {Ext0->getOpcode(), Ext0->hasNonNegFlag() && Ext0->isNonNeg()},
+            {Ext1->getOpcode(), Ext1->hasNonNegFlag() && Ext1->isNonNeg()}} {
     assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
                Instruction::Add &&
            "The reduction instruction in MulAccumulateteReductionRecipe must "
            "be Add");
-    assert((ExtOp == Instruction::CastOps::ZExt ||
-            ExtOp == Instruction::CastOps::SExt) &&
+    unsigned ExtOp0 = getVecOp0Info().ExtOp;
+    unsigned ExtOp1 = getVecOp1Info().ExtOp;
+    assert((ExtOp0 == Instruction::CastOps::ZExt ||
+            ExtOp0 == Instruction::CastOps::SExt) &&
+           (ExtOp1 == Instruction::CastOps::ZExt ||
+            ExtOp1 == Instruction::CastOps::SExt) &&
            "VPMulAccumulateReductionRecipe only supports zext and sext.");
     setUnderlyingValue(R->getUnderlyingValue());
-    // Only set the non-negative flag if the original recipe contains.
-    if (Ext0->hasNonNegFlag())
-      IsNonNeg = Ext0->isNonNeg();
   }
 
   VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul,
@@ -2717,14 +2715,26 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
             R->getCondOp(), R->isOrdered(), R->getVFScaleFactor(),
             WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
             R->getDebugLoc()),
-        ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) {
+        ResultTy(ResultTy) {
     assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) ==
                Instruction::Add &&
            "The reduction instruction in MulAccumulateReductionRecipe must be "
            "Add");
     setUnderlyingValue(R->getUnderlyingValue());
   }
 
+  struct VecOperandInfo {
+    /// Extend opcode applied to the operand; CastOpsEnd means no extend.
+    Instruction::CastOps ExtOp{Instruction::CastOps::CastOpsEnd};
+    /// True if the operand's extend carries the non-negative flag.
+    bool IsNonNeg = false;
+
+    bool isExtended() const {
+      return ExtOp != Instruction::CastOps::CastOpsEnd;
+    }
+    bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; }
+  };
+
   ~VPMulAccumulateReductionRecipe() override = default;
 
   VPMulAccumulateReductionRecipe *clone() override {
@@ -2758,16 +2768,15 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
   VPValue *getVecOp1() const { return getOperand(2); }
 
   /// Return true if this recipe contains extended operands.
-  bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
-
-  /// Return the opcode of the extends for the operands.
-  Instruction::CastOps getExtOpcode() const { return ExtOp; }
+  bool isExtended() const {
+    return getVecOp0Info().isExtended() || getVecOp1Info().isExtended();
+  }
 
-  /// Return if the operands are zero-extended.
-  bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; }
+  const VecOperandInfo &getVecOp0Info() const { return VecOpInfo[0]; }
+  const VecOperandInfo &getVecOp1Info() const { return VecOpInfo[1]; }
 
-  /// Return true if the operand extends have the non-negative flag.
-  bool isNonNeg() const { return IsNonNeg; }
+protected:
+  VecOperandInfo VecOpInfo[2];
 };
 
 /// VPReplicateRecipe replicates a given instruction producing multiple scalar
 
@@ -2495,29 +2495,10 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
   std::optional<FastMathFlags> OptionalFMF =
       ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
 
-  if (isPartialReduction()) {
-    using namespace llvm::VPlanPatternMatch;
-    VPValue *Mul = getVecOp();
-    // Some chained partial reductions used for complex numbers will have a
-    // negation between the mul and reduction. This extracts the mul from that
-    // pattern to use it for further checking.
-    match(Mul, m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Mul)));
-    if (match(Mul,
-              m_Mul(m_ZExtOrSExt(m_VPValue()), m_ZExtOrSExt(m_VPValue())))) {
-      auto *MulR = cast<VPWidenRecipe>(Mul);
-      auto *Ext0R = cast<VPWidenCastRecipe>(MulR->getOperand(0));
-      auto *Ext1R = cast<VPWidenCastRecipe>(MulR->getOperand(1));
-      return Ctx.TTI.getPartialReductionCost(
-          Opcode, Ctx.Types.inferScalarType(Ext0R->getOperand(0)),
-          Ctx.Types.inferScalarType(Ext1R->getOperand(0)),
-          Ctx.Types.inferScalarType(getChainOp()), VF,
-          TargetTransformInfo::getPartialReductionExtendKind(
-              Ext0R->getOpcode()),
-          TargetTransformInfo::getPartialReductionExtendKind(
-              Ext1R->getOpcode()),
-          Instruction::Mul);
-    }
-  }
+  if (isPartialReduction())
+    return Ctx.TTI.getPartialReductionCost(
+        Opcode, ElementTy, ElementTy, ElementTy, VF,
+        TargetTransformInfo::PR_None, TargetTransformInfo::PR_None);
 
   // TODO: Support any-of reductions.
   assert(
@@ -2547,27 +2528,36 @@ VPExtendedReductionRecipe::computeCost(ElementCount VF,
       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
   assert(RedTy->isIntegerTy() &&
          "ExtendedReduction only support integer type currently.");
+  if (isPartialReduction())
+    return Ctx.TTI.getPartialReductionCost(Opcode, RedTy, SrcVecTy, SrcVecTy,
+                                           VF, TargetTransformInfo::PR_None,
+                                           TargetTransformInfo::PR_None);
   return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy,
                                           std::nullopt, Ctx.CostKind);
 }
 
 InstructionCost
 VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
+  const VecOperandInfo &Ext0Info = getVecOp0Info();
+  const VecOperandInfo &Ext1Info = getVecOp1Info();
   if (isPartialReduction())
     return Ctx.TTI.getPartialReductionCost(
         RecurrenceDescriptor::getOpcode(getRecurrenceKind()),
         Ctx.Types.inferScalarType(getVecOp0()),
         Ctx.Types.inferScalarType(getVecOp1()),
         Ctx.Types.inferScalarType(getChainOp()), VF,
-        TargetTransformInfo::getPartialReductionExtendKind(getExtOpcode()),
-        TargetTransformInfo::getPartialReductionExtendKind(getExtOpcode()),
+        TargetTransformInfo::getPartialReductionExtendKind(Ext0Info.ExtOp),
+        TargetTransformInfo::getPartialReductionExtendKind(Ext1Info.ExtOp),
         Instruction::Mul);
+  // Only partial reductions support mixed extends, so reject them here.
+  if (Ext0Info.ExtOp != Ext1Info.ExtOp)
+    return InstructionCost::getInvalid();
 
   Type *RedTy = Ctx.Types.inferScalarType(this);
   auto *SrcVecTy =
       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
-  return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy,
+  return Ctx.TTI.getMulAccReductionCost(Ext0Info.isZExt(), RedTy, SrcVecTy,
                                         Ctx.CostKind);
 }
 
@@ -2653,18 +2643,20 @@ void VPMulAccumulateReductionRecipe::print(raw_ostream &O, const Twine &Indent,
     << " (";
   O << "mul";
   printFlags(O);
+  VecOperandInfo Ext0Info = getVecOp0Info();
+  VecOperandInfo Ext1Info = getVecOp1Info();
   if (isExtended())
     O << "(";
   getVecOp0()->printAsOperand(O, SlotTracker);
-  if (isExtended())
-    O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType()
-      << "), (";
+  if (Ext0Info.isExtended())
+    O << " " << Instruction::getOpcodeName(Ext0Info.ExtOp) << " to "
+      << *getResultType() << "), (";
   else
     O << ", ";
   getVecOp1()->printAsOperand(O, SlotTracker);
-  if (isExtended())
-    O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType()
-      << ")";
+  if (Ext1Info.isExtended())
+    O << " " << Instruction::getOpcodeName(Ext1Info.ExtOp) << " to "
+      << *getResultType() << ")";
   if (isConditional()) {
     O << ", ";
     getCondOp()->printAsOperand(O, SlotTracker);
 
@@ -2563,28 +2563,31 @@ expandVPMulAccumulateReduction(VPMulAccumulateReductionRecipe *MulAcc) {
   // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
   VPValue *Op0, *Op1;
   if (MulAcc->isExtended()) {
+    VPMulAccumulateReductionRecipe::VecOperandInfo Ext0Info =
+        MulAcc->getVecOp0Info();
+    VPMulAccumulateReductionRecipe::VecOperandInfo Ext1Info =
+        MulAcc->getVecOp1Info();
     Type *RedTy = MulAcc->getResultType();
-    if (MulAcc->isZExt())
-      Op0 = new VPWidenCastRecipe(
-          MulAcc->getExtOpcode(), MulAcc->getVecOp0(), RedTy,
-          VPIRFlags::NonNegFlagsTy(MulAcc->isNonNeg()), MulAcc->getDebugLoc());
+    if (Ext0Info.isZExt())
+      Op0 = new VPWidenCastRecipe(Ext0Info.ExtOp, MulAcc->getVecOp0(), RedTy,
+                                  VPIRFlags::NonNegFlagsTy(Ext0Info.IsNonNeg),
+                                  MulAcc->getDebugLoc());
     else
-      Op0 = new VPWidenCastRecipe(MulAcc->getExtOpcode(), MulAcc->getVecOp0(),
-                                  RedTy, {}, MulAcc->getDebugLoc());
+      Op0 = new VPWidenCastRecipe(Ext0Info.ExtOp, MulAcc->getVecOp0(), RedTy,
+                                  {}, MulAcc->getDebugLoc());
     Op0->getDefiningRecipe()->insertBefore(MulAcc);
     // Prevent reduce.add(mul(ext(A), ext(A))) generate duplicate
     // VPWidenCastRecipe.
     if (MulAcc->getVecOp0() == MulAcc->getVecOp1()) {
       Op1 = Op0;
     } else {
-      if (MulAcc->isZExt())
-        Op1 = new VPWidenCastRecipe(
-            MulAcc->getExtOpcode(), MulAcc->getVecOp1(), RedTy,
-            VPIRFlags::NonNegFlagsTy(MulAcc->isNonNeg()),
-            MulAcc->getDebugLoc());
+      if (Ext1Info.isZExt())
+        Op1 = new VPWidenCastRecipe(Ext1Info.ExtOp, MulAcc->getVecOp1(), RedTy,
+                                    VPIRFlags::NonNegFlagsTy(Ext1Info.IsNonNeg),
+                                    MulAcc->getDebugLoc());
       else
-        Op1 = new VPWidenCastRecipe(MulAcc->getExtOpcode(), MulAcc->getVecOp1(),
-                                    RedTy, {}, MulAcc->getDebugLoc());
+        Op1 = new VPWidenCastRecipe(Ext1Info.ExtOp, MulAcc->getVecOp1(), RedTy,
+                                    {}, MulAcc->getDebugLoc());
       Op1->getDefiningRecipe()->insertBefore(MulAcc);
     }
   } else {
@@ -2835,16 +2838,36 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
 
   // Clamp the range if using multiply-accumulate-reduction is profitable.
   auto IsMulAccValidAndClampRange =
-      [&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
+      [&](bool IsZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
           VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool {
     return LoopVectorizationPlanner::getDecisionAndClampRange(
         [&](ElementCount VF) {
           TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-          Type *SrcTy =
+          Type *SrcTy0 =
               Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
-          auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
-          InstructionCost MulAccCost =
-              Ctx.TTI.getMulAccReductionCost(isZExt, RedTy, SrcVecTy, CostKind);
+          Type *SrcTy1 =
+              Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : RedTy;
+          auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy0, VF));
+          InstructionCost MulAccCost;
+          if (Red->isPartialReduction()) {
+            TargetTransformInfo::PartialReductionExtendKind Ext0Kind =
+                Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+                           Ext0->getOpcode())
+                     : TargetTransformInfo::PR_None;
+            TargetTransformInfo::PartialReductionExtendKind Ext1Kind =
+                Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+                           Ext1->getOpcode())
+                     : TargetTransformInfo::PR_None;
+            MulAccCost = Ctx.TTI.getPartialReductionCost(
+                Opcode, SrcTy0, SrcTy1, RedTy, VF, Ext0Kind, Ext1Kind,
+                Mul->getOpcode());
+          } else {
+            // Currently only partial reductions support mixed extension types.
+            if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
+              return false;
+            MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, RedTy, SrcVecTy,
+                                                        CostKind);
+          }
           InstructionCost MulCost = Mul->computeCost(VF, Ctx);
           InstructionCost RedCost = Red->computeCost(VF, Ctx);
           InstructionCost ExtCost = 0;
@@ -2863,6 +2886,12 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
 
   VPValue *VecOp = Red->getVecOp();
   VPValue *A, *B;
+  // Some chained partial reductions used for complex numbers will have a
+  // negation between the mul and reduction. This extracts the mul from that
+  // pattern to use it for further checking.
+  if (Red->isPartialReduction())
+    match(VecOp,
+          m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(VecOp)));
   // Try to match reduce.add(mul(...)).
   if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
     auto *RecipeA =
@@ -2872,8 +2901,10 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
     auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
 
     // Match reduce.add(mul(ext, ext)).
+    // Mixed extensions are valid for partial reductions.
     if (RecipeA && RecipeB &&
-        (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
+        (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B ||
+         Red->isPartialReduction()) &&
         match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
         match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
         IsMulAccValidAndClampRange(RecipeA->getOpcode() ==