@@ -9396,29 +9396,28 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93969396 // reduce.add(mul(ext, ext)) can be folded into VPMulAccRecipe
93979397 if (match (VecOp, m_Mul (m_VPValue (A), m_VPValue (B))) &&
93989398 !VecOp->hasMoreThanOneUniqueUser ()) {
9399- VPRecipeBase *RecipeA = A->getDefiningRecipe ();
9400- VPRecipeBase *RecipeB = B->getDefiningRecipe ();
9399+ VPWidenCastRecipe *RecipeA =
9400+ dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe ());
9401+ VPWidenCastRecipe *RecipeB =
9402+ dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe ());
94019403 if (RecipeA && RecipeB && match (RecipeA, m_ZExtOrSExt (m_VPValue ())) &&
94029404 match (RecipeB, m_ZExtOrSExt (m_VPValue ())) &&
9403- cast<VPWidenCastRecipe>(RecipeA)->getOpcode () ==
9404- cast<VPWidenCastRecipe>(RecipeB)->getOpcode () &&
9405- !A->hasMoreThanOneUniqueUser () &&
9406- !B->hasMoreThanOneUniqueUser ()) {
9405+ (RecipeA->getOpcode () == RecipeB->getOpcode () || A == B)) {
94079406 return new VPMulAccRecipe (
94089407 RdxDesc, CurrentLinkI, PreviousLink, CondOp,
94099408 CM.useOrderedReductions (RdxDesc),
9410- cast<VPWidenRecipe>(VecOp->getDefiningRecipe ()),
9411- cast<VPWidenCastRecipe>(RecipeA),
9412- cast<VPWidenCastRecipe>(RecipeB));
9409+ cast<VPWidenRecipe>(VecOp->getDefiningRecipe ()), RecipeA,
9410+ RecipeB);
94139411 } else {
94149412 // Matched reduce.add(mul(...))
94159413 return new VPMulAccRecipe (
94169414 RdxDesc, CurrentLinkI, PreviousLink, CondOp,
94179415 CM.useOrderedReductions (RdxDesc),
94189416 cast<VPWidenRecipe>(VecOp->getDefiningRecipe ()));
94199417 }
9420- // Matched reduce.add(ext(mul(ext, ext)))
9421- // Note that 3 extend instructions must have same opcode.
9418+ // Matched reduce.add(ext(mul(ext(A), ext(B))))
9419+ // Note that the 3 extend instructions must have the same opcode, or A == B,
9420+ // which can be transformed to reduce.add(zext(mul(sext(A), sext(B)))).
94229421 } else if (match (VecOp,
94239422 m_ZExtOrSExt (m_Mul (m_ZExtOrSExt (m_VPValue ()),
94249423 m_ZExtOrSExt (m_VPValue ())))) &&
@@ -9431,11 +9430,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94319430 cast<VPWidenCastRecipe>(Mul->getOperand (0 )->getDefiningRecipe ());
94329431 VPWidenCastRecipe *Ext1 =
94339432 cast<VPWidenCastRecipe>(Mul->getOperand (1 )->getDefiningRecipe ());
9434- if (Ext->getOpcode () == Ext0->getOpcode () &&
9435- Ext0->getOpcode () == Ext1->getOpcode () &&
9436- !Mul->hasMoreThanOneUniqueUser () &&
9437- !Ext0->hasMoreThanOneUniqueUser () &&
9438- !Ext1->hasMoreThanOneUniqueUser ()) {
9433+ if ((Ext->getOpcode () == Ext0->getOpcode () || Ext0 == Ext1) &&
9434+ Ext0->getOpcode () == Ext1->getOpcode ()) {
94399435 return new VPMulAccRecipe (
94409436 RdxDesc, CurrentLinkI, PreviousLink, CondOp,
94419437 CM.useOrderedReductions (RdxDesc),
@@ -9447,8 +9443,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94479443 };
94489444 auto TryToMatchExtendedReduction = [&]() -> VPSingleDefRecipe * {
94499445 VPValue *A;
9450- if (match (VecOp, m_ZExtOrSExt (m_VPValue (A))) &&
9451- !VecOp->hasMoreThanOneUniqueUser ()) {
9446+ if (match (VecOp, m_ZExtOrSExt (m_VPValue (A)))) {
94529447 return new VPExtendedReductionRecipe (
94539448 RdxDesc, CurrentLinkI, PreviousLink,
94549449 cast<VPWidenCastRecipe>(VecOp), CondOp,
0 commit comments