varun-r-mallya
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 15 additions & 2 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll‎
Lines changed: 0 additions & 30 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll‎
Lines changed: 0 additions & 30 deletions
@@ -4378,8 +4378,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
   const SCEV *TC =
       vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
   assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
-  RemainingIterations =
-      SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+  const SCEV *KnownMinTC;
+  bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
+  // Use versions of TC and VF in which both are either scalable or fixed.
+  if (ScalableTC == MainLoopVF.isScalable())
+    RemainingIterations =
+        SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+  else if (ScalableTC) {
+    const SCEV *EstimatedTC = SE.getMulExpr(
+        KnownMinTC,
+        SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
+    RemainingIterations = SE.getURemExpr(
+        EstimatedTC, SE.getElementCount(TCType, MainLoopVF * IC));
+  } else
+    RemainingIterations =
+        SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
 
   // No iterations left to process in the epilogue.
   if (RemainingIterations->isZero())
 
@@ -37,7 +37,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali
 ;
 ; CHECK-ARMPL-LABEL: define void @sincos_f32(
 ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-ARMPL:  [[ENTRY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_PH:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY1:.*:]]
@@ -51,15 +50,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL:  [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL:  [[SCALAR_PH:.*:]]
-; CHECK-ARMPL:  [[FOR_BODY:.*:]]
-; CHECK-ARMPL:  [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL:    [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL:    [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL:    [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL:    store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL:    store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
 ; CHECK-ARMPL:  [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
 ; CHECK-ARMPL:  [[VEC_EPILOG_SCALAR_PH:.*:]]
 ; CHECK-ARMPL:  [[FOR_BODY1:.*:]]
@@ -269,7 +259,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias
 ;
 ; CHECK-ARMPL-LABEL: define void @modf_f32(
 ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
-; CHECK-ARMPL:  [[ENTRY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_PH:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY1:.*:]]
@@ -283,15 +272,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL:  [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL:  [[SCALAR_PH:.*:]]
-; CHECK-ARMPL:  [[FOR_BODY:.*:]]
-; CHECK-ARMPL:  [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL:    [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL:    [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL:    [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL:    store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL:    store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
 ; CHECK-ARMPL:  [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
 ; CHECK-ARMPL:  [[VEC_EPILOG_SCALAR_PH:.*:]]
 ; CHECK-ARMPL:  [[FOR_BODY1:.*:]]
@@ -429,7 +409,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa
 ;
 ; CHECK-ARMPL-LABEL: define void @sincospi_f32(
 ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
-; CHECK-ARMPL:  [[ENTRY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_PH:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY:.*:]]
 ; CHECK-ARMPL:  [[VECTOR_BODY1:.*:]]
@@ -443,15 +422,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
 ; CHECK-ARMPL:    store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL:  [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL:  [[SCALAR_PH:.*:]]
-; CHECK-ARMPL:  [[FOR_BODY:.*:]]
-; CHECK-ARMPL:  [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL:    [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL:    [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL:    [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL:    store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL:    store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
 ; CHECK-ARMPL:  [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
 ; CHECK-ARMPL:  [[VEC_EPILOG_SCALAR_PH:.*:]]
 ; CHECK-ARMPL:  [[FOR_BODY1:.*:]]