-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV] Choose best reduction for VPlan #166138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/sdesmalen-arm/lv-move-partial-reduction-condition
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,8 @@ | |
| using namespace llvm; | ||
| using namespace VPlanPatternMatch; | ||
|
|
||
| #define DEBUG_TYPE "loop-vectorize" | ||
|
|
||
| static cl::opt<bool> EnableWideActiveLaneMask( | ||
| "enable-wide-lane-mask", cl::init(false), cl::Hidden, | ||
| cl::desc("Enable use of wide get active lane mask instructions")); | ||
|
|
@@ -3761,7 +3763,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, | |
|
|
||
| /// This function tries to create abstract recipes from the reduction recipe for | ||
| /// following optimizations and cost estimation. | ||
| static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, | ||
| static bool tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, | ||
| VPCostContext &Ctx, | ||
| VFRange &Range) { | ||
| VPExpressionRecipe *AbstractR = nullptr; | ||
|
|
@@ -3773,19 +3775,76 @@ static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, | |
| AbstractR = ExtRed; | ||
| // Cannot create abstract inloop reduction recipes. | ||
| if (!AbstractR) | ||
| return; | ||
| return false; | ||
|
|
||
| AbstractR->insertBefore(*VPBB, IP); | ||
| Red->replaceAllUsesWith(AbstractR); | ||
| return true; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to add |
||
| } | ||
|
|
||
| /// Lower a partial reduction back to a regular reduction, by | ||
| /// changing the in-loop partial reduction to a binop and removing | ||
| /// the scale factor from the PHI node. | ||
| static void lowerPartialReduction(VPlan &Plan, VPPartialReductionRecipe *Red, | ||
| VPCostContext &Ctx) { | ||
| VPRecipeBase *Acc = Red->getChainOp()->getDefiningRecipe(); | ||
| if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(Acc)) { | ||
| PhiR->setVFScaleFactor(1); | ||
|
|
||
| // We also need to update the scale factor of the reduction-start-vector | ||
| // operand. | ||
| VPValue *StartV, *IdentityV; | ||
| if (!match(PhiR->getOperand(0), | ||
| m_VPInstruction<VPInstruction::ReductionStartVector>( | ||
| m_VPValue(StartV), m_VPValue(IdentityV), m_VPValue()))) | ||
| llvm_unreachable("Unexpected operand for a partial reduction"); | ||
| Type *I32Ty = IntegerType::getInt32Ty(Plan.getContext()); | ||
| auto *ScaleFactorVPV = Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, 1)); | ||
| cast<VPInstruction>(PhiR->getOperand(0))->setOperand(2, ScaleFactorVPV); | ||
| } | ||
|
|
||
| if (auto *R = dyn_cast<VPPartialReductionRecipe>(Acc)) | ||
| if (R->getVFScaleFactor() != 1) | ||
| lowerPartialReduction(Plan, R, Ctx); | ||
|
|
||
| LLVM_DEBUG( | ||
| dbgs() << "LV: Lowering " << *Red | ||
| << " back to regular reduction, because it is not profitable\n"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we want an |
||
|
|
||
| // Lower the partial reduction to a regular binop. | ||
| VPBuilder Builder(Red); | ||
| VPInstruction *Add = Builder.createNaryOp( | ||
| RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()), | ||
| {Red->getChainOp(), Red->getVecOp()}); | ||
| if (Red->isConditional()) | ||
| Add = Builder.createSelect(Red->getCondOp(), Add, Red->getChainOp()); | ||
|
|
||
| Red->replaceAllUsesWith(Add); | ||
| Red->eraseFromParent(); | ||
| } | ||
|
|
||
| void VPlanTransforms::convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, | ||
| VFRange &Range) { | ||
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | ||
| vp_depth_first_deep(Plan.getVectorLoopRegion()))) { | ||
| for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||
| if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) | ||
| tryToCreateAbstractReductionRecipe(Red, Ctx, Range); | ||
| auto *Red = dyn_cast<VPReductionRecipe>(&R); | ||
| if (!Red) | ||
| continue; | ||
|
|
||
| if (!tryToCreateAbstractReductionRecipe(Red, Ctx, Range) && | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If 'Red' is converted to AbstractRecipe, Red should be null. Can you check for |
||
| isa<VPPartialReductionRecipe>(Red)) { | ||
| // If there isn't a profitable VPExpression for a partial reduction, | ||
| // then that suggests using a partial reduction is not profitable | ||
| // for this VPlan. It seems better to resort to a regular (middle-block) | ||
| // reduction, so that this plan is still profitable to consider. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extra |
||
| // Otherwise, the plan might be discarded in favour of a smaller VF. | ||
| // | ||
| // FIXME: There's a lot to unpick when it comes to partial | ||
| // reductions, but this should provide a temporary stop-gap until we | ||
| // reimplement the logic for creating partial reductions. | ||
| lowerPartialReduction(Plan, cast<VPPartialReductionRecipe>(Red), Ctx); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should only call this if the reduction actually is partial, otherwise we'll waste some time essentially doing nothing in the lower function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be great if we can lower this here, basically after this |
||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: opt -S -mcpu=neoverse-v2 -passes=loop-vectorize -mtriple=aarch64 < %s | FileCheck %s | ||
| target triple = "aarch64" | ||
|
|
||
| ; Check that a partial reduction is reverted back to a regular reduction, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add some check statements that make sure that the reversion to a normal reduction happened? I think we'll also want a test with a subtract, as well as chained add and chained subtract. |
||
| ; so that we compare "the VPlan with the best kind of reduction for <range>" | ||
| ; vs "the VPlan with the best kind of reduction for <other range>", | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable vscale_range(1,16) | ||
| define dso_local i64 @foo(ptr noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you rename this test to something like |
||
| ; CHECK-LABEL: define dso_local i64 @foo( | ||
| ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], 0 | ||
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[ITER_CHECK:.*]], label %[[BB27:.*]] | ||
| ; CHECK: [[ITER_CHECK]]: | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP1]] to i64 | ||
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 | ||
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] | ||
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: | ||
| ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 16 | ||
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] | ||
| ; CHECK: [[VECTOR_PH]]: | ||
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 16 | ||
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] | ||
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] | ||
| ; CHECK: [[VECTOR_BODY]]: | ||
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[INDEX]] | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 4 | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 8 | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 12 | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 | ||
| ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 | ||
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 | ||
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[WIDE_LOAD5]] to <4 x i64> | ||
| ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[WIDE_LOAD6]] to <4 x i64> | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i32> [[WIDE_LOAD7]] to <4 x i64> | ||
| ; CHECK-NEXT: [[TMP13]] = add <4 x i64> [[VEC_PHI]], [[TMP9]] | ||
| ; CHECK-NEXT: [[TMP14]] = add <4 x i64> [[VEC_PHI2]], [[TMP10]] | ||
| ; CHECK-NEXT: [[TMP15]] = add <4 x i64> [[VEC_PHI3]], [[TMP11]] | ||
| ; CHECK-NEXT: [[TMP16]] = add <4 x i64> [[VEC_PHI4]], [[TMP12]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | ||
| ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] | ||
| ; CHECK: [[MIDDLE_BLOCK]]: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you dont need to check post middle block. You can use something like '--filter-out-after' when generating the test |
||
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP14]], [[TMP13]] | ||
| ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i64> [[TMP15]], [[BIN_RDX]] | ||
| ; CHECK-NEXT: [[BIN_RDX9:%.*]] = add <4 x i64> [[TMP16]], [[BIN_RDX8]] | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX9]]) | ||
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] | ||
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[BB25:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] | ||
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: | ||
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 | ||
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] | ||
| ; CHECK: [[VEC_EPILOG_PH]]: | ||
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP18]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP4]], 4 | ||
| ; CHECK-NEXT: [[N_VEC11:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF10]] | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] | ||
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: | ||
| ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI13:%.*]] = phi <4 x i64> [ [[TMP19]], %[[VEC_EPILOG_PH]] ], [ [[TMP22:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[INDEX12]] | ||
| ; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4 | ||
| ; CHECK-NEXT: [[TMP21:%.*]] = sext <4 x i32> [[WIDE_LOAD14]] to <4 x i64> | ||
| ; CHECK-NEXT: [[TMP22]] = add <4 x i64> [[VEC_PHI13]], [[TMP21]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX12]], 4 | ||
| ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC11]] | ||
| ; CHECK-NEXT: br i1 [[TMP23]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP22]]) | ||
| ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC11]] | ||
| ; CHECK-NEXT: br i1 [[CMP_N16]], label %[[BB25]], label %[[VEC_EPILOG_SCALAR_PH]] | ||
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: | ||
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i64 [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP18]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: br label %[[BB29:.*]] | ||
| ; CHECK: [[BB25]]: | ||
| ; CHECK-NEXT: [[TMP26:%.*]] = phi i64 [ [[TMP35:%.*]], %[[BB29]] ], [ [[TMP18]], %[[MIDDLE_BLOCK]] ], [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] | ||
| ; CHECK-NEXT: br label %[[BB27]] | ||
| ; CHECK: [[BB27]]: | ||
| ; CHECK-NEXT: [[TMP28:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP26]], %[[BB25]] ] | ||
| ; CHECK-NEXT: ret i64 [[TMP28]] | ||
| ; CHECK: [[BB29]]: | ||
| ; CHECK-NEXT: [[TMP30:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP36:%.*]], %[[BB29]] ] | ||
| ; CHECK-NEXT: [[TMP31:%.*]] = phi i64 [ [[BC_MERGE_RDX17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP35]], %[[BB29]] ] | ||
| ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[TMP30]] | ||
| ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 | ||
| ; CHECK-NEXT: [[TMP34:%.*]] = sext i32 [[TMP33]] to i64 | ||
| ; CHECK-NEXT: [[TMP35]] = add i64 [[TMP31]], [[TMP34]] | ||
| ; CHECK-NEXT: [[TMP36]] = add nuw nsw i64 [[TMP30]], 1 | ||
| ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[TMP36]], [[TMP4]] | ||
| ; CHECK-NEXT: br i1 [[TMP37]], label %[[BB25]], label %[[BB29]], !llvm.loop [[LOOP5:![0-9]+]] | ||
| ; | ||
| %3 = icmp sgt i32 %1, 0 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you rename some of the labels and variables to have more meaningful names? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can simplify the cfg here a little by removing the check for sgt 0. |
||
| br i1 %3, label %4, label %8 | ||
|
|
||
| 4: ; preds = %2 | ||
| %5 = zext nneg i32 %1 to i64 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you just pass in a i64 as %1 instead? That way you can remove the zext. I don't think the test really relies upon it. |
||
| br label %10 | ||
|
|
||
| 6: ; preds = %10 | ||
| %7 = phi i64 [ %16, %10 ] | ||
| br label %8 | ||
|
|
||
| 8: ; preds = %6, %2 | ||
| %9 = phi i64 [ 0, %2 ], [ %7, %6 ] | ||
| ret i64 %9 | ||
|
|
||
| 10: ; preds = %4, %10 | ||
| %11 = phi i64 [ 0, %4 ], [ %17, %10 ] | ||
| %12 = phi i64 [ 0, %4 ], [ %16, %10 ] | ||
| %13 = getelementptr inbounds nuw i32, ptr %0, i64 %11 | ||
| %14 = load i32, ptr %13, align 4 | ||
| %15 = sext i32 %14 to i64 | ||
| %16 = add i64 %12, %15 | ||
| %17 = add nuw nsw i64 %11, 1 | ||
| %18 = icmp eq i64 %17, %5 | ||
| br i1 %18, label %6, label %10 | ||
| } | ||
| ;. | ||
| ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} | ||
| ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} | ||
| ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} | ||
| ; CHECK: [[PROF3]] = !{!"branch_weights", i32 4, i32 12} | ||
| ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} | ||
| ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} | ||
| ;. | ||
|
Comment on lines
+130
to
+136
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can add |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
rather than checking for ratio=1, you should either replace
OR