Skip to content

Commit bf2645a

Browse files
committed
Address review comments
1 parent 0a91b0d commit bf2645a

File tree

3 files changed, with 4 additions and 8 deletions.

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5636,9 +5636,6 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
 if (CostKind != TTI::TCK_RecipThroughput)
 return Invalid;
 
-if (VF.isScalable() && !ST->isSVEorStreamingSVEAvailable())
-return Invalid;
-
 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
 (!ST->isNeonAvailable() || !ST->hasDotProd()))
 return Invalid;
@@ -5660,13 +5657,12 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
 
 unsigned Ratio =
 AccumType->getScalarSizeInBits() / InputTypeA->getScalarSizeInBits();
-if (VF.getKnownMinValue() < Ratio)
+if (VF.getKnownMinValue() <= Ratio)
 return Invalid;
 
 VectorType *InputVectorType = VectorType::get(InputTypeA, VF);
 VectorType *AccumVectorType =
 VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));
-
 // We don't yet support all kinds of legalization (e.g. widening
 // of <[vscale x] 1 x ..> accumulators)
 auto TA = TLI->getTypeAction(AccumVectorType->getContext(),

llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 8: 25
+; CHECK: Cost for VF 8: 27
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,15 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
 ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]]
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]

0 commit comments

Comments
 (0)