Skip to content

Commit 0a91b0d

Browse files
committed
Widening is not supported for <1 x ..> either
1 parent b58b094 commit 0a91b0d

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5667,9 +5667,18 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
56675667
VectorType *AccumVectorType =
56685668
VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));
56695669

5670-
// We don't yet support widening for <vscale x 1 x ..> accumulators.
5671-
if (AccumVectorType->getElementCount() == ElementCount::getScalable(1))
5670+
// We don't yet support all kinds of legalization (e.g. widening
5671+
// of <[vscale x] 1 x ..> accumulators), so reject any other type action.
5672+
auto TA = TLI->getTypeAction(AccumVectorType->getContext(),
5673+
EVT::getEVT(AccumVectorType));
5674+
switch (TA) {
5675+
default:
56725676
return Invalid;
5677+
case TargetLowering::TypeLegal:
5678+
case TargetLowering::TypePromoteInteger:
5679+
case TargetLowering::TypeSplitVector:
5680+
break;
5681+
}
56735682

56745683
// Check what kind of type-legalisation happens.
56755684
std::pair<InstructionCost, MVT> AccumLT =

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,23 +98,23 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
9898
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
9999
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
100100
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
101-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <1 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
101+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
102102
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
103103
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
104104
; CHECK: vec.epilog.vector.body:
105105
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
106-
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <1 x i32> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
106+
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
107107
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr null, align 1
108108
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
109109
; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
110110
; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT11]] to <4 x i32>
111111
; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> [[TMP11]], [[TMP8]]
112-
; CHECK-NEXT: [[PARTIAL_REDUCE12]] = call <1 x i32> @llvm.vector.partial.reduce.add.v1i32.v4i32(<1 x i32> [[VEC_PHI9]], <4 x i32> [[TMP14]])
112+
; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]]
113113
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4
114114
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
115115
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116116
; CHECK: vec.epilog.middle.block:
117-
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> [[PARTIAL_REDUCE12]])
117+
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
118118
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
119119
; CHECK-NEXT: br i1 [[CMP_N15]], label [[WHILE_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
120120
; CHECK: vec.epilog.scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "aarch64-none-unknown-elf"
77
; Tests for printing VPlans that are enabled under AArch64
88

99
define i32 @print_partial_reduction(ptr %a, ptr %b) {
10-
; CHECK: VPlan 'Initial VPlan for VF={4,8,16},UF>=1' {
10+
; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
1111
; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF
1212
; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
1313
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
@@ -71,7 +71,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
7171
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
7272
; CHECK-NEXT: No successors
7373
; CHECK-NEXT: }
74-
; CHECK: VPlan 'Final VPlan for VF={4,8,16},UF={1}' {
74+
; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
7575
; CHECK-NEXT: Live-in ir<1024> = original trip-count
7676
; CHECK-EMPTY:
7777
; CHECK-NEXT: ir-bb<entry>:

0 commit comments

Comments
 (0)