Skip to content

Commit de72cca

Browse files
authored
[CostModel] Provide a default model for histogram intrinsics (#149348)
Since we scalarize these intrinsics when the target does not support them, we should model that for costing purposes.
1 parent 910d7e9 commit de72cca

File tree

6 files changed

+290 −28 lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -878,9 +878,6 @@ class TargetTransformInfoImplBase {
878878
switch (ICA.getID()) {
879879
default:
880880
break;
881-
case Intrinsic::experimental_vector_histogram_add:
882-
// For now, we want explicit support from the target for histograms.
883-
return InstructionCost::getInvalid();
884881
case Intrinsic::allow_runtime_check:
885882
case Intrinsic::allow_ubsan_check:
886883
case Intrinsic::annotation:

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2105,6 +2105,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21052105
}
21062106
case Intrinsic::get_active_lane_mask:
21072107
case Intrinsic::experimental_vector_match:
2108+
case Intrinsic::experimental_vector_histogram_add:
2109+
case Intrinsic::experimental_vector_histogram_uadd_sat:
2110+
case Intrinsic::experimental_vector_histogram_umax:
2111+
case Intrinsic::experimental_vector_histogram_umin:
21082112
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
21092113
case Intrinsic::modf:
21102114
case Intrinsic::sincos:
@@ -2457,6 +2461,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
24572461
return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),
24582462
cast<VectorType>(ICA.getArgTypes()[0]), {},
24592463
CostKind, 0, cast<VectorType>(RetTy));
2464+
case Intrinsic::experimental_vector_histogram_add:
2465+
case Intrinsic::experimental_vector_histogram_uadd_sat:
2466+
case Intrinsic::experimental_vector_histogram_umax:
2467+
case Intrinsic::experimental_vector_histogram_umin: {
2468+
FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);
2469+
Type *EltTy = ICA.getArgTypes()[1];
2470+
2471+
// Targets with scalable vectors must handle this on their own.
2472+
if (!PtrsTy)
2473+
return InstructionCost::getInvalid();
2474+
2475+
Align Alignment = thisT()->DL.getABITypeAlign(EltTy);
2476+
InstructionCost Cost = 0;
2477+
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,
2478+
CostKind, 1, nullptr, nullptr);
2479+
Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,
2480+
CostKind);
2481+
switch (IID) {
2482+
default:
2483+
llvm_unreachable("Unhandled histogram update operation.");
2484+
case Intrinsic::experimental_vector_histogram_add:
2485+
Cost +=
2486+
thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);
2487+
break;
2488+
case Intrinsic::experimental_vector_histogram_uadd_sat: {
2489+
IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});
2490+
Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);
2491+
break;
2492+
}
2493+
case Intrinsic::experimental_vector_histogram_umax: {
2494+
IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});
2495+
Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);
2496+
break;
2497+
}
2498+
case Intrinsic::experimental_vector_histogram_umin: {
2499+
IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});
2500+
Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);
2501+
break;
2502+
}
2503+
}
2504+
Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,
2505+
CostKind);
2506+
Cost *= PtrsTy->getNumElements();
2507+
return Cost;
2508+
}
24602509
case Intrinsic::get_active_lane_mask: {
24612510
Type *ArgTy = ICA.getArgTypes()[0];
24622511
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -554,7 +554,17 @@ static bool isUnpackedVectorVT(EVT VecVT) {
554554
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
555555
}
556556

557-
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
557+
static InstructionCost getHistogramCost(const AArch64Subtarget *ST,
558+
const IntrinsicCostAttributes &ICA) {
559+
// We need to know at least the number of elements in the vector of buckets
560+
// and the size of each element to update.
561+
if (ICA.getArgTypes().size() < 2)
562+
return InstructionCost::getInvalid();
563+
564+
// Only interested in costing for the hardware instruction from SVE2.
565+
if (!ST->hasSVE2())
566+
return InstructionCost::getInvalid();
567+
558568
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
559569
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
560570
unsigned TotalHistCnts = 1;
@@ -579,9 +589,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
579589

580590
unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
581591
TotalHistCnts = EC / NaturalVectorWidth;
592+
593+
return InstructionCost(BaseHistCntCost * TotalHistCnts);
582594
}
583595

584-
return InstructionCost(BaseHistCntCost * TotalHistCnts);
596+
return InstructionCost::getInvalid();
585597
}
586598

587599
InstructionCost
@@ -597,10 +609,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
597609
return InstructionCost::getInvalid();
598610

599611
switch (ICA.getID()) {
600-
case Intrinsic::experimental_vector_histogram_add:
601-
if (!ST->hasSVE2())
602-
return InstructionCost::getInvalid();
603-
return getHistogramCost(ICA);
612+
case Intrinsic::experimental_vector_histogram_add: {
613+
InstructionCost HistCost = getHistogramCost(ST, ICA);
614+
// If the cost isn't valid, we may still be able to scalarize
615+
if (HistCost.isValid())
616+
return HistCost;
617+
break;
618+
}
604619
case Intrinsic::umin:
605620
case Intrinsic::umax:
606621
case Intrinsic::smin:

0 commit comments

Comments
 (0)