Skip to content

Commit e7630a0

Browse files
authored
AMDGPU: Improve cost handling of canonicalize (llvm#101479)
1 parent 5ad15e5 commit e7630a0

File tree

4 files changed: +159 additions, -164 deletions (lines changed)

4 files changed

+159
-164
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -689,6 +689,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
689689
case Intrinsic::fma:
690690
case Intrinsic::fmuladd:
691691
case Intrinsic::copysign:
692+
case Intrinsic::canonicalize:
692693
// There's a small benefit to using vector ops in the legalized code.
693694
case Intrinsic::round:
694695
case Intrinsic::uadd_sat:
@@ -742,15 +743,23 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
742743
break;
743744
case Intrinsic::copysign:
744745
return NElts * getFullRateInstrCost();
746+
case Intrinsic::canonicalize: {
747+
InstRate =
748+
SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
749+
break;
750+
}
745751
case Intrinsic::uadd_sat:
746752
case Intrinsic::usub_sat:
747753
case Intrinsic::sadd_sat:
748-
case Intrinsic::ssub_sat:
754+
case Intrinsic::ssub_sat: {
749755
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
750756
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
751757
NElts = 1;
752758
break;
753759
}
760+
default:
761+
break;
762+
}
754763

755764
return LT.first * NElts * InstRate;
756765
}

0 commit comments

Comments
 (0)