Skip to content

Commit b84223f

Browse files
committed
BasicTTI: Cleanup multiple result intrinsic handling
Avoid the awkward lambda that returns a function pointer, and sink the libcall selection logic to where the operation is handled. This allows chaining the libcall logic to try sincos_stret first and fall back to sincos. The resulting cost seems too low.
1 parent f4f247f commit b84223f

File tree

2 files changed

+57
-24
lines changed

2 files changed

+57
-24
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
302302
/// (e.g. scalarization).
303303
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
304304
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
305-
RTLIB::Libcall LC,
306305
std::optional<unsigned> CallRetElementIndex = {}) const {
307306
Type *RetTy = ICA.getReturnType();
308307
// Vector variants of the intrinsic can be mapped to a vector library call.
@@ -311,11 +310,43 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
311310
!isVectorizedStructTy(cast<StructType>(RetTy)))
312311
return std::nullopt;
313312

313+
Type *Ty = getContainedTypes(RetTy).front();
314+
EVT VT = getTLI()->getValueType(DL, Ty);
315+
316+
EVT ScalarVT = VT.getScalarType();
317+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
318+
319+
bool UsesMemoryOutArgument = true;
320+
321+
switch (ICA.getID()) {
322+
case Intrinsic::modf:
323+
LC = RTLIB::getMODF(ScalarVT);
324+
break;
325+
case Intrinsic::sincospi:
326+
LC = RTLIB::getSINCOSPI(ScalarVT);
327+
break;
328+
case Intrinsic::sincos:
329+
LC = RTLIB::getSINCOS_STRET(ScalarVT);
330+
UsesMemoryOutArgument = false;
331+
332+
if (getTLI()->getLibcallImpl(LC) == RTLIB::Unsupported) {
333+
LC = RTLIB::getSINCOS(ScalarVT);
334+
UsesMemoryOutArgument = true;
335+
}
336+
337+
break;
338+
default:
339+
return std::nullopt;
340+
}
341+
314342
// Find associated libcall.
315-
const char *LCName = getTLI()->getLibcallName(LC);
316-
if (!LCName)
343+
RTLIB::LibcallImpl LibcallImpl = getTLI()->getLibcallImpl(LC);
344+
if (LibcallImpl == RTLIB::Unsupported)
317345
return std::nullopt;
318346

347+
StringRef LCName =
348+
RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl);
349+
319350
// Search for a corresponding vector variant.
320351
LLVMContext &Ctx = RetTy->getContext();
321352
ElementCount VF = getVectorizedTypeVF(RetTy);
@@ -336,6 +367,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
336367
VecTy, {}, CostKind, 0, nullptr, {});
337368
}
338369

370+
// Technically this depends on the ABI, but assume sincos_stret passes in
371+
// registers.
372+
if (!UsesMemoryOutArgument)
373+
return Cost;
374+
339375
// Lowering to a library call (with output pointers) may require us to emit
340376
// reloads for the results.
341377
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
@@ -2137,30 +2173,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21372173
case Intrinsic::modf:
21382174
case Intrinsic::sincos:
21392175
case Intrinsic::sincospi: {
2140-
Type *Ty = getContainedTypes(RetTy).front();
2141-
EVT VT = getTLI()->getValueType(DL, Ty);
2142-
2143-
RTLIB::Libcall LC = [&] {
2144-
switch (ICA.getID()) {
2145-
case Intrinsic::modf:
2146-
return RTLIB::getMODF;
2147-
case Intrinsic::sincos:
2148-
return RTLIB::getSINCOS;
2149-
case Intrinsic::sincospi:
2150-
return RTLIB::getSINCOSPI;
2151-
default:
2152-
llvm_unreachable("unexpected intrinsic");
2153-
}
2154-
}()(VT.getScalarType());
2155-
21562176
std::optional<unsigned> CallRetElementIndex;
21572177
// The first element of the modf result is returned by value in the
21582178
// libcall.
21592179
if (ICA.getID() == Intrinsic::modf)
21602180
CallRetElementIndex = 0;
21612181

21622182
if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost(
2163-
ICA, CostKind, LC, CallRetElementIndex))
2183+
ICA, CostKind, CallRetElementIndex))
21642184
return *Cost;
21652185
// Otherwise, fallback to default scalarization cost.
21662186
break;

llvm/test/Analysis/CostModel/AArch64/sincos.ll

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos"
22
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
33
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -intrinsic-cost-strategy=intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB
4+
; RUN: opt < %s -mtriple=arm64-apple-macos10.9 -mattr=+neon -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck -check-prefix=SINCOS_STRET %s
45

56
define void @sincos() {
67
; CHECK-LABEL: 'sincos'
78
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
89
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
910
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
1011
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
11-
;
1212
; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
1313
; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
1414
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
1515
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
1616
; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
17-
;
1817
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
1918
; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
2019
; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
@@ -26,18 +25,32 @@ define void @sincos() {
2625
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
2726
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
2827
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
29-
;
3028
; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
3129
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
3230
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
3331
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
3432
; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
35-
;
3633
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
3734
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
3835
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
3936
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
4037
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
38+
;
39+
; SINCOS_STRET-LABEL: 'sincos'
40+
; SINCOS_STRET: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
41+
; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
42+
; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
43+
; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
44+
; SINCOS_STRET: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
45+
; SINCOS_STRET: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
46+
; SINCOS_STRET: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
47+
; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
48+
; SINCOS_STRET: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
49+
; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
50+
; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
51+
; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
52+
; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
53+
; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
4154
;
4255
%f16 = call { half, half } @llvm.sincos.f16(half poison)
4356
%f32 = call { float, float } @llvm.sincos.f32(float poison)

0 commit comments

Comments
 (0)