Skip to content

Commit 86c31ba

Browse files
SC llvm team
authored and committed
Merge llvm/main into amd-debug
2 parents 3f5d499 + 4d01418 commit 86c31ba

File tree

4 files changed

+546
-90
lines changed

4 files changed

+546
-90
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
6262

6363
foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
6464
def ADD_#FPTy : RuntimeLibcall;
65+
def FAST_ADD_#FPTy : RuntimeLibcall;
66+
6567
def SUB_#FPTy : RuntimeLibcall;
68+
def FAST_SUB_#FPTy : RuntimeLibcall;
69+
6670
def MUL_#FPTy : RuntimeLibcall;
71+
def FAST_MUL_#FPTy : RuntimeLibcall;
72+
6773
def DIV_#FPTy : RuntimeLibcall;
74+
def FAST_DIV_#FPTy : RuntimeLibcall;
75+
6876
def REM_#FPTy : RuntimeLibcall;
6977
def FMA_#FPTy : RuntimeLibcall;
7078
def POWI_#FPTy : RuntimeLibcall;
79+
7180
def SQRT_#FPTy : RuntimeLibcall;
81+
def FAST_SQRT_#FPTy : RuntimeLibcall;
82+
7283
def CBRT_#FPTy : RuntimeLibcall;
7384
def LOG_#FPTy : RuntimeLibcall;
7485
def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -419,7 +430,6 @@ def OBJC_SYNC_EXIT : RuntimeLibcall;
419430
// Define implementation default libcalls
420431
//--------------------------------------------------------------------
421432

422-
defset list<RuntimeLibcallImpl> AllDefaultRuntimeLibcallImpls = {
423433
let IsDefault = true in {
424434

425435
//--------------------------------------------------------------------
@@ -686,7 +696,6 @@ def __llvm_deoptimize : RuntimeLibcallImpl<DEOPTIMIZE>;
686696

687697
// Clear cache
688698
def __clear_cache : RuntimeLibcallImpl<CLEAR_CACHE>;
689-
def __riscv_flush_icache : RuntimeLibcallImpl<RISCV_FLUSH_ICACHE>;
690699

691700
//--------------------------------------------------------------------
692701
// libm
@@ -925,7 +934,6 @@ def memset : RuntimeLibcallImpl<MEMSET>;
925934
def calloc : RuntimeLibcallImpl<CALLOC>;
926935

927936
} // End let IsDefault = true
928-
} // End defset AllDefaultRuntimeLibcallImpls
929937

930938
//--------------------------------------------------------------------
931939
// Define implementation other libcalls
@@ -947,6 +955,8 @@ def bzero : RuntimeLibcallImpl<BZERO>;
947955
def __bzero : RuntimeLibcallImpl<BZERO>;
948956
def _Unwind_SjLj_Resume : RuntimeLibcallImpl<UNWIND_RESUME>;
949957

958+
def __riscv_flush_icache : RuntimeLibcallImpl<RISCV_FLUSH_ICACHE>;
959+
950960
//===----------------------------------------------------------------------===//
951961
// F128 libm Runtime Libcalls
952962
//===----------------------------------------------------------------------===//
@@ -1016,8 +1026,8 @@ defset list<RuntimeLibcallImpl> LibmF128FiniteLibcalls = {
10161026
// unreasonable defaults like reporting f80 calls on most targets when
10171027
// they are relevant to only one.
10181028

1019-
defvar AllDefaultLibCalls =
1020-
!foreach(entry, AllDefaultRuntimeLibcallImpls, entry.Provides);
1029+
defvar AllDefaultRuntimeLibcallImpls
1030+
= !filter(entry, !instances<RuntimeLibcallImpl>(), entry.IsDefault);
10211031

10221032
// Exist in libgcc and compiler-rt for 64-bit targets, or if
10231033
// COMPILER_RT_ENABLE_SOFTWARE_INT128.
@@ -1030,15 +1040,27 @@ defvar CompilerRTOnlyInt128Libcalls = [
10301040
__mulodi4
10311041
];
10321042

1033-
defvar DefaultRuntimeLibcallImpls =
1034-
!listremove(!listremove(AllDefaultRuntimeLibcallImpls,
1035-
Int128RTLibcalls),
1036-
CompilerRTOnlyInt128Libcalls);
1043+
defvar DefaultRuntimeLibcallImpls_f80 =
1044+
!filter(entry, AllDefaultRuntimeLibcallImpls,
1045+
!match(!cast<string>(entry.Provides), "F80"));
1046+
1047+
defvar DefaultRuntimeLibcallImpls_ppcf128 =
1048+
!filter(entry, AllDefaultRuntimeLibcallImpls,
1049+
!match(!cast<string>(entry.Provides), "PPCF128"));
10371050

10381051
defvar DefaultRuntimeLibcallImpls_f128 =
1039-
!filter(entry, DefaultRuntimeLibcallImpls,
1052+
!filter(entry, AllDefaultRuntimeLibcallImpls,
10401053
!match(!cast<string>(entry.Provides), "_F128"));
10411054

1055+
defvar DefaultRuntimeLibcallImpls =
1056+
!listremove(
1057+
!listremove(
1058+
!listremove(
1059+
!listremove(AllDefaultRuntimeLibcallImpls, Int128RTLibcalls),
1060+
CompilerRTOnlyInt128Libcalls),
1061+
DefaultRuntimeLibcallImpls_f80),
1062+
DefaultRuntimeLibcallImpls_ppcf128);
1063+
10421064
defvar DefaultRuntimeLibcallImpls_atomic =
10431065
!filter(entry, DefaultRuntimeLibcallImpls,
10441066
!match(!cast<string>(entry.Provides), "ATOMIC"));
@@ -1461,6 +1483,7 @@ def AVRSystemLibrary
14611483
// Hexagon Runtime Libcalls
14621484
//===----------------------------------------------------------------------===//
14631485

1486+
defset list<RuntimeLibcallImpl> HexagonLibcalls = {
14641487
def __hexagon_divsi3 : RuntimeLibcallImpl<SDIV_I32>;
14651488
def __hexagon_divdi3 : RuntimeLibcallImpl<SDIV_I64>;
14661489
def __hexagon_udivsi3 : RuntimeLibcallImpl<UDIV_I32>;
@@ -1470,30 +1493,39 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
14701493
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
14711494
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
14721495

1473-
// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
14741496
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1475-
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1497+
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
14761498

14771499
def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1478-
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1500+
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
14791501

14801502
def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1481-
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1503+
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
14821504

14831505
def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1484-
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1506+
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
14851507

14861508
def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1487-
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1509+
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
14881510

14891511
def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1490-
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1512+
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
14911513

14921514
// This is the only fast library function for sqrtd.
1493-
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
1515+
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
14941516

14951517
def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
14961518
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;
1519+
}
1520+
1521+
// Predicate string tested against the target triple: true when the
// architecture is Hexagon.
def isHexagon : RuntimeLibcallPredicate<"TT.getArch() == Triple::hexagon">;
1522+
1523+
// Libcall set used when isHexagon holds: DefaultLibcallImpls32 with the
// generic implementations listed inside the `sub` removed, plus every record
// collected in the HexagonLibcalls defset. Several of the removed generic
// entries (e.g. __adddf3, __divsi3, sqrtf) have __hexagon_* counterparts
// visible in that defset.
def HexagonSystemLibrary
1524+
: SystemRuntimeLibrary<isHexagon,
1525+
(add (sub DefaultLibcallImpls32,
1526+
__adddf3, __divsf3, __udivsi3, __udivdi3,
1527+
__umoddi3, __divdf3, __muldf3, __divsi3, __subdf3, sqrtf,
1528+
__divdi3, __umodsi3, __moddi3, __modsi3), HexagonLibcalls)>;
14971529

14981530
//===----------------------------------------------------------------------===//
14991531
// Lanai Runtime Libcalls
@@ -1880,10 +1912,23 @@ def PPCSystemLibrary
18801912
(sub DefaultRuntimeLibcallImpls, memcpy,
18811913
DefaultRuntimeLibcallImpls_f128),
18821914
__extendkftf2, __trunctfkf2,
1915+
DefaultRuntimeLibcallImpls_ppcf128,
18831916
LibmF128Libcalls, AIX32Calls, AIX64Calls,
18841917
AvailableIf<memcpy, isNotAIX>,
18851918
LibcallImpls<(add Int128RTLibcalls), isPPC64>)>;
18861919

1920+
//===----------------------------------------------------------------------===//
1921+
// RISCV Runtime Libcalls
1922+
//===----------------------------------------------------------------------===//
1923+
1924+
// Triple predicates: any RISC-V target, and the 64-bit RISC-V subset.
def isRISCV : RuntimeLibcallPredicate<"TT.isRISCV()">;
1925+
def isRISCV64 : RuntimeLibcallPredicate<"TT.isRISCV64()">;
1926+
1927+
// Libcall set used when isRISCV holds: DefaultRuntimeLibcallImpls plus
// __riscv_flush_icache. Int128RTLibcalls are wrapped in LibcallImpls with the
// isRISCV64 predicate -- presumably making them available on 64-bit RISC-V
// only; confirm against the LibcallImpls class definition.
def RISCVSystemLibrary
1928+
: SystemRuntimeLibrary<isRISCV,
1929+
(add DefaultRuntimeLibcallImpls, __riscv_flush_icache,
1930+
LibcallImpls<(add Int128RTLibcalls), isRISCV64>)>;
1931+
18871932
//===----------------------------------------------------------------------===//
18881933
// SPARC Runtime Libcalls
18891934
//===----------------------------------------------------------------------===//
@@ -1947,6 +1992,21 @@ def _allrem : RuntimeLibcallImpl<SREM_I64>;
19471992
def _aullrem : RuntimeLibcallImpl<UREM_I64>;
19481993
def _allmul : RuntimeLibcallImpl<MUL_I64>;
19491994

1995+
//===----------------------------------------------------------------------===//
1996+
// X86 Runtime Libcalls
1997+
//===----------------------------------------------------------------------===//
1998+
1999+
// Triple predicates distinguishing the 32-bit and 64-bit x86 architectures.
def isX86_32 : RuntimeLibcallPredicate<"TT.getArch() == Triple::x86">;
2000+
def isX86_64 : RuntimeLibcallPredicate<"TT.getArch() == Triple::x86_64">;
2001+
2002+
// 32-bit x86: DefaultLibcallImpls32 plus the F80 implementations
// (DefaultRuntimeLibcallImpls_f80), which are filtered out of
// DefaultRuntimeLibcallImpls and therefore have to be added back explicitly.
def X86_32SystemLibrary
2003+
: SystemRuntimeLibrary<isX86_32,
2004+
(add DefaultLibcallImpls32, DefaultRuntimeLibcallImpls_f80)>;
2005+
2006+
// 64-bit x86: same as above, but based on DefaultLibcallImpls64.
def X86_64SystemLibrary
2007+
: SystemRuntimeLibrary<isX86_64,
2008+
(add DefaultLibcallImpls64, DefaultRuntimeLibcallImpls_f80)>;
2009+
19502010
//===----------------------------------------------------------------------===//
19512011
// XCore Runtime Libcalls
19522012
//===----------------------------------------------------------------------===//

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 99 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140140
RTLIB::Libcall Call_F128,
141141
RTLIB::Libcall Call_PPCF128,
142142
SmallVectorImpl<SDValue> &Results);
143-
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
144-
RTLIB::Libcall Call_I8,
145-
RTLIB::Libcall Call_I16,
146-
RTLIB::Libcall Call_I32,
147-
RTLIB::Libcall Call_I64,
148-
RTLIB::Libcall Call_I128);
143+
144+
void
145+
ExpandFastFPLibCall(SDNode *Node, bool IsFast,
146+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151+
SmallVectorImpl<SDValue> &Results);
152+
153+
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154+
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155+
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149156
void ExpandArgFPLibCall(SDNode *Node,
150157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2228,6 +2235,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
22282235
ExpandFPLibCall(Node, LC, Results);
22292236
}
22302237

2238+
/// Expand \p Node into a floating-point libcall, preferring a "fast" variant
/// when one is permitted and implemented.
///
/// Each Call_* argument is a pair for one FP type: `.first` is the libcall
/// used when \p IsFast is true, `.second` is the fallback (callers in this
/// file pass {RTLIB::FAST_<op>_<ty>, RTLIB::<op>_<ty>}).
///
/// \param Node    Node being converted; its result type 0 selects which pair
///                is used.
/// \param IsFast  Whether the `.first` (fast) libcalls may be tried.
/// \param Results Forwarded to ExpandFPLibCall(Node, LC, Results).
void SelectionDAGLegalize::ExpandFastFPLibCall(
2239+
SDNode *Node, bool IsFast,
2240+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2241+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2242+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2243+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2244+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2245+
SmallVectorImpl<SDValue> &Results) {
2246+
2247+
EVT VT = Node->getSimpleValueType(0);
2248+
2249+
// LC has no initializer: it is assigned in the IsFast branch below, and the
// `!IsFast ||` short-circuit guarantees it is not read when that branch was
// skipped.
RTLIB::Libcall LC;
2250+
2251+
// FIXME: Probably should define fast to respect nan/inf and only be
2252+
// approximate functions.
2253+
2254+
// First choice: the fast libcall for this value type.
if (IsFast) {
2255+
LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
2256+
Call_F128.first, Call_PPCF128.first);
2257+
}
2258+
2259+
// Use the `.second` libcalls when fast was not requested, or when TLI reports
// the selected fast libcall as RTLIB::Unsupported.
if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
2260+
// Fall back if we don't have a fast implementation.
2261+
LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
2262+
Call_F80.second, Call_F128.second,
2263+
Call_PPCF128.second);
2264+
}
2265+
2266+
ExpandFPLibCall(Node, LC, Results);
2267+
}
2268+
22312269
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
22322270
RTLIB::Libcall Call_I8,
22332271
RTLIB::Libcall Call_I16,
@@ -4514,6 +4552,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
45144552
return true;
45154553
}
45164554

4555+
/// Return true if the FAST_* variant of a math libcall may be used for
/// \p Node, i.e. the node's flags include approximate-functions, no-NaNs,
/// no-infs and no-signed-zeros all at once.
4556+
/// FIXME: This is just guessing; we should probably have a specific set of
4557+
/// required flags per libcall.
4558+
static bool canUseFastMathLibcall(const SDNode *Node) {
4559+
// FIXME: Probably should define fast to respect nan/inf and only be
4560+
// approximate functions.
4561+
4562+
SDNodeFlags Flags = Node->getFlags();
4563+
// All four flags are required; a node missing any one of them gets the
// regular (non-fast) libcall.
return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
4564+
Flags.hasNoInfs() && Flags.hasNoSignedZeros();
4565+
}
4566+
45174567
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
45184568
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
45194569
SmallVector<SDValue, 8> Results;
@@ -4634,11 +4684,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
46344684
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
46354685
break;
46364686
case ISD::FSQRT:
4637-
case ISD::STRICT_FSQRT:
4638-
ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4639-
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4640-
RTLIB::SQRT_PPCF128, Results);
4687+
case ISD::STRICT_FSQRT: {
4688+
// FIXME: Probably should define fast to respect nan/inf and only be
4689+
// approximate functions.
4690+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4691+
{RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4692+
{RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4693+
{RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4694+
{RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4695+
{RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4696+
Results);
46414697
break;
4698+
}
46424699
case ISD::FCBRT:
46434700
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
46444701
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4875,11 +4932,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48754932
RTLIB::LLRINT_PPCF128, Results);
48764933
break;
48774934
case ISD::FDIV:
4878-
case ISD::STRICT_FDIV:
4879-
ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4880-
RTLIB::DIV_F80, RTLIB::DIV_F128,
4881-
RTLIB::DIV_PPCF128, Results);
4935+
case ISD::STRICT_FDIV: {
4936+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4937+
{RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4938+
{RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4939+
{RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4940+
{RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4941+
{RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
48824942
break;
4943+
}
48834944
case ISD::FREM:
48844945
case ISD::STRICT_FREM:
48854946
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4893,17 +4954,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48934954
RTLIB::FMA_PPCF128, Results);
48944955
break;
48954956
case ISD::FADD:
4896-
case ISD::STRICT_FADD:
4897-
ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4898-
RTLIB::ADD_F80, RTLIB::ADD_F128,
4899-
RTLIB::ADD_PPCF128, Results);
4957+
case ISD::STRICT_FADD: {
4958+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4959+
{RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4960+
{RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4961+
{RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4962+
{RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4963+
{RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
49004964
break;
4965+
}
49014966
case ISD::FMUL:
4902-
case ISD::STRICT_FMUL:
4903-
ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4904-
RTLIB::MUL_F80, RTLIB::MUL_F128,
4905-
RTLIB::MUL_PPCF128, Results);
4967+
case ISD::STRICT_FMUL: {
4968+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4969+
{RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4970+
{RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4971+
{RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4972+
{RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4973+
{RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
49064974
break;
4975+
}
49074976
case ISD::FP16_TO_FP:
49084977
if (Node->getValueType(0) == MVT::f32) {
49094978
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5076,11 +5145,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
50765145
break;
50775146
}
50785147
case ISD::FSUB:
5079-
case ISD::STRICT_FSUB:
5080-
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5081-
RTLIB::SUB_F80, RTLIB::SUB_F128,
5082-
RTLIB::SUB_PPCF128, Results);
5148+
case ISD::STRICT_FSUB: {
5149+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
5150+
{RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5151+
{RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5152+
{RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5153+
{RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5154+
{RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
50835155
break;
5156+
}
50845157
case ISD::SREM:
50855158
Results.push_back(ExpandIntLibCall(Node, true,
50865159
RTLIB::SREM_I8,

0 commit comments

Comments
 (0)