Skip to content

Commit ea2a255

Browse files
phoebewang
authored and Priyanshu3820 committed
[GlobalOpt] Add TTI interface useFastCCForInternalCall for FASTCC (llvm#164768)
Background: The X86 APX feature adds 16 registers within the same 64-bit mode. PR llvm#164638 is trying to extend such registers for FASTCC. However, a blocking issue is that a calling convention cannot change depending on whether a feature is enabled. The solution is to disable FASTCC if APX is not ready. This is an NFC change to the final code generation, because X86 doesn't define an alternative ABI for FASTCC in 64-bit mode. We can solve the potential compatibility issue of llvm#164638 with this patch.
1 parent 79effee commit ea2a255

File tree

8 files changed

+99
-11
lines changed

8 files changed

+99
-11
lines changed

llvm/docs/LangRef.rst

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -333,11 +333,14 @@ added in the future:
333333
(e.g., by passing things in registers). This calling convention
334334
allows the target to use whatever tricks it wants to produce fast
335335
code for the target, without having to conform to an externally
336-
specified ABI (Application Binary Interface). `Tail calls can only
337-
be optimized when this, the tailcc, the GHC or the HiPE convention is
338-
used. <CodeGenerator.html#tail-call-optimization>`_ This calling
339-
convention does not support varargs and requires the prototype of all
340-
callees to exactly match the prototype of the function definition.
336+
specified ABI (Application Binary Interface). Targets may use different
337+
implementations according to different features. In this case, the
338+
TTI interface ``useFastCCForInternalCall`` must return false when
339+
any caller function and the callee belong to different implementations.
340+
`Tail calls can only be optimized when this, the tailcc, the GHC or the
341+
HiPE convention is used. <CodeGenerator.html#tail-call-optimization>`_
342+
This calling convention does not support varargs and requires the prototype
343+
of all callees to exactly match the prototype of the function definition.
341344
"``coldcc``" - The cold calling convention
342345
This calling convention attempts to make code in the caller as
343346
efficient as possible under the assumption that the call is not

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,10 @@ class TargetTransformInfo {
972972
/// should use coldcc calling convention.
973973
LLVM_ABI bool useColdCCForColdCall(Function &F) const;
974974

975+
/// Return true if the input function is internal and should use the fastcc
976+
/// calling convention.
977+
LLVM_ABI bool useFastCCForInternalCall(Function &F) const;
978+
975979
LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
976980

977981
/// Identifies if the vector form of the intrinsic has a scalar operand.

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {
431431

432432
virtual bool useColdCCForColdCall(Function &F) const { return false; }
433433

434+
virtual bool useFastCCForInternalCall(Function &F) const { return true; }
435+
434436
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
435437
return false;
436438
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
609609
return TTIImpl->useColdCCForColdCall(F);
610610
}
611611

612+
bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
613+
return TTIImpl->useFastCCForInternalCall(F);
614+
}
615+
612616
bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
613617
Intrinsic::ID ID) const {
614618
return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7230,3 +7230,19 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
72307230

72317231
return false;
72327232
}
7233+
7234+
bool X86TTIImpl::useFastCCForInternalCall(Function &F) const {
7235+
bool HasEGPR = ST->hasEGPR();
7236+
const TargetMachine &TM = getTLI()->getTargetMachine();
7237+
7238+
for (User *U : F.users()) {
7239+
CallBase *CB = dyn_cast<CallBase>(U);
7240+
if (!CB || CB->getCalledOperand() != &F)
7241+
continue;
7242+
Function *CallerFunc = CB->getFunction();
7243+
if (TM.getSubtarget<X86Subtarget>(*CallerFunc).hasEGPR() != HasEGPR)
7244+
return false;
7245+
}
7246+
7247+
return true;
7248+
}

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
318318
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
319319
Type *ScalarValTy) const override;
320320

321+
bool useFastCCForInternalCall(Function &F) const override;
322+
321323
private:
322324
bool supportsGather() const;
323325
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,
20182018

20192019
if (hasChangeableCC(&F, ChangeableCCCache)) {
20202020
// If this function has a calling convention worth changing, is not a
2021-
// varargs function, and is only called directly, promote it to use the
2022-
// Fast calling convention.
2023-
F.setCallingConv(CallingConv::Fast);
2024-
ChangeCalleesToFastCall(&F);
2025-
++NumFastCallFns;
2026-
Changed = true;
2021+
// varargs function, is only called directly, and is supported by the
2022+
// target, promote it to use the Fast calling convention.
2023+
TargetTransformInfo &TTI = GetTTI(F);
2024+
if (TTI.useFastCCForInternalCall(F)) {
2025+
F.setCallingConv(CallingConv::Fast);
2026+
ChangeCalleesToFastCall(&F);
2027+
++NumFastCallFns;
2028+
Changed = true;
2029+
}
20272030
}
20282031

20292032
if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -mtriple=x86_64 -S -passes=globalopt -o - < %s | FileCheck %s
3+
4+
define void @caller1() {
5+
; CHECK-LABEL: define void @caller1() local_unnamed_addr {
6+
; CHECK-NEXT: call void @callee1()
7+
; CHECK-NEXT: ret void
8+
;
9+
call void @callee1()
10+
ret void
11+
}
12+
13+
define internal void @callee1() "target-features"="+egpr" {
14+
; CHECK-LABEL: define internal void @callee1(
15+
; CHECK-SAME: ) unnamed_addr #[[ATTR0:[0-9]+]] {
16+
; CHECK-NEXT: ret void
17+
;
18+
ret void
19+
}
20+
21+
define void @caller2() "target-features"="+egpr" {
22+
; CHECK-LABEL: define void @caller2(
23+
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
24+
; CHECK-NEXT: call void @callee2()
25+
; CHECK-NEXT: ret void
26+
;
27+
call void @callee2()
28+
ret void
29+
}
30+
31+
define internal void @callee2() {
32+
; CHECK-LABEL: define internal void @callee2() unnamed_addr {
33+
; CHECK-NEXT: ret void
34+
;
35+
ret void
36+
}
37+
38+
define void @caller3() "target-features"="+egpr" {
39+
; CHECK-LABEL: define void @caller3(
40+
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
41+
; CHECK-NEXT: call fastcc void @callee3()
42+
; CHECK-NEXT: ret void
43+
;
44+
call void @callee3()
45+
ret void
46+
}
47+
48+
define internal void @callee3() "target-features"="+egpr" {
49+
; CHECK-LABEL: define internal fastcc void @callee3(
50+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
51+
; CHECK-NEXT: ret void
52+
;
53+
ret void
54+
}

0 commit comments

Comments
 (0)