diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b9507a2d054fe..17083103cb24b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -333,11 +333,14 @@ added in the future:
     (e.g., by passing things in registers). This calling convention
     allows the target to use whatever tricks it wants to produce fast
     code for the target, without having to conform to an externally
-    specified ABI (Application Binary Interface). `Tail calls can only
-    be optimized when this, the tailcc, the GHC or the HiPE convention is
-    used. <CodeGenerator.html#tail-call-optimization>`_ This calling
-    convention does not support varargs and requires the prototype of all
-    callees to exactly match the prototype of the function definition.
+    specified ABI (Application Binary Interface). A target may implement
+    fastcc differently depending on the enabled features. In that case, the
+    TTI hook ``useFastCCForInternalCall`` must return false whenever a
+    caller and the callee would use different implementations.
+    `Tail calls can only be optimized when this, the tailcc, the GHC or the
+    HiPE convention is used. <CodeGenerator.html#tail-call-optimization>`_
+    This calling convention does not support varargs and requires the prototype
+    of all callees to exactly match the prototype of the function definition.
 "``coldcc``" - The cold calling convention
     This calling convention attempts to make code in the caller as
     efficient as possible under the assumption that the call is not
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0f17312b03827..ce7de263f74f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -946,6 +946,11 @@ class TargetTransformInfo {
   /// should use coldcc calling convention.
   LLVM_ABI bool useColdCCForColdCall(Function &F) const;
 
+  /// Return true if the internal function \p F may be promoted to the fastcc
+  /// calling convention. Targets return false when \p F and any of its callers
+  /// would use different fastcc implementations.
+  LLVM_ABI bool useFastCCForInternalCall(Function &F) const;
+
   LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
 
   /// Identifies if the vector form of the intrinsic has a scalar operand.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index aacb88d2f9684..91872d74a6bab 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {
 
   virtual bool useColdCCForColdCall(Function &F) const { return false; }
 
+  virtual bool useFastCCForInternalCall(Function &F) const { return true; }
+
   virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
     return false;
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 0426ac7e62fab..12186da5424f7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
   return TTIImpl->useColdCCForColdCall(F);
 }
 
+bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
+  return TTIImpl->useFastCCForInternalCall(F);
+}
+
 bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
     Intrinsic::ID ID) const {
   return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0b1430e373fc7..6717feae2c814 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7223,3 +7223,24 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
 
   return false;
 }
+
+/// Decide whether the internal function \p F may be promoted to fastcc.
+///
+/// Returns false if the subtarget of any direct caller of \p F differs from
+/// ST in EGPR support, and true otherwise (including when there are no calls).
+bool X86TTIImpl::useFastCCForInternalCall(Function &F) const {
+  bool HasEGPR = ST->hasEGPR();
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  for (User *U : F.users()) {
+    // Only direct calls to F matter; skip every other kind of use.
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB || CB->getCalledOperand() != &F)
+      continue;
+    Function *CallerFunc = CB->getFunction();
+    if (TM.getSubtarget<X86Subtarget>(*CallerFunc).hasEGPR() != HasEGPR)
+      return false;
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index de5e1c297b1e4..f5e9e205d0839 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -319,6 +319,8 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                              Type *ScalarValTy) const override;
 
+  bool useFastCCForInternalCall(Function &F) const override;
+
 private:
   bool supportsGather() const;
   InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99c4982c58b47..1516a5bb7a6c2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,
 
     if (hasChangeableCC(&F, ChangeableCCCache)) {
       // If this function has a calling convention worth changing, is not a
-      // varargs function, and is only called directly, promote it to use the
-      // Fast calling convention.
-      F.setCallingConv(CallingConv::Fast);
-      ChangeCalleesToFastCall(&F);
-      ++NumFastCallFns;
-      Changed = true;
+      // varargs function, is only called directly, and is supported by the
+      // target, promote it to use the Fast calling convention.
+      TargetTransformInfo &TTI = GetTTI(F);
+      if (TTI.useFastCCForInternalCall(F)) {
+        F.setCallingConv(CallingConv::Fast);
+        ChangeCalleesToFastCall(&F);
+        ++NumFastCallFns;
+        Changed = true;
+      }
     }
 
     if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
diff --git a/llvm/test/Transforms/GlobalOpt/X86/apx.ll b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
new file mode 100644
index 0000000000000..aaf6abac966e8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=x86_64 -S -passes=globalopt -o - < %s | FileCheck %s
+
+define void @caller1() {
+; CHECK-LABEL: define void @caller1() local_unnamed_addr {
+; CHECK-NEXT:    call void @callee1()
+; CHECK-NEXT:    ret void
+;
+  call void @callee1()
+  ret void
+}
+
+define internal void @callee1() "target-features"="+egpr" {
+; CHECK-LABEL: define internal void @callee1(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller2() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    call void @callee2()
+; CHECK-NEXT:    ret void
+;
+  call void @callee2()
+  ret void
+}
+
+define internal void @callee2() {
+; CHECK-LABEL: define internal void @callee2() unnamed_addr {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller3() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    call fastcc void @callee3()
+; CHECK-NEXT:    ret void
+;
+  call void @callee3()
+  ret void
+}
+
+define internal void @callee3() "target-features"="+egpr" {
+; CHECK-LABEL: define internal fastcc void @callee3(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}