Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,10 @@ class TargetTransformInfo {
/// should use coldcc calling convention.
LLVM_ABI bool useColdCCForColdCall(Function &F) const;

/// Return true if the input function, which is internal, should use the
/// fastcc calling convention.
LLVM_ABI bool useFastCCForInternalCall(Function &F) const;

LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;

/// Identifies if the vector form of the intrinsic has a scalar operand.
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {

/// Default implementation of the coldcc query: answers false for every
/// function; \p F is not inspected. Declared virtual so it can be overridden.
virtual bool useColdCCForColdCall(Function &F) const { return false; }

/// Default implementation of the fastcc query: answers true for every
/// function; \p F is not inspected. Declared virtual so it can be overridden.
virtual bool useFastCCForInternalCall(Function &F) const { return true; }

/// Default implementation: answers false for every intrinsic; \p ID is not
/// inspected. Declared virtual so it can be overridden.
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
return false;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}

/// Forwards the fastcc query for \p F to the implementation object held in
/// TTIImpl and returns its answer unchanged.
bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
  const bool UseFastCC = TTIImpl->useFastCCForInternalCall(F);
  return UseFastCC;
}

bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
Intrinsic::ID ID) const {
return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const override;

/// Answers the fastcc query using only the subtarget held in ST: true when
/// the subtarget is not 64-bit, otherwise true only when it has EGPR.
/// \p F itself is not inspected.
/// NOTE(review): this does not compare EGPR availability between the callers
/// and the callee -- confirm that consulting a single subtarget is sufficient.
bool useFastCCForInternalCall(Function &F) const override {
  // Non-64-bit subtargets always answer true.
  if (!ST->is64Bit())
    return true;
  // On 64-bit subtargets the answer follows EGPR availability.
  return ST->hasEGPR();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to check that both caller and callee have EGPR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a single direction is enough. We can call a function without EGPR from an EGPR-enabled function, but not in the opposite direction.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I don't really follow.

If we have no-EGPR -> EGPR, then the EGPR function may expect arguments to be passed in EGPR registers, while the no-EGPR function will push them to the stack.

If we have EGPR -> no-EGPR, then the EGPR function may pass arguments in EGPR registers, while the no-EGPR function will expect them to be on the stack.

Am I missing something here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I get the point. I was confusing this with the inlining logic.


private:
bool supportsGather() const;
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/Transforms/IPO/GlobalOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,

if (hasChangeableCC(&F, ChangeableCCCache)) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
// varargs function, is only called directly, and is supported by the
// target, promote it to use the Fast calling convention.
TargetTransformInfo &TTI = GetTTI(F);
if (TTI.useFastCCForInternalCall(F)) {
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
}

if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
; CHECK-NEXT: call void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define dso_local i32 @main() {
; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr {
; CHECK-NEXT: bb:
; CHECK-NEXT: store ptr null, ptr @_ZL3g_i, align 8
; CHECK-NEXT: call fastcc void @_ZL13PutsSomethingv()
; CHECK-NEXT: call void @_ZL13PutsSomethingv()
; CHECK-NEXT: ret i32 0
;
bb:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/gold/X86/merge-functions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@

; Check that we've merged foo and bar
; CHECK: define dso_local noundef i32 @main()
; CHECK-NEXT: tail call fastcc void @bar()
; CHECK-NEXT: tail call fastcc void @bar()
; CHECK-NEXT: tail call void @bar()
; CHECK-NEXT: tail call void @bar()

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/gold/X86/unified-lto.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
; Constant propagation is not supported by thin LTO.
; With full LTO we fold argument into constant 43
; CHECK: define dso_local noundef i32 @main()
; CHECK-NEXT: tail call fastcc void @foo()
; CHECK-NEXT: tail call void @foo()
; CHECK-NEXT: ret i32 43

; CHECK: define internal fastcc void @foo()
; CHECK: define internal void @foo()
; CHECK-NEXT: store i32 43, ptr @_g, align 4

; ThinLTO doesn't import foo, because the latter has noinline attribute
Expand Down
Loading