diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index faab2503ced50..91c014236f6cb 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -190,6 +190,37 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// Emit comments in assembly output if this is true.
   bool VerboseAsm;
 
+  /// Store symbols and type identifiers used to create callgraph section
+  /// entries related to a function.
+  struct FunctionInfo {
+    /// Numeric type identifier used in callgraph section for indirect calls
+    /// and targets.
+    using CGTypeId = uint64_t;
+
+    /// Enumeration of function kinds, and their mapping to function kind values
+    /// stored in callgraph section entries.
+    /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
+    enum class FunctionKind : uint64_t {
+      /// Function cannot be target to indirect calls.
+      NOT_INDIRECT_TARGET = 0,
+
+      /// Function may be target to indirect calls but its type id is unknown.
+      INDIRECT_TARGET_UNKNOWN_TID = 1,
+
+      /// Function may be target to indirect calls and its type id is known.
+      INDIRECT_TARGET_KNOWN_TID = 2,
+    };
+
+    /// Pairs of (callee type id, callsite label). One label is generated for
+    /// each callee type id of every indirect callsite in the function.
+    SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
+  };
+
+  /// Format version emitted at the start of each callgraph section entry.
+  enum CallGraphSectionFormatVersion : uint64_t {
+    V_0 = 0,
+  };
+
   /// Output stream for the stack usage file (i.e., .su file).
   std::unique_ptr<raw_fd_ostream> StackUsageStream;
 
@@ -355,6 +386,13 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// are available. Returns empty string otherwise.
   StringRef getConstantSectionSuffix(const Constant *C) const;
 
+  /// Generate and emit labels for callees of the indirect callsites which will
+  /// be used to populate the .callgraph section.
+  void emitIndirectCalleeLabels(
+      FunctionInfo &FuncInfo,
+      const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+      const MachineInstr &MI);
+
   //===------------------------------------------------------------------===//
   // XRay instrumentation implementation.
   //===------------------------------------------------------------------===//
@@ -442,6 +480,8 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
   virtual void emitKCFITypeId(const MachineFunction &MF);
 
+  void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
+
   void emitPseudoProbe(const MachineInstr &MI);
   void emitRemarksSection(remarks::RemarkStreamer &RS);
 
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 79a1a8f340d24..06c4daf245fa0 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -531,11 +531,12 @@ class LLVM_ABI MachineFunction {
     unsigned TargetFlags;
   };
 
+  using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
+
 private:
   Delegate *TheDelegate = nullptr;
   GISelChangeObserver *Observer = nullptr;
 
-  using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
   /// Map a call instruction to call site arguments forwarding info.
   CallSiteInfoMap CallSitesInfo;
 
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5ce58ae0a836f..d69560ca2d652 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -69,6 +69,9 @@ class LLVM_ABI MCObjectFileInfo {
   /// Language Specific Data Area information is emitted to.
   MCSection *LSDASection = nullptr;
 
+  /// Section containing call graph metadata.
+  MCSection *CallGraphSection = nullptr;
+
   /// If exception handling is supported by the target and the target can
   /// support a compact representation of the CIE and FDE, this is the section
   /// to emit them into.
@@ -359,6 +362,8 @@ class LLVM_ABI MCObjectFileInfo {
   MCSection *getFaultMapSection() const { return FaultMapSection; }
   MCSection *getRemarksSection() const { return RemarksSection; }
 
+  MCSection *getCallGraphSection(const MCSection &TextSec) const;
+
   MCSection *getStackSizesSection(const MCSection &TextSec) const;
 
   MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 616627183744e..1641c3eb535a9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1654,6 +1654,93 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
     *StackUsageStream << "static\n";
 }
 
+/// Extracts a generalized numeric type identifier of a Function's type from
+/// type metadata. Returns null if metadata cannot be found.
+static ConstantInt *extractNumericCGTypeId(const Function &F) {
+  SmallVector<MDNode *, 2> Types;
+  F.getMetadata(LLVMContext::MD_type, Types);
+  for (const auto &Type : Types) {
+    if (Type->hasGeneralizedMDString()) {
+      MDString *MDGeneralizedTypeId = cast<MDString>(Type->getOperand(1));
+      uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString());
+      IntegerType *Int64Ty = Type::getInt64Ty(F.getContext());
+      return ConstantInt::get(Int64Ty, TypeIdVal);
+    }
+  }
+  return nullptr;
+}
+
+/// Emits the .callgraph section entry for \p MF: format version, function
+/// entry address, function kind (plus type id when known), and the
+/// (type id, callsite label) pairs from \p FuncInfo, cleared once emitted.
+/// No-op if the option is off or the object format has no such section.
+void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
+                                      FunctionInfo &FuncInfo) {
+  if (!MF.getTarget().Options.EmitCallGraphSection)
+    return;
+
+  // Switch to the call graph section for the function. Non-ELF object formats
+  // have no such section (null is returned), so there is nothing to emit.
+  MCSection *FuncCGSection =
+      getObjFileLowering().getCallGraphSection(*getCurrentSection());
+  if (!FuncCGSection)
+    return;
+  OutStreamer->pushSection();
+  OutStreamer->switchSection(FuncCGSection);
+
+  // Emit format version number.
+  OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
+
+  // Emit function's self information, which is composed of:
+  //  1) FunctionEntryPc
+  //  2) FunctionKind: Whether the function is indirect target, and if so,
+  //     whether its type id is known.
+  //  3) FunctionTypeId: Emit only when the function is an indirect target
+  //     and its type id is known.
+
+  // Emit function entry pc.
+  const MCSymbol *FunctionSymbol = getFunctionBegin();
+  OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+
+  // If this function has external linkage or has its address taken and
+  // it is not a callback, then anything could call it.
+  const Function &F = MF.getFunction();
+  bool IsIndirectTarget =
+      !F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
+                                                /*IgnoreCallbackUses=*/true,
+                                                /*IgnoreAssumeLikeCalls=*/true,
+                                                /*IgnoreLLVMUsed=*/false);
+
+  // FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
+  // Can be optimized to occupy 2 bits instead.
+  // Emit function kind, and type id if available.
+  if (!IsIndirectTarget) {
+    OutStreamer->emitInt64(
+        static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
+  } else {
+    if (const auto *TypeId = extractNumericCGTypeId(F)) {
+      OutStreamer->emitInt64(static_cast<uint64_t>(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
+      OutStreamer->emitInt64(TypeId->getZExtValue());
+    } else {
+      OutStreamer->emitInt64(static_cast<uint64_t>(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
+    }
+  }
+
+  // Emit callsite labels, where each element is a pair of type id and
+  // indirect callsite pc.
+  const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
+  OutStreamer->emitInt64(CallSiteLabels.size());
+  for (const auto &[TypeId, Label] : CallSiteLabels) {
+    OutStreamer->emitInt64(TypeId);
+    OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
+  }
+  FuncInfo.CallSiteLabels.clear();
+
+  OutStreamer->popSection();
+}
+
 void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
                                      const MDNode &MD) {
   MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1784,6 +1871,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
   return Name;
 }
 
+void AsmPrinter::emitIndirectCalleeLabels(
+    FunctionInfo &FuncInfo,
+    const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+    const MachineInstr &MI) {
+  // Only indirect calls have type identifiers set.
+  const auto It = CallSitesInfoMap.find(&MI);
+  if (It == CallSitesInfoMap.end())
+    return;
+
+  for (ConstantInt *CalleeTypeId : It->second.CalleeTypeIds) {
+    MCSymbol *S = MF->getContext().createTempSymbol();
+    OutStreamer->emitLabel(S);
+    uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
+    FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
+  }
+}
+
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
 void AsmPrinter::emitFunctionBody() {
@@ -1830,6 +1934,8 @@ void AsmPrinter::emitFunctionBody() {
   MBBSectionRanges[MF->front().getSectionID()] =
       MBBSectionRange{CurrentFnBegin, nullptr};
 
+  FunctionInfo FuncInfo;
+  const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
@@ -1963,6 +2069,9 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
 
+      if (TM.Options.EmitCallGraphSection && MI.isCall())
+        emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
+
       // If there is a post-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPostInstrSymbol())
         OutStreamer->emitLabel(S);
@@ -2142,6 +2251,9 @@ void AsmPrinter::emitFunctionBody() {
   // Emit section containing stack size metadata.
   emitStackSizeSection(*MF);
 
+  // Emit section containing call graph metadata.
+  emitCallGraphSection(*MF, FuncInfo);
+
   // Emit .su file containing function stack size information.
   emitStackUsage(*MF);
 
@@ -2841,6 +2953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
       F.hasFnAttribute("xray-instruction-threshold") ||
       needFuncLabels(MF, *this) || NeedsLocalForSize ||
       MF.getTarget().Options.EmitStackSizeSection ||
+      MF.getTarget().Options.EmitCallGraphSection ||
       MF.getTarget().Options.BBAddrMap) {
     CurrentFnBegin = createTempSymbol("func_begin");
     if (NeedsLocalForSize)
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 0069d12dce396..393eed18a54e6 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -537,6 +537,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
   EHFrameSection =
       Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
 
+  CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
+
   StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
 
   PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
@@ -1120,6 +1122,24 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
   llvm_unreachable("Unknown ObjectFormatType");
 }
 
+MCSection *
+MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
+  if (Ctx->getObjectFileType() != MCContext::IsELF)
+    return CallGraphSection;
+
+  const MCSectionELF &ElfSec = static_cast<const MCSectionELF &>(TextSec);
+  unsigned Flags = ELF::SHF_LINK_ORDER;
+  StringRef GroupName;
+  if (const MCSymbol *Group = ElfSec.getGroup()) {
+    GroupName = Group->getName();
+    Flags |= ELF::SHF_GROUP;
+  }
+
+  return Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, Flags, 0,
+                            GroupName, true, ElfSec.getUniqueID(),
+                            cast<MCSymbolELF>(TextSec.getBeginSymbol()));
+}
+
 MCSection *
 MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
   if ((Ctx->getObjectFileType() != MCContext::IsELF) ||
diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll
new file mode 100644
index 0000000000000..11362873fb151
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll
@@ -0,0 +1,43 @@
+;; Test if temporary labels are generated for each indirect callsite with a callee_type metadata.
+;; Test if the .callgraph section contains the MD5 hash of callee type ids generated from
+;; generalized type id strings.
+
+; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s
+
+; CHECK: ball:
+; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]:
+define ptr @ball() {
+entry:
+  %fp_foo_val = load ptr, ptr null, align 8
+  ; CHECK: [[LABEL_TMP0:\.L.*]]:
+  call void (...) %fp_foo_val(), !callee_type !0
+  %fp_bar_val = load ptr, ptr null, align 8
+  ; CHECK: [[LABEL_TMP1:\.L.*]]:
+  %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2
+  %fp_baz_val = load ptr, ptr null, align 8
+  ; CHECK: [[LABEL_TMP2:\.L.*]]:
+  %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
+  ret ptr %call_fp_baz
+}
+
+; CHECK: .section .callgraph,"o",@progbits,.text
+
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .quad [[LABEL_FUNC]]
+; CHECK-NEXT: .quad 1
+; CHECK-NEXT: .quad 3
+!0 = !{!1}
+!1 = !{i64 0, !"_ZTSFvE.generalized"}
+;; Test for MD5 hash of _ZTSFvE.generalized and the generated temporary callsite label.
+; CHECK-NEXT: .quad 4524972987496481828
+; CHECK-NEXT: .quad [[LABEL_TMP0]]
+!2 = !{!3}
+!3 = !{i64 0, !"_ZTSFicE.generalized"}
+;; Test for MD5 hash of _ZTSFicE.generalized and the generated temporary callsite label.
+; CHECK-NEXT: .quad 3498816979441845844
+; CHECK-NEXT: .quad [[LABEL_TMP1]]
+!4 = !{!5}
+!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+;; Test for MD5 hash of _ZTSFPvS_E.generalized and the generated temporary callsite label.
+; CHECK-NEXT: .quad 8646233951371320954
+; CHECK-NEXT: .quad [[LABEL_TMP2]]
diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll
new file mode 100644
index 0000000000000..fa14a98008b45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll
@@ -0,0 +1,34 @@
+;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls.
+
+; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
+entry:
+  %call = tail call i32 %func(i8 signext %x), !callee_type !1
+  ret i32 %call
+}
+
+define i32 @main(i32 %argc) !type !3 {
+entry:
+  %andop = and i32 %argc, 1
+  %cmp = icmp eq i32 %andop, 0
+  %foo.bar = select i1 %cmp, ptr @foo, ptr @bar
+  %call.i = tail call i32 %foo.bar(i8 signext 97), !callee_type !1
+  ret i32 %call.i
+}
+
+declare !type !2 i32 @foo(i8 signext)
+
+declare !type !2 i32 @bar(i8 signext)
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+!0 = !{i64 0, !"_ZTSFiPvcE.generalized"}
+!1 = !{!2}
+; CHECK-DAG: 5486bc59 814b8e30
+!2 = !{i64 0, !"_ZTSFicE.generalized"}
+!3 = !{i64 0, !"_ZTSFiiE.generalized"}
diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll
new file mode 100644
index 0000000000000..4a9840eac4898
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -0,0 +1,38 @@
+;; Tests that we store the type identifiers in .callgraph section of the object file.
+
+; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+declare !type !0 void @foo()
+
+declare !type !2 i32 @bar(i8)
+
+declare !type !5 ptr @baz(ptr)
+
+define void @main() {
+entry:
+  %a = alloca i8, align 1
+  %fp_foo_val = load ptr, ptr null, align 8
+  call void (...) %fp_foo_val(), !callee_type !1
+  %fp_bar_val = load ptr, ptr null, align 8
+  %param = trunc i64 0 to i8
+  %call_fp_bar = call i32 %fp_bar_val(i8 signext %param), !callee_type !3
+  %fp_baz_val = load ptr, ptr null, align 8
+  %call_fp_baz = call ptr %fp_baz_val(ptr %a), !callee_type !4
+  ret void
+}
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+; CHECK-DAG: 2444f731 f5eecb3e
+!0 = !{i64 0, !"_ZTSFvE.generalized"}
+!1 = !{!0}
+; CHECK-DAG: 5486bc59 814b8e30
+!2 = !{i64 0, !"_ZTSFicE.generalized"}
+!3 = !{!2}
+; CHECK-DAG: 7ade6814 f897fd77
+!4 = !{!5}
+!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
diff --git a/llvm/test/MC/X86/verify-callgraph-section.s b/llvm/test/MC/X86/verify-callgraph-section.s
new file mode 100644
index 0000000000000..ce07228facb15
--- /dev/null
+++ b/llvm/test/MC/X86/verify-callgraph-section.s
@@ -0,0 +1,58 @@
+/// Test the callgraph section to make sure the indirect callsites
+/// (annotated by generated temporary labels .Ltmp*) are associated
+/// with the corresponding callee type identifiers.
+
+// RUN: llvm-mc -triple=x86_64 -filetype=obj -o - < %s | llvm-readelf -x .callgraph - | FileCheck %s
+
+        .text
+        .globl ball # -- Begin function ball
+        .p2align 4
+        .type ball,@function
+ball: # @ball
+.Lfunc_begin0:
+# %bb.0: # %entry
+        pushq %rbx
+        subq $32, %rsp
+        movl $0, 4(%rsp)
+        movq foo@GOTPCREL(%rip), %rcx
+        movq %rcx, 24(%rsp)
+        xorl %eax, %eax
+        callq *%rcx
+.Ltmp0:
+        movq bar@GOTPCREL(%rip), %rax
+        movq %rax, 16(%rsp)
+        movsbl 3(%rsp), %edi
+        callq *%rax
+.Ltmp1:
+        movq baz@GOTPCREL(%rip), %rax
+        movq %rax, 8(%rsp)
+        leaq 3(%rsp), %rbx
+        movq %rbx, %rdi
+        callq *%rax
+.Ltmp2:
+        callq foo@PLT
+        movsbl 3(%rsp), %edi
+        callq bar@PLT
+        movq %rbx, %rdi
+        callq baz@PLT
+        addq $32, %rsp
+        popq %rbx
+        retq
+        .section .callgraph,"o",@progbits,.text
+        .quad 0
+        .quad .Lfunc_begin0
+        .quad 1
+        .quad 3
+        /// MD5 hash of the callee type ID for foo.
+        // CHECK: 2444f731 f5eecb3e
+        .quad 0x3ecbeef531f74424
+        .quad .Ltmp0
+        /// MD5 hash of the callee type ID for bar.
+        // CHECK: 5486bc59 814b8e30
+        .quad 0x308e4b8159bc8654
+        .quad .Ltmp1
+        /// MD5 hash of the callee type ID for baz.
+        // CHECK: 7ade6814 f897fd77
+        .quad 0x77fd97f81468de7a
+        .quad .Ltmp2
+        .text