Skip to content

Commit 5369383

Browse files
necipfazil authored and Prabhuk committed
[AsmPrinter][CallGraphSection] Emit call graph section
Collect the necessary information for constructing the call graph section, and emit to .callgraph section of the binary. Numeric type identifiers for indirect calls and targets are computed from type identifiers passed from clang front-end. CGSectionFuncComdatCreator pass is used to create comdats for functions whose symbols will be referenced from the call graph section. A call graph section is created per function group, and is linked to the relevant function. This enables dead-stripping of call graph symbols if linked function gets removed. Original RFC: https://lists.llvm.org/pipermail/llvm-dev/2021-June/151044.html Updated RFC: https://lists.llvm.org/pipermail/llvm-dev/2021-July/151739.html Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D105916?id=358342 Pull Request: llvm#87576
1 parent eb0584d commit 5369383

File tree

5 files changed

+252
-0
lines changed

5 files changed

+252
-0
lines changed

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_CODEGEN_ASMPRINTER_H
1616
#define LLVM_CODEGEN_ASMPRINTER_H
1717

18+
#include "llvm/ADT/DenseMap.h"
1819
#include "llvm/ADT/DenseMap.h"
1920
#include "llvm/ADT/MapVector.h"
2021
#include "llvm/ADT/SmallVector.h"
@@ -188,6 +189,32 @@ class AsmPrinter : public MachineFunctionPass {
188189
/// Emit comments in assembly output if this is true.
189190
bool VerboseAsm;
190191

192+
/// Store symbols and type identifiers used to create call graph section
193+
/// entries related to a function.
194+
struct FunctionInfo {
195+
/// Numeric type identifier used in call graph section for indirect calls
196+
/// and targets.
197+
using CGTypeId = uint64_t;
198+
199+
/// Enumeration of function kinds, and their mapping to function kind values
200+
/// stored in call graph section entries.
201+
/// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
202+
enum FunctionKind {
203+
/// Function cannot be target to indirect calls.
204+
NOT_INDIRECT_TARGET = 0,
205+
206+
/// Function may be target to indirect calls but its type id is unknown.
207+
INDIRECT_TARGET_UNKNOWN_TID = 1,
208+
209+
/// Function may be target to indirect calls and its type id is known.
210+
INDIRECT_TARGET_KNOWN_TID = 2,
211+
};
212+
213+
/// Map type identifiers to callsite labels. Labels are only for indirect
214+
/// calls and inclusive of all indirect calls of the function.
215+
SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
216+
};
217+
191218
/// Output stream for the stack usage file (i.e., .su file).
192219
std::unique_ptr<raw_fd_ostream> StackUsageStream;
193220

@@ -422,6 +449,8 @@ class AsmPrinter : public MachineFunctionPass {
422449
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
423450
virtual void emitKCFITypeId(const MachineFunction &MF);
424451

452+
void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
453+
425454
void emitPseudoProbe(const MachineInstr &MI);
426455

427456
void emitRemarksSection(remarks::RemarkStreamer &RS);

llvm/include/llvm/MC/MCObjectFileInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class MCObjectFileInfo {
6868
/// Language Specific Data Area information is emitted to.
6969
MCSection *LSDASection = nullptr;
7070

71+
/// Section containing metadata on call graph.
72+
MCSection *CallGraphSection = nullptr;
73+
7174
/// If exception handling is supported by the target and the target can
7275
/// support a compact representation of the CIE and FDE, this is the section
7376
/// to emit them into.
@@ -355,6 +358,8 @@ class MCObjectFileInfo {
355358
MCSection *getFaultMapSection() const { return FaultMapSection; }
356359
MCSection *getRemarksSection() const { return RemarksSection; }
357360

361+
MCSection *getCallGraphSection(const MCSection &TextSec) const;
362+
358363
MCSection *getStackSizesSection(const MCSection &TextSec) const;
359364

360365
MCSection *getBBAddrMapSection(const MCSection &TextSec) const;

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,6 +1597,105 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
15971597
*StackUsageStream << "static\n";
15981598
}
15991599

1600+
/// Extracts a generalized numeric type identifier of a Function's type from
1601+
/// type metadata. Returns null if metadata cannot be found.
1602+
static ConstantInt *extractNumericCGTypeId(const Function &F) {
1603+
SmallVector<MDNode *, 2> Types;
1604+
F.getMetadata(LLVMContext::MD_type, Types);
1605+
MDString *MDGeneralizedTypeId = nullptr;
1606+
for (const auto &Type : Types) {
1607+
if (Type->getNumOperands() == 2 && isa<MDString>(Type->getOperand(1))) {
1608+
auto *TMDS = cast<MDString>(Type->getOperand(1));
1609+
if (TMDS->getString().ends_with("generalized")) {
1610+
MDGeneralizedTypeId = TMDS;
1611+
break;
1612+
}
1613+
}
1614+
}
1615+
1616+
if (!MDGeneralizedTypeId) {
1617+
errs() << "warning: can't find indirect target type id metadata "
1618+
<< "for " << F.getName() << "\n";
1619+
return nullptr;
1620+
}
1621+
1622+
uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString());
1623+
Type *Int64Ty = Type::getInt64Ty(F.getContext());
1624+
return cast<ConstantInt>(ConstantInt::get(Int64Ty, TypeIdVal));
1625+
}
1626+
1627+
/// Emits call graph section.
1628+
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
1629+
FunctionInfo &FuncInfo) {
1630+
if (!MF.getTarget().Options.EmitCallGraphSection)
1631+
return;
1632+
1633+
// Switch to the call graph section for the function
1634+
MCSection *FuncCGSection =
1635+
getObjFileLowering().getCallGraphSection(*getCurrentSection());
1636+
assert(FuncCGSection && "null call graph section");
1637+
OutStreamer->pushSection();
1638+
OutStreamer->switchSection(FuncCGSection);
1639+
1640+
// Emit format version number.
1641+
OutStreamer->emitInt64(0);
1642+
1643+
// Emit function's self information, which is composed of:
1644+
// 1) FunctionEntryPc
1645+
// 2) FunctionKind: Whether the function is indirect target, and if so,
1646+
// whether its type id is known.
1647+
// 3) FunctionTypeId: Emit only when the function is an indirect target
1648+
// and its type id is known.
1649+
1650+
// Emit function entry pc.
1651+
const MCSymbol *FunctionSymbol = getFunctionBegin();
1652+
OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
1653+
1654+
// If this function has external linkage or has its address taken and
1655+
// it is not a callback, then anything could call it.
1656+
const Function &F = MF.getFunction();
1657+
bool IsIndirectTarget =
1658+
!F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
1659+
/*IgnoreCallbackUses=*/true,
1660+
/*IgnoreAssumeLikeCalls=*/true,
1661+
/*IgnoreLLVMUsed=*/false);
1662+
1663+
// FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
1664+
// Can be optimized to occupy 2 bits instead.
1665+
// Emit function kind, and type id if available.
1666+
if (!IsIndirectTarget) {
1667+
OutStreamer->emitInt64(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET);
1668+
} else {
1669+
const auto *TypeId = extractNumericCGTypeId(F);
1670+
if (TypeId) {
1671+
OutStreamer->emitInt64(
1672+
FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID);
1673+
OutStreamer->emitInt64(TypeId->getZExtValue());
1674+
} else {
1675+
OutStreamer->emitInt64(
1676+
FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID);
1677+
}
1678+
}
1679+
1680+
// Emit callsite labels, where each element is a pair of type id and
1681+
// indirect callsite pc.
1682+
const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
1683+
1684+
// Emit the count of pairs.
1685+
OutStreamer->emitInt64(CallSiteLabels.size());
1686+
1687+
// Emit the type id and call site label pairs.
1688+
for (const std::pair<uint64_t, MCSymbol *> &El : CallSiteLabels) {
1689+
auto TypeId = El.first;
1690+
const auto &Label = El.second;
1691+
OutStreamer->emitInt64(TypeId);
1692+
OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
1693+
}
1694+
FuncInfo.CallSiteLabels.clear();
1695+
1696+
OutStreamer->popSection();
1697+
}
1698+
16001699
void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
16011700
const MDNode &MD) {
16021701
MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1746,6 +1845,8 @@ void AsmPrinter::emitFunctionBody() {
17461845
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
17471846

17481847
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1848+
FunctionInfo FuncInfo;
1849+
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
17491850
for (auto &MBB : *MF) {
17501851
// Print a label for the basic block.
17511852
emitBasicBlockStart(MBB);
@@ -1859,6 +1960,26 @@ void AsmPrinter::emitFunctionBody() {
18591960
break;
18601961
}
18611962

1963+
// FIXME: Some indirect calls can get lowered to jump instructions,
1964+
// resulting in emitting labels for them. The extra information can
1965+
// be neglected while disassembling but still takes space in the binary.
1966+
if (TM.Options.EmitCallGraphSection && MI.isCall()) {
1967+
// Only indirect calls have type identifiers set.
1968+
const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
1969+
if (CallSiteInfo != CallSitesInfoMap.end()) {
1970+
if (auto *TypeId = CallSiteInfo->second.TypeId) {
1971+
// Emit label.
1972+
MCSymbol *S = MF->getContext().createTempSymbol();
1973+
OutStreamer->emitLabel(S);
1974+
1975+
// Get type id value.
1976+
uint64_t TypeIdVal = TypeId->getZExtValue();
1977+
1978+
// Add to function's callsite labels.
1979+
FuncInfo.CallSiteLabels.emplace_back(TypeIdVal, S);
1980+
}
1981+
}
1982+
}
18621983
// If there is a post-instruction symbol, emit a label for it here.
18631984
if (MCSymbol *S = MI.getPostInstrSymbol())
18641985
OutStreamer->emitLabel(S);
@@ -2040,6 +2161,9 @@ void AsmPrinter::emitFunctionBody() {
20402161
// Emit section containing stack size metadata.
20412162
emitStackSizeSection(*MF);
20422163

2164+
// Emit section containing call graph metadata.
2165+
emitCallGraphSection(*MF, FuncInfo);
2166+
20432167
// Emit .su file containing function stack size information.
20442168
emitStackUsage(*MF);
20452169

@@ -2622,6 +2746,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
26222746
F.hasFnAttribute("function-instrument") ||
26232747
F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF) ||
26242748
NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection ||
2749+
MF.getTarget().Options.EmitCallGraphSection ||
26252750
MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) {
26262751
CurrentFnBegin = createTempSymbol("func_begin");
26272752
if (NeedsLocalForSize)

llvm/lib/MC/MCObjectFileInfo.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
534534
EHFrameSection =
535535
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
536536

537+
CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
538+
537539
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
538540

539541
PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
@@ -1132,6 +1134,24 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
11321134
llvm_unreachable("Unknown ObjectFormatType");
11331135
}
11341136

1137+
/// Returns the section that call graph metadata for code in \p TextSec is
/// emitted to.
///
/// For non-ELF object files this is the CallGraphSection member. For ELF, a
/// ".callgraph" section is looked up that carries SHF_LINK_ORDER, is linked
/// to the begin symbol of \p TextSec, and shares its unique ID; when
/// \p TextSec belongs to a group, the section also gets SHF_GROUP and the
/// same group name.
MCSection *
MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
  if (Ctx->getObjectFileType() != MCContext::IsELF)
    return CallGraphSection;

  const auto &TextELF = static_cast<const MCSectionELF &>(TextSec);

  // Mirror the text section's group membership, if it has any.
  unsigned SectionFlags = ELF::SHF_LINK_ORDER;
  StringRef GroupName;
  const MCSymbol *GroupSym = TextELF.getGroup();
  if (GroupSym) {
    SectionFlags |= ELF::SHF_GROUP;
    GroupName = GroupSym->getName();
  }

  const auto *LinkedToSym = cast<MCSymbolELF>(TextSec.getBeginSymbol());
  return Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, SectionFlags,
                            /*EntrySize=*/0, GroupName, /*IsComdat=*/true,
                            TextELF.getUniqueID(), LinkedToSym);
}
1154+
11351155
MCSection *
11361156
MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
11371157
if ((Ctx->getObjectFileType() != MCContext::IsELF) ||
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; Tests that we store the type identifiers in .callgraph section of the binary.
2+
3+
; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
4+
; RUN: llvm-readelf -x .callgraph - | FileCheck %s
5+
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
define dso_local void @foo() #0 !type !4 {
9+
entry:
10+
ret void
11+
}
12+
13+
define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
14+
entry:
15+
%a.addr = alloca i8, align 1
16+
store i8 %a, i8* %a.addr, align 1
17+
ret i32 0
18+
}
19+
20+
define dso_local i32* @baz(i8* %a) #0 !type !6 {
21+
entry:
22+
%a.addr = alloca i8*, align 8
23+
store i8* %a, i8** %a.addr, align 8
24+
ret i32* null
25+
}
26+
27+
define dso_local i32 @main() #0 !type !7 {
28+
entry:
29+
%retval = alloca i32, align 4
30+
%fp_foo = alloca void (...)*, align 8
31+
%a = alloca i8, align 1
32+
%fp_bar = alloca i32 (i8)*, align 8
33+
%fp_baz = alloca i32* (i8*)*, align 8
34+
store i32 0, i32* %retval, align 4
35+
store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** %fp_foo, align 8
36+
%0 = load void (...)*, void (...)** %fp_foo, align 8
37+
call void (...) %0() [ "type"(metadata !"_ZTSFvE.generalized") ]
38+
store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
39+
%1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
40+
%2 = load i8, i8* %a, align 1
41+
%call = call i32 %1(i8 signext %2) [ "type"(metadata !"_ZTSFicE.generalized") ]
42+
store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
43+
%3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
44+
%call1 = call i32* %3(i8* %a) [ "type"(metadata !"_ZTSFPvS_E.generalized") ]
45+
call void @foo() [ "type"(metadata !"_ZTSFvE.generalized") ]
46+
%4 = load i8, i8* %a, align 1
47+
%call2 = call i32 @bar(i8 signext %4) [ "type"(metadata !"_ZTSFicE.generalized") ]
48+
%call3 = call i32* @baz(i8* %a) [ "type"(metadata !"_ZTSFPvS_E.generalized") ]
49+
ret i32 0
50+
}
51+
52+
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
53+
54+
!llvm.module.flags = !{!0, !1, !2}
55+
!llvm.ident = !{!3}
56+
57+
; Check that the numeric type id (md5 hash) for the below type ids are emitted
58+
; to the callgraph section.
59+
60+
; CHECK: Hex dump of section '.callgraph':
61+
62+
!0 = !{i32 1, !"wchar_size", i32 4}
63+
!1 = !{i32 7, !"uwtable", i32 1}
64+
!2 = !{i32 7, !"frame-pointer", i32 2}
65+
!3 = !{!"clang version 13.0.0 ([email protected]:llvm/llvm-project.git 6d35f403b91c2f2c604e23763f699d580370ca96)"}
66+
; CHECK-DAG: 2444f731 f5eecb3e
67+
!4 = !{i64 0, !"_ZTSFvE.generalized"}
68+
; CHECK-DAG: 5486bc59 814b8e30
69+
!5 = !{i64 0, !"_ZTSFicE.generalized"}
70+
; CHECK-DAG: 7ade6814 f897fd77
71+
!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
72+
; CHECK-DAG: caaf769a 600968fa
73+
!7 = !{i64 0, !"_ZTSFiE.generalized"}

0 commit comments

Comments
 (0)