Skip to content

Commit 7c6a1c3

Browse files
authored
[llvm][AsmPrinter] Emit call graph section
Collect the necessary information for constructing the call graph section, and emit it to the .callgraph section of the binary. The MD5 hash of the callee_type metadata string is used as the emitted numeric type id. Reviewers: ilovepi Reviewed By: ilovepi Pull Request: #87576
1 parent 648a7a6 commit 7c6a1c3

File tree

9 files changed

+347
-1
lines changed

9 files changed

+347
-1
lines changed

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,36 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
190190
/// Emit comments in assembly output if this is true.
191191
bool VerboseAsm;
192192

193+
/// Stores the symbols and type identifiers used to create the callgraph
194+
/// section entry of a single function.
195+
struct FunctionInfo {
196+
/// Numeric type identifier used in the callgraph section for indirect calls
197+
/// and targets.
198+
using CGTypeId = uint64_t;
199+
200+
/// Enumeration of function kinds, and their mapping to the function kind
201+
/// values stored in callgraph section entries.
202+
/// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
203+
enum class FunctionKind : uint64_t {
204+
/// Function cannot be the target of indirect calls.
205+
NOT_INDIRECT_TARGET = 0,
206+
207+
/// Function may be the target of indirect calls but its type id is unknown.
208+
INDIRECT_TARGET_UNKNOWN_TID = 1,
209+
210+
/// Function may be the target of indirect calls and its type id is known.
211+
INDIRECT_TARGET_KNOWN_TID = 2,
212+
};
213+
214+
/// List of (callee type identifier, callsite label) pairs. One pair is
215+
/// recorded for each callee type id of each indirect callsite in the function.
216+
SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
217+
};
218+
219+
/// Format version of the callgraph section. The version is emitted as the
/// first 64-bit value of every per-function entry.
enum CallGraphSectionFormatVersion : uint64_t {
220+
V_0 = 0,
221+
};
222+
193223
/// Output stream for the stack usage file (i.e., .su file).
194224
std::unique_ptr<raw_fd_ostream> StackUsageStream;
195225

@@ -355,6 +385,13 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
355385
/// are available. Returns empty string otherwise.
356386
StringRef getConstantSectionSuffix(const Constant *C) const;
357387

388+
/// Generate and emit labels for callees of the indirect callsites which will
389+
/// be used to populate the .callgraph section.
390+
void emitIndirectCalleeLabels(
391+
FunctionInfo &FuncInfo,
392+
const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
393+
const MachineInstr &MI);
394+
358395
//===------------------------------------------------------------------===//
359396
// XRay instrumentation implementation.
360397
//===------------------------------------------------------------------===//
@@ -442,6 +479,8 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
442479
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
443480
virtual void emitKCFITypeId(const MachineFunction &MF);
444481

482+
void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
483+
445484
void emitPseudoProbe(const MachineInstr &MI);
446485

447486
void emitRemarksSection(remarks::RemarkStreamer &RS);

llvm/include/llvm/CodeGen/MachineFunction.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,11 +531,12 @@ class LLVM_ABI MachineFunction {
531531
unsigned TargetFlags;
532532
};
533533

534+
using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
535+
534536
private:
535537
Delegate *TheDelegate = nullptr;
536538
GISelChangeObserver *Observer = nullptr;
537539

538-
using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
539540
/// Map a call instruction to call site arguments forwarding info.
540541
CallSiteInfoMap CallSitesInfo;
541542

llvm/include/llvm/MC/MCObjectFileInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ class LLVM_ABI MCObjectFileInfo {
6969
/// Language Specific Data Area information is emitted to.
7070
MCSection *LSDASection = nullptr;
7171

72+
/// Section containing call graph metadata.
73+
MCSection *CallGraphSection = nullptr;
74+
7275
/// If exception handling is supported by the target and the target can
7376
/// support a compact representation of the CIE and FDE, this is the section
7477
/// to emit them into.
@@ -359,6 +362,8 @@ class LLVM_ABI MCObjectFileInfo {
359362
MCSection *getFaultMapSection() const { return FaultMapSection; }
360363
MCSection *getRemarksSection() const { return RemarksSection; }
361364

365+
MCSection *getCallGraphSection(const MCSection &TextSec) const;
366+
362367
MCSection *getStackSizesSection(const MCSection &TextSec) const;
363368

364369
MCSection *getBBAddrMapSection(const MCSection &TextSec) const;

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,6 +1654,88 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
16541654
*StackUsageStream << "static\n";
16551655
}
16561656

1657+
/// Extracts a generalized numeric type identifier of a Function's type from
1658+
/// its !type metadata. The identifier is the MD5 hash of the generalized type
/// id string, wrapped in an i64 ConstantInt. Returns nullptr if the function
/// has no !type metadata node with a generalized type id string.
1659+
static ConstantInt *extractNumericCGTypeId(const Function &F) {
1660+
SmallVector<MDNode *, 2> Types;
1661+
F.getMetadata(LLVMContext::MD_type, Types);
1662+
// Only the first node carrying a generalized type id string is used.
// NOTE(review): the loop variable `Type` shadows the llvm::Type class name;
// the qualified lookup `Type::getInt64Ty` below still resolves to the class.
for (const auto &Type : Types) {
1663+
if (Type->hasGeneralizedMDString()) {
1664+
// Operand 1 of the type metadata node holds the type id string.
MDString *MDGeneralizedTypeId = cast<MDString>(Type->getOperand(1));
1665+
uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString());
1666+
IntegerType *Int64Ty = Type::getInt64Ty(F.getContext());
1667+
return ConstantInt::get(Int64Ty, TypeIdVal);
1668+
}
1669+
}
1670+
return nullptr;
1671+
}
1672+
1673+
/// Emits the callgraph section entry for \p MF. Does nothing unless the
/// target option EmitCallGraphSection is set.
///
/// The entry consists of, in order: the format version, the function entry
/// pc, the function kind, the function type id (only when known), the number
/// of recorded callsites, and one (type id, callsite pc) pair per callsite.
///
/// \param MF The machine function whose entry is emitted.
/// \param FuncInfo Callsite labels collected for \p MF; its CallSiteLabels
/// list is cleared once the labels have been emitted.
1674+
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
1675+
FunctionInfo &FuncInfo) {
1676+
if (!MF.getTarget().Options.EmitCallGraphSection)
1677+
return;
1678+
1679+
// Switch to the call graph section for the function. The current section is
// pushed here and restored by the popSection() call at the end.
1680+
MCSection *FuncCGSection =
1681+
getObjFileLowering().getCallGraphSection(*getCurrentSection());
1682+
assert(FuncCGSection && "null callgraph section");
1683+
OutStreamer->pushSection();
1684+
OutStreamer->switchSection(FuncCGSection);
1685+
1686+
// Emit format version number.
1687+
OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
1688+
1689+
// Emit the function's self information, which is composed of:
1690+
// 1) FunctionEntryPc
1691+
// 2) FunctionKind: Whether the function is an indirect target, and if so,
1692+
// whether its type id is known.
1693+
// 3) FunctionTypeId: Emitted only when the function is an indirect target
1694+
// and its type id is known.
1695+
1696+
// Emit function entry pc.
1697+
const MCSymbol *FunctionSymbol = getFunctionBegin();
1698+
OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
1699+
1700+
// If this function does not have local linkage, or has its address taken
1701+
// (ignoring callback and assume-like uses), then anything could call it.
1702+
const Function &F = MF.getFunction();
1703+
bool IsIndirectTarget =
1704+
!F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
1705+
/*IgnoreCallbackUses=*/true,
1706+
/*IgnoreAssumeLikeCalls=*/true,
1707+
/*IgnoreLLVMUsed=*/false);
1708+
1709+
// FIXME: FunctionKind takes only a few values but is emitted as a 64-bit
1710+
// value. Can be optimized to occupy 2 bits instead.
1711+
// Emit function kind, and type id if available.
1712+
if (!IsIndirectTarget) {
1713+
OutStreamer->emitInt64(
1714+
static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
1715+
} else {
1716+
if (const auto *TypeId = extractNumericCGTypeId(F)) {
1717+
OutStreamer->emitInt64(static_cast<uint64_t>(
1718+
FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
1719+
OutStreamer->emitInt64(TypeId->getZExtValue());
1720+
} else {
1721+
OutStreamer->emitInt64(static_cast<uint64_t>(
1722+
FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
1723+
}
1724+
}
1725+
1726+
// Emit the number of callsite labels followed by the labels themselves,
1727+
// where each element is a pair of type id and indirect callsite pc.
1728+
const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
1729+
OutStreamer->emitInt64(CallSiteLabels.size());
1730+
for (const auto &[TypeId, Label] : CallSiteLabels) {
1731+
OutStreamer->emitInt64(TypeId);
1732+
OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
1733+
}
1734+
// The labels have been consumed; leave FuncInfo empty.
FuncInfo.CallSiteLabels.clear();
1735+
1736+
OutStreamer->popSection();
1737+
}
1738+
16571739
void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
16581740
const MDNode &MD) {
16591741
MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1784,6 +1866,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
17841866
return Name;
17851867
}
17861868

1869+
/// Emits one temporary label at the current streamer position for every
/// callee type id recorded for \p MI, and appends the matching
/// (type id, label) pair to \p FuncInfo.CallSiteLabels.
///
/// \param FuncInfo Receives the generated (type id, label) pairs.
/// \param CallSitesInfoMap Call site info of the current function, keyed by
/// call instruction.
/// \param MI The call instruction to process; nothing is emitted when it has
/// no entry in \p CallSitesInfoMap.
void AsmPrinter::emitIndirectCalleeLabels(
1870+
FunctionInfo &FuncInfo,
1871+
const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
1872+
const MachineInstr &MI) {
1873+
// Only indirect calls have type identifiers set.
1874+
// Note: despite its name, CallSiteInfo is the map iterator returned by find().
const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
1875+
if (CallSiteInfo == CallSitesInfoMap.end())
1876+
return;
1877+
1878+
for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) {
1879+
MCSymbol *S = MF->getContext().createTempSymbol();
1880+
OutStreamer->emitLabel(S);
1881+
uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
1882+
FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
1883+
}
1884+
}
1885+
17871886
/// EmitFunctionBody - This method emits the body and trailer for a
17881887
/// function.
17891888
void AsmPrinter::emitFunctionBody() {
@@ -1830,6 +1929,8 @@ void AsmPrinter::emitFunctionBody() {
18301929
MBBSectionRanges[MF->front().getSectionID()] =
18311930
MBBSectionRange{CurrentFnBegin, nullptr};
18321931

1932+
FunctionInfo FuncInfo;
1933+
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
18331934
for (auto &MBB : *MF) {
18341935
// Print a label for the basic block.
18351936
emitBasicBlockStart(MBB);
@@ -1963,6 +2064,9 @@ void AsmPrinter::emitFunctionBody() {
19632064
break;
19642065
}
19652066

2067+
if (TM.Options.EmitCallGraphSection && MI.isCall())
2068+
emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
2069+
19662070
// If there is a post-instruction symbol, emit a label for it here.
19672071
if (MCSymbol *S = MI.getPostInstrSymbol())
19682072
OutStreamer->emitLabel(S);
@@ -2142,6 +2246,9 @@ void AsmPrinter::emitFunctionBody() {
21422246
// Emit section containing stack size metadata.
21432247
emitStackSizeSection(*MF);
21442248

2249+
// Emit section containing call graph metadata.
2250+
emitCallGraphSection(*MF, FuncInfo);
2251+
21452252
// Emit .su file containing function stack size information.
21462253
emitStackUsage(*MF);
21472254

@@ -2841,6 +2948,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
28412948
F.hasFnAttribute("xray-instruction-threshold") ||
28422949
needFuncLabels(MF, *this) || NeedsLocalForSize ||
28432950
MF.getTarget().Options.EmitStackSizeSection ||
2951+
MF.getTarget().Options.EmitCallGraphSection ||
28442952
MF.getTarget().Options.BBAddrMap) {
28452953
CurrentFnBegin = createTempSymbol("func_begin");
28462954
if (NeedsLocalForSize)

llvm/lib/MC/MCObjectFileInfo.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
537537
EHFrameSection =
538538
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
539539

540+
CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
541+
540542
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
541543

542544
PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
@@ -1120,6 +1122,24 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
11201122
llvm_unreachable("Unknown ObjectFormatType");
11211123
}
11221124

1125+
/// Returns the section that holds the callgraph entry for code in \p TextSec.
///
/// For ELF, this is a ".callgraph" SHT_PROGBITS section flagged
/// SHF_LINK_ORDER and associated with the begin symbol of \p TextSec; when
/// \p TextSec belongs to a group, the section is also flagged SHF_GROUP and
/// given the same group name. For other object formats the CallGraphSection
/// member is returned unchanged.
MCSection *
1126+
MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
1127+
// NOTE(review): CallGraphSection defaults to nullptr and the only visible
// assignment is in initELFMCObjectFileInfo, so this presumably returns
// nullptr for non-ELF formats, which would trip the "null callgraph section"
// assert in AsmPrinter::emitCallGraphSection - confirm.
if (Ctx->getObjectFileType() != MCContext::IsELF)
1128+
return CallGraphSection;
1129+
1130+
const MCSectionELF &ElfSec = static_cast<const MCSectionELF &>(TextSec);
1131+
unsigned Flags = ELF::SHF_LINK_ORDER;
1132+
StringRef GroupName;
1133+
// Keep the callgraph section in the same section group as the text section.
if (const MCSymbol *Group = ElfSec.getGroup()) {
1134+
GroupName = Group->getName();
1135+
Flags |= ELF::SHF_GROUP;
1136+
}
1137+
1138+
// Reuse the text section's unique ID.
// NOTE(review): the literal `true` is presumably the IsComdat argument of
// getELFSection - confirm against the MCContext declaration.
return Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, Flags, 0,
1139+
GroupName, true, ElfSec.getUniqueID(),
1140+
cast<MCSymbolELF>(TextSec.getBeginSymbol()));
1141+
}
1142+
11231143
MCSection *
11241144
MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
11251145
if ((Ctx->getObjectFileType() != MCContext::IsELF) ||
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
;; Test that a temporary label is generated for each indirect callsite with callee_type metadata.
2+
;; Test if the .callgraph section contains the MD5 hash of callee type ids generated from
3+
;; generalized type id strings.
4+
5+
; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s
6+
7+
; CHECK: ball:
8+
; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]:
9+
define ptr @ball() {
10+
entry:
11+
%fp_foo_val = load ptr, ptr null, align 8
12+
; CHECK: [[LABEL_TMP0:\.L.*]]:
13+
call void (...) %fp_foo_val(), !callee_type !0
14+
%fp_bar_val = load ptr, ptr null, align 8
15+
; CHECK: [[LABEL_TMP1:\.L.*]]:
16+
%call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2
17+
%fp_baz_val = load ptr, ptr null, align 8
18+
; CHECK: [[LABEL_TMP2:\.L.*]]:
19+
%call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
20+
ret ptr %call_fp_baz
21+
}
22+
23+
; CHECK: .section .callgraph,"o",@progbits,.text
24+
25+
; CHECK-NEXT: .quad 0
26+
; CHECK-NEXT: .quad [[LABEL_FUNC]]
27+
; CHECK-NEXT: .quad 1
28+
; CHECK-NEXT: .quad 3
29+
!0 = !{!1}
30+
!1 = !{i64 0, !"_ZTSFvE.generalized"}
31+
;; Test for MD5 hash of _ZTSFvE.generalized and the generated temporary callsite label.
32+
; CHECK-NEXT: .quad 4524972987496481828
33+
; CHECK-NEXT: .quad [[LABEL_TMP0]]
34+
!2 = !{!3}
35+
!3 = !{i64 0, !"_ZTSFicE.generalized"}
36+
;; Test for MD5 hash of _ZTSFicE.generalized and the generated temporary callsite label.
37+
; CHECK-NEXT: .quad 3498816979441845844
38+
; CHECK-NEXT: .quad [[LABEL_TMP1]]
39+
!4 = !{!5}
40+
!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
41+
;; Test for MD5 hash of _ZTSFPvS_E.generalized and the generated temporary callsite label.
42+
; CHECK-NEXT: .quad 8646233951371320954
43+
; CHECK-NEXT: .quad [[LABEL_TMP2]]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls.
2+
3+
; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
4+
; RUN: llvm-readelf -x .callgraph - | FileCheck %s
5+
6+
define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
7+
entry:
8+
%call = tail call i32 %func(i8 signext %x), !callee_type !1
9+
ret i32 %call
10+
}
11+
12+
define i32 @main(i32 %argc) !type !3 {
13+
entry:
14+
%andop = and i32 %argc, 1
15+
%cmp = icmp eq i32 %andop, 0
16+
%foo.bar = select i1 %cmp, ptr @foo, ptr @bar
17+
%call.i = tail call i32 %foo.bar(i8 signext 97), !callee_type !1
18+
ret i32 %call.i
19+
}
20+
21+
declare !type !2 i32 @foo(i8 signext)
22+
23+
declare !type !2 i32 @bar(i8 signext)
24+
25+
;; Check that the numeric type ids (MD5 hashes) for the type ids below are emitted
26+
;; to the callgraph section.
27+
28+
; CHECK: Hex dump of section '.callgraph':
29+
30+
!0 = !{i64 0, !"_ZTSFiPvcE.generalized"}
31+
!1 = !{!2}
32+
; CHECK-DAG: 5486bc59 814b8e30
33+
!2 = !{i64 0, !"_ZTSFicE.generalized"}
34+
!3 = !{i64 0, !"_ZTSFiiE.generalized"}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
;; Tests that we store the type identifiers in .callgraph section of the object file.
2+
3+
; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
4+
; RUN: llvm-readelf -x .callgraph - | FileCheck %s
5+
6+
declare !type !0 void @foo()
7+
8+
declare !type !1 i32 @bar(i8)
9+
10+
declare !type !2 ptr @baz(ptr)
11+
12+
define void @main() {
13+
entry:
14+
%a = alloca i8, align 1
15+
%fp_foo_val = load ptr, ptr null, align 8
16+
call void (...) %fp_foo_val(), !callee_type !1
17+
%fp_bar_val = load ptr, ptr null, align 8
18+
%param = trunc i64 0 to i8
19+
%call_fp_bar = call i32 %fp_bar_val(i8 signext %param), !callee_type !3
20+
%fp_baz_val = load ptr, ptr null, align 8
21+
%call_fp_baz = call ptr %fp_baz_val(ptr %a), !callee_type !4
22+
ret void
23+
}
24+
25+
;; Check that the numeric type ids (MD5 hashes) for the type ids below are emitted
26+
;; to the callgraph section.
27+
28+
; CHECK: Hex dump of section '.callgraph':
29+
30+
; CHECK-DAG: 2444f731 f5eecb3e
31+
!0 = !{i64 0, !"_ZTSFvE.generalized"}
32+
!1 = !{!0}
33+
; CHECK-DAG: 5486bc59 814b8e30
34+
!2 = !{i64 0, !"_ZTSFicE.generalized"}
35+
!3 = !{!2}
36+
; CHECK-DAG: 7ade6814 f897fd77
37+
!4 = !{!5}
38+
!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}

0 commit comments

Comments
 (0)