Skip to content

Commit 42b195e

Browse files
authored
[llvm][AsmPrinter] Add direct calls to callgraph section (#155706)
Extend CallGraphSection to include metadata about direct calls. This simplifies the design of tools that must parse the .callgraph section, since they no longer need to depend on the MC layer.
1 parent 61a535a commit 42b195e

File tree

3 files changed

+89
-41
lines changed

3 files changed

+89
-41
lines changed

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "llvm/ADT/DenseMap.h"
1919
#include "llvm/ADT/MapVector.h"
20+
#include "llvm/ADT/SmallSet.h"
2021
#include "llvm/ADT/SmallVector.h"
2122
#include "llvm/Analysis/ProfileSummaryInfo.h"
2223
#include "llvm/Analysis/StaticDataProfileInfo.h"
@@ -192,28 +193,28 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
192193

193194
/// Store symbols and type identifiers used to create callgraph section
194195
/// entries related to a function.
195-
struct FunctionInfo {
196+
struct FunctionCallGraphInfo {
196197
/// Numeric type identifier used in callgraph section for indirect calls
197198
/// and targets.
198199
using CGTypeId = uint64_t;
199200

200-
/// Enumeration of function kinds, and their mapping to function kind values
201-
/// stored in callgraph section entries.
202-
/// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
203-
enum class FunctionKind : uint64_t {
204-
/// Function cannot be target to indirect calls.
205-
NOT_INDIRECT_TARGET = 0,
206-
207-
/// Function may be target to indirect calls but its type id is unknown.
208-
INDIRECT_TARGET_UNKNOWN_TID = 1,
209-
210-
/// Function may be target to indirect calls and its type id is known.
211-
INDIRECT_TARGET_KNOWN_TID = 2,
212-
};
213-
214201
/// Map type identifiers to callsite labels. Labels are generated for each
215202
/// indirect callsite in the function.
216203
SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
204+
SmallSet<MCSymbol *, 4> DirectCallees;
205+
};
206+
207+
/// Enumeration of function kinds, and their mapping to function kind values
208+
/// stored in callgraph section entries.
209+
enum class FunctionKind : uint64_t {
210+
/// Function cannot be target to indirect calls.
211+
NOT_INDIRECT_TARGET = 0,
212+
213+
/// Function may be target to indirect calls but its type id is unknown.
214+
INDIRECT_TARGET_UNKNOWN_TID = 1,
215+
216+
/// Function may be target to indirect calls and its type id is known.
217+
INDIRECT_TARGET_KNOWN_TID = 2,
217218
};
218219

219220
enum CallGraphSectionFormatVersion : uint64_t {
@@ -385,10 +386,11 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
385386
/// are available. Returns empty string otherwise.
386387
StringRef getConstantSectionSuffix(const Constant *C) const;
387388

388-
/// Generate and emit labels for callees of the indirect callsites which will
389-
/// be used to populate the .callgraph section.
390-
void emitIndirectCalleeLabels(
391-
FunctionInfo &FuncInfo,
389+
/// Iff MI is an indirect call, generate and emit a label after the callsite,
390+
/// which will be used to populate the .callgraph section. For a direct
391+
/// callsite, add the callee symbol to the DirectCallees set of FuncCGInfo.
392+
void handleCallsiteForCallgraph(
393+
FunctionCallGraphInfo &FuncCGInfo,
392394
const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
393395
const MachineInstr &MI);
394396

@@ -479,7 +481,8 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
479481
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
480482
virtual void emitKCFITypeId(const MachineFunction &MF);
481483

482-
void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
484+
void emitCallGraphSection(const MachineFunction &MF,
485+
FunctionCallGraphInfo &FuncCGInfo);
483486

484487
void emitPseudoProbe(const MachineInstr &MI);
485488

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include "llvm/IR/GlobalValue.h"
7979
#include "llvm/IR/GlobalVariable.h"
8080
#include "llvm/IR/Instruction.h"
81+
#include "llvm/IR/Instructions.h"
8182
#include "llvm/IR/LLVMRemarkStreamer.h"
8283
#include "llvm/IR/Mangler.h"
8384
#include "llvm/IR/Metadata.h"
@@ -1673,7 +1674,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
16731674

16741675
/// Emits .callgraph section.
16751676
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
1676-
FunctionInfo &FuncInfo) {
1677+
FunctionCallGraphInfo &FuncCGInfo) {
16771678
if (!MF.getTarget().Options.EmitCallGraphSection)
16781679
return;
16791680

@@ -1712,27 +1713,34 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
17121713
// Emit function kind, and type id if available.
17131714
if (!IsIndirectTarget) {
17141715
OutStreamer->emitInt64(
1715-
static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
1716+
static_cast<uint64_t>(FunctionKind::NOT_INDIRECT_TARGET));
17161717
} else {
17171718
if (const auto *TypeId = extractNumericCGTypeId(F)) {
1718-
OutStreamer->emitInt64(static_cast<uint64_t>(
1719-
FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
1719+
OutStreamer->emitInt64(
1720+
static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_KNOWN_TID));
17201721
OutStreamer->emitInt64(TypeId->getZExtValue());
17211722
} else {
1722-
OutStreamer->emitInt64(static_cast<uint64_t>(
1723-
FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
1723+
OutStreamer->emitInt64(
1724+
static_cast<uint64_t>(FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
17241725
}
17251726
}
17261727

17271728
// Emit callsite labels, where each element is a pair of type id and
17281729
// indirect callsite pc.
1729-
const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
1730+
const auto &CallSiteLabels = FuncCGInfo.CallSiteLabels;
17301731
OutStreamer->emitInt64(CallSiteLabels.size());
17311732
for (const auto &[TypeId, Label] : CallSiteLabels) {
17321733
OutStreamer->emitInt64(TypeId);
17331734
OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
17341735
}
1735-
FuncInfo.CallSiteLabels.clear();
1736+
FuncCGInfo.CallSiteLabels.clear();
1737+
1738+
const auto &DirectCallees = FuncCGInfo.DirectCallees;
1739+
OutStreamer->emitInt64(DirectCallees.size());
1740+
for (const auto &CalleeSymbol : DirectCallees) {
1741+
OutStreamer->emitSymbolValue(CalleeSymbol, TM.getProgramPointerSize());
1742+
}
1743+
FuncCGInfo.DirectCallees.clear();
17361744

17371745
OutStreamer->popSection();
17381746
}
@@ -1867,20 +1875,40 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
18671875
return Name;
18681876
}
18691877

1870-
void AsmPrinter::emitIndirectCalleeLabels(
1871-
FunctionInfo &FuncInfo,
1878+
void AsmPrinter::handleCallsiteForCallgraph(
1879+
FunctionCallGraphInfo &FuncCGInfo,
18721880
const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
18731881
const MachineInstr &MI) {
1874-
// Only indirect calls have type identifiers set.
1882+
assert(MI.isCall() &&
1883+
"Callsite labels are meant for call instructions only.");
1884+
const MachineOperand &CalleeOperand = MI.getOperand(0);
1885+
if (CalleeOperand.isGlobal() || CalleeOperand.isSymbol()) {
1886+
// Handle direct calls.
1887+
MCSymbol *CalleeSymbol = nullptr;
1888+
switch (CalleeOperand.getType()) {
1889+
case llvm::MachineOperand::MO_GlobalAddress:
1890+
CalleeSymbol = getSymbol(CalleeOperand.getGlobal());
1891+
break;
1892+
case llvm::MachineOperand::MO_ExternalSymbol:
1893+
CalleeSymbol = GetExternalSymbolSymbol(CalleeOperand.getSymbolName());
1894+
break;
1895+
default:
1896+
llvm_unreachable(
1897+
"Expected to only handle direct call instructions here.");
1898+
}
1899+
FuncCGInfo.DirectCallees.insert(CalleeSymbol);
1900+
return; // Early exit after handling the direct call instruction.
1901+
}
18751902
const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
18761903
if (CallSiteInfo == CallSitesInfoMap.end())
18771904
return;
1878-
1905+
// Handle indirect callsite info.
1906+
// Only indirect calls have type identifiers set.
18791907
for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) {
18801908
MCSymbol *S = MF->getContext().createTempSymbol();
18811909
OutStreamer->emitLabel(S);
18821910
uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
1883-
FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
1911+
FuncCGInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
18841912
}
18851913
}
18861914

@@ -1930,7 +1958,7 @@ void AsmPrinter::emitFunctionBody() {
19301958
MBBSectionRanges[MF->front().getSectionID()] =
19311959
MBBSectionRange{CurrentFnBegin, nullptr};
19321960

1933-
FunctionInfo FuncInfo;
1961+
FunctionCallGraphInfo FuncCGInfo;
19341962
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
19351963
for (auto &MBB : *MF) {
19361964
// Print a label for the basic block.
@@ -2067,7 +2095,7 @@ void AsmPrinter::emitFunctionBody() {
20672095
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
20682096

20692097
if (TM.Options.EmitCallGraphSection && MI.isCall())
2070-
emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
2098+
handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
20712099

20722100
// If there is a post-instruction symbol, emit a label for it here.
20732101
if (MCSymbol *S = MI.getPostInstrSymbol())
@@ -2249,7 +2277,7 @@ void AsmPrinter::emitFunctionBody() {
22492277
emitStackSizeSection(*MF);
22502278

22512279
// Emit section containing call graph metadata.
2252-
emitCallGraphSection(*MF, FuncInfo);
2280+
emitCallGraphSection(*MF, FuncCGInfo);
22532281

22542282
// Emit .su file containing function stack size information.
22552283
emitStackUsage(*MF);

llvm/test/CodeGen/X86/call-graph-section-assembly.ll

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,34 @@
1-
;; Test if temporary labels are generated for each indirect callsite with a callee_type metadata.
2-
;; Test if the .callgraph section contains the MD5 hash of callee type ids generated from
3-
;; generalized type id strings.
1+
;; Test if temporary labels are generated for each indirect callsite.
2+
;; Test if, in the .callgraph section, the MD5 hash of each callee's type (type id)
3+
;; is correctly paired with the corresponding temporary label generated for the indirect
4+
;; call site annotated with !callee_type metadata.
5+
;; Test if the .callgraph section lists each direct callee only once, even when it is called multiple times.
46

57
; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s
68

9+
declare !type !0 void @direct_foo()
10+
declare !type !1 i32 @direct_bar(i8)
11+
declare !type !2 ptr @direct_baz(ptr)
12+
713
; CHECK: ball:
814
; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]:
915
define ptr @ball() {
1016
entry:
17+
call void @direct_foo()
1118
%fp_foo_val = load ptr, ptr null, align 8
1219
; CHECK: [[LABEL_TMP0:\.L.*]]:
13-
call void (...) %fp_foo_val(), !callee_type !0
20+
call void (...) %fp_foo_val(), !callee_type !0
21+
call void @direct_foo()
1422
%fp_bar_val = load ptr, ptr null, align 8
1523
; CHECK: [[LABEL_TMP1:\.L.*]]:
16-
%call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2
24+
%call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2
25+
%call_fp_bar_direct = call i32 @direct_bar(i8 1)
1726
%fp_baz_val = load ptr, ptr null, align 8
1827
; CHECK: [[LABEL_TMP2:\.L.*]]:
1928
%call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
29+
call void @direct_foo()
30+
%call_fp_baz_direct = call ptr @direct_baz(ptr null)
31+
call void @direct_foo()
2032
ret ptr %call_fp_baz
2133
}
2234

@@ -41,3 +53,8 @@ entry:
4153
;; Test for MD5 hash of _ZTSFPvS_E.generalized and the generated temporary callsite label.
4254
; CHECK-NEXT: .quad 8646233951371320954
4355
; CHECK-NEXT: .quad [[LABEL_TMP2]]
56+
;; Test for the number of unique direct callees, followed by one callee symbol per entry.
57+
; CHECK-NEXT: .quad 3
58+
; CHECK-NEXT: .quad direct_foo
59+
; CHECK-NEXT: .quad direct_bar
60+
; CHECK-NEXT: .quad direct_baz

0 commit comments

Comments
 (0)