Skip to content

Commit 1e9002a

Browse files
committed
[AIX] PGO codegen changes for function-sections.
The existing PGO implementation creates a dependence from the profiling data symbol to the function descriptors for every function defined within the compilation unit. This forces the binder to include either all or no functions from an object file when linking a profile-generate enabled object file, which can break links that require the aggressive "include only what's used" link semantics that XCOFF provides. To remedy this, we break up the profiling data symbol into per-function CSECTs by using the rename directive to emit the profd symbol for a single function to a unique CSECT, then rename it to the common name afterwards. We also have to split the counters into per-function CSECTs, because each counter section (which is referenced by its function) needs a .ref relocation to the profiling-data symbol, which is otherwise unreferenced.
1 parent 6d231fb commit 1e9002a

File tree

6 files changed

+469
-3
lines changed

6 files changed

+469
-3
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// RUN: split-file %s %t
2+
// RUN: cd %t
3+
// RUN: %clang_pgogen -ffunction-sections main.c -c -o main.o
4+
// RUN: %clang_pgogen -ffunction-sections needs_gc.c -c -o needs_gc.o
5+
// RUN: %clang_pgogen main.o needs_gc.o -o needs_gc.out
6+
// RUN: env LLVM_PROFILE_FILE=needs_gc.profraw %run ./needs_gc.out > /dev/null
7+
// RUN: llvm-profdata show --all-functions needs_gc.profraw | FileCheck %s
8+
9+
// CHECK-DAG: main
10+
// CHECK-DAG: baz
11+
// CHECK-DAG: get_message
12+
13+
14+
//--- main.c
15+
const char* get_message(void) {
16+
return "Hello World!";
17+
}
18+
19+
int foo(void);
20+
double bar(void);
21+
const char* baz();
22+
23+
int printf(const char*, ...);
24+
25+
int main(void) {
26+
printf("%s\n", baz());
27+
}
28+
29+
//--- needs_gc.c
30+
extern int not_def_one(const char *);
31+
extern double not_def_two(void);
32+
33+
extern const char* get_message(void);
34+
35+
char buf[512];
36+
int foo(const char *ptr, unsigned long size) {
37+
void *memcpy(void *, const void *, unsigned long);
38+
memcpy(buf, ptr, size);
39+
return not_def_one(buf);
40+
}
41+
42+
double bar(void) {
43+
return not_def_two();
44+
}
45+
46+
47+
const char* baz() {
48+
return get_message();
49+
}

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
#include "llvm/InitializePasses.h"
8888
#include "llvm/MC/SectionKind.h"
8989
#include "llvm/Pass.h"
90+
#include "llvm/ProfileData/InstrProf.h"
9091
#include "llvm/Support/Casting.h"
9192
#include "llvm/Support/CommandLine.h"
9293
#include "llvm/Support/Debug.h"
@@ -155,6 +156,7 @@ class GlobalMergeImpl {
155156
const TargetMachine *TM = nullptr;
156157
GlobalMergeOptions Opt;
157158
bool IsMachO = false;
159+
bool IsAIX = false;
158160

159161
private:
160162
bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
@@ -674,7 +676,9 @@ bool GlobalMergeImpl::run(Module &M) {
674676
if (!EnableGlobalMerge)
675677
return false;
676678

677-
IsMachO = M.getTargetTriple().isOSBinFormatMachO();
679+
Triple T(M.getTargetTriple());
680+
IsMachO = T.isOSBinFormatMachO();
681+
IsAIX = T.isOSBinFormatXCOFF();
678682

679683
auto &DL = M.getDataLayout();
680684
MapVector<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 0>>
@@ -717,6 +721,14 @@ bool GlobalMergeImpl::run(Module &M) {
717721
GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
718722
continue;
719723

724+
// Do not merge profiling counters as it will prevent us from breaking
725+
// the __llvm_prf_cnts section into subsections, which in turn creates
726+
// extra symbol dependencies that can break otherwise valid link steps.
727+
if (IsAIX && TM && TM->getFunctionSections() && GV.hasSection() &&
728+
Section.starts_with(
729+
getInstrProfSectionName(IPSK_cnts, Triple::XCOFF, false)))
730+
continue;
731+
720732
// Ignore all "required" globals:
721733
if (isMustKeepGlobalVariable(&GV))
722734
continue;

llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2422,7 +2422,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
24222422
if (!GO->hasSection())
24232423
report_fatal_error("#pragma clang section is not yet supported");
24242424

2425-
StringRef SectionName = GO->getSection();
2425+
std::string SectionName(GO->getSection());
24262426

24272427
// Handle the XCOFF::TD case first, then deal with the rest.
24282428
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
@@ -2445,6 +2445,25 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
24452445
else
24462446
report_fatal_error("XCOFF other section types not yet implemented.");
24472447

2448+
// The profiling instrumentation symbols are special in that we want to
2449+
// emit a unique CSECT for each when function sections are enabled, which
2450+
// are then renamed back to the CSECT name specified by the explicit section.
2451+
// This is to work around the limitation of not having section groups or a
2452+
// similar feature in XCOFF.
2453+
if (TM.getFunctionSections()) {
2454+
std::string ProfilingDataSectionName =
2455+
getInstrProfSectionName(IPSK_data, Triple::XCOFF, false);
2456+
std::string ProfilingCounterSectionName =
2457+
getInstrProfSectionName(IPSK_cnts, Triple::XCOFF, false);
2458+
if ((SectionName == ProfilingDataSectionName &&
2459+
GO->getName().starts_with("__profd_")) ||
2460+
(SectionName == ProfilingCounterSectionName &&
2461+
GO->getName().starts_with("__profc_"))) {
2462+
SectionName += ".";
2463+
SectionName += GO->getName();
2464+
}
2465+
}
2466+
24482467
return getContext().getXCOFFSection(
24492468
SectionName, Kind, XCOFF::CsectProperties(MappingClass, XCOFF::XTY_SD),
24502469
/* MultiSymbolsAllowed*/ true);

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,22 @@ class PPCAIXAsmPrinter : public PPCAsmPrinter {
253253
DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>>
254254
GOAliasMap;
255255

256+
// The __profd_* symbol for the profiling instrumentation data and the
257+
// corresponding __profc_* counters it references.
258+
struct ProfilingSubSection {
259+
MCSectionXCOFF *ProfD;
260+
MCSectionXCOFF *ProfC;
261+
};
262+
263+
// Collect the 'sub-sections' of the profile-generate symbols
264+
// so we can:
265+
// 1) rename to the common CSECT name after emission.
266+
// 2) emit the refs from the profc_ symbol to the related CSECTs.
267+
SmallVector<ProfilingSubSection> ProfGenSubSections;
268+
269+
void emitSharedSectionPGORefs(Module &M);
270+
void emitSplitSectionPGORefs();
271+
256272
uint16_t getNumberOfVRSaved();
257273
void emitTracebackTable();
258274

@@ -2792,6 +2808,57 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
27922808
auto *Csect = static_cast<MCSectionXCOFF *>(
27932809
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
27942810

2811+
// When compiling with function sections enabled, we need some special
2812+
// codegen to rename the CSECTs. For each profiling data symbol find its
2813+
// associated profiling counters.
2814+
if (TM.getFunctionSections() &&
2815+
Csect->getName().starts_with("__llvm_prf_data.")) {
2816+
// Unraveling the initializer to find the related counters variable. The
2817+
// initializer is a structure whose third member is a subtract expression
2818+
// between the counters label and the label for the start of this structure.
2819+
// Use the subtract expression to get the GlobalValue for the counters
2820+
// global.
2821+
assert(GV->hasInitializer() &&
2822+
"profiling data symbol must have an initializer");
2823+
assert(isa<ConstantStruct>(GV->getInitializer()) &&
2824+
"expect the initializer for a profiling data symbol to be a struct");
2825+
const ConstantStruct *Initializer =
2826+
cast<ConstantStruct>(GV->getInitializer());
2827+
2828+
// The initializer structure is: { i64, i64, i32, ptr, ptr, i32, [4 x i16] }
2829+
// and the reference to the global variable for the counters is in the
2830+
// first i32 member.
2831+
const Constant *Member = Initializer->getAggregateElement(2);
2832+
assert(Member && "profiling data symbol has more then 3 elements");
2833+
2834+
// Want to decompose a constant expression of the form:
2835+
// sub (ptrtoint (ptr @__profc_sym), ptrtoint (ptr @__profd_sym))
2836+
// to get the GlobalVariable for the '@__profc_sym' symbol.
2837+
assert(isa<ConstantExpr>(Member) &&
2838+
"expected member initializer is a constant expression.");
2839+
const ConstantExpr *CExpr = cast<ConstantExpr>(Member);
2840+
assert(CExpr->getOpcode() == Instruction::Sub &&
2841+
"expected member intializer is a sub expression.");
2842+
2843+
Value *V1 = CExpr->getOperand(0);
2844+
assert(V1 && isa<ConstantExpr>(V1) &&
2845+
"expected sub expression operand to be constant expr.");
2846+
ConstantExpr *PointerToIntExpr = cast<ConstantExpr>(V1);
2847+
assert(PointerToIntExpr->isCast() && "unexpected operand type.");
2848+
2849+
Value *PointerToIntOperand = PointerToIntExpr->getOperand(0);
2850+
assert(isa<GlobalVariable>(PointerToIntOperand) &&
2851+
"expected global variable of profc symbol");
2852+
2853+
const GlobalVariable *ProfCGV = cast<GlobalVariable>(PointerToIntOperand);
2854+
// Map the global variable to its CSECT.
2855+
SectionKind ProfCKind = getObjFileLowering().getKindForGlobal(GV, TM);
2856+
MCSectionXCOFF *ProfCCsect = cast<MCSectionXCOFF>(
2857+
getObjFileLowering().SectionForGlobal(ProfCGV, ProfCKind, TM));
2858+
2859+
ProfGenSubSections.push_back({Csect, ProfCCsect});
2860+
}
2861+
27952862
// Switch to the containing csect.
27962863
OutStreamer->switchSection(Csect);
27972864

@@ -2893,7 +2960,7 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
28932960
getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
28942961
}
28952962

2896-
void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
2963+
void PPCAIXAsmPrinter::emitSharedSectionPGORefs(Module &M) {
28972964
if (!OutContext.hasXCOFFSection(
28982965
"__llvm_prf_cnts",
28992966
XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
@@ -2942,6 +3009,54 @@ void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
29423009
}
29433010
}
29443011

3012+
void PPCAIXAsmPrinter::emitSplitSectionPGORefs() {
3013+
MCSymbol *NamesSym = nullptr;
3014+
MCSymbol *VNDSSym = nullptr;
3015+
3016+
if (OutContext.hasXCOFFSection(
3017+
"__llvm_prf_names",
3018+
XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)))
3019+
NamesSym = OutContext.getOrCreateSymbol("__llvm_prf_names[RO]");
3020+
3021+
if (OutContext.hasXCOFFSection(
3022+
"__llvm_prf_vnds",
3023+
XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
3024+
VNDSSym = OutContext.getOrCreateSymbol("__llvm_prf_vnds[RW]");
3025+
3026+
for (auto SubSections : ProfGenSubSections) {
3027+
MCSectionXCOFF *ProfDCsect = SubSections.ProfD;
3028+
MCSectionXCOFF *ProfCCsect = SubSections.ProfC;
3029+
3030+
OutStreamer->switchSection(ProfCCsect);
3031+
3032+
if (NamesSym)
3033+
OutStreamer->emitXCOFFRefDirective(NamesSym);
3034+
3035+
if (VNDSSym)
3036+
OutStreamer->emitXCOFFRefDirective(VNDSSym);
3037+
3038+
OutStreamer->emitXCOFFRefDirective(ProfDCsect->getQualNameSymbol());
3039+
3040+
// Rename the subsection for the counters
3041+
OutStreamer->emitXCOFFRenameDirective(ProfCCsect->getQualNameSymbol(),
3042+
"__llvm_prf_cnts");
3043+
OutStreamer->addBlankLine();
3044+
3045+
// Rename the subsection for the data.
3046+
OutStreamer->switchSection(ProfDCsect);
3047+
OutStreamer->emitXCOFFRenameDirective(ProfDCsect->getQualNameSymbol(),
3048+
"__llvm_prf_data");
3049+
OutStreamer->addBlankLine();
3050+
}
3051+
}
3052+
3053+
void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
3054+
if (!TM.getFunctionSections())
3055+
emitSharedSectionPGORefs(M);
3056+
else
3057+
emitSplitSectionPGORefs();
3058+
}
3059+
29453060
void PPCAIXAsmPrinter::emitGCOVRefs() {
29463061
if (!OutContext.hasXCOFFSection(
29473062
"__llvm_gcov_ctr_section",
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; RUN: llc --function-sections -mtriple powerpc-ibm-aix-xcoff < %s | \
2+
; RUN: FileCheck %s
3+
4+
; RUN: llc --function-sections -mtriple powerpc64-ibm-aix-xcoff < %s | \
5+
; RUN: FileCheck %s
6+
7+
@i = external local_unnamed_addr global i32, align 4
8+
@__llvm_profile_raw_version = weak hidden local_unnamed_addr constant i64 72057594037927944
9+
@__profc_func1 = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
10+
@__profd_func1 = private global { i64, i64, i32, ptr, ptr, i32, [4 x i16] } { i64 -2545542355363006406, i64 742261418966908927, i32 sub (i32 ptrtoint (ptr @__profc_func1 to i32), i32 ptrtoint (ptr @__profd_func1 to i32)), ptr @func1.local, ptr null, i32 1, [4 x i16] zeroinitializer }, section "__llvm_prf_data", align 8
11+
@__profc_func2 = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
12+
@__profd_func2 = private global { i64, i64, i32, ptr, ptr, i32, [4 x i16] } { i64 -4377547752858689819, i64 742261418966908927, i32 sub (i32 ptrtoint (ptr @__profc_func2 to i32), i32 ptrtoint (ptr @__profd_func2 to i32)), ptr @func2.local, ptr null, i32 1, [4 x i16] zeroinitializer }, section "__llvm_prf_data", align 8
13+
@__llvm_prf_nm = private constant [13 x i8] c"\0B\00func1\01func2", section "__llvm_prf_names", align 1
14+
@__llvm_profile_filename = weak hidden local_unnamed_addr constant [19 x i8] c"default_%m.profraw\00"
15+
@llvm.used = appending global [3 x ptr] [ptr @__llvm_prf_nm, ptr @__profd_func1, ptr @__profd_func2], section "llvm.metadata"
16+
17+
@func1.local = private alias i32 (), ptr @func1
18+
@func2.local = private alias i32 (), ptr @func2
19+
20+
define i32 @func1() {
21+
entry:
22+
%pgocount = load i64, ptr @__profc_func1, align 8
23+
%0 = add i64 %pgocount, 1
24+
store i64 %0, ptr @__profc_func1, align 8
25+
%1 = load i32, ptr @i, align 4
26+
ret i32 %1
27+
}
28+
29+
define i32 @func2() {
30+
entry:
31+
%pgocount = load i64, ptr @__profc_func2, align 8
32+
%0 = add i64 %pgocount, 1
33+
store i64 %0, ptr @__profc_func2, align 8
34+
%1 = load i32, ptr @i, align 4
35+
%call = tail call i32 @external_func(i32 noundef %1)
36+
ret i32 %call
37+
}
38+
39+
declare i32 @external_func(i32 noundef)
40+
41+
; CHECK-DAG: .csect __llvm_prf_cnts.__profc_func1[RW]
42+
; CHECK-DAG: .csect __llvm_prf_data.__profd_func1[RW]
43+
; CHECK-DAG: .csect __llvm_prf_cnts.__profc_func2[RW]
44+
; CHECK-DAG: .csect __llvm_prf_data.__profd_func2[RW]
45+
; CHECK-DAG: .csect __llvm_prf_names[RO]
46+
47+
; CHECK: .csect __llvm_prf_cnts.__profc_func1[RW]
48+
; CHECK-NEXT: .ref __llvm_prf_names[RO]
49+
; CHECK-NEXT: .ref __llvm_prf_data.__profd_func1[RW]
50+
; CHECK-NEXT: .rename __llvm_prf_cnts.__profc_func1[RW],"__llvm_prf_cnts"
51+
52+
; CHECK: .csect __llvm_prf_data.__profd_func1[RW]
53+
; CHECK-NEXT: .rename __llvm_prf_data.__profd_func1[RW],"__llvm_prf_data"
54+
55+
; CHECK: .csect __llvm_prf_cnts.__profc_func2[RW]
56+
; CHECK-NEXT: .ref __llvm_prf_names[RO]
57+
; CHECK-NEXT: .ref __llvm_prf_data.__profd_func2[RW]
58+
; CHECK-NEXT: .rename __llvm_prf_cnts.__profc_func2[RW],"__llvm_prf_cnts"
59+
60+
; CHECK: .csect __llvm_prf_data.__profd_func2[RW]
61+
; CHECK-NEXT: .rename __llvm_prf_data.__profd_func2[RW],"__llvm_prf_data"

0 commit comments

Comments
 (0)