Skip to content

Commit b39751c

Browse files
committed
[AIX] PGO codegen changes for function-sections.
The existing PGO implementation creates a dependence from the profiling data symbol to the function descriptors for every function defined within the compilation unit. This forces the binder to include either all or no functions from an object file when linking a profile-generate enabled object file, which can break links that require the aggressive "include only what's used" link semantics that XCOFF provides. To remedy this, we break up the profiling data symbol into per-function CSECTs by using the rename directive to emit the profd symbol for a single function to a unique CSECT, then rename it to the common name afterwards. We also have to split the counters into per-function CSECTs, as they need to have a .ref relocation from the counter section, which is referenced by the functions, to the profiling-data symbol, which is otherwise unreferenced.
1 parent bf59716 commit b39751c

File tree

6 files changed

+469
-3
lines changed

6 files changed

+469
-3
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// RUN: split-file %s %t
2+
// RUN: cd %t
3+
// RUN: %clang_pgogen -ffunction-sections main.c -c -o main.o
4+
// RUN: %clang_pgogen -ffunction-sections needs_gc.c -c -o needs_gc.o
5+
// RUN: %clang_pgogen main.o needs_gc.o -o needs_gc.out
6+
// RUN: env LLVM_PROFILE_FILE=needs_gc.profraw %run ./needs_gc.out > /dev/null
7+
// RUN: llvm-profdata show --all-functions needs_gc.profraw | FileCheck %s
8+
9+
// CHECK-DAG: main
10+
// CHECK-DAG: baz
11+
// CHECK-DAG: get_message
12+
13+
14+
//--- main.c
15+
// Returns the greeting string literal. It is defined in main.c and called
// from baz() in needs_gc.c.
const char* get_message(void) {
16+
return "Hello World!";
17+
}
18+
19+
int foo(void);
20+
double bar(void);
21+
const char* baz();
22+
23+
int printf(const char*, ...);
24+
25+
// Prints the string returned by baz(). foo() and bar() are declared in this
// file but never called here.
int main(void) {
26+
printf("%s\n", baz());
27+
}
28+
29+
//--- needs_gc.c
30+
extern int not_def_one(const char *);
31+
extern double not_def_two(void);
32+
33+
extern const char* get_message(void);
34+
35+
char buf[512];
36+
// Copies `size` bytes from `ptr` into the global `buf`, then returns the
// result of not_def_one(buf). not_def_one is only declared in this file, and
// main.c never calls foo.
// NOTE(review): presumably the link can only succeed if the binder discards
// this function - confirm against the intent of the test.
int foo(const char *ptr, unsigned long size) {
37+
void *memcpy(void *, const void *, unsigned long);
38+
memcpy(buf, ptr, size);
39+
return not_def_one(buf);
40+
}
41+
42+
// Returns the result of not_def_two(), which is only declared in this file.
// main.c never calls bar.
// NOTE(review): presumably this function is also expected to be discarded at
// link time - confirm against the intent of the test.
double bar(void) {
43+
return not_def_two();
44+
}
45+
46+
47+
// Forwards to get_message(), which is defined in main.c. This is the only
// function in this file that main() calls.
const char* baz() {
48+
return get_message();
49+
}

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
#include "llvm/InitializePasses.h"
8888
#include "llvm/MC/SectionKind.h"
8989
#include "llvm/Pass.h"
90+
#include "llvm/ProfileData/InstrProf.h"
9091
#include "llvm/Support/Casting.h"
9192
#include "llvm/Support/CommandLine.h"
9293
#include "llvm/Support/Debug.h"
@@ -155,6 +156,7 @@ class GlobalMergeImpl {
155156
const TargetMachine *TM = nullptr;
156157
GlobalMergeOptions Opt;
157158
bool IsMachO = false;
159+
bool IsAIX = false;
158160

159161
private:
160162
bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
@@ -674,7 +676,9 @@ bool GlobalMergeImpl::run(Module &M) {
674676
if (!EnableGlobalMerge)
675677
return false;
676678

677-
IsMachO = M.getTargetTriple().isOSBinFormatMachO();
679+
Triple T(M.getTargetTriple());
680+
IsMachO = T.isOSBinFormatMachO();
681+
IsAIX = T.isOSBinFormatXCOFF();
678682

679683
auto &DL = M.getDataLayout();
680684
MapVector<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 0>>
@@ -717,6 +721,14 @@ bool GlobalMergeImpl::run(Module &M) {
717721
GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
718722
continue;
719723

724+
// Do not merge profiling counters as it will prevent us from breaking
725+
// the __llvm_prf_cnts section into subsections, which in turn creates
726+
// extra symbol dependencies that can break otherwise valid link steps.
727+
if (IsAIX && TM && TM->getFunctionSections() && GV.hasSection() &&
728+
Section.starts_with(
729+
getInstrProfSectionName(IPSK_cnts, Triple::XCOFF, false)))
730+
continue;
731+
720732
// Ignore all "required" globals:
721733
if (isMustKeepGlobalVariable(&GV))
722734
continue;

llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2435,7 +2435,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
24352435
if (!GO->hasSection())
24362436
report_fatal_error("#pragma clang section is not yet supported");
24372437

2438-
StringRef SectionName = GO->getSection();
2438+
std::string SectionName(GO->getSection());
24392439

24402440
// Handle the XCOFF::TD case first, then deal with the rest.
24412441
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
@@ -2458,6 +2458,25 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
24582458
else
24592459
report_fatal_error("XCOFF other section types not yet implemented.");
24602460

2461+
// The profiling instrumentation symbols are special in that we want to
2462+
// emit a unique CSECT for each when function sections are enabled, which
2463+
// are then renamed back to the CSECT name specified by the explicit section.
2464+
// This is to work around the limitation of not having section groups or a
2465+
// similar feature in XCOFF.
2466+
if (TM.getFunctionSections()) {
2467+
std::string ProfilingDataSectionName =
2468+
getInstrProfSectionName(IPSK_data, Triple::XCOFF, false);
2469+
std::string ProfilingCounterSectionName =
2470+
getInstrProfSectionName(IPSK_cnts, Triple::XCOFF, false);
2471+
if ((SectionName == ProfilingDataSectionName &&
2472+
GO->getName().starts_with("__profd_")) ||
2473+
(SectionName == ProfilingCounterSectionName &&
2474+
GO->getName().starts_with("__profc_"))) {
2475+
SectionName += ".";
2476+
SectionName += GO->getName();
2477+
}
2478+
}
2479+
24612480
return getContext().getXCOFFSection(
24622481
SectionName, Kind, XCOFF::CsectProperties(MappingClass, XCOFF::XTY_SD),
24632482
/* MultiSymbolsAllowed*/ true);

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,22 @@ class PPCAIXAsmPrinter : public PPCAsmPrinter {
253253
DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>>
254254
GOAliasMap;
255255

256+
// The __profd_* symbol for the profiling instrumentation data and the
257+
// corresponding __profc_* counters it references.
258+
struct ProfilingSubSection {
259+
// CSECT that holds a single function's profiling data symbol.
MCSectionXCOFF *ProfD;
260+
// CSECT that holds the counters referenced by the data in ProfD.
MCSectionXCOFF *ProfC;
261+
};
262+
263+
// Collect the 'sub-sections' of the profile-generate symbols
264+
// so we can:
265+
// 1) rename to the common CSECT name after emission.
266+
// 2) emit the refs from the profc_ symbol to the related CSECTs.
267+
SmallVector<ProfilingSubSection> ProfGenSubSections;
268+
269+
void emitSharedSectionPGORefs(Module &M);
270+
void emitSplitSectionPGORefs();
271+
256272
uint16_t getNumberOfVRSaved();
257273
void emitTracebackTable();
258274

@@ -2810,6 +2826,57 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
28102826
MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
28112827
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
28122828

2829+
// When compiling with function sections enabled, we need some special
2830+
// codegen to rename the CSECTs. For each profiling data symbol find its
2831+
// associated profiling counters.
2832+
if (TM.getFunctionSections() &&
2833+
Csect->getName().starts_with("__llvm_prf_data.")) {
2834+
// Unraveling the initializer to find the related counters variable. The
2835+
// initializer is a structure whose third member is a subtract expression
2836+
// between the counters label and the label for the start of this structure.
2837+
// Use the subtract expression to get the GlobalValue for the counters
2838+
// global.
2839+
assert(GV->hasInitializer() &&
2840+
"profiling data symbol must have an initializer");
2841+
assert(isa<ConstantStruct>(GV->getInitializer()) &&
2842+
"expect the initializer for a profiling data symbol to be a struct");
2843+
const ConstantStruct *Initializer =
2844+
cast<ConstantStruct>(GV->getInitializer());
2845+
2846+
// The initializer structure is: { i64, i64, i32, ptr, ptr, i32, [4 x i16] }
2847+
// and the reference to the global variable for the counters is in the
2848+
// first i32 member.
2849+
const Constant *Member = Initializer->getAggregateElement(2);
2850+
assert(Member && "profiling data symbol has more then 3 elements");
2851+
2852+
// Want to decompose a constant expression of the form:
2853+
// sub (ptrtoint (ptr @__profc_sym), ptrtoint (ptr @__profd_sym))
2854+
// to get the GlobalVariable for the '@__profc_sym` symbol.
2855+
assert(isa<ConstantExpr>(Member) &&
2856+
"expected member initializer is a constant expression.");
2857+
const ConstantExpr *CExpr = cast<ConstantExpr>(Member);
2858+
assert(CExpr->getOpcode() == Instruction::Sub &&
2859+
"expected member intializer is a sub expression.");
2860+
2861+
Value *V1 = CExpr->getOperand(0);
2862+
assert(V1 && isa<ConstantExpr>(V1) &&
2863+
"expected sub expression operand to be constant expr.");
2864+
ConstantExpr *PointerToIntExpr = cast<ConstantExpr>(V1);
2865+
assert(PointerToIntExpr->isCast() && "unexpected operand type.");
2866+
2867+
Value *PointerToIntOperand = PointerToIntExpr->getOperand(0);
2868+
assert(isa<GlobalVariable>(PointerToIntOperand) &&
2869+
"expected global variable of profc symbol");
2870+
2871+
const GlobalVariable *ProfCGV = cast<GlobalVariable>(PointerToIntOperand);
2872+
// Map the global variable to its CSECT.
2873+
SectionKind ProfCKind = getObjFileLowering().getKindForGlobal(GV, TM);
2874+
MCSectionXCOFF *ProfCCsect = cast<MCSectionXCOFF>(
2875+
getObjFileLowering().SectionForGlobal(ProfCGV, ProfCKind, TM));
2876+
2877+
ProfGenSubSections.push_back({Csect, ProfCCsect});
2878+
}
2879+
28132880
// Switch to the containing csect.
28142881
OutStreamer->switchSection(Csect);
28152882

@@ -2911,7 +2978,7 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
29112978
getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
29122979
}
29132980

2914-
void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
2981+
void PPCAIXAsmPrinter::emitSharedSectionPGORefs(Module &M) {
29152982
if (!OutContext.hasXCOFFSection(
29162983
"__llvm_prf_cnts",
29172984
XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
@@ -2960,6 +3027,54 @@ void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
29603027
}
29613028
}
29623029

3030+
void PPCAIXAsmPrinter::emitSplitSectionPGORefs() {
3031+
MCSymbol *NamesSym = nullptr;
3032+
MCSymbol *VNDSSym = nullptr;
3033+
3034+
if (OutContext.hasXCOFFSection(
3035+
"__llvm_prf_names",
3036+
XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)))
3037+
NamesSym = OutContext.getOrCreateSymbol("__llvm_prf_names[RO]");
3038+
3039+
if (OutContext.hasXCOFFSection(
3040+
"__llvm_prf_vnds",
3041+
XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
3042+
VNDSSym = OutContext.getOrCreateSymbol("__llvm_prf_vnds[RW]");
3043+
3044+
for (auto SubSections : ProfGenSubSections) {
3045+
MCSectionXCOFF *ProfDCsect = SubSections.ProfD;
3046+
MCSectionXCOFF *ProfCCsect = SubSections.ProfC;
3047+
3048+
OutStreamer->switchSection(ProfCCsect);
3049+
3050+
if (NamesSym)
3051+
OutStreamer->emitXCOFFRefDirective(NamesSym);
3052+
3053+
if (VNDSSym)
3054+
OutStreamer->emitXCOFFRefDirective(VNDSSym);
3055+
3056+
OutStreamer->emitXCOFFRefDirective(ProfDCsect->getQualNameSymbol());
3057+
3058+
// Rename the subsection for the counters
3059+
OutStreamer->emitXCOFFRenameDirective(ProfCCsect->getQualNameSymbol(),
3060+
"__llvm_prf_cnts");
3061+
OutStreamer->addBlankLine();
3062+
3063+
// Rename the subsection for the data.
3064+
OutStreamer->switchSection(ProfDCsect);
3065+
OutStreamer->emitXCOFFRenameDirective(ProfDCsect->getQualNameSymbol(),
3066+
"__llvm_prf_data");
3067+
OutStreamer->addBlankLine();
3068+
}
3069+
}
3070+
3071+
// Dispatches PGO ref emission: with function sections enabled the per-function
// (split) CSECT scheme is used, otherwise the shared-section scheme.
void PPCAIXAsmPrinter::emitPGORefs(Module &M) {
  if (TM.getFunctionSections()) {
    emitSplitSectionPGORefs();
    return;
  }
  emitSharedSectionPGORefs(M);
}
3077+
29633078
void PPCAIXAsmPrinter::emitGCOVRefs() {
29643079
if (!OutContext.hasXCOFFSection(
29653080
"__llvm_gcov_ctr_section",
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; RUN: llc --function-sections -mtriple powerpc-ibm-aix-xcoff < %s | \
2+
; RUN: FileCheck %s
3+
4+
; RUN: llc --function-sections -mtriple powerpc64-ibm-aix-xcoff < %s | \
5+
; RUN: FileCheck %s
6+
7+
@i = external local_unnamed_addr global i32, align 4
8+
@__llvm_profile_raw_version = weak hidden local_unnamed_addr constant i64 72057594037927944
9+
@__profc_func1 = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
10+
@__profd_func1 = private global { i64, i64, i32, ptr, ptr, i32, [4 x i16] } { i64 -2545542355363006406, i64 742261418966908927, i32 sub (i32 ptrtoint (ptr @__profc_func1 to i32), i32 ptrtoint (ptr @__profd_func1 to i32)), ptr @func1.local, ptr null, i32 1, [4 x i16] zeroinitializer }, section "__llvm_prf_data", align 8
11+
@__profc_func2 = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
12+
@__profd_func2 = private global { i64, i64, i32, ptr, ptr, i32, [4 x i16] } { i64 -4377547752858689819, i64 742261418966908927, i32 sub (i32 ptrtoint (ptr @__profc_func2 to i32), i32 ptrtoint (ptr @__profd_func2 to i32)), ptr @func2.local, ptr null, i32 1, [4 x i16] zeroinitializer }, section "__llvm_prf_data", align 8
13+
@__llvm_prf_nm = private constant [13 x i8] c"\0B\00func1\01func2", section "__llvm_prf_names", align 1
14+
@__llvm_profile_filename = weak hidden local_unnamed_addr constant [19 x i8] c"default_%m.profraw\00"
15+
@llvm.used = appending global [3 x ptr] [ptr @__llvm_prf_nm, ptr @__profd_func1, ptr @__profd_func2], section "llvm.metadata"
16+
17+
@func1.local = private alias i32 (), ptr @func1
18+
@func2.local = private alias i32 (), ptr @func2
19+
20+
; Increments the counter stored in @__profc_func1, then returns the value
; loaded from the external global @i.
define i32 @func1() {
21+
entry:
22+
%pgocount = load i64, ptr @__profc_func1, align 8
23+
%0 = add i64 %pgocount, 1
24+
store i64 %0, ptr @__profc_func1, align 8
25+
%1 = load i32, ptr @i, align 4
26+
ret i32 %1
27+
}
28+
29+
; Increments the counter stored in @__profc_func2, loads @i, and tail-calls
; the externally declared @external_func with that value, returning its result.
define i32 @func2() {
30+
entry:
31+
%pgocount = load i64, ptr @__profc_func2, align 8
32+
%0 = add i64 %pgocount, 1
33+
store i64 %0, ptr @__profc_func2, align 8
34+
%1 = load i32, ptr @i, align 4
35+
%call = tail call i32 @external_func(i32 noundef %1)
36+
ret i32 %call
37+
}
38+
39+
declare i32 @external_func(i32 noundef)
40+
41+
; CHECK-DAG: .csect __llvm_prf_cnts.__profc_func1[RW]
42+
; CHECK-DAG: .csect __llvm_prf_data.__profd_func1[RW]
43+
; CHECK-DAG: .csect __llvm_prf_cnts.__profc_func2[RW]
44+
; CHECK-DAG: .csect __llvm_prf_data.__profd_func2[RW]
45+
; CHECK-DAG: .csect __llvm_prf_names[RO]
46+
47+
; CHECK: .csect __llvm_prf_cnts.__profc_func1[RW]
48+
; CHECK-NEXT: .ref __llvm_prf_names[RO]
49+
; CHECK-NEXT: .ref __llvm_prf_data.__profd_func1[RW]
50+
; CHECK-NEXT: .rename __llvm_prf_cnts.__profc_func1[RW],"__llvm_prf_cnts"
51+
52+
; CHECK: .csect __llvm_prf_data.__profd_func1[RW]
53+
; CHECK-NEXT: .rename __llvm_prf_data.__profd_func1[RW],"__llvm_prf_data"
54+
55+
; CHECK: .csect __llvm_prf_cnts.__profc_func2[RW]
56+
; CHECK-NEXT: .ref __llvm_prf_names[RO]
57+
; CHECK-NEXT: .ref __llvm_prf_data.__profd_func2[RW]
58+
; CHECK-NEXT: .rename __llvm_prf_cnts.__profc_func2[RW],"__llvm_prf_cnts"
59+
60+
; CHECK: .csect __llvm_prf_data.__profd_func2[RW]
61+
; CHECK-NEXT: .rename __llvm_prf_data.__profd_func2[RW],"__llvm_prf_data"

0 commit comments

Comments
 (0)