Skip to content

Commit 96c69b7

Browse files
authored
[LTO][AllocToken] Support AllocToken instrumentation in backend (#169358)
Unconditionally add AllocTokenPass to the optimization pipelines, and ensure that it runs last in LTO backend pipelines. The latter ensures that AllocToken instrumentation can be moved later in the LTO pipeline to avoid interference with other optimizations (e.g. PGHO) and enable late heap-allocation optimizations. In preparation for removing Clang's own addition of AllocTokenPass, add support for AllocTokenPass to read its configuration options from LLVM module flags. As an optimization, given that the pass now runs unconditionally, only retrieve TargetLibraryInfo and OptimizationRemarkEmitter when necessary.
1 parent 0dec52b commit 96c69b7

File tree

16 files changed

+199
-33
lines changed

16 files changed

+199
-33
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,8 @@ static AllocTokenOptions getAllocTokenOptions(const LangOptions &LangOpts,
239239
AllocTokenOptions Opts;
240240
if (LangOpts.AllocTokenMode)
241241
Opts.Mode = *LangOpts.AllocTokenMode;
242-
Opts.MaxTokens = LangOpts.AllocTokenMax;
242+
if (LangOpts.AllocTokenMax)
243+
Opts.MaxTokens = *LangOpts.AllocTokenMax;
243244
Opts.Extended = CGOpts.SanitizeAllocTokenExtended;
244245
Opts.FastABI = CGOpts.SanitizeAllocTokenFastABI;
245246
return Opts;

clang/test/CodeGen/lto-newpm-pipeline.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,10 @@
3333
// CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
3434
// CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper
3535
// CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass
36-
// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
37-
// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
3836
// CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
3937
// CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
4038
// CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass
39+
// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
4140
// CHECK-FULL-O0-NEXT: Running pass: VerifierPass
4241
// CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
4342

@@ -49,11 +48,10 @@
4948
// CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
5049
// CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper
5150
// CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass
52-
// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
53-
// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
5451
// CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
5552
// CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
5653
// CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass
54+
// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
5755
// CHECK-THIN-O0-NEXT: Running pass: VerifierPass
5856
// CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
5957

llvm/include/llvm/Transforms/Instrumentation/AllocToken.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class Module;
2525

2626
struct AllocTokenOptions {
2727
AllocTokenMode Mode = DefaultAllocTokenMode;
28-
std::optional<uint64_t> MaxTokens;
28+
uint64_t MaxTokens = 0;
2929
bool FastABI = false;
3030
bool Extended = false;
3131
AllocTokenOptions() = default;

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
7474
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
7575
#include "llvm/Transforms/InstCombine/InstCombine.h"
76+
#include "llvm/Transforms/Instrumentation/AllocToken.h"
7677
#include "llvm/Transforms/Instrumentation/CGProfile.h"
7778
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
7879
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
@@ -1615,6 +1616,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
16151616
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
16161617
PTO.EagerlyInvalidateAnalyses));
16171618

1619+
// AllocToken transforms heap allocation calls; this needs to run late after
1620+
// other allocation call transformations (such as those in InstCombine).
1621+
if (!LTOPreLink)
1622+
MPM.addPass(AllocTokenPass());
1623+
16181624
invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
16191625

16201626
// Split out cold code. Splitting is done late to avoid hiding context from
@@ -1853,6 +1859,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
18531859
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
18541860
lowertypetests::DropTestKind::Assume));
18551861
MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::ThinLTOPostLink));
1862+
1863+
// AllocToken transforms heap allocation calls; this needs to run late after
1864+
// other allocation call transformations (such as those in InstCombine).
1865+
MPM.addPass(AllocTokenPass());
1866+
18561867
// Drop available_externally and unreferenced globals. This is necessary
18571868
// with ThinLTO in order to avoid leaving undefined references to dead
18581869
// globals in the object file.
@@ -1914,6 +1925,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
19141925

19151926
MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
19161927

1928+
// AllocToken transforms heap allocation calls; this needs to run late after
1929+
// other allocation call transformations (such as those in InstCombine).
1930+
MPM.addPass(AllocTokenPass());
1931+
19171932
invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
19181933

19191934
// Emit annotation remarks.
@@ -2001,6 +2016,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
20012016

20022017
MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
20032018

2019+
// AllocToken transforms heap allocation calls; this needs to run late after
2020+
// other allocation call transformations (such as those in InstCombine).
2021+
MPM.addPass(AllocTokenPass());
2022+
20042023
invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
20052024

20062025
// Emit annotation remarks.
@@ -2235,6 +2254,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
22352254

22362255
MPM.addPass(CoroCleanupPass());
22372256

2257+
// AllocToken transforms heap allocation calls; this needs to run late after
2258+
// other allocation call transformations (such as those in InstCombine).
2259+
MPM.addPass(AllocTokenPass());
2260+
22382261
invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
22392262

22402263
// Emit annotation remarks.
@@ -2351,6 +2374,11 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
23512374

23522375
MPM.addPass(buildCoroWrapper(Phase));
23532376

2377+
// AllocToken transforms heap allocation calls; this needs to run late after
2378+
// other allocation call transformations (such as those in InstCombine).
2379+
if (!isLTOPreLink(Phase))
2380+
MPM.addPass(AllocTokenPass());
2381+
23542382
invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
23552383

23562384
if (isLTOPreLink(Phase))

llvm/lib/Transforms/Instrumentation/AllocToken.cpp

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -234,34 +234,53 @@ class TypeHashPointerSplitMode : public TypeHashMode {
234234
}
235235
};
236236

237-
// Apply opt overrides.
238-
AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) {
239-
if (!Opts.MaxTokens.has_value())
237+
// Apply opt overrides and module flags.
238+
static AllocTokenOptions resolveOptions(AllocTokenOptions Opts,
239+
const Module &M) {
240+
auto IntModuleFlagOrNull = [&](StringRef Key) {
241+
return mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Key));
242+
};
243+
244+
if (auto *S = dyn_cast_or_null<MDString>(M.getModuleFlag("alloc-token-mode")))
245+
if (auto Mode = getAllocTokenModeFromString(S->getString()))
246+
Opts.Mode = *Mode;
247+
if (auto *Val = IntModuleFlagOrNull("alloc-token-max"))
248+
Opts.MaxTokens = Val->getZExtValue();
249+
if (auto *Val = IntModuleFlagOrNull("alloc-token-fast-abi"))
250+
Opts.FastABI |= Val->isOne();
251+
if (auto *Val = IntModuleFlagOrNull("alloc-token-extended"))
252+
Opts.Extended |= Val->isOne();
253+
254+
// Allow overriding options from command line options.
255+
if (ClMaxTokens.getNumOccurrences())
240256
Opts.MaxTokens = ClMaxTokens;
241-
Opts.FastABI |= ClFastABI;
242-
Opts.Extended |= ClExtended;
257+
if (ClFastABI.getNumOccurrences())
258+
Opts.FastABI = ClFastABI;
259+
if (ClExtended.getNumOccurrences())
260+
Opts.Extended = ClExtended;
261+
243262
return Opts;
244263
}
245264

246265
class AllocToken {
247266
public:
248267
explicit AllocToken(AllocTokenOptions Opts, Module &M,
249268
ModuleAnalysisManager &MAM)
250-
: Options(transformOptionsFromCl(std::move(Opts))), Mod(M),
269+
: Options(resolveOptions(std::move(Opts), M)), Mod(M),
251270
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
252-
Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) {
271+
Mode(IncrementMode(*IntPtrTy, Options.MaxTokens)) {
253272
switch (Options.Mode) {
254273
case TokenMode::Increment:
255274
break;
256275
case TokenMode::Random:
257-
Mode.emplace<RandomMode>(*IntPtrTy, *Options.MaxTokens,
276+
Mode.emplace<RandomMode>(*IntPtrTy, Options.MaxTokens,
258277
M.createRNG(DEBUG_TYPE));
259278
break;
260279
case TokenMode::TypeHash:
261-
Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens);
280+
Mode.emplace<TypeHashMode>(*IntPtrTy, Options.MaxTokens);
262281
break;
263282
case TokenMode::TypeHashPointerSplit:
264-
Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, *Options.MaxTokens);
283+
Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, Options.MaxTokens);
265284
break;
266285
}
267286
}
@@ -318,8 +337,6 @@ bool AllocToken::instrumentFunction(Function &F) {
318337
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
319338
return false;
320339

321-
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
322-
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
323340
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
324341
SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
325342

@@ -328,6 +345,10 @@ bool AllocToken::instrumentFunction(Function &F) {
328345
F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
329346
!F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
330347

348+
// Get TLI only when required.
349+
const TargetLibraryInfo *TLI =
350+
InstrumentFunction ? &FAM.getResult<TargetLibraryAnalysis>(F) : nullptr;
351+
331352
// Collect all allocation calls to avoid iterator invalidation.
332353
for (Instruction &I : instructions(F)) {
333354
// Collect all alloc_token_* intrinsics.
@@ -343,26 +364,28 @@ bool AllocToken::instrumentFunction(Function &F) {
343364
auto *CB = dyn_cast<CallBase>(&I);
344365
if (!CB)
345366
continue;
346-
if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, TLI))
367+
if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, *TLI))
347368
AllocCalls.emplace_back(CB, Func.value());
348369
}
349370

371+
// Return early to avoid unnecessarily instantiating the ORE.
372+
if (AllocCalls.empty() && IntrinsicInsts.empty())
373+
return false;
374+
375+
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
350376
bool Modified = false;
351377

352-
if (!AllocCalls.empty()) {
353-
for (auto &[CB, Func] : AllocCalls)
354-
Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
355-
if (Modified)
356-
NumFunctionsModified++;
357-
}
378+
for (auto &[CB, Func] : AllocCalls)
379+
Modified |= replaceAllocationCall(CB, Func, ORE, *TLI);
358380

359-
if (!IntrinsicInsts.empty()) {
360-
for (auto *II : IntrinsicInsts)
361-
replaceIntrinsicInst(II, ORE);
381+
for (auto *II : IntrinsicInsts) {
382+
replaceIntrinsicInst(II, ORE);
362383
Modified = true;
363-
NumFunctionsModified++;
364384
}
365385

386+
if (Modified)
387+
NumFunctionsModified++;
388+
366389
return Modified;
367390
}
368391

llvm/test/CodeGen/AArch64/print-pipeline-passes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt -mtriple=aarch64 -S -passes='default<O2>' -print-pipeline-passes < %s | FileCheck %s
33

44
; CHECK: loop-idiom-vectorize
5-
; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),function(annotation-remarks),verify,print{{$}}
5+
; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}}
66

77
define void @foo() {
88
entry:
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; Manually add instcombine to ensure the hot/cold transformation happens before
2+
; the LTO pipeline. The default LTO pipeline includes MemProfRemoveInfo which
3+
; strips the memprof attributes unless the summary index indicates support.
4+
; RUN: opt < %s -passes='function(instcombine),thinlto<O2>' -optimize-hot-cold-new -S | FileCheck %s
5+
; RUN: opt < %s -passes='function(instcombine),lto<O2>' -optimize-hot-cold-new -S | FileCheck %s
6+
; RUN: opt < %s -passes='function(instcombine),alloc-token' -optimize-hot-cold-new -S | FileCheck %s
7+
8+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
9+
10+
declare ptr @_Znwm(i64)
11+
12+
define ptr @new_hot() sanitize_alloc_token {
13+
; CHECK-LABEL: @new_hot(
14+
; CHECK: call {{.*}} @__alloc_token__Znwm12__hot_cold_t(i64 10, i8 -2, i64 2689373973731826898){{.*}} !alloc_token
15+
%ret = call ptr @_Znwm(i64 10) #0, !alloc_token !0
16+
ret ptr %ret
17+
}
18+
19+
attributes #0 = { builtin allocsize(0) "memprof"="hot" }
20+
!0 = !{!"int", i1 false}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; Test that all supported module flags are retrieved correctly.
2+
;
3+
; RUN: opt < %s -passes='inferattrs,alloc-token' -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
4+
; RUN: opt < %s -passes='inferattrs,alloc-token' -alloc-token-max=2 -alloc-token-fast-abi=0 -alloc-token-extended=0 -S | FileCheck %s --check-prefixes=CHECK,OVERRIDE
5+
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
7+
8+
declare ptr @_Znwm(i64)
9+
declare ptr @malloc(i64)
10+
declare ptr @my_malloc(i64)
11+
12+
define void @test() sanitize_alloc_token {
13+
; CHECK-LABEL: define void @test(
14+
; DEFAULT: call ptr @__alloc_token_0_malloc(i64 8)
15+
; DEFAULT: call ptr @__alloc_token_1__Znwm(i64 8)
16+
; DEFAULT: call ptr @__alloc_token_2_malloc(i64 8)
17+
; DEFAULT: call ptr @__alloc_token_0_my_malloc(i64 8)
18+
; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0)
19+
; OVERRIDE: call ptr @__alloc_token__Znwm(i64 8, i64 1)
20+
; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0)
21+
; OVERRIDE: call ptr @my_malloc(i64 8)
22+
%1 = call ptr @malloc(i64 8)
23+
%2 = call ptr @_Znwm(i64 8)
24+
%3 = call ptr @malloc(i64 8)
25+
%4 = call ptr @my_malloc(i64 8), !alloc_token !0
26+
ret void
27+
}
28+
29+
!0 = !{!"int", i1 0}
30+
31+
!llvm.module.flags = !{!1, !2, !3, !4}
32+
!1 = !{i32 1, !"alloc-token-mode", !"increment"}
33+
!2 = !{i32 1, !"alloc-token-max", i64 3}
34+
!3 = !{i32 1, !"alloc-token-fast-abi", i64 1}
35+
!4 = !{i32 1, !"alloc-token-extended", i64 1}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: opt -module-summary -o %t.thin.bc %s
2+
; RUN: llvm-lto2 run %t.thin.bc -o %t.thin.out \
3+
; RUN: -r=%t.thin.bc,main,plx \
4+
; RUN: -r=%t.thin.bc,_Znwm, \
5+
; RUN: -r=%t.thin.bc,sink,pl \
6+
; RUN: -supports-hot-cold-new -optimize-hot-cold-new
7+
; RUN: llvm-objdump -d -r %t.thin.out.1 | FileCheck %s
8+
9+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
10+
target triple = "x86_64-unknown-linux-gnu"
11+
12+
declare ptr @_Znwm(i64)
13+
14+
@sink = global ptr null
15+
16+
; CHECK-LABEL: <main>:
17+
; CHECK: callq
18+
; CHECK-NEXT: R_X86_64_PLT32 __alloc_token__Znwm12__hot_cold_t
19+
define void @main() sanitize_alloc_token {
20+
%call = call ptr @_Znwm(i64 8) #0
21+
store volatile ptr %call, ptr @sink
22+
ret void
23+
}
24+
25+
attributes #0 = { builtin allocsize(0) "memprof"="hot" }

llvm/test/LTO/X86/alloc-token.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; --- Full LTO ---
2+
; RUN: llvm-as %s -o %t.bc
3+
; RUN: llvm-lto -exported-symbol=main -o %t.out %t.bc
4+
; RUN: llvm-objdump -d -r %t.out | FileCheck %s
5+
; --- ThinLTO ---
6+
; RUN: opt -module-summary -o %t.thin.bc %s
7+
; RUN: llvm-lto2 run %t.thin.bc -o %t.thin.out \
8+
; RUN: -r=%t.thin.bc,main,plx \
9+
; RUN: -r=%t.thin.bc,_Znwm, \
10+
; RUN: -r=%t.thin.bc,sink,pl
11+
; RUN: llvm-objdump -d -r %t.thin.out.1 | FileCheck %s
12+
13+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
14+
target triple = "x86_64-unknown-linux-gnu"
15+
16+
declare ptr @_Znwm(i64)
17+
18+
@sink = global ptr null
19+
20+
; CHECK-LABEL: <main>:
21+
; CHECK: callq
22+
; CHECK-NEXT: R_X86_64_PLT32 __alloc_token__Znwm
23+
define void @main() sanitize_alloc_token {
24+
%call = call ptr @_Znwm(i64 8)
25+
store volatile ptr %call, ptr @sink
26+
ret void
27+
}

0 commit comments

Comments
 (0)