Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions clang/test/CodeGen/memprof-pgho.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Test the end-to-end optimization pipeline with PGHO, and check that it does
// not interfere with other allocation instrumentation features.
//
// RUN: split-file %s %t
// RUN: llvm-profdata merge %t/memprof.yaml -o %t/use.profdata
// RUN: %clang_cc1 -O2 -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \
// RUN: %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,DEFAULT
// RUN: %clang_cc1 -O2 -fsanitize=alloc-token -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \
// RUN: %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,ALLOCTOKEN

//--- memprof.yaml
---
HeapProfileRecords:
- GUID: 0x7f8d88fcc70a347b
AllocSites:
- Callstack:
- { Function: 0x7f8d88fcc70a347b, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 13, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalAccessCount: 0
MinAccessCount: 0
MaxAccessCount: 0
TotalSize: 10
MinSize: 10
MaxSize: 10
AllocTimestamp: 100
DeallocTimestamp: 100
TotalLifetime: 100000
MinLifetime: 100000
MaxLifetime: 100000
AllocCpuId: 0
DeallocCpuId: 0
NumMigratedCpu: 0
NumLifetimeOverlaps: 0
NumSameAllocCpu: 0
NumSameDeallocCpu: 0
DataTypeId: 0
TotalAccessDensity: 0
MinAccessDensity: 0
MaxAccessDensity: 0
TotalLifetimeAccessDensity: 0
MinLifetimeAccessDensity: 0
MaxLifetimeAccessDensity: 0
AccessHistogramSize: 0
AccessHistogram: 0
...

//--- src.cpp
// CHECK-LABEL: define{{.*}} ptr @_Z3foov()
// DEFAULT: call {{.*}} ptr @_Znam12__hot_cold_t(i64 10, i8 -128)
// ALLOCTOKEN: call {{.*}} ptr @__alloc_token__Znam12__hot_cold_t(i64 10, i8 -128, i64 1538840549748785101){{.*}} !alloc_token
// Allocation site under test: returns a freshly allocated 10-element char
// array; the DEFAULT/ALLOCTOKEN check lines above match the call emitted for
// this `new char[10]`.
// NOTE(review): the memprof.yaml call stack above lists a frame with
// LineOffset: 1 / Column: 10 -- presumably this allocation, measured from the
// first line of this function. Avoid adding lines inside the body or
// re-indenting it without confirming the profile still matches.
char *foo() {
return new char[10];
}

// Driver: calls foo() once and releases the returned array with delete[].
// NOTE(review): the second call-stack frame in memprof.yaml (LineOffset: 1 /
// Column: 13) presumably refers to the foo() call below -- confirm before
// changing the layout of this function.
int main() {
char *a = foo();
delete[] a;
return 0;
}
33 changes: 33 additions & 0 deletions llvm/lib/LTO/LTOBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
Expand All @@ -42,6 +43,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Instrumentation/AllocToken.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <optional>
Expand All @@ -68,6 +70,10 @@ static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
"Embed post merge, but before optimizations")),
cl::desc("Embed LLVM bitcode in object files produced by LTO"));

// Command-line option naming the AllocToken mode to use during LTO. It
// defaults to the empty string; registerBackendInstrumentation() only adds the
// AllocToken pass when the value is non-empty, and reports a fatal error if the
// string is not a recognized mode.
static cl::opt<std::string> LTOAllocTokenMode(
"lto-alloc-token-mode", cl::init(""),
cl::desc("Enable AllocToken instrumentation during LTO with chosen mode"));

static cl::opt<bool> ThinLTOAssumeMerged(
"thinlto-assume-merged", cl::init(false),
cl::desc("Assume the input has already undergone ThinLTO function "
Expand Down Expand Up @@ -198,6 +204,31 @@ static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins,
}
}

// Hook up instrumentation passes that have to run late in the pipeline. They
// are not optimizations: they are scheduled after most optimizations so that
// they do not interfere with them (e.g. PGHO) and so that they observe the
// final state of the code.
static void registerBackendInstrumentation(PassBuilder &PB) {
  // AllocToken instrumentation is opt-in: with no mode requested on the
  // command line there is nothing to register.
  if (LTOAllocTokenMode.empty())
    return;

  AllocTokenOptions Opts;
  auto ParsedMode = getAllocTokenModeFromString(LTOAllocTokenMode);
  // An unrecognized mode string is a hard error rather than a silent no-op.
  if (!ParsedMode)
    report_fatal_error("invalid lto-alloc-token-mode: " +
                       Twine(LTOAllocTokenMode));
  Opts.Mode = *ParsedMode;

  // ThinLTO backend: append the pass at the optimizer-last extension point.
  PB.registerOptimizerLastEPCallback(
      [Opts](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
        PM.addPass(AllocTokenPass(Opts));
      });

  // Full LTO backend: append the pass at the end of the full LTO pipeline.
  PB.registerFullLinkTimeOptimizationLastEPCallback(
      [Opts](ModulePassManager &PM, OptimizationLevel) {
        PM.addPass(AllocTokenPass(Opts));
      });
}

static std::unique_ptr<TargetMachine>
createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
const Triple &TheTriple = M.getTargetTriple();
Expand Down Expand Up @@ -277,6 +308,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,

RegisterPassPlugins(Conf.PassPlugins, PB);

registerBackendInstrumentation(PB);

std::unique_ptr<TargetLibraryInfoImpl> TLII(
new TargetLibraryInfoImpl(TM->getTargetTriple()));
if (Conf.Freestanding)
Expand Down
80 changes: 43 additions & 37 deletions llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1806,119 +1806,125 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
// better to replace the hinted call with a non hinted call, to avoid the
// extra parameter and the if condition check of the hint value in the
// allocator. This can be considered in the future.
Value *NewCall = nullptr;
switch (Func) {
case LibFunc_Znwm12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znwm12__hot_cold_t, HotCold);
NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znwm12__hot_cold_t, HotCold);
break;
case LibFunc_Znwm:
return emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znwm12__hot_cold_t, HotCold);
NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znwm12__hot_cold_t, HotCold);
break;
case LibFunc_Znam12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znam12__hot_cold_t, HotCold);
NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znam12__hot_cold_t, HotCold);
break;
case LibFunc_Znam:
return emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znam12__hot_cold_t, HotCold);
NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
LibFunc_Znam12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewNoThrow(
NewCall = emitHotColdNewNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmRKSt9nothrow_t:
return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t,
HotCold);
NewCall = emitHotColdNewNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewNoThrow(
NewCall = emitHotColdNewNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamRKSt9nothrow_t:
return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t,
HotCold);
NewCall = emitHotColdNewNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewAligned(
NewCall = emitHotColdNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmSt11align_val_t:
return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t,
HotCold);
NewCall = emitHotColdNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewAligned(
NewCall = emitHotColdNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamSt11align_val_t:
return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t,
HotCold);
NewCall = emitHotColdNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewAlignedNoThrow(
NewCall = emitHotColdNewAlignedNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
HotCold);
break;
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
return emitHotColdNewAlignedNoThrow(
NewCall = emitHotColdNewAlignedNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
return emitHotColdNewAlignedNoThrow(
NewCall = emitHotColdNewAlignedNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
HotCold);
break;
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
return emitHotColdNewAlignedNoThrow(
NewCall = emitHotColdNewAlignedNoThrow(
CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_size_returning_new:
return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
LibFunc_size_returning_new_hot_cold,
HotCold);
NewCall = emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
LibFunc_size_returning_new_hot_cold,
HotCold);
break;
case LibFunc_size_returning_new_hot_cold:
if (OptimizeExistingHotColdNew)
return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
LibFunc_size_returning_new_hot_cold,
HotCold);
NewCall = emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
LibFunc_size_returning_new_hot_cold,
HotCold);
break;
case LibFunc_size_returning_new_aligned:
return emitHotColdSizeReturningNewAligned(
NewCall = emitHotColdSizeReturningNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_size_returning_new_aligned_hot_cold, HotCold);
break;
case LibFunc_size_returning_new_aligned_hot_cold:
if (OptimizeExistingHotColdNew)
return emitHotColdSizeReturningNewAligned(
NewCall = emitHotColdSizeReturningNewAligned(
CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
LibFunc_size_returning_new_aligned_hot_cold, HotCold);
break;
default:
return nullptr;
}
return nullptr;

if (auto *NewCI = dyn_cast_or_null<Instruction>(NewCall))
if (MDNode *MD = CI->getMetadata(LLVMContext::MD_alloc_token))
NewCI->setMetadata(LLVMContext::MD_alloc_token, MD);

return NewCall;
}

//===----------------------------------------------------------------------===//
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/LTO/X86/alloc-token.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; RUN: llvm-as %s -o %t.bc
;
; RUN: llvm-lto2 run -lto-alloc-token-mode=default %t.bc -o %t.out \
; RUN: -r=%t.bc,main,plx \
; RUN: -r=%t.bc,_Znwm, \
; RUN: -r=%t.bc,sink,pl
; RUN: llvm-objdump -d -r %t.out.0 | FileCheck %s --check-prefixes=CHECK,DEFAULT
;
; RUN: llvm-lto2 run -lto-alloc-token-mode=default -alloc-token-fast-abi -alloc-token-max=1 %t.bc -o %t.out \
; RUN: -r=%t.bc,main,plx \
; RUN: -r=%t.bc,_Znwm, \
; RUN: -r=%t.bc,sink,pl
; RUN: llvm-objdump -d -r %t.out.0 | FileCheck %s --check-prefixes=CHECK,FASTABI

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare ptr @_Znwm(i64) #0

@sink = global ptr null

; CHECK-LABEL: <main>:
; CHECK: callq
; DEFAULT-NEXT: R_X86_64_PLT32 __alloc_token__Znwm
; FASTABI-NEXT: R_X86_64_PLT32 __alloc_token_0__Znwm
; Test input: a sanitize_alloc_token function that calls @_Znwm(i64 8) with
; !alloc_token metadata attached, then stores the returned pointer to @sink
; with a volatile store.
define void @main() sanitize_alloc_token {
%call = call ptr @_Znwm(i64 8) #0, !alloc_token !0
store volatile ptr %call, ptr @sink
ret void
}

attributes #0 = { nobuiltin allocsize(0) }

!0 = !{!"int", i1 0}
13 changes: 13 additions & 0 deletions llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,16 @@ define void @size_returning_aligned_update_test() {
ret void
}

;; Check that !alloc_token is preserved.
; HOTCOLD-LABEL: @new_alloc_token()
define void @new_alloc_token() {
;; Attribute cold converted to __hot_cold_t cold value.
; HOTCOLD: @_Znwm12__hot_cold_t(i64 10, i8 [[COLD]]), !alloc_token ![[ALLOC_TOKEN:[0-9]+]]
%call = call ptr @_Znwm(i64 10) #0, !alloc_token !0
;; Pass the allocation to @dummy so the call result has a use.
call void @dummy(ptr %call)
ret void
}

;; So that instcombine doesn't optimize out the call.
declare void @dummy(ptr)

Expand Down Expand Up @@ -649,3 +659,6 @@ attributes #5 = { "memprof" = "hot" }
attributes #8 = { "memprof" = "ambiguous" }

attributes #6 = { nobuiltin allocsize(0) "memprof"="cold" }

; CHECK: [[ALLOC_TOKEN]] = !{!"MyType", i1 false}
!0 = !{!"MyType", i1 false}