From d596966645e76f444cea4f6e4377848209b6cbc5 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 24 Nov 2025 17:36:32 +0100 Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.8-beta.1 [skip ci] --- clang/test/CodeGen/memprof-pgho.cpp | 61 ++++++++++++++ .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 80 ++++++++++--------- .../InstCombine/simplify-libcalls-new.ll | 13 +++ 3 files changed, 117 insertions(+), 37 deletions(-) create mode 100644 clang/test/CodeGen/memprof-pgho.cpp diff --git a/clang/test/CodeGen/memprof-pgho.cpp b/clang/test/CodeGen/memprof-pgho.cpp new file mode 100644 index 0000000000000..73bec2b303f13 --- /dev/null +++ b/clang/test/CodeGen/memprof-pgho.cpp @@ -0,0 +1,61 @@ +// Test end-to-end optimization pipeline with PGHO, that it does not interfere +// with other allocation instrumentation features. 
+// +// RUN: split-file %s %t +// RUN: llvm-profdata merge %t/memprof.yaml -o %t/use.profdata +// RUN: %clang_cc1 -O2 -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \ +// RUN: %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,DEFAULT +// RUN: %clang_cc1 -O2 -fsanitize=alloc-token -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \ +// RUN: %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,ALLOCTOKEN + +//--- memprof.yaml +--- +HeapProfileRecords: + - GUID: 0x7f8d88fcc70a347b + AllocSites: + - Callstack: + - { Function: 0x7f8d88fcc70a347b, LineOffset: 1, Column: 10, IsInlineFrame: false } + - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 13, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalAccessCount: 0 + MinAccessCount: 0 + MaxAccessCount: 0 + TotalSize: 10 + MinSize: 10 + MaxSize: 10 + AllocTimestamp: 100 + DeallocTimestamp: 100 + TotalLifetime: 100000 + MinLifetime: 100000 + MaxLifetime: 100000 + AllocCpuId: 0 + DeallocCpuId: 0 + NumMigratedCpu: 0 + NumLifetimeOverlaps: 0 + NumSameAllocCpu: 0 + NumSameDeallocCpu: 0 + DataTypeId: 0 + TotalAccessDensity: 0 + MinAccessDensity: 0 + MaxAccessDensity: 0 + TotalLifetimeAccessDensity: 0 + MinLifetimeAccessDensity: 0 + MaxLifetimeAccessDensity: 0 + AccessHistogramSize: 0 + AccessHistogram: 0 +... 
+ +//--- src.cpp +// CHECK-LABEL: define{{.*}} ptr @_Z3foov() +// DEFAULT: call {{.*}} ptr @_Znam12__hot_cold_t(i64 10, i8 -128) +// ALLOCTOKEN: call {{.*}} ptr @__alloc_token__Znam12__hot_cold_t(i64 10, i8 -128, i64 1538840549748785101){{.*}} !alloc_token +char *foo() { + return new char[10]; +} + +int main() { + char *a = foo(); + delete[] a; + return 0; +} diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 4f4e64b1c7b70..4ba4e484fb77d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1806,119 +1806,125 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, // better to replace the hinted call with a non hinted call, to avoid the // extra parameter and the if condition check of the hint value in the // allocator. This can be considered in the future. + Value *NewCall = nullptr; switch (Func) { case LibFunc_Znwm12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znwm12__hot_cold_t, HotCold); + NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znwm12__hot_cold_t, HotCold); break; case LibFunc_Znwm: - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znwm12__hot_cold_t, HotCold); + NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znwm12__hot_cold_t, HotCold); break; case LibFunc_Znam12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znam12__hot_cold_t, HotCold); + NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znam12__hot_cold_t, HotCold); break; case LibFunc_Znam: - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znam12__hot_cold_t, HotCold); + NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znam12__hot_cold_t, HotCold); break; case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - 
return emitHotColdNewNoThrow( + NewCall = emitHotColdNewNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmRKSt9nothrow_t: - return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, - HotCold); + NewCall = emitHotColdNewNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNewNoThrow( + NewCall = emitHotColdNewNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamRKSt9nothrow_t: - return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, - HotCold); + NewCall = emitHotColdNewNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNewAligned( + NewCall = emitHotColdNewAligned( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmSt11align_val_t: - return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t, - HotCold); + NewCall = emitHotColdNewAligned( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNewAligned( + NewCall = emitHotColdNewAligned( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_t: - return 
emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t, - HotCold); + NewCall = emitHotColdNewAligned( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNewAlignedNoThrow( + NewCall = emitHotColdNewAlignedNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: - return emitHotColdNewAlignedNoThrow( + NewCall = emitHotColdNewAlignedNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) - return emitHotColdNewAlignedNoThrow( + NewCall = emitHotColdNewAlignedNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: - return emitHotColdNewAlignedNoThrow( + NewCall = emitHotColdNewAlignedNoThrow( CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_size_returning_new: - return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, - LibFunc_size_returning_new_hot_cold, - HotCold); + NewCall = emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, + LibFunc_size_returning_new_hot_cold, + HotCold); break; case LibFunc_size_returning_new_hot_cold: if (OptimizeExistingHotColdNew) - return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, - LibFunc_size_returning_new_hot_cold, - HotCold); + NewCall = 
emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, + LibFunc_size_returning_new_hot_cold, + HotCold); break; case LibFunc_size_returning_new_aligned: - return emitHotColdSizeReturningNewAligned( + NewCall = emitHotColdSizeReturningNewAligned( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_size_returning_new_aligned_hot_cold, HotCold); break; case LibFunc_size_returning_new_aligned_hot_cold: if (OptimizeExistingHotColdNew) - return emitHotColdSizeReturningNewAligned( + NewCall = emitHotColdSizeReturningNewAligned( CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, LibFunc_size_returning_new_aligned_hot_cold, HotCold); break; default: return nullptr; } - return nullptr; + + if (auto *NewCI = dyn_cast_or_null(NewCall)) + if (MDNode *MD = CI->getMetadata(LLVMContext::MD_alloc_token)) + NewCI->setMetadata(LLVMContext::MD_alloc_token, MD); + + return NewCall; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll index 5a4fb04f5f2c0..2765c75f4d5e0 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll @@ -610,6 +610,16 @@ define void @size_returning_aligned_update_test() { ret void } +;; Check that !alloc_token is preserved. +; HOTCOLD-LABEL: @new_alloc_token() +define void @new_alloc_token() { + ;; Attribute cold converted to __hot_cold_t cold value. + ; HOTCOLD: @_Znwm12__hot_cold_t(i64 10, i8 [[COLD]]), !alloc_token ![[ALLOC_TOKEN:[0-9]+]] + %call = call ptr @_Znwm(i64 10) #0, !alloc_token !0 + call void @dummy(ptr %call) + ret void +} + ;; So that instcombine doesn't optimize out the call. 
declare void @dummy(ptr) @@ -649,3 +659,6 @@ attributes #5 = { "memprof" = "hot" } attributes #8 = { "memprof" = "ambiguous" } attributes #6 = { nobuiltin allocsize(0) "memprof"="cold" } + +; CHECK: [[ALLOC_TOKEN]] = !{!"MyType", i1 false} +!0 = !{!"MyType", i1 false} From 451e91c58d92250121da33136a6db9597644f50d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 25 Nov 2025 15:31:35 +0100 Subject: [PATCH 2/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.8-beta.1 [skip ci] --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 4ba4e484fb77d..d1548694baa27 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1921,8 +1921,7 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, } if (auto *NewCI = dyn_cast_or_null(NewCall)) - if (MDNode *MD = CI->getMetadata(LLVMContext::MD_alloc_token)) - NewCI->setMetadata(LLVMContext::MD_alloc_token, MD); + NewCI->copyMetadata(*CI); return NewCall; } From a161378a10a3e51a7ddaf78b84dba4d5d4fb14c7 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 27 Nov 2025 15:59:14 +0100 Subject: [PATCH 3/4] address review comments Created using spr 1.3.8-beta.1 --- clang/lib/CodeGen/BackendUtil.cpp | 3 +- clang/test/CodeGen/lto-newpm-pipeline.c | 6 +- .../Transforms/Instrumentation/AllocToken.h | 2 +- llvm/lib/Passes/PassBuilderPipelines.cpp | 22 ++++++- .../Transforms/Instrumentation/AllocToken.cpp | 66 ++++++++++--------- .../CodeGen/AArch64/print-pipeline-passes.ll | 2 +- .../AllocToken/module-flags.ll | 15 +++-- llvm/test/Other/new-pm-O0-defaults.ll | 12 ++-- llvm/test/Other/new-pm-defaults.ll | 1 + 
llvm/test/Other/new-pm-lto-defaults.ll | 1 - 10 files changed, 76 insertions(+), 54 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 5590d217e96ff..1462c686f4053 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -239,7 +239,8 @@ static AllocTokenOptions getAllocTokenOptions(const LangOptions &LangOpts, AllocTokenOptions Opts; if (LangOpts.AllocTokenMode) Opts.Mode = *LangOpts.AllocTokenMode; - Opts.MaxTokens = LangOpts.AllocTokenMax; + if (LangOpts.AllocTokenMax) + Opts.MaxTokens = *LangOpts.AllocTokenMax; Opts.Extended = CGOpts.SanitizeAllocTokenExtended; Opts.FastABI = CGOpts.SanitizeAllocTokenFastABI; return Opts; diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index dceaaf136ebfc..5673c72b49eff 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -33,11 +33,10 @@ // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass -// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass +// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-FULL-O0-NEXT: Running pass: VerifierPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -49,11 +48,10 @@ // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass -// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-THIN-O0-NEXT: Running 
pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass +// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-THIN-O0-NEXT: Running pass: VerifierPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h b/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h index 077703c214745..299fc03c5d96b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h @@ -25,7 +25,7 @@ class Module; struct AllocTokenOptions { AllocTokenMode Mode = DefaultAllocTokenMode; - std::optional MaxTokens; + uint64_t MaxTokens = 0; bool FastABI = false; bool Extended = false; AllocTokenOptions() = default; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4e68344136c38..c6beb3fdf09bd 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1452,7 +1452,6 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase) { const bool LTOPreLink = isLTOPreLink(LTOPhase); - const bool LTOPostLink = isLTOPostLink(LTOPhase); ModulePassManager MPM; // Run partial inlining pass to partially inline functions that have @@ -1617,7 +1616,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); - if (LTOPostLink) + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). 
+ if (!LTOPreLink) MPM.addPass(AllocTokenPass()); invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase); @@ -1858,7 +1859,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::Assume)); MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::ThinLTOPostLink)); + + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). MPM.addPass(AllocTokenPass()); + // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -1919,6 +1924,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, lowertypetests::DropTestKind::Assume)); MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink)); + + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). MPM.addPass(AllocTokenPass()); invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); @@ -2007,6 +2015,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, lowertypetests::DropTestKind::Assume)); MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink)); + + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). MPM.addPass(AllocTokenPass()); invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); @@ -2242,6 +2253,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true)); MPM.addPass(CoroCleanupPass()); + + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). 
MPM.addPass(AllocTokenPass()); invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); @@ -2360,7 +2374,9 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, MPM.addPass(buildCoroWrapper(Phase)); - if (isLTOPostLink(Phase)) + // AllocToken transforms heap allocation calls; this needs to run late after + // other allocation call transformations (such as those in InstCombine). + if (!isLTOPreLink(Phase)) MPM.addPass(AllocTokenPass()); invokeOptimizerLastEPCallbacks(MPM, Level, Phase); diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 1be0ab802e177..13f7a46af5481 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -67,24 +67,24 @@ cl::opt ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), cl::Hidden, cl::init("__alloc_token_")); -cl::opt +cl::opt, false, cl::parser> ClMaxTokens("alloc-token-max", cl::desc("Maximum number of tokens (0 = target SIZE_MAX)"), - cl::Hidden, cl::init(0)); + cl::Hidden, cl::init(std::nullopt)); -cl::opt +cl::opt, false, cl::parser> ClFastABI("alloc-token-fast-abi", cl::desc("The token ID is encoded in the function name"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(std::nullopt)); // Instrument libcalls only by default - compatible allocators only need to take // care of providing standard allocation functions. With extended coverage, also // instrument non-libcall allocation function calls with !alloc_token // metadata. -cl::opt +cl::opt, false, cl::parser> ClExtended("alloc-token-extended", cl::desc("Extend coverage to custom allocation functions"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(std::nullopt)); // C++ defines ::operator new (and variants) as replaceable (vs. 
standard // library versions), which are nobuiltin, and are therefore not covered by @@ -237,11 +237,6 @@ class TypeHashPointerSplitMode : public TypeHashMode { // Apply opt overrides and module flags. static AllocTokenOptions resolveOptions(AllocTokenOptions Opts, const Module &M) { - if (!Opts.MaxTokens.has_value()) - Opts.MaxTokens = ClMaxTokens; - Opts.FastABI |= ClFastABI; - Opts.Extended |= ClExtended; - auto IntModuleFlagOrNull = [&](StringRef Key) { return mdconst::extract_or_null(M.getModuleFlag(Key)); }; @@ -249,16 +244,21 @@ static AllocTokenOptions resolveOptions(AllocTokenOptions Opts, if (auto *S = dyn_cast_or_null(M.getModuleFlag("alloc-token-mode"))) if (auto Mode = getAllocTokenModeFromString(S->getString())) Opts.Mode = *Mode; - if (auto *Val = IntModuleFlagOrNull("alloc-token-max")) Opts.MaxTokens = Val->getZExtValue(); - if (auto *Val = IntModuleFlagOrNull("alloc-token-fast-abi")) Opts.FastABI |= Val->isOne(); - if (auto *Val = IntModuleFlagOrNull("alloc-token-extended")) Opts.Extended |= Val->isOne(); + // Allow overriding options from command line options. 
+ if (ClMaxTokens.has_value()) + Opts.MaxTokens = *ClMaxTokens; + if (ClFastABI.has_value()) + Opts.FastABI = *ClFastABI; + if (ClExtended.has_value()) + Opts.Extended = *ClExtended; + return Opts; } @@ -268,19 +268,19 @@ class AllocToken { ModuleAnalysisManager &MAM) : Options(resolveOptions(std::move(Opts), M)), Mod(M), FAM(MAM.getResult(M).getManager()), - Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) { + Mode(IncrementMode(*IntPtrTy, Options.MaxTokens)) { switch (Options.Mode) { case TokenMode::Increment: break; case TokenMode::Random: - Mode.emplace(*IntPtrTy, *Options.MaxTokens, + Mode.emplace(*IntPtrTy, Options.MaxTokens, M.createRNG(DEBUG_TYPE)); break; case TokenMode::TypeHash: - Mode.emplace(*IntPtrTy, *Options.MaxTokens); + Mode.emplace(*IntPtrTy, Options.MaxTokens); break; case TokenMode::TypeHashPointerSplit: - Mode.emplace(*IntPtrTy, *Options.MaxTokens); + Mode.emplace(*IntPtrTy, Options.MaxTokens); break; } } @@ -337,8 +337,6 @@ bool AllocToken::instrumentFunction(Function &F) { if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; - auto &ORE = FAM.getResult(F); - auto &TLI = FAM.getResult(F); SmallVector, 4> AllocCalls; SmallVector IntrinsicInsts; @@ -347,6 +345,10 @@ bool AllocToken::instrumentFunction(Function &F) { F.hasFnAttribute(Attribute::SanitizeAllocToken) && !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); + // Get TLI only when required. + const TargetLibraryInfo *TLI = + InstrumentFunction ? &FAM.getResult(F) : nullptr; + // Collect all allocation calls to avoid iterator invalidation. for (Instruction &I : instructions(F)) { // Collect all alloc_token_* intrinsics. 
@@ -362,26 +364,28 @@ bool AllocToken::instrumentFunction(Function &F) { auto *CB = dyn_cast(&I); if (!CB) continue; - if (std::optional Func = shouldInstrumentCall(*CB, TLI)) + if (std::optional Func = shouldInstrumentCall(*CB, *TLI)) AllocCalls.emplace_back(CB, Func.value()); } + // Return early to avoid unnecessarily instantiating the ORE. + if (AllocCalls.empty() && IntrinsicInsts.empty()) + return false; + + auto &ORE = FAM.getResult(F); bool Modified = false; - if (!AllocCalls.empty()) { - for (auto &[CB, Func] : AllocCalls) - Modified |= replaceAllocationCall(CB, Func, ORE, TLI); - if (Modified) - NumFunctionsModified++; - } + for (auto &[CB, Func] : AllocCalls) + Modified |= replaceAllocationCall(CB, Func, ORE, *TLI); - if (!IntrinsicInsts.empty()) { - for (auto *II : IntrinsicInsts) - replaceIntrinsicInst(II, ORE); + for (auto *II : IntrinsicInsts) { + replaceIntrinsicInst(II, ORE); Modified = true; - NumFunctionsModified++; } + if (Modified) + NumFunctionsModified++; + return Modified; } diff --git a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll index 5852f97a63798..86090324c770c 100644 --- a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll +++ b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll @@ -2,7 +2,7 @@ ; RUN: opt -mtriple=aarch64 -S -passes='default' -print-pipeline-passes < %s | FileCheck %s ; CHECK: loop-idiom-vectorize -; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),function(annotation-remarks),verify,print{{$}} +; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}} define void @foo() { entry: diff --git a/llvm/test/Instrumentation/AllocToken/module-flags.ll b/llvm/test/Instrumentation/AllocToken/module-flags.ll index d92c22ba35fb2..7b86510fe6eaf 100644 --- 
a/llvm/test/Instrumentation/AllocToken/module-flags.ll +++ b/llvm/test/Instrumentation/AllocToken/module-flags.ll @@ -1,6 +1,7 @@ ; Test that all supported module flags are retrieved correctly. ; -; RUN: opt < %s -passes='inferattrs,alloc-token' -S | FileCheck %s +; RUN: opt < %s -passes='inferattrs,alloc-token' -S | FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: opt < %s -passes='inferattrs,alloc-token' -alloc-token-max=2 -alloc-token-fast-abi=0 -alloc-token-extended=0 -S | FileCheck %s --check-prefixes=CHECK,OVERRIDE target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -10,10 +11,14 @@ declare ptr @my_malloc(i64) define void @test() sanitize_alloc_token { ; CHECK-LABEL: define void @test( -; CHECK: call ptr @__alloc_token_0_malloc(i64 8) -; CHECK: call ptr @__alloc_token_1__Znwm(i64 8) -; CHECK: call ptr @__alloc_token_2_malloc(i64 8) -; CHECK: call ptr @__alloc_token_0_my_malloc(i64 8) +; DEFAULT: call ptr @__alloc_token_0_malloc(i64 8) +; DEFAULT: call ptr @__alloc_token_1__Znwm(i64 8) +; DEFAULT: call ptr @__alloc_token_2_malloc(i64 8) +; DEFAULT: call ptr @__alloc_token_0_my_malloc(i64 8) +; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0) +; OVERRIDE: call ptr @__alloc_token__Znwm(i64 8, i64 1) +; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0) +; OVERRIDE: call ptr @my_malloc(i64 8) %1 = call ptr @malloc(i64 8) %2 = call ptr @_Znwm(i64 8) %3 = call ptr @malloc(i64 8) diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll index 8087f745680b9..a7f43d1fc4591 100644 --- a/llvm/test/Other/new-pm-O0-defaults.ll +++ b/llvm/test/Other/new-pm-O0-defaults.ll @@ -9,13 +9,13 @@ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-CORO +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-CORO,CHECK-ALLOCTOKEN ; RUN: opt 
-disable-verify -verify-analysis-invalidation=0 -debug-pass-manager -enable-matrix \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-MATRIX,CHECK-CORO +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-MATRIX,CHECK-CORO,CHECK-ALLOCTOKEN ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager -debug-info-for-profiling \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DIS,CHECK-CORO +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DIS,CHECK-CORO,CHECK-ALLOCTOKEN ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-PRE-LINK,CHECK-CORO @@ -41,14 +41,13 @@ ; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis ; CHECK-CORO-NEXT: Running pass: CoroConditionalWrapper +; CHECK-ALLOCTOKEN-NEXT: Running pass: AllocTokenPass ; CHECK-PRE-LINK: Running pass: CanonicalizeAliasesPass ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass ; CHECK-THINLTO: Running pass: LowerTypeTestsPass ; CHECK-THINLTO-NEXT: Running pass: CoroConditionalWrapper ; CHECK-THINLTO-NEXT: Running pass: AllocTokenPass ; CHECK-THINLTO-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-THINLTO-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-THINLTO-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-THINLTO-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-THINLTO-NEXT: Running pass: GlobalDCEPass ; CHECK-LTO: Running pass: CrossDSOCFIPass on [module] @@ -58,11 +57,10 @@ ; CHECK-LTO-NEXT: Running pass: LowerTypeTestsPass ; CHECK-LTO-NEXT: CoroConditionalWrapper ; CHECK-LTO-NEXT: Running pass: AllocTokenPass -; CHECK-LTO-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-LTO-NEXT: Running analysis: TargetLibraryAnalysis 
; CHECK-CORO-NEXT: Running pass: AnnotationRemarksPass ; CHECK-CORO-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LTO-NEXT: Running pass: AnnotationRemarksPass +; CHECK-LTO-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-NEXT: Running pass: PrintModulePass ; Make sure we get the IR back out without changes when we print the module. diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 1f437a662cc96..f074b2fdd3ab8 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -285,6 +285,7 @@ ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-DEFAULT-NEXT: Running pass: AllocToken ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpModulePass ; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass ; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 0dea345a8125a..de0feca55e5b2 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -164,7 +164,6 @@ ; CHECK-O1-NEXT: Running pass: CoroConditionalWrapper ; CHECK-O23SZ-NEXT: Running pass: CoroCleanupPass ; CHECK-O-NEXT: Running pass: AllocTokenPass -; CHECK-O1-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-EP-NEXT: Running pass: NoOpModulePass ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: PrintModulePass From bcd578731629cfed6ba1d43dfe70404a2549a2d9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 27 Nov 2025 17:09:25 +0100 Subject: [PATCH 4/4] simplify copts, add extra tests Created using spr 1.3.8-beta.1 --- .../Transforms/Instrumentation/AllocToken.cpp | 24 +++++++++--------- .../AllocToken/hot-cold-new.ll | 20 +++++++++++++++ llvm/test/LTO/X86/alloc-token-hot-cold-new.ll | 25 +++++++++++++++++++ 3 files changed, 57 insertions(+), 12 
deletions(-) create mode 100644 llvm/test/Instrumentation/AllocToken/hot-cold-new.ll create mode 100644 llvm/test/LTO/X86/alloc-token-hot-cold-new.ll diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 13f7a46af5481..38eeee287b94e 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -67,24 +67,24 @@ cl::opt ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), cl::Hidden, cl::init("__alloc_token_")); -cl::opt, false, cl::parser> +cl::opt ClMaxTokens("alloc-token-max", cl::desc("Maximum number of tokens (0 = target SIZE_MAX)"), - cl::Hidden, cl::init(std::nullopt)); + cl::Hidden, cl::init(0)); -cl::opt, false, cl::parser> +cl::opt ClFastABI("alloc-token-fast-abi", cl::desc("The token ID is encoded in the function name"), - cl::Hidden, cl::init(std::nullopt)); + cl::Hidden, cl::init(false)); // Instrument libcalls only by default - compatible allocators only need to take // care of providing standard allocation functions. With extended coverage, also // instrument non-libcall allocation function calls with !alloc_token // metadata. -cl::opt, false, cl::parser> +cl::opt ClExtended("alloc-token-extended", cl::desc("Extend coverage to custom allocation functions"), - cl::Hidden, cl::init(std::nullopt)); + cl::Hidden, cl::init(false)); // C++ defines ::operator new (and variants) as replaceable (vs. standard // library versions), which are nobuiltin, and are therefore not covered by @@ -252,12 +252,12 @@ static AllocTokenOptions resolveOptions(AllocTokenOptions Opts, Opts.Extended |= Val->isOne(); // Allow overriding options from command line options. 
- if (ClMaxTokens.has_value()) - Opts.MaxTokens = *ClMaxTokens; - if (ClFastABI.has_value()) - Opts.FastABI = *ClFastABI; - if (ClExtended.has_value()) - Opts.Extended = *ClExtended; + if (ClMaxTokens.getNumOccurrences()) + Opts.MaxTokens = ClMaxTokens; + if (ClFastABI.getNumOccurrences()) + Opts.FastABI = ClFastABI; + if (ClExtended.getNumOccurrences()) + Opts.Extended = ClExtended; return Opts; } diff --git a/llvm/test/Instrumentation/AllocToken/hot-cold-new.ll b/llvm/test/Instrumentation/AllocToken/hot-cold-new.ll new file mode 100644 index 0000000000000..36f3df1096fe4 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/hot-cold-new.ll @@ -0,0 +1,20 @@ +; Manually add instcombine to ensure the hot/cold transformation happens before +; the LTO pipeline. The default LTO pipeline includes MemProfRemoveInfo which +; strips the memprof attributes unless the summary index indicates support. +; RUN: opt < %s -passes='function(instcombine),thinlto' -optimize-hot-cold-new -S | FileCheck %s +; RUN: opt < %s -passes='function(instcombine),lto' -optimize-hot-cold-new -S | FileCheck %s +; RUN: opt < %s -passes='function(instcombine),alloc-token' -optimize-hot-cold-new -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +declare ptr @_Znwm(i64) + +define ptr @new_hot() sanitize_alloc_token { +; CHECK-LABEL: @new_hot( +; CHECK: call {{.*}} @__alloc_token__Znwm12__hot_cold_t(i64 10, i8 -2, i64 2689373973731826898){{.*}} !alloc_token + %ret = call ptr @_Znwm(i64 10) #0, !alloc_token !0 + ret ptr %ret +} + +attributes #0 = { builtin allocsize(0) "memprof"="hot" } +!0 = !{!"int", i1 false} diff --git a/llvm/test/LTO/X86/alloc-token-hot-cold-new.ll b/llvm/test/LTO/X86/alloc-token-hot-cold-new.ll new file mode 100644 index 0000000000000..7f7a8e45b7da0 --- /dev/null +++ b/llvm/test/LTO/X86/alloc-token-hot-cold-new.ll @@ -0,0 +1,25 @@ +; RUN: opt -module-summary -o %t.thin.bc %s +; RUN: llvm-lto2 run %t.thin.bc -o 
%t.thin.out \ +; RUN: -r=%t.thin.bc,main,plx \ +; RUN: -r=%t.thin.bc,_Znwm, \ +; RUN: -r=%t.thin.bc,sink,pl \ +; RUN: -supports-hot-cold-new -optimize-hot-cold-new +; RUN: llvm-objdump -d -r %t.thin.out.1 | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare ptr @_Znwm(i64) + +@sink = global ptr null + +; CHECK-LABEL:
<main>: +; CHECK: callq +; CHECK-NEXT: R_X86_64_PLT32 __alloc_token__Znwm12__hot_cold_t +define void @main() sanitize_alloc_token { + %call = call ptr @_Znwm(i64 8) #0 + store volatile ptr %call, ptr @sink + ret void +} + +attributes #0 = { builtin allocsize(0) "memprof"="hot" }