From 29d61f1d05b30dbef3bac812b34ba0340c78cc68 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 8 Nov 2024 18:24:03 -0800 Subject: [PATCH 1/3] [memprof] Teach extractCallsFromIR to recognize heap allocation functions This patch teaches extractCallsFromIR to recognize heap allocation functions. Specifically, when we encounter a callee that is known to be a heap allocation function like "new", we set the callee GUID to 0. Note that I am planning to do the same for the caller-callee pairs extracted from the profile. That is, when I encounter a frame that does not have a callee, I will assume that the frame is calling some heap allocation function with GUID 0. Technically, I'm not recognizing enough functions in this patch. TCMalloc is known to drop certain frames in the call stack immediately above new. This patch is meant to lay the groundwork, setting up GetTLI, plumbing it to extractCallsFromIR, and adjusting the unit tests. I'll address remaining issues in subsequent patches. --- .../Transforms/Instrumentation/MemProfiler.h | 5 +- .../Instrumentation/MemProfiler.cpp | 14 ++- .../Instrumentation/MemProfUseTest.cpp | 94 ++++++++++++++++++- 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index f168ffc4fdb1e..2f0bf40536887 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -18,6 +18,7 @@ namespace llvm { class Function; class Module; +class TargetLibraryInfo; namespace vfs { class FileSystem; @@ -86,7 +87,9 @@ using CallEdgeTy = std::pair; // Extract all calls from the IR. Arrange them in a map from caller GUIDs to a // list of call sites, each of the form {LineLocation, CalleeGUID}. 
-DenseMap> extractCallsFromIR(Module &M); +DenseMap> +extractCallsFromIR(Module &M, + function_ref GetTLI); } // namespace memprof } // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 64e850c7d9316..a93792971929f 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -795,8 +795,8 @@ struct AllocMatchInfo { bool Matched = false; }; -DenseMap> -memprof::extractCallsFromIR(Module &M) { +DenseMap> memprof::extractCallsFromIR( + Module &M, function_ref GetTLI) { DenseMap> Calls; auto GetOffset = [](const DILocation *DIL) { @@ -820,6 +820,8 @@ memprof::extractCallsFromIR(Module &M) { continue; StringRef CalleeName = CalledFunction->getName(); + bool IsAlloc = + isAllocationWithHotColdVariant(CalledFunction, GetTLI(F)); for (const DILocation *DIL = I.getDebugLoc(); DIL; DIL = DIL->getInlinedAt()) { StringRef CallerName = DIL->getSubprogramLinkageName(); @@ -827,9 +829,17 @@ memprof::extractCallsFromIR(Module &M) { "Be sure to enable -fdebug-info-for-profiling"); uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); + // Pretend that we are calling a function with GUID == 0 if we are + // calling a heap allocation function. + if (IsAlloc) + CalleeGUID = 0; LineLocation Loc = {GetOffset(DIL), DIL->getColumn()}; Calls[CallerGUID].emplace_back(Loc, CalleeGUID); CalleeName = CallerName; + // FIXME: Recognize other frames that are associated with heap + // allocation functions. It may be too early to reset IsAlloc to + // false here. 
+ IsAlloc = false; } } } diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp index c864b06e991dc..4592a721af317 100644 --- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp +++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" @@ -80,7 +82,16 @@ declare !dbg !19 void @_Z2f3v() std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); ASSERT_TRUE(M); - auto Calls = extractCallsFromIR(*M); + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetLibraryAnalysis(); }); + PassBuilder PB; + PB.registerFunctionAnalyses(FAM); + + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto Calls = extractCallsFromIR(*M, GetTLI); // Expect exactly one caller. ASSERT_THAT(Calls, SizeIs(1)); @@ -177,7 +188,16 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); ASSERT_TRUE(M); - auto Calls = extractCallsFromIR(*M); + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetLibraryAnalysis(); }); + PassBuilder PB; + PB.registerFunctionAnalyses(FAM); + + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto Calls = extractCallsFromIR(*M, GetTLI); // Expect exactly 4 callers. 
ASSERT_THAT(Calls, SizeIs(4)); @@ -220,4 +240,74 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr EXPECT_THAT(G3CallSites[1], Pair(FieldsAre(2U, 3U), IndexedMemProfRecord::getGUID("_Z2g2v"))); } + +TEST(MemProf, ExtractDirectCallsFromIRCallingNew) { + // The following IR is generated from: + // + // int *foo() { + // return ::new (int); + // } + StringRef IR = R"IR( +define dso_local noundef ptr @_Z3foov() #0 !dbg !10 { +entry: + %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !dbg !13 + ret ptr %call, !dbg !14 +} + +; Function Attrs: nobuiltin allocsize(0) +declare noundef nonnull ptr @_Znwm(i64 noundef) #1 + +attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { builtin allocsize(0) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "foobar.cc", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"} +!6 = !{i32 8, !"PIC Level", i32 2} +!7 = !{i32 7, !"PIE Level", i32 2} +!8 = !{i32 7, !"uwtable", i32 2} +!9 = !{!"clang"} +!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, 
spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 2, column: 10, scope: !10) +!14 = !DILocation(line: 2, column: 3, scope: !10) +)IR"; + + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); + ASSERT_TRUE(M); + + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetLibraryAnalysis(); }); + PassBuilder PB; + PB.registerFunctionAnalyses(FAM); + + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto Calls = extractCallsFromIR(*M, GetTLI); + + // Expect exactly one caller. + ASSERT_THAT(Calls, SizeIs(1)); + + // Verify each key-value pair. + + auto FooIt = Calls.find(IndexedMemProfRecord::getGUID("_Z3foov")); + ASSERT_NE(FooIt, Calls.end()); + const auto &[FooCallerGUID, FooCallSites] = *FooIt; + EXPECT_EQ(FooCallerGUID, IndexedMemProfRecord::getGUID("_Z3foov")); + ASSERT_THAT(FooCallSites, SizeIs(1)); + EXPECT_THAT(FooCallSites[0], Pair(FieldsAre(1U, 10U), 0)); +} } // namespace From 38cfee5c47daec6a2e9d9e0cbf80aca8d603aa1a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 12 Nov 2024 14:21:23 -0800 Subject: [PATCH 2/3] Use TargetLibraryInfoWrapperPass. --- .../Transforms/Instrumentation/MemProfiler.h | 3 +- .../Instrumentation/MemProfiler.cpp | 7 ++-- .../Instrumentation/MemProfUseTest.cpp | 42 +++++++------------ 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index 2f0bf40536887..356735a709678 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -88,8 +88,7 @@ using CallEdgeTy = std::pair; // Extract all calls from the IR. Arrange them in a map from caller GUIDs to a // list of call sites, each of the form {LineLocation, CalleeGUID}. 
DenseMap> -extractCallsFromIR(Module &M, - function_ref GetTLI); +extractCallsFromIR(Module &M, const TargetLibraryInfo &GetTLI); } // namespace memprof } // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index a93792971929f..eda07a3d40e15 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -795,8 +795,8 @@ struct AllocMatchInfo { bool Matched = false; }; -DenseMap> memprof::extractCallsFromIR( - Module &M, function_ref GetTLI) { +DenseMap> +memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) { DenseMap> Calls; auto GetOffset = [](const DILocation *DIL) { @@ -820,8 +820,7 @@ DenseMap> memprof::extractCallsFromIR( continue; StringRef CalleeName = CalledFunction->getName(); - bool IsAlloc = - isAllocationWithHotColdVariant(CalledFunction, GetTLI(F)); + bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI); for (const DILocation *DIL = I.getDebugLoc(); DIL; DIL = DIL->getInlinedAt()) { StringRef CallerName = DIL->getSubprogramLinkageName(); diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp index 4592a721af317..cd0e8357a2b2d 100644 --- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp +++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp @@ -82,16 +82,12 @@ declare !dbg !19 void @_Z2f3v() std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); ASSERT_TRUE(M); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TargetLibraryAnalysis(); }); - PassBuilder PB; - PB.registerFunctionAnalyses(FAM); + auto *F = M->getFunction("_Z3foov"); + ASSERT_NE(F, nullptr); - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; - - auto Calls = extractCallsFromIR(*M, GetTLI); + TargetLibraryInfoWrapperPass WrapperPass; + auto &TLI = 
WrapperPass.getTLI(*F); + auto Calls = extractCallsFromIR(*M, TLI); // Expect exactly one caller. ASSERT_THAT(Calls, SizeIs(1)); @@ -188,16 +184,12 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); ASSERT_TRUE(M); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TargetLibraryAnalysis(); }); - PassBuilder PB; - PB.registerFunctionAnalyses(FAM); - - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; + auto *F = M->getFunction("_Z3foov"); + ASSERT_NE(F, nullptr); - auto Calls = extractCallsFromIR(*M, GetTLI); + TargetLibraryInfoWrapperPass WrapperPass; + auto &TLI = WrapperPass.getTLI(*F); + auto Calls = extractCallsFromIR(*M, TLI); // Expect exactly 4 callers. ASSERT_THAT(Calls, SizeIs(4)); @@ -287,16 +279,12 @@ attributes #2 = { builtin allocsize(0) } std::unique_ptr M = parseAssemblyString(IR, Err, Ctx); ASSERT_TRUE(M); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TargetLibraryAnalysis(); }); - PassBuilder PB; - PB.registerFunctionAnalyses(FAM); - - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; + auto *F = M->getFunction("_Z3foov"); + ASSERT_NE(F, nullptr); - auto Calls = extractCallsFromIR(*M, GetTLI); + TargetLibraryInfoWrapperPass WrapperPass; + auto &TLI = WrapperPass.getTLI(*F); + auto Calls = extractCallsFromIR(*M, TLI); // Expect exactly one caller. ASSERT_THAT(Calls, SizeIs(1)); From b12928dded04f5bfdbf90f01564040d689276dfc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 12 Nov 2024 15:34:33 -0800 Subject: [PATCH 3/3] Fix a parameter name. 
--- llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index 356735a709678..5177ac97cdfe3 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -88,7 +88,7 @@ using CallEdgeTy = std::pair; // Extract all calls from the IR. Arrange them in a map from caller GUIDs to a // list of call sites, each of the form {LineLocation, CalleeGUID}. DenseMap> -extractCallsFromIR(Module &M, const TargetLibraryInfo &GetTLI); +extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI); } // namespace memprof } // namespace llvm