From 8cc8861c9978fdd3b4b8f9aca2816376396719d8 Mon Sep 17 00:00:00 2001 From: Franklin Date: Sun, 9 Mar 2025 18:06:10 +0800 Subject: [PATCH] [BOLT][Linux] Skip some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip some functions to simplify things, making BOLT for Linux more reliable and saving development efforts. **skip functions defined in assembly code** We use "-bolt-function-list-file" to gather a list of C function when building Linux kernel, then BOLT can choose to optimize them only. BOLT can not handle some functions defined in assembly code reliably, since they may have extra semantics/enforcements BOLT can never know. For example, irq_entries_start defined in arch/x86/include/asm/idtentry.h is actually an “array” but BOLT views it as an ordinary function. If BOLT applies basic block reordering, instrumentation, etc to it, run time errors would happen. We could explicitly specify those functions to skip, but to find all of them we usually need to test & debug many times. That is a lot of work and we may still miss some. In my own experience, when adapting BOLT for Linux to a new architecture or Linux version, we may spend lots of time on fixing runtime errors related to functions defined in assembly code. Only handling C functions makes BOLT for Linux more reliable, and may save lots of development efforts. "-bolt-function-list-file" can be used as follows: * put "-mllvm -bolt-function-list-file=filename" in KCFLAGS when buildling Linux * specify "-funcs-file-no-regex=filename" when invoking llvm-bolt **skip functions that should keep their address** Some functions's address are used in add/sub/comparision. To make things simple, they should keep their address (after relocation mode is supported). See __bpf_call_base in kernel/bpf/core.c for an example. Currently, we just skip them. "bolt-keep-address-function-list-file" can be used as follows: * put "-mllvm -bolt-keep-address-function-list-file=filename" in KCFLAGS when buildling Linux * specify "-keep-address-funcs-file-no-regex=filename" when invoking llvm-bolt **skip functions not in .text** Functions in sections other than .text (e.g. .head.text, .init.text, .exit.text) are (mostly) run during initialization and shutdown, and not (much) relevant to application performance. Skipping them also helps to avoid runtime errors, especially those even before the first message is printed out, which are not easy to debug. Only handling functions in .text also make sure no function is moved to a different section (after relocation mode is supported). Linux kernel code may compare function pointers with section boundary symbols, and if we move functions to another section, runtime errors may happen. --- bolt/include/bolt/Core/BinaryContext.h | 13 ++++++- bolt/include/bolt/Core/BinaryFunction.h | 6 ++++ bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 27 ++++++++++++++ bolt/lib/Rewrite/RewriteInstance.cpp | 23 +++++++++++- llvm/lib/CodeGen/CodeGenPrepare.cpp | 45 ++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 2 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 8bec1db70e25a..f4c3ea6c9fe47 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -433,6 +433,13 @@ class BinaryContext { Address); } + bool isInRange(StringRef NameStart, StringRef NameEnd, + uint64_t Address) const { + ErrorOr Start = getSymbolValue(NameStart); + ErrorOr End = getSymbolValue(NameEnd); + return Start && End && *Start <= Address && Address < *End; + } + /// Return size of an entry for the given jump table \p Type. uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const { return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize(); @@ -911,7 +918,11 @@ class BinaryContext { /// Return a value of the global \p Symbol or an error if the value /// was not set. ErrorOr getSymbolValue(const MCSymbol &Symbol) const { - const BinaryData *BD = getBinaryDataByName(Symbol.getName()); + return getSymbolValue(Symbol.getName()); + } + + ErrorOr getSymbolValue(StringRef Name) const { + const BinaryData *BD = getBinaryDataByName(Name); if (!BD) return std::make_error_code(std::errc::bad_address); return BD->getAddress(); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 942840a7621fd..edfb5256467ba 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -294,6 +294,9 @@ class BinaryFunction { /// Pseudo functions should not be disassembled or emitted. bool IsPseudo{false}; + // True if address of this function can not be changed + bool KeepAddress{false}; + /// True if the original function code has all necessary relocations to track /// addresses of functions emitted to new locations. Typically set for /// functions that we are not going to emit. @@ -1318,6 +1321,9 @@ class BinaryFunction { /// otherwise processed. bool isPseudo() const { return IsPseudo; } + /// Return true if address of this function can not be changed + bool mustKeepAddress() const { return KeepAddress; } + /// Return true if the function contains explicit or implicit indirect branch /// to its split fragments, e.g., split jump table, landing pad in split /// fragment. diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 5a5e044184d0b..c724e4a59c0f0 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -351,6 +351,33 @@ class LinuxKernelRewriter final : public MetadataRewriter { if (Error E = detectLinuxKernelVersion()) return E; + auto ShouldIgnore = [this](const BinaryFunction &Function) { + std::optional SectionName = Function.getOriginSectionName(); + if (!SectionName || *SectionName != ".text") + return true; + + uint64_t Address = Function.getAddress(); + + if (BC.isX86()) { + BinaryData *BDStart = BC.getBinaryDataByName("irq_entries_start"); + if (BDStart && BDStart->containsAddress(Address)) + return true; + + if (BC.isInRange("__static_call_text_start", "__static_call_text_end", + Address)) + return true; + } + + if (BC.isInRange("__noinstr_text_start", "__noinstr_text_end", Address)) + return true; + + return false; + }; + + for (BinaryFunction *Function : BC.getAllBinaryFunctions()) + if (ShouldIgnore(*Function)) + Function->setIgnored(); + processLKSections(); if (Error E = processSMPLocks()) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 70a9f084f009b..535612e35af08 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -137,6 +137,16 @@ static cl::opt FunctionNamesFileNR( cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, cl::cat(BoltCategory)); +static cl::list KeepAddressFunctionNamesNR( + "keep-address-funcs-no-regex", cl::CommaSeparated, + cl::desc("KeepAddress functions from the list (non-regex)"), + cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt KeepAddressFunctionNamesFileNR( + "keep-address-funcs-file-no-regex", + cl::desc("file with list of KeepAddress functions to optimize (non-regex)"), + cl::Hidden, cl::cat(BoltCategory)); + cl::opt KeepTmp("keep-tmp", cl::desc("preserve intermediate .o file"), @@ -2982,6 +2992,8 @@ void RewriteInstance::selectFunctionsToProcess() { populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); + populateFunctionNames(opts::KeepAddressFunctionNamesFileNR, + opts::KeepAddressFunctionNamesNR); // Make a set of functions to process to speed up lookups. std::unordered_set ForceFunctionsNR( @@ -2996,6 +3008,10 @@ void RewriteInstance::selectFunctionsToProcess() { exit(1); } + std::unordered_set KeepAddressFunctionsNR( + opts::KeepAddressFunctionNamesNR.begin(), + opts::KeepAddressFunctionNamesNR.end()); + uint64_t LiteThresholdExecCount = 0; if (opts::LiteThresholdPct) { if (opts::LiteThresholdPct > 100) @@ -3043,7 +3059,8 @@ void RewriteInstance::selectFunctionsToProcess() { for (std::string &Name : opts::SkipFunctionNames) if (Function.hasNameRegex(Name)) return true; - + if (BC->HasRelocations && Function.mustKeepAddress()) + return true; return false; }; @@ -3094,6 +3111,10 @@ void RewriteInstance::selectFunctionsToProcess() { for (auto &BFI : BC->getBinaryFunctions()) { BinaryFunction &Function = BFI.second; + for (const StringRef Name : Function.getNames()) + if (KeepAddressFunctionsNR.count(Name.str())) + Function.KeepAddress = true; + // Pseudo functions are explicitly marked by us not to be processed. if (Function.isPseudo()) { Function.IsIgnored = true; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 088062afab177..09242fe1cb602 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -85,6 +85,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -274,6 +275,14 @@ static cl::opt DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes.")); +cl::opt + BoltFunctionListFile("bolt-function-list-file", cl::Hidden, + cl::desc("Specify BOLT function list file")); + +cl::opt BoltKeepAddressFunctionListFile( + "bolt-keep-address-function-list-file", cl::Hidden, + cl::desc("Specify BOLT KeepAddress function list file")); + namespace { enum ExtType { @@ -505,7 +514,43 @@ class CodeGenPrepareLegacyPass : public FunctionPass { char CodeGenPrepareLegacyPass::ID = 0; +template void GatherForBoltKA(raw_fd_ostream &OS, T &I) { + switch (I.getOpcode()) { + case Instruction::ICmp: + case Instruction::PtrToInt: + for (Use &U : I.operands()) + if (auto *FF = dyn_cast(U.get())) + OS << FF->getName() << "\n"; + break; + default:; + } + for (Use &U : I.operands()) + if (auto *CE = dyn_cast(U.get())) + GatherForBoltKA(OS, *CE); +} + bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) { + if (!BoltFunctionListFile.empty()) { + std::error_code EC; + raw_fd_ostream OS(BoltFunctionListFile, EC, sys::fs::OpenFlags::OF_Append); + if (EC) + report_fatal_error(Twine(BoltFunctionListFile) + ": " + EC.message()); + OS << F.getName() << "\n"; + } + + if (!BoltKeepAddressFunctionListFile.empty()) { + std::error_code EC; + raw_fd_ostream OS(BoltKeepAddressFunctionListFile, EC, + sys::fs::OpenFlags::OF_Append); + if (EC) + report_fatal_error(Twine(BoltKeepAddressFunctionListFile) + ": " + + EC.message()); + + for (BasicBlock &BB : F) + for (Instruction &I : BB) + GatherForBoltKA(OS, I); + } + if (skipFunction(F)) return false; auto TM = &getAnalysis().getTM();