Skip to content

Conversation

@elhewaty
Copy link
Member

@elhewaty elhewaty commented Nov 9, 2024

First step to fix #115154

@llvmbot
Copy link
Member

llvmbot commented Nov 9, 2024

@llvm/pr-subscribers-bolt

Author: None (elhewaty)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/115616.diff

6 Files Affected:

  • (modified) bolt/include/bolt/Core/BinaryContext.h (+14)
  • (added) bolt/include/bolt/Passes/DiscoverNoReturnPass.h (+36)
  • (modified) bolt/lib/Core/BinaryContext.cpp (+14)
  • (modified) bolt/lib/Passes/CMakeLists.txt (+1)
  • (added) bolt/lib/Passes/DiscoverNoReturnPass.cpp (+84)
  • (modified) bolt/lib/Rewrite/BinaryPassManager.cpp (+2)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 08ce892054874c..749a82f0f453b0 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -272,6 +272,10 @@ class BinaryContext {
   /// DWARF line info for CUs.
   std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
 
+  /// Container to cache results about functions that their paths lead to
+  /// a no-return function.
+  std::unordered_map<BinaryFunction *, bool> CallsNoReturnFunction;
+
   /// Internal helper for removing section name from a lookup table.
   void deregisterSectionName(const BinarySection &Section);
 
@@ -282,6 +286,16 @@ class BinaryContext {
                       std::unique_ptr<DWARFContext> DwCtx,
                       JournalingStreams Logger);
 
+  void setHasPathToNoReturn(BinaryFunction *Func, bool value = true) {
+    CallsNoReturnFunction[Func] = value;
+  }
+
+  bool cachedInNoReturnMap(BinaryFunction *Func) {
+    return CallsNoReturnFunction.find(Func) != CallsNoReturnFunction.end();
+  }
+
+  bool hasPathToNoReturn(BinaryFunction *Func);
+
   /// Superset of compiler units that will contain overwritten code that needs
   /// new debug info. In a few cases, functions may end up not being
   /// overwritten, but it is okay to re-generate debug info for them.
diff --git a/bolt/include/bolt/Passes/DiscoverNoReturnPass.h b/bolt/include/bolt/Passes/DiscoverNoReturnPass.h
new file mode 100644
index 00000000000000..3a34e1344f3bb4
--- /dev/null
+++ b/bolt/include/bolt/Passes/DiscoverNoReturnPass.h
@@ -0,0 +1,36 @@
+//===- bolt/Passes/Discover.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_DISCOVERNORETURN_H
+#define BOLT_PASSES_DISCOVERNORETURN_H
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Passes/BinaryPasses.h"
+#include <unordered_map>
+
+namespace llvm {
+
+namespace bolt {
+
+class DiscoverNoReturnPass : public BinaryFunctionPass {
+public:
+  explicit DiscoverNoReturnPass() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "discover-no-return"; }
+
+  Error runOnFunctions(BinaryContext &BC) override;
+
+private:
+  std::unordered_map<BinaryFunction *, bool> Visited;
+  bool traverseFromFunction(BinaryFunction *Func, BinaryContext &BC);
+};
+} // namespace bolt
+} // namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index f246750209d6c4..c8eee1786b794a 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -308,6 +308,20 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
   return std::move(BC);
 }
 
+bool BinaryContext::hasPathToNoReturn(BinaryFunction *Func) {
+  // Dummy way to mark no-return functions.
+  // FIXME: Find a better way.
+  if (std::string FuncName = Func->getPrintName();
+      FuncName == "__cxa_throw@PLT" || FuncName != "_Unwind_Resume@PLT" ||
+      FuncName == "__cxa_rethrow@PLT" || FuncName != "exit@PLT" ||
+      FuncName == "abort@PLT" || FuncName == "setjmp@PLT" ||
+      FuncName == "longjmp@PLT")
+    return true;
+
+  auto itr = CallsNoReturnFunction.find(Func);
+  return itr != CallsNoReturnFunction.end() && itr->second;
+}
+
 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
   if (opts::HotText &&
       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 1c1273b3d2420d..7c950ec1decf59 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMBOLTPasses
   CacheMetrics.cpp
   DataflowAnalysis.cpp
   DataflowInfoManager.cpp
+  DiscoverNoReturnPass.cpp
   FrameAnalysis.cpp
   FrameOptimizer.cpp
   FixRelaxationPass.cpp
diff --git a/bolt/lib/Passes/DiscoverNoReturnPass.cpp b/bolt/lib/Passes/DiscoverNoReturnPass.cpp
new file mode 100644
index 00000000000000..5b64dc1da2f01f
--- /dev/null
+++ b/bolt/lib/Passes/DiscoverNoReturnPass.cpp
@@ -0,0 +1,84 @@
+//===- bolt/Passes/ReorderSection.cpp - Reordering of section data --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DiscoverNoReturnPass class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/DiscoverNoReturnPass.h"
+
+using namespace llvm;
+
+namespace opts {
+extern cl::OptionCategory BoltCategory;
+
+static cl::opt<bool> DiscoverNoReturnAnalysis(
+    "discover-no-return",
+    cl::desc("analyze the binary and mark no-return functions"), cl::init(true),
+    cl::cat(BoltCategory), cl::ReallyHidden);
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+Error DiscoverNoReturnPass::runOnFunctions(BinaryContext &BC) {
+  bool Changed;
+  do {
+    Changed = false;
+    for (auto &BFI : BC.getBinaryFunctions()) {
+      auto &Func = BFI.second;
+      bool PrevStat = BC.hasPathToNoReturn(&Func);
+      bool CurStat = traverseFromFunction(&Func, BC);
+      Changed |= (PrevStat != CurStat);
+    }
+  } while (Changed);
+
+  return Error::success();
+}
+
+bool DiscoverNoReturnPass::traverseFromFunction(BinaryFunction *Func,
+                                                BinaryContext &BC) {
+  // The Function cached before, so return its value
+  if (BC.cachedInNoReturnMap(Func))
+    return BC.hasPathToNoReturn(Func);
+
+  Visited[Func] = true;
+  bool Result = true;
+  bool hasCalls = 0;
+  bool hasReturns = 0;
+  for (auto &BB : *Func) {
+    if (!BB.getNumCalls())
+      continue;
+    for (auto &Inst : BB) {
+      if (BC.MIB->isCall(Inst)) {
+        hasCalls = true;
+        const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+        BinaryFunction *TargetFunction = BC.SymbolToFunctionMap[TargetSymbol];
+        if (!Visited.count(TargetFunction))
+          Result &= traverseFromFunction(TargetFunction, BC);
+      }
+      hasReturns |= BC.MIB->isReturn(Inst);
+    }
+  }
+
+  // This functions is represented as a leaf in the call graph and doesn't
+  // have a no-return attribute.
+  if (!hasCalls && hasReturns)
+    Result = false;
+
+  // If the function doens't have a return instruction then it's a
+  // no-return function.
+  if (!hasReturns)
+    Result = true;
+
+  BC.setHasPathToNoReturn(Func, Result);
+  return Result;
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index b0906041833484..f705e925dd9be9 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -13,6 +13,7 @@
 #include "bolt/Passes/AsmDump.h"
 #include "bolt/Passes/CMOVConversion.h"
 #include "bolt/Passes/ContinuityStats.h"
+#include "bolt/Passes/DiscoverNoReturnPass.h"
 #include "bolt/Passes/FixRISCVCallsPass.h"
 #include "bolt/Passes/FixRelaxationPass.h"
 #include "bolt/Passes/FrameOptimizer.h"
@@ -443,6 +444,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
 
   Manager.registerPass(std::make_unique<CMOVConversion>(),
                        opts::CMOVConversionFlag);
+  Manager.registerPass(std::make_unique<DiscoverNoReturnPass>());
 
   // This pass syncs local branches with CFG. If any of the following
   // passes breaks the sync - they either need to re-run the pass or

@elhewaty elhewaty requested a review from kbeyls November 9, 2024 18:48
@elhewaty
Copy link
Member Author

elhewaty commented Nov 9, 2024

@bgergely0 Can you share your thoughts.

@elhewaty elhewaty force-pushed the bolt-no-return-analysis branch from 69e8ed4 to 47e2fa2 Compare November 9, 2024 19:04
@bgergely0
Copy link
Contributor

bgergely0 commented Nov 11, 2024

I think currently this marks all functions noreturn. Can you please share an example code, and which functions this marks as noreturn and which it does not? (Or we can test using the example I had in the original issue)

@elhewaty
Copy link
Member Author

@bgergely0, I am not familiar with bolt testing flow, I am looking at it. But why do you think it will mark them all?

@bgergely0
Copy link
Contributor

@elhewaty I am not referring to the unittests or similar. I mean you should check the example code in from the issue, compile it, and run BOLT on it, and after the Pass is done, check which functions it marked as noreturn.

You can run an instrumentation, and BOLT will run this Pass during it.

@elhewaty
Copy link
Member Author

@bgergely0, I ran the pass, and it marks all of them as no-return, as you said. I modified the pass a little, but why is the size of the my_error function in the issue zero? I think LLVM optimized it away. Does it work as intended in X86(which I am using)?

It marks abort as a no-return but not my_error as it doesn't see anything in it to traverse. any hint?

@bgergely0
Copy link
Contributor

@elhewaty Hi! I would check with objdump, to see if my_error is indeed in the binary you built. But if BOLT finds it, I suspect it is in there.

What I did to test the pass was commenting out most other passes in the BinaryPassManager, to only have the effect of this pass. I could not comment out all (BOLT stops working if we remove too many Passes), so I kept these:

  • FinalizeFunctions
  • AssignSections
  • PatchEntries
  • InstructionLowering
  • CheckLargeFunctions
  • LowerAnnotations
  • CleanMCState

I will check with the x86 target as well, but I think it could be another BOLT pass inlining something.

Comment on lines +60 to +62
const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
BinaryFunction *TargetFunction = BC.SymbolToFunctionMap[TargetSymbol];
if (!Visited.count(TargetFunction))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TargetFuntion needs to be checked for nullptr

Comment on lines +71 to +77
if (!hasCalls && hasReturns)
Result = false;

// If the function doens't have a return instruction then it's a
// no-return function.
if (!hasReturns)
Result = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tailcalls should also be taken into account (BC.MIB->isTailCall(Inst)). If a function ends in a tailcall instruction, it depends on the Called function if it is NoReturn.

Comment on lines +314 to +319
if (std::string FuncName = Func->getPrintName();
FuncName == "__cxa_throw@PLT" || FuncName != "_Unwind_Resume@PLT" ||
FuncName == "__cxa_rethrow@PLT" || FuncName != "exit@PLT" ||
FuncName == "abort@PLT" || FuncName == "setjmp@PLT" ||
FuncName == "longjmp@PLT")
return true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FuncName != "A" || FuncName != "B" is always true.

@bgergely0
Copy link
Contributor

Hey @elhewaty !

I left a few comments some time ago, but forgot to hit finalize on the review, sorry!

After some more looking into this, I believe there is additional problems we need to tackle. The biggest is, we have no information about PLT entries. I can see you listed some, but I don't have a solution that covers any possible libraries that can be linked dynamically to the binary BOLT is ran on.


/// Container to cache results about functions that their paths lead to
/// a no-return function.
std::unordered_map<BinaryFunction *, bool> CallsNoReturnFunction;
Copy link
Contributor

@atrosinenko atrosinenko Apr 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, it looks like LLVM Programmer's Manual advises against the usage of std::unordered_map, though there are several tens of occurrences even in the existing code under llvm/lib. The same in DiscoverNoReturnPass.h below.

@@ -0,0 +1,36 @@
//===- bolt/Passes/Discover.h -----------------------------------*- C++ -*-===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] Invalid file name in the comment.

Comment on lines +9 to +10
#ifndef BOLT_PASSES_DISCOVERNORETURN_H
#define BOLT_PASSES_DISCOVERNORETURN_H
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] Defined name is out of sync with the file name.

std::unique_ptr<DWARFContext> DwCtx,
JournalingStreams Logger);

void setHasPathToNoReturn(BinaryFunction *Func, bool value = true) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] value -> Value

bool BinaryContext::hasPathToNoReturn(BinaryFunction *Func) {
// Dummy way to mark no-return functions.
// FIXME: Find a better way.
if (std::string FuncName = Func->getPrintName();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if getPrintName() is the right accessor to call - maybe getOneName() (or even asking hasName(...)) is more appropriate?

FuncName == "longjmp@PLT")
return true;

auto itr = CallsNoReturnFunction.find(Func);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] itr -> Itr

@@ -0,0 +1,84 @@
//===- bolt/Passes/ReorderSection.cpp - Reordering of section data --------===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] Wrong file name in the comment.

Comment on lines +52 to +53
bool hasCalls = 0;
bool hasReturns = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit]

Suggested change
bool hasCalls = 0;
bool hasReturns = 0;
bool HasCalls = 0;
bool HasReturns = 0;

Comment on lines +35 to +36
bool PrevStat = BC.hasPathToNoReturn(&Func);
bool CurStat = traverseFromFunction(&Func, BC);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] Was "State" intended here instead of "Stat"?

@atrosinenko
Copy link
Contributor

Left a few unsorted comments, mostly stylistic nit-picking.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[BOLT] Lack of support for noreturn functions results in incorrect CFG generation.

4 participants