-
Notifications
You must be signed in to change notification settings - Fork 15k
Revert "[BOLT] Refactor MCInstReference and move it to Core (NFC)" #155639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "[BOLT] Refactor MCInstReference and move it to Core (NFC)" #155639
Conversation
@llvm/pr-subscribers-bolt Author: Anatoly Trosinenko (atrosinenko) ChangesReverts llvm/llvm-project#138655. As reported by @nico: I will resend the patch after implementing a better way to handle the iterators. Patch is 21.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155639.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
deleted file mode 100644
index 2e93dfaf4c275..0000000000000
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BOLT_CORE_MCINSTUTILS_H
-#define BOLT_CORE_MCINSTUTILS_H
-
-#include "bolt/Core/BinaryBasicBlock.h"
-#include <map>
-#include <variant>
-
-namespace llvm {
-namespace bolt {
-
-class BinaryFunction;
-
-/// MCInstReference represents a reference to a constant MCInst as stored either
-/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
-/// (after a CFG is created).
-class MCInstReference {
- using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
-
- // Two cases are possible:
- // * functions with CFG reconstructed - a function stores a collection of
- // basic blocks, each basic block stores a contiguous vector of MCInst
- // * functions without CFG - there are no basic blocks created,
- // the instructions are directly stored in std::map in BinaryFunction
- //
- // In both cases, the direct parent of MCInst is stored together with an
- // iterator pointing to the instruction.
-
- // Helper struct: CFG is available, the direct parent is a basic block,
- // iterator's type is `MCInst *`.
- struct RefInBB {
- RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
- : BB(BB), It(Inst) {}
- RefInBB(const RefInBB &Other) = default;
- RefInBB &operator=(const RefInBB &Other) = default;
-
- const BinaryBasicBlock *BB;
- BinaryBasicBlock::const_iterator It;
-
- bool operator==(const RefInBB &Other) const {
- return BB == Other.BB && It == Other.It;
- }
- };
-
- // Helper struct: CFG is *not* available, the direct parent is a function,
- // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
- // is an instruction's offset).
- struct RefInBF {
- RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
- : BF(BF), It(It) {}
- RefInBF(const RefInBF &Other) = default;
- RefInBF &operator=(const RefInBF &Other) = default;
-
- const BinaryFunction *BF;
- nocfg_const_iterator It;
-
- bool operator==(const RefInBF &Other) const {
- return BF == Other.BF && It->first == Other.It->first;
- }
- };
-
- std::variant<RefInBB, RefInBF> Reference;
-
- // Utility methods to be used like this:
- //
- // if (auto *Ref = tryGetRefInBB())
- // return Ref->doSomething(...);
- // return getRefInBF().doSomethingElse(...);
- const RefInBB *tryGetRefInBB() const {
- assert(std::get_if<RefInBB>(&Reference) ||
- std::get_if<RefInBF>(&Reference));
- return std::get_if<RefInBB>(&Reference);
- }
- const RefInBF &getRefInBF() const {
- assert(std::get_if<RefInBF>(&Reference));
- return *std::get_if<RefInBF>(&Reference);
- }
-
-public:
- /// Constructs an empty reference.
- MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
- /// Constructs a reference to the instruction inside the basic block.
- MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
- : Reference(RefInBB(BB, Inst)) {
- assert(BB && Inst && "Neither BB nor Inst should be nullptr");
- }
- /// Constructs a reference to the instruction inside the basic block.
- MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
- : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
- assert(BB && "Basic block should not be nullptr");
- }
- /// Constructs a reference to the instruction inside the function without
- /// CFG information.
- MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
- : Reference(RefInBF(BF, It)) {
- assert(BF && "Function should not be nullptr");
- }
-
- /// Locates an instruction inside a function and returns a reference.
- static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
-
- bool operator==(const MCInstReference &Other) const {
- return Reference == Other.Reference;
- }
-
- const MCInst &getMCInst() const {
- assert(!empty() && "Empty reference");
- if (auto *Ref = tryGetRefInBB())
- return *Ref->It;
- return getRefInBF().It->second;
- }
-
- operator const MCInst &() const { return getMCInst(); }
-
- bool empty() const {
- if (auto *Ref = tryGetRefInBB())
- return Ref->BB == nullptr;
- return getRefInBF().BF == nullptr;
- }
-
- bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }
-
- const BinaryFunction *getFunction() const {
- assert(!empty() && "Empty reference");
- if (auto *Ref = tryGetRefInBB())
- return Ref->BB->getFunction();
- return getRefInBF().BF;
- }
-
- const BinaryBasicBlock *getBasicBlock() const {
- assert(!empty() && "Empty reference");
- if (auto *Ref = tryGetRefInBB())
- return Ref->BB;
- return nullptr;
- }
-
- raw_ostream &print(raw_ostream &OS) const;
-};
-
-static inline raw_ostream &operator<<(raw_ostream &OS,
- const MCInstReference &Ref) {
- return Ref.print(OS);
-}
-
-} // namespace bolt
-} // namespace llvm
-
-#endif
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index cb865a725d72a..721fd664a3253 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -11,13 +11,187 @@
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
-#include "bolt/Core/MCInstUtils.h"
#include "bolt/Passes/BinaryPasses.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
namespace llvm {
namespace bolt {
+
+/// @brief MCInstReference represents a reference to an MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created). It aims to store the necessary information to be
+/// able to find the specific MCInst in either the BinaryFunction or
+/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
+/// the corresponding instruction can be computed.
+
+struct MCInstInBBReference {
+ BinaryBasicBlock *BB;
+ int64_t BBIndex;
+ MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
+ : BB(BB), BBIndex(BBIndex) {}
+ MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
+ static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
+ for (BinaryBasicBlock &BB : BF)
+ for (size_t I = 0; I < BB.size(); ++I)
+ if (Inst == &BB.getInstructionAtIndex(I))
+ return MCInstInBBReference(&BB, I);
+ return {};
+ }
+ bool operator==(const MCInstInBBReference &RHS) const {
+ return BB == RHS.BB && BBIndex == RHS.BBIndex;
+ }
+ bool operator<(const MCInstInBBReference &RHS) const {
+ return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
+ }
+ operator MCInst &() const {
+ assert(BB != nullptr);
+ return BB->getInstructionAtIndex(BBIndex);
+ }
+ uint64_t getAddress() const {
+ // 4 bytes per instruction on AArch64.
+ // FIXME: the assumption of 4 byte per instruction needs to be fixed before
+ // this method gets used on any non-AArch64 binaries (but should be fine for
+ // pac-ret analysis, as that is an AArch64-specific feature).
+ return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
+ }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
+
+struct MCInstInBFReference {
+ BinaryFunction *BF;
+ uint64_t Offset;
+ MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
+ : BF(BF), Offset(Offset) {}
+
+ static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
+ for (auto &I : BF.instrs())
+ if (Inst == &I.second)
+ return MCInstInBFReference(&BF, I.first);
+ return {};
+ }
+
+ MCInstInBFReference() : BF(nullptr), Offset(0) {}
+ bool operator==(const MCInstInBFReference &RHS) const {
+ return BF == RHS.BF && Offset == RHS.Offset;
+ }
+ bool operator<(const MCInstInBFReference &RHS) const {
+ return std::tie(BF, Offset) < std::tie(RHS.BF, RHS.Offset);
+ }
+ operator MCInst &() const {
+ assert(BF != nullptr);
+ return *BF->getInstructionAtOffset(Offset);
+ }
+
+ uint64_t getOffset() const { return Offset; }
+
+ uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
+
+struct MCInstReference {
+ enum Kind { FunctionParent, BasicBlockParent };
+ Kind ParentKind;
+ union U {
+ MCInstInBBReference BBRef;
+ MCInstInBFReference BFRef;
+ U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
+ U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
+ } U;
+ MCInstReference(MCInstInBBReference BBRef)
+ : ParentKind(BasicBlockParent), U(BBRef) {}
+ MCInstReference(MCInstInBFReference BFRef)
+ : ParentKind(FunctionParent), U(BFRef) {}
+ MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
+ : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
+ MCInstReference(BinaryFunction *BF, uint32_t Offset)
+ : MCInstReference(MCInstInBFReference(BF, Offset)) {}
+
+ static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
+ if (BF.hasCFG())
+ return MCInstInBBReference::get(Inst, BF);
+ return MCInstInBFReference::get(Inst, BF);
+ }
+
+ bool operator<(const MCInstReference &RHS) const {
+ if (ParentKind != RHS.ParentKind)
+ return ParentKind < RHS.ParentKind;
+ switch (ParentKind) {
+ case BasicBlockParent:
+ return U.BBRef < RHS.U.BBRef;
+ case FunctionParent:
+ return U.BFRef < RHS.U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ bool operator==(const MCInstReference &RHS) const {
+ if (ParentKind != RHS.ParentKind)
+ return false;
+ switch (ParentKind) {
+ case BasicBlockParent:
+ return U.BBRef == RHS.U.BBRef;
+ case FunctionParent:
+ return U.BFRef == RHS.U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ operator MCInst &() const {
+ switch (ParentKind) {
+ case BasicBlockParent:
+ return U.BBRef;
+ case FunctionParent:
+ return U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ operator bool() const {
+ switch (ParentKind) {
+ case BasicBlockParent:
+ return U.BBRef.BB != nullptr;
+ case FunctionParent:
+ return U.BFRef.BF != nullptr;
+ }
+ llvm_unreachable("");
+ }
+
+ uint64_t getAddress() const {
+ switch (ParentKind) {
+ case BasicBlockParent:
+ return U.BBRef.getAddress();
+ case FunctionParent:
+ return U.BFRef.getAddress();
+ }
+ llvm_unreachable("");
+ }
+
+ BinaryFunction *getFunction() const {
+ switch (ParentKind) {
+ case FunctionParent:
+ return U.BFRef.BF;
+ case BasicBlockParent:
+ return U.BBRef.BB->getFunction();
+ }
+ llvm_unreachable("");
+ }
+
+ BinaryBasicBlock *getBasicBlock() const {
+ switch (ParentKind) {
+ case FunctionParent:
+ return nullptr;
+ case BasicBlockParent:
+ return U.BBRef.BB;
+ }
+ llvm_unreachable("");
+ }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
+
namespace PAuthGadgetScanner {
// The report classes are designed to be used in an immutable manner.
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index 58cfcab370f16..fc72dc023c590 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -32,7 +32,6 @@ add_llvm_library(LLVMBOLTCore
GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
- MCInstUtils.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
Relocation.cpp
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
deleted file mode 100644
index 3cdb9673d4dc0..0000000000000
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- bolt/Passes/MCInstUtils.cpp ----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "bolt/Core/MCInstUtils.h"
-#include "bolt/Core/BinaryBasicBlock.h"
-#include "bolt/Core/BinaryFunction.h"
-
-#include <iterator>
-
-using namespace llvm;
-using namespace llvm::bolt;
-
-MCInstReference MCInstReference::get(const MCInst *Inst,
- const BinaryFunction &BF) {
- if (BF.hasCFG()) {
- for (BinaryBasicBlock &BB : BF)
- for (MCInst &MI : BB)
- if (&MI == Inst)
- return MCInstReference(&BB, Inst);
- return {};
- }
-
- for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
- if (&I->second == Inst)
- return MCInstReference(&BF, I);
- }
- return {};
-}
-
-raw_ostream &MCInstReference::print(raw_ostream &OS) const {
- if (const RefInBB *Ref = tryGetRefInBB()) {
- OS << "MCInstBBRef<";
- if (Ref->BB == nullptr) {
- OS << "BB:(null)";
- } else {
- unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It);
- OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
- }
- OS << ">";
- return OS;
- }
-
- const RefInBF &Ref = getRefInBF();
- OS << "MCInstBFRef<";
- if (Ref.BF == nullptr)
- OS << "BF:(null)";
- else
- OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.It->first;
- OS << ">";
- return OS;
-}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 5d884e2d37354..65c84ebc8c4f4 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -24,6 +24,39 @@
namespace llvm {
namespace bolt {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
+ OS << "MCInstBBRef<";
+ if (Ref.BB == nullptr)
+ OS << "BB:(null)";
+ else
+ OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
+ OS << ">";
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
+ OS << "MCInstBFRef<";
+ if (Ref.BF == nullptr)
+ OS << "BF:(null)";
+ else
+ OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
+ OS << ">";
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
+ switch (Ref.ParentKind) {
+ case MCInstReference::BasicBlockParent:
+ OS << Ref.U.BBRef;
+ return OS;
+ case MCInstReference::FunctionParent:
+ OS << Ref.U.BFRef;
+ return OS;
+ }
+ llvm_unreachable("");
+}
+
namespace PAuthGadgetScanner {
[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
@@ -58,10 +91,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (int64_t I = 0, E = BB.size(); I < E; ++I)
- Fn(MCInstReference(&BB, I));
+ Fn(MCInstInBBReference(&BB, I));
} else {
- for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I)
- Fn(MCInstReference(&BF, I));
+ for (auto I : BF.instrs())
+ Fn(MCInstInBFReference(&BF, I.first));
}
}
@@ -533,7 +566,7 @@ class SrcSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
MCInstReference Ref = MCInstReference::get(Inst, BF);
- assert(!Ref.empty() && "Expected Inst to be found");
+ assert(Ref && "Expected Inst to be found");
Result.push_back(Ref);
}
return Result;
@@ -1105,7 +1138,7 @@ class DstSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
MCInstReference Ref = MCInstReference::get(Inst, BF);
- assert(!Ref.empty() && "Expected Inst to be found");
+ assert(Ref && "Expected Inst to be found");
Result.push_back(Ref);
}
return Result;
@@ -1312,7 +1345,8 @@ static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
// (such as isBranch at the time of writing this comment), some don't (such
// as isCall). For that reason, call MCInstrDesc's methods explicitly when
// it is important.
- const MCInstrDesc &Desc = BC.MII->get(Inst.getMCInst().getOpcode());
+ const MCInstrDesc &Desc =
+ BC.MII->get(static_cast<const MCInst &>(Inst).getOpcode());
// Tail call should be a branch (but not necessarily an indirect one).
if (!Desc.isBranch())
return false;
@@ -1507,7 +1541,7 @@ void FunctionAnalysisContext::findUnsafeUses(
// This is printed as "[message] in function [name], basic block ...,
// at address ..." when the issue is reported to the user.
Reports.push_back(make_generic_report(
- MCInstReference(&BB, FirstInst),
+ MCInstReference::get(FirstInst, BF),
"Warning: possibly imprecise CFG, the analysis quality may be "
"degraded in this function. According to BOLT, unreachable code is "
"found" /* in function [name]... */));
@@ -1671,49 +1705,31 @@ void Analysis::runOnFunction(BinaryFunction &BF,
}
}
-// Compute the instruction address for printing (may be slow).
-static uint64_t getAddress(const MCInstReference &Inst) {
- const BinaryFunction *BF = Inst.getFunction();
-
- if (Inst.hasCFG()) {
- const BinaryBasicBlock *BB = Inst.getBasicBlock();
-
- auto It = static_cast<BinaryBasicBlock::const_iterator>(&Inst.getMCInst());
- unsigned IndexInBB = std::distance(BB->begin(), It);
-
- // FIXME: this assumes all instructions are 4 bytes in size. This is true
- // for AArch64, but it might be good to extract this function so it can be
- // used elsewhere and for other targets too.
- return BF->getAddress() + BB->getOffset() + IndexInBB * 4;
- }
-
- for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) {
- if (&I->second == &Inst.getMCInst())
- return BF->getAddress() + I->first;
- }
- llvm_unreachable("Instruction not found in function");
-}
-
static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
size_t StartIndex = 0, size_t EndIndex = -1) {
if (EndIndex == (size_t)-1)
EndIndex = BB->size() - 1;
const BinaryFunction *BF = BB->getFunction();
for (unsigned I = StartIndex; I <= EndIndex; ++I) {
- MCInstReference Inst(BB, I);
+ // FIXME: this assumes all instructions are 4 bytes in size. This is true
+ // for AArch64, but it might be good to extract this function so it can be
+ // used elsewhere and for other targets too.
+ uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I;
+ const MCInst &Inst = BB->getInstructionAtIndex(I);
if (BC.MIB->isCFI(Inst))
continue;
- BC.printInstruction(outs(), Inst, getAddress(Inst), BF);
+ BC.printInstruction(outs(), Inst, Address, BF);
}
}
static void reportFoundGadgetInSingleBBSingleRelatedInst(
raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
const MCInstReference Location) {
- const BinaryBasicBlock *BB = Location.getBasicBlock();
- assert(RelatedInst.hasCFG());
- assert(Location.hasCFG());
- if (BB == RelatedInst.getBasicBlock()) {
+ BinaryBasicBlock *BB = Location.getBasicBlock();
+ assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
+ assert(Location.ParentKind == MCInstReference::BasicBlockParent);
+ MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
+ if (BB == RelatedInstBB.BB) {
OS << " This happens in the following basic block:\n";
printBB(BC, BB);
}
@@ -1721,16 +1737,16 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst(
void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
StringRef IssueKind) const {
- const BinaryBasicBlock *BB = Location.getBasicBlock();
- const BinaryFunction *BF = Location.getFunction();
+ BinaryFunction *BF = Location.getFunction();
+ BinaryBasicBlock *BB = Location.getBasicBlock();
OS << "\n...
[truncated]
|
Reverts #138655.
As reported by @nico:
I will resend the patch after implementing a better way to handle the iterators.