Skip to content

Commit fa5557f

Browse files
authored
[BOLT] Refactor MCInstReference and move it to Core (NFC) (llvm#138655)
Refactor MCInstReference class and move it from PAuth gadget scanner to Core. MCInstReference is a class representing a constant reference to an instruction inside a parent entity - either inside a basic block (which has a reference to its parent function) or directly inside a function (when CFG information is not available).
1 parent 8687ef7 commit fa5557f

File tree

5 files changed

+256
-234
lines changed

5 files changed

+256
-234
lines changed

bolt/include/bolt/Core/MCInstUtils.h

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_CORE_MCINSTUTILS_H
10+
#define BOLT_CORE_MCINSTUTILS_H
11+
12+
#include "bolt/Core/BinaryBasicBlock.h"
13+
#include <map>
14+
#include <variant>
15+
16+
namespace llvm {
17+
namespace bolt {
18+
19+
class BinaryFunction;
20+
21+
/// MCInstReference represents a reference to a constant MCInst as stored either
22+
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
23+
/// (after a CFG is created).
24+
class MCInstReference {
25+
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
26+
27+
// Two cases are possible:
28+
// * functions with CFG reconstructed - a function stores a collection of
29+
// basic blocks, each basic block stores a contiguous vector of MCInst
30+
// * functions without CFG - there are no basic blocks created,
31+
// the instructions are directly stored in std::map in BinaryFunction
32+
//
33+
// In both cases, the direct parent of MCInst is stored together with an
34+
// iterator pointing to the instruction.
35+
36+
// Helper struct: CFG is available, the direct parent is a basic block,
37+
// iterator's type is `MCInst *`.
38+
struct RefInBB {
39+
RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
40+
: BB(BB), It(Inst) {}
41+
RefInBB(const RefInBB &Other) = default;
42+
RefInBB &operator=(const RefInBB &Other) = default;
43+
44+
const BinaryBasicBlock *BB;
45+
BinaryBasicBlock::const_iterator It;
46+
47+
bool operator==(const RefInBB &Other) const {
48+
return BB == Other.BB && It == Other.It;
49+
}
50+
};
51+
52+
// Helper struct: CFG is *not* available, the direct parent is a function,
53+
// iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
54+
// is an instruction's offset).
55+
struct RefInBF {
56+
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
57+
: BF(BF), It(It) {}
58+
RefInBF(const RefInBF &Other) = default;
59+
RefInBF &operator=(const RefInBF &Other) = default;
60+
61+
const BinaryFunction *BF;
62+
nocfg_const_iterator It;
63+
64+
bool operator==(const RefInBF &Other) const {
65+
return BF == Other.BF && It->first == Other.It->first;
66+
}
67+
};
68+
69+
std::variant<RefInBB, RefInBF> Reference;
70+
71+
// Utility methods to be used like this:
72+
//
73+
// if (auto *Ref = tryGetRefInBB())
74+
// return Ref->doSomething(...);
75+
// return getRefInBF().doSomethingElse(...);
76+
const RefInBB *tryGetRefInBB() const {
77+
assert(std::get_if<RefInBB>(&Reference) ||
78+
std::get_if<RefInBF>(&Reference));
79+
return std::get_if<RefInBB>(&Reference);
80+
}
81+
const RefInBF &getRefInBF() const {
82+
assert(std::get_if<RefInBF>(&Reference));
83+
return *std::get_if<RefInBF>(&Reference);
84+
}
85+
86+
public:
87+
/// Constructs an empty reference.
88+
MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
89+
/// Constructs a reference to the instruction inside the basic block.
90+
MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
91+
: Reference(RefInBB(BB, Inst)) {
92+
assert(BB && Inst && "Neither BB nor Inst should be nullptr");
93+
}
94+
/// Constructs a reference to the instruction inside the basic block.
95+
MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
96+
: Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
97+
assert(BB && "Basic block should not be nullptr");
98+
}
99+
/// Constructs a reference to the instruction inside the function without
100+
/// CFG information.
101+
MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
102+
: Reference(RefInBF(BF, It)) {
103+
assert(BF && "Function should not be nullptr");
104+
}
105+
106+
/// Locates an instruction inside a function and returns a reference.
107+
static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
108+
109+
bool operator==(const MCInstReference &Other) const {
110+
return Reference == Other.Reference;
111+
}
112+
113+
const MCInst &getMCInst() const {
114+
assert(!empty() && "Empty reference");
115+
if (auto *Ref = tryGetRefInBB())
116+
return *Ref->It;
117+
return getRefInBF().It->second;
118+
}
119+
120+
operator const MCInst &() const { return getMCInst(); }
121+
122+
bool empty() const {
123+
if (auto *Ref = tryGetRefInBB())
124+
return Ref->BB == nullptr;
125+
return getRefInBF().BF == nullptr;
126+
}
127+
128+
bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }
129+
130+
const BinaryFunction *getFunction() const {
131+
assert(!empty() && "Empty reference");
132+
if (auto *Ref = tryGetRefInBB())
133+
return Ref->BB->getFunction();
134+
return getRefInBF().BF;
135+
}
136+
137+
const BinaryBasicBlock *getBasicBlock() const {
138+
assert(!empty() && "Empty reference");
139+
if (auto *Ref = tryGetRefInBB())
140+
return Ref->BB;
141+
return nullptr;
142+
}
143+
144+
raw_ostream &print(raw_ostream &OS) const;
145+
};
146+
147+
/// Streams \p Ref to \p OS by forwarding to MCInstReference::print() and
/// returns the stream reference that print() yields.
///
/// Declared `inline` (not `static inline`) so that all translation units
/// including this header share a single definition instead of each getting an
/// internal-linkage copy.
inline raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
  return Ref.print(OS);
}
151+
152+
} // namespace bolt
153+
} // namespace llvm
154+
155+
#endif

bolt/include/bolt/Passes/PAuthGadgetScanner.h

Lines changed: 1 addition & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -11,187 +11,13 @@
1111

1212
#include "bolt/Core/BinaryContext.h"
1313
#include "bolt/Core/BinaryFunction.h"
14+
#include "bolt/Core/MCInstUtils.h"
1415
#include "bolt/Passes/BinaryPasses.h"
1516
#include "llvm/Support/raw_ostream.h"
1617
#include <memory>
1718

1819
namespace llvm {
1920
namespace bolt {
20-
21-
/// @brief MCInstReference represents a reference to an MCInst as stored either
22-
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
23-
/// (after a CFG is created). It aims to store the necessary information to be
24-
/// able to find the specific MCInst in either the BinaryFunction or
25-
/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
26-
/// the corresponding instruction can be computed.
27-
28-
struct MCInstInBBReference {
29-
BinaryBasicBlock *BB;
30-
int64_t BBIndex;
31-
MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
32-
: BB(BB), BBIndex(BBIndex) {}
33-
MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
34-
static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
35-
for (BinaryBasicBlock &BB : BF)
36-
for (size_t I = 0; I < BB.size(); ++I)
37-
if (Inst == &BB.getInstructionAtIndex(I))
38-
return MCInstInBBReference(&BB, I);
39-
return {};
40-
}
41-
bool operator==(const MCInstInBBReference &RHS) const {
42-
return BB == RHS.BB && BBIndex == RHS.BBIndex;
43-
}
44-
bool operator<(const MCInstInBBReference &RHS) const {
45-
return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
46-
}
47-
operator MCInst &() const {
48-
assert(BB != nullptr);
49-
return BB->getInstructionAtIndex(BBIndex);
50-
}
51-
uint64_t getAddress() const {
52-
// 4 bytes per instruction on AArch64.
53-
// FIXME: the assumption of 4 byte per instruction needs to be fixed before
54-
// this method gets used on any non-AArch64 binaries (but should be fine for
55-
// pac-ret analysis, as that is an AArch64-specific feature).
56-
return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
57-
}
58-
};
59-
60-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
61-
62-
struct MCInstInBFReference {
63-
BinaryFunction *BF;
64-
uint64_t Offset;
65-
MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
66-
: BF(BF), Offset(Offset) {}
67-
68-
static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
69-
for (auto &I : BF.instrs())
70-
if (Inst == &I.second)
71-
return MCInstInBFReference(&BF, I.first);
72-
return {};
73-
}
74-
75-
MCInstInBFReference() : BF(nullptr), Offset(0) {}
76-
bool operator==(const MCInstInBFReference &RHS) const {
77-
return BF == RHS.BF && Offset == RHS.Offset;
78-
}
79-
bool operator<(const MCInstInBFReference &RHS) const {
80-
return std::tie(BF, Offset) < std::tie(RHS.BF, RHS.Offset);
81-
}
82-
operator MCInst &() const {
83-
assert(BF != nullptr);
84-
return *BF->getInstructionAtOffset(Offset);
85-
}
86-
87-
uint64_t getOffset() const { return Offset; }
88-
89-
uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
90-
};
91-
92-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
93-
94-
struct MCInstReference {
95-
enum Kind { FunctionParent, BasicBlockParent };
96-
Kind ParentKind;
97-
union U {
98-
MCInstInBBReference BBRef;
99-
MCInstInBFReference BFRef;
100-
U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
101-
U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
102-
} U;
103-
MCInstReference(MCInstInBBReference BBRef)
104-
: ParentKind(BasicBlockParent), U(BBRef) {}
105-
MCInstReference(MCInstInBFReference BFRef)
106-
: ParentKind(FunctionParent), U(BFRef) {}
107-
MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
108-
: MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
109-
MCInstReference(BinaryFunction *BF, uint32_t Offset)
110-
: MCInstReference(MCInstInBFReference(BF, Offset)) {}
111-
112-
static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
113-
if (BF.hasCFG())
114-
return MCInstInBBReference::get(Inst, BF);
115-
return MCInstInBFReference::get(Inst, BF);
116-
}
117-
118-
bool operator<(const MCInstReference &RHS) const {
119-
if (ParentKind != RHS.ParentKind)
120-
return ParentKind < RHS.ParentKind;
121-
switch (ParentKind) {
122-
case BasicBlockParent:
123-
return U.BBRef < RHS.U.BBRef;
124-
case FunctionParent:
125-
return U.BFRef < RHS.U.BFRef;
126-
}
127-
llvm_unreachable("");
128-
}
129-
130-
bool operator==(const MCInstReference &RHS) const {
131-
if (ParentKind != RHS.ParentKind)
132-
return false;
133-
switch (ParentKind) {
134-
case BasicBlockParent:
135-
return U.BBRef == RHS.U.BBRef;
136-
case FunctionParent:
137-
return U.BFRef == RHS.U.BFRef;
138-
}
139-
llvm_unreachable("");
140-
}
141-
142-
operator MCInst &() const {
143-
switch (ParentKind) {
144-
case BasicBlockParent:
145-
return U.BBRef;
146-
case FunctionParent:
147-
return U.BFRef;
148-
}
149-
llvm_unreachable("");
150-
}
151-
152-
operator bool() const {
153-
switch (ParentKind) {
154-
case BasicBlockParent:
155-
return U.BBRef.BB != nullptr;
156-
case FunctionParent:
157-
return U.BFRef.BF != nullptr;
158-
}
159-
llvm_unreachable("");
160-
}
161-
162-
uint64_t getAddress() const {
163-
switch (ParentKind) {
164-
case BasicBlockParent:
165-
return U.BBRef.getAddress();
166-
case FunctionParent:
167-
return U.BFRef.getAddress();
168-
}
169-
llvm_unreachable("");
170-
}
171-
172-
BinaryFunction *getFunction() const {
173-
switch (ParentKind) {
174-
case FunctionParent:
175-
return U.BFRef.BF;
176-
case BasicBlockParent:
177-
return U.BBRef.BB->getFunction();
178-
}
179-
llvm_unreachable("");
180-
}
181-
182-
BinaryBasicBlock *getBasicBlock() const {
183-
switch (ParentKind) {
184-
case FunctionParent:
185-
return nullptr;
186-
case BasicBlockParent:
187-
return U.BBRef.BB;
188-
}
189-
llvm_unreachable("");
190-
}
191-
};
192-
193-
raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
194-
19521
namespace PAuthGadgetScanner {
19622

19723
// The report classes are designed to be used in an immutable manner.

bolt/lib/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ add_llvm_library(LLVMBOLTCore
3232
GDBIndex.cpp
3333
HashUtilities.cpp
3434
JumpTable.cpp
35+
MCInstUtils.cpp
3536
MCPlusBuilder.cpp
3637
ParallelUtilities.cpp
3738
Relocation.cpp

0 commit comments

Comments
 (0)